diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 1619f3e6c9..6d5618a9b6 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.9" services: tensorrt_llm-dev: - image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934 + image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400 network_mode: host ipc: host diff --git a/constraints.txt b/constraints.txt new file mode 100644 index 0000000000..ac9b5531d6 --- /dev/null +++ b/constraints.txt @@ -0,0 +1,2 @@ +# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj +h11>=0.16.0 diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index d1c8552f7c..1b26d4e6e7 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -72,6 +72,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/ RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir +# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj +RUN pip3 install --upgrade "h11>=0.16" --no-cache-dir + FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton @@ -102,7 +105,7 @@ COPY benchmarks benchmarks COPY scripts scripts COPY tensorrt_llm tensorrt_llm COPY 3rdparty 3rdparty -COPY .gitmodules setup.py requirements.txt requirements-dev.txt ./ +COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./ # Create cache directories for pip and ccache RUN mkdir -p /root/.cache/pip /root/.cache/ccache diff --git a/docker/Makefile b/docker/Makefile index 4726e7f625..6303d75f31 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -28,12 +28,8 @@ PUSH_TO_STAGING ?= 1 DOCKER_BUILD_OPTS ?= --pull --load DOCKER_BUILD_ARGS ?= DOCKER_PROGRESS ?= auto 
-CUDA_ARCHS ?= PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64") -ifeq ($(PLATFORM), arm64) - CUDA_ARCHS = '90-real;100-real;120-real' -endif - +CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',) BUILD_WHEEL_OPTS ?= BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS)) TORCH_INSTALL_TYPE ?= skip @@ -47,6 +43,8 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g GITHUB_MIRROR ?= PYTHON_VERSION ?= NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm +NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm +NGC_USE_STAGING ?= 0 define add_local_user docker build \ @@ -201,22 +199,29 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION) ngc-devel_push: DOCKER_BUILD_ARGS = --push ngc-devel_push: ngc-devel_build ; +ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO)) + ngc-release_%: STAGE = release ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM) ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION) -ngc-release_%: IMAGE_NAME = nvcr.io/nvstaging/tensorrt-llm +ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO) ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM) +ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO)) +ngc-release_run: WORK_DIR = /app/tensorrt_llm + ngc-manifest_%: STAGE = release ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO) ngc-manifest_%: IMAGE_TAG = $(TRT_LLM_VERSION) ngc-manifest_create: + docker pull $(IMAGE_WITH_TAG)-amd64 + docker pull $(IMAGE_WITH_TAG)-arm64 docker manifest create $(IMAGE_WITH_TAG) \ --amend $(IMAGE_WITH_TAG)-amd64 \ --amend $(IMAGE_WITH_TAG)-arm64 -ngc-manifest_push: +ngc-manifest_push: ngc-manifest_create docker manifest push $(IMAGE_WITH_TAG) build: devel_build ; diff --git 
a/docker/README.md b/docker/README.md index b4f1f144e5..d986b8c849 100644 --- a/docker/README.md +++ b/docker/README.md @@ -52,6 +52,28 @@ make -C docker release_build CUDA_ARCHS="80-real;90-real" For more build options, see the variables defined in [`Makefile`](Makefile). +### NGC Integration + +When building from source, one can conveniently download a docker image for development from +the [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/) and start it like so: + +```bash +make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1 +``` + +As before, specifying `LOCAL_USER=1` will run the container with the local user's identity. Specifying `DOCKER_PULL=1` +is optional, but it will pull the latest image from the NGC Catalog. This will map the source code into the container +in the directory `/code/tensorrt_llm`. + +We also provide an image with pre-installed binaries for release. This can be used like so: + +```bash +make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1 +``` + +If you want to deploy a specific version of TensorRT-LLM, you can specify the version with +`TRT_LLM_VERSION=`. The application examples and benchmarks are installed in `/app/tensorrt_llm`. + ### Jenkins Integration [`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins. @@ -91,14 +113,3 @@ make -C docker trtllm_run LOCAL_USER=1 DOCKER_PULL=1 The argument `DOCKER_PULL=1` instructs `make` to pull the latest version of the image before deploying it in the container. By default, images are tagged by their `git` branch name and may be frequently updated. - -### Binary Compatible Environment - -Currently, `BatchManager` is released as a closed source binary library. In order to make it deployable in a wider -scope, the compilation environment needs to be constructed in the following way. 
- -The compilation environment for x86_64 architecture - -```bash -make -C docker centos7_push -``` diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy index 430cf10934..b7f4974b1e 100644 --- a/jenkins/L0_MergeRequest.groovy +++ b/jenkins/L0_MergeRequest.groovy @@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac // Container configuration // available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/ // [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id] -LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934" -LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934" -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505160532-3934" -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505160532-3934" +LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400" +LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400" +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505191345-4400" +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505191345-4400" // TODO: Move 
common variables to an unified location BUILD_CORES_REQUEST = "8" diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index d481c1aa1e..4f202fc1bf 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400" def createKubernetesPodConfig(image) { diff --git a/requirements.txt b/requirements.txt index 88c9654921..feb106108a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cu128 +-c constraints.txt accelerate>=0.25.0 build colored diff --git a/setup.py b/setup.py index 1e28393ced..1b06f0700f 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,8 @@ def extract_url(line): extra_URLs = [] deps = [] for line in requirements: - if line.startswith("#") or line.startswith("-r"): + if line.startswith("#") or line.startswith("-r") or line.startswith( + "-c"): continue # handle -i and --extra-index-url options @@ -87,6 +88,10 @@ def has_ext_modules(self): devel_deps, _ = parse_requirements( Path("requirements-dev-windows.txt" if on_windows else "requirements-dev.txt")) +constraints_file = Path("constraints.txt") +if constraints_file.exists(): + constraints, _ = parse_requirements(constraints_file) + required_deps.extend(constraints) if on_windows: package_data = [