Skip to content

doc: [TRTLLM-325] Integrate the NGC image in Makefile automation and document #4400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "3.9"
services:
tensorrt_llm-dev:
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400
network_mode: host
ipc: host

Expand Down
2 changes: 2 additions & 0 deletions constraints.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
h11>=0.16.0
5 changes: 4 additions & 1 deletion docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir

# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
# NOTE: the requirement specifier must be quoted. RUN uses `/bin/sh -c`, so an
# unquoted `h11>=0.16` is parsed as `h11` plus an output redirection to a file
# named `=0.16`, silently dropping the minimum-version constraint.
RUN pip3 install --upgrade "h11>=0.16" --no-cache-dir


FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand Down Expand Up @@ -102,7 +105,7 @@ COPY benchmarks benchmarks
COPY scripts scripts
COPY tensorrt_llm tensorrt_llm
COPY 3rdparty 3rdparty
COPY .gitmodules setup.py requirements.txt requirements-dev.txt ./
COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./

# Create cache directories for pip and ccache
RUN mkdir -p /root/.cache/pip /root/.cache/ccache
Expand Down
19 changes: 12 additions & 7 deletions docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ PUSH_TO_STAGING ?= 1
DOCKER_BUILD_OPTS ?= --pull --load
DOCKER_BUILD_ARGS ?=
DOCKER_PROGRESS ?= auto
CUDA_ARCHS ?=
PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
ifeq ($(PLATFORM), arm64)
CUDA_ARCHS = '90-real;100-real;120-real'
endif

CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',)
BUILD_WHEEL_OPTS ?=
BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
TORCH_INSTALL_TYPE ?= skip
Expand All @@ -47,6 +43,8 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g
GITHUB_MIRROR ?=
PYTHON_VERSION ?=
NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm
NGC_USE_STAGING ?= 0

define add_local_user
docker build \
Expand Down Expand Up @@ -201,22 +199,29 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION)
ngc-devel_push: DOCKER_BUILD_ARGS = --push
ngc-devel_push: ngc-devel_build ;

ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))

ngc-release_%: STAGE = release
ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM)
ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION)
ngc-release_%: IMAGE_NAME = nvcr.io/nvstaging/tensorrt-llm
ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO)
ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM)

ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
ngc-release_run: WORK_DIR = /app/tensorrt_llm

ngc-manifest_%: STAGE = release
ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO)
ngc-manifest_%: IMAGE_TAG = $(TRT_LLM_VERSION)

ngc-manifest_create:
docker pull $(IMAGE_WITH_TAG)-amd64
docker pull $(IMAGE_WITH_TAG)-arm64
docker manifest create $(IMAGE_WITH_TAG) \
--amend $(IMAGE_WITH_TAG)-amd64 \
--amend $(IMAGE_WITH_TAG)-arm64

ngc-manifest_push:
ngc-manifest_push: ngc-manifest_create
docker manifest push $(IMAGE_WITH_TAG)

build: devel_build ;
Expand Down
33 changes: 22 additions & 11 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,28 @@ make -C docker release_build CUDA_ARCHS="80-real;90-real"

For more build options, see the variables defined in [`Makefile`](Makefile).

### NGC Integration

When building from source, one can conveniently download a docker image for development from
the [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/) and start it like so:

```bash
make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1
```

As before, specifying `LOCAL_USER=1` runs the container with the local user's identity. Specifying `DOCKER_PULL=1`
is optional; when given, the latest image is pulled from the NGC Catalog before the container starts. The source code
is mapped into the container at `/code/tensorrt_llm`.

We also provide an image with pre-installed binaries for release. This can be used like so:

```bash
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1
```

If you want to deploy a specific version of TensorRT-LLM, you can specify the version with
`TRT_LLM_VERSION=<version_tag>`. The application examples and benchmarks are installed in `/app/tensorrt_llm`.

### Jenkins Integration

[`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins.
Expand Down Expand Up @@ -91,14 +113,3 @@ make -C docker trtllm_run LOCAL_USER=1 DOCKER_PULL=1

The argument `DOCKER_PULL=1` instructs `make` to pull the latest version of the image before deploying it in the container.
By default, images are tagged by their `git` branch name and may be frequently updated.

### Binary Compatible Environment

Currently, `BatchManager` is released as a closed source binary library. In order to make it deployable in a wider
scope, the compilation environment needs to be constructed in the following way.

The compilation environment for x86_64 architecture

```bash
make -C docker centos7_push
```
8 changes: 4 additions & 4 deletions jenkins/L0_MergeRequest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
// Container configuration
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505191345-4400"

// TODO: Move common variables to an unified location
BUILD_CORES_REQUEST = "8"
Expand Down
2 changes: 1 addition & 1 deletion jenkins/controlCCache.groovy
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

import java.lang.InterruptedException

DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049"
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"

def createKubernetesPodConfig(image)
{
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu128
-c constraints.txt
accelerate>=0.25.0
build
colored
Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def extract_url(line):
extra_URLs = []
deps = []
for line in requirements:
if line.startswith("#") or line.startswith("-r"):
if line.startswith("#") or line.startswith("-r") or line.startswith(
"-c"):
continue

# handle -i and --extra-index-url options
Expand Down Expand Up @@ -87,6 +88,10 @@ def has_ext_modules(self):
devel_deps, _ = parse_requirements(
Path("requirements-dev-windows.txt"
if on_windows else "requirements-dev.txt"))
constraints_file = Path("constraints.txt")
if constraints_file.exists():
constraints, _ = parse_requirements(constraints_file)
required_deps.extend(constraints)

if on_windows:
package_data = [
Expand Down