Skip to content

doc: [TRTLLM-325] Integrate the NGC image in Makefile automation and document #4400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: "3.9"
services:
tensorrt_llm-dev:
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400
network_mode: host
ipc: host

Expand Down
2 changes: 2 additions & 0 deletions constraints.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
h11>=0.16.0
5 changes: 4 additions & 1 deletion docker/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ RUN bash ./install_pytorch.sh $TORCH_INSTALL_TYPE && rm install_pytorch.sh
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir

# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
# NOTE: the requirement specifier must be quoted. RUN uses `/bin/sh -c`, so an
# unquoted `h11>=0.16` is parsed as `h11` plus an output redirection to a file
# named `=0.16`, silently dropping the minimum-version constraint.
RUN pip3 install --upgrade "h11>=0.16" --no-cache-dir


FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand Down Expand Up @@ -102,7 +105,7 @@ COPY benchmarks benchmarks
COPY scripts scripts
COPY tensorrt_llm tensorrt_llm
COPY 3rdparty 3rdparty
COPY .gitmodules setup.py requirements.txt requirements-dev.txt ./
COPY .gitmodules setup.py requirements.txt requirements-dev.txt constraints.txt ./

# Create cache directories for pip and ccache
RUN mkdir -p /root/.cache/pip /root/.cache/ccache
Expand Down
19 changes: 12 additions & 7 deletions docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ PUSH_TO_STAGING ?= 1
DOCKER_BUILD_OPTS ?= --pull --load
DOCKER_BUILD_ARGS ?=
DOCKER_PROGRESS ?= auto
CUDA_ARCHS ?=
PLATFORM ?= $(shell uname -m | grep -q 'aarch64' && echo "arm64" || echo "amd64")
ifeq ($(PLATFORM), arm64)
CUDA_ARCHS = '90-real;100-real;120-real'
endif

CUDA_ARCHS ?= $(if $(filter arm64,$(PLATFORM)),'90-real;100-real;120-real',)
BUILD_WHEEL_OPTS ?=
BUILD_WHEEL_ARGS ?= $(shell grep 'ARG BUILD_WHEEL_ARGS=' Dockerfile.multi | grep -o '=.*' | tr -d '="')$(if $(CUDA_ARCHS), --cuda_architectures $(CUDA_ARCHS))$(if $(BUILD_WHEEL_OPTS), $(BUILD_WHEEL_OPTS))
TORCH_INSTALL_TYPE ?= skip
Expand All @@ -47,6 +43,8 @@ TRT_LLM_VERSION ?= $(shell grep '^__version__' ../tensorrt_llm/version.py | g
GITHUB_MIRROR ?=
PYTHON_VERSION ?=
NGC_STAGING_REPO ?= nvcr.io/nvstaging/tensorrt-llm
NGC_REPO ?= nvcr.io/nvidia/tensorrt-llm
NGC_USE_STAGING ?= 0

define add_local_user
docker build \
Expand Down Expand Up @@ -201,22 +199,29 @@ ngc-devel_%: IMAGE_TAG = $(TRT_LLM_VERSION)
ngc-devel_push: DOCKER_BUILD_ARGS = --push
ngc-devel_push: ngc-devel_build ;

ngc-devel_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))

ngc-release_%: STAGE = release
ngc-release_%: DOCKER_BUILD_OPTS = --pull --load --platform linux/$(PLATFORM)
ngc-release_%: DEVEL_IMAGE = $(NGC_STAGING_REPO)/devel:$(TRT_LLM_VERSION)
ngc-release_%: IMAGE_NAME = nvcr.io/nvstaging/tensorrt-llm
ngc-release_%: IMAGE_NAME = $(NGC_STAGING_REPO)
ngc-release_%: IMAGE_TAG = $(TRT_LLM_VERSION)-$(PLATFORM)

ngc-release_run: IMAGE_NAME = $(if $(filter 1,$(NGC_USE_STAGING)),$(NGC_STAGING_REPO),$(NGC_REPO))
ngc-release_run: WORK_DIR = /app/tensorrt_llm

ngc-manifest_%: STAGE = release
ngc-manifest_%: IMAGE_NAME = $(NGC_STAGING_REPO)
ngc-manifest_%: IMAGE_TAG = $(TRT_LLM_VERSION)

ngc-manifest_create:
docker pull $(IMAGE_WITH_TAG)-amd64
docker pull $(IMAGE_WITH_TAG)-arm64
docker manifest create $(IMAGE_WITH_TAG) \
--amend $(IMAGE_WITH_TAG)-amd64 \
--amend $(IMAGE_WITH_TAG)-arm64

ngc-manifest_push:
ngc-manifest_push: ngc-manifest_create
docker manifest push $(IMAGE_WITH_TAG)

build: devel_build ;
Expand Down
33 changes: 22 additions & 11 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,28 @@ make -C docker release_build CUDA_ARCHS="80-real;90-real"

For more build options, see the variables defined in [`Makefile`](Makefile).

### NGC Integration

When building from source, one can conveniently download a docker image for development from
the [NVIDIA NGC Catalog](https://catalog.ngc.nvidia.com/) and start it like so:

```bash
make -C docker ngc-devel_run LOCAL_USER=1 DOCKER_PULL=1
```

As before, specifying `LOCAL_USER=1` runs the container with the local user's identity. Specifying `DOCKER_PULL=1`
is optional; when given, the latest image is pulled from the NGC Catalog before the container starts. The source code
is mapped into the container at `/code/tensorrt_llm`.

We also provide an image with pre-installed binaries for release. This can be used like so:

```bash
make -C docker ngc-release_run LOCAL_USER=1 DOCKER_PULL=1
```

If you want to deploy a specific version of TensorRT-LLM, you can specify the version with
`TRT_LLM_VERSION=<version_tag>`. The application examples and benchmarks are installed in `/app/tensorrt_llm`.

### Jenkins Integration

[`Makefile`](Makefile) has special targets for building, pushing and running the Docker build image used on Jenkins.
Expand Down Expand Up @@ -91,14 +113,3 @@ make -C docker trtllm_run LOCAL_USER=1 DOCKER_PULL=1

The argument `DOCKER_PULL=1` instructs `make` to pull the latest version of the image before deploying it in the container.
By default, images are tagged by their `git` branch name and may be frequently updated.

### Binary Compatible Environment

Currently, `BatchManager` is released as a closed source binary library. In order to make it deployable in a wider
scope, the compilation environment needs to be constructed in the following way.

The compilation environment for x86_64 architecture

```bash
make -C docker centos7_push
```
8 changes: 4 additions & 4 deletions jenkins/L0_MergeRequest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ UPLOAD_PATH = env.uploadPath ? env.uploadPath : "sw-tensorrt-generic/llm-artifac
// Container configuration
// available tags can be found in: https://urm.nvidia.com/artifactory/sw-tensorrt-docker/tensorrt-llm/
// [base_image_name]-[arch]-[os](-[python_version])-[trt_version]-[torch_install_type]-[stage]-[date]-[mr_id]
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505160532-3934"
LLM_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_SBSA_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-aarch64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.10.0.31-skip-tritondevel-202505191345-4400"
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.10.0.31-skip-tritondevel-202505191345-4400"

// TODO: Move common variables to an unified location
BUILD_CORES_REQUEST = "8"
Expand Down
2 changes: 1 addition & 1 deletion jenkins/controlCCache.groovy
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

import java.lang.InterruptedException

DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505121727-4049"
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.04-py3-x86_64-ubuntu24.04-trt10.10.0.31-skip-tritondevel-202505191345-4400"

def createKubernetesPodConfig(image)
{
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cu128
-c constraints.txt
accelerate>=0.25.0
build
colored
Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def extract_url(line):
extra_URLs = []
deps = []
for line in requirements:
if line.startswith("#") or line.startswith("-r"):
if line.startswith("#") or line.startswith("-r") or line.startswith(
"-c"):
continue

# handle -i and --extra-index-url options
Expand Down Expand Up @@ -87,6 +88,10 @@ def has_ext_modules(self):
devel_deps, _ = parse_requirements(
Path("requirements-dev-windows.txt"
if on_windows else "requirements-dev.txt"))
constraints_file = Path("constraints.txt")
if constraints_file.exists():
constraints, _ = parse_requirements(constraints_file)
required_deps.extend(constraints)

if on_windows:
package_data = [
Expand Down