Skip to content

Commit ff92545

Browse files
a few CI changes (#3612)
1 parent 0e9d884 commit ff92545

12 files changed

+45
-45
lines changed

.github/scripts/install-cuda-aarch64.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@ install_cuda_aarch64() {
33
echo "install cuda ${CU_VERSION}"
44
# CU_VERSION: cu128 --> CU_VER: 12-8
55
CU_VER=${CU_VERSION:2:2}-${CU_VERSION:4:1}
6+
# CU_VERSION: cu129 --> CU_DOT_VER: 12.9
7+
CU_DOT_VER=${CU_VERSION:2:2}.${CU_VERSION:4:1}
68
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
9+
# the nccl version must match the one libtorch_cuda.so was built with, see https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
710
dnf -y install cuda-compiler-${CU_VER}.aarch64 \
811
cuda-libraries-${CU_VER}.aarch64 \
9-
cuda-libraries-devel-${CU_VER}.aarch64
12+
cuda-libraries-devel-${CU_VER}.aarch64 \
13+
libnccl-2.27.3-1+cuda${CU_DOT_VER} libnccl-devel-2.27.3-1+cuda${CU_DOT_VER} libnccl-static-2.27.3-1+cuda${CU_DOT_VER}
1014
dnf clean all
11-
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
15+
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib64:$LD_LIBRARY_PATH
1216
ls -lart /usr/local/
1317
nvcc --version
1418
echo "cuda ${CU_VER} installed successfully"

.github/scripts/install-torch-tensorrt.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
set -x
33

44
TORCH=$(grep "^torch>" ${PWD}/py/requirements.txt)
5-
TORCHVISION=$(grep "^torchvision" ${PWD}/py/requirements.txt)
5+
TORCHVISION=$(grep "^torchvision>" ${PWD}/tests/py/requirements.txt)
66
INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION}
77
PLATFORM=$(python -c "import sys; print(sys.platform)")
88

@@ -14,8 +14,12 @@ fi
1414

1515
# Install all the dependencies required for Torch-TensorRT
1616
pip install --pre -r ${PWD}/tests/py/requirements.txt
17+
# dependencies in the tests/py/requirements.txt might install a different version of torch or torchvision
18+
# e.g. timm will install the latest torchvision; however, we want to use the torchvision from nightly
19+
# reinstall torch and torchvision to make sure we have the correct version
20+
pip uninstall -y torch torchvision
21+
pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL} --no-deps
1722
pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL}
18-
pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL}
1923

2024

2125
# Install Torch-TensorRT

.github/workflows/build-test-linux-x86_64.yml

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -138,22 +138,7 @@ jobs:
138138
export USE_HOST_DEPS=1
139139
export CI_BUILD=1
140140
pushd .
141-
cd tests/py
142-
major=${PYTHON_VERSION%%.*}
143-
minor=${PYTHON_VERSION#*.}
144-
minor=${minor%%.*}
145-
if (( major > 3 || (major == 3 && minor >= 13) )); then
146-
echo "flashinfer-python is not supported for python version 3.13 or higher"
147-
else
148-
echo "Installing flashinfer-python"
149-
# flashinfer-python is broken on python 3.9 at the moment, so we skip it for now
150-
if (major == 3 && minor == 9); then
151-
echo "Skipping flashinfer-python for python 3.9"
152-
else
153-
python -m pip install flashinfer-python --no-deps
154-
fi
155-
fi
156-
cd dynamo
141+
cd tests/py/dynamo
157142
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
158143
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
159144
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py

.github/workflows/build-test-tensorrt-linux.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ jobs:
9494
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
9595
pre-script: ${{ matrix.pre-script }}
9696
script: |
97+
set -euo pipefail
9798
export USE_HOST_DEPS=1
9899
export CI_BUILD=1
99100
export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
@@ -130,6 +131,7 @@ jobs:
130131
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
131132
pre-script: ${{ matrix.pre-script }}
132133
script: |
134+
set -euo pipefail
133135
export USE_HOST_DEPS=1
134136
export CI_BUILD=1
135137
pushd .
@@ -160,6 +162,7 @@ jobs:
160162
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
161163
pre-script: ${{ matrix.pre-script }}
162164
script: |
165+
set -euo pipefail
163166
export USE_HOST_DEPS=1
164167
export CI_BUILD=1
165168
pushd .
@@ -190,6 +193,7 @@ jobs:
190193
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
191194
pre-script: ${{ matrix.pre-script }}
192195
script: |
196+
set -euo pipefail
193197
export USE_HOST_DEPS=1
194198
export CI_BUILD=1
195199
pushd .
@@ -220,6 +224,7 @@ jobs:
220224
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
221225
pre-script: ${{ matrix.pre-script }}
222226
script: |
227+
set -euo pipefail
223228
export USE_HOST_DEPS=1
224229
export CI_BUILD=1
225230
pushd .
@@ -252,6 +257,7 @@ jobs:
252257
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
253258
pre-script: ${{ matrix.pre-script }}
254259
script: |
260+
set -euo pipefail
255261
export USE_HOST_DEPS=1
256262
export CI_BUILD=1
257263
pushd .
@@ -284,6 +290,7 @@ jobs:
284290
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
285291
pre-script: ${{ matrix.pre-script }}
286292
script: |
293+
set -euo pipefail
287294
export USE_HOST_DEPS=1
288295
export CI_BUILD=1
289296
pushd .
@@ -316,6 +323,7 @@ jobs:
316323
build-matrix: ${{ needs.generate-tensorrt-matrix.outputs.matrix }}
317324
pre-script: ${{ matrix.pre-script }}
318325
script: |
326+
set -euo pipefail
319327
export USE_HOST_DEPS=1
320328
export CI_BUILD=1
321329
pushd .

.github/workflows/build-test-tensorrt-windows.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ jobs:
101101
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
102102
pre-script: packaging/driver_upgrade.bat
103103
script: |
104+
set -euo pipefail
104105
export USE_HOST_DEPS=1
105106
export CI_BUILD=1
106107
pushd .
@@ -133,6 +134,7 @@ jobs:
133134
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
134135
pre-script: packaging/driver_upgrade.bat
135136
script: |
137+
set -euo pipefail
136138
export USE_HOST_DEPS=1
137139
export CI_BUILD=1
138140
pushd .
@@ -160,6 +162,7 @@ jobs:
160162
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
161163
pre-script: packaging/driver_upgrade.bat
162164
script: |
165+
set -euo pipefail
163166
export USE_HOST_DEPS=1
164167
export CI_BUILD=1
165168
pushd .
@@ -187,6 +190,7 @@ jobs:
187190
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
188191
pre-script: packaging/driver_upgrade.bat
189192
script: |
193+
set -euo pipefail
190194
export USE_HOST_DEPS=1
191195
export CI_BUILD=1
192196
pushd .
@@ -214,6 +218,7 @@ jobs:
214218
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
215219
pre-script: packaging/driver_upgrade.bat
216220
script: |
221+
set -euo pipefail
217222
export USE_HOST_DEPS=1
218223
export CI_BUILD=1
219224
pushd .
@@ -243,6 +248,7 @@ jobs:
243248
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
244249
pre-script: packaging/driver_upgrade.bat
245250
script: |
251+
set -euo pipefail
246252
export USE_HOST_DEPS=1
247253
export CI_BUILD=1
248254
pushd .
@@ -272,6 +278,7 @@ jobs:
272278
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
273279
pre-script: packaging/driver_upgrade.bat
274280
script: |
281+
set -euo pipefail
275282
export USE_HOST_DEPS=1
276283
export CI_BUILD=1
277284
pushd .
@@ -300,6 +307,7 @@ jobs:
300307
build-matrix: ${{ needs.substitute-runner.outputs.matrix }}
301308
pre-script: packaging/driver_upgrade.bat
302309
script: |
310+
set -euo pipefail
303311
export USE_HOST_DEPS=1
304312
export CI_BUILD=1
305313
pushd .

.github/workflows/windows-test.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ on:
3939
description: "Prevents a job from failing when a step fails. Set to true to allow a job to pass when exec script step fails."
4040
default: false
4141
type: boolean
42-
42+
architecture:
43+
description: 'CPU architecture to build for'
44+
default: "x64"
45+
type: string
4346
jobs:
4447
test:
4548
strategy:
@@ -107,7 +110,7 @@ jobs:
107110
if: ${{ matrix.tensorrt == '' }}
108111
uses: actions/download-artifact@v4
109112
with:
110-
name: ${{ env.ARTIFACT_NAME }}
113+
name: ${{ env.ARTIFACT_NAME }}${{ inputs.architecture }}
111114
path: ${{ runner.temp }}/artifacts/
112115
- name: Download artifacts
113116
if: ${{ matrix.tensorrt != '' }}

MODULE.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ http_archive(
6666
name = "libtorch",
6767
build_file = "@//third_party/libtorch:BUILD",
6868
strip_prefix = "libtorch",
69-
urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
69+
urls = ["https://download.pytorch.org/libtorch/nightly/cu128/libtorch-shared-with-deps-latest.zip"],
7070
)
7171

7272
# in aarch64 platform you can get libtorch via either local or wheel file

packaging/pre_build_script.sh

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,6 @@ if [[ ${TENSORRT_VERSION} != "" ]]; then
6666
pyproject.toml
6767
fi
6868

69-
if [[ "${CU_VERSION::4}" < "cu12" ]]; then
70-
# replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs
71-
sed -i -e "s/tensorrt-cu12/tensorrt-${CU_VERSION::4}/g" \
72-
-e "s/tensorrt-cu12-bindings/tensorrt-${CU_VERSION::4}-bindings/g" \
73-
-e "s/tensorrt-cu12-libs/tensorrt-${CU_VERSION::4}-libs/g" \
74-
pyproject.toml
75-
fi
76-
7769
cat toolchains/ci_workspaces/MODULE.bazel.tmpl | envsubst > MODULE.bazel
7870

7971
if [[ ${TENSORRT_VERSION} != "" ]]; then

packaging/pre_build_script_windows.sh

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,6 @@ if [[ ${TENSORRT_VERSION} != "" ]]; then
1717
pyproject.toml
1818
fi
1919

20-
if [[ "${CU_VERSION::4}" < "cu12" ]]; then
21-
# replace dependencies from tensorrt-cu12-bindings/libs to tensorrt-cu11-bindings/libs
22-
sed -i -e "s/tensorrt-cu12/tensorrt-${CU_VERSION::4}/g" \
23-
-e "s/tensorrt-cu12-bindings/tensorrt-${CU_VERSION::4}-bindings/g" \
24-
-e "s/tensorrt-cu12-libs/tensorrt-${CU_VERSION::4}-libs/g" \
25-
pyproject.toml
26-
fi
27-
2820
TORCH=$(grep "^torch>" py/requirements.txt)
2921
INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION}
3022

py/requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
numpy
22
packaging
33
pybind11==2.6.2
4-
--extra-index-url https://download.pytorch.org/whl/nightly/cu128
4+
--extra-index-url https://download.pytorch.org/whl/nightly/cu129
55
torch>=2.8.0.dev,<2.9.0
6-
torchvision>=0.22.0.dev,<0.23.0
76
--extra-index-url https://pypi.ngc.nvidia.com
87
pyyaml
98
dllist

tests/py/requirements.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,12 @@ parameterized>=0.2.0
77
pytest>=8.2.1
88
pytest-xdist>=3.6.1
99
pyyaml
10-
timm>=1.0.3
1110
transformers==4.49.0
12-
nvidia-modelopt[all]~=0.27.0; python_version >'3.9' and python_version <'3.13'
13-
--extra-index-url https://pypi.nvidia.com
11+
nvidia-modelopt[all]; python_version >'3.9' and python_version <'3.13'
12+
--extra-index-url https://pypi.nvidia.com
13+
# flashinfer-python is not supported for python version 3.13 or higher
14+
# flashinfer-python is broken on python 3.9 at the moment, so skip it for now
15+
flashinfer-python; python_version >'3.9' and python_version <'3.13'
16+
--extra-index-url https://download.pytorch.org/whl/nightly/cu129
17+
torchvision>=0.23.0.dev,<0.24.0
18+
timm>=1.0.3

toolchains/ci_workspaces/MODULE.bazel.tmpl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "ht
6565
# name = "libtorch",
6666
# build_file = "@//third_party/libtorch:BUILD",
6767
# strip_prefix = "libtorch",
68-
# urls = ["https://download.pytorch.org/libtorch/${CHANNEL}/${CU_VERSION}/libtorch-cxx11-abi-shared-with-deps-latest.zip"],
68+
# urls = ["https://download.pytorch.org/libtorch/${CHANNEL}/${CU_VERSION}/libtorch-shared-with-deps-latest.zip"],
6969
#)
7070

7171
# Download these tarballs manually from the NVIDIA website

0 commit comments

Comments
 (0)