Skip to content

Commit 863b25f

Browse files
miscco and alliepiper
authored
Update CI matrix to use NVKS nodes. (#3572) (#3605)
* Update CI matrix to use NVKS nodes. * Update windows CI scripts to accept -arch. * Move all non-Catch2 device algo tests to lid0/lid1. This makes sure that they run in the correct CI config on appropriate hardware. * Switch to all rtx queues: CUB -> RTXA6000 (48GiB) Thrust -> RTX4090 (24GiB) Others -> RTX2080 (8GiB) Co-authored-by: Allison Piper <[email protected]>
1 parent 2b5ed0b commit 863b25f

8 files changed

+78
-49
lines changed

ci/matrix.yaml

+26-37
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,28 @@ workflows:
1919
- {jobs: ['build'], std: 'max', cxx: ['intel', 'msvc2019']}
2020
- {jobs: ['build'], std: [17, 20], cxx: ['gcc', 'clang', 'msvc']}
2121
# Current CTK testing:
22-
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc', 'clang']}
22+
- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx4090'}
23+
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
2324
# Disabled until we figure out the issue with the TBB dll
24-
#- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc']}
25+
#- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 'rtx4090'}
26+
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'}
2527
# Split up cub tests:
26-
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']}
27-
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
28-
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
29-
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
28+
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
29+
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
30+
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'}
31+
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
3032
# Modded builds:
3133
- {jobs: ['build'], std: [17, 20], ctk: '12.5', cxx: 'nvhpc'}
3234
- {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'}
3335
- {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'}
3436
# Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly.
35-
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
37+
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
3638
# default_projects: clang-cuda
3739
- {jobs: ['build'], std: [17, 20], cudacxx: 'clang', cxx: 'clang'}
3840
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'}
3941
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
4042
# nvrtc:
41-
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'}
43+
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
4244
# verify-codegen:
4345
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
4446
# cudax has different CTK reqs:
@@ -52,19 +54,19 @@ workflows:
5254
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
5355
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
5456
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: [17, 20], cxx: ['gcc', 'clang'], cpu: 'arm64'}
55-
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']}
57+
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080'}
5658
# Python and c/parallel jobs:
57-
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
59+
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080'}
5860
# cccl-infra:
59-
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']}
60-
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
61-
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
61+
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9'], gpu: 'rtx2080'}
62+
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'}
63+
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
6264

6365
nightly:
6466
# Edge-case jobs
65-
- {jobs: ['limited'], project: 'cub', std: 17}
66-
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
67-
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
67+
- {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
68+
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
69+
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
6870
# Old CTK
6971
- {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']}
7072
- {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
@@ -73,7 +75,11 @@ workflows:
7375
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']}
7476
- {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']}
7577
# Test current CTK
76-
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
78+
- {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
79+
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'v100'}
80+
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'h100', sm: 'gpu' }
81+
- {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
82+
- {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
7783
# Modded builds:
7884
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
7985
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
@@ -92,26 +98,9 @@ workflows:
9298
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"}
9399
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"}
94100
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
95-
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
96-
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
97-
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}
98-
99-
# # These are waiting on the NVKS nodes:
100-
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]}
101-
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]}
102-
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
103-
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
104-
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
105-
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]}
106-
# # H100 runners are currently flakey, only build since those use CPU-only runners:
107-
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
108-
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
109-
#
110-
# # nvrtc:
111-
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
112-
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
113-
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
114-
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
101+
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'}
102+
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'}
103+
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080'}
115104

116105
# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
117106
exclude:

ci/windows/build_common.psm1

+13-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ Param(
33
[Alias("std")]
44
[ValidateNotNullOrEmpty()]
55
[ValidateSet(11, 14, 17, 20)]
6-
[int]$CXX_STANDARD = 17
6+
[int]$CXX_STANDARD = 17,
7+
[Parameter(Mandatory = $false)]
8+
[ValidateNotNullOrEmpty()]
9+
[Alias("arch")]
10+
[int]$CUDA_ARCH = 0
711
)
812

913
$ErrorActionPreference = "Stop"
@@ -20,6 +24,12 @@ if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") {
2024
Write-Host "Detected cl.exe version: $CL_VERSION"
2125
}
2226

27+
$script:GLOBAL_CMAKE_OPTIONS = ""
28+
if ($CUDA_ARCH -ne 0) {
29+
$script:GLOBAL_CMAKE_OPTIONS += "-DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH"
30+
}
31+
32+
2333
if (-not $env:CCCL_BUILD_INFIX) {
2434
$env:CCCL_BUILD_INFIX = ""
2535
}
@@ -56,6 +66,7 @@ Write-Host "NVCC_VERSION=$NVCC_VERSION"
5666
Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL"
5767
Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL"
5868
Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX"
69+
Write-Host "GLOBAL_CMAKE_OPTIONS=$script:GLOBAL_CMAKE_OPTIONS"
5970
Write-Host "Current commit is:"
6071
Write-Host "$(git log -1 --format=short)"
6172
Write-Host "========================================"
@@ -82,7 +93,7 @@ function configure_preset {
8293
pushd ".."
8394

8495
# Echo and execute command to stdout:
85-
$configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
96+
$configure_command = "cmake --preset $PRESET $script:GLOBAL_CMAKE_OPTIONS $CMAKE_OPTIONS --log-level VERBOSE"
8697
Write-Host $configure_command
8798
Invoke-Expression $configure_command
8899
$test_result = $LastExitCode

ci/windows/build_cub.ps1

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ Param(
33
[Alias("std")]
44
[ValidateNotNullOrEmpty()]
55
[ValidateSet(11, 14, 17, 20)]
6-
[int]$CXX_STANDARD = 17
6+
[int]$CXX_STANDARD = 17,
7+
[Parameter(Mandatory = $false)]
8+
[ValidateNotNullOrEmpty()]
9+
[Alias("arch")]
10+
[int]$CUDA_ARCH = 0
711
)
812

913
$ErrorActionPreference = "Stop"
@@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
1418
pushd "$PSScriptRoot/.."
1519
}
1620

17-
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
21+
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH
1822

1923
$PRESET = "cub-cpp$CXX_STANDARD"
2024
$CMAKE_OPTIONS = ""

ci/windows/build_cudax.ps1

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ Param(
44
[Alias("std")]
55
[ValidateNotNullOrEmpty()]
66
[ValidateSet(20)]
7-
[int]$CXX_STANDARD = 20
7+
[int]$CXX_STANDARD = 20,
8+
[Parameter(Mandatory = $false)]
9+
[ValidateNotNullOrEmpty()]
10+
[Alias("arch")]
11+
[int]$CUDA_ARCH = 0
812
)
913

1014
$CURRENT_PATH = Split-Path $pwd -leaf
@@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
1418
}
1519

1620
Remove-Module -Name build_common
17-
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
21+
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH
1822

1923
$PRESET = "cudax-cpp$CXX_STANDARD"
2024
$CMAKE_OPTIONS = ""

ci/windows/build_libcudacxx.ps1

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ Param(
33
[Alias("std")]
44
[ValidateNotNullOrEmpty()]
55
[ValidateSet(11, 14, 17, 20)]
6-
[int]$CXX_STANDARD = 17
6+
[int]$CXX_STANDARD = 17,
7+
[Parameter(Mandatory = $false)]
8+
[ValidateNotNullOrEmpty()]
9+
[Alias("arch")]
10+
[int]$CUDA_ARCH = 0
711
)
812

913
$ErrorActionPreference = "Stop"
@@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
1418
pushd "$PSScriptRoot/.."
1519
}
1620

17-
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
21+
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH
1822

1923
$PRESET = "libcudacxx-cpp${CXX_STANDARD}"
2024
$CMAKE_OPTIONS = ""

ci/windows/build_thrust.ps1

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@ Param(
33
[Alias("std")]
44
[ValidateNotNullOrEmpty()]
55
[ValidateSet(11, 14, 17, 20)]
6-
[int]$CXX_STANDARD = 17
6+
[int]$CXX_STANDARD = 17,
7+
[Parameter(Mandatory = $false)]
8+
[ValidateNotNullOrEmpty()]
9+
[Alias("arch")]
10+
[int]$CUDA_ARCH = 0
711
)
812

913
$ErrorActionPreference = "Stop"
@@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
1418
pushd "$PSScriptRoot/.."
1519
}
1620

17-
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
21+
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH
1822

1923
$PRESET = "thrust-cpp$CXX_STANDARD"
2024
$CMAKE_OPTIONS = ""

ci/windows/test_thrust.ps1

+6-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ Param(
55
[ValidateSet(11, 14, 17, 20)]
66
[int]$CXX_STANDARD = 17,
77
[Parameter(Mandatory = $false)]
8+
[ValidateNotNullOrEmpty()]
9+
[Alias("arch")]
10+
[int]$CUDA_ARCH = 0,
11+
[Parameter(Mandatory = $false)]
812
[Alias("cpu-only")]
913
[switch]$CPU_ONLY = $false
1014
)
@@ -24,11 +28,11 @@ If($CURRENT_PATH -ne "ci") {
2428
}
2529

2630
# Execute the build script:
27-
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD"
31+
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD -arch $CUDA_ARCH"
2832
Write-Host "Executing: $build_command"
2933
Invoke-Expression $build_command
3034

31-
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
35+
Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList $CXX_STANDARD, $CUDA_ARCH
3236

3337
$PRESET = "thrust-cpu-cpp$CXX_STANDARD"
3438

cub/test/CMakeLists.txt

+9
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,15 @@ foreach (test_src IN LISTS test_srcs)
370370
set(launcher 0)
371371
endif()
372372

373+
# FIXME: There are a few remaining device algorithm tests that have not been ported to
374+
# use Catch2 and lid variants. Mark these as `lid_0/1` so they'll run in the appropriate
375+
# CI configs:
376+
string(REGEX MATCH "^device_" is_device_test "${test_name}")
377+
_cub_is_fail_test(is_fail_test "${test_name}") # Pass the expanded test name; is_fail_test gates the lid suffix below.
378+
if (is_device_test AND NOT is_fail_test)
379+
string(APPEND test_name ".lid_${launcher}")
380+
endif()
381+
373382
# Only one version of this test.
374383
cub_add_test(test_target ${test_name} "${test_src}" ${cub_target} ${launcher})
375384
cub_configure_cuda_target(${test_target} RDC ${CUB_FORCE_RDC})

0 commit comments

Comments
 (0)