From b8e1549eead0a8d33bd4259cf49be2d256e3d105 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 28 Jan 2025 14:52:28 -0500 Subject: [PATCH 1/9] Update CI matrix to use NVKS nodes. General allocation strategy is: - Primary CUB testing continues to use v100 (32GiB). This is because CUB tests often require very large amounts of gmem. - Other CUB builds use t4 (16GiB). These should have enough memory to run most tests. - Thrust testing uses t4 (16GiB). Some tests may require >8GiB, but not as much as CUB requires. - libcudacxx/cudax/python testing uses rtx2080 (8GiB), as these are not as memory intensive as Thrust/CUB. None of the NVKS queues require the testing tag anymore, so this has been removed as well. --- ci/matrix.yaml | 85 +++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 49 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 6a98e8fc5b0..c9632abf87c 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -19,49 +19,50 @@ workflows: - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - - {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc', 'clang']} + - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} # Disabled until we figure out the issue with the TBB dll - #- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc']} + #- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc'], gpu: 't4', sm: 'gpu'} # Split up cub tests: - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']} - - {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']} - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']} - - {jobs: ['test_lid0'], 
project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } + - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100', sm: 'gpu'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'} - {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'} # Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly. - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4', sm: 'gpu'} # default_projects: clang-cuda - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'} - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'} # nvrtc: - - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'} + - {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'} # verify-codegen: - {jobs: ['verify_codegen'], project: 'libcudacxx'} # cudax has different CTK reqs: - - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']} + - {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']} + - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 
20, cxx: ['gcc10', 'gcc11', 'gcc12']} + - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], project: 'cudax', ctk: ['12.5'], std: 'all', cxx: ['nvhpc']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"} - - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"} + - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']} + - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"} + - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']} + - {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080', sm: 'gpu'} # Python and c/parallel jobs: - - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'} + - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080', sm: 'gpu'} # cccl-infra: - - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']} - - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']} + - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} nightly: # Edge-case jobs - - {jobs: ['limited'], project: 'cub', std: 17} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'} + - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080', sm: 'gpu'} 
+ - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4', sm: 'gpu'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4', sm: 'gpu'} # Old CTK/compiler - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} @@ -70,7 +71,10 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} # Test current CTK - - {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']} + - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} @@ -88,26 +92,9 @@ workflows: - {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"} - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"} - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']} - - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']} - -# # These are waiting on the NVKS nodes: -# - {jobs: ['test'], ctk: '11.1', gpu: 
'v100', sm: 'gpu', cxx: 'gcc7', std: [11]} -# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang14', std: [17]} -# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]} -# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]} -# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'} -# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang14', std: [11]} -# # H100 runners are currently flakey, only build since those use CPU-only runners: -# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]} -# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]} -# -# # nvrtc: -# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']} -# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']} -# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']} -# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080', sm: 'gpu'} # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows. 
exclude: @@ -257,13 +244,13 @@ projects: # testing -> Runner with GPU is in a nv-gh-runners testing pool gpus: - v100: { sm: 70 } # 32 GB, 40 runners - t4: { sm: 75, testing: true } # 16 GB, 8 runners - rtx2080: { sm: 75, testing: true } # 8 GB, 8 runners - rtxa6000: { sm: 86, testing: true } # 48 GB, 12 runners - l4: { sm: 89, testing: true } # 24 GB, 48 runners - rtx4090: { sm: 89, testing: true } # 24 GB, 10 runners - h100: { sm: 90, testing: true } # 80 GB, 16 runners + v100: { sm: 70 } # 32 GB, 40 runners + t4: { sm: 75 } # 16 GB, 10 runners + rtx2080: { sm: 75 } # 8 GB, 12 runners + rtxa6000: { sm: 86 } # 48 GB, 12 runners + l4: { sm: 89 } # 24 GB, 48 runners + rtx4090: { sm: 89 } # 24 GB, 10 runners + h100: { sm: 90 } # 80 GB, 16 runners # Tags are used to define a `matrix job` in the workflow section. # From 50864b97c6315845c4eb4ae5039111da93b7b4ff Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 28 Jan 2025 16:31:23 -0500 Subject: [PATCH 2/9] Update windows CI scripts to accept -arch. 
--- ci/windows/build_common.psm1 | 15 +++++++++++++-- ci/windows/build_cub.ps1 | 8 ++++++-- ci/windows/build_cudax.ps1 | 8 ++++++-- ci/windows/build_libcudacxx.ps1 | 8 ++++++-- ci/windows/build_thrust.ps1 | 8 ++++++-- ci/windows/test_thrust.ps1 | 8 ++++++-- 6 files changed, 43 insertions(+), 12 deletions(-) diff --git a/ci/windows/build_common.psm1 b/ci/windows/build_common.psm1 index 1eb5f1a9d63..151bb1f112e 100644 --- a/ci/windows/build_common.psm1 +++ b/ci/windows/build_common.psm1 @@ -3,7 +3,11 @@ Param( [Alias("std")] [ValidateNotNullOrEmpty()] [ValidateSet(11, 14, 17, 20)] - [int]$CXX_STANDARD = 17 + [int]$CXX_STANDARD = 17, + [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0 ) $ErrorActionPreference = "Stop" @@ -20,6 +24,12 @@ if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") { Write-Host "Detected cl.exe version: $CL_VERSION" } +$script:GLOBAL_CMAKE_OPTIONS = "" +if ($CUDA_ARCH -ne 0) { + $script:GLOBAL_CMAKE_OPTIONS += "-DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH" +} + + if (-not $env:CCCL_BUILD_INFIX) { $env:CCCL_BUILD_INFIX = "" } @@ -56,6 +66,7 @@ Write-Host "NVCC_VERSION=$NVCC_VERSION" Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL" Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL" Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX" +Write-Host "GLOBAL_CMAKE_OPTIONS=$script:GLOBAL_CMAKE_OPTIONS" Write-Host "Current commit is:" Write-Host "$(git log -1 --format=short)" Write-Host "========================================" @@ -82,7 +93,7 @@ function configure_preset { pushd ".." 
# Echo and execute command to stdout: - $configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE" + $configure_command = "cmake --preset $PRESET $script:GLOBAL_CMAKE_OPTIONS $CMAKE_OPTIONS --log-level VERBOSE" Write-Host $configure_command Invoke-Expression $configure_command $test_result = $LastExitCode diff --git a/ci/windows/build_cub.ps1 b/ci/windows/build_cub.ps1 index 32e4f71ee9a..27c5360ded9 100644 --- a/ci/windows/build_cub.ps1 +++ b/ci/windows/build_cub.ps1 @@ -3,7 +3,11 @@ Param( [Alias("std")] [ValidateNotNullOrEmpty()] [ValidateSet(11, 14, 17, 20)] - [int]$CXX_STANDARD = 17 + [int]$CXX_STANDARD = 17, + [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0 ) $ErrorActionPreference = "Stop" @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") { pushd "$PSScriptRoot/.." } -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH $PRESET = "cub-cpp$CXX_STANDARD" $CMAKE_OPTIONS = "" diff --git a/ci/windows/build_cudax.ps1 b/ci/windows/build_cudax.ps1 index ca7bd578291..7b8cd0ff771 100644 --- a/ci/windows/build_cudax.ps1 +++ b/ci/windows/build_cudax.ps1 @@ -4,7 +4,11 @@ Param( [Alias("std")] [ValidateNotNullOrEmpty()] [ValidateSet(20)] - [int]$CXX_STANDARD = 20 + [int]$CXX_STANDARD = 20, + [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0 ) $CURRENT_PATH = Split-Path $pwd -leaf @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") { } Remove-Module -Name build_common -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH $PRESET = "cudax-cpp$CXX_STANDARD" $CMAKE_OPTIONS = "" diff --git a/ci/windows/build_libcudacxx.ps1 b/ci/windows/build_libcudacxx.ps1 index a57e2280de7..2f80619f76b 100644 --- a/ci/windows/build_libcudacxx.ps1 +++ 
b/ci/windows/build_libcudacxx.ps1 @@ -3,7 +3,11 @@ Param( [Alias("std")] [ValidateNotNullOrEmpty()] [ValidateSet(11, 14, 17, 20)] - [int]$CXX_STANDARD = 17 + [int]$CXX_STANDARD = 17, + [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0 ) $ErrorActionPreference = "Stop" @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") { pushd "$PSScriptRoot/.." } -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH $PRESET = "libcudacxx-cpp${CXX_STANDARD}" $CMAKE_OPTIONS = "" diff --git a/ci/windows/build_thrust.ps1 b/ci/windows/build_thrust.ps1 index 186ed94eace..bda86859fd4 100644 --- a/ci/windows/build_thrust.ps1 +++ b/ci/windows/build_thrust.ps1 @@ -3,7 +3,11 @@ Param( [Alias("std")] [ValidateNotNullOrEmpty()] [ValidateSet(11, 14, 17, 20)] - [int]$CXX_STANDARD = 17 + [int]$CXX_STANDARD = 17, + [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0 ) $ErrorActionPreference = "Stop" @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") { pushd "$PSScriptRoot/.." 
} -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH $PRESET = "thrust-cpp$CXX_STANDARD" $CMAKE_OPTIONS = "" diff --git a/ci/windows/test_thrust.ps1 b/ci/windows/test_thrust.ps1 index 7c020714208..eabda06df5b 100644 --- a/ci/windows/test_thrust.ps1 +++ b/ci/windows/test_thrust.ps1 @@ -5,6 +5,10 @@ Param( [ValidateSet(11, 14, 17, 20)] [int]$CXX_STANDARD = 17, [Parameter(Mandatory = $false)] + [ValidateNotNullOrEmpty()] + [Alias("arch")] + [int]$CUDA_ARCH = 0, + [Parameter(Mandatory = $false)] [Alias("cpu-only")] [switch]$CPU_ONLY = $false ) @@ -24,11 +28,11 @@ If($CURRENT_PATH -ne "ci") { } # Execute the build script: -$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD" +$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD -arch $CUDA_ARCH" Write-Host "Executing: $build_command" Invoke-Expression $build_command -Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD +Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList $CXX_STANDARD, $CUDA_ARCH $PRESET = "thrust-cpu-cpp$CXX_STANDARD" From 571e72997fb003f168bd1c73cfa9e1e31b14892b Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 28 Jan 2025 19:19:49 -0500 Subject: [PATCH 3/9] Move all non-Catch2 device algo tests to lid0/lid1. This makes sure that they run in the correct CI config on appropriate hardware. --- cub/test/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cub/test/CMakeLists.txt b/cub/test/CMakeLists.txt index 5a093526edd..aaab1984e21 100644 --- a/cub/test/CMakeLists.txt +++ b/cub/test/CMakeLists.txt @@ -370,6 +370,15 @@ foreach (test_src IN LISTS test_srcs) set(launcher 0) endif() + # FIXME: There are a few remaining device algorithm tests that have not been ported to + # use Catch2 and lid variants. 
Mark these as `lid_0/1` so they'll run in the appropriate + # CI configs: + string(REGEX MATCH "^device_" is_device_test "${test_name}") + _cub_is_fail_test(is_fail_test "${test_name}") + if (is_device_test AND NOT is_fail_test) + string(APPEND test_name ".lid_${launcher}") + endif() + # Only one version of this test. cub_add_test(test_target ${test_name} "${test_src}" ${cub_target} ${launcher}) cub_configure_cuda_target(${test_target} RDC ${CUB_FORCE_RDC}) From 6434ef536dcf2ffedacb1b595602eb34d4dbb493 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 28 Jan 2025 19:24:18 -0500 Subject: [PATCH 4/9] Move libcudacxx builds to t4 temporarily. heterogeneous/barrier_abi_v2.pass.cpp is timing out on rtx2080. --- ci/matrix.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index c9632abf87c..1b8d206dcf7 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -20,7 +20,9 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} - - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} + # Switching to t4 temporarily while investigating a bug in heterogeneous/barrier_abi_v2.pass.cpp + # - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} # Disabled until we figure out the issue with the TBB dll #- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc'], gpu: 't4', sm: 'gpu'} # Split up cub tests: @@ -74,7 +76,9 @@ workflows: - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 
'msvc2022'], gpu: 't4', sm: 'gpu'} - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} + # Switching to t4 temporarily while investigating a bug in heterogeneous/barrier_abi_v2.pass.cpp + # - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} From bf68a88934338c6c94985d2baa87a64e1f4be0c3 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 29 Jan 2025 10:58:31 -0500 Subject: [PATCH 5/9] libcudacxx tests fail on t4, too. Moving back to v100. --- ci/matrix.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 1b8d206dcf7..19a9dcfb27c 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -20,9 +20,9 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} - # Switching to t4 temporarily while investigating a bug in heterogeneous/barrier_abi_v2.pass.cpp + # Switching to v100 temporarily while investigating a timeout in heterogeneous/barrier*.pass.cpp # - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'v100', sm: 'gpu'} # Disabled until we figure out the issue with the TBB dll #- {jobs: ['test'], project: ['libcudacxx', 
'thrust'], std: 'max', cxx: ['msvc'], gpu: 't4', sm: 'gpu'} # Split up cub tests: @@ -76,9 +76,9 @@ workflows: - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} - # Switching to t4 temporarily while investigating a bug in heterogeneous/barrier_abi_v2.pass.cpp + # Switching to v100 temporarily while investigating a timeout in heterogeneous/barrier*.pass.cpp # - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} From 2977b2e11cc2e552bcf3e9bea5a08ecd4b91e4ab Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 29 Jan 2025 12:03:39 -0500 Subject: [PATCH 6/9] TEMP override matrix to experiment with libcu++ failures. 
--- ci/matrix.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 19a9dcfb27c..c4484d3d58a 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -8,6 +8,12 @@ workflows: # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'clang16']} # override: + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'v100', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 't4'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'v100'} pull_request: # Old CTK/compiler From 9f70b22fb4c9dde3cbfee5a8ffd8de07909f79c4 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 29 Jan 2025 12:54:02 -0500 Subject: [PATCH 7/9] Remove `sm: 'gpu'` from most CI jobs. This will allow us to reuse more build artifacts, and works around some issues with libcudacxx (#3590). 
--- ci/matrix.yaml | 49 +++++++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index c4484d3d58a..800f1b900ec 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -8,12 +8,6 @@ workflows: # - {jobs: ['test'], project: 'thrust', std: 17, ctk: 'curr', cxx: ['gcc12', 'clang16']} # override: - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 't4', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'v100', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'rtx2080'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 't4'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18'], gpu: 'v100'} pull_request: # Old CTK/compiler @@ -25,23 +19,21 @@ workflows: - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4', sm: 'gpu'} - # Switching to v100 temporarily while investigating a timeout in heterogeneous/barrier*.pass.cpp - # - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'v100', sm: 'gpu'} + - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4'} + - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} # Disabled until we figure out the issue with the TBB dll #- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc'], gpu: 't4', sm: 'gpu'} # Split up cub tests: - - {jobs: ['test_lid0'], 
project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100', sm: 'gpu'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4', sm: 'gpu'} - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4'} + - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4'} - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'} - {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'} # Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly. 
- - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4', sm: 'gpu'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} # default_projects: clang-cuda - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'} @@ -59,18 +51,18 @@ workflows: - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"} - {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080'} # Python and c/parallel jobs: - - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080'} # cccl-infra: - - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'} + - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} nightly: # Edge-case jobs - - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4', sm: 'gpu'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4', sm: 'gpu'} + - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'} + - 
{jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4'} # Old CTK/compiler - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} @@ -79,12 +71,13 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} # Test current CTK - - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} - - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4', sm: 'gpu'} + - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} + - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } + - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} # Switching to v100 temporarily while investigating a timeout in heterogeneous/barrier*.pass.cpp # - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100', sm: 'gpu'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} @@ -102,9 
+95,9 @@ workflows: - {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"} - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"} - {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080', sm: 'gpu'} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080'} # Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows. exclude: From ee229964288cefaf5b8269cf08c0e1c97e2e07fd Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 29 Jan 2025 14:32:26 -0500 Subject: [PATCH 8/9] Move t4 jobs to v100 while runners team investigates issues. --- ci/matrix.yaml | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 800f1b900ec..77bb9785649 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -19,21 +19,29 @@ workflows: - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4'} + # Moving this job to v100 while runners team investigates issues on t4 pool. 
+ # - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4'} + - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'v100'} - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} # Disabled until we figure out the issue with the TBB dll - #- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc'], gpu: 't4', sm: 'gpu'} + #- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 't4'} + - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'} # Split up cub tests: - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4'} - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4'} +# Moving these jobs to v100 while runners team investigates issues on t4 pool. +# - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4'} +# - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100'} + - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'v100'} - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'} - {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'} # Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly. 
- - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} +# Moving this job to v100 while runners team investigates issues on t4 pool. +# - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'v100'} # default_projects: clang-cuda - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'} @@ -61,8 +69,11 @@ workflows: nightly: # Edge-case jobs - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4'} + # Moving these jobs to v100 while runners team investigates issues on t4 pool. 
+ # - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} + # - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'v100'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'v100'} # Old CTK/compiler - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} @@ -72,12 +83,14 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} # Test current CTK - {jobs: ['test_lid0'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} +# Moving this job to v100 while runners team investigates issues on t4 pool. +# - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} + - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } - - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} - # Switching to v100 temporarily while investigating a timeout in heterogeneous/barrier*.pass.cpp - # - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080', sm: 'gpu'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} +# Moving this job to v100 while runners team investigates issues on t4 pool. 
+# - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} + - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'} From ea4d40e93a8c5904ac3f5b433182c9386c15a3b5 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Wed, 29 Jan 2025 15:11:40 -0500 Subject: [PATCH 9/9] Switch to all rtx queues: CUB -> RTXA6000 (48GiB) Thrust -> RTX4090 (24GiB) Others -> RTX2080 (8GiB) --- ci/matrix.yaml | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/ci/matrix.yaml b/ci/matrix.yaml index 77bb9785649..5ec715fb59b 100644 --- a/ci/matrix.yaml +++ b/ci/matrix.yaml @@ -19,29 +19,22 @@ workflows: - {jobs: ['build'], std: 'max', cxx: ['msvc2019']} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']} # Current CTK testing: - # Moving this job to v100 while runners team investigates issues on t4 pool. 
- # - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 't4'} - - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'v100'} + - {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx4090'} - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'} # Disabled until we figure out the issue with the TBB dll - #- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 't4'} + #- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 'rtx4090'} - {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'} # Split up cub tests: - - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100'} -# Moving these jobs to v100 while runners team investigates issues on t4 pool. -# - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 't4'} -# - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 't4'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'v100'} - - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'v100'} - - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } + - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'} + - {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'} + - {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'} - {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: 
'90a'} # Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly. -# Moving this job to v100 while runners team investigates issues on t4 pool. -# - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'v100'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} # default_projects: clang-cuda - {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'} - {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'} @@ -69,11 +62,8 @@ workflows: nightly: # Edge-case jobs - {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'} - # Moving these jobs to v100 while runners team investigates issues on t4 pool. - # - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 't4'} - # - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 't4'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'v100'} - - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'v100'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'} + - {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'} # Old CTK/compiler - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']} - {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'} @@ -82,15 +72,11 @@ workflows: - {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']} - {jobs: ['build'], std: 'all', cxx: ['msvc2019']} # Test current CTK - - {jobs: ['test_lid0'], project: 'cub', 
std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} -# Moving this job to v100 while runners team investigates issues on t4 pool. -# - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} - - {jobs: ['test_nolid', 'test_lid1', 'test_lid2'], project: 'cub', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} - - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' } -# Moving this job to v100 while runners team investigates issues on t4 pool. -# - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 't4'} - - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'v100'} - - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022'], gpu: 'rtx2080'} + - {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'} + - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'v100'} + - {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'h100', sm: 'gpu' } + - {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'} + - {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'} # Modded builds: - {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'} - {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}