Skip to content

Commit

Permalink
Merge branch 'branch/2.8.x' into backport_depr_macros
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber authored Jan 30, 2025
2 parents 49a0ab1 + 3f8c8d5 commit 5196644
Show file tree
Hide file tree
Showing 79 changed files with 1,650 additions and 746 deletions.
2 changes: 1 addition & 1 deletion .github/actions/docs-build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ runs:
# Upload docs as pages artifacts
- name: Upload artifact
if: ${{ inputs.upload_pages_artifact == 'true' }}
uses: actions/upload-pages-artifact@v2
uses: actions/upload-pages-artifact@v3
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ repos:
hooks:
- id: ruff # linter
- id: ruff-format # formatter

# TOML lint & format
- repo: https://github.com/ComPWA/taplo-pre-commit
rev: v0.9.3
hooks:
# See https://github.com/NVIDIA/cccl/issues/3426
# - id: taplo-lint
# exclude: "^docs/"
- id: taplo-format
exclude: "^docs/"

- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
Expand Down
78 changes: 34 additions & 44 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,28 @@ workflows:
- {jobs: ['build'], std: 'max', cxx: ['intel', 'msvc2019']}
- {jobs: ['build'], std: [17, 20], cxx: ['gcc', 'clang', 'msvc']}
# Current CTK testing:
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc']}
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['clang', 'msvc']}
- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
# Disabled until we figure out the issue with the TBB dll
#- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'}
# Split up cub tests:
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
# Modded builds:
- {jobs: ['build'], std: [17, 20], ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'}
# Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly.
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
# default_projects: clang-cuda
- {jobs: ['build'], std: [17, 20], cudacxx: 'clang', cxx: 'clang'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
# nvrtc:
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'}
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
# verify-codegen:
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cudax has different CTK reqs:
Expand All @@ -51,19 +54,19 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: [17, 20], cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080'}
# Python and c/parallel jobs:
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080'}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9']}
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
- {jobs: ['infra'], project: 'cccl', ctk: '11.1', cxx: ['gcc6', 'clang9'], gpu: 'rtx2080'}
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}

nightly:
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
- {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
# Old CTK
- {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']}
- {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
Expand All @@ -72,7 +75,11 @@ workflows:
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']}
# Test current CTK
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
- {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'v100'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
# Modded builds:
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
Expand All @@ -91,26 +98,9 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}

# # These are waiting on the NVKS nodes:
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc6', std: [11]}
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang9', std: [17]}
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang9', std: [11]}
# # H100 runners are currently flakey, only build since those use CPU-only runners:
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
#
# # nvrtc:
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080'}

# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
exclude:
Expand Down Expand Up @@ -278,13 +268,13 @@ projects:

# testing -> Runner with GPU is in a nv-gh-runners testing pool
gpus:
v100: { sm: 70 } # 32 GB, 40 runners
t4: { sm: 75, testing: true } # 16 GB, 8 runners
rtx2080: { sm: 75, testing: true } # 8 GB, 8 runners
rtxa6000: { sm: 86, testing: true } # 48 GB, 12 runners
l4: { sm: 89, testing: true } # 24 GB, 48 runners
rtx4090: { sm: 89, testing: true } # 24 GB, 10 runners
h100: { sm: 90, testing: true } # 80 GB, 16 runners
v100: { sm: 70 } # 32 GB, 40 runners
t4: { sm: 75 } # 16 GB, 10 runners
rtx2080: { sm: 75 } # 8 GB, 12 runners
rtxa6000: { sm: 86 } # 48 GB, 12 runners
l4: { sm: 89 } # 24 GB, 48 runners
rtx4090: { sm: 89 } # 24 GB, 10 runners
h100: { sm: 90 } # 80 GB, 16 runners

# Tags are used to define a `matrix job` in the workflow section.
#
Expand Down
33 changes: 18 additions & 15 deletions ci/test_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,28 @@ print_environment_details

fail_if_no_gpu

readonly prefix="${BUILD_DIR}/python/"
export PYTHONPATH="${prefix}:${PYTHONPATH:-}"
begin_group "⚙️ Existing site-packages"
pip freeze
end_group "⚙️ Existing site-packages"

pushd ../python/cuda_cooperative >/dev/null
for module in cuda_parallel cuda_cooperative; do

run_command "⚙️ Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "🚀 Pytest cuda_cooperative" python -m pytest -v ./tests
pushd "../python/${module}" >/dev/null

popd >/dev/null
TEMP_VENV_DIR="/tmp/${module}_venv"
rm -rf "${TEMP_VENV_DIR}"
python -m venv "${TEMP_VENV_DIR}"
. "${TEMP_VENV_DIR}/bin/activate"
echo 'cuda-cccl @ file:///home/coder/cccl/python/cuda_cccl' > /tmp/cuda-cccl_constraints.txt
run_command "⚙️ Pip install ${module}" pip install -c /tmp/cuda-cccl_constraints.txt .[test]
begin_group "⚙️ ${module} site-packages"
pip freeze
end_group "⚙️ ${module} site-packages"
run_command "🚀 Pytest ${module}" python -m pytest -v ./tests
deactivate

pushd ../python/cuda_parallel >/dev/null
popd >/dev/null

# Temporarily install the package twice to populate include directory as part of the first installation
# and to let manifest discover these includes during the second installation. Do not forget to remove the
# second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed.
run_command "⚙️ Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "⚙️ Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "🚀 Pytest cuda_parallel" python -m pytest -v ./tests

popd >/dev/null
done

print_time_summary
2 changes: 2 additions & 0 deletions ci/update_version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ CUB_CMAKE_VERSION_FILE="lib/cmake/cub/cub-config-version.cmake"
LIBCUDACXX_CMAKE_VERSION_FILE="lib/cmake/libcudacxx/libcudacxx-config-version.cmake"
THRUST_CMAKE_VERSION_FILE="lib/cmake/thrust/thrust-config-version.cmake"
CUDAX_CMAKE_VERSION_FILE="lib/cmake/cudax/cudax-config-version.cmake"
CUDA_CCCL_VERSION_FILE="python/cuda_cccl/cuda/cccl/_version.py"
CUDA_COOPERATIVE_VERSION_FILE="python/cuda_cooperative/cuda/cooperative/_version.py"
CUDA_PARALLEL_VERSION_FILE="python/cuda_parallel/cuda/parallel/_version.py"

Expand Down Expand Up @@ -110,6 +111,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" "
update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)"
update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)"

update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$major.$minor.$patch\""
update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""

Expand Down
15 changes: 13 additions & 2 deletions ci/windows/build_common.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -20,6 +24,12 @@ if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") {
Write-Host "Detected cl.exe version: $CL_VERSION"
}

$script:GLOBAL_CMAKE_OPTIONS = ""
if ($CUDA_ARCH -ne 0) {
$script:GLOBAL_CMAKE_OPTIONS += "-DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH"
}


if (-not $env:CCCL_BUILD_INFIX) {
$env:CCCL_BUILD_INFIX = ""
}
Expand Down Expand Up @@ -56,6 +66,7 @@ Write-Host "NVCC_VERSION=$NVCC_VERSION"
Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL"
Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL"
Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX"
Write-Host "GLOBAL_CMAKE_OPTIONS=$script:GLOBAL_CMAKE_OPTIONS"
Write-Host "Current commit is:"
Write-Host "$(git log -1 --format=short)"
Write-Host "========================================"
Expand All @@ -82,7 +93,7 @@ function configure_preset {
pushd ".."

# Echo and execute command to stdout:
$configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
$configure_command = "cmake --preset $PRESET $script:GLOBAL_CMAKE_OPTIONS $CMAKE_OPTIONS --log-level VERBOSE"
Write-Host $configure_command
Invoke-Expression $configure_command
$test_result = $LastExitCode
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cub.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cub-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cudax.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(20)]
[int]$CXX_STANDARD = 20
[int]$CXX_STANDARD = 20,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$CURRENT_PATH = Split-Path $pwd -leaf
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
}

Remove-Module -Name build_common
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cudax-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_libcudacxx.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "libcudacxx-cpp${CXX_STANDARD}"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/test_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Param(
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0,
[Parameter(Mandatory = $false)]
[Alias("cpu-only")]
[switch]$CPU_ONLY = $false
)
Expand All @@ -24,11 +28,11 @@ If($CURRENT_PATH -ne "ci") {
}

# Execute the build script:
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD"
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD -arch $CUDA_ARCH"
Write-Host "Executing: $build_command"
Invoke-Expression $build_command

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpu-cpp$CXX_STANDARD"

Expand Down
9 changes: 9 additions & 0 deletions cub/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,15 @@ foreach (test_src IN LISTS test_srcs)
set(launcher 0)
endif()

# FIXME: There are a few remaining device algorithm tests that have not been ported to
# use Catch2 and lid variants. Mark these as `lid_0/1` so they'll run in the appropriate
# CI configs:
string(REGEX MATCH "^device_" is_device_test "${test_name}")
_cub_is_fail_test(is_fail_test "%{test_name}")
if (is_device_test AND NOT is_fail_test)
string(APPEND test_name ".lid_${launcher}")
endif()

# Only one version of this test.
cub_add_test(test_target ${test_name} "${test_src}" ${cub_target} ${launcher})
cub_configure_cuda_target(${test_target} RDC ${CUB_FORCE_RDC})
Expand Down
1 change: 1 addition & 0 deletions docs/repo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ autodoc.mock_imports = [
"numba",
"pynvjitlink",
"cuda.bindings",
"cuda.cccl",
"llvmlite",
"numpy",
]
Expand Down
Loading

0 comments on commit 5196644

Please sign in to comment.