Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into enh/merge-large-num…
Browse files Browse the repository at this point in the history
…-items
  • Loading branch information
elstehle committed Jan 30, 2025
2 parents 57d2b17 + 0c17dbd commit 381e16c
Show file tree
Hide file tree
Showing 209 changed files with 58,325 additions and 3,243 deletions.
73 changes: 31 additions & 42 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,49 +19,51 @@ workflows:
- {jobs: ['build'], std: 'max', cxx: ['msvc2019']}
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang', 'msvc']}
# Current CTK testing:
- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['gcc', 'clang']}
- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
# Disabled until we figure out the issue with the TBB DLL
#- {jobs: ['test'], project: ['libcudacxx', 'thrust'], std: 'max', cxx: ['msvc']}
#- {jobs: ['test'], project: ['thrust'], std: 'max', cxx: ['msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: ['libcudacxx'], std: 'max', cxx: ['msvc'], gpu: 'rtx2080'}
# Split up cub tests:
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc']}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc']}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid1', 'test_lid2'], project: ['cub'], std: 'max', cxx: ['gcc'], gpu: 'rtxa6000'}
- {jobs: ['test_nolid', 'test_lid0'], project: ['cub'], std: 'max', cxx: ['clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: ['cub'], std: 'max', cxx: 'gcc12', gpu: 'h100', sm: 'gpu' }
# Modded builds:
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'max', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'max', cxx: ['gcc'], sm: '90a'}
# Test Thrust 32-bit-only dispatch here, since it's most likely to break. 64-bit-only is tested in nightly.
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
# default_projects: clang-cuda
- {jobs: ['build'], std: 'all', cudacxx: 'clang', cxx: 'clang'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90'}
- {jobs: ['build'], project: 'libcudacxx', std: 'max', cudacxx: 'clang', cxx: 'clang', sm: '90a'}
# nvrtc:
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all'}
- {jobs: ['nvrtc'], project: 'libcudacxx', std: 'all', gpu: 'rtx2080', sm: 'gpu'}
# verify-codegen:
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cudax has different CTK reqs:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], project: 'cudax', ctk: ['12.0'], std: 20, cxx: ['msvc14.36']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], project: 'cudax', ctk: ['12.5'], std: 'all', cxx: ['nvhpc']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['msvc2022']}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 17, cxx: ['gcc'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: ['curr'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc']}
- {jobs: ['test'], project: 'cudax', ctk: ['curr'], std: 20, cxx: ['gcc12', 'clang', 'msvc'], gpu: 'rtx2080'}
# Python and c/parallel jobs:
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6'}
- {jobs: ['test'], project: ['cccl_c_parallel', 'python'], ctk: '12.6', gpu: 'rtx2080'}
# cccl-infra:
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14']}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang']}
- {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'}
- {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}

nightly:
# Edge-case jobs
- {jobs: ['limited'], project: 'cub', std: 17}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit'}
- {jobs: ['limited'], project: 'cub', std: 17, gpu: 'rtx2080'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force32bit', gpu: 'rtx4090'}
- {jobs: ['test_gpu'], project: 'thrust', cmake_options: '-DTHRUST_DISPATCH_TYPE=Force64bit', gpu: 'rtx4090'}
# Old CTK/compiler
- {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc7', 'gcc8', 'gcc9', 'clang14', 'msvc2019']}
- {jobs: ['build'], std: 'all', ctk: '12.0', cxx: ['gcc11'], sm: '60;70;80;90'}
Expand All @@ -70,7 +72,11 @@ workflows:
- {jobs: ['build'], std: 'all', cxx: ['clang14', 'clang15', 'clang16', 'clang17']}
- {jobs: ['build'], std: 'all', cxx: ['msvc2019']}
# Test current CTK
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang18', 'msvc2022']}
- {jobs: ['test'], project: 'cub', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtxa6000'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'v100'}
- {jobs: ['test_lid0'], project: 'cub', std: 'max', cxx: 'gcc', gpu: 'h100', sm: 'gpu' }
- {jobs: ['test'], project: 'thrust', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx4090'}
- {jobs: ['test'], project: 'libcudacxx', std: 'all', cxx: ['gcc', 'clang', 'msvc'], gpu: 'rtx2080'}
# Modded builds:
- {jobs: ['build'], std: 'all', ctk: '12.5', cxx: 'nvhpc'}
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
Expand All @@ -88,26 +94,9 @@ workflows:
- {jobs: ['build'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['gcc12'], sm: "90"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13'], sm: "90a"}
- {jobs: ['build'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['gcc13', 'clang16'], cpu: 'arm64'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14']}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18']}

# # These are waiting on the NVKS nodes:
# - {jobs: ['test'], ctk: '11.1', gpu: 'v100', sm: 'gpu', cxx: 'gcc7', std: [11]}
# - {jobs: ['test'], ctk: '11.1', gpu: 't4', sm: 'gpu', cxx: 'clang14', std: [17]}
# - {jobs: ['test'], ctk: '11.8', gpu: 'rtx2080', sm: 'gpu', cxx: 'gcc11', std: [17]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc7', std: [14]}
# - {jobs: ['test'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all'}
# - {jobs: ['test'], ctk: 'curr', gpu: 'rtx4090', sm: 'gpu', cxx: 'clang14', std: [11]}
# # H100 runners are currently flaky; only build, since those jobs use CPU-only runners:
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc12', std: [11, 20]}
# - {jobs: ['build'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'clang18', std: [17]}
#
# # nvrtc:
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 't4', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'rtxa6000', sm: 'gpu', cxx: 'gcc13', std: [20], project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'l4', sm: 'gpu', cxx: 'gcc13', std: 'all', project: ['libcudacxx']}
# - {jobs: ['nvrtc'], ctk: 'curr', gpu: 'h100', sm: 'gpu', cxx: 'gcc13', std: [11, 20], project: ['libcudacxx']}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0', 'curr'], std: 'all', cxx: ['gcc12'] , gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: ['12.0' ], std: 'all', cxx: ['clang14'], gpu: 'rtx2080'}
- {jobs: ['test'], project: 'cudax', ctk: [ 'curr'], std: 'all', cxx: ['clang18'], gpu: 'rtx2080'}

# Any generated jobs that match the entries in `exclude` will be removed from the final matrix for all workflows.
exclude:
Expand Down
15 changes: 13 additions & 2 deletions ci/windows/build_common.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -20,6 +24,12 @@ if ($script:CL_VERSION_STRING -match "Version (\d+\.\d+)\.\d+") {
Write-Host "Detected cl.exe version: $CL_VERSION"
}

# Module-scoped CMake options that configure_preset places ahead of the
# per-call $CMAKE_OPTIONS on the cmake configure command line.
$script:GLOBAL_CMAKE_OPTIONS = ""
# $CUDA_ARCH defaults to 0; in that case no -DCMAKE_CUDA_ARCHITECTURES
# option is added and the string stays empty.
if ($CUDA_ARCH -ne 0) {
$script:GLOBAL_CMAKE_OPTIONS += "-DCMAKE_CUDA_ARCHITECTURES=$CUDA_ARCH"
}


if (-not $env:CCCL_BUILD_INFIX) {
$env:CCCL_BUILD_INFIX = ""
}
Expand Down Expand Up @@ -56,6 +66,7 @@ Write-Host "NVCC_VERSION=$NVCC_VERSION"
Write-Host "CMAKE_BUILD_PARALLEL_LEVEL=$env:CMAKE_BUILD_PARALLEL_LEVEL"
Write-Host "CTEST_PARALLEL_LEVEL=$env:CTEST_PARALLEL_LEVEL"
Write-Host "CCCL_BUILD_INFIX=$env:CCCL_BUILD_INFIX"
Write-Host "GLOBAL_CMAKE_OPTIONS=$script:GLOBAL_CMAKE_OPTIONS"
Write-Host "Current commit is:"
Write-Host "$(git log -1 --format=short)"
Write-Host "========================================"
Expand All @@ -82,7 +93,7 @@ function configure_preset {
pushd ".."

# Echo the command to stdout, then execute it:
$configure_command = "cmake --preset $PRESET $CMAKE_OPTIONS --log-level VERBOSE"
$configure_command = "cmake --preset $PRESET $script:GLOBAL_CMAKE_OPTIONS $CMAKE_OPTIONS --log-level VERBOSE"
Write-Host $configure_command
Invoke-Expression $configure_command
$test_result = $LastExitCode
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cub.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cub-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_cudax.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(20)]
[int]$CXX_STANDARD = 20
[int]$CXX_STANDARD = 20,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$CURRENT_PATH = Split-Path $pwd -leaf
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
}

Remove-Module -Name build_common
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "cudax-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_libcudacxx.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $GPU_ARCHS
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "libcudacxx-cpp${CXX_STANDARD}"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/build_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ Param(
[Alias("std")]
[ValidateNotNullOrEmpty()]
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0
)

$ErrorActionPreference = "Stop"
Expand All @@ -14,7 +18,7 @@ If($CURRENT_PATH -ne "ci") {
pushd "$PSScriptRoot/.."
}

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpp$CXX_STANDARD"
$CMAKE_OPTIONS = ""
Expand Down
8 changes: 6 additions & 2 deletions ci/windows/test_thrust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Param(
[ValidateSet(11, 14, 17, 20)]
[int]$CXX_STANDARD = 17,
[Parameter(Mandatory = $false)]
[ValidateNotNullOrEmpty()]
[Alias("arch")]
[int]$CUDA_ARCH = 0,
[Parameter(Mandatory = $false)]
[Alias("cpu-only")]
[switch]$CPU_ONLY = $false
)
Expand All @@ -24,11 +28,11 @@ If($CURRENT_PATH -ne "ci") {
}

# Execute the build script:
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD"
$build_command = "$PSScriptRoot/build_thrust.ps1 -std $CXX_STANDARD -arch $CUDA_ARCH"
Write-Host "Executing: $build_command"
Invoke-Expression $build_command

Import-Module $PSScriptRoot/build_common.psm1 -ArgumentList $CXX_STANDARD
Import-Module -Name "$PSScriptRoot/build_common.psm1" -ArgumentList $CXX_STANDARD, $CUDA_ARCH

$PRESET = "thrust-cpu-cpp$CXX_STANDARD"

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/adjacent_difference/subtract_left.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#if !TUNE_BASE
struct policy_hub_t
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using AdjacentDifferencePolicy =
cub::AgentAdjacentDifferencePolicy<TUNE_THREADS_PER_BLOCK,
Expand All @@ -45,7 +45,7 @@ struct policy_hub_t
cub::BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/copy/memcpy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ using block_delay_constructor_t =

struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
using AgentSmallBufferPolicyT = cub::detail::AgentBatchMemcpyPolicy<
TUNE_THREADS,
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/histogram/histogram_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
template <typename SampleT, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
static constexpr cub::BlockLoadAlgorithm load_algorithm =
(TUNE_LOAD_ALGORITHM == cub::BLOCK_LOAD_STRIPED)
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/partition/three_way.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
template <typename InputT>
struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
using ThreeWayPartitionPolicy = //
cub::AgentThreeWayPartitionPolicy<TUNE_THREADS_PER_BLOCK,
Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/reduce/by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

struct reduce_by_key_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using ReduceByKeyPolicyT =
cub::AgentReduceByKeyPolicy<TUNE_THREADS,
Expand All @@ -64,7 +64,7 @@ struct reduce_by_key_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/run_length_encode/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

struct reduce_by_key_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using ReduceByKeyPolicyT =
cub::AgentReduceByKeyPolicy<TUNE_THREADS,
Expand All @@ -66,7 +66,7 @@ struct reduce_by_key_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/run_length_encode/non_trivial_runs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

struct device_rle_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using RleSweepPolicyT =
cub::AgentRlePolicy<TUNE_THREADS,
Expand All @@ -66,7 +66,7 @@ struct device_rle_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/segmented_sort/keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ struct device_seg_sort_policy_hub
{
using DominantT = KeyT;

struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
static constexpr int BLOCK_THREADS = TUNE_THREADS;
static constexpr int RADIX_BITS = TUNE_RADIX_BITS;
Expand Down Expand Up @@ -143,7 +143,7 @@ struct device_seg_sort_policy_hub
TUNE_M_LOAD_MODIFIER>>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
Loading

0 comments on commit 381e16c

Please sign in to comment.