Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CUB tests for segmented sort/radix sort with 64-bit num. items and segments #2254

Merged
merged 51 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
5bff5ed
add segmented [radix] sort tests for 64-bit indices
fbusato Aug 15, 2024
62ac106
Add test for device segmented sort pairs with 64-bit indices
fbusato Aug 16, 2024
994da36
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Aug 16, 2024
dd92bd9
Add a macro guard to protect tests that are not supported yet
fbusato Aug 27, 2024
06ef7c0
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Aug 28, 2024
cf14d60
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Aug 29, 2024
ea067ad
Extend segmented radix sort/sort tests to all offset types, added ove…
fbusato Aug 30, 2024
7fed29d
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Aug 30, 2024
c22e7c1
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Aug 30, 2024
df5230e
Add very large segment case
fbusato Aug 30, 2024
c845c90
Merge branch 'cub/test/64bit_segmented_sort' of github.com:fbusato/cc…
fbusato Aug 30, 2024
dd5b776
Fix formatting issue
fbusato Aug 30, 2024
8d080c1
remove enable macro
fbusato Sep 3, 2024
7983212
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 3, 2024
51921f1
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 4, 2024
58a0fce
Update cub/test/catch2_test_device_segmented_radix_sort_keys.cu
fbusato Sep 6, 2024
17b3fef
Update cub/test/catch2_test_device_segmented_radix_sort_keys.cu
fbusato Sep 6, 2024
24677e5
Fix potential bug for is_override
fbusato Sep 6, 2024
5dd9464
fix formatting issues
fbusato Sep 6, 2024
4db3a55
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 6, 2024
8944ff2
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 10, 2024
281dbc6
move reference computation before kernel call
fbusato Sep 10, 2024
27bc2d0
fix pointer references for correctness checking
fbusato Sep 10, 2024
88efaea
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 10, 2024
9103bbd
remove redundant test cases
fbusato Sep 10, 2024
4eaa1dd
Merge branch 'cub/test/64bit_segmented_sort' of github.com:fbusato/cc…
fbusato Sep 10, 2024
cce6253
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 11, 2024
2166bb7
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
a42cbc6
Update cub/test/catch2_test_device_segmented_sort_pairs.cu
fbusato Sep 20, 2024
c45de31
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 20, 2024
19caa47
Update cub/test/catch2_test_device_segmented_sort_pairs.cu
fbusato Sep 20, 2024
5c056f0
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
4c9be63
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
fd34b05
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
fe82e30
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
7bef872
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
6490db8
Update cub/test/catch2_test_device_segmented_sort_pairs.cu
fbusato Sep 20, 2024
c983761
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
284b998
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
4e9e852
Update cub/test/catch2_test_device_segmented_sort_keys.cu
fbusato Sep 20, 2024
783beb0
Update cub/test/catch2_test_device_segmented_sort_pairs.cu
fbusato Sep 20, 2024
f35fcc0
Update cub/test/catch2_test_device_segmented_sort_pairs.cu
fbusato Sep 20, 2024
e11b57d
propagate cuda managed memory pointer selector
fbusato Sep 21, 2024
0866e80
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 25, 2024
10f656b
fix run-time bugs
fbusato Sep 25, 2024
3d559ae
Merge branch 'cub/test/64bit_segmented_sort' of github.com:fbusato/cc…
fbusato Sep 25, 2024
17d3840
fix formatting
fbusato Sep 25, 2024
b21b78e
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 25, 2024
34684df
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 25, 2024
459a77a
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 26, 2024
f619a3d
Merge branch 'main' into cub/test/64bit_segmented_sort
fbusato Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cub/test/catch2_radix_sort_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ std::pair<c2h::host_vector<KeyT>, c2h::host_vector<ValueT>> segmented_radix_sort
}

template <typename OffsetT>
struct offset_scan_op_t
struct radix_offset_scan_op_t
{
OffsetT num_items;

Expand All @@ -470,5 +470,5 @@ void generate_segment_offsets(c2h::seed_t seed, c2h::device_vector<OffsetT>& off
offsets.end(),
offsets.begin(),
OffsetT{0},
offset_scan_op_t<OffsetT>{static_cast<OffsetT>(num_items)});
radix_offset_scan_op_t<OffsetT>{static_cast<OffsetT>(num_items)});
}
99 changes: 97 additions & 2 deletions cub/test/catch2_test_device_segmented_radix_sort_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include "insert_nested_NVTX_range_guard.h"
// above header needs to be included first

#include <cub/device/device_segmented_radix_sort.cuh>
#include <cub/device/dispatch/dispatch_radix_sort.cuh> // DispatchSegmentedRadixSort
#include <cub/util_type.cuh>

#include <thrust/functional.h>
Expand All @@ -46,11 +46,50 @@
#include "catch2_test_helper.h"
#include "catch2_test_launch_helper.h"

// TODO replace with DeviceSegmentedRadixSort::If interface once https://github.com/NVIDIA/cccl/issues/50 is addressed
fbusato marked this conversation as resolved.
Show resolved Hide resolved
// Temporary wrapper that allows specializing the DeviceSegmentedRadixSort algorithm for different offset types
template <bool IS_DESCENDING, typename KeyT, typename BeginOffsetIteratorT, typename EndOffsetIteratorT, typename NumItemsT>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_segmented_radix_sort_wrapper(
void* d_temp_storage,
size_t& temp_storage_bytes,
const KeyT* d_keys_in,
KeyT* d_keys_out,
NumItemsT num_items,
NumItemsT num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
int begin_bit = 0,
int end_bit = sizeof(KeyT) * 8,
cudaStream_t stream = 0)
{
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
cub::DoubleBuffer<cub::NullType> d_values;
return cub::DispatchSegmentedRadixSort<
IS_DESCENDING,
KeyT,
cub::NullType,
BeginOffsetIteratorT,
EndOffsetIteratorT, //
NumItemsT>::Dispatch(d_temp_storage,
temp_storage_bytes,
d_keys,
d_values,
num_items,
num_segments,
d_begin_offsets,
d_end_offsets,
begin_bit,
end_bit,
false,
fbusato marked this conversation as resolved.
Show resolved Hide resolved
stream);
}

// %PARAM% TEST_LAUNCH lid 0:1:2

DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedRadixSort::SortKeys, sort_keys);
DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedRadixSort::SortKeysDescending, sort_keys_descending);

DECLARE_LAUNCH_WRAPPER(dispatch_segmented_radix_sort_wrapper<true>, dispatch_segmented_radix_sort_descending);
DECLARE_LAUNCH_WRAPPER(dispatch_segmented_radix_sort_wrapper<false>, dispatch_segmented_radix_sort);
// %PARAM% TEST_KEY_BITS key_bits 8:16:32:64

// TODO:
Expand Down Expand Up @@ -457,3 +496,59 @@ CUB_TEST("DeviceSegmentedRadixSort::SortKeys: unspecified ranges",

REQUIRE((ref_keys == out_keys) == true);
}

#if defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)

CUB_TEST("DeviceSegmentedRadixSort::SortKeys: 64-bit num. items and num. segments",
"[keys][segmented][radix][sort][device]")
{
fbusato marked this conversation as resolved.
Show resolved Hide resolved
using key_t = cuda::std::uint8_t; // minimize memory footprint to support a wider range of GPUs
using offset_t = cuda::std::int64_t; // the test requires ~30 GB GPU memory including temporary buffer size
fbusato marked this conversation as resolved.
Show resolved Hide resolved

constexpr std::size_t min_num_items = std::size_t{1} << 31;
constexpr std::size_t max_num_items = min_num_items + (std::size_t{1} << 20);
constexpr int num_key_seeds = 1;
constexpr int num_segment_seeds = 1;
const std::size_t num_items = GENERATE_COPY(take(1, random(min_num_items, max_num_items)));
fbusato marked this conversation as resolved.
Show resolved Hide resolved
const std::size_t num_segments = GENERATE_COPY(take(1, random(min_num_items, max_num_items)));
const bool is_descending = GENERATE(false, true);
CAPTURE(num_items, num_segments, is_descending);

c2h::device_vector<key_t> in_keys(num_items);
c2h::device_vector<key_t> out_keys(num_items);
c2h::device_vector<offset_t> offsets(num_segments + 1);
fbusato marked this conversation as resolved.
Show resolved Hide resolved
c2h::gen(CUB_SEED(num_key_seeds), in_keys);
generate_segment_offsets(CUB_SEED(num_segment_seeds), offsets, static_cast<offset_t>(num_items));

if (is_descending)
{
dispatch_segmented_radix_sort_descending(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1,
begin_bit<key_t>(),
end_bit<key_t>());
}
else
{
dispatch_segmented_radix_sort(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1,
begin_bit<key_t>(),
end_bit<key_t>());
}
// compoute the reference only if the routine is able to terminate correctly
auto ref_keys = segmented_radix_sort_reference(in_keys, is_descending, offsets);
REQUIRE((ref_keys == out_keys) == true);
}

#endif // defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)
123 changes: 122 additions & 1 deletion cub/test/catch2_test_device_segmented_radix_sort_pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#define DEBUG_CHECKED_ALLOC_FAILURE
#define CUB_DETAIL_DEBUG_ENABLE_LOG
fbusato marked this conversation as resolved.
Show resolved Hide resolved
#include "insert_nested_NVTX_range_guard.h"
// above header needs to be included first

#include <cub/device/device_segmented_radix_sort.cuh>
#include <cub/device/dispatch/dispatch_radix_sort.cuh> // DispatchSegmentedRadixSort
#include <cub/util_type.cuh>

#include <thrust/iterator/constant_iterator.h>
Expand All @@ -43,10 +45,58 @@
#include "catch2_test_launch_helper.h"
#include "thrust/detail/raw_pointer_cast.h"

// TODO replace with DeviceSegmentedRadixSort::If interface once https://github.com/NVIDIA/cccl/issues/50 is addressed
fbusato marked this conversation as resolved.
Show resolved Hide resolved
// Temporary wrapper that allows specializing the DeviceSegmentedRadixSort algorithm for different offset types
template <bool IS_DESCENDING,
typename KeyT,
typename ValueT,
typename NumItemsT,
typename BeginOffsetIteratorT,
typename EndOffsetIteratorT>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_segmented_radix_sort_pairs_wrapper(
void* d_temp_storage,
size_t& temp_storage_bytes,
const KeyT* d_keys_in,
KeyT* d_keys_out,
const ValueT* d_values_in,
ValueT* d_values_out,
NumItemsT num_items,
NumItemsT num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
int begin_bit = 0,
int end_bit = sizeof(KeyT) * 8,
cudaStream_t stream = 0)
{
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
cub::DoubleBuffer<ValueT> d_values(const_cast<ValueT*>(d_values_in), d_values_out);
return cub::DispatchSegmentedRadixSort<
IS_DESCENDING,
KeyT,
ValueT,
BeginOffsetIteratorT,
EndOffsetIteratorT, //
NumItemsT>::Dispatch(d_temp_storage,
temp_storage_bytes,
d_keys,
d_values,
num_items,
num_segments,
d_begin_offsets,
d_end_offsets,
begin_bit,
end_bit,
false,
stream);
}

// %PARAM% TEST_LAUNCH lid 0:1:2

DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedRadixSort::SortPairs, sort_pairs);
DECLARE_LAUNCH_WRAPPER(cub::DeviceSegmentedRadixSort::SortPairsDescending, sort_pairs_descending);
DECLARE_LAUNCH_WRAPPER(dispatch_segmented_radix_sort_pairs_wrapper<true>,
dispatch_segmented_radix_sort_pairs_descending);
DECLARE_LAUNCH_WRAPPER(dispatch_segmented_radix_sort_pairs_wrapper<false>, dispatch_segmented_radix_sort_pairs);

using custom_value_t = c2h::custom_type_t<c2h::equal_comparable_t>;
using value_types = c2h::type_list<cuda::std::uint8_t, cuda::std::uint64_t, custom_value_t>;
Expand Down Expand Up @@ -278,3 +328,74 @@ CUB_TEST("DeviceSegmentedRadixSort::SortPairs: unspecified ranges",
REQUIRE((ref_keys == out_keys) == true);
REQUIRE((ref_values == out_values) == true);
}

#if defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)

CUB_TEST("DeviceSegmentedRadixSort::SortPairs: 64-bit num. items and num. segments",
"[pairs][segmented][radix][sort][device]")
{
using key_t = cuda::std::uint8_t; // minimize memory footprint to support a wider range of GPUs
using value_t = cuda::std::uint8_t;
using offset_t = cuda::std::int64_t; // the test requires ~22 GB GPU memory + temporary buffer size

constexpr std::size_t min_num_items = std::size_t{1} << 31;
constexpr std::size_t max_num_items = min_num_items + (std::size_t{1} << 20);
constexpr int num_key_seeds = 1;
constexpr int num_value_seeds = 1;
constexpr int num_segment_seeds = 1;
const std::size_t num_items = GENERATE_COPY(take(2, random(min_num_items, max_num_items)));
const std::size_t num_segments = GENERATE_COPY(take(2, random(min_num_items, max_num_items)));
const bool is_descending = GENERATE(false, true);
CAPTURE(num_items, num_segments, is_descending);

c2h::device_vector<key_t> in_keys(num_items);
c2h::device_vector<offset_t> offsets(num_segments + 1);
c2h::device_vector<value_t> in_values(num_items);
c2h::gen(CUB_SEED(num_key_seeds), in_keys);
c2h::gen(CUB_SEED(num_value_seeds), in_values);
generate_segment_offsets(CUB_SEED(num_segment_seeds), offsets, static_cast<offset_t>(num_items));

// Initialize the output vectors by copying the inputs since not all items may belong to a segment.
c2h::device_vector<key_t> out_keys(in_keys);
c2h::device_vector<value_t> out_values(in_values);

if (is_descending)
{
dispatch_segmented_radix_sort_pairs_descending(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
thrust::raw_pointer_cast(in_values.data()),
thrust::raw_pointer_cast(out_values.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1,
begin_bit<key_t>(),
end_bit<key_t>());
}
else
{
dispatch_segmented_radix_sort_pairs(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
thrust::raw_pointer_cast(in_values.data()),
thrust::raw_pointer_cast(out_values.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1,
begin_bit<key_t>(),
end_bit<key_t>());
}
// compoute the reference only if the routine is able to terminate correctly
auto refs = segmented_radix_sort_reference(in_keys, in_values, is_descending, offsets);
auto& ref_keys = refs.first;
auto& ref_values = refs.second;

REQUIRE(ref_keys == out_keys);
REQUIRE(ref_values == out_values);
}

#endif // defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)
88 changes: 87 additions & 1 deletion cub/test/catch2_test_device_segmented_sort_keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,49 @@

#include "insert_nested_NVTX_range_guard.h"
// above header needs to be included first

#include "catch2_radix_sort_helper.cuh"
#include <catch2_segmented_sort_helper.cuh>
#include <catch2_test_helper.h>

// FIXME: Graph launch disabled, algorithm syncs internally. WAR exists for device-launch, figure out how to enable for
// graph launch.

// TODO replace with DeviceSegmentedSort::If interface once https://github.com/NVIDIA/cccl/issues/50 is addressed
// Temporary wrapper that allows specializing the DeviceSegmentedSort algorithm for different offset types
template <bool IS_DESCENDING, typename KeyT, typename BeginOffsetIteratorT, typename EndOffsetIteratorT, typename NumItemsT>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch_segmented_sort_wrapper(
void* d_temp_storage,
size_t& temp_storage_bytes,
const KeyT* d_keys_in,
KeyT* d_keys_out,
NumItemsT num_items,
NumItemsT num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0)
{
cub::DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
cub::DoubleBuffer<cub::NullType> d_values;
return cub::
DispatchSegmentedSort<IS_DESCENDING, KeyT, cub::NullType, NumItemsT, BeginOffsetIteratorT, EndOffsetIteratorT>::
Dispatch(
d_temp_storage,
temp_storage_bytes,
d_keys,
d_values,
num_items,
num_segments,
d_begin_offsets,
d_end_offsets,
false,
stream);
}

// %PARAM% TEST_LAUNCH lid 0:1

DECLARE_LAUNCH_WRAPPER(dispatch_segmented_sort_wrapper<true>, dispatch_segmented_sort_descending);
DECLARE_LAUNCH_WRAPPER(dispatch_segmented_sort_wrapper<false>, dispatch_segmented_sort);

using key_types =
c2h::type_list<bool,
std::uint8_t,
Expand Down Expand Up @@ -174,3 +208,55 @@ CUB_TEST("DeviceSegmentedSortKeys: Unspecified segments, random keys", "[keys][s
using KeyT = c2h::get<0, TestType>;
test_unspecified_segments_random<KeyT>(CUB_SEED(4));
}

#if defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)

// we can reuse the same structure of DeviceSegmentedRadixSortKeys for simplicity
CUB_TEST("DeviceSegmentedSortKeys: 64-bit num. items and num. segments", "[keys][segmented][sort][device]")
{
using key_t = cuda::std::uint8_t; // minimize memory footprint to support a wider range of GPUs
using offset_t = cuda::std::int64_t; // the test requires ~30 GB GPU memory including temporary buffer size

constexpr std::size_t min_num_items = std::size_t{1} << 31;
constexpr std::size_t max_num_items = min_num_items + (std::size_t{1} << 20);
constexpr int num_key_seeds = 1;
constexpr int num_segment_seeds = 1;
const std::size_t num_items = GENERATE_COPY(take(1, random(min_num_items, max_num_items)));
const std::size_t num_segments = GENERATE_COPY(take(1, random(min_num_items, max_num_items)));
const bool is_descending = GENERATE(false, true);
CAPTURE(num_items, num_segments, is_descending);

c2h::device_vector<key_t> in_keys(num_items);
c2h::device_vector<key_t> out_keys(num_items);
c2h::device_vector<offset_t> offsets(num_segments + 1);
c2h::gen(CUB_SEED(num_key_seeds), in_keys);
generate_segment_offsets(CUB_SEED(num_segment_seeds), offsets, static_cast<offset_t>(num_items));

fbusato marked this conversation as resolved.
Show resolved Hide resolved
if (is_descending)
{
dispatch_segmented_sort_descending(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1);
fbusato marked this conversation as resolved.
Show resolved Hide resolved
}
else
{
dispatch_segmented_sort(
thrust::raw_pointer_cast(in_keys.data()),
thrust::raw_pointer_cast(out_keys.data()),
static_cast<offset_t>(num_items),
static_cast<offset_t>(num_segments),
// Mix pointers/iterators for segment info to test using different iterable types:
thrust::raw_pointer_cast(offsets.data()),
offsets.cbegin() + 1);
fbusato marked this conversation as resolved.
Show resolved Hide resolved
}
// compoute the reference only if the routine is able to terminate correctly
auto ref_keys = segmented_radix_sort_reference(in_keys, is_descending, offsets);
REQUIRE((ref_keys == out_keys) == true);
}

#endif // defined(CCCL_TEST_ENABLE_64BIT_SEGMENTED_SORT)
Loading