diff --git a/cub/detail/temporary_storage.cuh b/cub/detail/temporary_storage.cuh
index 70c67f342b..005a3763ef 100644
--- a/cub/detail/temporary_storage.cuh
+++ b/cub/detail/temporary_storage.cuh
@@ -132,7 +132,7 @@ class alias
: m_slot(slot)
, m_elements(elements)
{
- update_slot();
+ this->update_slot();
}
__host__ __device__ void update_slot()
@@ -156,7 +156,7 @@ public:
__host__ __device__ void grow(std::size_t new_elements)
{
m_elements = new_elements;
- update_slot();
+ this->update_slot();
}
/**
@@ -250,7 +250,7 @@ class layout
slot m_slots[SlotsCount];
std::size_t m_sizes[SlotsCount];
void *m_pointers[SlotsCount];
- bool layout_was_mapped {};
+ bool m_layout_was_mapped {};
public:
layout() = default;
@@ -270,7 +270,7 @@ public:
*/
__host__ __device__ std::size_t get_size()
{
- prepare_interface();
+ this->prepare_interface();
// AliasTemporaries can return error only in mapping stage,
// so it's safe to ignore it here.
@@ -300,12 +300,12 @@ public:
__host__ __device__ cudaError_t map_to_buffer(void *d_temp_storage,
std::size_t temp_storage_bytes)
{
- if (layout_was_mapped)
+ if (m_layout_was_mapped)
{
return cudaErrorAlreadyMapped;
}
- prepare_interface();
+ this->prepare_interface();
cudaError_t error = cudaSuccess;
if ((error = AliasTemporaries(d_temp_storage,
@@ -321,14 +321,14 @@ public:
m_slots[slot_id].set_storage(m_pointers[slot_id]);
}
- layout_was_mapped = true;
+ m_layout_was_mapped = true;
return error;
}
private:
__host__ __device__ void prepare_interface()
{
- if (layout_was_mapped)
+ if (m_layout_was_mapped)
{
return;
}
diff --git a/cub/device/device_segmented_sort.cuh b/cub/device/device_segmented_sort.cuh
index 7a3ded6ed3..bc80275fd0 100644
--- a/cub/device/device_segmented_sort.cuh
+++ b/cub/device/device_segmented_sort.cuh
@@ -171,9 +171,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -225,7 +222,6 @@ struct DeviceSegmentedSort
* be printed to the console. Default is `false`.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -233,8 +229,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
const KeyT *d_keys_in,
KeyT *d_keys_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -245,7 +241,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -319,9 +315,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -373,7 +366,6 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -381,8 +373,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
const KeyT *d_keys_in,
KeyT *d_keys_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -393,7 +385,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -477,9 +469,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -530,15 +519,14 @@ struct DeviceSegmentedSort
* be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
SortKeys(void *d_temp_storage,
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -550,7 +538,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -633,9 +621,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -686,15 +671,14 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
SortKeysDescending(void *d_temp_storage,
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -706,7 +690,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -780,9 +764,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -834,7 +815,6 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -842,14 +822,14 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
const KeyT *d_keys_in,
KeyT *d_keys_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
bool debug_synchronous = false)
{
- return SortKeys(
+ return SortKeys(
d_temp_storage,
temp_storage_bytes,
d_keys_in,
@@ -917,9 +897,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -971,7 +948,6 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -979,15 +955,14 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
const KeyT *d_keys_in,
KeyT *d_keys_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
bool debug_synchronous = false)
{
return SortKeysDescending(d_temp_storage,
temp_storage_bytes,
@@ -1066,9 +1041,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1119,21 +1091,20 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
StableSortKeys(void *d_temp_storage,
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
bool debug_synchronous = false)
{
- return SortKeys(
+ return SortKeys(
d_temp_storage,
temp_storage_bytes,
d_keys,
@@ -1210,9 +1181,6 @@ struct DeviceSegmentedSort
* @tparam KeyT
* [inferred] Key type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1263,22 +1231,20 @@ struct DeviceSegmentedSort
* to be printed to the console. Default is @p false.
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
StableSortKeysDescending(void *d_temp_storage,
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
bool debug_synchronous = false)
{
return SortKeysDescending(d_temp_storage,
temp_storage_bytes,
@@ -1361,9 +1327,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1424,7 +1387,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -1434,8 +1396,8 @@ struct DeviceSegmentedSort
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -1446,7 +1408,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -1529,9 +1491,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1592,7 +1551,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -1602,8 +1560,8 @@ struct DeviceSegmentedSort
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -1614,7 +1572,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -1709,9 +1667,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1768,7 +1723,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -1776,8 +1730,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -1788,7 +1742,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -1880,9 +1834,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -1939,7 +1890,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -1947,8 +1897,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -1959,7 +1909,7 @@ struct DeviceSegmentedSort
using DispatchT = DispatchSegmentedSort;
@@ -2040,9 +1990,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -2102,7 +2049,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -2112,8 +2058,8 @@ struct DeviceSegmentedSort
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -2121,7 +2067,6 @@ struct DeviceSegmentedSort
{
return SortPairs(d_temp_storage,
temp_storage_bytes,
@@ -2202,9 +2147,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -2265,7 +2207,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -2275,8 +2216,8 @@ struct DeviceSegmentedSort
KeyT *d_keys_out,
const ValueT *d_values_in,
ValueT *d_values_out,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -2284,7 +2225,6 @@ struct DeviceSegmentedSort
{
return SortPairsDescending(d_temp_storage,
temp_storage_bytes,
@@ -2376,9 +2316,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -2435,7 +2372,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -2443,8 +2379,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -2452,7 +2388,6 @@ struct DeviceSegmentedSort
{
return SortPairs(d_temp_storage,
temp_storage_bytes,
@@ -2541,9 +2476,6 @@ struct DeviceSegmentedSort
* @tparam ValueT
* [inferred] Value type
*
- * @tparam OffsetT
- * [inferred] Integer type for global offsets
- *
* @tparam BeginOffsetIteratorT
* [inferred] Random-access input iterator type for reading segment
* beginning offsets \iterator
@@ -2600,7 +2532,6 @@ struct DeviceSegmentedSort
*/
template
CUB_RUNTIME_FUNCTION static cudaError_t
@@ -2608,8 +2539,8 @@ struct DeviceSegmentedSort
std::size_t &temp_storage_bytes,
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
- OffsetT num_items,
- unsigned int num_segments,
+ int num_items,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
cudaStream_t stream = 0,
@@ -2617,7 +2548,6 @@ struct DeviceSegmentedSort
{
return SortPairsDescending(d_temp_storage,
temp_storage_bytes,
diff --git a/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/device/dispatch/dispatch_segmented_sort.cuh
index 113fe7128a..69ddf1771f 100644
--- a/cub/device/dispatch/dispatch_segmented_sort.cuh
+++ b/cub/device/dispatch/dispatch_segmented_sort.cuh
@@ -985,7 +985,7 @@ struct DispatchSegmentedSort : SelectedPolicy
OffsetT num_items;
/// The number of segments that comprise the sorting data
- unsigned int num_segments;
+ int num_segments;
/**
* Random-access input iterator to the sequence of beginning offsets of length
@@ -1021,7 +1021,7 @@ struct DispatchSegmentedSort : SelectedPolicy
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
OffsetT num_items,
- unsigned int num_segments,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
bool is_overwrite_okay,
@@ -1132,7 +1132,7 @@ struct DispatchSegmentedSort : SelectedPolicy
small_segments_indices.get(),
medium_indices_iterator,
group_sizes.get(),
- static_cast(num_segments),
+ num_segments,
large_segments_selector,
small_segments_selector,
stream,
@@ -1264,7 +1264,7 @@ struct DispatchSegmentedSort : SelectedPolicy
DoubleBuffer &d_keys,
DoubleBuffer &d_values,
OffsetT num_items,
- unsigned int num_segments,
+ int num_segments,
BeginOffsetIteratorT d_begin_offsets,
EndOffsetIteratorT d_end_offsets,
bool is_overwrite_okay,
@@ -1366,7 +1366,7 @@ private:
small_segments_indices.get(),
medium_indices_iterator,
group_sizes.get(),
- static_cast(num_segments),
+ num_segments,
large_segments_selector,
small_segments_selector,
stream,
@@ -1453,8 +1453,9 @@ private:
}
const unsigned int small_segments = h_group_sizes[1];
- const unsigned int medium_segments = num_segments -
- (large_segments + small_segments);
+ const unsigned int medium_segments =
+ static_cast(num_segments) -
+ (large_segments + small_segments);
const unsigned int small_blocks =
DivideAndRoundUp(small_segments,
@@ -1530,8 +1531,9 @@ private:
{
cudaError_t error = cudaSuccess;
- const unsigned int blocks_in_grid = num_segments;
- const unsigned int threads_in_block = LargeSegmentPolicyT::BLOCK_THREADS;
+ const auto blocks_in_grid = static_cast(num_segments);
+ const auto threads_in_block =
+ static_cast(LargeSegmentPolicyT::BLOCK_THREADS);
// Log kernel configuration
if (debug_synchronous)
diff --git a/test/test_device_segmented_sort.cu b/test/test_device_segmented_sort.cu
index 20ddc4545a..fab1063928 100644
--- a/test/test_device_segmented_sort.cu
+++ b/test/test_device_segmented_sort.cu
@@ -93,15 +93,14 @@ public:
int segment_size {};
};
-template
+template
struct SegmentChecker
{
const KeyT *sorted_keys {};
- const OffsetT *offsets {};
+ const int *offsets {};
SegmentChecker(const KeyT *sorted_keys,
- const OffsetT *offsets)
+ const int *offsets)
: sorted_keys(sorted_keys)
, offsets(offsets)
{}
@@ -124,15 +123,14 @@ struct SegmentChecker
}
};
-template
+template
struct DescendingSegmentChecker
{
const KeyT *sorted_keys{};
- const OffsetT *offsets{};
+ const int *offsets{};
DescendingSegmentChecker(const KeyT *sorted_keys,
- const OffsetT *offsets)
+ const int *offsets)
: sorted_keys(sorted_keys)
, offsets(offsets)
{}
@@ -155,15 +153,14 @@ struct DescendingSegmentChecker
}
};
-template
+template
struct ReversedIota
{
KeyT *data {};
- const OffsetT *offsets {};
+ const int *offsets {};
ReversedIota(KeyT *data,
- const OffsetT *offsets)
+ const int *offsets)
: data(data)
, offsets(offsets)
{}
@@ -183,14 +180,13 @@ struct ReversedIota
};
-template
+template
struct Iota
{
KeyT *data{};
- const OffsetT *offsets{};
+ const int *offsets{};
- Iota(KeyT *data, const OffsetT *offsets)
+ Iota(KeyT *data, const int *offsets)
: data(data)
, offsets(offsets)
{}
@@ -210,14 +206,13 @@ struct Iota
template
class Input
{
thrust::default_random_engine random_engine;
- thrust::device_vector d_segment_sizes;
- thrust::device_vector d_offsets;
- thrust::host_vector h_offsets;
+ thrust::device_vector d_segment_sizes;
+ thrust::device_vector d_offsets;
+ thrust::host_vector h_offsets;
using MaskedValueT = typename std::conditional<
std::is_same::value,
@@ -227,17 +222,17 @@ class Input
using UnwrappedKeyT = typename UnwrapHalfAndBfloat16::Type;
bool reverse {};
- OffsetT num_items {};
+ int num_items {};
thrust::device_vector d_keys;
thrust::device_vector d_values;
public:
- Input(bool reverse, const thrust::host_vector &h_segment_sizes)
+ Input(bool reverse, const thrust::host_vector &h_segment_sizes)
: d_segment_sizes(h_segment_sizes)
, d_offsets(d_segment_sizes.size() + 1)
, h_offsets(d_segment_sizes.size() + 1)
, reverse(reverse)
- , num_items(static_cast(
+ , num_items(static_cast(
thrust::reduce(d_segment_sizes.begin(), d_segment_sizes.end())))
, d_keys(num_items)
, d_values(num_items)
@@ -245,7 +240,7 @@ public:
update();
}
- Input(thrust::host_vector &h_offsets)
+ Input(thrust::host_vector &h_offsets)
: d_offsets(h_offsets)
, h_offsets(h_offsets)
, reverse(false)
@@ -262,7 +257,7 @@ public:
update();
}
- OffsetT get_num_items() const
+ int get_num_items() const
{
return num_items;
}
@@ -292,7 +287,7 @@ public:
return thrust::raw_pointer_cast(d_keys.data());
}
- const thrust::host_vector& get_h_offsets()
+ const thrust::host_vector& get_h_offsets()
{
return h_offsets;
}
@@ -302,7 +297,7 @@ public:
return thrust::raw_pointer_cast(d_values.data());
}
- const OffsetT *get_d_offsets() const
+ const int *get_d_offsets() const
{
return thrust::raw_pointer_cast(d_offsets.data());
}
@@ -319,8 +314,8 @@ public:
thrust::counting_iterator(
static_cast(get_num_segments())),
is_segment_sorted.begin(),
- DescendingSegmentChecker{keys_output,
- get_d_offsets()});
+ DescendingSegmentChecker{keys_output,
+ get_d_offsets()});
}
else
{
@@ -329,8 +324,7 @@ public:
thrust::counting_iterator(
static_cast(get_num_segments())),
is_segment_sorted.begin(),
- SegmentChecker{keys_output,
- get_d_offsets()});
+ SegmentChecker{keys_output, get_d_offsets()});
}
return thrust::reduce(is_segment_sorted.begin(),
@@ -375,7 +369,7 @@ private:
{
thrust::for_each(thrust::counting_iterator(0),
thrust::counting_iterator(total_segments),
- Iota{
+ Iota{
reinterpret_cast(get_d_keys()),
get_d_offsets()});
}
@@ -383,7 +377,7 @@ private:
{
thrust::for_each(thrust::counting_iterator(0),
thrust::counting_iterator(total_segments),
- ReversedIota{
+ ReversedIota{
reinterpret_cast(get_d_keys()),
get_d_offsets()});
}
@@ -392,10 +386,10 @@ private:
}
};
-template
+template
class InputDescription
{
- thrust::host_vector segment_sizes;
+ thrust::host_vector segment_sizes;
public:
InputDescription& add(const SizeGroupDescription &group)
@@ -413,16 +407,16 @@ public:
}
template
- Input gen(bool reverse)
+ Input gen(bool reverse)
{
- return Input(reverse, segment_sizes);
+ return Input(reverse, segment_sizes);
}
};
-template
-class InputDescription
+template <>
+class InputDescription
{
- thrust::host_vector segment_sizes;
+ thrust::host_vector segment_sizes;
public:
InputDescription& add(const SizeGroupDescription &group)
@@ -436,16 +430,15 @@ public:
}
template
- Input gen(bool reverse)
+ Input gen(bool reverse)
{
- return Input(reverse, segment_sizes);
+ return Input(reverse, segment_sizes);
}
};
template
+ typename ValueT>
void Sort(bool pairs,
bool descending,
bool double_buffer,
@@ -460,9 +453,9 @@ void Sort(bool pairs,
ValueT *input_values,
ValueT *output_values,
- OffsetT num_items,
- unsigned int num_segments,
- const OffsetT *d_offsets,
+ int num_items,
+ int num_segments,
+ const int *d_offsets,
int *keys_selector = nullptr,
int *values_selector = nullptr)
@@ -797,8 +790,7 @@ void Sort(bool pairs,
}
template
+ typename ValueT>
std::size_t Sort(bool pairs,
bool descending,
bool double_buffer,
@@ -810,49 +802,49 @@ std::size_t Sort(bool pairs,
ValueT *input_values,
ValueT *output_values,
- OffsetT num_items,
- unsigned int num_segments,
- const OffsetT *d_offsets,
+ int num_items,
+ int num_segments,
+ const int *d_offsets,
int *keys_selector = nullptr,
int *values_selector = nullptr)
{
std::size_t temp_storage_bytes = 42ul;
- Sort(pairs,
- descending,
- double_buffer,
- stable_sort,
- nullptr,
- temp_storage_bytes,
- input_keys,
- output_keys,
- input_values,
- output_values,
- num_items,
- num_segments,
- d_offsets,
- keys_selector,
- values_selector);
+ Sort(pairs,
+ descending,
+ double_buffer,
+ stable_sort,
+ nullptr,
+ temp_storage_bytes,
+ input_keys,
+ output_keys,
+ input_values,
+ output_values,
+ num_items,
+ num_segments,
+ d_offsets,
+ keys_selector,
+ values_selector);
thrust::device_vector temp_storage(temp_storage_bytes);
std::uint8_t *d_temp_storage = thrust::raw_pointer_cast(temp_storage.data());
- Sort(pairs,
- descending,
- double_buffer,
- stable_sort,
- d_temp_storage,
- temp_storage_bytes,
- input_keys,
- output_keys,
- input_values,
- output_values,
- num_items,
- num_segments,
- d_offsets,
- keys_selector,
- values_selector);
+ Sort(pairs,
+ descending,
+ double_buffer,
+ stable_sort,
+ d_temp_storage,
+ temp_storage_bytes,
+ input_keys,
+ output_keys,
+ input_values,
+ output_values,
+ num_items,
+ num_segments,
+ d_offsets,
+ keys_selector,
+ values_selector);
return temp_storage_bytes;
}
@@ -878,7 +870,6 @@ void TestZeroSegments()
using KeyT = std::uint8_t;
using ValueT = std::uint64_t;
- using OffsetT = std::uint32_t;
for (bool stable_sort: { unstable, stable })
{
@@ -892,19 +883,19 @@ void TestZeroSegments()
cub::DoubleBuffer values_buffer(nullptr, nullptr);
values_buffer.selector = 1;
- Sort(sort_pairs,
- sort_descending,
- sort_buffer,
- stable_sort,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- OffsetT{},
- OffsetT{},
- nullptr,
- &keys_buffer.selector,
- &values_buffer.selector);
+ Sort(sort_pairs,
+ sort_descending,
+ sort_buffer,
+ stable_sort,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ int{},
+ int{},
+ nullptr,
+ &keys_buffer.selector,
+ &values_buffer.selector);
AssertEquals(keys_buffer.selector, 0);
AssertEquals(values_buffer.selector, 1);
@@ -915,17 +906,16 @@ void TestZeroSegments()
}
-void TestEmptySegments(unsigned int segments)
+void TestEmptySegments(int segments)
{
// Type doesn't affect the escape logic, so it should be fine
// to test only one set of types here.
using KeyT = std::uint8_t;
using ValueT = std::uint64_t;
- using OffsetT = std::uint32_t;
- thrust::device_vector offsets(segments + 1, OffsetT{});
- const OffsetT *d_offsets = thrust::raw_pointer_cast(offsets.data());
+ thrust::device_vector offsets(segments + 1, int{});
+ const int *d_offsets = thrust::raw_pointer_cast(offsets.data());
for (bool sort_stable: { unstable, stable })
{
@@ -939,19 +929,19 @@ void TestEmptySegments(unsigned int segments)
cub::DoubleBuffer values_buffer(nullptr, nullptr);
values_buffer.selector = 1;
- Sort(sort_pairs,
- sort_descending,
- sort_buffer,
- sort_stable,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- OffsetT{},
- segments,
- d_offsets,
- &keys_buffer.selector,
- &values_buffer.selector);
+ Sort(sort_pairs,
+ sort_descending,
+ sort_buffer,
+ sort_stable,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ int{},
+ segments,
+ d_offsets,
+ &keys_buffer.selector,
+ &values_buffer.selector);
AssertEquals(keys_buffer.selector, 0);
AssertEquals(values_buffer.selector, 1);
@@ -963,22 +953,21 @@ void TestEmptySegments(unsigned int segments)
template
-void TestSameSizeSegments(OffsetT segment_size,
- unsigned int segments,
+ typename ValueT>
+void TestSameSizeSegments(int segment_size,
+ int segments,
bool skip_values = false)
{
- const OffsetT num_items = segment_size * segments;
+ const int num_items = segment_size * segments;
using UnwrappedKeyT = typename UnwrapHalfAndBfloat16::Type;
- thrust::device_vector offsets(segments + 1);
+ thrust::device_vector offsets(segments + 1);
thrust::sequence(offsets.begin(),
offsets.end(),
- OffsetT{},
- OffsetT{segment_size});
+ int{},
+ segment_size);
- const OffsetT *d_offsets = thrust::raw_pointer_cast(offsets.data());
+ const int *d_offsets = thrust::raw_pointer_cast(offsets.data());
const KeyT target_key {42};
const ValueT target_value {42};
@@ -1033,19 +1022,19 @@ void TestSameSizeSegments(OffsetT segment_size,
}
const std::size_t temp_storage_bytes =
- Sort(sort_pairs,
- sort_descending,
- sort_buffers,
- stable_sort,
- d_keys_input,
- d_keys_output,
- d_values_input,
- d_values_output,
- num_items,
- segments,
- d_offsets,
- &keys_buffer.selector,
- &values_buffer.selector);
+ Sort(sort_pairs,
+ sort_descending,
+ sort_buffers,
+ stable_sort,
+ d_keys_input,
+ d_keys_output,
+ d_values_input,
+ d_values_output,
+ num_items,
+ segments,
+ d_offsets,
+ &keys_buffer.selector,
+ &values_buffer.selector);
// If temporary storage size is defined by extra keys storage
if (sort_buffers)
@@ -1088,7 +1077,7 @@ void TestSameSizeSegments(OffsetT segment_size,
reinterpret_cast(d_keys_input +
num_items),
static_cast(target_key));
- AssertEquals(items_selected, num_items);
+ AssertEquals(static_cast(items_selected), num_items);
}
if (sort_pairs)
@@ -1110,7 +1099,7 @@ void TestSameSizeSegments(OffsetT segment_size,
target_value);
} ();
- AssertEquals(items_selected, num_items);
+ AssertEquals(static_cast(items_selected), num_items);
}
}
}
@@ -1120,10 +1109,9 @@ void TestSameSizeSegments(OffsetT segment_size,
template
+ typename ValueT>
void InputTest(bool sort_descending,
- Input &input)
+ Input &input)
{
thrust::device_vector keys_output(input.get_num_items());
KeyT *d_keys_output = thrust::raw_pointer_cast(keys_output.data());
@@ -1147,19 +1135,19 @@ void InputTest(bool sort_descending,
cub::DoubleBuffer values_buffer(input.get_d_values(),
d_values_output);
- Sort(sort_pairs,
- sort_descending,
- sort_buffers,
- stable_sort,
- input.get_d_keys(),
- d_keys_output,
- input.get_d_values(),
- d_values_output,
- input.get_num_items(),
- input.get_num_segments(),
- input.get_d_offsets(),
- &keys_buffer.selector,
- &values_buffer.selector);
+ Sort(sort_pairs,
+ sort_descending,
+ sort_buffers,
+ stable_sort,
+ input.get_d_keys(),
+ d_keys_output,
+ input.get_d_values(),
+ d_values_output,
+ input.get_num_items(),
+ input.get_num_segments(),
+ input.get_d_offsets(),
+ &keys_buffer.selector,
+ &values_buffer.selector);
if (sort_buffers)
{
@@ -1206,9 +1194,8 @@ struct ComparisonPredicate
}
};
-template
-bool compare_two_outputs(const thrust::host_vector &offsets,
+template
+bool compare_two_outputs(const thrust::host_vector &offsets,
const thrust::host_vector &lhs,
const thrust::host_vector &rhs)
{
@@ -1281,12 +1268,11 @@ void RandomizeInput(thrust::host_vector &h_keys,
template
+ typename ValueT>
void HostReferenceSort(bool sort_pairs,
bool sort_descending,
unsigned int num_segments,
- const thrust::host_vector &h_offsets,
+ const thrust::host_vector &h_offsets,
thrust::host_vector &h_keys,
thrust::host_vector &h_values)
{
@@ -1294,8 +1280,8 @@ void HostReferenceSort(bool sort_pairs,
segment_i < num_segments;
segment_i++)
{
- const OffsetT segment_begin = h_offsets[segment_i];
- const OffsetT segment_end = h_offsets[segment_i + 1];
+ const int segment_begin = h_offsets[segment_i];
+ const int segment_end = h_offsets[segment_i + 1];
if (sort_pairs)
{
@@ -1333,16 +1319,15 @@ void HostReferenceSort(bool sort_pairs,
#if STORE_ON_FAILURE
template
+ typename ValueT>
void DumpInput(bool sort_pairs,
bool sort_descending,
bool sort_buffers,
- Input &input,
+ Input &input,
thrust::host_vector &h_keys,
thrust::host_vector &h_values)
{
- const thrust::host_vector &h_offsets = input.get_h_offsets();
+ const thrust::host_vector &h_offsets = input.get_h_offsets();
std::cout << "sort pairs: " << sort_pairs << "\n";
std::cout << "sort descending: " << sort_descending << "\n";
@@ -1356,7 +1341,7 @@ void DumpInput(bool sort_pairs,
std::ofstream offsets_dump("offsets", std::ios::binary);
offsets_dump.write(reinterpret_cast(
thrust::raw_pointer_cast(h_offsets.data())),
- sizeof(OffsetT) * h_offsets.size());
+ sizeof(int) * h_offsets.size());
std::ofstream keys_dump("keys", std::ios::binary);
keys_dump.write(reinterpret_cast(
@@ -1372,9 +1357,8 @@ void DumpInput(bool sort_pairs,
template
-void InputTestRandom(Input &input)
+ typename ValueT>
+void InputTestRandom(Input &input)
{
thrust::host_vector h_keys_output(input.get_num_items());
thrust::device_vector keys_output(input.get_num_items());
@@ -1388,7 +1372,7 @@ void InputTestRandom(Input &input)
thrust::host_vector h_keys(input.get_num_items());
thrust::host_vector h_values(input.get_num_items());
- const thrust::host_vector &h_offsets = input.get_h_offsets();
+ const thrust::host_vector &h_offsets = input.get_h_offsets();
for (bool stable_sort: { unstable, stable })
{
@@ -1413,20 +1397,19 @@ void InputTestRandom(Input &input)
cub::DoubleBuffer keys_buffer(input.get_d_keys(), d_keys_output);
cub::DoubleBuffer values_buffer(input.get_d_values(), d_values_output);
- Sort(
- sort_pairs,
- sort_descending,
- sort_buffers,
- stable_sort,
- input.get_d_keys(),
- d_keys_output,
- input.get_d_values(),
- d_values_output,
- input.get_num_items(),
- input.get_num_segments(),
- input.get_d_offsets(),
- &keys_buffer.selector,
- &values_buffer.selector);
+ Sort(sort_pairs,
+ sort_descending,
+ sort_buffers,
+ stable_sort,
+ input.get_d_keys(),
+ d_keys_output,
+ input.get_d_values(),
+ d_values_output,
+ input.get_num_items(),
+ input.get_num_segments(),
+ input.get_d_offsets(),
+ &keys_buffer.selector,
+ &values_buffer.selector);
HostReferenceSort(sort_pairs,
sort_descending,
@@ -1472,12 +1455,12 @@ void InputTestRandom(Input &input)
#if STORE_ON_FAILURE
if (!keys_ok || !values_ok)
{
- DumpInput(sort_pairs,
- sort_descending,
- sort_buffers,
- input,
- h_keys_backup,
- h_values_backup);
+ DumpInput(sort_pairs,
+ sort_descending,
+ sort_buffers,
+ input,
+ h_keys_backup,
+ h_values_backup);
}
#endif
@@ -1493,8 +1476,7 @@ AssertTrue(keys_ok);
}
template
+ typename ValueT>
struct EdgeTestDispatch
{
// Edge cases that needs to be tested
@@ -1530,8 +1512,8 @@ struct EdgeTestDispatch
for (bool sort_descending : {ascending, descending})
{
- Input edge_cases =
- InputDescription()
+ Input edge_cases =
+ InputDescription()
.add({a_lot_of, empty_short_circuit_segment_size})
.add({a_lot_of, copy_short_circuit_segment_size})
.add({a_lot_of, swap_short_circuit_segment_size})
@@ -1557,7 +1539,7 @@ struct EdgeTestDispatch
.add({a_few, large_cached_segment_max_segment_size * 5})
.template gen(sort_descending);
- InputTest(sort_descending, edge_cases);
+ InputTest(sort_descending, edge_cases);
}
return cudaSuccess;
@@ -1565,8 +1547,7 @@ struct EdgeTestDispatch
};
template
+ typename ValueT>
void EdgePatternsTest()
{
int ptx_version = 0;
@@ -1577,7 +1558,7 @@ void EdgePatternsTest()
using MaxPolicyT =
typename cub::DeviceSegmentedSortPolicy::MaxPolicy;
- using EdgeTestDispatchT = EdgeTestDispatch;
+ using EdgeTestDispatchT = EdgeTestDispatch;
EdgeTestDispatchT dispatch;
MaxPolicyT::Invoke(ptx_version, dispatch);
@@ -1585,26 +1566,24 @@ void EdgePatternsTest()
}
template
-Input GenRandomInput(OffsetT max_items,
- OffsetT min_segments,
- OffsetT max_segments,
- bool descending)
+ typename ValueT>
+Input GenRandomInput(int max_items,
+ int min_segments,
+ int max_segments,
+ bool descending)
{
std::size_t items_generated {};
const std::size_t segments_num = RandomValue(max_segments) + min_segments;
- thrust::host_vector segment_sizes;
+ thrust::host_vector segment_sizes;
segment_sizes.reserve(segments_num);
- const OffsetT max_segment_size = 6000;
+ const int max_segment_size = 6000;
for (std::size_t segment_id = 0; segment_id < segments_num; segment_id++)
{
- const OffsetT segment_size_raw = RandomValue(max_segment_size);
- const OffsetT segment_size = segment_size_raw > OffsetT{0} ? segment_size_raw
- : OffsetT{0};
+ const int segment_size_raw = RandomValue(max_segment_size);
+ const int segment_size = segment_size_raw > 0 ? segment_size_raw : 0;
if (segment_size + items_generated > max_items)
{
@@ -1615,72 +1594,65 @@ Input GenRandomInput(OffsetT max_items,
segment_sizes.push_back(segment_size);
}
- return Input{descending, segment_sizes};
+ return Input{descending, segment_sizes};
}
template
-void RandomTest(OffsetT min_segments,
- OffsetT max_segments)
+ typename ValueT>
+void RandomTest(int min_segments,
+ int max_segments)
{
- const OffsetT max_items = 10000000;
+ const int max_items = 10000000;
for (int iteration = 0; iteration < 10 * MAX_ITERATIONS; iteration++)
{
- Input edge_cases =
- GenRandomInput(max_items,
- min_segments,
- max_segments,
- descending);
+ Input edge_cases = GenRandomInput(max_items,
+ min_segments,
+ max_segments,
+ descending);
InputTestRandom(edge_cases);
}
}
-template
+template
void TestKeys()
{
const bool skip_values = true;
- for (OffsetT segment_size : {1, 1024, 24 * 1024})
+ for (int segment_size : {1, 1024, 24 * 1024})
{
for (int segments : {1, 1024})
{
- TestSameSizeSegments(segment_size,
- segments,
- skip_values);
+ TestSameSizeSegments(segment_size, segments, skip_values);
}
}
}
template
+ typename ValueT>
void TestPairs()
{
- for (OffsetT segment_size: { 1, 1024, 24 * 1024 })
+ for (int segment_size: { 1, 1024, 24 * 1024 })
{
for (int segments: { 1, 1024 })
{
- TestSameSizeSegments(segment_size, segments);
+ TestSameSizeSegments(segment_size, segments);
}
}
- RandomTest(1 << 2, 1 << 8);
- RandomTest(1 << 9, 1 << 19);
- EdgePatternsTest();
+ RandomTest(1 << 2, 1 << 8);
+ RandomTest(1 << 9, 1 << 19);
+ EdgePatternsTest();
}
-template
+template
void TestKeysAndPairs()
{
- TestKeys();
- TestPairs();
+ TestKeys();
+ TestPairs();
}
@@ -1695,23 +1667,22 @@ int main(int argc, char** argv)
TestEmptySegments(1 << 2);
TestEmptySegments(1 << 22);
- TestPairs();
+ TestPairs();
#if TEST_HALF_T
- TestPairs();
+ TestPairs();
#endif
#if TEST_BF_T
- TestPairs();
+ TestPairs();
#endif
- TestKeysAndPairs();
- TestKeysAndPairs();
- TestKeysAndPairs();
- TestKeysAndPairs();
- TestKeysAndPairs();
- TestPairs();
- TestPairs();
+ TestKeysAndPairs();
+ TestKeysAndPairs();
+ TestKeysAndPairs();
+ TestKeysAndPairs();
+ TestPairs();
+ TestPairs();
return 0;
}
diff --git a/test/test_thread_sort.cu b/test/test_thread_sort.cu
index 5b7290304e..98853232c7 100644
--- a/test/test_thread_sort.cu
+++ b/test/test_thread_sort.cu
@@ -145,4 +145,4 @@ int main()
Test();
return 0;
-}
\ No newline at end of file
+}