diff --git a/cub/benchmarks/bench/radix_sort/keys.cu b/cub/benchmarks/bench/radix_sort/keys.cu index 20e8a3e2253..f3b7ba38675 100644 --- a/cub/benchmarks/bench/radix_sort/keys.cu +++ b/cub/benchmarks/bench/radix_sort/keys.cu @@ -27,6 +27,8 @@ #include +#include + #include // %//RANGE//% TUNE_RADIX_BITS bits 8:9:1 @@ -46,7 +48,7 @@ struct policy_hub_t { static constexpr bool KEYS_ONLY = std::is_same::value; - using DominantT = cub::detail::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; + using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t> { diff --git a/cub/benchmarks/bench/radix_sort/pairs.cu b/cub/benchmarks/bench/radix_sort/pairs.cu index 074a35b9a2a..2729ce1b623 100644 --- a/cub/benchmarks/bench/radix_sort/pairs.cu +++ b/cub/benchmarks/bench/radix_sort/pairs.cu @@ -27,6 +27,8 @@ #include +#include + #include // %//RANGE//% TUNE_RADIX_BITS bits 8:9:1 @@ -44,7 +46,7 @@ struct policy_hub_t { static constexpr bool KEYS_ONLY = std::is_same::value; - using DominantT = cub::detail::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; + using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; struct policy_t : cub::ChainedPolicy<300, policy_t, policy_t> { diff --git a/cub/cub/agent/agent_histogram.cuh b/cub/cub/agent/agent_histogram.cuh index 84385d6376d..ce204273da8 100644 --- a/cub/cub/agent/agent_histogram.cuh +++ b/cub/cub/agent/agent_histogram.cuh @@ -49,6 +49,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -225,9 +227,9 @@ struct AgentHistogram // Wrap the native input pointer with CacheModifiedInputIterator // or directly use the supplied input iterator type using WrappedSampleIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - SampleIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + SampleIteratorT>; /// Pixel input iterator type (for applying cache modifier) using WrappedPixelIteratorT = CacheModifiedInputIterator; diff --git a/cub/cub/agent/agent_radix_sort_onesweep.cuh b/cub/cub/agent/agent_radix_sort_onesweep.cuh index ff74b6be251..a78ee66c7b2 100644 --- a/cub/cub/agent/agent_radix_sort_onesweep.cuh +++ b/cub/cub/agent/agent_radix_sort_onesweep.cuh @@ -49,6 +49,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN /** \brief cub::RadixSortStoreAlgorithm enumerates different algorithms to write @@ -146,10 +148,10 @@ struct AgentRadixSortOnesweep || RANK_ALGORITHM == RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR, "for onesweep agent, the ranking algorithm must warp-strided key arrangement"); - using BlockRadixRankT = cub::detail::conditional_t< + using BlockRadixRankT = ::cuda::std::_If< RANK_ALGORITHM == RADIX_RANK_MATCH_EARLY_COUNTS_ATOMIC_OR, BlockRadixRankMatchEarlyCounts, - cub::detail::conditional_t< + ::cuda::std::_If< RANK_ALGORITHM == RADIX_RANK_MATCH, BlockRadixRankMatch, BlockRadixRankMatchEarlyCounts>>; diff --git a/cub/cub/agent/agent_reduce.cuh b/cub/cub/agent/agent_reduce.cuh index a796c7dd153..3492bd5f41d 100644 --- a/cub/cub/agent/agent_reduce.cuh +++ b/cub/cub/agent/agent_reduce.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include _CCCL_SUPPRESS_DEPRECATED_PUSH @@ -145,9 +147,9 @@ struct AgentReduce // Wrap the native input pointer with CacheModifiedInputIterator // or directly use the supplied input iterator type using WrappedInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - InputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + InputIteratorT>; /// Constants static constexpr int BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS; diff --git a/cub/cub/agent/agent_reduce_by_key.cuh b/cub/cub/agent/agent_reduce_by_key.cuh index a1ff251e621..7e14b793db9 100644 --- a/cub/cub/agent/agent_reduce_by_key.cuh +++ b/cub/cub/agent/agent_reduce_by_key.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -225,27 +227,27 @@ struct AgentReduceByKey // CacheModifiedValuesInputIterator or directly use the supplied input // iterator type using WrappedKeysInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - KeysInputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + KeysInputIteratorT>; // Cache-modified Input iterator wrapper type (for applying cache modifier) // for values Wrap the native input pointer with // CacheModifiedValuesInputIterator or directly use the supplied input // iterator type using WrappedValuesInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - ValuesInputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + ValuesInputIteratorT>; // Cache-modified Input iterator wrapper type (for applying cache modifier) // for fixup values Wrap the native input pointer with // CacheModifiedValuesInputIterator or directly use the supplied input // iterator type using WrappedFixupInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - AggregatesOutputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + AggregatesOutputIteratorT>; // Reduce-value-by-segment scan operator using ReduceBySegmentOpT = ReduceBySegmentOp; diff --git a/cub/cub/agent/agent_rle.cuh b/cub/cub/agent/agent_rle.cuh index 08723868088..c498f1737d4 100644 --- a/cub/cub/agent/agent_rle.cuh +++ b/cub/cub/agent/agent_rle.cuh @@ -54,6 +54,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -231,9 +233,9 @@ struct AgentRle // Wrap the native input pointer with CacheModifiedVLengthnputIterator // Directly use the supplied input iterator type using WrappedInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - InputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + InputIteratorT>; // Parameterized BlockLoad type for data using BlockLoadT = @@ -257,7 +259,7 @@ struct AgentRle using WarpExchangePairs = WarpExchange; using WarpExchangePairsStorage = - cub::detail::conditional_t; + ::cuda::std::_If; using WarpExchangeOffsets = WarpExchange; using WarpExchangeLengths = WarpExchange; diff --git a/cub/cub/agent/agent_scan.cuh b/cub/cub/agent/agent_scan.cuh index d7bbab4dbad..7da0fec7cdd 100644 --- a/cub/cub/agent/agent_scan.cuh +++ b/cub/cub/agent/agent_scan.cuh @@ -50,6 +50,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -157,9 +159,9 @@ struct AgentScan // Wrap the native input pointer with CacheModifiedInputIterator // or directly use the supplied input iterator type using WrappedInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - InputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + InputIteratorT>; // Constants enum diff --git a/cub/cub/agent/agent_scan_by_key.cuh b/cub/cub/agent/agent_scan_by_key.cuh index 42114bf5dd5..306ef9006c4 100644 --- a/cub/cub/agent/agent_scan_by_key.cuh +++ b/cub/cub/agent/agent_scan_by_key.cuh @@ -50,6 +50,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -152,14 +154,14 @@ struct AgentScanByKey static constexpr int ITEMS_PER_TILE = BLOCK_THREADS * ITEMS_PER_THREAD; using WrappedKeysInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - KeysInputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + KeysInputIteratorT>; using WrappedValuesInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - ValuesInputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + ValuesInputIteratorT>; using BlockLoadKeysT = BlockLoad; diff --git a/cub/cub/agent/agent_segment_fixup.cuh b/cub/cub/agent/agent_segment_fixup.cuh index 49924ee2ce7..4f01df6a09f 100644 --- a/cub/cub/agent/agent_segment_fixup.cuh +++ b/cub/cub/agent/agent_segment_fixup.cuh @@ -52,6 +52,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -171,18 +173,18 @@ struct AgentSegmentFixup // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys // Wrap the native input pointer with CacheModifiedValuesInputIterator // or directly use the supplied input iterator type - using WrappedPairsInputIteratorT = cub::detail::conditional_t< - std::is_pointer::value, - CacheModifiedInputIterator, - PairsInputIteratorT>; + using WrappedPairsInputIteratorT = + ::cuda::std::_If::value, + CacheModifiedInputIterator, + PairsInputIteratorT>; // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values // Wrap the native input pointer with CacheModifiedValuesInputIterator // or directly use the supplied input iterator type using WrappedFixupInputIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - AggregatesOutputIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + AggregatesOutputIteratorT>; // Reduce-value-by-segment scan operator using ReduceBySegmentOpT = ReduceByKeyOp; diff --git a/cub/cub/agent/agent_select_if.cuh b/cub/cub/agent/agent_select_if.cuh index a48fa175807..ab9c982dca7 100644 --- a/cub/cub/agent/agent_select_if.cuh +++ b/cub/cub/agent/agent_select_if.cuh @@ -219,17 +219,17 @@ struct AgentSelectIf // Wrap the native input pointer with CacheModifiedValuesInputIterator // or directly use the supplied input iterator type using WrappedInputIteratorT = - cub::detail::conditional_t<::cuda::std::is_pointer::value, - CacheModifiedInputIterator, - InputIteratorT>; + ::cuda::std::_If<::cuda::std::is_pointer::value, + CacheModifiedInputIterator, + InputIteratorT>; // Cache-modified Input iterator wrapper type (for applying cache modifier) for values // Wrap the native input pointer with CacheModifiedValuesInputIterator // or directly use the supplied input iterator type using WrappedFlagsInputIteratorT = - cub::detail::conditional_t<::cuda::std::is_pointer::value, - CacheModifiedInputIterator, - FlagsInputIteratorT>; + ::cuda::std::_If<::cuda::std::is_pointer::value, + CacheModifiedInputIterator, + FlagsInputIteratorT>; // Parameterized BlockLoad type for input data using BlockLoadT = BlockLoad; diff --git a/cub/cub/agent/agent_spmv_orig.cuh b/cub/cub/agent/agent_spmv_orig.cuh index 3853a059272..a392359537f 100644 --- a/cub/cub/agent/agent_spmv_orig.cuh +++ b/cub/cub/agent/agent_spmv_orig.cuh @@ -52,6 +52,8 @@ #include #include +#include + #include CUB_NAMESPACE_BEGIN @@ -264,7 +266,7 @@ struct AgentSpmv { // Value type to pair with index type OffsetT // (NullType if loading values directly during merge) - using MergeValueT = cub::detail::conditional_t; + using MergeValueT = ::cuda::std::_If; OffsetT row_end_offset; MergeValueT nonzero; diff --git a/cub/cub/agent/agent_three_way_partition.cuh b/cub/cub/agent/agent_three_way_partition.cuh index 85933fa5aa4..0c0556ffe79 100644 --- a/cub/cub/agent/agent_three_way_partition.cuh +++ b/cub/cub/agent/agent_three_way_partition.cuh @@ -197,9 +197,9 @@ struct AgentThreeWayPartition static constexpr int TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD; using WrappedInputIteratorT = - cub::detail::conditional_t::value, - cub::CacheModifiedInputIterator, - InputIteratorT>; + ::cuda::std::_If::value, + cub::CacheModifiedInputIterator, + InputIteratorT>; // Parameterized BlockLoad type for input data using BlockLoadT = cub::BlockLoad; diff --git a/cub/cub/agent/single_pass_scan_operators.cuh b/cub/cub/agent/single_pass_scan_operators.cuh index 67ffb965017..312b7ac98c5 100644 --- a/cub/cub/agent/single_pass_scan_operators.cuh +++ b/cub/cub/agent/single_pass_scan_operators.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include @@ -476,16 +478,16 @@ using default_no_delay_t = default_no_delay_constructor_t::delay_t; template using default_delay_constructor_t = - cub::detail::conditional_t::PRIMITIVE, fixed_delay_constructor_t<350, 450>, default_no_delay_constructor_t>; + ::cuda::std::_If::PRIMITIVE, fixed_delay_constructor_t<350, 450>, default_no_delay_constructor_t>; template using default_delay_t = typename default_delay_constructor_t::delay_t; template using default_reduce_by_key_delay_constructor_t = - detail::conditional_t<(Traits::PRIMITIVE) && (sizeof(ValueT) + sizeof(KeyT) < 16), - reduce_by_key_delay_constructor_t<350, 450>, - default_delay_constructor_t>>; + ::cuda::std::_If<(Traits::PRIMITIVE) && (sizeof(ValueT) + sizeof(KeyT) < 16), + reduce_by_key_delay_constructor_t<350, 450>, + default_delay_constructor_t>>; } // namespace detail /** @@ -503,16 +505,13 @@ template struct ScanTileState { // Status word type - using StatusWord = cub::detail::conditional_t< + using StatusWord = ::cuda::std::_If< sizeof(T) == 8, unsigned long long, - cub::detail::conditional_t>>; + ::cuda::std::_If>>; // Unit word type - using TxnWord = cub::detail:: - conditional_t>; + using TxnWord = ::cuda::std::_If>; // Device word type struct TileDescriptor @@ -889,18 +888,15 @@ struct ReduceByKeyScanTileState }; // Status word type - using StatusWord = cub::detail::conditional_t< + using StatusWord = ::cuda::std::_If< STATUS_WORD_SIZE == 8, unsigned long long, - cub::detail::conditional_t>>; + ::cuda::std:: + _If>>; // Status word type - using TxnWord = - cub::detail::conditional_t>; + using TxnWord = ::cuda::std:: + _If>; // Device word type (for when sizeof(ValueT) == sizeof(KeyT)) struct TileDescriptorBigStatus @@ -920,7 +916,7 @@ struct ReduceByKeyScanTileState // Device word type using TileDescriptor = - cub::detail::conditional_t; + ::cuda::std::_If; // Device storage TxnWord* d_tile_descriptors; diff --git a/cub/cub/block/block_adjacent_difference.cuh b/cub/cub/block/block_adjacent_difference.cuh index 649f2a563f1..709e9c1bd07 100644 --- a/cub/cub/block/block_adjacent_difference.cuh +++ b/cub/cub/block/block_adjacent_difference.cuh @@ -143,7 +143,7 @@ private: } /// Specialization for when FlagOp has third index param - template ::HAS_PARAM> + template ::value> struct ApplyOp { // Apply flag operator diff --git a/cub/cub/block/block_discontinuity.cuh b/cub/cub/block/block_discontinuity.cuh index 6ec1e05616b..95fe29df5d1 100644 --- a/cub/cub/block/block_discontinuity.cuh +++ b/cub/cub/block/block_discontinuity.cuh @@ -149,7 +149,7 @@ private: } /// Specialization for when FlagOp has third index param - template ::HAS_PARAM> + template ::value> struct ApplyOp { // Apply flag operator diff --git a/cub/cub/block/block_histogram.cuh b/cub/cub/block/block_histogram.cuh index 864b4df9cb1..3553ec79da6 100644 --- a/cub/cub/block/block_histogram.cuh +++ b/cub/cub/block/block_histogram.cuh @@ -48,6 +48,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN //! @brief BlockHistogramAlgorithm enumerates alternative algorithms for the parallel construction of @@ -199,9 +201,9 @@ private: /// Internal specialization. using InternalBlockHistogram = - cub::detail::conditional_t, - BlockHistogramAtomic>; + ::cuda::std::_If, + BlockHistogramAtomic>; /// Shared memory storage layout type for BlockHistogram using _TempStorage = typename InternalBlockHistogram::TempStorage; diff --git a/cub/cub/block/block_radix_rank.cuh b/cub/cub/block/block_radix_rank.cuh index 23ad226cef0..c91731ae033 100644 --- a/cub/cub/block/block_radix_rank.cuh +++ b/cub/cub/block/block_radix_rank.cuh @@ -221,7 +221,7 @@ private: // Integer type for packing DigitCounters into columns of shared memory banks using PackedCounter = - cub::detail::conditional_t; + ::cuda::std::_If; static constexpr DigitCounter max_tile_size = ::cuda::std::numeric_limits::max(); @@ -1195,16 +1195,16 @@ namespace detail // - Support multi-dimensional thread blocks in the rest of implementations // - Repurpose BlockRadixRank as an entry name with the algorithm template parameter template -using block_radix_rank_t = cub::detail::conditional_t< +using block_radix_rank_t = ::cuda::std::_If< RankAlgorithm == RADIX_RANK_BASIC, BlockRadixRank, - cub::detail::conditional_t< + ::cuda::std::_If< RankAlgorithm == RADIX_RANK_MEMOIZE, BlockRadixRank, - cub::detail::conditional_t< + ::cuda::std::_If< RankAlgorithm == RADIX_RANK_MATCH, BlockRadixRankMatch, - cub::detail::conditional_t< + ::cuda::std::_If< RankAlgorithm == RADIX_RANK_MATCH_EARLY_COUNTS_ANY, BlockRadixRankMatchEarlyCounts, BlockRadixRankMatchEarlyCounts>>>>; diff --git a/cub/cub/block/block_reduce.cuh b/cub/cub/block/block_reduce.cuh index 0b2c1c53e0b..d35c90c06d4 100644 --- a/cub/cub/block/block_reduce.cuh +++ b/cub/cub/block/block_reduce.cuh @@ -48,6 +48,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN /****************************************************************************** @@ -253,11 +255,11 @@ private: /// Internal specialization type using InternalBlockReduce = - cub::detail::conditional_t>; // BlockReduceRaking + ::cuda::std::_If>; // BlockReduceRaking /// Shared memory storage layout type for BlockReduce using _TempStorage = typename InternalBlockReduce::TempStorage; diff --git a/cub/cub/block/block_scan.cuh b/cub/cub/block/block_scan.cuh index adfba7dada6..a06b7c185fb 100644 --- a/cub/cub/block/block_scan.cuh +++ b/cub/cub/block/block_scan.cuh @@ -46,6 +46,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN /****************************************************************************** @@ -252,7 +254,7 @@ private: BlockScanRaking; /// Define the delegate type for the desired algorithm - using InternalBlockScan = cub::detail::conditional_t; + using InternalBlockScan = ::cuda::std::_If; /// Shared memory storage layout type for BlockScan using _TempStorage = typename InternalBlockScan::TempStorage; diff --git a/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh b/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh index 57969548a2f..93c93e4c489 100644 --- a/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh +++ b/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh @@ -454,11 +454,11 @@ struct DispatchBatchMemcpy : SelectedPolicy // The number of thread blocks (or tiles) required to process all of the given buffers BlockOffsetT num_tiles = DivideAndRoundUp(num_buffers, TILE_SIZE); - using BlevBufferSrcsOutT = cub::detail::conditional_t>; - using BlevBufferDstOutT = cub::detail::conditional_t>; - using BlevBufferSrcsOutItT = BlevBufferSrcsOutT*; - using BlevBufferDstsOutItT = BlevBufferDstOutT*; - using BlevBufferSizesOutItT = BufferSizeT*; + using BlevBufferSrcsOutT = ::cuda::std::_If>; + using BlevBufferDstOutT = ::cuda::std::_If>; + using BlevBufferSrcsOutItT = BlevBufferSrcsOutT*; + using BlevBufferDstsOutItT = BlevBufferDstOutT*; + using BlevBufferSizesOutItT = BufferSizeT*; using BlevBufferTileOffsetsOutItT = BlockOffsetT*; temporary_storage::layout temporary_storage_layout; diff --git a/cub/cub/device/dispatch/dispatch_histogram.cuh b/cub/cub/device/dispatch/dispatch_histogram.cuh index 0574305cc10..ad16b68e57a 100644 --- a/cub/cub/device/dispatch/dispatch_histogram.cuh +++ b/cub/cub/device/dispatch/dispatch_histogram.cuh @@ -611,9 +611,9 @@ public: // Wrap the native input pointer with CacheModifiedInputIterator // or Directly use the supplied input iterator type using WrappedLevelIteratorT = - cub::detail::conditional_t::value, - CacheModifiedInputIterator, - LevelIteratorT>; + ::cuda::std::_If::value, + CacheModifiedInputIterator, + LevelIteratorT>; WrappedLevelIteratorT wrapped_levels(d_levels); @@ -647,11 +647,11 @@ public: // rule: 2^l * 2^r = 2^(l + r) to determine a sufficiently large type to hold the // multiplication result. // If CommonT used to be a 128-bit wide integral type already, we use CommonT's arithmetic - using IntArithmeticT = cub::detail::conditional_t< // + using IntArithmeticT = ::cuda::std::_If< // sizeof(SampleT) + sizeof(CommonT) <= sizeof(uint32_t), // uint32_t, // #if CUB_IS_INT128_ENABLED - cub::detail::conditional_t< // + ::cuda::std::_If< // (::cuda::std::is_same::value || // ::cuda::std::is_same::value), // CommonT, // @@ -665,10 +665,9 @@ public: template using is_integral_excl_int128 = #if CUB_IS_INT128_ENABLED - cub::detail::conditional_t< - ::cuda::std::is_same::value&& ::cuda::std::is_same::value, - ::cuda::std::false_type, - ::cuda::std::is_integral>; + ::cuda::std::_If<::cuda::std::is_same::value&& ::cuda::std::is_same::value, + ::cuda::std::false_type, + ::cuda::std::is_integral>; #else ::cuda::std::is_integral; #endif diff --git a/cub/cub/device/dispatch/dispatch_merge_sort.cuh b/cub/cub/device/dispatch/dispatch_merge_sort.cuh index 8bb025fb687..11939b632c7 100644 --- a/cub/cub/device/dispatch/dispatch_merge_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_merge_sort.cuh @@ -47,6 +47,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN namespace detail @@ -130,10 +132,10 @@ private: (max_default_size > max_smem_per_block) && (max_fallback_size <= max_smem_per_block); public: - using policy_t = cub::detail::conditional_t; + using policy_t = ::cuda::std::_If; using block_sort_agent_t = - cub::detail::conditional_t; - using merge_agent_t = cub::detail::conditional_t; + ::cuda::std::_If; + using merge_agent_t = ::cuda::std::_If; }; } // namespace detail diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh index 1ecc1240a0c..fc0d8b8c225 100644 --- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh @@ -59,6 +59,8 @@ #include +#include + #include #include @@ -131,14 +133,14 @@ __launch_bounds__(int((ALT_DIGIT_BITS) ? int(ChainedPolicyT::ActivePolicy::AltUp DecomposerT decomposer = {}) { using ActiveUpsweepPolicyT = - cub::detail::conditional_t; + ::cuda::std::_If; using ActiveDownsweepPolicyT = - cub::detail::conditional_t; + ::cuda::std::_If; enum { @@ -284,14 +286,14 @@ __launch_bounds__(int((ALT_DIGIT_BITS) ? int(ChainedPolicyT::ActivePolicy::AltDo DecomposerT decomposer = {}) { using ActiveUpsweepPolicyT = - cub::detail::conditional_t; + ::cuda::std::_If; using ActiveDownsweepPolicyT = - cub::detail::conditional_t; + ::cuda::std::_If; enum { @@ -547,9 +549,9 @@ __launch_bounds__(int((ALT_DIGIT_BITS) ? ChainedPolicyT::ActivePolicy::AltSegmen // using SegmentedPolicyT = - cub::detail::conditional_t; + ::cuda::std::_If; enum { @@ -892,7 +894,7 @@ struct DeviceRadixSortPolicy static constexpr bool KEYS_ONLY = std::is_same::value; // Dominant-sized key/value type - using DominantT = cub::detail::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; + using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; //------------------------------------------------------------------------------ // Architecture-specific tuning policies @@ -963,9 +965,9 @@ struct DeviceRadixSortPolicy PRIMARY_RADIX_BITS - 1>; // Downsweep policies - using DownsweepPolicy = cub::detail::conditional_t; + using DownsweepPolicy = ::cuda::std::_If; - using AltDownsweepPolicy = cub::detail::conditional_t; + using AltDownsweepPolicy = ::cuda::std::_If; // Upsweep policies using UpsweepPolicy = DownsweepPolicy; @@ -1575,7 +1577,7 @@ struct DeviceRadixSortPolicy ONESWEEP_RADIX_BITS>; using OnesweepLargeKeyPolicy = // - cub::detail::conditional_t; + ::cuda::std::_If; using OnesweepSmallKeyPolicySizes = // detail::radix::sm90_small_key_tuning; @@ -1589,9 +1591,9 @@ struct DeviceRadixSortPolicy RADIX_SORT_STORE_DIRECT, 8>; using OnesweepPolicy = // - cub::detail::conditional_t; + ::cuda::std::_If; using ScanPolicy = AgentScanPolicy<512, diff --git a/cub/cub/device/dispatch/dispatch_scan.cuh b/cub/cub/device/dispatch/dispatch_scan.cuh index b1e31c53a12..346bda4c286 100644 --- a/cub/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/cub/device/dispatch/dispatch_scan.cuh @@ -56,6 +56,8 @@ #include +#include + #include CUB_NAMESPACE_BEGIN @@ -228,9 +230,9 @@ template ::value, - cub::detail::value_t, - typename InitValueT::value_type>, + ::cuda::std::_If::value, + cub::detail::value_t, + typename InitValueT::value_type>, cub::detail::value_t>, typename SelectedPolicy = DeviceScanPolicy> struct DispatchScan : SelectedPolicy diff --git a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh index 9a1cdad9704..0d0ce192415 100644 --- a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh +++ b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh @@ -54,6 +54,8 @@ #include +#include + #include CUB_NAMESPACE_BEGIN @@ -228,8 +230,7 @@ template < typename OffsetT, typename AccumT = detail::accumulator_t< ScanOpT, - cub::detail:: - conditional_t::value, cub::detail::value_t, InitValueT>, + ::cuda::std::_If::value, cub::detail::value_t, InitValueT>, cub::detail::value_t>, typename SelectedPolicy = DeviceScanByKeyPolicy, ScanOpT>> diff --git a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh index 656fc2574d9..84c81f34a98 100644 --- a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -58,6 +58,8 @@ #include #include +#include + #include #include @@ -694,7 +696,7 @@ __launch_bounds__(1) CUB_DETAIL_KERNEL_ATTRIBUTES void DeviceSegmentedSortContin template struct DeviceSegmentedSortPolicy { - using DominantT = cub::detail::conditional_t<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; + using DominantT = ::cuda::std::_If<(sizeof(ValueT) > sizeof(KeyT)), ValueT, KeyT>; static constexpr int KEYS_ONLY = std::is_same::value; diff --git a/cub/cub/util_device.cuh b/cub/cub/util_device.cuh index d4b61814d0d..2998608d567 100644 --- a/cub/cub/util_device.cuh +++ b/cub/cub/util_device.cuh @@ -673,8 +673,7 @@ template struct ChainedPolicy { /// The policy for the active compiler pass - using ActivePolicy = - cub::detail::conditional_t<(CUB_PTX_ARCH < PolicyPtxVersion), typename PrevPolicyT::ActivePolicy, PolicyT>; + using ActivePolicy = ::cuda::std::_If<(CUB_PTX_ARCH < PolicyPtxVersion), typename PrevPolicyT::ActivePolicy, PolicyT>; /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version template diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh index d8c03500081..5afd2dd23aa 100644 --- a/cub/cub/util_type.cuh +++ b/cub/cub/util_type.cuh @@ -1,6 +1,6 @@ /****************************************************************************** * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -58,12 +59,6 @@ _CCCL_DIAG_POP # endif // !_CCCL_CUDACC_BELOW_11_8 #endif // _CCCL_HAS_NV_BF16 -#if !defined(_CCCL_COMPILER_NVRTC) -# include -#else -# include -#endif - CUB_NAMESPACE_BEGIN #ifndef CUB_IS_INT128_ENABLED @@ -88,22 +83,18 @@ CUB_NAMESPACE_BEGIN #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document namespace detail { - -template -using conditional_t = typename ::cuda::std::conditional::type; - +//! Alias to the given iterator's value_type. +// Aliases to std::iterator_traits, since users can specialize this template to provide traits for their iterators. We +// only defer to the libcu++ implementation for NVRTC. template using value_t = -# if !defined(_CCCL_COMPILER_NVRTC) - typename std::iterator_traits::value_type; -# else // defined(_CCCL_COMPILER_NVRTC) +# ifdef _CCCL_COMPILER_NVRTC typename ::cuda::std::iterator_traits::value_type; +# else // !defined(_CCCL_COMPILER_NVRTC) + typename std::iterator_traits::value_type; # endif // defined(_CCCL_COMPILER_NVRTC) -template ::type>::type, void>::value> +template >::value> struct non_void_value_impl { using type = FallbackT; @@ -112,8 +103,7 @@ struct non_void_value_impl template struct non_void_value_impl { - using type = - typename ::cuda::std::conditional<::cuda::std::is_same, void>::value, FallbackT, value_t>::type; + using type = ::cuda::std::_If<::cuda::std::is_void>::value, FallbackT, value_t>; }; /** @@ -326,17 +316,8 @@ private: template struct AlignBytes { - struct Pad - { - T val; - char byte; - }; - - enum - { - /// The "true CUDA" alignment of T in bytes - ALIGN_BYTES = sizeof(Pad) - sizeof(T) - }; + /// The "true CUDA" alignment of T in bytes + static constexpr unsigned ALIGN_BYTES = alignof(T); /// The "truly aligned" type using Type = T; @@ -350,10 +331,8 @@ struct AlignBytes template <> \ struct AlignBytes \ { \ - enum \ - { \ - ALIGN_BYTES = b \ - }; \ + static constexpr unsigned ALIGN_BYTES = b; \ + \ typedef __align__(b) t Type; \ /* TODO(bgruber): rewriting the above to using Type __align__(b) = t; does not compile :S */ \ }; @@ -395,42 +374,31 @@ template struct AlignBytes : AlignBytes {}; template struct UnitWord { - enum - { - ALIGN_BYTES = AlignBytes::ALIGN_BYTES - }; + static constexpr auto ALIGN_BYTES = AlignBytes::ALIGN_BYTES; template struct IsMultiple { - enum - { - UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, - IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (int(ALIGN_BYTES) % int(UNIT_ALIGN_BYTES) == 0) - }; + static constexpr auto UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES; + static constexpr bool IS_MULTIPLE = + (sizeof(T) % sizeof(Unit) == 0) && (int(ALIGN_BYTES) % int(UNIT_ALIGN_BYTES) == 0); }; - /// Biggest shuffle word that T is a whole multiple of and is not larger than - /// the alignment of T - using ShuffleWord = cub::detail::conditional_t< - IsMultiple::IS_MULTIPLE, - unsigned int, - cub::detail::conditional_t::IS_MULTIPLE, unsigned short, unsigned char>>; - - /// Biggest volatile word that T is a whole multiple of and is not larger than - /// the alignment of T - using VolatileWord = cub::detail::conditional_t::IS_MULTIPLE, unsigned long long, ShuffleWord>; - - /// Biggest memory-access word that T is a whole multiple of and is not larger - /// than the alignment of T - using DeviceWord = cub::detail::conditional_t::IS_MULTIPLE, ulonglong2, VolatileWord>; - - /// Biggest texture reference word that T is a whole multiple of and is not - /// larger than the alignment of T - using TextureWord = - cub::detail::conditional_t::IS_MULTIPLE, - uint4, - cub::detail::conditional_t::IS_MULTIPLE, uint2, ShuffleWord>>; + /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T + using ShuffleWord = + ::cuda::std::_If::IS_MULTIPLE, + unsigned int, + ::cuda::std::_If::IS_MULTIPLE, unsigned short, unsigned char>>; + + /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T + using VolatileWord = ::cuda::std::_If::IS_MULTIPLE, unsigned long long, ShuffleWord>; + + /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T + using DeviceWord = ::cuda::std::_If::IS_MULTIPLE, ulonglong2, VolatileWord>; + + /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T + using TextureWord = ::cuda::std:: + _If::IS_MULTIPLE, uint4, ::cuda::std::_If::IS_MULTIPLE, uint2, ShuffleWord>>; }; // float2 specialization workaround (for SM10-SM13) @@ -483,11 +451,8 @@ struct CubVector static_assert(!sizeof(T), "CubVector can only have 1-4 elements"); }; -enum -{ - /// The maximum number of elements in CUDA vector types - MAX_VEC_ELEMENTS = 4, -}; +/// The maximum number of elements in CUDA vector types +_LIBCUDACXX_INLINE_VAR constexpr int MAX_VEC_ELEMENTS = 4; /** * Generic vector-1 type @@ -498,7 +463,7 @@ struct CubVector T x; using BaseType = T; - using Type = CubVector; + using Type = CubVector; }; /** @@ -511,7 +476,7 @@ struct CubVector T y; using BaseType = T; - using Type = CubVector; + using Type = CubVector; }; /** @@ -525,7 +490,7 @@ struct CubVector T z; using BaseType = T; - using Type = CubVector; + using Type = CubVector; }; /** @@ -540,7 +505,7 @@ struct CubVector T w; using BaseType = T; - using Type = CubVector; + using Type = CubVector; }; /** @@ -654,7 +619,6 @@ CUB_DEFINE_VECTOR_TYPE(double, double) CUB_DEFINE_VECTOR_TYPE(bool, uchar) // clang-format on -// Undefine macros # undef CUB_DEFINE_VECTOR_TYPE /****************************************************************************** @@ -820,28 +784,20 @@ template struct DoubleBuffer { /// Pair of device buffer pointers - T* d_buffers[2]; + T* d_buffers[2]{}; /// Selector into \p d_buffers (i.e., the active/valid buffer) - int selector; + int selector = 0; /// \brief Constructor - _CCCL_HOST_DEVICE _CCCL_FORCEINLINE DoubleBuffer() - { - selector = 0; - d_buffers[0] = nullptr; - d_buffers[1] = nullptr; - } + DoubleBuffer() = default; /// \brief Constructor _CCCL_HOST_DEVICE _CCCL_FORCEINLINE DoubleBuffer(T* d_current, ///< The currently valid buffer T* d_alternate) ///< Alternate storage buffer of the same size as \p ///< d_current - { - selector = 0; - d_buffers[0] = d_current; - d_buffers[1] = d_alternate; - } + : d_buffers{d_current, d_alternate} + {} /// \brief Return pointer to the currently valid buffer _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T* Current() @@ -862,20 +818,18 @@ struct DoubleBuffer /** * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a - * constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name + * constant member \p value indicating whether or not parameter \p T exposes a nested type \p nested_type_name */ -# define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ - template \ - struct detector_name \ - { \ - template \ - static char& test(typename C::nested_type_name*); \ - template \ - static int& test(...); \ - enum \ - { \ - VALUE = sizeof(test(0)) < sizeof(int) \ - }; \ +# define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ + template \ + struct detector_name : ::cuda::std::false_type \ + { \ + CUB_DEPRECATED_BECAUSE("Use ::value instead") static constexpr bool VALUE = false; \ + }; \ + template \ + struct detector_name> : ::cuda::std::true_type \ + { \ + CUB_DEPRECATED_BECAUSE("Use ::value instead") static constexpr bool VALUE = true; \ }; /****************************************************************************** @@ -886,50 +840,19 @@ struct DoubleBuffer * \brief Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a, const T&b) or * bool operator()(const T& a, const T&b, unsigned int idx) */ +template +struct BinaryOpHasIdxParam : ::cuda::std::false_type +{ + CUB_DEPRECATED_BECAUSE("Use ::value instead") static constexpr bool HAS_PARAM = false; +}; + template -struct BinaryOpHasIdxParam +struct BinaryOpHasIdxParam()( + ::cuda::std::declval(), ::cuda::std::declval(), int{}))>> : ::cuda::std::true_type { -private: - /* - template struct SFINAE1 - {}; template struct - SFINAE2 {}; template struct SFINAE3 {}; - template struct SFINAE4 - {}; - */ - template - struct SFINAE5 - {}; - template - struct SFINAE6 - {}; - template - struct SFINAE7 - {}; - template - struct SFINAE8 - {}; - /* - template static char Test(SFINAE1 *); - template static char Test(SFINAE2 *); - template static char Test(SFINAE3 *); - template static char Test(SFINAE4 *); - */ - template - _CCCL_HOST_DEVICE static char Test(SFINAE5*); - template - _CCCL_HOST_DEVICE static char Test(SFINAE6*); - template - _CCCL_HOST_DEVICE static char Test(SFINAE7*); - template - _CCCL_HOST_DEVICE static char Test(SFINAE8*); - - template - _CCCL_HOST_DEVICE static int Test(...); - -public: - /// Whether the functor BinaryOp has a third unsigned int index param - static constexpr bool HAS_PARAM = sizeof(Test(nullptr)) == sizeof(char); + CUB_DEPRECATED_BECAUSE("Use ::value instead") static constexpr bool HAS_PARAM = true; }; /****************************************************************************** @@ -960,13 +883,9 @@ enum Category template struct BaseTraits { - /// Category static constexpr Category CATEGORY = _CATEGORY; - enum - { - PRIMITIVE = _PRIMITIVE, - NULL_TYPE = _NULL_TYPE, - }; + static constexpr bool PRIMITIVE = _PRIMITIVE; + static constexpr bool NULL_TYPE = _NULL_TYPE; }; /** @@ -980,12 +899,8 @@ struct BaseTraits static constexpr Category CATEGORY = UNSIGNED_INTEGER; static constexpr UnsignedBits LOWEST_KEY = UnsignedBits(0); static constexpr UnsignedBits MAX_KEY = UnsignedBits(-1); - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; + static constexpr bool PRIMITIVE = true; + static constexpr bool NULL_TYPE = false; static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE UnsignedBits TwiddleIn(UnsignedBits key) { @@ -1026,12 +941,8 @@ struct BaseTraits static constexpr UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static constexpr UnsignedBits LOWEST_KEY = HIGH_BIT; static constexpr UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; + static constexpr bool PRIMITIVE = true; + static constexpr bool NULL_TYPE = false; static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE UnsignedBits TwiddleIn(UnsignedBits key) { @@ -1178,12 +1089,8 @@ struct BaseTraits static constexpr UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static constexpr UnsignedBits LOWEST_KEY = UnsignedBits(-1); static constexpr UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; - - enum - { - PRIMITIVE = true, - NULL_TYPE = false, - }; + static constexpr bool PRIMITIVE = true; + static constexpr bool NULL_TYPE = false; static _CCCL_HOST_DEVICE _CCCL_FORCEINLINE UnsignedBits TwiddleIn(UnsignedBits key) { @@ -1240,7 +1147,6 @@ struct NumericTraits<__uint128_t> static constexpr Category CATEGORY = UNSIGNED_INTEGER; static constexpr UnsignedBits LOWEST_KEY = UnsignedBits(0); static constexpr UnsignedBits MAX_KEY = UnsignedBits(-1); - static constexpr bool PRIMITIVE = false; static constexpr bool NULL_TYPE = false; @@ -1275,7 +1181,6 @@ struct NumericTraits<__int128_t> static constexpr UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); static constexpr UnsignedBits LOWEST_KEY = HIGH_BIT; static constexpr UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; - static constexpr bool PRIMITIVE = false; static constexpr bool NULL_TYPE = false; diff --git a/cub/cub/util_vsmem.cuh b/cub/cub/util_vsmem.cuh index 5eebc8533db..6a0d6b9a94e 100644 --- a/cub/cub/util_vsmem.cuh +++ b/cub/cub/util_vsmem.cuh @@ -47,6 +47,7 @@ #include #include +#include #include @@ -95,7 +96,7 @@ private: public: // Type alias to be used for static temporary storage declaration within the algorithm's kernel - using static_temp_storage_t = cub::detail::conditional_t; + using static_temp_storage_t = ::cuda::std::_If; // The amount of global memory-backed virtual shared memory needed, padded to an integer multiple of 128 bytes static constexpr std::size_t vsmem_per_block = needs_vsmem ? (required_smem + padding_bytes) : 0; diff --git a/cub/cub/warp/warp_exchange.cuh b/cub/cub/warp/warp_exchange.cuh index 3f8ce8f22fb..712d0a6bcd3 100644 --- a/cub/cub/warp/warp_exchange.cuh +++ b/cub/cub/warp/warp_exchange.cuh @@ -48,6 +48,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN enum WarpExchangeAlgorithm @@ -60,9 +62,9 @@ namespace detail { template using InternalWarpExchangeImpl = - cub::detail::conditional_t, - WarpExchangeShfl>; + ::cuda::std::_If, + WarpExchangeShfl>; } // namespace detail /** diff --git a/cub/cub/warp/warp_reduce.cuh b/cub/cub/warp/warp_reduce.cuh index e9b6896ca33..7785b8992a8 100644 --- a/cub/cub/warp/warp_reduce.cuh +++ b/cub/cub/warp/warp_reduce.cuh @@ -49,6 +49,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN //! @rst @@ -172,8 +174,8 @@ public: /// Internal specialization. /// Use SHFL-based reduction if LOGICAL_WARP_THREADS is a power-of-two - using InternalWarpReduce = cub::detail:: - conditional_t, WarpReduceSmem>; + using InternalWarpReduce = + ::cuda::std::_If, WarpReduceSmem>; #endif // DOXYGEN_SHOULD_SKIP_THIS diff --git a/cub/cub/warp/warp_scan.cuh b/cub/cub/warp/warp_scan.cuh index 71124764353..5daeec6e37d 100644 --- a/cub/cub/warp/warp_scan.cuh +++ b/cub/cub/warp/warp_scan.cuh @@ -49,6 +49,8 @@ #include #include +#include + CUB_NAMESPACE_BEGIN //! @rst @@ -177,8 +179,8 @@ private: /// Internal specialization. /// Use SHFL-based scan if LOGICAL_WARP_THREADS is a power-of-two - using InternalWarpScan = cub::detail:: - conditional_t, WarpScanSmem>; + using InternalWarpScan = + ::cuda::std::_If, WarpScanSmem>; /// Shared memory storage layout type for WarpScan using _TempStorage = typename InternalWarpScan::TempStorage; diff --git a/cub/test/c2h/generators.cu b/cub/test/c2h/generators.cu index e404136932a..20952f6ff94 100644 --- a/cub/test/c2h/generators.cu +++ b/cub/test/c2h/generators.cu @@ -40,6 +40,8 @@ #include #include +#include + #include #include @@ -132,7 +134,7 @@ struct random_to_item_t template struct random_to_item_t { - using storage_t = cub::detail::conditional_t<(sizeof(T) > 4), double, float>; + using storage_t = ::cuda::std::_If<(sizeof(T) > 4), double, float>; storage_t m_min; storage_t m_max; diff --git a/cub/test/catch2_test_block_run_length_decode.cu b/cub/test/catch2_test_block_run_length_decode.cu index 824cd473102..db2166659f9 100644 --- a/cub/test/catch2_test_block_run_length_decode.cu +++ b/cub/test/catch2_test_block_run_length_decode.cu @@ -33,6 +33,8 @@ #include #include +#include + #include "catch2_test_helper.h" /****************************************************************************** @@ -161,7 +163,7 @@ public: { typename BlockLoadRunItemT::TempStorage load_uniques_storage; typename BlockLoadRunLengthsT::TempStorage load_run_lengths_storage; - cub::detail::conditional_t + ::cuda::std::_If run_offsets_scan_storage; struct { diff --git a/cub/test/catch2_test_util_type.cu b/cub/test/catch2_test_util_type.cu index 9d46c3028ed..ed201a6ea48 100644 --- a/cub/test/catch2_test_util_type.cu +++ b/cub/test/catch2_test_util_type.cu @@ -61,3 +61,20 @@ CUB_TEST("Tests non_void_value_t", "[util][type]") STATIC_REQUIRE(::cuda::std::is_same>::value); } + +CUB_DEFINE_DETECT_NESTED_TYPE(cat_detect, cat); + +struct HasCat +{ + using cat = int; +}; +struct HasDog +{ + using dog = int; +}; + +CUB_TEST("Test CUB_DEFINE_DETECT_NESTED_TYPE", "[util][type]") +{ + STATIC_REQUIRE(cat_detect::value); + STATIC_REQUIRE(!cat_detect::value); +} diff --git a/cub/test/catch2_test_warp_merge_sort.cu b/cub/test/catch2_test_warp_merge_sort.cu index 6db81fb33c4..faab58054d1 100644 --- a/cub/test/catch2_test_warp_merge_sort.cu +++ b/cub/test/catch2_test_warp_merge_sort.cu @@ -31,6 +31,8 @@ #include +#include + #include #include "c2h/custom_type.cuh" @@ -403,7 +405,7 @@ CUB_TEST( { using params = params_t; using type = typename params::type; - using warp_sort_delegate = cub::detail::conditional_t; + using warp_sort_delegate = ::cuda::std::_If; // Prepare test data c2h::device_vector d_in(params::tile_size); @@ -434,7 +436,7 @@ CUB_TEST("Warp sort keys-only on partial warp-tile works", using params = params_t; using type = typename params::type; using warp_sort_delegate = - cub::detail::conditional_t; + ::cuda::std::_If; // Prepare test data c2h::device_vector d_in(params::tile_size); @@ -468,7 +470,7 @@ CUB_TEST("Warp sort on keys-value pairs works", using params = params_t; using key_type = typename params::type; using value_type = typename c2h::get<4, TestType>; - using warp_sort_delegate = cub::detail::conditional_t; + using warp_sort_delegate = ::cuda::std::_If; // Prepare test data c2h::device_vector d_keys_in(params::tile_size); @@ -511,7 +513,7 @@ CUB_TEST("Warp sort on key-value pairs of a partial warp-tile works", using key_type = typename params::type; using value_type = typename c2h::get<4, TestType>; using warp_sort_delegate = - cub::detail::conditional_t; + ::cuda::std::_If; // Prepare test data c2h::device_vector d_keys_in(params::tile_size); diff --git a/cub/test/catch2_test_warp_reduce.cu b/cub/test/catch2_test_warp_reduce.cu index 9a075551d37..55c3ed3e532 100644 --- a/cub/test/catch2_test_warp_reduce.cu +++ b/cub/test/catch2_test_warp_reduce.cu @@ -476,8 +476,8 @@ CUB_TEST("Warp segmented sum works", "[reduce][warp]", full_type_list, logical_w constexpr auto segmented_mod = c2h::get<2, TestType>::value; static_assert(segmented_mod == reduce_mode::tail_flags || segmented_mod == reduce_mode::head_flags, "Segmented tests must either be head or tail flags"); - using warp_seg_sum_t = cub::detail:: - conditional_t<(segmented_mod == reduce_mode::tail_flags), warp_seg_sum_tail_t, warp_seg_sum_head_t>; + using warp_seg_sum_t = + ::cuda::std::_If<(segmented_mod == reduce_mode::tail_flags), warp_seg_sum_tail_t, warp_seg_sum_head_t>; // Prepare test data c2h::device_vector d_in(params::tile_size); @@ -521,9 +521,9 @@ CUB_TEST("Warp segmented reduction works", "[reduce][warp]", builtin_type_list, static_assert(segmented_mod == reduce_mode::tail_flags || segmented_mod == reduce_mode::head_flags, "Segmented tests must either be head or tail flags"); using warp_seg_reduction_t = - cub::detail::conditional_t<(segmented_mod == reduce_mode::tail_flags), - warp_seg_reduce_tail_t, - warp_seg_reduce_head_t>; + ::cuda::std::_If<(segmented_mod == reduce_mode::tail_flags), + warp_seg_reduce_tail_t, + warp_seg_reduce_head_t>; // Prepare test data c2h::device_vector d_in(params::tile_size); diff --git a/cub/test/test_device_spmv.cu b/cub/test/test_device_spmv.cu index 24a4befd7f2..2e2699dd17c 100644 --- a/cub/test/test_device_spmv.cu +++ b/cub/test/test_device_spmv.cu @@ -37,6 +37,8 @@ #include #include +#include + #include #include #include @@ -205,7 +207,7 @@ struct csr_matrix private: template - using vector_t = cub::detail::conditional_t, c2h::device_vector>; + using vector_t = ::cuda::std::_If, c2h::device_vector>; vector_t m_values; vector_t m_row_offsets; diff --git a/docs/cub/developer_overview.rst b/docs/cub/developer_overview.rst index 904ecb6cfe0..0a1163bbc1a 100644 --- a/docs/cub/developer_overview.rst +++ b/docs/cub/developer_overview.rst @@ -237,7 +237,7 @@ For example, :cpp:struct:`cub::WarpReduce` dispatches to two different implement .. code-block:: c++ - using InternalWarpReduce = cub::detail::conditional_t< + using InternalWarpReduce = cuda::std::conditional_t< IS_POW_OF_TWO, WarpReduceShfl, // shuffle-based implementation WarpReduceSmem>; // smem-based implementation