diff --git a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh index af085f65274..ef912463d6d 100644 --- a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -60,6 +60,8 @@ #include #include +#include +#include #include #include @@ -964,7 +966,7 @@ struct DispatchSegmentedSort constexpr auto num_segments_per_invocation_limit = static_cast(::cuda::std::numeric_limits::max()); auto const max_num_segments_per_invocation = static_cast( - ::cuda::std::min(static_cast(num_segments), num_segments_per_invocation_limit)); + (::cuda::std::min)(static_cast(num_segments), num_segments_per_invocation_limit)); large_and_medium_segments_indices.grow(max_num_segments_per_invocation); small_segments_indices.grow(max_num_segments_per_invocation); diff --git a/cub/cub/util_arch.cuh b/cub/cub/util_arch.cuh index 3c6aea5cc5c..a2093ae288b 100644 --- a/cub/cub/util_arch.cuh +++ b/cub/cub/util_arch.cuh @@ -121,20 +121,20 @@ template struct MemBoundScaling { - static constexpr int ITEMS_PER_THREAD = ::cuda::std::max( - 1, ::cuda::std::min(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2)); - static constexpr int BLOCK_THREADS = - ::cuda::std::min(Nominal4ByteBlockThreads, - ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32); + static constexpr int ITEMS_PER_THREAD = (::cuda::std::max)( + 1, (::cuda::std::min)(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2)); + static constexpr int BLOCK_THREADS = (::cuda::std::min)( + Nominal4ByteBlockThreads, + ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32); }; #endif // Do not document diff --git a/cub/cub/util_macro.cuh b/cub/cub/util_macro.cuh index e95a7136f43..3314123c9ab 100644 --- a/cub/cub/util_macro.cuh +++ b/cub/cub/util_macro.cuh @@ -49,6 +49,26 @@ CUB_NAMESPACE_BEGIN +#ifndef _CCCL_DOXYGEN_INVOKED // Do not document +# define CUB_PREVENT_MACRO_SUBSTITUTION +template +CCCL_DEPRECATED_BECAUSE("Use cuda::std::min from instead") +constexpr _CCCL_HOST_DEVICE auto min CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u) + -> decltype(t < u ? ::cuda::std::forward(t) : ::cuda::std::forward(u)) +{ + return t < u ? ::cuda::std::forward(t) : ::cuda::std::forward(u); +} + +template +CCCL_DEPRECATED_BECAUSE("Use cuda::std::max from instead") +constexpr _CCCL_HOST_DEVICE auto max CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u) + -> decltype(t < u ? ::cuda::std::forward(u) : ::cuda::std::forward(t)) +{ + return t < u ? ::cuda::std::forward(u) : ::cuda::std::forward(t); +} +# undef CUB_PREVENT_MACRO_SUBSTITUTION +#endif + #ifndef CUB_MAX /// Select maximum(a, b) /// Deprecated since [2.8] diff --git a/cub/test/catch2_radix_sort_helper.cuh b/cub/test/catch2_radix_sort_helper.cuh index 9b7870f6ef0..642b2aed4f1 100644 --- a/cub/test/catch2_radix_sort_helper.cuh +++ b/cub/test/catch2_radix_sort_helper.cuh @@ -69,7 +69,7 @@ struct segment_iterator __host__ __device__ OffsetT operator()(std::int64_t x) const { - return ::cuda::std::min(last, x * Step); + return (::cuda::std::min)(last, x * Step); } };