Skip to content

Commit

Permalink
Deprecate cub::{min, max} and replace internal uses with those from…
Browse files Browse the repository at this point in the history
… libcu++ (NVIDIA#3419)

* Deprecate `cub::{min, max}` and replace internal uses with those from libcu++

Fixes NVIDIA#3404
  • Loading branch information
miscco authored and davebayer committed Jan 23, 2025
1 parent 7187e18 commit e364cc9
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 11 deletions.
4 changes: 3 additions & 1 deletion cub/cub/device/dispatch/dispatch_segmented_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
#include <thrust/system/cuda/detail/core/triple_chevron_launch.h>

#include <cuda/cmath>
#include <cuda/std/__algorithm/max.h>
#include <cuda/std/__algorithm/min.h>
#include <cuda/std/type_traits>

#include <type_traits>
Expand Down Expand Up @@ -964,7 +966,7 @@ struct DispatchSegmentedSort
constexpr auto num_segments_per_invocation_limit =
static_cast<global_segment_offset_t>(::cuda::std::numeric_limits<int>::max());
auto const max_num_segments_per_invocation = static_cast<global_segment_offset_t>(
::cuda::std::min(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));
(::cuda::std::min)(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));

large_and_medium_segments_indices.grow(max_num_segments_per_invocation);
small_segments_indices.grow(max_num_segments_per_invocation);
Expand Down
18 changes: 9 additions & 9 deletions cub/cub/util_arch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -121,20 +121,20 @@ template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename
struct RegBoundScaling
{
static constexpr int ITEMS_PER_THREAD =
::cuda::std::max(1, Nominal4ByteItemsPerThread * 4 / ::cuda::std::max(4, int{sizeof(T)}));
static constexpr int BLOCK_THREADS =
::cuda::std::min(Nominal4ByteBlockThreads,
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
(::cuda::std::max)(1, Nominal4ByteItemsPerThread * 4 / (::cuda::std::max)(4, int{sizeof(T)}));
static constexpr int BLOCK_THREADS = (::cuda::std::min)(
Nominal4ByteBlockThreads,
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
};

template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename T>
struct MemBoundScaling
{
static constexpr int ITEMS_PER_THREAD = ::cuda::std::max(
1, ::cuda::std::min(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2));
static constexpr int BLOCK_THREADS =
::cuda::std::min(Nominal4ByteBlockThreads,
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
static constexpr int ITEMS_PER_THREAD = (::cuda::std::max)(
1, (::cuda::std::min)(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2));
static constexpr int BLOCK_THREADS = (::cuda::std::min)(
Nominal4ByteBlockThreads,
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
};

#endif // Do not document
Expand Down
20 changes: 20 additions & 0 deletions cub/cub/util_macro.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,26 @@

CUB_NAMESPACE_BEGIN

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
# define CUB_PREVENT_MACRO_SUBSTITUTION
// Deprecated generic two-argument minimum; the deprecation message below
// directs callers to cuda::std::min instead.
//
// Evaluates `t < u` and perfectly forwards `t` when the comparison is true and
// `u` otherwise. Consequently `u` is the result whenever `t < u` is false,
// which includes the case where the two arguments compare equivalent.
// T and U may be different types; the return type is whatever the conditional
// expression in the trailing decltype yields for the forwarded operands.
//
// CUB_PREVENT_MACRO_SUBSTITUTION is defined just above with an empty
// replacement. Sitting between the function name and the parameter list, it
// means `min` is not directly followed by `(`, so a function-like macro named
// `min` would not be expanded on this declaration.
// NOTE(review): presumably this targets the min/max macros some platform
// headers define -- the source does not say which; confirm.
template <typename T, typename U>
CCCL_DEPRECATED_BECAUSE("Use cuda::std::min from <cuda/std/functional> instead")
constexpr _CCCL_HOST_DEVICE auto min CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u)
-> decltype(t < u ? ::cuda::std::forward<T>(t) : ::cuda::std::forward<U>(u))
{
// Must stay textually in sync with the decltype expression above.
return t < u ? ::cuda::std::forward<T>(t) : ::cuda::std::forward<U>(u);
}

// Deprecated generic two-argument maximum; the deprecation message below
// directs callers to cuda::std::max instead.
//
// Evaluates `t < u` and perfectly forwards `u` when the comparison is true and
// `t` otherwise. Consequently `t` is the result whenever `t < u` is false,
// which includes the case where the two arguments compare equivalent.
// T and U may be different types; the return type is whatever the conditional
// expression in the trailing decltype yields for the forwarded operands.
//
// CUB_PREVENT_MACRO_SUBSTITUTION is defined earlier in this section with an
// empty replacement. Sitting between the function name and the parameter list,
// it means `max` is not directly followed by `(`, so a function-like macro
// named `max` would not be expanded on this declaration.
// NOTE(review): presumably this targets the min/max macros some platform
// headers define -- the source does not say which; confirm.
template <typename T, typename U>
CCCL_DEPRECATED_BECAUSE("Use cuda::std::max from <cuda/std/functional> instead")
constexpr _CCCL_HOST_DEVICE auto max CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u)
-> decltype(t < u ? ::cuda::std::forward<U>(u) : ::cuda::std::forward<T>(t))
{
// Must stay textually in sync with the decltype expression above.
return t < u ? ::cuda::std::forward<U>(u) : ::cuda::std::forward<T>(t);
}
# undef CUB_PREVENT_MACRO_SUBSTITUTION
#endif

#ifndef CUB_MAX
/// Select maximum(a, b)
/// Deprecated since [2.8]
Expand Down
2 changes: 1 addition & 1 deletion cub/test/catch2_radix_sort_helper.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ struct segment_iterator

__host__ __device__ OffsetT operator()(std::int64_t x) const
{
return ::cuda::std::min(last, x * Step);
return (::cuda::std::min)(last, x * Step);
}
};

Expand Down

0 comments on commit e364cc9

Please sign in to comment.