Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate and replace CUB_IS_INT128_ENABLED #3427

Merged
merged 4 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cub/cub/detail/fast_modulo_division.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#endif // no system header

#include <cub/detail/type_traits.cuh> // implicit_prom_t
#include <cub/util_type.cuh> // CUB_IS_INT128_ENABLED
#include <cub/util_type.cuh> // _CCCL_HAS_INT128()

#include <cuda/cmath> // cuda::std::ceil_div
#include <cuda/std/bit> // std::has_single_bit
Expand Down Expand Up @@ -79,15 +79,15 @@ struct larger_unsigned_type<T, typename ::cuda::std::enable_if<(sizeof(T) == 4)>
using type = ::cuda::std::uint64_t;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()

// Partial specialization selected when sizeof(T) == 8: the larger unsigned
// type is the compiler-provided 128-bit __uint128_t. It sits inside the
// surrounding 128-bit-integer preprocessor guard, so it only exists when that
// guard evaluates to true.
template <typename T>
struct larger_unsigned_type<T, typename ::cuda::std::enable_if<(sizeof(T) == 8)>::type>
{
using type = __uint128_t;
};

#endif // CUB_IS_INT128_ENABLED
#endif // _CCCL_HAS_INT128()

// Convenience alias for the nested ::type of larger_unsigned_type<T>.
template <typename T>
using larger_unsigned_type_t = typename larger_unsigned_type<T>::type;
Expand Down
12 changes: 6 additions & 6 deletions cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -646,27 +646,27 @@ public:
// Selects the integer type IntArithmeticT:
//  - uint32_t when sizeof(SampleT) + sizeof(CommonT) <= sizeof(uint32_t);
//  - otherwise, when 128-bit integers are available and CommonT is
//    __int128_t or __uint128_t, CommonT itself;
//  - otherwise uint64_t.
using IntArithmeticT = ::cuda::std::_If< //
sizeof(SampleT) + sizeof(CommonT) <= sizeof(uint32_t), //
uint32_t, //
#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
::cuda::std::_If< //
(::cuda::std::is_same<CommonT, __int128_t>::value || //
::cuda::std::is_same<CommonT, __uint128_t>::value), //
CommonT, //
uint64_t> //
#else // ^^^ CUB_IS_INT128_ENABLED ^^^ / vvv !CUB_IS_INT128_ENABLED vvv
#else // ^^^ _CCCL_HAS_INT128() ^^^ / vvv !_CCCL_HAS_INT128() vvv
uint64_t
#endif // !CUB_IS_INT128_ENABLED
#endif // !_CCCL_HAS_INT128()
>;

// Alias template that excludes __[u]int128 from the integral types:
// it is ::cuda::std::false_type when T is __int128_t or __uint128_t, and
// ::cuda::std::is_integral<T> for every other T. Without 128-bit integer
// support it is simply ::cuda::std::is_integral<T>.
// The two is_same checks are combined with ||: T can never be both types at
// once, so combining them with && is always false and would exclude nothing.
template <typename T>
using is_integral_excl_int128 =
#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
::cuda::std::_If<::cuda::std::is_same<T, __int128_t>::value || ::cuda::std::is_same<T, __uint128_t>::value,
::cuda::std::false_type,
::cuda::std::is_integral<T>>;
#else // ^^^ CUB_IS_INT128_ENABLED ^^^ / vvv !CUB_IS_INT128_ENABLED vvv
#else // ^^^ _CCCL_HAS_INT128() ^^^ / vvv !_CCCL_HAS_INT128() vvv
::cuda::std::is_integral<T>;
#endif // !CUB_IS_INT128_ENABLED
#endif // !_CCCL_HAS_INT128()

union ScaleT
{
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ struct sm80_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<1075>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm80_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -216,7 +216,7 @@ struct sm90_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<515>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm90_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -349,7 +349,7 @@ struct sm80_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<1065>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm80_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -414,7 +414,7 @@ struct sm90_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<840>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm90_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ struct sm80_tuning<double, primitive_op::yes, primitive_accum::yes, accum_size::
static constexpr BlockStoreAlgorithm store_algorithm = BLOCK_STORE_WARP_TRANSPOSE;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16>
{
Expand Down Expand Up @@ -221,7 +221,7 @@ template <class T> struct sm90_tuning<T, primitive_op::yes, primitive_accum::yes
// Explicit sm90_tuning specializations for float (accum_size::_4) and double
// (accum_size::_8) with primitive_op::yes and primitive_accum::yes. Each one
// takes all of its members from the sm90_tuning_vals base it derives from.
// NOTE(review): the meaning of the four numeric arguments is defined by
// sm90_tuning_vals, which is not visible here — confirm before changing them.
template <> struct sm90_tuning<float, primitive_op::yes, primitive_accum::yes, accum_size::_4> : sm90_tuning_vals<float, 128, 24, 688, 1140> {};
template <> struct sm90_tuning<double, primitive_op::yes, primitive_accum::yes, accum_size::_8> : sm90_tuning_vals<double, 224, 24, 576, 1215> {};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
// Explicit sm90_tuning specialization for __int128_t (accum_size::_16,
// primitive_accum::no); all members come from the sm90_tuning_vals base.
// NOTE(review): the numeric arguments are interpreted by sm90_tuning_vals,
// which is not visible here.
template <> struct sm90_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> : sm90_tuning_vals<__int128_t, 576, 21, 860, 630> {};
template <>
struct sm90_tuning<__uint128_t, primitive_op::yes, primitive_accum::no, accum_size::_16>
Expand Down
20 changes: 10 additions & 10 deletions cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_1, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<124, 1040>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_1, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -229,7 +229,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_2, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<160, 695>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_2, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -286,7 +286,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_4, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<888, 635>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_4, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -343,7 +343,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_8, val_size::_8,
using delay_constructor = no_delay_constructor_t<1160>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_8, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -400,7 +400,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_16, val_size::_8,
using delay_constructor = no_delay_constructor_t<1030>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_16, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -465,7 +465,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_1, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<488, 1070>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_1, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -522,7 +522,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_2, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<352, 1170>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_2, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -579,7 +579,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_4, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<556, 1195>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_4, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -636,7 +636,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_8, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<600, 930>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_8, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -693,7 +693,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_16, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<320, 1200>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_16, val_size::_16, primitive_accum::no>
{
Expand Down
16 changes: 8 additions & 8 deletions cub/cub/device/dispatch/tuning/tuning_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ struct sm80_tuning<Input, flagged::no, keep_rejects::no, offset_size::_4, primit
using delay_constructor = detail::fixed_delay_constructor_t<832, 1165>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::no, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -174,7 +174,7 @@ struct sm80_tuning<Input, flagged::yes, keep_rejects::no, offset_size::_4, primi
using delay_constructor = detail::no_delay_constructor_t<1130>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::yes, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -227,7 +227,7 @@ struct sm80_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<68, 1160>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::no, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -280,7 +280,7 @@ struct sm80_tuning<Input, flagged::yes, keep_rejects::yes, offset_size::_4, prim
using delay_constructor = detail::fixed_delay_constructor_t<884, 1130>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::yes, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -336,7 +336,7 @@ struct sm90_tuning<Input, flagged::no, keep_rejects::no, offset_size::_4, primit
using delay_constructor = detail::fixed_delay_constructor_t<380, 1140>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::no, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -389,7 +389,7 @@ struct sm90_tuning<Input, flagged::yes, keep_rejects::no, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<360, 1170>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::yes, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -442,7 +442,7 @@ struct sm90_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<512, 1075>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::no, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -495,7 +495,7 @@ struct sm90_tuning<Input, flagged::yes, keep_rejects::yes, offset_size::_4, prim
using delay_constructor = detail::fixed_delay_constructor_t<532, 1180>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::yes, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ BFE(UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type
return (source >> bit_start) & MASK;
}

# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
/**
* Bitfield-extract for 128-bit types.
*/
Expand Down
13 changes: 2 additions & 11 deletions cub/cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,8 @@ _CCCL_DIAG_POP
CUB_NAMESPACE_BEGIN

#ifndef CUB_IS_INT128_ENABLED
# if defined(__CUDACC_RTC__)
# if defined(__CUDACC_RTC_INT128__)
# define CUB_IS_INT128_ENABLED 1
# endif // !defined(__CUDACC_RTC_INT128__)
# else // !defined(__CUDACC_RTC__)
# if _CCCL_CUDACC_AT_LEAST(11, 5)
# if _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) || _CCCL_COMPILER(NVHPC)
# define CUB_IS_INT128_ENABLED 1
# endif // GCC || CLANG || NVHPC
# endif // _CCCL_CUDACC_AT_LEAST(11, 5)
# endif // !defined(__CUDACC_RTC__)
// Deprecated [Since 2.8]
# define CUB_IS_INT128_ENABLED _CCCL_HAS_INT128()
#endif // !defined(CUB_IS_INT128_ENABLED)

/******************************************************************************
Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_for_each_in_extents.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ using index_types =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand All @@ -120,7 +120,7 @@ using index_types_dynamic =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand Down
2 changes: 1 addition & 1 deletion cub/test/catch2_test_printing.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ std::string print(T val)
return ss.str();
}

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
TEST_CASE("Test utils can print __int128", "[test][utils]")
{
REQUIRE(print(__int128_t{0}) == "0");
Expand Down
2 changes: 1 addition & 1 deletion cub/test/internal/catch2_test_fast_div_mod.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ using index_types =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand Down
2 changes: 1 addition & 1 deletion cub/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ std::ostream& operator<<(std::ostream& os, const CUB_NS_QUALIFIER::KeyValuePair<
return os;
}

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
inline std::ostream& operator<<(std::ostream& os, __uint128_t val)
{
constexpr int max_digits = 40;
Expand Down
Loading