Skip to content

Commit

Permalink
Backport to 2.8: Deprecate and replace CUB_IS_INT128_ENABLED (#3427) (#3629)
Browse files Browse the repository at this point in the history

Co-authored-by: Federico Busato <[email protected]>
  • Loading branch information
bernhardmgruber and fbusato authored Jan 31, 2025
1 parent 264869a commit 6be3e78
Show file tree
Hide file tree
Showing 13 changed files with 42 additions and 51 deletions.
6 changes: 3 additions & 3 deletions cub/cub/detail/fast_modulo_division.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#endif // no system header

#include <cub/detail/type_traits.cuh> // implicit_prom_t
#include <cub/util_type.cuh> // CUB_IS_INT128_ENABLED
#include <cub/util_type.cuh> // _CCCL_HAS_INT128()

#include <cuda/cmath> // cuda::std::ceil_div
#include <cuda/std/bit> // std::has_single_bit
Expand Down Expand Up @@ -79,15 +79,15 @@ struct larger_unsigned_type<T, typename ::cuda::std::enable_if<(sizeof(T) == 4)>
using type = ::cuda::std::uint64_t;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()

// Specialization chosen when sizeof(T) == 8: the larger unsigned type is the
// 128-bit __uint128_t.
template <typename T>
struct larger_unsigned_type<T, typename ::cuda::std::enable_if<(sizeof(T) == 8)>::type>
{
using type = __uint128_t;
};

#endif // CUB_IS_INT128_ENABLED
#endif // _CCCL_HAS_INT128()

// Shorthand alias for `typename larger_unsigned_type<T>::type`.
template <typename T>
using larger_unsigned_type_t = typename larger_unsigned_type<T>::type;
Expand Down
12 changes: 6 additions & 6 deletions cub/cub/device/dispatch/dispatch_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -646,27 +646,27 @@ public:
// Integer type selected from the sizes/types of SampleT and CommonT:
//  - uint32_t when sizeof(SampleT) + sizeof(CommonT) <= sizeof(uint32_t);
//  - otherwise, when 128-bit integers are available, CommonT itself if it is
//    __int128_t or __uint128_t, and uint64_t for any other CommonT;
//  - otherwise (no 128-bit integer support) uint64_t.
using IntArithmeticT = ::cuda::std::_If< //
sizeof(SampleT) + sizeof(CommonT) <= sizeof(uint32_t), //
uint32_t, //
#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
::cuda::std::_If< //
(::cuda::std::is_same<CommonT, __int128_t>::value || //
::cuda::std::is_same<CommonT, __uint128_t>::value), //
CommonT, //
uint64_t> //
#else // ^^^ CUB_IS_INT128_ENABLED ^^^ / vvv !CUB_IS_INT128_ENABLED vvv
#else // ^^^ _CCCL_HAS_INT128() ^^^ / vvv !_CCCL_HAS_INT128() vvv
uint64_t
#endif // !CUB_IS_INT128_ENABLED
#endif // !_CCCL_HAS_INT128()
>;

// Alias template that excludes __[u]int128 from the integral types:
//  - ::cuda::std::false_type when T is __int128_t or __uint128_t;
//  - ::cuda::std::is_integral<T> for every other T (and for all T when 128-bit
//    integers are unavailable).
template <typename T>
using is_integral_excl_int128 =
#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
// T can be at most one of the two 128-bit types, so the checks must be OR-ed;
// AND-ing them is never true and would leave both types classified as integral.
::cuda::std::_If<::cuda::std::is_same<T, __int128_t>::value || ::cuda::std::is_same<T, __uint128_t>::value,
::cuda::std::false_type,
::cuda::std::is_integral<T>>;
#else // ^^^ CUB_IS_INT128_ENABLED ^^^ / vvv !CUB_IS_INT128_ENABLED vvv
#else // ^^^ _CCCL_HAS_INT128() ^^^ / vvv !_CCCL_HAS_INT128() vvv
::cuda::std::is_integral<T>;
#endif // !CUB_IS_INT128_ENABLED
#endif // !_CCCL_HAS_INT128()

union ScaleT
{
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ struct sm80_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<1075>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm80_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -216,7 +216,7 @@ struct sm90_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<515>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm90_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -349,7 +349,7 @@ struct sm80_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<1065>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm80_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down Expand Up @@ -414,7 +414,7 @@ struct sm90_tuning<LengthT, KeyT, primitive_length::yes, primitive_key::yes, len
using delay_constructor = detail::no_delay_constructor_t<840>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class LengthT>
struct sm90_tuning<LengthT, __int128_t, primitive_length::yes, primitive_key::no, length_size::_4, key_size::_16>
{
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ struct sm80_tuning<double, primitive_op::yes, primitive_accum::yes, accum_size::
static constexpr BlockStoreAlgorithm store_algorithm = BLOCK_STORE_WARP_TRANSPOSE;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16>
{
Expand Down Expand Up @@ -218,7 +218,7 @@ template <class T> struct sm90_tuning<T, primitive_op::yes, primitive_accum::yes
template <> struct sm90_tuning<float, primitive_op::yes, primitive_accum::yes, accum_size::_4> : tuning<float, 128, 24, 688, 1140> {};
template <> struct sm90_tuning<double, primitive_op::yes, primitive_accum::yes, accum_size::_8> : tuning<double, 224, 24, 576, 1215> {};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <> struct sm90_tuning<__int128_t, primitive_op::yes, primitive_accum::no, accum_size::_16> : tuning<__int128_t, 576, 21, 860, 630> {};
template <>
struct sm90_tuning<__uint128_t, primitive_op::yes, primitive_accum::no, accum_size::_16>
Expand Down
20 changes: 10 additions & 10 deletions cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_1, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<124, 1040>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_1, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -229,7 +229,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_2, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<160, 695>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_2, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -286,7 +286,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_4, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<888, 635>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_4, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -343,7 +343,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_8, val_size::_8,
using delay_constructor = no_delay_constructor_t<1160>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_8, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -400,7 +400,7 @@ struct sm80_tuning<KeyT, ValueT, primitive_op::yes, key_size::_16, val_size::_8,
using delay_constructor = no_delay_constructor_t<1030>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm80_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_16, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -465,7 +465,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_1, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<488, 1070>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_1, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -522,7 +522,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_2, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<352, 1170>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_2, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -579,7 +579,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_4, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<556, 1195>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_4, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -636,7 +636,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_8, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<600, 930>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_8, val_size::_16, primitive_accum::no>
{
Expand Down Expand Up @@ -693,7 +693,7 @@ struct sm90_tuning<KeyT, ValueT, primitive_op::yes, key_size::_16, val_size::_8,
using delay_constructor = fixed_delay_constructor_t<320, 1200>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <class KeyT>
struct sm90_tuning<KeyT, __int128_t, primitive_op::yes, key_size::_16, val_size::_16, primitive_accum::no>
{
Expand Down
16 changes: 8 additions & 8 deletions cub/cub/device/dispatch/tuning/tuning_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ struct sm80_tuning<Input, flagged::no, keep_rejects::no, offset_size::_4, primit
using delay_constructor = detail::fixed_delay_constructor_t<832, 1165>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::no, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -174,7 +174,7 @@ struct sm80_tuning<Input, flagged::yes, keep_rejects::no, offset_size::_4, primi
using delay_constructor = detail::no_delay_constructor_t<1130>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::yes, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -227,7 +227,7 @@ struct sm80_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<68, 1160>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::no, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -280,7 +280,7 @@ struct sm80_tuning<Input, flagged::yes, keep_rejects::yes, offset_size::_4, prim
using delay_constructor = detail::fixed_delay_constructor_t<884, 1130>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm80_tuning<__int128_t, flagged::yes, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -336,7 +336,7 @@ struct sm90_tuning<Input, flagged::no, keep_rejects::no, offset_size::_4, primit
using delay_constructor = detail::fixed_delay_constructor_t<380, 1140>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::no, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -389,7 +389,7 @@ struct sm90_tuning<Input, flagged::yes, keep_rejects::no, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<360, 1170>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::yes, keep_rejects::no, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -442,7 +442,7 @@ struct sm90_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_4, primi
using delay_constructor = detail::fixed_delay_constructor_t<512, 1075>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::no, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down Expand Up @@ -495,7 +495,7 @@ struct sm90_tuning<Input, flagged::yes, keep_rejects::yes, offset_size::_4, prim
using delay_constructor = detail::fixed_delay_constructor_t<532, 1180>;
};

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
template <>
struct sm90_tuning<__int128_t, flagged::yes, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
{
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ BFE(UnsignedBits source, unsigned int bit_start, unsigned int num_bits, Int2Type
return (source >> bit_start) & MASK;
}

# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
/**
* Bitfield-extract for 128-bit types.
*/
Expand Down
13 changes: 2 additions & 11 deletions cub/cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,8 @@ _CCCL_DIAG_POP
CUB_NAMESPACE_BEGIN

// Definition of the legacy CUB_IS_INT128_ENABLED feature-test macro. The
// #ifndef guard leaves any definition made before this point untouched.
#ifndef CUB_IS_INT128_ENABLED
# if defined(__CUDACC_RTC__)
# if defined(__CUDACC_RTC_INT128__)
# define CUB_IS_INT128_ENABLED 1
# endif // defined(__CUDACC_RTC_INT128__)
# else // !defined(__CUDACC_RTC__)
# if _CCCL_CUDACC_AT_LEAST(11, 5)
# if _CCCL_COMPILER(GCC) || _CCCL_COMPILER(CLANG) || _CCCL_COMPILER(ICC) || _CCCL_COMPILER(NVHPC)
# define CUB_IS_INT128_ENABLED 1
# endif // GCC || CLANG || ICC || NVHPC
# endif // _CCCL_CUDACC_AT_LEAST(11, 5)
# endif // !defined(__CUDACC_RTC__)
// Deprecated [Since 2.8]: the macro now simply forwards to _CCCL_HAS_INT128();
// test _CCCL_HAS_INT128() directly instead.
# define CUB_IS_INT128_ENABLED _CCCL_HAS_INT128()
#endif // !defined(CUB_IS_INT128_ENABLED)

/******************************************************************************
Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_for_each_in_extents.cu
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ using index_types =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand All @@ -120,7 +120,7 @@ using index_types_dynamic =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand Down
2 changes: 1 addition & 1 deletion cub/test/catch2_test_printing.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ std::string print(T val)
return ss.str();
}

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
TEST_CASE("Test utils can print __int128", "[test][utils]")
{
REQUIRE(print(__int128_t{0}) == "0");
Expand Down
2 changes: 1 addition & 1 deletion cub/test/internal/catch2_test_fast_div_mod.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ using index_types =
uint16_t,
int32_t,
uint32_t
# if CUB_IS_INT128_ENABLED
# if _CCCL_HAS_INT128()
,
int64_t,
uint64_t
Expand Down
2 changes: 1 addition & 1 deletion cub/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ std::ostream& operator<<(std::ostream& os, const CUB_NS_QUALIFIER::KeyValuePair<
return os;
}

#if CUB_IS_INT128_ENABLED
#if _CCCL_HAS_INT128()
inline std::ostream& operator<<(std::ostream& os, __uint128_t val)
{
constexpr int max_digits = 40;
Expand Down
2 changes: 1 addition & 1 deletion libcudacxx/include/cuda/std/__cccl/extended_data_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#if !defined(_CCCL_DISABLE_INT128)
# if _CCCL_COMPILER(NVRTC) && defined(__CUDACC_RTC_INT128__) && _CCCL_OS(LINUX)
# define _CCCL_HAS_INT128() 1
# elif defined(__SIZEOF_INT128__) && _CCCL_OS(LINUX)
# elif defined(__SIZEOF_INT128__) && _CCCL_OS(LINUX) && _CCCL_CUDACC_AT_LEAST(11, 5)
# define _CCCL_HAS_INT128() 1
# else
# define _CCCL_HAS_INT128() 0
Expand Down

0 comments on commit 6be3e78

Please sign in to comment.