From 0659159cd751bcca8d2214b419b0519143b820b5 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 31 Jan 2025 02:47:46 +0100 Subject: [PATCH 1/3] Deprecate macros from cuda/detail/core/util.h (#3504) (#3520) --- thrust/thrust/system/cuda/detail/core/util.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/thrust/thrust/system/cuda/detail/core/util.h b/thrust/thrust/system/cuda/detail/core/util.h index 2105489fc43..5ab1d2f12ac 100644 --- a/thrust/thrust/system/cuda/detail/core/util.h +++ b/thrust/thrust/system/cuda/detail/core/util.h @@ -60,24 +60,33 @@ namespace core #ifdef _NVHPC_CUDA # if (__NVCOMPILER_CUDA_ARCH__ >= 600) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm60 # elif (__NVCOMPILER_CUDA_ARCH__ >= 520) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm52 # elif (__NVCOMPILER_CUDA_ARCH__ >= 350) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm35 # else +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm30 # endif #else # if (__CUDA_ARCH__ >= 600) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm60 # elif (__CUDA_ARCH__ >= 520) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm52 # elif (__CUDA_ARCH__ >= 350) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm35 # elif (__CUDA_ARCH__ >= 300) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm30 # elif !defined(__CUDA_ARCH__) +// deprecated [since 2.8] # define THRUST_TUNING_ARCH sm30 # endif #endif @@ -684,6 +693,7 @@ inline void _CCCL_DEVICE sync_threadblock() __syncthreads(); } +// Deprecated [Since 2.8] #define CUDA_CUB_RET_IF_FAIL(e) \ { \ auto const error = (e); \ From 193cf146a46bf6570c2e4cec1e29e3f7efc88f89 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Fri, 31 Jan 2025 04:37:26 +0100 Subject: [PATCH 2/3] Try to always include the definition of barrier_native_handle when needed (#3556) (#3569) --- libcudacxx/include/cuda/__barrier/barrier_native_handle.h | 4 ++++ .../include/cuda/__memcpy_async/try_get_barrier_handle.h | 1 + libcudacxx/include/cuda/barrier | 1 + 3 files changed, 6 insertions(+) diff --git a/libcudacxx/include/cuda/__barrier/barrier_native_handle.h b/libcudacxx/include/cuda/__barrier/barrier_native_handle.h index 29879c71edf..a685c832723 100644 --- a/libcudacxx/include/cuda/__barrier/barrier_native_handle.h +++ b/libcudacxx/include/cuda/__barrier/barrier_native_handle.h @@ -25,6 +25,8 @@ #include #include +#if _CCCL_HAS_CUDA_COMPILER + _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE _CCCL_DEVICE inline _CUDA_VSTD::uint64_t* barrier_native_handle(barrier& __b) @@ -34,4 +36,6 @@ _CCCL_DEVICE inline _CUDA_VSTD::uint64_t* barrier_native_handle(barrier +#include #include #include #include diff --git a/libcudacxx/include/cuda/barrier b/libcudacxx/include/cuda/barrier index 0d65d4bf344..bcc2999cb31 100644 --- a/libcudacxx/include/cuda/barrier +++ b/libcudacxx/include/cuda/barrier @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include From 00438adf91ca88b694fe8b98b87848beacd5f6e9 Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Fri, 31 Jan 2025 06:59:24 +0100 Subject: [PATCH 3/3] deprecates policy hubs (#3514) (#3531) --- cub/cub/device/device_radix_sort.cuh | 28 +++++++++++-------- cub/cub/device/dispatch/dispatch_reduce.cuh | 2 +- .../dispatch/dispatch_streaming_reduce.cuh | 2 +- .../tuning/tuning_adjacent_difference.cuh | 5 ++-- .../dispatch/tuning/tuning_merge_sort.cuh | 3 +- .../dispatch/tuning/tuning_radix_sort.cuh | 4 +-- .../device/dispatch/tuning/tuning_reduce.cuh | 19 +++++++++---- .../device/dispatch/tuning/tuning_scan.cuh | 4 +-- .../dispatch/tuning/tuning_scan_by_key.cuh | 5 ++-- .../dispatch/tuning/tuning_segmented_sort.cuh | 5 ++-- .../dispatch/tuning/tuning_unique_by_key.cuh | 4 +-- 11 files changed, 49 insertions(+), 32 deletions(-) diff --git a/cub/cub/device/device_radix_sort.cuh b/cub/cub/device/device_radix_sort.cuh index 32156b75e34..9a099ee3cb9 100644 --- a/cub/cub/device/device_radix_sort.cuh +++ b/cub/cub/device/device_radix_sort.cuh @@ -151,18 +151,22 @@ private: int end_bit, cudaStream_t stream) { - return DispatchRadixSort, DecomposerT>:: - Dispatch( - d_temp_storage, - temp_storage_bytes, - d_keys, - d_values, - static_cast(num_items), - begin_bit, - end_bit, - is_overwrite_okay, - stream, - decomposer); + return DispatchRadixSort< + IsDescending, + KeyT, + ValueT, + OffsetT, + detail::radix::policy_hub, + DecomposerT>::Dispatch(d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + static_cast(num_items), + begin_bit, + end_bit, + is_overwrite_okay, + stream, + decomposer); } template diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh index 8aaa5728d1c..d3e20785e57 100644 --- a/cub/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce.cuh @@ -590,7 +590,7 @@ struct DispatchReduce template CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke(ActivePolicyT active_policy = {}) { - auto wrapped_policy = MakeReducePolicyWrapper(active_policy); + auto wrapped_policy = detail::reduce::MakeReducePolicyWrapper(active_policy); if (num_items <= static_cast( wrapped_policy.SingleTile().BlockThreads() * wrapped_policy.SingleTile().ItemsPerThread())) { diff --git a/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh b/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh index d4af506a6d9..5d6729f67dd 100644 --- a/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh +++ b/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh @@ -189,7 +189,7 @@ template , PerPartitionOffsetT, ReductionOpT>> + detail::reduce::policy_hub, PerPartitionOffsetT, ReductionOpT>> struct dispatch_streaming_arg_reduce_t { // Internal dispatch routine for computing a device-wide argument extremum, like `ArgMin` and `ArgMax` diff --git a/cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh b/cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh index b4bae244371..20717e1c68a 100644 --- a/cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh @@ -77,8 +77,9 @@ struct policy_hub } // namespace adjacent_difference } // namespace detail -// TODO(bgruber): deprecate this alias. Users should not access policy_hubs directly. template -using DeviceAdjacentDifferencePolicy = detail::adjacent_difference::policy_hub; +using DeviceAdjacentDifferencePolicy CCCL_DEPRECATED_BECAUSE( + "This class is considered an implementation detail and it " + "will be removed.") = detail::adjacent_difference::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh b/cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh index e2154d5e337..94d54b08509 100644 --- a/cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh @@ -92,6 +92,7 @@ struct policy_hub } // namespace detail template -using DeviceMergeSortPolicy = detail::merge_sort::policy_hub; +using DeviceMergeSortPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be " + "removed.") = detail::merge_sort::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh b/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh index 4c1c2f53042..99b8dbda413 100644 --- a/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh @@ -887,7 +887,6 @@ struct policy_hub } // namespace radix } // namespace detail -// TODO(bgruber): deprecate this alias. Users should not access policy_hubs directly. /** * @brief Tuning policy for kernel specialization * @@ -901,6 +900,7 @@ struct policy_hub * Signed integer type for global offsets */ template -using DeviceRadixSortPolicy = detail::radix::policy_hub; +using DeviceRadixSortPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be " + "removed.") = detail::radix::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_reduce.cuh b/cub/cub/device/dispatch/tuning/tuning_reduce.cuh index dc844efef3b..a87b6b9d6d6 100644 --- a/cub/cub/device/dispatch/tuning/tuning_reduce.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_reduce.cuh @@ -141,7 +141,6 @@ struct policy_hub } // namespace reduce } // namespace detail -// TODO(bgruber): deprecate at some point when we have a better API for users to supply tunings /// @tparam AccumT /// Accumulator data type /// @@ -152,10 +151,20 @@ struct policy_hub /// Binary reduction functor type having member /// `auto operator()(const T &a, const U &b)` template -using DeviceReducePolicy = detail::reduce::policy_hub; +using DeviceReducePolicy CCCL_DEPRECATED_BECAUSE( + "This class is considered an implementation detail and it will be " + "removed.") = detail::reduce::policy_hub; -// TODO(bgruber): deprecate those -using detail::reduce::MakeReducePolicyWrapper; -using detail::reduce::ReducePolicyWrapper; +template +using ReducePolicyWrapper CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be " + "removed.") = detail::reduce::ReducePolicyWrapper; + +template +CCCL_DEPRECATED_BECAUSE("This function is considered an implementation detail and it will " + "be removed.") +CUB_RUNTIME_FUNCTION detail::reduce::ReducePolicyWrapper MakeReducePolicyWrapper(PolicyT policy) +{ + return detail::reduce::ReducePolicyWrapper{policy}; +} CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh index 2163c4b7431..1c76064da1d 100644 --- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh @@ -296,8 +296,8 @@ struct policy_hub } // namespace scan } // namespace detail -// TODO(bgruber): deprecate this at some point when we have a better way to allow users to supply tunings template > -using DeviceScanPolicy = detail::scan::policy_hub; +using DeviceScanPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be " + "removed.") = detail::scan::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh b/cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh index b3eaa4e513c..cdd2468dc38 100644 --- a/cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh @@ -790,8 +790,9 @@ struct policy_hub } // namespace scan_by_key } // namespace detail -// TODO(bgruber): deprecate this at some point in the future when we have a better API for users to supply policies template > -using DeviceScanByKeyPolicy = detail::scan_by_key::policy_hub; +using DeviceScanByKeyPolicy CCCL_DEPRECATED_BECAUSE( + "This class is considered an implementation detail and it will be " + "removed.") = detail::scan_by_key::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_segmented_sort.cuh b/cub/cub/device/dispatch/tuning/tuning_segmented_sort.cuh index b1b72637f4b..fc442a4f982 100644 --- a/cub/cub/device/dispatch/tuning/tuning_segmented_sort.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_segmented_sort.cuh @@ -272,8 +272,9 @@ struct policy_hub } // namespace segmented_sort } // namespace detail -// TODO(bgruber): Deprecate this at some point when we have a better API for users to provide tunings template -using DeviceSegmentedSortPolicy = detail::segmented_sort::policy_hub; +using DeviceSegmentedSortPolicy CCCL_DEPRECATED_BECAUSE( + "This class is considered an implementation detail and it will " + "be removed.") = detail::segmented_sort::policy_hub; CUB_NAMESPACE_END diff --git a/cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh b/cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh index b1c4c91ea23..f988d6fb29e 100644 --- a/cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh @@ -581,9 +581,9 @@ struct policy_hub } // namespace unique_by_key } // namespace detail -// TODO(bgruber): deprecate at some point when we have an API to pass tuning policies template -using DeviceUniqueByKeyPolicy = +using DeviceUniqueByKeyPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will " + "be removed.") = detail::unique_by_key::policy_hub, detail::value_t>; CUB_NAMESPACE_END