Skip to content

Commit

Permalink
Merge branch 'branch/2.8.x' into backport_depr_iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
miscco authored Jan 31, 2025
2 parents d89337e + 00438ad commit 4a829c6
Show file tree
Hide file tree
Showing 15 changed files with 65 additions and 32 deletions.
28 changes: 16 additions & 12 deletions cub/cub/device/device_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,22 @@ private:
int end_bit,
cudaStream_t stream)
{
return DispatchRadixSort<IsDescending, KeyT, ValueT, OffsetT, DeviceRadixSortPolicy<KeyT, ValueT, OffsetT>, DecomposerT>::
Dispatch(
d_temp_storage,
temp_storage_bytes,
d_keys,
d_values,
static_cast<OffsetT>(num_items),
begin_bit,
end_bit,
is_overwrite_okay,
stream,
decomposer);
return DispatchRadixSort<
IsDescending,
KeyT,
ValueT,
OffsetT,
detail::radix::policy_hub<KeyT, ValueT, OffsetT>,
DecomposerT>::Dispatch(d_temp_storage,
temp_storage_bytes,
d_keys,
d_values,
static_cast<OffsetT>(num_items),
begin_bit,
end_bit,
is_overwrite_okay,
stream,
decomposer);
}

template <bool IsDescending, typename KeyT, typename ValueT, typename NumItemsT, typename DecomposerT>
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,7 @@ struct DispatchReduce
template <typename ActivePolicyT>
CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke(ActivePolicyT active_policy = {})
{
auto wrapped_policy = MakeReducePolicyWrapper(active_policy);
auto wrapped_policy = detail::reduce::MakeReducePolicyWrapper(active_policy);
if (num_items <= static_cast<OffsetT>(
wrapped_policy.SingleTile().BlockThreads() * wrapped_policy.SingleTile().ItemsPerThread()))
{
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ template <typename InputIteratorT,
typename ReductionOpT,
typename InitT,
typename PolicyChainT =
DeviceReducePolicy<KeyValuePair<PerPartitionOffsetT, InitT>, PerPartitionOffsetT, ReductionOpT>>
detail::reduce::policy_hub<KeyValuePair<PerPartitionOffsetT, InitT>, PerPartitionOffsetT, ReductionOpT>>
struct dispatch_streaming_arg_reduce_t
{
# if _CCCL_COMPILER(NVHPC)
Expand Down
5 changes: 3 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ struct policy_hub
} // namespace adjacent_difference
} // namespace detail

// TODO(bgruber): deprecate this alias. Users should not access policy_hubs directly.
template <typename InputIteratorT, bool MayAlias = true>
using DeviceAdjacentDifferencePolicy = detail::adjacent_difference::policy_hub<InputIteratorT, MayAlias>;
using DeviceAdjacentDifferencePolicy CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and it "
"will be removed.") = detail::adjacent_difference::policy_hub<InputIteratorT, MayAlias>;

CUB_NAMESPACE_END
3 changes: 2 additions & 1 deletion cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ struct policy_hub
} // namespace detail

template <typename KeyIteratorT>
using DeviceMergeSortPolicy = detail::merge_sort::policy_hub<KeyIteratorT>;
using DeviceMergeSortPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be "
"removed.") = detail::merge_sort::policy_hub<KeyIteratorT>;

CUB_NAMESPACE_END
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,6 @@ struct policy_hub
} // namespace radix
} // namespace detail

// TODO(bgruber): deprecate this alias. Users should not access policy_hubs directly.
/**
* @brief Tuning policy for kernel specialization
*
Expand All @@ -901,6 +900,7 @@ struct policy_hub
* Signed integer type for global offsets
*/
template <typename KeyT, typename ValueT, typename OffsetT>
using DeviceRadixSortPolicy = detail::radix::policy_hub<KeyT, ValueT, OffsetT>;
using DeviceRadixSortPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be "
"removed.") = detail::radix::policy_hub<KeyT, ValueT, OffsetT>;

CUB_NAMESPACE_END
19 changes: 14 additions & 5 deletions cub/cub/device/dispatch/tuning/tuning_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ struct policy_hub
} // namespace reduce
} // namespace detail

// TODO(bgruber): deprecate at some point when we have a better API for users to supply tunings
/// @tparam AccumT
/// Accumulator data type
///
Expand All @@ -152,10 +151,20 @@ struct policy_hub
/// Binary reduction functor type having member
/// `auto operator()(const T &a, const U &b)`
template <typename AccumT, typename OffsetT, typename ReductionOpT>
using DeviceReducePolicy = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>;
using DeviceReducePolicy CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and it will be "
"removed.") = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>;

// TODO(bgruber): deprecate those
using detail::reduce::MakeReducePolicyWrapper;
using detail::reduce::ReducePolicyWrapper;
template <typename PolicyT, typename Enable = void>
using ReducePolicyWrapper CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be "
"removed.") = detail::reduce::ReducePolicyWrapper<PolicyT, Enable>;

/// Deprecated public entry point that wraps a policy object in a
/// `detail::reduce::ReducePolicyWrapper`.
///
/// @tparam PolicyT
///   Type of the policy object being wrapped
///
/// @param policy
///   Policy object used to brace-initialize the returned wrapper
///
/// @return
///   A `detail::reduce::ReducePolicyWrapper<PolicyT>` constructed from `policy`
template <typename PolicyT>
CCCL_DEPRECATED_BECAUSE("This function is considered an implementation detail and it will "
"be removed.")
CUB_RUNTIME_FUNCTION detail::reduce::ReducePolicyWrapper<PolicyT> MakeReducePolicyWrapper(PolicyT policy)
{
// The wrapper is constructed directly from the policy object.
return detail::reduce::ReducePolicyWrapper<PolicyT>{policy};
}

CUB_NAMESPACE_END
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ struct policy_hub
} // namespace scan
} // namespace detail

// TODO(bgruber): deprecate this at some point when we have a better way to allow users to supply tunings
template <typename AccumT, typename ScanOpT = ::cuda::std::plus<>>
using DeviceScanPolicy = detail::scan::policy_hub<AccumT, ScanOpT>;
using DeviceScanPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will be "
"removed.") = detail::scan::policy_hub<AccumT, ScanOpT>;

CUB_NAMESPACE_END
5 changes: 3 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -790,8 +790,9 @@ struct policy_hub
} // namespace scan_by_key
} // namespace detail

// TODO(bgruber): deprecate this at some point in the future when we have a better API for users to supply policies
template <typename KeysInputIteratorT, typename AccumT, typename ValueT = AccumT, typename ScanOpT = ::cuda::std::plus<>>
using DeviceScanByKeyPolicy = detail::scan_by_key::policy_hub<KeysInputIteratorT, AccumT, ValueT, ScanOpT>;
using DeviceScanByKeyPolicy CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and it will be "
"removed.") = detail::scan_by_key::policy_hub<KeysInputIteratorT, AccumT, ValueT, ScanOpT>;

CUB_NAMESPACE_END
5 changes: 3 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_segmented_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,9 @@ struct policy_hub
} // namespace segmented_sort
} // namespace detail

// TODO(bgruber): Deprecate this at some point when we have a better API for users to provide tunings
template <typename KeyT, typename ValueT>
using DeviceSegmentedSortPolicy = detail::segmented_sort::policy_hub<KeyT, ValueT>;
using DeviceSegmentedSortPolicy CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and it will "
"be removed.") = detail::segmented_sort::policy_hub<KeyT, ValueT>;

CUB_NAMESPACE_END
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -581,9 +581,9 @@ struct policy_hub
} // namespace unique_by_key
} // namespace detail

// TODO(bgruber): deprecate at some point when we have an API to pass tuning policies
template <typename KeyInputIteratorT, typename ValueInputIteratorT = unsigned long long int*>
using DeviceUniqueByKeyPolicy =
using DeviceUniqueByKeyPolicy CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and it will "
"be removed.") =
detail::unique_by_key::policy_hub<detail::value_t<KeyInputIteratorT>, detail::value_t<ValueInputIteratorT>>;

CUB_NAMESPACE_END
4 changes: 4 additions & 0 deletions libcudacxx/include/cuda/__barrier/barrier_native_handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include <cuda/__fwd/barrier.h>
#include <cuda/std/cstdint>

#if _CCCL_HAS_CUDA_COMPILER

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE

_CCCL_DEVICE inline _CUDA_VSTD::uint64_t* barrier_native_handle(barrier<thread_scope_block>& __b)
Expand All @@ -34,4 +36,6 @@ _CCCL_DEVICE inline _CUDA_VSTD::uint64_t* barrier_native_handle(barrier<thread_s

_LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE

#endif // _CCCL_HAS_CUDA_COMPILER

#endif // _CUDA___BARRIER_BARRIER_NATIVE_HANDLE_H
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#endif // no system header

#include <cuda/__barrier/barrier_block_scope.h>
#include <cuda/__barrier/barrier_native_handle.h>
#include <cuda/std/__atomic/scopes.h>
#include <cuda/std/__barrier/barrier.h>
#include <cuda/std/__barrier/empty_completion.h>
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/barrier
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <cuda/__barrier/barrier_arrive_tx.h>
#include <cuda/__barrier/barrier_block_scope.h>
#include <cuda/__barrier/barrier_expect_tx.h>
#include <cuda/__barrier/barrier_native_handle.h>
#include <cuda/__barrier/barrier_thread_scope.h>
#include <cuda/__memcpy_async/memcpy_async.h>
#include <cuda/__memcpy_async/memcpy_async_tx.h>
Expand Down
10 changes: 10 additions & 0 deletions thrust/thrust/system/cuda/detail/core/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,24 +60,33 @@ namespace core

#ifdef _NVHPC_CUDA
# if (__NVCOMPILER_CUDA_ARCH__ >= 600)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm60
# elif (__NVCOMPILER_CUDA_ARCH__ >= 520)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm52
# elif (__NVCOMPILER_CUDA_ARCH__ >= 350)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm35
# else
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm30
# endif
#else
# if (__CUDA_ARCH__ >= 600)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm60
# elif (__CUDA_ARCH__ >= 520)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm52
# elif (__CUDA_ARCH__ >= 350)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm35
# elif (__CUDA_ARCH__ >= 300)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm30
# elif !defined(__CUDA_ARCH__)
// deprecated [since 2.8]
# define THRUST_TUNING_ARCH sm30
# endif
#endif
Expand Down Expand Up @@ -684,6 +693,7 @@ inline void _CCCL_DEVICE sync_threadblock()
__syncthreads();
}

// deprecated [since 2.8]
#define CUDA_CUB_RET_IF_FAIL(e) \
{ \
auto const error = (e); \
Expand Down

0 comments on commit 4a829c6

Please sign in to comment.