Skip to content

Commit

Permalink
Merge branch 'main' into round-up-down
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato authored Jan 14, 2025
2 parents 8fdb01d + 08420d4 commit 893a413
Show file tree
Hide file tree
Showing 130 changed files with 1,924 additions and 2,576 deletions.
6 changes: 2 additions & 4 deletions cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@

#include <cuda/std/type_traits>

#include <iterator>

CUB_NAMESPACE_BEGIN

/******************************************************************************
Expand Down Expand Up @@ -162,15 +160,15 @@ struct AgentScan
// Wrap the native input pointer with CacheModifiedInputIterator
// or directly use the supplied input iterator type
using WrappedInputIteratorT =
::cuda::std::_If<std::is_pointer<InputIteratorT>::value,
::cuda::std::_If<::cuda::std::is_pointer<InputIteratorT>::value,
CacheModifiedInputIterator<AgentScanPolicyT::LOAD_MODIFIER, InputT, OffsetT>,
InputIteratorT>;

// Constants
enum
{
// Inclusive scan if no init_value type is provided
HAS_INIT = !std::is_same<InitValueT, NullType>::value,
HAS_INIT = !::cuda::std::is_same<InitValueT, NullType>::value,
IS_INCLUSIVE = ForceInclusive || !HAS_INIT, // We are relying on either initial value not being `NullType`
// or the ForceInclusive tag to be true for inclusive scan
// to get picked up.
Expand Down
15 changes: 12 additions & 3 deletions cub/cub/agent/agent_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ template <int _BLOCK_THREADS,
CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER,
bool _DIRECT_LOAD_NONZEROS,
BlockScanAlgorithm _SCAN_ALGORITHM>
struct AgentSpmvPolicy
struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") AgentSpmvPolicy
{
enum
{
Expand Down Expand Up @@ -148,7 +148,12 @@ struct AgentSpmvPolicy
* Signed integer type for sequence offsets
*/
template <typename ValueT, typename OffsetT>
struct SpmvParams
struct
// with NVHPC, we get a deprecation warning in the implementation of cudaLaunchKernelEx, which we cannot suppress :/
#if !_CCCL_COMPILER(NVHPC)
CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead")
#endif
SpmvParams
{
/// Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix
/// <b>A</b>.
Expand Down Expand Up @@ -211,7 +216,7 @@ template <typename AgentSpmvPolicyT,
bool HAS_ALPHA,
bool HAS_BETA,
int LEGACY_PTX_ARCH = 0>
struct AgentSpmv
struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") AgentSpmv
{
//---------------------------------------------------------------------
// Types and constants
Expand Down Expand Up @@ -308,7 +313,9 @@ struct AgentSpmv
/// Reference to temp_storage
_TempStorage& temp_storage;

_CCCL_SUPPRESS_DEPRECATED_PUSH
SpmvParams<ValueT, OffsetT>& spmv_params;
_CCCL_SUPPRESS_DEPRECATED_POP

/// Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements
/// of matrix <b>A</b>.
Expand Down Expand Up @@ -341,6 +348,7 @@ struct AgentSpmv
* @param spmv_params
* SpMV input parameter bundle
*/
_CCCL_SUPPRESS_DEPRECATED_PUSH
_CCCL_DEVICE _CCCL_FORCEINLINE AgentSpmv(TempStorage& temp_storage, SpmvParams<ValueT, OffsetT>& spmv_params)
: temp_storage(temp_storage.Alias())
, spmv_params(spmv_params)
Expand All @@ -350,6 +358,7 @@ struct AgentSpmv
, wd_vector_x(spmv_params.d_vector_x)
, wd_vector_y(spmv_params.d_vector_y)
{}
_CCCL_SUPPRESS_DEPRECATED_POP

/**
* @brief Consume a merge tile, specialized for direct-load of nonzeros
Expand Down
2 changes: 0 additions & 2 deletions cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@

#include <cuda/std/type_traits>

#include <iterator>

#include <nv/target>

CUB_NAMESPACE_BEGIN
Expand Down
9 changes: 3 additions & 6 deletions cub/cub/detail/detect_cuda_runtime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,9 @@
# endif // CUB_RUNTIME_FUNCTION predefined

# ifdef CUB_RDC_ENABLED
// Detect available version of CDP:
# if __CUDACC_VER_MAJOR__ < 12 || defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
# define CUB_DETAIL_CDPv1
# else
# define CUB_DETAIL_CDPv2
# endif
# ifdef CUDA_FORCE_CDP1_IF_SUPPORTED
# error "CUDA Dynamic Parallelism 1 is no longer supported. Please undefine CUDA_FORCE_CDP1_IF_SUPPORTED."
# endif // CUDA_FORCE_CDP1_IF_SUPPORTED
# endif

#endif // Do not document
22 changes: 1 addition & 21 deletions cub/cub/detail/device_synchronize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,7 @@ _CCCL_EXEC_CHECK_DISABLE
CUB_RUNTIME_FUNCTION inline cudaError_t device_synchronize()
{
cudaError_t result = cudaErrorNotSupported;

// Device-side sync is only available under CDPv1:
#if defined(CUB_DETAIL_CDPv1)

# if ((__CUDACC_VER_MAJOR__ > 11) || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// CUDA >= 11.6
# define CUB_TMP_DEVICE_SYNC_IMPL result = __cudaDeviceSynchronizeDeprecationAvoidance();
# else // CUDA < 11.6:
# define CUB_TMP_DEVICE_SYNC_IMPL result = cudaDeviceSynchronize();
# endif

#else // CDPv2 or no CDP:

# define CUB_TMP_DEVICE_SYNC_IMPL /* unavailable */

#endif // CDP version

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), (CUB_TMP_DEVICE_SYNC_IMPL));

#undef CUB_TMP_DEVICE_SYNC_IMPL

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), ());
return result;
}

Expand Down
2 changes: 0 additions & 2 deletions cub/cub/detail/strong_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@
#include <cub/util_ptx.cuh>
#include <cub/util_type.cuh>

#include <iterator>

CUB_NAMESPACE_BEGIN

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
Expand Down
25 changes: 11 additions & 14 deletions cub/cub/device/device_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ private:
CompareOpT compare_op,
cudaStream_t stream = 0)
{
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;
using ChooseOffsetT = detail::choose_offset_t<OffsetT>;

using DispatchMergeSortT =
DispatchMergeSort<KeyIteratorT, ValueIteratorT, KeyIteratorT, ValueIteratorT, PromotedOffsetT, CompareOpT>;
DispatchMergeSort<KeyIteratorT, ValueIteratorT, KeyIteratorT, ValueIteratorT, ChooseOffsetT, CompareOpT>;

return DispatchMergeSortT::Dispatch(
d_temp_storage, temp_storage_bytes, d_keys, d_items, d_keys, d_items, num_items, compare_op, stream);
Expand Down Expand Up @@ -374,10 +374,10 @@ public:
cudaStream_t stream = 0)
{
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, GetName());
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;
using ChooseOffsetT = detail::choose_offset_t<OffsetT>;

using DispatchMergeSortT =
DispatchMergeSort<KeyInputIteratorT, ValueInputIteratorT, KeyIteratorT, ValueIteratorT, PromotedOffsetT, CompareOpT>;
DispatchMergeSort<KeyInputIteratorT, ValueInputIteratorT, KeyIteratorT, ValueIteratorT, ChooseOffsetT, CompareOpT>;

return DispatchMergeSortT::Dispatch(
d_temp_storage,
Expand All @@ -402,10 +402,10 @@ private:
CompareOpT compare_op,
cudaStream_t stream = 0)
{
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;
using ChooseOffsetT = detail::choose_offset_t<OffsetT>;

using DispatchMergeSortT =
DispatchMergeSort<KeyIteratorT, NullType*, KeyIteratorT, NullType*, PromotedOffsetT, CompareOpT>;
DispatchMergeSort<KeyIteratorT, NullType*, KeyIteratorT, NullType*, ChooseOffsetT, CompareOpT>;

return DispatchMergeSortT::Dispatch(
d_temp_storage,
Expand Down Expand Up @@ -528,10 +528,10 @@ private:
CompareOpT compare_op,
cudaStream_t stream = 0)
{
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;
using ChooseOffsetT = detail::choose_offset_t<OffsetT>;

using DispatchMergeSortT =
DispatchMergeSort<KeyInputIteratorT, NullType*, KeyIteratorT, NullType*, PromotedOffsetT, CompareOpT>;
DispatchMergeSort<KeyInputIteratorT, NullType*, KeyIteratorT, NullType*, ChooseOffsetT, CompareOpT>;

return DispatchMergeSortT::Dispatch(
d_temp_storage,
Expand Down Expand Up @@ -760,9 +760,8 @@ public:
cudaStream_t stream = 0)
{
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, GetName());
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;

return SortPairsNoNVTX<KeyIteratorT, ValueIteratorT, PromotedOffsetT, CompareOpT>(
return SortPairsNoNVTX<KeyIteratorT, ValueIteratorT, OffsetT, CompareOpT>(
d_temp_storage, temp_storage_bytes, d_keys, d_items, num_items, compare_op, stream);
}

Expand Down Expand Up @@ -860,9 +859,8 @@ public:
cudaStream_t stream = 0)
{
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, GetName());
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;

return SortKeysNoNVTX<KeyIteratorT, PromotedOffsetT, CompareOpT>(
return SortKeysNoNVTX<KeyIteratorT, OffsetT, CompareOpT>(
d_temp_storage, temp_storage_bytes, d_keys, num_items, compare_op, stream);
}

Expand Down Expand Up @@ -974,8 +972,7 @@ public:
cudaStream_t stream = 0)
{
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, GetName());
using PromotedOffsetT = detail::promote_small_offset_t<OffsetT>;
return SortKeysCopyNoNVTX<KeyInputIteratorT, KeyIteratorT, PromotedOffsetT, CompareOpT>(
return SortKeysCopyNoNVTX<KeyInputIteratorT, KeyIteratorT, OffsetT, CompareOpT>(
d_temp_storage, temp_storage_bytes, d_input_keys, d_output_keys, num_items, compare_op, stream);
}
};
Expand Down
Loading

0 comments on commit 893a413

Please sign in to comment.