Skip to content

Commit

Permalink
Remove LEGACY_PTX_ARCH (#3551)
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato authored Jan 30, 2025
1 parent 0ce59c7 commit 77a6a45
Show file tree
Hide file tree
Showing 46 changed files with 85 additions and 203 deletions.
2 changes: 0 additions & 2 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,12 @@ private:
TilePrefixCallbackOp<BufferOffsetT,
::cuda::std::plus<>,
BLevBufferOffsetTileState,
0,
typename AgentMemcpySmallBuffersPolicyT::buff_delay_constructor>;

using BLevBlockScanPrefixCallbackOpT =
TilePrefixCallbackOp<BlockOffsetT,
::cuda::std::plus<>,
BLevBlockOffsetTileState,
0,
typename AgentMemcpySmallBuffersPolicyT::block_delay_constructor>;

//-----------------------------------------------------------------------------
Expand Down
12 changes: 3 additions & 9 deletions cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,6 @@ namespace histogram
*
* @tparam OffsetT
* Signed integer type for global offsets
*
* @tparam LEGACY_PTX_ARCH
* PTX compute capability (unused)
*/
template <typename AgentHistogramPolicyT,
int PRIVATIZED_SMEM_BINS,
Expand All @@ -184,8 +181,7 @@ template <typename AgentHistogramPolicyT,
typename CounterT,
typename PrivatizedDecodeOpT,
typename OutputDecodeOpT,
typename OffsetT,
int LEGACY_PTX_ARCH = 0>
typename OffsetT>
struct AgentHistogram
{
//---------------------------------------------------------------------
Expand Down Expand Up @@ -930,8 +926,7 @@ template <typename AgentHistogramPolicyT,
typename CounterT,
typename PrivatizedDecodeOpT,
typename OutputDecodeOpT,
typename OffsetT,
int LEGACY_PTX_ARCH = 0>
typename OffsetT>
using AgentHistogram CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and the public "
"interface will be removed.") =
detail::histogram::AgentHistogram<
Expand All @@ -943,7 +938,6 @@ using AgentHistogram CCCL_DEPRECATED_BECAUSE("This class is considered an implem
CounterT,
PrivatizedDecodeOpT,
OutputDecodeOpT,
OffsetT,
LEGACY_PTX_ARCH>;
OffsetT>;

CUB_NAMESPACE_END
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ struct AgentReduceByKey
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentReduceByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<OffsetValuePairT, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<OffsetValuePairT, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

// Key and value exchange types
using KeyExchangeT = KeyOutputT[TILE_ITEMS + 1];
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ struct AgentRle
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentRlePolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

// Warp exchange types
using WarpExchangePairs = WarpExchange<LengthOffsetPair, ITEMS_PER_THREAD>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ struct AgentScan

// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentScanPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT = TilePrefixCallbackOp<AccumT, ScanOpT, ScanTileStateT, 0 /* PTX */, DelayConstructorT>;
using TilePrefixCallbackOpT = TilePrefixCallbackOp<AccumT, ScanOpT, ScanTileStateT, DelayConstructorT>;

// Stateful BlockScan prefix callback type for managing a running total while
// scanning consecutive tiles
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ struct AgentScanByKey

using DelayConstructorT = typename AgentScanByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackT =
TilePrefixCallbackOp<FlagValuePairT, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<FlagValuePairT, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

using BlockScanT = BlockScan<FlagValuePairT, BLOCK_THREADS, AgentScanByKeyPolicyT::SCAN_ALGORITHM, 1, 1>;

Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ struct AgentSelectIf
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentSelectIfPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, MemoryOrderedTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, MemoryOrderedTileStateT, DelayConstructorT>;

// Item exchange type
using ItemExchangeT = InputT[TILE_ITEMS];
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_three_way_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ struct AgentThreeWayPartition
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename PolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
cub::TilePrefixCallbackOp<AccumPackT, ::cuda::std::plus<>, ScanTileStateT, 0, DelayConstructorT>;
cub::TilePrefixCallbackOp<AccumPackT, ::cuda::std::plus<>, ScanTileStateT, DelayConstructorT>;

// Item exchange type
using ItemExchangeT = InputT[TILE_ITEMS];
Expand Down
5 changes: 2 additions & 3 deletions cub/cub/agent/agent_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,8 @@ struct AgentUniqueByKey
using BlockScanT = cub::BlockScan<OffsetT, BLOCK_THREADS, AgentUniqueByKeyPolicyT::SCAN_ALGORITHM>;

// Parameterized BlockDiscontinuity type for items
using DelayConstructorT = typename AgentUniqueByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallback =
cub::TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, ScanTileStateT, 0, DelayConstructorT>;
using DelayConstructorT = typename AgentUniqueByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallback = cub::TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, ScanTileStateT, DelayConstructorT>;

// Key exchange type
using KeyExchangeT = KeyT[ITEMS_PER_TILE];
Expand Down
1 change: 0 additions & 1 deletion cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,6 @@ struct ReduceByKeyScanTileState<ValueT, KeyT, true>
template <typename T,
typename ScanOpT,
typename ScanTileStateT,
int LEGACY_PTX_ARCH = 0,
typename DelayConstructorT = detail::default_delay_constructor_t<T>>
struct TilePrefixCallbackOp
{
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/block/block_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ CUB_NAMESPACE_BEGIN
//! ``{ [4,-2,-1,0], [0,0,0,0], [1,1,0,0], [0,1,-3,3], ... }``.
//!
//! @endrst
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockAdjacentDifference
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_discontinuity.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockDiscontinuity
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_exchange.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,12 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! <b>[optional]</b> Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
bool WARP_TIME_SLICING = false,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockExchange
{
static constexpr int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; ///< The thread block size in threads
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,13 @@ enum BlockHistogramAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockHistogram
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -790,15 +790,12 @@ enum BlockLoadAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockLoad
{
static constexpr int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; // total threads in the block
Expand Down
8 changes: 2 additions & 6 deletions cub/cub/block/block_radix_rank.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -204,17 +204,14 @@ struct warp_in_block_matcher_t<Bits, 0, PartialWarpId>
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <int BLOCK_DIM_X,
int RADIX_BITS,
bool IS_DESCENDING,
bool MEMOIZE_OUTER_SCAN = true,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixRank
{
private:
Expand Down Expand Up @@ -560,8 +557,7 @@ template <int BLOCK_DIM_X,
bool IS_DESCENDING,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixRankMatch
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,6 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename KeyT,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
Expand All @@ -249,8 +247,7 @@ template <typename KeyT,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixSort
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_raking_layout.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_THREADS
//! The thread block size in threads.
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T, int BLOCK_THREADS, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_THREADS>
struct BlockRakingLayout
{
//---------------------------------------------------------------------
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,11 @@ enum BlockReduceAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockReduce
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,11 @@ enum BlockScanAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockScan
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_shuffle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockShuffle
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -639,15 +639,12 @@ enum BlockStoreAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockStore
{
private:
Expand Down
22 changes: 3 additions & 19 deletions cub/cub/block/specializations/block_histogram_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,8 @@ namespace detail
*
* @tparam BLOCK_DIM_Z
* The thread block length in threads along the Z dimension
*
* @tparam LEGACY_PTX_ARCH
* The PTX compute capability for which to specialize this collective (unused)
*/
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
int BLOCK_DIM_Y,
int BLOCK_DIM_Z,
int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
struct BlockHistogramSort
{
/// Constants
Expand Down Expand Up @@ -246,16 +237,9 @@ struct BlockHistogramSort
};
} // namespace detail

template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
int BLOCK_DIM_Y,
int BLOCK_DIM_Z,
int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
using BlockHistogramSort CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and the public interface will be "
"removed.") =
detail::BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z, LEGACY_PTX_ARCH>;
"removed.") = detail::BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z>;

CUB_NAMESPACE_END
9 changes: 3 additions & 6 deletions cub/cub/block/specializations/block_reduce_raking.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,8 @@ namespace detail
*
* @tparam BLOCK_DIM_Z
* The thread block length in threads along the Z dimension
*
* @tparam LEGACY_PTX_ARCH
* The PTX compute capability for which to specialize this collective
*/
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
struct BlockReduceRaking
{
/// Constants
Expand Down Expand Up @@ -260,9 +257,9 @@ struct BlockReduceRaking
};
} // namespace detail

template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
using BlockReduceRaking CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and the public interface will be "
"removed.") = detail::BlockReduceRaking<T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, LEGACY_PTX_ARCH>;
"removed.") = detail::BlockReduceRaking<T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z>;

CUB_NAMESPACE_END
Loading

0 comments on commit 77a6a45

Please sign in to comment.