Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove LEGACY_PTX_ARCH #3551

Merged
merged 5 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,12 @@ private:
TilePrefixCallbackOp<BufferOffsetT,
::cuda::std::plus<>,
BLevBufferOffsetTileState,
0,
typename AgentMemcpySmallBuffersPolicyT::buff_delay_constructor>;

using BLevBlockScanPrefixCallbackOpT =
TilePrefixCallbackOp<BlockOffsetT,
::cuda::std::plus<>,
BLevBlockOffsetTileState,
0,
typename AgentMemcpySmallBuffersPolicyT::block_delay_constructor>;

//-----------------------------------------------------------------------------
Expand Down
12 changes: 3 additions & 9 deletions cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,6 @@ namespace histogram
*
* @tparam OffsetT
* Signed integer type for global offsets
*
* @tparam LEGACY_PTX_ARCH
* PTX compute capability (unused)
*/
template <typename AgentHistogramPolicyT,
int PRIVATIZED_SMEM_BINS,
Expand All @@ -184,8 +181,7 @@ template <typename AgentHistogramPolicyT,
typename CounterT,
typename PrivatizedDecodeOpT,
typename OutputDecodeOpT,
typename OffsetT,
int LEGACY_PTX_ARCH = 0>
typename OffsetT>
struct AgentHistogram
{
//---------------------------------------------------------------------
Expand Down Expand Up @@ -930,8 +926,7 @@ template <typename AgentHistogramPolicyT,
typename CounterT,
typename PrivatizedDecodeOpT,
typename OutputDecodeOpT,
typename OffsetT,
int LEGACY_PTX_ARCH = 0>
typename OffsetT>
using AgentHistogram CCCL_DEPRECATED_BECAUSE("This class is considered an implementation detail and the public "
"interface will be removed.") =
detail::histogram::AgentHistogram<
Expand All @@ -943,7 +938,6 @@ using AgentHistogram CCCL_DEPRECATED_BECAUSE("This class is considered an implem
CounterT,
PrivatizedDecodeOpT,
OutputDecodeOpT,
OffsetT,
LEGACY_PTX_ARCH>;
OffsetT>;

CUB_NAMESPACE_END
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ struct AgentReduceByKey
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentReduceByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<OffsetValuePairT, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<OffsetValuePairT, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

// Key and value exchange types
using KeyExchangeT = KeyOutputT[TILE_ITEMS + 1];
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ struct AgentRle
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentRlePolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<LengthOffsetPair, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

// Warp exchange types
using WarpExchangePairs = WarpExchange<LengthOffsetPair, ITEMS_PER_THREAD>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ struct AgentScan

// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentScanPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT = TilePrefixCallbackOp<AccumT, ScanOpT, ScanTileStateT, 0 /* PTX */, DelayConstructorT>;
using TilePrefixCallbackOpT = TilePrefixCallbackOp<AccumT, ScanOpT, ScanTileStateT, DelayConstructorT>;

// Stateful BlockScan prefix callback type for managing a running total while
// scanning consecutive tiles
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ struct AgentScanByKey

using DelayConstructorT = typename AgentScanByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackT =
TilePrefixCallbackOp<FlagValuePairT, ReduceBySegmentOpT, ScanTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<FlagValuePairT, ReduceBySegmentOpT, ScanTileStateT, DelayConstructorT>;

using BlockScanT = BlockScan<FlagValuePairT, BLOCK_THREADS, AgentScanByKeyPolicyT::SCAN_ALGORITHM, 1, 1>;

Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ struct AgentSelectIf
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename AgentSelectIfPolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, MemoryOrderedTileStateT, 0, DelayConstructorT>;
TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, MemoryOrderedTileStateT, DelayConstructorT>;

// Item exchange type
using ItemExchangeT = InputT[TILE_ITEMS];
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_three_way_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ struct AgentThreeWayPartition
// Callback type for obtaining tile prefix during block scan
using DelayConstructorT = typename PolicyT::detail::delay_constructor_t;
using TilePrefixCallbackOpT =
cub::TilePrefixCallbackOp<AccumPackT, ::cuda::std::plus<>, ScanTileStateT, 0, DelayConstructorT>;
cub::TilePrefixCallbackOp<AccumPackT, ::cuda::std::plus<>, ScanTileStateT, DelayConstructorT>;

// Item exchange type
using ItemExchangeT = InputT[TILE_ITEMS];
Expand Down
5 changes: 2 additions & 3 deletions cub/cub/agent/agent_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,8 @@ struct AgentUniqueByKey
using BlockScanT = cub::BlockScan<OffsetT, BLOCK_THREADS, AgentUniqueByKeyPolicyT::SCAN_ALGORITHM>;

// Parameterized BlockDiscontinuity type for items
using DelayConstructorT = typename AgentUniqueByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallback =
cub::TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, ScanTileStateT, 0, DelayConstructorT>;
using DelayConstructorT = typename AgentUniqueByKeyPolicyT::detail::delay_constructor_t;
using TilePrefixCallback = cub::TilePrefixCallbackOp<OffsetT, ::cuda::std::plus<>, ScanTileStateT, DelayConstructorT>;

// Key exchange type
using KeyExchangeT = KeyT[ITEMS_PER_TILE];
Expand Down
1 change: 0 additions & 1 deletion cub/cub/agent/single_pass_scan_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,6 @@ struct ReduceByKeyScanTileState<ValueT, KeyT, true>
template <typename T,
typename ScanOpT,
typename ScanTileStateT,
int LEGACY_PTX_ARCH = 0,
typename DelayConstructorT = detail::default_delay_constructor_t<T>>
struct TilePrefixCallbackOp
{
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/block/block_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ CUB_NAMESPACE_BEGIN
//! ``{ [4,-2,-1,0], [0,0,0,0], [1,1,0,0], [0,1,-3,3], ... }``.
//!
//! @endrst
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockAdjacentDifference
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_discontinuity.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockDiscontinuity
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_exchange.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,12 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! <b>[optional]</b> Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
bool WARP_TIME_SLICING = false,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockExchange
{
static constexpr int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; ///< The thread block size in threads
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,13 @@ enum BlockHistogramAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockHistogram
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -790,15 +790,12 @@ enum BlockLoadAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockLoad
{
static constexpr int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; // total threads in the block
Expand Down
8 changes: 2 additions & 6 deletions cub/cub/block/block_radix_rank.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -204,17 +204,14 @@ struct warp_in_block_matcher_t<Bits, 0, PartialWarpId>
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <int BLOCK_DIM_X,
int RADIX_BITS,
bool IS_DESCENDING,
bool MEMOIZE_OUTER_SCAN = true,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixRank
{
private:
Expand Down Expand Up @@ -560,8 +557,7 @@ template <int BLOCK_DIM_X,
bool IS_DESCENDING,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixRankMatch
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,6 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename KeyT,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
Expand All @@ -249,8 +247,7 @@ template <typename KeyT,
BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS,
cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockRadixSort
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_raking_layout.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_THREADS
//! The thread block size in threads.
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T, int BLOCK_THREADS, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_THREADS>
struct BlockRakingLayout
{
//---------------------------------------------------------------------
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,11 @@ enum BlockReduceAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockReduce
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -221,14 +221,11 @@ enum BlockScanAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockScan
{
private:
Expand Down
4 changes: 1 addition & 3 deletions cub/cub/block/block_shuffle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ CUB_NAMESPACE_BEGIN
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y = 1, int BLOCK_DIM_Z = 1>
class BlockShuffle
{
private:
Expand Down
5 changes: 1 addition & 4 deletions cub/cub/block/block_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -639,15 +639,12 @@ enum BlockStoreAlgorithm
//! @tparam BLOCK_DIM_Z
//! **[optional]** The thread block length in threads along the Z dimension (default: 1)
//!
//! @tparam LEGACY_PTX_ARCH
//! **[optional]** Unused.
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT,
int BLOCK_DIM_Y = 1,
int BLOCK_DIM_Z = 1,
int LEGACY_PTX_ARCH = 0>
int BLOCK_DIM_Z = 1>
class BlockStore
{
private:
Expand Down
22 changes: 3 additions & 19 deletions cub/cub/block/specializations/block_histogram_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,8 @@ namespace detail
*
* @tparam BLOCK_DIM_Z
* The thread block length in threads along the Z dimension
*
* @tparam LEGACY_PTX_ARCH
* The PTX compute capability for which to specialize this collective (unused)
*/
template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
int BLOCK_DIM_Y,
int BLOCK_DIM_Z,
int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
struct BlockHistogramSort
{
/// Constants
Expand Down Expand Up @@ -246,16 +237,9 @@ struct BlockHistogramSort
};
} // namespace detail

template <typename T,
int BLOCK_DIM_X,
int ITEMS_PER_THREAD,
int BINS,
int BLOCK_DIM_Y,
int BLOCK_DIM_Z,
int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int ITEMS_PER_THREAD, int BINS, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
using BlockHistogramSort CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and the public interface will be "
"removed.") =
detail::BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z, LEGACY_PTX_ARCH>;
"removed.") = detail::BlockHistogramSort<T, BLOCK_DIM_X, ITEMS_PER_THREAD, BINS, BLOCK_DIM_Y, BLOCK_DIM_Z>;

CUB_NAMESPACE_END
9 changes: 3 additions & 6 deletions cub/cub/block/specializations/block_reduce_raking.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,8 @@ namespace detail
*
* @tparam BLOCK_DIM_Z
* The thread block length in threads along the Z dimension
*
* @tparam LEGACY_PTX_ARCH
* The PTX compute capability for which to specialize this collective
*/
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
struct BlockReduceRaking
{
/// Constants
Expand Down Expand Up @@ -260,9 +257,9 @@ struct BlockReduceRaking
};
} // namespace detail

template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z, int LEGACY_PTX_ARCH = 0>
template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y, int BLOCK_DIM_Z>
using BlockReduceRaking CCCL_DEPRECATED_BECAUSE(
"This class is considered an implementation detail and the public interface will be "
"removed.") = detail::BlockReduceRaking<T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, LEGACY_PTX_ARCH>;
"removed.") = detail::BlockReduceRaking<T, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z>;

CUB_NAMESPACE_END
Loading
Loading