Skip to content

Commit

Permalink
Remove all code paths and policies for SM37 and below (#3466)
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato authored Jan 29, 2025
1 parent 09b1200 commit ced506d
Show file tree
Hide file tree
Showing 39 changed files with 86 additions and 456 deletions.
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/adjacent_difference/subtract_left.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#if !TUNE_BASE
struct policy_hub_t
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using AdjacentDifferencePolicy =
cub::AgentAdjacentDifferencePolicy<TUNE_THREADS_PER_BLOCK,
Expand All @@ -45,7 +45,7 @@ struct policy_hub_t
cub::BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/copy/memcpy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ using block_delay_constructor_t =

struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
using AgentSmallBufferPolicyT = cub::detail::AgentBatchMemcpyPolicy<
TUNE_THREADS,
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/histogram/histogram_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ constexpr cub::BlockHistogramMemoryPreference MEM_PREFERENCE = cub::BLEND;
template <typename SampleT, int NUM_CHANNELS, int NUM_ACTIVE_CHANNELS>
struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
static constexpr cub::BlockLoadAlgorithm load_algorithm =
(TUNE_LOAD_ALGORITHM == cub::BLOCK_LOAD_STRIPED)
Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/partition/three_way.cu
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
template <typename InputT>
struct policy_hub_t
{
struct policy_t : cub::ChainedPolicy<350, policy_t, policy_t>
struct policy_t : cub::ChainedPolicy<500, policy_t, policy_t>
{
using ThreeWayPartitionPolicy = //
cub::AgentThreeWayPartitionPolicy<TUNE_THREADS_PER_BLOCK,
Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/reduce/by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

struct reduce_by_key_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using ReduceByKeyPolicyT =
cub::AgentReduceByKeyPolicy<TUNE_THREADS,
Expand All @@ -64,7 +64,7 @@ struct reduce_by_key_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/run_length_encode/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

struct reduce_by_key_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using ReduceByKeyPolicyT =
cub::AgentReduceByKeyPolicy<TUNE_THREADS,
Expand All @@ -66,7 +66,7 @@ struct reduce_by_key_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/run_length_encode/non_trivial_runs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

struct device_rle_policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using RleSweepPolicyT =
cub::AgentRlePolicy<TUNE_THREADS,
Expand All @@ -66,7 +66,7 @@ struct device_rle_policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/segmented_sort/keys.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ struct device_seg_sort_policy_hub
{
using DominantT = KeyT;

struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
static constexpr int BLOCK_THREADS = TUNE_THREADS;
static constexpr int RADIX_BITS = TUNE_RADIX_BITS;
Expand Down Expand Up @@ -143,7 +143,7 @@ struct device_seg_sort_policy_hub
TUNE_M_LOAD_MODIFIER>>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
4 changes: 2 additions & 2 deletions cub/benchmarks/bench/select/unique_by_key.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@

struct policy_hub
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : cub::ChainedPolicy<500, Policy500, Policy500>
{
using UniqueByKeyPolicyT =
cub::AgentUniqueByKeyPolicy<TUNE_THREADS,
Expand All @@ -64,7 +64,7 @@ struct policy_hub
delay_constructor_t>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
#endif // !TUNE_BASE

Expand Down
2 changes: 1 addition & 1 deletion cub/benchmarks/bench/transform/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ using policy_hub_t = cub::detail::transform::policy_hub<false, ::cuda::std::tupl
#else
struct policy_hub_t
{
struct max_policy : cub::ChainedPolicy<350, max_policy, max_policy>
struct max_policy : cub::ChainedPolicy<500, max_policy, max_policy>
{
static constexpr int min_bif = cub::detail::transform::arch_to_min_bytes_in_flight(__CUDA_ARCH_LIST__);
static constexpr auto algorithm = static_cast<cub::detail::transform::Algorithm>(TUNE_ALGORITHM);
Expand Down
50 changes: 3 additions & 47 deletions cub/cub/device/dispatch/dispatch_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -383,40 +383,6 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") DispatchSpmv
// Tuning policies
//---------------------------------------------------------------------

/// SM35
struct Policy350
{
using SpmvPolicyT =
AgentSpmvPolicy<(sizeof(ValueT) > 4) ? 96 : 128,
(sizeof(ValueT) > 4) ? 4 : 7,
LOAD_LDG,
LOAD_CA,
LOAD_LDG,
LOAD_LDG,
LOAD_LDG,
(sizeof(ValueT) > 4) ? true : false,
BLOCK_SCAN_WARP_SCANS>;

using SegmentFixupPolicyT = AgentSegmentFixupPolicy<128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS>;
};

/// SM37
struct Policy370
{
using SpmvPolicyT =
AgentSpmvPolicy<(sizeof(ValueT) > 4) ? 128 : 128,
(sizeof(ValueT) > 4) ? 9 : 14,
LOAD_LDG,
LOAD_CA,
LOAD_LDG,
LOAD_LDG,
LOAD_LDG,
false,
BLOCK_SCAN_WARP_SCANS>;

using SegmentFixupPolicyT = AgentSegmentFixupPolicy<128, 3, BLOCK_LOAD_VECTORIZE, LOAD_LDG, BLOCK_SCAN_WARP_SCANS>;
};

/// SM50
struct Policy500
{
Expand Down Expand Up @@ -459,15 +425,8 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") DispatchSpmv
#if (CUB_PTX_ARCH >= 600)
using PtxPolicy = Policy600;

#elif (CUB_PTX_ARCH >= 500)
using PtxPolicy = Policy500;

#elif (CUB_PTX_ARCH >= 370)
using PtxPolicy = Policy370;

#else
using PtxPolicy = Policy350;

using PtxPolicy = Policy500;
#endif

// "Opaque" policies (whose parameterizations aren't reflected in the type signature)
Expand Down Expand Up @@ -502,12 +461,9 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") DispatchSpmv
} else if (ptx_version >= 500) {
spmv_config.template Init<typename Policy500::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy500::SegmentFixupPolicyT>();
} else if (ptx_version >= 370) {
spmv_config.template Init<typename Policy370::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy370::SegmentFixupPolicyT>();
} else {
spmv_config.template Init<typename Policy350::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy350::SegmentFixupPolicyT>();
spmv_config.template Init<typename Policy500::SpmvPolicyT>();
segment_fixup_config.template Init<typename Policy500::SegmentFixupPolicyT>();
}));
}

Expand Down
14 changes: 2 additions & 12 deletions cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,7 @@ struct policy_hub
{
using ValueT = typename std::iterator_traits<InputIteratorT>::value_type;

struct Policy300 : ChainedPolicy<300, Policy300, Policy300>
{
using AdjacentDifferencePolicy =
AgentAdjacentDifferencePolicy<128,
Nominal8BItemsToItems<ValueT>(7),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_STORE_WARP_TRANSPOSE>;
};

struct Policy350 : ChainedPolicy<350, Policy350, Policy300>
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
using AdjacentDifferencePolicy =
AgentAdjacentDifferencePolicy<128,
Expand All @@ -72,7 +62,7 @@ struct policy_hub
BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy350;
using MaxPolicy = Policy500;
};
} // namespace adjacent_difference
} // namespace detail
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/device/dispatch/tuning/tuning_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ struct policy_hub
using buff_delay_constructor_t = detail::default_delay_constructor_t<BufferOffsetT>;
using block_delay_constructor_t = detail::default_delay_constructor_t<BlockOffsetT>;

/// SM35
struct Policy350 : ChainedPolicy<350, Policy350, Policy350>
/// SM50
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
static constexpr bool PREFER_POW2_BITS = true;
using AgentSmallBufferPolicyT = AgentBatchMemcpyPolicy<
Expand All @@ -95,7 +95,7 @@ struct policy_hub
};

/// SM70
struct Policy700 : ChainedPolicy<700, Policy700, Policy350>
struct Policy700 : ChainedPolicy<700, Policy700, Policy500>
{
static constexpr bool PREFER_POW2_BITS = false;
using AgentSmallBufferPolicyT = AgentBatchMemcpyPolicy<
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/dispatch/tuning/tuning_for.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ namespace for_each

struct policy_hub_t
{
struct policy_350_t : ChainedPolicy<350, policy_350_t, policy_350_t>
struct policy_500_t : ChainedPolicy<500, policy_500_t, policy_500_t>
{
using for_policy_t = policy_t<256, 2>;
};

using MaxPolicy = policy_350_t;
using MaxPolicy = policy_500_t;
};

} // namespace for_each
Expand Down
9 changes: 1 addition & 8 deletions cub/cub/device/dispatch/tuning/tuning_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,8 @@ struct policy_hub
return (::cuda::std::max)(nominalItemsPerThread / NumActiveChannels / v_scale, 1);
}

// SM35
struct Policy350 : ChainedPolicy<350, Policy350, Policy350>
{
// TODO This might be worth it to separate usual histogram and the multi one
using AgentHistogramPolicyT = AgentHistogramPolicy<128, t_scale(8), BLOCK_LOAD_DIRECT, LOAD_LDG, true, BLEND, true>;
};

// SM50
struct Policy500 : ChainedPolicy<500, Policy500, Policy350>
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
// TODO This might be worth it to separate usual histogram and the multi one
using AgentHistogramPolicyT =
Expand Down
14 changes: 2 additions & 12 deletions cub/cub/device/dispatch/tuning/tuning_merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,7 @@ struct policy_hub

using tune_type = char[has_values ? sizeof(KeyT) + sizeof(ValueT) : sizeof(KeyT)];

struct policy300 : ChainedPolicy<300, policy300, policy300>
{
using merge_policy =
agent_policy_t<128,
Nominal4BItemsToItems<tune_type>(7),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_STORE_WARP_TRANSPOSE>;
};

struct policy350 : ChainedPolicy<350, policy350, policy300>
struct policy500 : ChainedPolicy<500, policy500, policy500>
{
using merge_policy =
agent_policy_t<256,
Expand All @@ -73,7 +63,7 @@ struct policy_hub
BLOCK_STORE_WARP_TRANSPOSE>;
};

struct policy520 : ChainedPolicy<520, policy520, policy350>
struct policy520 : ChainedPolicy<520, policy520, policy500>
{
using merge_policy =
agent_policy_t<512,
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct policy_hub
{
using KeyT = value_t<KeyIteratorT>;

struct Policy350 : ChainedPolicy<350, Policy350, Policy350>
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
using MergeSortPolicy =
AgentMergeSortPolicy<256,
Expand All @@ -63,9 +63,9 @@ struct policy_hub

// NVBug 3384810
#if defined(_NVHPC_CUDA)
using Policy520 = Policy350;
using Policy520 = Policy500;
#else
struct Policy520 : ChainedPolicy<520, Policy520, Policy350>
struct Policy520 : ChainedPolicy<520, Policy520, Policy500>
{
using MergeSortPolicy =
AgentMergeSortPolicy<512,
Expand Down
Loading

0 comments on commit ced506d

Please sign in to comment.