Skip to content

Commit

Permalink
Remove i128 tuning overload and default back I64,I32,false for partition.if
Browse files Browse the repository at this point in the history
  • Loading branch information
gonidelis committed Feb 6, 2025
1 parent c62efcb commit eaa2130
Showing 1 changed file with 17 additions and 33 deletions.
50 changes: 17 additions & 33 deletions cub/cub/device/dispatch/tuning/tuning_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1062,23 +1062,23 @@ struct sm100_tuning<Input,
using delay_constructor = exponential_backon_jitter_window_constructor_t<724, 970>;
};

// sm100_tuning partial specialization, generic over the Input type, for the tag combination:
//   flagged::no, keep_rejects::yes, offset_size::_4, primitive::yes,
//   input_size::_8, may_alias::no, distinct_partitions::no
template <class Input>
struct sm100_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_4, primitive::yes, input_size::_8, may_alias::no, distinct_partitions::no>
{
  // Tuning identifier and recorded figures for this configuration:
  // trp_1.ld_0.ipt_23.tpb_416.ns_1608.dcid_2.l2w_560 1.099752 0.977393 1.106477 1.259336
  // The ipt / tpb / ns / l2w fields of the identifier match the constants below.
  // NOTE(review): the four trailing numbers are presumably benchmark ratios - confirm against the tuning logs.

  // Thread count (tpb_416).
  static constexpr int threads = 416;

  // Nominal item count expressed for 4-byte items (ipt_23).
  static constexpr int nominal_4b_items = 23;

  // Load configuration.
  static constexpr CacheLoadModifier load_modifier   = LOAD_DEFAULT;
  static constexpr BlockLoadAlgorithm load_algorithm = BLOCK_LOAD_WARP_TRANSPOSE;

  // Delay constructor instantiated with <1608, 560> (ns_1608, l2w_560).
  using delay_constructor = exponential_backoff_constructor_t<1608, 560>;
};
// template <class Input>
// struct sm100_tuning<Input,
// flagged::no,
// keep_rejects::yes,
// offset_size::_4,
// primitive::yes,
// input_size::_8,
// may_alias::no,
// distinct_partitions::no>
// {
// // trp_1.ld_0.ipt_23.tpb_416.ns_1608.dcid_2.l2w_560 1.099752 0.977393 1.106477 1.259336
// static constexpr int nominal_4b_items = 23;
// static constexpr int threads = 416;
// static constexpr BlockLoadAlgorithm load_algorithm = BLOCK_LOAD_WARP_TRANSPOSE;
// static constexpr CacheLoadModifier load_modifier = LOAD_DEFAULT;
// using delay_constructor = exponential_backoff_constructor_t<1608, 560>;
// };

template <class Input>
struct sm100_tuning<Input,
Expand Down Expand Up @@ -1152,22 +1152,6 @@ struct sm100_tuning<Input,
using delay_constructor = exponential_backoff_constructor_t<0, 1200>;
};

#if CUB_IS_INT128_ENABLED
// Explicit sm100_tuning default for __int128_t input with offset_size::_8, for any value of
// DistinctPartitions. It defines no members of its own and inherits everything from the sm90_tuning
// instantiation named below.
// Original rationale: because cases for I64 offsets were introduced, not defaulting this combination
// explicitly leads to regressions.
template <distinct_partitions DistinctPartitions>
struct sm100_tuning<__int128_t,
flagged::no,
keep_rejects::yes,
offset_size::_8,
primitive::no,
input_size::_16,
may_alias::no,
DistinctPartitions>
: sm90_tuning<__int128_t, flagged::no, keep_rejects::yes, offset_size::_4, primitive::no, input_size::_16>
// ^^^^^ this base is wrong and leads to regressions ^^^^^
// NOTE(review): this specialization matches offset_size::_8, but the base above is instantiated with
// offset_size::_4. The correct base cannot be determined from this excerpt - confirm before relying on it.
{};
#endif // CUB_IS_INT128_ENABLED

// partition::flagged
template <class Input>
struct sm100_tuning<Input,
Expand Down

0 comments on commit eaa2130

Please sign in to comment.