Skip to content

Commit

Permalink
Default i64/f64,i64,true tuning due to regressions
Browse files Browse the repository at this point in the history
  • Loading branch information
gonidelis committed Feb 5, 2025
1 parent 22be949 commit 58344a6
Showing 1 changed file with 18 additions and 17 deletions.
35 changes: 18 additions & 17 deletions cub/cub/device/dispatch/tuning/tuning_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -988,23 +988,24 @@ struct sm100_tuning<Input,
using delay_constructor = exponential_backon_jitter_window_constructor_t<1712, 825>;
};

// Partial specialization of sm100_tuning for the parameter combination
// flagged::no, keep_rejects::yes, offset_size::_8, primitive::yes,
// input_size::_8, may_alias::no, distinct_partitions::yes.
template <typename Input>
struct sm100_tuning<Input, flagged::no, keep_rejects::yes, offset_size::_8, primitive::yes, input_size::_8, may_alias::no, distinct_partitions::yes>
{
  // Tuning record kept for reference; its ipt/tpb/ns/l2w fields match the constants below.
  // NOTE(review): trp_1/ld_0 presumably encode the load algorithm/modifier choices - confirm.
  // trp_1.ld_0.ipt_14.tpb_512.ns_1468.dcid_7.l2w_820 1.111830 1.011070 1.119481 1.245868
  using delay_constructor = exponential_backon_jitter_constructor_t<1468, 820>;

  static constexpr CacheLoadModifier load_modifier   = LOAD_DEFAULT;
  static constexpr BlockLoadAlgorithm load_algorithm = BLOCK_LOAD_WARP_TRANSPOSE;
  static constexpr int threads                       = 512;
  static constexpr int nominal_4b_items              = 14;
};
// TODO(gonidelis): This tuning regresses for large input sizes. Find a better tuning.
// template <class Input>
// struct sm100_tuning<Input,
// flagged::no,
// keep_rejects::yes,
// offset_size::_8,
// primitive::yes,
// input_size::_8,
// may_alias::no,
// distinct_partitions::yes>
// {
// // trp_1.ld_0.ipt_14.tpb_512.ns_1468.dcid_7.l2w_820 1.111830 1.011070 1.119481 1.245868
// static constexpr int nominal_4b_items = 14;
// static constexpr int threads = 512;
// static constexpr BlockLoadAlgorithm load_algorithm = BLOCK_LOAD_WARP_TRANSPOSE;
// static constexpr CacheLoadModifier load_modifier = LOAD_DEFAULT;
// using delay_constructor = exponential_backon_jitter_constructor_t<1468, 820>;
// };

template <class Input>
struct sm100_tuning<Input,
Expand Down

0 comments on commit 58344a6

Please sign in to comment.