From 46487c50b4210c540d6f2ee9d2f9bdad4ff297c3 Mon Sep 17 00:00:00 2001 From: Giannis Gonidelis Date: Mon, 27 Jan 2025 11:06:29 -0800 Subject: [PATCH] Add b200 policies for reduce.min --- .../device/dispatch/tuning/tuning_reduce.cuh | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/cub/cub/device/dispatch/tuning/tuning_reduce.cuh b/cub/cub/device/dispatch/tuning/tuning_reduce.cuh index 2c94440ac80..94c1b7127d6 100644 --- a/cub/cub/device/dispatch/tuning/tuning_reduce.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_reduce.cuh @@ -247,6 +247,128 @@ struct sm100_tuning +struct sm100_tuning +{ + // ipt_16.tpb_128.ipv_2 1.021369 0.998557 1.019009 1.077479 + static constexpr int items = 16; + static constexpr int threads = 128; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_16.tpb_256.ipv_2 1.038750 1.0 1.034382 1.117647 + static constexpr int items = 16; + static constexpr int threads = 256; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_12.tpb_448.ipv_1 1.037834 1.000000 1.036212 1.144847 + static constexpr int items = 12; + static constexpr int threads = 448; + static constexpr int items_per_vec_load = 1; +}; + +template +struct sm100_tuning +{ + // ipt_15.tpb_512.ipv_2 1.020165 1.0 1.018162 1.058036 + static constexpr int items = 15; + static constexpr int threads = 512; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_16.tpb_320.ipv_2 1.009217 1.0 1.008197 1.032787 + static constexpr int items = 16; + static constexpr int threads = 320; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_18.tpb_448.ipv_2 1.032745 0.966480 1.032123 1.162011 + static constexpr int items = 18; + static constexpr int threads = 448; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_15.tpb_512.ipv_2 1.019901 1.0 1.017648 1.058036 + static constexpr int items = 15; + static constexpr int threads = 512; + static constexpr int items_per_vec_load = 2; +}; + +// same as base, so fall back to Policy600 +// template +// struct sm100_tuning {}; + +template +struct sm100_tuning +{ + // ipt_16.tpb_224.ipv_2 1.031922 0.997989 1.028396 1.115596 + static constexpr int items = 16; + static constexpr int threads = 224; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_14.tpb_416.ipv_1 1.047490 1.000000 1.045455 1.181818 + static constexpr int items = 14; + static constexpr int threads = 416; + static constexpr int items_per_vec_load = 1; +}; + +template +struct sm100_tuning +{ + // ipt_21.tpb_384.ipv_2 1.021487 1.0 1.019033 1.057143 + static constexpr int items = 21; + static constexpr int threads = 384; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_17.tpb_512.ipv_2 1.003412 0.980713 1.003111 1.031730 + static constexpr int items = 17; + static constexpr int threads = 512; + static constexpr int items_per_vec_load = 2; +}; + +template +struct sm100_tuning +{ + // ipt_18.tpb_448.ipv_1 1.023427 1.000000 1.022287 1.083333 + static constexpr int items = 18; + static constexpr int threads = 448; + static constexpr int items_per_vec_load = 1; +}; + +template +struct sm100_tuning +{ + // ipt_16.tpb_320.ipv_2 1.018602 1.0 1.016518 1.059821 + static constexpr int items = 16; + static constexpr int threads = 320; + static constexpr int items_per_vec_load = 1; +}; + template struct policy_hub { @@ -299,6 +421,7 @@ struct policy_hub Tuning::items_per_vec_load, BLOCK_REDUCE_WARP_REDUCTIONS, LOAD_LDG>; + // use Policy600 as DefaultPolicy template static auto select_agent_policy(long) -> typename Policy600::ReducePolicy;