From 36e27f7c1074010eefaab64d387ff6663569e065 Mon Sep 17 00:00:00 2001 From: Georgii Evtushenko Date: Fri, 3 Jan 2025 10:39:03 -0800 Subject: [PATCH] Fix RLE tuning (#3239) --- .../tuning/tuning_run_length_encode.cuh | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh b/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh index 783da6820d5..33771f6882f 100644 --- a/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_run_length_encode.cuh @@ -239,6 +239,7 @@ struct policy_hub static constexpr int max_input_bytes = static_cast(::cuda::std::max(sizeof(KeyT), sizeof(LengthT))); static constexpr int combined_input_bytes = sizeof(KeyT) + sizeof(LengthT); + template struct DefaultPolicy { static constexpr int nominal_4B_items_per_thread = 6; @@ -252,14 +253,14 @@ struct policy_hub AgentReduceByKeyPolicy<128, items, BLOCK_LOAD_DIRECT, - LOAD_LDG, + LoadModifier, BLOCK_SCAN_WARP_SCANS, default_reduce_by_key_delay_constructor_t>; }; // SM35 struct Policy350 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<350, Policy350, Policy350> {}; @@ -273,7 +274,7 @@ struct policy_hub BLOCK_SCAN_WARP_SCANS, typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> typename DefaultPolicy::ReduceByKeyPolicyT; + static auto select_agent_policy(long) -> typename DefaultPolicy::ReduceByKeyPolicyT; // SM80 struct Policy800 : ChainedPolicy<800, Policy800, Policy350> @@ -283,7 +284,7 @@ struct policy_hub // SM86 struct Policy860 - : DefaultPolicy + : DefaultPolicy , ChainedPolicy<860, Policy860, Policy800> {}; @@ -433,7 +434,7 @@ struct sm90_tuning struct policy_hub { - template + template struct DefaultPolicy { static constexpr int nominal_4B_items_per_thread = 15; @@ -444,7 +445,7 @@ struct policy_hub AgentRlePolicy<96, ITEMS_PER_THREAD, BlockLoad, - LOAD_LDG, + LoadModifier, true, BLOCK_SCAN_WARP_SCANS, default_reduce_by_key_delay_constructor_t>; @@ -452,7 +453,7 @@ struct policy_hub // SM35 struct Policy350 - : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` + : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` , ChainedPolicy<350, Policy350, Policy350> {}; @@ -467,7 +468,8 @@ struct policy_hub BLOCK_SCAN_WARP_SCANS, typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> typename DefaultPolicy::RleSweepPolicyT; + static auto select_agent_policy(long) -> + typename DefaultPolicy::RleSweepPolicyT; // SM80 struct Policy800 : ChainedPolicy<800, Policy800, Policy350> @@ -477,7 +479,7 @@ struct policy_hub // SM86 struct Policy860 - : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` + : DefaultPolicy // TODO(bgruber): I think we want `LengthT` instead of `int` , ChainedPolicy<860, Policy860, Policy800> {};