diff --git a/cub/cub/device/device_partition.cuh b/cub/cub/device/device_partition.cuh
index 621bf2b9070..020456fd81d 100644
--- a/cub/cub/device/device_partition.cuh
+++ b/cub/cub/device/device_partition.cuh
@@ -436,7 +436,7 @@ private:
             typename OffsetT,
             typename BeginOffsetIteratorT,
             typename EndOffsetIteratorT,
-            typename SelectedPolicy>
+            typename PolicyHub>
   friend class DispatchSegmentedSort;
 
   // Internal version without NVTX range
diff --git a/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh b/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
index ae0c74d45fc..d4ca673ff93 100644
--- a/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
+++ b/cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
@@ -108,8 +108,8 @@ template <typename InputIteratorT,
           typename OffsetT,
           bool MayAlias,
           bool ReadLeft,
-          typename SelectedPolicy = detail::adjacent_difference::policy_hub<InputIteratorT, MayAlias>>
-struct DispatchAdjacentDifference : public SelectedPolicy
+          typename PolicyHub = detail::adjacent_difference::policy_hub<InputIteratorT, MayAlias>>
+struct DispatchAdjacentDifference
 {
   using InputT = typename std::iterator_traits<InputIteratorT>::value_type;
 
@@ -167,8 +167,6 @@ struct DispatchAdjacentDifference : public SelectedPolicy
   {
     using AdjacentDifferencePolicyT = typename ActivePolicyT::AdjacentDifferencePolicy;
 
-    using MaxPolicyT = typename DispatchAdjacentDifference::MaxPolicy;
-
     cudaError error = cudaSuccess;
 
     do
@@ -256,7 +254,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy
       THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(
         num_tiles, AdjacentDifferencePolicyT::BLOCK_THREADS, 0, stream)
         .doit(DeviceAdjacentDifferenceDifferenceKernel<
-                MaxPolicyT,
+                typename PolicyHub::MaxPolicy,
                 InputIteratorT,
                 OutputIteratorT,
                 DifferenceOpT,
@@ -297,8 +295,6 @@ struct DispatchAdjacentDifference : public SelectedPolicy
     DifferenceOpT difference_op,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchAdjacentDifference::MaxPolicy;
-
     cudaError error = cudaSuccess;
     do
     {
@@ -315,7 +311,7 @@ struct DispatchAdjacentDifference : public SelectedPolicy
         d_temp_storage, temp_storage_bytes, d_input, d_output, num_items, difference_op, stream);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh b/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
index f8415386158..43eccd08a26 100644
--- a/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
+++ b/cub/cub/device/dispatch/dispatch_batch_memcpy.cuh
@@ -285,9 +285,9 @@ template <typename InputBufferIt,
           typename BufferSizeIteratorT,
           typename BufferOffsetT,
           typename BlockOffsetT,
-          typename SelectedPolicy = batch_memcpy::policy_hub<BufferOffsetT, BlockOffsetT>,
-          bool IsMemcpy           = true>
-struct DispatchBatchMemcpy : SelectedPolicy
+          typename PolicyHub = batch_memcpy::policy_hub<BufferOffsetT, BlockOffsetT>,
+          bool IsMemcpy      = true>
+struct DispatchBatchMemcpy
 {
   //------------------------------------------------------------------------------
   // TYPE ALIASES
@@ -345,8 +345,6 @@ struct DispatchBatchMemcpy : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename DispatchBatchMemcpy::MaxPolicy;
-
     // Single-pass prefix scan tile states for the prefix-sum over the number of block-level buffers
     using BLevBufferOffsetTileState = cub::ScanTileState<BufferOffsetT>;
 
@@ -466,7 +464,7 @@ struct DispatchBatchMemcpy : SelectedPolicy
     auto init_scan_states_kernel =
       InitTileStateKernel<BLevBufferOffsetTileState, BLevBlockOffsetTileState, BlockOffsetT>;
     auto batch_memcpy_non_blev_kernel = BatchMemcpyKernel<
-      MaxPolicyT,
+      typename PolicyHub::MaxPolicy,
       InputBufferIt,
       OutputBufferIt,
       BufferSizeIteratorT,
@@ -481,7 +479,7 @@ struct DispatchBatchMemcpy : SelectedPolicy
       IsMemcpy>;
 
     auto multi_block_memcpy_kernel = MultiBlockBatchMemcpyKernel<
-      MaxPolicyT,
+      typename PolicyHub::MaxPolicy,
       BufferOffsetT,
       BlevBufferSrcsOutItT,
       BlevBufferDstsOutItT,
@@ -651,8 +649,6 @@ struct DispatchBatchMemcpy : SelectedPolicy
     BufferOffsetT num_buffers,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchBatchMemcpy::MaxPolicy;
-
     cudaError_t error = cudaSuccess;
 
     // Get PTX version
@@ -668,7 +664,7 @@ struct DispatchBatchMemcpy : SelectedPolicy
       d_temp_storage, temp_storage_bytes, input_buffer_it, output_buffer_it, buffer_sizes, num_buffers, stream);
 
     // Dispatch to chained policy
-    error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+    error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
     if (cudaSuccess != error)
     {
       return error;
diff --git a/cub/cub/device/dispatch/dispatch_for.cuh b/cub/cub/device/dispatch/dispatch_for.cuh
index de0189490fb..7ba478e3c00 100644
--- a/cub/cub/device/dispatch/dispatch_for.cuh
+++ b/cub/cub/device/dispatch/dispatch_for.cuh
@@ -59,7 +59,7 @@ namespace for_each
 
 // The dispatch layer is in the detail namespace until we figure out tuning API
 template <class OffsetT, class OpT, class PolicyHubT = policy_hub_t>
-struct dispatch_t : PolicyHubT
+struct dispatch_t
 {
   OffsetT num_items;
   OpT op;
@@ -75,7 +75,7 @@ struct dispatch_t : PolicyHubT
   CUB_RUNTIME_FUNCTION
   _CCCL_FORCEINLINE cudaError_t Invoke(::cuda::std::false_type /* block size is not known at compile time */)
   {
-    using max_policy_t = typename dispatch_t::MaxPolicy;
+    using max_policy_t = typename PolicyHubT::MaxPolicy;
 
     if (num_items == 0)
     {
@@ -132,8 +132,6 @@ struct dispatch_t : PolicyHubT
   CUB_RUNTIME_FUNCTION
   _CCCL_FORCEINLINE cudaError_t Invoke(::cuda::std::true_type /* block size is known at compile time */)
   {
-    using max_policy_t = typename dispatch_t::MaxPolicy;
-
     if (num_items == 0)
     {
       return cudaSuccess;
@@ -157,7 +155,7 @@ struct dispatch_t : PolicyHubT
 
     error = THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(
               static_cast<unsigned int>(num_tiles), static_cast<unsigned int>(block_threads), 0, stream)
-              .doit(detail::for_each::static_kernel<max_policy_t, OffsetT, OpT>, num_items, op);
+              .doit(detail::for_each::static_kernel<typename PolicyHubT::MaxPolicy, OffsetT, OpT>, num_items, op);
     error = CubDebug(error);
     if (cudaSuccess != error)
     {
@@ -182,8 +180,6 @@ struct dispatch_t : PolicyHubT
 
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t dispatch(OffsetT num_items, OpT op, cudaStream_t stream)
   {
-    using max_policy_t = typename dispatch_t::MaxPolicy;
-
     int ptx_version   = 0;
     cudaError_t error = CubDebug(PtxVersion(ptx_version));
     if (cudaSuccess != error)
@@ -193,7 +189,7 @@ struct dispatch_t : PolicyHubT
 
     dispatch_t dispatch(num_items, op, stream);
 
-    error = CubDebug(max_policy_t::Invoke(ptx_version, dispatch));
+    error = CubDebug(PolicyHubT::MaxPolicy::Invoke(ptx_version, dispatch));
 
     return error;
   }
diff --git a/cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh b/cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh
index db43ceb1006..6e346316d48 100644
--- a/cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh
+++ b/cub/cub/device/dispatch/dispatch_for_each_in_extents.cuh
@@ -80,11 +80,11 @@ namespace for_each_in_extents
 
 // The dispatch layer is in the detail namespace until we figure out the tuning API
 template <typename ExtentsType, typename OpType, typename PolicyHubT = cub::detail::for_each::policy_hub_t>
-class dispatch_t : PolicyHubT
+class dispatch_t
 {
   using index_type          = typename ExtentsType::index_type;
   using unsigned_index_type = ::cuda::std::make_unsigned_t<index_type>;
-  using max_policy_t        = typename dispatch_t::MaxPolicy;
+  using max_policy_t        = typename PolicyHubT::MaxPolicy;
   //  workaround for nvcc 11.1 bug related to deduction guides, vvv
   using array_type = ::cuda::std::array<fast_div_mod<index_type>, ExtentsType::rank()>;
 
@@ -190,8 +190,7 @@ public:
   _CCCL_NODISCARD CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t
   dispatch(const ExtentsType& ext, const OpType& op, cudaStream_t stream)
   {
-    using max_policy_t = typename dispatch_t::MaxPolicy;
-    int ptx_version    = 0;
+    int ptx_version = 0;
     _CUB_RETURN_IF_ERROR(CubDebug(PtxVersion(ptx_version)))
     dispatch_t dispatch(ext, op, stream);
     return CubDebug(max_policy_t::Invoke(ptx_version, dispatch));
diff --git a/cub/cub/device/dispatch/dispatch_histogram.cuh b/cub/cub/device/dispatch/dispatch_histogram.cuh
index 5536c545ac4..42a256b8e20 100644
--- a/cub/cub/device/dispatch/dispatch_histogram.cuh
+++ b/cub/cub/device/dispatch/dispatch_histogram.cuh
@@ -545,7 +545,7 @@ struct dispatch_histogram
  * @tparam OffsetT
  *   Signed integer type for global offsets
  *
- * @tparam SelectedPolicy
+ * @tparam PolicyHub
  *   Implementation detail, do not specify directly, requirements on the
  *   content of this type are subject to breaking change.
  */
@@ -555,9 +555,9 @@ template <int NUM_CHANNELS,
           typename CounterT,
           typename LevelT,
           typename OffsetT,
-          typename SelectedPolicy =
+          typename PolicyHub =
             detail::histogram::policy_hub<detail::value_t<SampleIteratorT>, CounterT, NUM_CHANNELS, NUM_ACTIVE_CHANNELS>>
-struct DispatchHistogram : SelectedPolicy
+struct DispatchHistogram
 {
   static_assert(NUM_CHANNELS <= 4, "Histograms only support up to 4 channels");
   static_assert(NUM_ACTIVE_CHANNELS <= NUM_CHANNELS,
@@ -921,7 +921,7 @@ public:
     cudaStream_t stream,
     Int2Type<false> /*is_byte_sample*/)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
     cudaError error  = cudaSuccess;
 
     do
@@ -1125,7 +1125,7 @@ public:
     cudaStream_t stream,
     Int2Type<true> /*is_byte_sample*/)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
     cudaError error  = cudaSuccess;
 
     do
@@ -1293,7 +1293,7 @@ public:
     cudaStream_t stream,
     Int2Type<false> /*is_byte_sample*/)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
     cudaError error  = cudaSuccess;
 
     do
@@ -1514,7 +1514,7 @@ public:
     cudaStream_t stream,
     Int2Type<true> /*is_byte_sample*/)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
     cudaError error  = cudaSuccess;
 
     do
diff --git a/cub/cub/device/dispatch/dispatch_merge_sort.cuh b/cub/cub/device/dispatch/dispatch_merge_sort.cuh
index 04f5c77e997..6d8c74c368a 100644
--- a/cub/cub/device/dispatch/dispatch_merge_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_merge_sort.cuh
@@ -334,8 +334,8 @@ template <typename KeyInputIteratorT,
           typename ValueIteratorT,
           typename OffsetT,
           typename CompareOpT,
-          typename SelectedPolicy = detail::merge_sort::policy_hub<KeyIteratorT>>
-struct DispatchMergeSort : SelectedPolicy
+          typename PolicyHub = detail::merge_sort::policy_hub<KeyIteratorT>>
+struct DispatchMergeSort
 {
   using KeyT   = cub::detail::value_t<KeyIteratorT>;
   using ValueT = cub::detail::value_t<ValueIteratorT>;
@@ -447,8 +447,6 @@ struct DispatchMergeSort : SelectedPolicy
     using BlockSortVSmemHelperT  = cub::detail::vsmem_helper_impl<typename merge_sort_helper_t::block_sort_agent_t>;
     using MergeAgentVSmemHelperT = cub::detail::vsmem_helper_impl<typename merge_sort_helper_t::merge_agent_t>;
 
-    using MaxPolicyT = typename DispatchMergeSort::MaxPolicy;
-
     cudaError error = cudaSuccess;
 
     if (num_items == 0)
@@ -517,7 +515,7 @@ struct DispatchMergeSort : SelectedPolicy
         static_cast<int>(num_tiles), merge_sort_helper_t::policy_t::BLOCK_THREADS, 0, stream)
         .doit(
           DeviceMergeSortBlockSortKernel<
-            MaxPolicyT,
+            typename PolicyHub::MaxPolicy,
             KeyInputIteratorT,
             ValueInputIteratorT,
             KeyIteratorT,
@@ -602,7 +600,7 @@ struct DispatchMergeSort : SelectedPolicy
         THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(
           static_cast<int>(num_tiles), static_cast<int>(merge_sort_helper_t::policy_t::BLOCK_THREADS), 0, stream, true)
           .doit(
-            DeviceMergeSortMergeKernel<MaxPolicyT,
+            DeviceMergeSortMergeKernel<typename PolicyHub::MaxPolicy,
                                        KeyInputIteratorT,
                                        ValueInputIteratorT,
                                        KeyIteratorT,
@@ -651,8 +649,6 @@ struct DispatchMergeSort : SelectedPolicy
     CompareOpT compare_op,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchMergeSort::MaxPolicy;
-
     cudaError error = cudaSuccess;
     do
     {
@@ -678,7 +674,7 @@ struct DispatchMergeSort : SelectedPolicy
         ptx_version);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
index 0be04b1b11f..783e83ab1b0 100644
--- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
@@ -852,9 +852,9 @@ template <bool IS_DESCENDING,
           typename KeyT,
           typename ValueT,
           typename OffsetT,
-          typename SelectedPolicy = detail::radix::policy_hub<KeyT, ValueT, OffsetT>,
-          typename DecomposerT    = detail::identity_decomposer_t>
-struct DispatchRadixSort : SelectedPolicy
+          typename PolicyHub   = detail::radix::policy_hub<KeyT, ValueT, OffsetT>,
+          typename DecomposerT = detail::identity_decomposer_t>
+struct DispatchRadixSort
 {
   //------------------------------------------------------------------------------
   // Constants
@@ -863,6 +863,8 @@ struct DispatchRadixSort : SelectedPolicy
   // Whether this is a keys-only (or key-value) sort
   static constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
 
+  using max_policy_t = typename PolicyHub::MaxPolicy;
+
   //------------------------------------------------------------------------------
   // Problem state
   //------------------------------------------------------------------------------
@@ -1241,7 +1243,6 @@ struct DispatchRadixSort : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t InvokeOnesweep()
   {
-    using MaxPolicyT = typename DispatchRadixSort::MaxPolicy;
     // PortionOffsetT is used for offsets within a portion, and must be signed.
     using PortionOffsetT = int;
     using AtomicOffsetT  = PortionOffsetT;
@@ -1322,7 +1323,7 @@ struct DispatchRadixSort : SelectedPolicy
 
       constexpr int HISTO_BLOCK_THREADS = ActivePolicyT::HistogramPolicy::BLOCK_THREADS;
       int histo_blocks_per_sm           = 1;
-      auto histogram_kernel = DeviceRadixSortHistogramKernel<MaxPolicyT, IS_DESCENDING, KeyT, OffsetT, DecomposerT>;
+      auto histogram_kernel = DeviceRadixSortHistogramKernel<max_policy_t, IS_DESCENDING, KeyT, OffsetT, DecomposerT>;
 
       error = CubDebug(
         cudaOccupancyMaxActiveBlocksPerMultiprocessor(&histo_blocks_per_sm, histogram_kernel, HISTO_BLOCK_THREADS, 0));
@@ -1371,7 +1372,7 @@ struct DispatchRadixSort : SelectedPolicy
 #endif
 
       error = THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(num_passes, SCAN_BLOCK_THREADS, 0, stream)
-                .doit(DeviceRadixSortExclusiveSumKernel<MaxPolicyT, OffsetT>, d_bins);
+                .doit(DeviceRadixSortExclusiveSumKernel<max_policy_t, OffsetT>, d_bins);
       error = CubDebug(error);
       if (cudaSuccess != error)
       {
@@ -1424,7 +1425,7 @@ struct DispatchRadixSort : SelectedPolicy
 #endif
 
           auto onesweep_kernel = DeviceRadixSortOnesweepKernel<
-            MaxPolicyT,
+            max_policy_t,
             IS_DESCENDING,
             KeyT,
             ValueT,
@@ -1673,13 +1674,12 @@ struct DispatchRadixSort : SelectedPolicy
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t InvokeManyTiles(Int2Type<false>)
   {
     // Invoke upsweep-downsweep
-    using MaxPolicyT = typename DispatchRadixSort::MaxPolicy;
     return InvokePasses<ActivePolicyT>(
-      DeviceRadixSortUpsweepKernel<MaxPolicyT, false, IS_DESCENDING, KeyT, OffsetT, DecomposerT>,
-      DeviceRadixSortUpsweepKernel<MaxPolicyT, true, IS_DESCENDING, KeyT, OffsetT, DecomposerT>,
-      RadixSortScanBinsKernel<MaxPolicyT, OffsetT>,
-      DeviceRadixSortDownsweepKernel<MaxPolicyT, false, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>,
-      DeviceRadixSortDownsweepKernel<MaxPolicyT, true, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>);
+      DeviceRadixSortUpsweepKernel<max_policy_t, false, IS_DESCENDING, KeyT, OffsetT, DecomposerT>,
+      DeviceRadixSortUpsweepKernel<max_policy_t, true, IS_DESCENDING, KeyT, OffsetT, DecomposerT>,
+      RadixSortScanBinsKernel<max_policy_t, OffsetT>,
+      DeviceRadixSortDownsweepKernel<max_policy_t, false, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>,
+      DeviceRadixSortDownsweepKernel<max_policy_t, true, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>);
   }
 
   template <typename ActivePolicyT>
@@ -1747,7 +1747,6 @@ struct DispatchRadixSort : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT        = typename DispatchRadixSort::MaxPolicy;
     using SingleTilePolicyT = typename ActivePolicyT::SingleTilePolicy;
 
     // Return if empty problem, or if no bits to sort and double-buffering is used
@@ -1780,7 +1779,7 @@ struct DispatchRadixSort : SelectedPolicy
     {
       // Small, single tile size
       return InvokeSingleTile<ActivePolicyT>(
-        DeviceRadixSortSingleTileKernel<MaxPolicyT, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>);
+        DeviceRadixSortSingleTileKernel<max_policy_t, IS_DESCENDING, KeyT, ValueT, OffsetT, DecomposerT>);
     }
     else
     {
@@ -1838,8 +1837,6 @@ struct DispatchRadixSort : SelectedPolicy
     cudaStream_t stream,
     DecomposerT decomposer = {})
   {
-    using MaxPolicyT = typename DispatchRadixSort::MaxPolicy;
-
     cudaError_t error;
     do
     {
@@ -1867,7 +1864,7 @@ struct DispatchRadixSort : SelectedPolicy
         decomposer);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(max_policy_t::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
@@ -1929,9 +1926,9 @@ template <bool IS_DESCENDING,
           typename BeginOffsetIteratorT,
           typename EndOffsetIteratorT,
           typename OffsetT,
-          typename SelectedPolicy = detail::radix::policy_hub<KeyT, ValueT, OffsetT>,
-          typename DecomposerT    = detail::identity_decomposer_t>
-struct DispatchSegmentedRadixSort : SelectedPolicy
+          typename PolicyHub   = detail::radix::policy_hub<KeyT, ValueT, OffsetT>,
+          typename DecomposerT = detail::identity_decomposer_t>
+struct DispatchSegmentedRadixSort
 {
   //------------------------------------------------------------------------------
   // Constants
@@ -1940,6 +1937,8 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
   // Whether this is a keys-only (or key-value) sort
   static constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
 
+  using max_policy_t = typename PolicyHub::MaxPolicy;
+
   //------------------------------------------------------------------------------
   // Parameter members
   //------------------------------------------------------------------------------
@@ -2292,8 +2291,6 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename DispatchSegmentedRadixSort::MaxPolicy;
-
     // Return if empty problem, or if no bits to sort and double-buffering is used
     if (num_items == 0 || num_segments == 0 || (begin_bit == end_bit && is_overwrite_okay))
     {
@@ -2307,7 +2304,7 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
     // Force kernel code-generation in all compiler passes
     return InvokePasses<ActivePolicyT>(
       DeviceSegmentedRadixSortKernel<
-        MaxPolicyT,
+        max_policy_t,
         false,
         IS_DESCENDING,
         KeyT,
@@ -2317,7 +2314,7 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
         OffsetT,
         DecomposerT>,
       DeviceSegmentedRadixSortKernel<
-        MaxPolicyT,
+        max_policy_t,
         true,
         IS_DESCENDING,
         KeyT,
@@ -2394,8 +2391,6 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
     bool is_overwrite_okay,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchSegmentedRadixSort::MaxPolicy;
-
     cudaError_t error;
     do
     {
@@ -2425,7 +2420,7 @@ struct DispatchSegmentedRadixSort : SelectedPolicy
         ptx_version);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(max_policy_t::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
index a3c41d1e530..a30d489612c 100644
--- a/cub/cub/device/dispatch/dispatch_reduce.cuh
+++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
@@ -260,21 +260,21 @@ template <typename InputIteratorT,
           typename OutputIteratorT,
           typename OffsetT,
           typename ReductionOpT,
-          typename InitT  = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::value_t<InputIteratorT>>,
-          typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::value_t<InputIteratorT>, InitT>,
-          typename SelectedPolicy = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>,
-          typename TransformOpT   = ::cuda::std::__identity,
-          typename KernelSource   = DeviceReduceKernelSource<
-              typename SelectedPolicy::MaxPolicy,
-              InputIteratorT,
-              OutputIteratorT,
-              OffsetT,
-              ReductionOpT,
-              InitT,
-              AccumT,
-              TransformOpT>,
+          typename InitT     = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::value_t<InputIteratorT>>,
+          typename AccumT    = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::value_t<InputIteratorT>, InitT>,
+          typename PolicyHub = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>,
+          typename TransformOpT = ::cuda::std::__identity,
+          typename KernelSource = DeviceReduceKernelSource<
+            typename PolicyHub::MaxPolicy,
+            InputIteratorT,
+            OutputIteratorT,
+            OffsetT,
+            ReductionOpT,
+            InitT,
+            AccumT,
+            TransformOpT>,
           typename KernelLauncherFactory = TripleChevronFactory>
-struct DispatchReduce : SelectedPolicy
+struct DispatchReduce
 {
   //---------------------------------------------------------------------------
   // Problem state
@@ -639,7 +639,7 @@ struct DispatchReduce : SelectedPolicy
    *   **[optional]** CUDA stream to launch kernels within.
    *   Default is stream<sub>0</sub>.
    */
-  template <typename MaxPolicyT = typename DispatchReduce::MaxPolicy>
+  template <typename MaxPolicyT = typename PolicyHub::MaxPolicy>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Dispatch(
     void* d_temp_storage,
     size_t& temp_storage_bytes,
@@ -744,16 +744,16 @@ template <
   typename InitT,
   typename AccumT = ::cuda::std::
     __accumulator_t<ReductionOpT, cub::detail::invoke_result_t<TransformOpT, cub::detail::value_t<InputIteratorT>>, InitT>,
-  typename SelectedPolicyT = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>,
-  typename KernelSource    = DeviceReduceKernelSource<
-       typename SelectedPolicyT::MaxPolicy,
-       InputIteratorT,
-       OutputIteratorT,
-       OffsetT,
-       ReductionOpT,
-       InitT,
-       AccumT,
-       TransformOpT>,
+  typename PolicyHub    = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>,
+  typename KernelSource = DeviceReduceKernelSource<
+    typename PolicyHub::MaxPolicy,
+    InputIteratorT,
+    OutputIteratorT,
+    OffsetT,
+    ReductionOpT,
+    InitT,
+    AccumT,
+    TransformOpT>,
   typename KernelLauncherFactory = TripleChevronFactory>
 using DispatchTransformReduce =
   DispatchReduce<InputIteratorT,
@@ -762,7 +762,7 @@ using DispatchTransformReduce =
                  ReductionOpT,
                  InitT,
                  AccumT,
-                 SelectedPolicyT,
+                 PolicyHub,
                  TransformOpT,
                  KernelSource,
                  KernelLauncherFactory>;
@@ -805,10 +805,10 @@ template <typename InputIteratorT,
           typename EndOffsetIteratorT,
           typename OffsetT,
           typename ReductionOpT,
-          typename InitT  = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::value_t<InputIteratorT>>,
-          typename AccumT = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::value_t<InputIteratorT>, InitT>,
-          typename SelectedPolicy = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>>
-struct DispatchSegmentedReduce : SelectedPolicy
+          typename InitT     = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::value_t<InputIteratorT>>,
+          typename AccumT    = ::cuda::std::__accumulator_t<ReductionOpT, cub::detail::value_t<InputIteratorT>, InitT>,
+          typename PolicyHub = detail::reduce::policy_hub<AccumT, OffsetT, ReductionOpT>>
+struct DispatchSegmentedReduce
 {
   //---------------------------------------------------------------------------
   // Problem state
@@ -996,12 +996,10 @@ struct DispatchSegmentedReduce : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename DispatchSegmentedReduce::MaxPolicy;
-
     // Force kernel code-generation in all compiler passes
     return InvokePasses<ActivePolicyT>(
       DeviceSegmentedReduceKernel<
-        MaxPolicyT,
+        typename PolicyHub::MaxPolicy,
         InputIteratorT,
         OutputIteratorT,
         BeginOffsetIteratorT,
@@ -1071,8 +1069,6 @@ struct DispatchSegmentedReduce : SelectedPolicy
     InitT init,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchSegmentedReduce::MaxPolicy;
-
     if (num_segments <= 0)
     {
       return cudaSuccess;
@@ -1105,7 +1101,7 @@ struct DispatchSegmentedReduce : SelectedPolicy
         ptx_version);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh b/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
index 22191a9e16d..28d60973862 100644
--- a/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
+++ b/cub/cub/device/dispatch/dispatch_reduce_by_key.cuh
@@ -209,7 +209,7 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ReduceByKeyPolicyT::BLOCK_TH
  * @tparam OffsetT
  *   Signed integer type for global offsets
  *
- * @tparam SelectedPolicy
+ * @tparam PolicyHub
  *   Implementation detail, do not specify directly, requirements on the
  *   content of this type are subject to breaking change.
  */
@@ -221,17 +221,13 @@ template <typename KeysInputIteratorT,
           typename EqualityOpT,
           typename ReductionOpT,
           typename OffsetT,
-          typename AccumT = //
-          ::cuda::std::__accumulator_t<ReductionOpT,
-                                       cub::detail::value_t<ValuesInputIteratorT>,
-                                       cub::detail::value_t<ValuesInputIteratorT>>,
-          typename SelectedPolicy = //
-          detail::reduce_by_key::policy_hub< //
-            ReductionOpT, //
-            AccumT, //
-            cub::detail::non_void_value_t< //
-              UniqueOutputIteratorT, //
-              cub::detail::value_t<KeysInputIteratorT>>>>
+          typename AccumT    = ::cuda::std::__accumulator_t<ReductionOpT,
+                                                            cub::detail::value_t<ValuesInputIteratorT>,
+                                                            cub::detail::value_t<ValuesInputIteratorT>>,
+          typename PolicyHub = detail::reduce_by_key::policy_hub<
+            ReductionOpT,
+            AccumT,
+            cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::value_t<KeysInputIteratorT>>>>
 struct DispatchReduceByKey
 {
   //-------------------------------------------------------------------------
@@ -443,11 +439,10 @@ struct DispatchReduceByKey
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
     return Invoke<ActivePolicyT>(
       DeviceCompactInitKernel<ScanTileStateT, NumRunsOutputIteratorT>,
       DeviceReduceByKeyKernel<
-        MaxPolicyT,
+        typename PolicyHub::MaxPolicy,
         KeysInputIteratorT,
         UniqueOutputIteratorT,
         ValuesInputIteratorT,
@@ -512,8 +507,6 @@ struct DispatchReduceByKey
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
-
     cudaError error = cudaSuccess;
 
     do
@@ -540,7 +533,7 @@ struct DispatchReduceByKey
         stream);
 
       // Dispatch
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_rle.cuh b/cub/cub/device/dispatch/dispatch_rle.cuh
index 2ed6d6cc18f..5c8f1e01d0f 100644
--- a/cub/cub/device/dispatch/dispatch_rle.cuh
+++ b/cub/cub/device/dispatch/dispatch_rle.cuh
@@ -178,7 +178,7 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::RleSweepPolicyT::BLOCK_THREA
  * @tparam OffsetT
  *   Signed integer type for global offsets
  *
- * @tparam SelectedPolicy
+ * @tparam PolicyHub
  *   Implementation detail, do not specify directly, requirements on the
  *   content of this type are subject to breaking change.
  */
@@ -188,7 +188,7 @@ template <typename InputIteratorT,
           typename NumRunsOutputIteratorT,
           typename EqualityOpT,
           typename OffsetT,
-          typename SelectedPolicy =
+          typename PolicyHub =
             detail::rle::non_trivial_runs::policy_hub<cub::detail::non_void_value_t<LengthsOutputIteratorT, OffsetT>,
                                                       cub::detail::value_t<InputIteratorT>>>
 struct DeviceRleDispatch
@@ -450,10 +450,9 @@ struct DeviceRleDispatch
   template <class ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
     return Invoke<ActivePolicyT>(
       DeviceCompactInitKernel<ScanTileStateT, NumRunsOutputIteratorT>,
-      DeviceRleSweepKernel<MaxPolicyT,
+      DeviceRleSweepKernel<typename PolicyHub::MaxPolicy,
                            InputIteratorT,
                            OffsetsOutputIteratorT,
                            LengthsOutputIteratorT,
@@ -507,8 +506,6 @@ struct DeviceRleDispatch
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
-
     cudaError error = cudaSuccess;
 
     do
@@ -533,7 +530,7 @@ struct DeviceRleDispatch
         stream);
 
       // Dispatch
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_scan.cuh b/cub/cub/device/dispatch/dispatch_scan.cuh
index fce3334e018..85c819c9ba8 100644
--- a/cub/cub/device/dispatch/dispatch_scan.cuh
+++ b/cub/cub/device/dispatch/dispatch_scan.cuh
@@ -234,14 +234,14 @@ template <typename InputIteratorT,
           typename ScanOpT,
           typename InitValueT,
           typename OffsetT,
-          typename AccumT         = ::cuda::std::__accumulator_t<ScanOpT,
-                                                                 cub::detail::value_t<InputIteratorT>,
-                                                                 ::cuda::std::_If<std::is_same<InitValueT, NullType>::value,
-                                                                                  cub::detail::value_t<InputIteratorT>,
-                                                                                  typename InitValueT::value_type>>,
-          typename SelectedPolicy = detail::scan::policy_hub<AccumT, ScanOpT>,
-          bool ForceInclusive     = false>
-struct DispatchScan : SelectedPolicy
+          typename AccumT     = ::cuda::std::__accumulator_t<ScanOpT,
+                                                             cub::detail::value_t<InputIteratorT>,
+                                                             ::cuda::std::_If<std::is_same<InitValueT, NullType>::value,
+                                                                              cub::detail::value_t<InputIteratorT>,
+                                                                              typename InitValueT::value_type>>,
+          typename PolicyHub  = detail::scan::policy_hub<AccumT, ScanOpT>,
+          bool ForceInclusive = false>
+struct DispatchScan
 {
   //---------------------------------------------------------------------
   // Constants and Types
@@ -508,12 +508,11 @@ struct DispatchScan : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT     = typename DispatchScan::MaxPolicy;
     using ScanTileStateT = typename cub::ScanTileState<AccumT>;
     // Ensure kernels are instantiated.
     return Invoke<ActivePolicyT>(
       DeviceScanInitKernel<ScanTileStateT>,
-      DeviceScanKernel<MaxPolicyT,
+      DeviceScanKernel<typename PolicyHub::MaxPolicy,
                        InputIteratorT,
                        OutputIteratorT,
                        ScanTileStateT,
@@ -565,8 +564,6 @@ struct DispatchScan : SelectedPolicy
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchScan::MaxPolicy;
-
     cudaError_t error;
     do
     {
@@ -583,7 +580,7 @@ struct DispatchScan : SelectedPolicy
         d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, scan_op, init_value, stream, ptx_version);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh
index d435feb0e7d..ecd1f18f62c 100644
--- a/cub/cub/device/dispatch/dispatch_scan_by_key.cuh
+++ b/cub/cub/device/dispatch/dispatch_scan_by_key.cuh
@@ -232,9 +232,9 @@ template <
     ScanOpT,
     cub::detail::value_t<ValuesInputIteratorT>,
     ::cuda::std::_If<std::is_same<InitValueT, NullType>::value, cub::detail::value_t<ValuesInputIteratorT>, InitValueT>>,
-  typename SelectedPolicy =
+  typename PolicyHub =
     detail::scan_by_key::policy_hub<KeysInputIteratorT, AccumT, cub::detail::value_t<ValuesInputIteratorT>, ScanOpT>>
-struct DispatchScanByKey : SelectedPolicy
+struct DispatchScanByKey
 {
   //---------------------------------------------------------------------
   // Constants and Types
@@ -519,12 +519,10 @@ struct DispatchScanByKey : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename DispatchScanByKey::MaxPolicy;
-
     // Ensure kernels are instantiated.
     return Invoke<ActivePolicyT>(
       DeviceScanByKeyInitKernel<ScanByKeyTileStateT, KeysInputIteratorT, OffsetT>,
-      DeviceScanByKeyKernel<MaxPolicyT,
+      DeviceScanByKeyKernel<typename PolicyHub::MaxPolicy,
                             KeysInputIteratorT,
                             ValuesInputIteratorT,
                             ValuesOutputIteratorT,
@@ -583,8 +581,6 @@ struct DispatchScanByKey : SelectedPolicy
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchScanByKey::MaxPolicy;
-
     cudaError_t error;
 
     do
@@ -612,7 +608,7 @@ struct DispatchScanByKey : SelectedPolicy
         ptx_version);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh
index 855d970003f..dcfc0daab24 100644
--- a/cub/cub/device/dispatch/dispatch_segmented_sort.cuh
+++ b/cub/cub/device/dispatch/dispatch_segmented_sort.cuh
@@ -700,8 +700,8 @@ template <bool IS_DESCENDING,
           typename OffsetT,
           typename BeginOffsetIteratorT,
           typename EndOffsetIteratorT,
-          typename SelectedPolicy = detail::segmented_sort::policy_hub<KeyT, ValueT>>
-struct DispatchSegmentedSort : SelectedPolicy
+          typename PolicyHub = detail::segmented_sort::policy_hub<KeyT, ValueT>>
+struct DispatchSegmentedSort
 {
   static constexpr int KEYS_ONLY = std::is_same<ValueT, NullType>::value;
 
@@ -853,7 +853,6 @@ struct DispatchSegmentedSort : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT            = typename DispatchSegmentedSort::MaxPolicy;
     using LargeSegmentPolicyT   = typename ActivePolicyT::LargeSegmentPolicy;
     using SmallAndMediumPolicyT = typename ActivePolicyT::SmallAndMediumSegmentedSortPolicyT;
 
@@ -1026,6 +1025,8 @@ struct DispatchSegmentedSort : SelectedPolicy
         : (is_num_passes_odd) ? values_allocation.get()
                               : d_values.Alternate());
 
+      using MaxPolicyT = typename PolicyHub::MaxPolicy;
+
       if (partition_segments)
       {
         // Partition input segments into size groups and assign specialized
@@ -1092,8 +1093,6 @@ struct DispatchSegmentedSort : SelectedPolicy
     bool is_overwrite_okay,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchSegmentedSort::MaxPolicy;
-
     cudaError error = cudaSuccess;
 
     do
@@ -1120,7 +1119,7 @@ struct DispatchSegmentedSort : SelectedPolicy
         stream);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
@@ -1234,12 +1233,11 @@ private:
 #else // CUB_RDC_ENABLED
 
 #  define CUB_TEMP_DEVICE_CODE                                                 \
-    using MaxPolicyT = typename DispatchSegmentedSort::MaxPolicy;              \
     error =                                                                    \
       THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(1, 1, 0, stream) \
         .doit(                                                                 \
           DeviceSegmentedSortContinuationKernel<                               \
-            MaxPolicyT,                                                        \
+            typename PolicyHub::MaxPolicy,                                     \
             LargeKernelT,                                                      \
             SmallKernelT,                                                      \
             KeyT,                                                              \
diff --git a/cub/cub/device/dispatch/dispatch_select_if.cuh b/cub/cub/device/dispatch/dispatch_select_if.cuh
index 67f33647d29..e9db3db1787 100644
--- a/cub/cub/device/dispatch/dispatch_select_if.cuh
+++ b/cub/cub/device/dispatch/dispatch_select_if.cuh
@@ -424,13 +424,13 @@ template <typename InputIteratorT,
           typename EqualityOpT,
           typename OffsetT,
           bool KeepRejects,
-          bool MayAlias           = false,
-          typename SelectedPolicy = detail::select::policy_hub<cub::detail::value_t<InputIteratorT>,
-                                                               cub::detail::value_t<FlagsInputIteratorT>,
-                                                               detail::select::per_partition_offset_t,
-                                                               MayAlias,
-                                                               KeepRejects>>
-struct DispatchSelectIf : SelectedPolicy
+          bool MayAlias      = false,
+          typename PolicyHub = detail::select::policy_hub<cub::detail::value_t<InputIteratorT>,
+                                                          cub::detail::value_t<FlagsInputIteratorT>,
+                                                          detail::select::per_partition_offset_t,
+                                                          MayAlias,
+                                                          KeepRejects>>
+struct DispatchSelectIf
 {
   /******************************************************************************
    * Types and constants
@@ -755,12 +755,10 @@ struct DispatchSelectIf : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
-
     return Invoke<ActivePolicyT>(
       DeviceCompactInitKernel<ScanTileStateT, NumSelectedIteratorT>,
       DeviceSelectSweepKernel<
-        MaxPolicyT,
+        typename PolicyHub::MaxPolicy,
         InputIteratorT,
         FlagsInputIteratorT,
         SelectedOutputIteratorT,
@@ -821,8 +819,6 @@ struct DispatchSelectIf : SelectedPolicy
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
-
     int ptx_version = 0;
     if (cudaError_t error = CubDebug(PtxVersion(ptx_version)))
     {
@@ -842,7 +838,7 @@ struct DispatchSelectIf : SelectedPolicy
       stream,
       ptx_version);
 
-    return CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+    return CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
   }
 
 #ifndef _CCCL_DOXYGEN_INVOKED // Do not document
diff --git a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh
index a8468ac4484..9db1ee1c661 100644
--- a/cub/cub/device/dispatch/dispatch_three_way_partition.cuh
+++ b/cub/cub/device/dispatch/dispatch_three_way_partition.cuh
@@ -153,16 +153,15 @@ DeviceThreeWayPartitionInitKernel(ScanTileStateT tile_state, int num_tiles, NumS
  * Dispatch
  ******************************************************************************/
 
-template <
-  typename InputIteratorT,
-  typename FirstOutputIteratorT,
-  typename SecondOutputIteratorT,
-  typename UnselectedOutputIteratorT,
-  typename NumSelectedIteratorT,
-  typename SelectFirstPartOp,
-  typename SelectSecondPartOp,
-  typename OffsetT,
-  typename SelectedPolicy = detail::three_way_partition::policy_hub<cub::detail::value_t<InputIteratorT>, OffsetT>>
+template <typename InputIteratorT,
+          typename FirstOutputIteratorT,
+          typename SecondOutputIteratorT,
+          typename UnselectedOutputIteratorT,
+          typename NumSelectedIteratorT,
+          typename SelectFirstPartOp,
+          typename SelectSecondPartOp,
+          typename OffsetT,
+          typename PolicyHub = detail::three_way_partition::policy_hub<cub::detail::value_t<InputIteratorT>, OffsetT>>
 struct DispatchThreeWayPartitionIf
 {
   /*****************************************************************************
@@ -382,7 +381,7 @@ struct DispatchThreeWayPartitionIf
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
     return Invoke<ActivePolicyT>(
       DeviceThreeWayPartitionInitKernel<ScanTileStateT, NumSelectedIteratorT>,
       DeviceThreeWayPartitionKernel<
@@ -414,7 +413,7 @@ struct DispatchThreeWayPartitionIf
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename SelectedPolicy::MaxPolicy;
+    using MaxPolicyT = typename PolicyHub::MaxPolicy;
 
     cudaError error = cudaSuccess;
 
diff --git a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh
index 3fb26e258b7..8a310b35ebb 100644
--- a/cub/cub/device/dispatch/dispatch_unique_by_key.cuh
+++ b/cub/cub/device/dispatch/dispatch_unique_by_key.cuh
@@ -213,9 +213,9 @@ template <typename KeyInputIteratorT,
           typename NumSelectedIteratorT,
           typename EqualityOpT,
           typename OffsetT,
-          typename SelectedPolicy =
+          typename PolicyHub =
             detail::unique_by_key::policy_hub<detail::value_t<KeyInputIteratorT>, detail::value_t<ValueInputIteratorT>>>
-struct DispatchUniqueByKey : SelectedPolicy
+struct DispatchUniqueByKey
 {
   /******************************************************************************
    * Types and constants
@@ -529,13 +529,11 @@ struct DispatchUniqueByKey : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE cudaError_t Invoke()
   {
-    using MaxPolicyT = typename DispatchUniqueByKey::MaxPolicy;
-
     // Ensure kernels are instantiated.
     return Invoke<ActivePolicyT>(
       DeviceCompactInitKernel<ScanTileStateT, NumSelectedIteratorT>,
       DeviceUniqueByKeySweepKernel<
-        MaxPolicyT,
+        typename PolicyHub::MaxPolicy,
         KeyInputIteratorT,
         ValueInputIteratorT,
         KeyOutputIteratorT,
@@ -595,8 +593,6 @@ struct DispatchUniqueByKey : SelectedPolicy
     OffsetT num_items,
     cudaStream_t stream)
   {
-    using MaxPolicyT = typename DispatchUniqueByKey::MaxPolicy;
-
     cudaError_t error;
     do
     {
@@ -622,7 +618,7 @@ struct DispatchUniqueByKey : SelectedPolicy
         stream);
 
       // Dispatch to chained policy
-      error = CubDebug(MaxPolicyT::Invoke(ptx_version, dispatch));
+      error = CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       if (cudaSuccess != error)
       {
         break;
diff --git a/cub/test/catch2_test_vsmem.cu b/cub/test/catch2_test_vsmem.cu
index 5e43686beed..355332fa6a9 100644
--- a/cub/test/catch2_test_vsmem.cu
+++ b/cub/test/catch2_test_vsmem.cu
@@ -218,8 +218,8 @@ struct device_dummy_algorithm_policy_t
 template <typename InputIteratorT,
           typename OutputIteratorT,
           typename OffsetT,
-          typename SelectedPolicy = device_dummy_algorithm_policy_t<InputIteratorT>>
-struct dispatch_dummy_algorithm_t : SelectedPolicy
+          typename PolicyHub = device_dummy_algorithm_policy_t<InputIteratorT>>
+struct dispatch_dummy_algorithm_t
 {
   using item_t = cub::detail::value_t<InputIteratorT>;
 
@@ -274,8 +274,6 @@ struct dispatch_dummy_algorithm_t : SelectedPolicy
   template <typename ActivePolicyT>
   CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t Invoke()
   {
-    using max_policy_t = typename dispatch_dummy_algorithm_t::max_policy_t;
-
     using vsmem_helper_t = cub::detail::vsmem_helper_fallback_policy_t<
       typename ActivePolicyT::DummyAlgorithmPolicy,
       typename ActivePolicyT::FallbackDummyAlgorithmPolicy,
@@ -327,7 +325,7 @@ struct dispatch_dummy_algorithm_t : SelectedPolicy
     launch_config_info->config_vsmem_per_block       = vsmem_helper_t::vsmem_per_block;
 
     THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron(num_tiles, block_threads, 0, stream)
-      .doit(dummy_algorithm_kernel<max_policy_t, InputIteratorT, OutputIteratorT, OffsetT>,
+      .doit(dummy_algorithm_kernel<typename PolicyHub::max_policy_t, InputIteratorT, OutputIteratorT, OffsetT>,
             d_in,
             d_out,
             num_items,
@@ -349,8 +347,6 @@ struct dispatch_dummy_algorithm_t : SelectedPolicy
     launch_config_test_info_t* launch_config_info,
     cudaStream_t stream = 0)
   {
-    using max_policy_t = typename dispatch_dummy_algorithm_t::max_policy_t;
-
     // Get PTX version
     int ptx_version = 0;
     cudaError error = cub::PtxVersion(ptx_version);
@@ -372,7 +368,7 @@ struct dispatch_dummy_algorithm_t : SelectedPolicy
       ptx_version);
 
     // Dispatch to chained policy
-    error = max_policy_t::Invoke(ptx_version, dispatch);
+    error = PolicyHub::max_policy_t::Invoke(ptx_version, dispatch);
     if (cudaSuccess != error)
     {
       return error;
diff --git a/docs/cub/developer_overview.rst b/docs/cub/developer_overview.rst
index 1946bdfaf39..a0a78ed0d71 100644
--- a/docs/cub/developer_overview.rst
+++ b/docs/cub/developer_overview.rst
@@ -501,8 +501,8 @@ and passes it to the ``ChainedPolicy::Invoke`` function:
 .. code-block:: c++
 
     template <..., // algorithm specific compile-time parameters
-              typename SelectedPolicy> // also called: PolicyHub
-    struct DispatchAlgorithm : SelectedPolicy { // TODO(bgruber): I see no need for inheritance, can we remove it?
+              typename PolicyHub>
+    struct DispatchAlgorithm {
       CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static
       cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, ..., cudaStream stream) {
         if (/* no items to process */) {
@@ -518,9 +518,8 @@ and passes it to the ``ChainedPolicy::Invoke`` function:
         {
           return error;
         }
-        using MaxPolicy = typename SelectedPolicy::MaxPolicy;
         DispatchAlgorithm dispatch(..., stream);
-        return CubDebug(MaxPolicy::Invoke(ptx_version, dispatch));
+        return CubDebug(PolicyHub::MaxPolicy::Invoke(ptx_version, dispatch));
       }
     };
 
@@ -555,7 +554,7 @@ The dispatch object's ``Invoke`` function is then called with the best policy fo
 
 .. code-block:: c++
 
-    template <..., typename SelectedPolicy = DefaultTuning>
+    template <..., typename PolicyHub = detail::algorithm::policy_hub>
     struct DispatchAlgorithm {
       template <typename ActivePolicy>
       CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE
@@ -618,13 +617,12 @@ An agent policy could look like this:
 It's typically a collection of configuration values for the kernel launch configuration,
 work distribution setting, load and store algorithms to use, as well as load instruction cache modifiers.
 
-Finally, the tuning looks like:
+Finally, the tuning policy hub looks like:
 
 .. code-block:: c++
 
     template <typename... TuningRelevantParams /* ... */>
-    struct DeviceAlgorithmPolicy // also called tuning hub
-    {
+    struct policy_hub {
       // TuningRelevantParams... could be used for decision making, like element types used, iterator category, etc.
 
       // for SM35