From daab0a4c1cc01fe631e41602ba211e0b59b8c59c Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 14 Jan 2025 09:39:24 +0100 Subject: [PATCH] Cleanup util_arch (#2773) --- cub/cub/util_arch.cuh | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/cub/cub/util_arch.cuh b/cub/cub/util_arch.cuh index b1da6a03b5d..3c6aea5cc5c 100644 --- a/cub/cub/util_arch.cuh +++ b/cub/cub/util_arch.cuh @@ -47,6 +47,10 @@ #include #include +#include +#include +#include + // Legacy include; this functionality used to be defined in here. #include @@ -113,27 +117,24 @@ namespace detail static constexpr ::cuda::std::size_t max_smem_per_block = 48 * 1024; } // namespace detail -template +template struct RegBoundScaling { - enum - { - ITEMS_PER_THREAD = CUB_MAX(1, NOMINAL_4B_ITEMS_PER_THREAD * 4 / CUB_MAX(4, sizeof(T))), - BLOCK_THREADS = CUB_MIN(NOMINAL_4B_BLOCK_THREADS, - ((cub::detail::max_smem_per_block / (sizeof(T) * ITEMS_PER_THREAD)) + 31) / 32 * 32), - }; + static constexpr int ITEMS_PER_THREAD = + ::cuda::std::max(1, Nominal4ByteItemsPerThread * 4 / ::cuda::std::max(4, int{sizeof(T)})); + static constexpr int BLOCK_THREADS = + ::cuda::std::min(Nominal4ByteBlockThreads, + ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32); }; -template +template struct MemBoundScaling { - enum - { - ITEMS_PER_THREAD = - CUB_MAX(1, CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T), NOMINAL_4B_ITEMS_PER_THREAD * 2)), - BLOCK_THREADS = CUB_MIN(NOMINAL_4B_BLOCK_THREADS, - ((cub::detail::max_smem_per_block / (sizeof(T) * ITEMS_PER_THREAD)) + 31) / 32 * 32), - }; + static constexpr int ITEMS_PER_THREAD = ::cuda::std::max( + 1, ::cuda::std::min(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2)); + static constexpr int BLOCK_THREADS = + ::cuda::std::min(Nominal4ByteBlockThreads, + ::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32); }; #endif // Do not document