From 0af70fdbe1e8992d38c517bcf3a00bb9d5b2e243 Mon Sep 17 00:00:00 2001 From: fbusato Date: Mon, 13 Jan 2025 20:32:44 +0000 Subject: [PATCH] replace SHR_ADD with shift-add --- cub/cub/block/block_exchange.cuh | 22 +++++++++---------- cub/cub/util_ptx.cuh | 3 ++- .../specializations/warp_exchange_smem.cuh | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cub/cub/block/block_exchange.cuh b/cub/cub/block/block_exchange.cuh index c22918ce9d5..2d4029c3b08 100644 --- a/cub/cub/block/block_exchange.cuh +++ b/cub/cub/block/block_exchange.cuh @@ -628,7 +628,7 @@ private: int item_offset = ranks[i]; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]); } @@ -641,7 +641,7 @@ private: int item_offset = linear_tid * ITEMS_PER_THREAD + i; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } output_items[i] = temp_storage.buff[item_offset]; } @@ -681,7 +681,7 @@ private: { _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]); } @@ -697,7 +697,7 @@ private: int item_offset = lane_id * ITEMS_PER_THREAD + i; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } temp_items[i] = temp_storage.buff[item_offset]; } @@ -735,7 +735,7 @@ private: int item_offset = ranks[i]; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]); } @@ -748,7 +748,7 @@ private: int item_offset = i * BLOCK_THREADS + linear_tid; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } output_items[i] = temp_storage.buff[item_offset]; } @@ -789,7 +789,7 @@ private: { _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]); } @@ -1136,7 +1136,7 @@ public: int item_offset = ranks[i]; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } if (ranks[i] >= 0) { @@ -1152,7 +1152,7 @@ public: int item_offset = i * BLOCK_THREADS + linear_tid; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } output_items[i] = temp_storage.buff[item_offset]; } @@ -1195,7 +1195,7 @@ public: int item_offset = ranks[i]; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } if (is_valid[i]) { @@ -1211,7 +1211,7 @@ public: int item_offset = i * BLOCK_THREADS + linear_tid; _CCCL_IF_CONSTEXPR (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } output_items[i] = temp_storage.buff[item_offset]; } diff --git a/cub/cub/util_ptx.cuh b/cub/cub/util_ptx.cuh index 328a7181b6e..6ca57622cde 100644 --- a/cub/cub/util_ptx.cuh +++ b/cub/cub/util_ptx.cuh @@ -55,6 +55,7 @@ CUB_NAMESPACE_BEGIN /** * \brief Shift-right then add. Returns (\p x >> \p shift) + \p addend. */ +CCCL_DEPRECATED_BECAUSE("will be removed in the next major release") _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int SHR_ADD(unsigned int x, unsigned int shift, unsigned int addend) { unsigned int ret; @@ -324,7 +325,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void ThreadExit() /** * \brief Abort execution and generate an interrupt to the host CPU */ -CCCL_DEPRECATED_BECAUSE("will be removed in the next major release") +CCCL_DEPRECATED_BECAUSE("use cuda::std::terminate() instead") _CCCL_DEVICE _CCCL_FORCEINLINE void ThreadTrap() { asm volatile("trap;"); diff --git a/cub/cub/warp/specializations/warp_exchange_smem.cuh b/cub/cub/warp/specializations/warp_exchange_smem.cuh index 01f1d2c873e..d9435c4bc5d 100644 --- a/cub/cub/warp/specializations/warp_exchange_smem.cuh +++ b/cub/cub/warp/specializations/warp_exchange_smem.cuh @@ -149,7 +149,7 @@ public: { if (INSERT_PADDING) { - ranks[ITEM] = SHR_ADD(ranks[ITEM], LOG_SMEM_BANKS, ranks[ITEM]); + ranks[ITEM] = (ranks[ITEM] >> LOG_SMEM_BANKS) + ranks[ITEM]; } temp_storage.items_shared[ranks[ITEM]] = input_items[ITEM]; @@ -164,7 +164,7 @@ public: if (INSERT_PADDING) { - item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset; } output_items[ITEM] = temp_storage.items_shared[item_offset];