Skip to content

Commit

Permalink
replace SHR_ADD with shift-add
Browse files Browse the repository at this point in the history
  • Loading branch information
fbusato committed Jan 13, 2025
1 parent 43b2b58 commit 0af70fd
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
22 changes: 11 additions & 11 deletions cub/cub/block/block_exchange.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ private:
int item_offset = ranks[i];
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]);
}
Expand All @@ -641,7 +641,7 @@ private:
int item_offset = linear_tid * ITEMS_PER_THREAD + i;
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
output_items[i] = temp_storage.buff[item_offset];
}
Expand Down Expand Up @@ -681,7 +681,7 @@ private:
{
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]);
}
Expand All @@ -697,7 +697,7 @@ private:
int item_offset = lane_id * ITEMS_PER_THREAD + i;
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
temp_items[i] = temp_storage.buff[item_offset];
}
Expand Down Expand Up @@ -735,7 +735,7 @@ private:
int item_offset = ranks[i];
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]);
}
Expand All @@ -748,7 +748,7 @@ private:
int item_offset = i * BLOCK_THREADS + linear_tid;
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
output_items[i] = temp_storage.buff[item_offset];
}
Expand Down Expand Up @@ -789,7 +789,7 @@ private:
{
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[i]);
}
Expand Down Expand Up @@ -1136,7 +1136,7 @@ public:
int item_offset = ranks[i];
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
if (ranks[i] >= 0)
{
Expand All @@ -1152,7 +1152,7 @@ public:
int item_offset = i * BLOCK_THREADS + linear_tid;
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
output_items[i] = temp_storage.buff[item_offset];
}
Expand Down Expand Up @@ -1195,7 +1195,7 @@ public:
int item_offset = ranks[i];
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
if (is_valid[i])
{
Expand All @@ -1211,7 +1211,7 @@ public:
int item_offset = i * BLOCK_THREADS + linear_tid;
_CCCL_IF_CONSTEXPR (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}
output_items[i] = temp_storage.buff[item_offset];
}
Expand Down
3 changes: 2 additions & 1 deletion cub/cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ CUB_NAMESPACE_BEGIN
/**
* \brief Shift-right then add. Returns (\p x >> \p shift) + \p addend.
*/
+ CCCL_DEPRECATED_BECAUSE("will be removed in the next major release")
_CCCL_DEVICE _CCCL_FORCEINLINE unsigned int SHR_ADD(unsigned int x, unsigned int shift, unsigned int addend)
{
unsigned int ret;
Expand Down Expand Up @@ -324,7 +325,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void ThreadExit()
/**
* \brief Abort execution and generate an interrupt to the host CPU
*/
- CCCL_DEPRECATED_BECAUSE("will be removed in the next major release")
+ CCCL_DEPRECATED_BECAUSE("use cuda::std::terminate() instead")
_CCCL_DEVICE _CCCL_FORCEINLINE void ThreadTrap()
{
asm volatile("trap;");
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/warp/specializations/warp_exchange_smem.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ public:
{
if (INSERT_PADDING)
{
- ranks[ITEM] = SHR_ADD(ranks[ITEM], LOG_SMEM_BANKS, ranks[ITEM]);
+ ranks[ITEM] = (ranks[ITEM] >> LOG_SMEM_BANKS) + ranks[ITEM];
}

temp_storage.items_shared[ranks[ITEM]] = input_items[ITEM];
Expand All @@ -164,7 +164,7 @@ public:

if (INSERT_PADDING)
{
- item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset);
+ item_offset = (item_offset >> LOG_SMEM_BANKS) + item_offset;
}

output_items[ITEM] = temp_storage.items_shared[item_offset];
Expand Down

0 comments on commit 0af70fd

Please sign in to comment.