Skip to content

Commit

Permalink
recover LogicShiftLeft and LogicShiftRight
Browse files Browse the repository at this point in the history
Signed-off-by: fbusato <[email protected]>
  • Loading branch information
fbusato committed Jan 14, 2025
1 parent 3cf7cb5 commit e60f288
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 8 deletions.
18 changes: 10 additions & 8 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -424,11 +424,12 @@ public:
for (uint32_t i = 0; i < NUM_TOTAL_UNITS; ++i)
{
// In case the bit-offset of the counter at <index> is larger than the bit range of the
// current unit, the bit_shift amount will be larger than the bits provided by this unit.
// C++'s bit-shift has undefined behaviour if the bits being shifted exceed the operand width.
// The bit_shift is a run-time value, it is translated into SASS `shr` and the result behavior is well-defined.
// current unit, the bit_shift amount will be larger than the bits provided by this unit. As
// C++'s bit-shift has undefined behaviour if the bits being shifted exceed the operand width,
// we use the PTX instruction `shr` to make sure behaviour is well-defined.
// Negative bit-shift amounts wrap around in unsigned integer math and are ultimately clamped.
const uint32_t bit_shift = target_offset - i * USED_BITS_PER_UNIT;
val |= (data[i] >> bit_shift) & ITEM_MASK;
val |= detail::LogicShiftRight(data[i], bit_shift) & ITEM_MASK;
}
return val;
}
Expand All @@ -441,11 +442,12 @@ public:
for (uint32_t i = 0; i < NUM_TOTAL_UNITS; ++i)
{
// In case the bit-offset of the counter at <index> is larger than the bit range of the
// current unit, the bit_shift amount will be larger than the bits provided by this unit.
// C++'s bit-shift has undefined behaviour if the bits being shifted exceed the operand width.
// The bit_shift is a run-time value, it is translated into SASS `shl` and the result behavior is well-defined.
// current unit, the bit_shift amount will be larger than the bits provided by this unit. As
// C++'s bit-shift has undefined behaviour if the bits being shifted exceed the operand width,
// we use the PTX instruction `shl` to make sure behaviour is well-defined.
// Negative bit-shift amounts wrap around in unsigned integer math and are ultimately clamped.
const uint32_t bit_shift = target_offset - i * USED_BITS_PER_UNIT;
data[i] += (value << bit_shift) & UNIT_MASK;
data[i] += detail::LogicShiftLeft(value, bit_shift) & UNIT_MASK;
}
}

Expand Down
22 changes: 22 additions & 0 deletions cub/cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,28 @@ struct warp_matcher_t<LABEL_BITS, CUB_PTX_WARP_THREADS>
}
};

/**
 * @brief Logical left shift of a 32-bit value, issued as the PTX `shl.b32` instruction.
 *
 * Unlike the built-in `<<` operator, whose behaviour is undefined when the shift amount is not
 * smaller than the operand width, a shift amount greater than 32 is clamped to 32 here.
 *
 * @param val The 32-bit value to shift
 * @param num_bits Unsigned 32-bit shift amount
 * @return @p val shifted left by @p num_bits
 */
_CCCL_DEVICE _CCCL_FORCEINLINE uint32_t LogicShiftLeft(uint32_t val, uint32_t num_bits)
{
  uint32_t shifted = 0;
  asm("shl.b32 %0, %1, %2;"
      : "=r"(shifted)
      : "r"(val), "r"(num_bits));
  return shifted;
}

/**
 * @brief Logical right shift of a 32-bit value, issued as the PTX `shr.b32` instruction.
 *
 * Unlike the built-in `>>` operator, whose behaviour is undefined when the shift amount is not
 * smaller than the operand width, a shift amount greater than 32 is clamped to 32 here.
 *
 * @param val The 32-bit value to shift
 * @param num_bits Unsigned 32-bit shift amount
 * @return @p val shifted right by @p num_bits
 */
_CCCL_DEVICE _CCCL_FORCEINLINE uint32_t LogicShiftRight(uint32_t val, uint32_t num_bits)
{
  uint32_t shifted = 0;
  asm("shr.b32 %0, %1, %2;"
      : "=r"(shifted)
      : "r"(val), "r"(num_bits));
  return shifted;
}

} // namespace detail
#endif // _CCCL_DOXYGEN_INVOKED

Expand Down

0 comments on commit e60f288

Please sign in to comment.