From 76288d5f1757270bb6e3b03c39c5b7138b1bfaa3 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Tue, 25 Jun 2024 12:40:50 +0200 Subject: [PATCH] Rename CUB uninitialized_copy (#1913) CUB's uninitialized_copy is not related to std::uninitialized_copy since it only operates on a single value. Therefore, it should have a different name. --- cub/cub/agent/single_pass_scan_operators.cuh | 6 ++-- cub/cub/block/block_exchange.cuh | 28 +++++++++---------- .../block_reduce_warp_reductions.cuh | 2 +- .../specializations/block_scan_raking.cuh | 16 +++++------ .../specializations/block_scan_warp_scans.cuh | 6 ++-- cub/cub/detail/uninitialized_copy.cuh | 4 +-- cub/cub/device/dispatch/dispatch_reduce.cuh | 2 +- cub/cub/util_type.cuh | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cub/cub/agent/single_pass_scan_operators.cuh b/cub/cub/agent/single_pass_scan_operators.cuh index 13f59768cf2..67ffb965017 100644 --- a/cub/cub/agent/single_pass_scan_operators.cuh +++ b/cub/cub/agent/single_pass_scan_operators.cuh @@ -1158,7 +1158,7 @@ struct TilePrefixCallbackOp // Update our status with our tile-aggregate if (threadIdx.x == 0) { - detail::uninitialized_copy(&temp_storage.block_aggregate, block_aggregate); + detail::uninitialized_copy_single(&temp_storage.block_aggregate, block_aggregate); tile_status.SetPartial(tile_idx, block_aggregate); } @@ -1190,9 +1190,9 @@ struct TilePrefixCallbackOp inclusive_prefix = scan_op(exclusive_prefix, block_aggregate); tile_status.SetInclusive(tile_idx, inclusive_prefix); - detail::uninitialized_copy(&temp_storage.exclusive_prefix, exclusive_prefix); + detail::uninitialized_copy_single(&temp_storage.exclusive_prefix, exclusive_prefix); - detail::uninitialized_copy(&temp_storage.inclusive_prefix, inclusive_prefix); + detail::uninitialized_copy_single(&temp_storage.inclusive_prefix, inclusive_prefix); } // Return exclusive_prefix diff --git a/cub/cub/block/block_exchange.cuh b/cub/cub/block/block_exchange.cuh index b6b47bfb512..256c7fb4888 100644 --- a/cub/cub/block/block_exchange.cuh +++ b/cub/cub/block/block_exchange.cuh @@ -225,7 +225,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } CTA_SYNC(); @@ -274,7 +274,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } } @@ -330,7 +330,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } WARP_SYNC(0xffffffff); @@ -369,7 +369,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } WARP_SYNC(0xffffffff); @@ -401,7 +401,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } WARP_SYNC(0xffffffff); @@ -440,7 +440,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } CTA_SYNC(); @@ -497,7 +497,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } } } @@ -547,7 +547,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } WARP_SYNC(0xffffffff); @@ -560,7 +560,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(output_items + ITEM, temp_storage.buff[item_offset]); + detail::uninitialized_copy_single(output_items + ITEM, temp_storage.buff[item_offset]); } } @@ -591,7 +591,7 @@ private: { item_offset += item_offset >> LOG_SMEM_BANKS; } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } WARP_SYNC(0xffffffff); @@ -636,7 +636,7 @@ private: { item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } CTA_SYNC(); @@ -690,7 +690,7 @@ private: { item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } } @@ -745,7 +745,7 @@ private: { item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } CTA_SYNC(); @@ -800,7 +800,7 @@ private: { item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); } - detail::uninitialized_copy(temp_storage.buff + item_offset, input_items[ITEM]); + detail::uninitialized_copy_single(temp_storage.buff + item_offset, input_items[ITEM]); } } diff --git a/cub/cub/block/specializations/block_reduce_warp_reductions.cuh b/cub/cub/block/specializations/block_reduce_warp_reductions.cuh index f209d74ab4e..5b827b08030 100644 --- a/cub/cub/block/specializations/block_reduce_warp_reductions.cuh +++ b/cub/cub/block/specializations/block_reduce_warp_reductions.cuh @@ -181,7 +181,7 @@ struct BlockReduceWarpReductions // Share lane aggregates if (lane_id == 0) { - detail::uninitialized_copy(temp_storage.warp_aggregates + warp_id, warp_aggregate); + detail::uninitialized_copy_single(temp_storage.warp_aggregates + warp_id, warp_aggregate); } CTA_SYNC(); diff --git a/cub/cub/block/specializations/block_scan_raking.cuh b/cub/cub/block/specializations/block_scan_raking.cuh index d131c1515e0..f0fe7a5ca2a 100644 --- a/cub/cub/block/specializations/block_scan_raking.cuh +++ b/cub/cub/block/specializations/block_scan_raking.cuh @@ -300,7 +300,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -353,7 +353,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -408,7 +408,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -476,7 +476,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -557,7 +557,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -624,7 +624,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -678,7 +678,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); @@ -756,7 +756,7 @@ struct BlockScanRaking { // Place thread partial into shared memory raking grid T* placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); - detail::uninitialized_copy(placement_ptr, input); + detail::uninitialized_copy_single(placement_ptr, input); CTA_SYNC(); diff --git a/cub/cub/block/specializations/block_scan_warp_scans.cuh b/cub/cub/block/specializations/block_scan_warp_scans.cuh index a60e0b2f503..851a71cbe7b 100644 --- a/cub/cub/block/specializations/block_scan_warp_scans.cuh +++ b/cub/cub/block/specializations/block_scan_warp_scans.cuh @@ -194,7 +194,7 @@ struct BlockScanWarpScans // Last lane in each warp shares its warp-aggregate if (lane_id == WARP_THREADS - 1) { - detail::uninitialized_copy(temp_storage.warp_aggregates + warp_id, warp_aggregate); + detail::uninitialized_copy_single(temp_storage.warp_aggregates + warp_id, warp_aggregate); } CTA_SYNC(); @@ -417,7 +417,7 @@ struct BlockScanWarpScans if (lane_id == 0) { // Share the prefix with all threads - detail::uninitialized_copy(&temp_storage.block_prefix, block_prefix); + detail::uninitialized_copy_single(&temp_storage.block_prefix, block_prefix); exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 } @@ -524,7 +524,7 @@ struct BlockScanWarpScans if (lane_id == 0) { // Share the prefix with all threads - detail::uninitialized_copy(&temp_storage.block_prefix, block_prefix); + detail::uninitialized_copy_single(&temp_storage.block_prefix, block_prefix); } } diff --git a/cub/cub/detail/uninitialized_copy.cuh b/cub/cub/detail/uninitialized_copy.cuh index 107c827982c..85de5fc2c8c 100644 --- a/cub/cub/detail/uninitialized_copy.cuh +++ b/cub/cub/detail/uninitialized_copy.cuh @@ -56,7 +56,7 @@ _CCCL_HOST_DEVICE void uninitialized_copy(T* ptr, U&& val) template ::value, int>::type = 0> -_CCCL_HOST_DEVICE void uninitialized_copy(T* ptr, U&& val) +_CCCL_HOST_DEVICE void uninitialized_copy_single(T* ptr, U&& val) { *ptr = ::cuda::std::forward(val); } @@ -64,7 +64,7 @@ _CCCL_HOST_DEVICE void uninitialized_copy(T* ptr, U&& val) template ::value, int>::type = 0> -_CCCL_HOST_DEVICE void uninitialized_copy(T* ptr, U&& val) +_CCCL_HOST_DEVICE void uninitialized_copy_single(T* ptr, U&& val) { new (ptr) T(::cuda::std::forward(val)); } diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh index 354b9bd94fd..262bcc26231 100644 --- a/cub/cub/device/dispatch/dispatch_reduce.cuh +++ b/cub/cub/device/dispatch/dispatch_reduce.cuh @@ -195,7 +195,7 @@ __launch_bounds__(int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS) // Output result if (threadIdx.x == 0) { - detail::uninitialized_copy(d_out + blockIdx.x, block_aggregate); + detail::uninitialized_copy_single(d_out + blockIdx.x, block_aggregate); } } diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh index 5f065172d64..d8c03500081 100644 --- a/cub/cub/util_type.cuh +++ b/cub/cub/util_type.cuh @@ -303,7 +303,7 @@ struct InputValue } else { - detail::uninitialized_copy(&m_immediate_value, other.m_immediate_value); + detail::uninitialized_copy_single(&m_immediate_value, other.m_immediate_value); } }