@@ -209,7 +209,7 @@ T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
209
209
T* blocksum_p = (T*)(dp + nbytes_blockresult);
210
210
T* totalsum_p = (T*)(dp + nbytes_blockresult + nbytes_blocksum);
211
211
212
- amrex::launch (nblocks, nthreads , sm, stream,
212
+ amrex::launch<nthreads> (nblocks, sm, stream,
213
213
[=] AMREX_GPU_DEVICE (Gpu::Handler const & gh) noexcept
214
214
{
215
215
sycl::sub_group const & sg = gh.item ->get_sub_group ();
@@ -289,7 +289,7 @@ T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
289
289
}
290
290
});
291
291
292
- amrex::launch ( 1 , nthreads , sm, stream,
292
+ amrex::launch<nthreads>( 1 , sm, stream,
293
293
[=] AMREX_GPU_DEVICE (Gpu::Handler const & gh) noexcept
294
294
{
295
295
sycl::sub_group const & sg = gh.item ->get_sub_group ();
@@ -355,7 +355,7 @@ T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
355
355
}
356
356
});
357
357
358
- amrex::launch (nblocks, nthreads , 0 , stream,
358
+ amrex::launch<nthreads> (nblocks, 0 , stream,
359
359
[=] AMREX_GPU_DEVICE (Gpu::Handler const & gh) noexcept
360
360
{
361
361
int threadIdxx = gh.item ->get_local_id (0 );
@@ -429,7 +429,7 @@ T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE type, RetSum a_ret_sum = retSum
429
429
}
430
430
});
431
431
432
- amrex::launch (nblocks, nthreads , sm, stream,
432
+ amrex::launch<nthreads> (nblocks, sm, stream,
433
433
[=] AMREX_GPU_DEVICE (Gpu::Handler const & gh) noexcept
434
434
{
435
435
sycl::sub_group const & sg = gh.item ->get_sub_group ();
@@ -672,7 +672,7 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
672
672
(reinterpret_cast <OrderedBlockId::id_type*>(dp + nbytes_tile_state));
673
673
674
674
// Init ScanTileState on device
675
- amrex::launch ((nblocks+nthreads-1 )/nthreads, nthreads, 0 , stream, [=] AMREX_GPU_DEVICE ()
675
+ amrex::launch<nthreads> ((nblocks+nthreads-1 )/nthreads, 0 , stream, [=] AMREX_GPU_DEVICE ()
676
676
{
677
677
auto & scan_tile_state = const_cast <ScanTileState&>(tile_state);
678
678
auto & scan_bid = const_cast <OrderedBlockId&>(ordered_block_id);
@@ -813,7 +813,7 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
813
813
814
814
if (nblocks > 1 ) {
815
815
// Init ScanTileState on device
816
- amrex::launch ((nblocks+nthreads-1 )/nthreads, nthreads, 0 , stream, [=] AMREX_GPU_DEVICE ()
816
+ amrex::launch<nthreads> ((nblocks+nthreads-1 )/nthreads, 0 , stream, [=] AMREX_GPU_DEVICE ()
817
817
{
818
818
const_cast <ScanTileState&>(tile_state).InitializeStatus (nblocks);
819
819
});
@@ -957,7 +957,7 @@ T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = ret
957
957
}
958
958
});
959
959
960
- amrex::launch (nblocks, nthreads , sm, stream,
960
+ amrex::launch<nthreads> (nblocks, sm, stream,
961
961
[=] AMREX_GPU_DEVICE () noexcept
962
962
{
963
963
int lane = threadIdx.x % Gpu::Device::warp_size;
0 commit comments