From 59a6584bbfe4b6bf292456e8774704e4e1af86b7 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Mon, 25 Nov 2024 15:26:57 +0100 Subject: [PATCH] Regenerate PTX test Overwrites all generated PTX tests and runs `pre-commit run --all-files` --- .../{barrier_cluster.inc => barrier_cluster.h} | 0 .../generated/{cp_async_bulk.inc => cp_async_bulk.h} | 0 ...commit_group.inc => cp_async_bulk_commit_group.h} | 0 ..._bulk_multicast.inc => cp_async_bulk_multicast.h} | 0 ..._async_bulk_tensor.inc => cp_async_bulk_tensor.h} | 0 ...ulticast.inc => cp_async_bulk_tensor_multicast.h} | 0 ...ulk_wait_group.inc => cp_async_bulk_wait_group.h} | 0 ..._reduce_async_bulk.inc => cp_reduce_async_bulk.h} | 0 ...ync_bulk_bf16.inc => cp_reduce_async_bulk_bf16.h} | 12 ++++++------ ...async_bulk_f16.inc => cp_reduce_async_bulk_f16.h} | 12 ++++++------ ...bulk_tensor.inc => cp_reduce_async_bulk_tensor.h} | 0 .../cuda/ptx/generated/{fence.inc => fence.h} | 0 ...fence_mbarrier_init.inc => fence_mbarrier_init.h} | 0 .../{fence_proxy_alias.inc => fence_proxy_alias.h} | 0 .../{fence_proxy_async.inc => fence_proxy_async.h} | 0 ...p_generic.inc => fence_proxy_tensormap_generic.h} | 0 .../cuda/ptx/generated/{get_sreg.inc => get_sreg.h} | 0 .../ptx/generated/{getctarank.inc => getctarank.h} | 0 .../{mbarrier_arrive.inc => mbarrier_arrive.h} | 0 ...ive_expect_tx.inc => mbarrier_arrive_expect_tx.h} | 0 ...no_complete.inc => mbarrier_arrive_no_complete.h} | 0 .../generated/{mbarrier_init.inc => mbarrier_init.h} | 0 .../{mbarrier_try_wait.inc => mbarrier_try_wait.h} | 0 ...ry_wait_parity.inc => mbarrier_try_wait_parity.h} | 0 .../generated/{mbarrier_wait.inc => mbarrier_wait.h} | 0 ...arrier_wait_parity.inc => mbarrier_wait_parity.h} | 0 .../ptx/generated/{red_async.inc => red_async.h} | 0 .../cuda/ptx/generated/{st_async.inc => st_async.h} | 0 ...p_cp_fenceproxy.inc => tensormap_cp_fenceproxy.h} | 0 .../{tensormap_replace.inc => tensormap_replace.h} | 0 .../cuda/ptx/ptx.barrier.cluster.compile.pass.cpp | 2 +- .../ptx.cp.async.bulk.commit_group.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.cp.async.bulk.compile.pass.cpp | 2 +- .../ptx/ptx.cp.async.bulk.multicast.compile.pass.cpp | 2 +- .../ptx/ptx.cp.async.bulk.tensor.compile.pass.cpp | 2 +- ...x.cp.async.bulk.tensor.multicast.compile.pass.cpp | 2 +- .../ptx.cp.async.bulk.wait_group.compile.pass.cpp | 2 +- .../ptx/ptx.cp.reduce.async.bulk.compile.pass.cpp | 6 +++--- .../ptx.cp.reduce.async.bulk.tensor.compile.pass.cpp | 2 +- .../libcudacxx/cuda/ptx/ptx.fence.compile.pass.cpp | 10 +++++----- .../cuda/ptx/ptx.get_sreg.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.getctarank.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.mbarrier.arrive.compile.pass.cpp | 6 +++--- .../cuda/ptx/ptx.mbarrier.init.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.mbarrier.wait.compile.pass.cpp | 8 ++++---- .../cuda/ptx/ptx.red.async.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.st.async.compile.pass.cpp | 2 +- .../ptx/ptx.tensormap.cp_fenceproxy.compile.pass.cpp | 2 +- .../cuda/ptx/ptx.tensormap.replace.compile.pass.cpp | 2 +- 49 files changed, 42 insertions(+), 42 deletions(-) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{barrier_cluster.inc => barrier_cluster.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk.inc => cp_async_bulk.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk_commit_group.inc => cp_async_bulk_commit_group.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk_multicast.inc => cp_async_bulk_multicast.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk_tensor.inc => cp_async_bulk_tensor.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk_tensor_multicast.inc => cp_async_bulk_tensor_multicast.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_async_bulk_wait_group.inc => cp_async_bulk_wait_group.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_reduce_async_bulk.inc => cp_reduce_async_bulk.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_reduce_async_bulk_bf16.inc => cp_reduce_async_bulk_bf16.h} (90%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_reduce_async_bulk_f16.inc => cp_reduce_async_bulk_f16.h} (87%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{cp_reduce_async_bulk_tensor.inc => cp_reduce_async_bulk_tensor.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{fence.inc => fence.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{fence_mbarrier_init.inc => fence_mbarrier_init.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{fence_proxy_alias.inc => fence_proxy_alias.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{fence_proxy_async.inc => fence_proxy_async.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{fence_proxy_tensormap_generic.inc => fence_proxy_tensormap_generic.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{get_sreg.inc => get_sreg.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{getctarank.inc => getctarank.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_arrive.inc => mbarrier_arrive.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_arrive_expect_tx.inc => mbarrier_arrive_expect_tx.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_arrive_no_complete.inc => mbarrier_arrive_no_complete.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_init.inc => mbarrier_init.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_try_wait.inc => mbarrier_try_wait.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_try_wait_parity.inc => mbarrier_try_wait_parity.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_wait.inc => mbarrier_wait.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{mbarrier_wait_parity.inc => mbarrier_wait_parity.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{red_async.inc => red_async.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{st_async.inc => st_async.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{tensormap_cp_fenceproxy.inc => tensormap_cp_fenceproxy.h} (100%) rename libcudacxx/test/libcudacxx/cuda/ptx/generated/{tensormap_replace.inc => tensormap_replace.h} (100%) diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/barrier_cluster.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/barrier_cluster.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/barrier_cluster.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/barrier_cluster.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_multicast.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_multicast.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_multicast.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_multicast.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor_multicast.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor_multicast.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor_multicast.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_tensor_multicast.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_wait_group.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_wait_group.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_wait_group.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_wait_group.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.h similarity index 90% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.h index fe38374fe00..076806a70f8 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.inc +++ b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_bf16.h @@ -1,6 +1,6 @@ __global__ void test_cp_reduce_async_bulk_bf16(void** fn_ptr) { -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -12,9 +12,9 @@ __global__ void test_cp_reduce_async_bulk_bf16(void** fn_ptr) __nv_bfloat16*, const __nv_bfloat16*, uint32_t)>(cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -26,9 +26,9 @@ __global__ void test_cp_reduce_async_bulk_bf16(void** fn_ptr) __nv_bfloat16*, const __nv_bfloat16*, uint32_t)>(cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -40,5 +40,5 @@ __global__ void test_cp_reduce_async_bulk_bf16(void** fn_ptr) __nv_bfloat16*, const __nv_bfloat16*, uint32_t)>(cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 } diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.h similarity index 87% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.h index e7e58cfcb80..34f29b49b9c 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.inc +++ b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_f16.h @@ -1,6 +1,6 @@ __global__ void test_cp_reduce_async_bulk_f16(void** fn_ptr) { -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -9,9 +9,9 @@ __global__ void test_cp_reduce_async_bulk_f16(void** fn_ptr) static_cast( cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -20,9 +20,9 @@ __global__ void test_cp_reduce_async_bulk_f16(void** fn_ptr) static_cast( cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 -# if __cccl_ptx_isa >= 800 +#if __cccl_ptx_isa >= 800 NV_IF_TARGET( NV_PROVIDES_SM_90, ( @@ -31,5 +31,5 @@ __global__ void test_cp_reduce_async_bulk_f16(void** fn_ptr) static_cast( cuda::ptx::cp_reduce_async_bulk));)); -# endif // __cccl_ptx_isa >= 800 +#endif // __cccl_ptx_isa >= 800 } diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_tensor.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_tensor.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_tensor.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_reduce_async_bulk_tensor.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/fence.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/fence.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_alias.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_alias.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_alias.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_alias.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_async.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_async.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_async.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_async.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_tensormap_generic.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_tensormap_generic.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_tensormap_generic.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_tensormap_generic.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/get_sreg.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/get_sreg.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/get_sreg.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/get_sreg.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/getctarank.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/getctarank.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/getctarank.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/getctarank.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_expect_tx.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_expect_tx.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_expect_tx.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_expect_tx.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_no_complete.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_no_complete.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_no_complete.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_arrive_no_complete.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_init.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_init.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_init.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_init.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait_parity.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait_parity.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait_parity.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_try_wait_parity.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait_parity.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait_parity.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait_parity.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/mbarrier_wait_parity.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/red_async.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/red_async.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/red_async.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/red_async.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/st_async.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/st_async.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/st_async.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/st_async.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_cp_fenceproxy.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_cp_fenceproxy.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_cp_fenceproxy.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_cp_fenceproxy.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_replace.inc b/libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_replace.h similarity index 100% rename from libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_replace.inc rename to libcudacxx/test/libcudacxx/cuda/ptx/generated/tensormap_replace.h diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.barrier.cluster.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.barrier.cluster.compile.pass.cpp index c460a2e5b09..b54989848e4 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.barrier.cluster.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.barrier.cluster.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/barrier_cluster.inc" +#include "generated/barrier_cluster.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.commit_group.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.commit_group.compile.pass.cpp index 4695221dbc5..40295ce9816 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.commit_group.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.commit_group.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/cp_async_bulk_commit_group.inc" +#include "generated/cp_async_bulk_commit_group.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.compile.pass.cpp index b1811727b66..57db3fad66c 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/cp_async_bulk.inc" +#include "generated/cp_async_bulk.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.multicast.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.multicast.compile.pass.cpp index c040528cabc..40a3421854e 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.multicast.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.multicast.compile.pass.cpp @@ -33,7 +33,7 @@ * */ -#include "generated/cp_async_bulk_multicast.inc" +#include "generated/cp_async_bulk_multicast.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.compile.pass.cpp index 0b69b8a8f1c..13f1a84d523 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/cp_async_bulk_tensor.inc" +#include "generated/cp_async_bulk_tensor.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.multicast.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.multicast.compile.pass.cpp index 7d53d9ee0c9..b8b26b22618 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.multicast.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.tensor.multicast.compile.pass.cpp @@ -33,7 +33,7 @@ * */ -#include "generated/cp_async_bulk_tensor_multicast.inc" +#include "generated/cp_async_bulk_tensor_multicast.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.wait_group.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.wait_group.compile.pass.cpp index 39df53c5f9d..eff068cb319 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.wait_group.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.async.bulk.wait_group.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/cp_async_bulk_wait_group.inc" +#include "generated/cp_async_bulk_wait_group.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.compile.pass.cpp index a186e34a809..6dc98bdec0d 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.compile.pass.cpp @@ -31,14 +31,14 @@ * */ -#include "generated/cp_reduce_async_bulk.inc" +#include "generated/cp_reduce_async_bulk.h" #ifdef _LIBCUDACXX_HAS_NVF16 -# include "generated/cp_reduce_async_bulk_f16.inc" +# include "generated/cp_reduce_async_bulk_f16.h" #endif // _LIBCUDACXX_HAS_NVF16 #ifdef _LIBCUDACXX_HAS_NVBF16 -# include "generated/cp_reduce_async_bulk_bf16.inc" +# include "generated/cp_reduce_async_bulk_bf16.h" #endif // _LIBCUDACXX_HAS_NVBF16 int main(int, char**) diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.tensor.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.tensor.compile.pass.cpp index 14abc0d3ae6..6ada1b5ef32 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.tensor.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.cp.reduce.async.bulk.tensor.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/cp_reduce_async_bulk_tensor.inc" +#include "generated/cp_reduce_async_bulk_tensor.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.fence.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.fence.compile.pass.cpp index 641cb83f172..7764261d6f4 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.fence.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.fence.compile.pass.cpp @@ -31,11 +31,11 @@ * */ -#include "generated/fence.inc" -#include "generated/fence_mbarrier_init.inc" -#include "generated/fence_proxy_alias.inc" -#include "generated/fence_proxy_async.inc" -#include "generated/fence_proxy_tensormap_generic.inc" +#include "generated/fence.h" +#include "generated/fence_mbarrier_init.h" +#include "generated/fence_proxy_alias.h" +#include "generated/fence_proxy_async.h" +#include "generated/fence_proxy_tensormap_generic.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.get_sreg.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.get_sreg.compile.pass.cpp index 697cc00a1be..defadf3b445 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.get_sreg.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.get_sreg.compile.pass.cpp @@ -32,7 +32,7 @@ * */ -#include "generated/get_sreg.inc" +#include "generated/get_sreg.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.getctarank.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.getctarank.compile.pass.cpp index 80fc71c0998..da8209a0c7e 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.getctarank.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.getctarank.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/getctarank.inc" +#include "generated/getctarank.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.arrive.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.arrive.compile.pass.cpp index 2350b176630..15a0d4d4b79 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.arrive.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.arrive.compile.pass.cpp @@ -31,9 +31,9 @@ * */ -#include "generated/mbarrier_arrive.inc" -#include "generated/mbarrier_arrive_expect_tx.inc" -#include "generated/mbarrier_arrive_no_complete.inc" +#include "generated/mbarrier_arrive.h" +#include "generated/mbarrier_arrive_expect_tx.h" +#include "generated/mbarrier_arrive_no_complete.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.init.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.init.compile.pass.cpp index b445a61a8a9..7673d49c784 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.init.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.init.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/mbarrier_init.inc" +#include "generated/mbarrier_init.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.wait.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.wait.compile.pass.cpp index e9c17a2024d..6cfb7241847 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.wait.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.mbarrier.wait.compile.pass.cpp @@ -31,10 +31,10 @@ * */ -#include "generated/mbarrier_try_wait.inc" -#include "generated/mbarrier_try_wait_parity.inc" -#include "generated/mbarrier_wait.inc" -#include "generated/mbarrier_wait_parity.inc" +#include "generated/mbarrier_try_wait.h" +#include "generated/mbarrier_try_wait_parity.h" +#include "generated/mbarrier_wait.h" +#include "generated/mbarrier_wait_parity.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.red.async.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.red.async.compile.pass.cpp index 4a380ec8396..ea07a2149b8 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.red.async.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.red.async.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/red_async.inc" +#include "generated/red_async.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.st.async.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.st.async.compile.pass.cpp index 2c74f48e04d..8d61cb42909 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.st.async.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.st.async.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/st_async.inc" +#include "generated/st_async.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.cp_fenceproxy.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.cp_fenceproxy.compile.pass.cpp index d0d3a967836..2beb418b53f 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.cp_fenceproxy.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.cp_fenceproxy.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/tensormap_cp_fenceproxy.inc" +#include "generated/tensormap_cp_fenceproxy.h" int main(int, char**) { diff --git a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.replace.compile.pass.cpp b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.replace.compile.pass.cpp index d780ff26dca..a80564c8cf0 100644 --- a/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.replace.compile.pass.cpp +++ b/libcudacxx/test/libcudacxx/cuda/ptx/ptx.tensormap.replace.compile.pass.cpp @@ -31,7 +31,7 @@ * */ -#include "generated/tensormap_replace.inc" +#include "generated/tensormap_replace.h" int main(int, char**) {