-
Notifications
You must be signed in to change notification settings - Fork 190
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Overwrites all generated PTX tests and runs `pre-commit run --all-files`
- Loading branch information
1 parent
0b36a7d
commit eba90a1
Showing
55 changed files
with
550 additions
and
425 deletions.
There are no files selected for viewing
16 changes: 16 additions & 0 deletions
16
...xx/cuda/ptx/generated/barrier_cluster.inc → ...acxx/cuda/ptx/generated/barrier_cluster.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...acxx/cuda/ptx/generated/cp_async_bulk.inc → ...udacxx/cuda/ptx/generated/cp_async_bulk.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
// This file was automatically generated. Do not edit. | ||
|
||
// We use a special strategy to force the generation of the PTX. This is mainly | ||
// a fight against dead-code-elimination in the NVVM layer. | ||
// | ||
// We need this strategy because certain older versions of ptxas | ||
// segfault when a nonsensical sequence of PTX is generated. So instead, we try | ||
// to force the instantiation and compilation to PTX of all the overloads of the | ||
// PTX wrapping functions. | ||
// | ||
// We do this by writing a function pointer of each overload to the kernel | ||
// parameter `fn_ptr`. | ||
// | ||
// Because `fn_ptr` is possibly visible outside this translation unit, the | ||
// compiler must compile all the functions which are stored. | ||
|
||
// Test kernel for cuda::ptx::cp_async_bulk_commit_group. | ||
// It never calls the wrapper; it only writes the wrapper's address through the | ||
// kernel parameter `fn_ptr`. | ||
// | ||
// fn_ptr: output cursor; one void* slot is written and the local copy of the | ||
//         pointer is advanced for each stored overload (one overload here). | ||
__global__ void test_cp_async_bulk_commit_group(void** fn_ptr) | ||
{ | ||
// The body is only present when __cccl_ptx_isa is at least 800. | ||
#if __cccl_ptx_isa >= 800 | ||
// NV_IF_TARGET / NV_PROVIDES_SM_90 are defined outside this file; presumably they | ||
// restrict the parenthesized statement to targets providing SM_90 - confirm. | ||
NV_IF_TARGET( | ||
NV_PROVIDES_SM_90, | ||
( | ||
// cp.async.bulk.commit_group; | ||
// The static_cast picks the overload with signature void(); its address is | ||
// stored as void* and fn_ptr is post-incremented. | ||
* fn_ptr++ = reinterpret_cast<void*>(static_cast<void (*)()>(cuda::ptx::cp_async_bulk_commit_group));)); | ||
#endif // __cccl_ptx_isa >= 800 | ||
} |
10 changes: 0 additions & 10 deletions
10
libcudacxx/test/libcudacxx/cuda/ptx/generated/cp_async_bulk_commit_group.inc
This file was deleted.
Oops, something went wrong.
16 changes: 16 additions & 0 deletions
16
...ptx/generated/cp_async_bulk_multicast.inc → ...a/ptx/generated/cp_async_bulk_multicast.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...da/ptx/generated/cp_async_bulk_tensor.inc → ...cuda/ptx/generated/cp_async_bulk_tensor.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...erated/cp_async_bulk_tensor_multicast.inc → ...enerated/cp_async_bulk_tensor_multicast.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...tx/generated/cp_async_bulk_wait_group.inc → .../ptx/generated/cp_async_bulk_wait_group.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...da/ptx/generated/cp_reduce_async_bulk.inc → ...cuda/ptx/generated/cp_reduce_async_bulk.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...generated/cp_reduce_async_bulk_tensor.inc → ...x/generated/cp_reduce_async_bulk_tensor.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
...t/libcudacxx/cuda/ptx/generated/fence.inc → ...est/libcudacxx/cuda/ptx/generated/fence.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// This file was automatically generated. Do not edit. | ||
|
||
// We use a special strategy to force the generation of the PTX. This is mainly | ||
// a fight against dead-code-elimination in the NVVM layer. | ||
// | ||
// We need this strategy because certain older versions of ptxas | ||
// segfault when a nonsensical sequence of PTX is generated. So instead, we try | ||
// to force the instantiation and compilation to PTX of all the overloads of the | ||
// PTX wrapping functions. | ||
// | ||
// We do this by writing a function pointer of each overload to the kernel | ||
// parameter `fn_ptr`. | ||
// | ||
// Because `fn_ptr` is possibly visible outside this translation unit, the | ||
// compiler must compile all the functions which are stored. | ||
|
||
// Test kernel for cuda::ptx::fence_mbarrier_init. | ||
// It never calls the wrapper; it only writes the wrapper's address through the | ||
// kernel parameter `fn_ptr`. | ||
// | ||
// fn_ptr: output cursor; one void* slot is written and the local copy of the | ||
//         pointer is advanced for each stored overload (one overload here). | ||
__global__ void test_fence_mbarrier_init(void** fn_ptr) | ||
{ | ||
// The body is only present when __cccl_ptx_isa is at least 800. | ||
#if __cccl_ptx_isa >= 800 | ||
// NV_IF_TARGET / NV_PROVIDES_SM_90 are defined outside this file; presumably they | ||
// restrict the parenthesized statement to targets providing SM_90 - confirm. | ||
NV_IF_TARGET( | ||
NV_PROVIDES_SM_90, | ||
( | ||
// fence.mbarrier_init.release.cluster; // 3. | ||
// The static_cast picks the overload taking (sem_release_t, scope_cluster_t); | ||
// its address is stored as void* and fn_ptr is post-incremented. | ||
* fn_ptr++ = reinterpret_cast<void*>(static_cast<void (*)(cuda::ptx::sem_release_t, cuda::ptx::scope_cluster_t)>( | ||
cuda::ptx::fence_mbarrier_init));)); | ||
#endif // __cccl_ptx_isa >= 800 | ||
} |
11 changes: 0 additions & 11 deletions
11
libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_mbarrier_init.inc
This file was deleted.
Oops, something went wrong.
25 changes: 25 additions & 0 deletions
25
libcudacxx/test/libcudacxx/cuda/ptx/generated/fence_proxy_alias.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// This file was automatically generated. Do not edit. | ||
|
||
// We use a special strategy to force the generation of the PTX. This is mainly | ||
// a fight against dead-code-elimination in the NVVM layer. | ||
// | ||
// We need this strategy because certain older versions of ptxas | ||
// segfault when a nonsensical sequence of PTX is generated. So instead, we try | ||
// to force the instantiation and compilation to PTX of all the overloads of the | ||
// PTX wrapping functions. | ||
// | ||
// We do this by writing a function pointer of each overload to the kernel | ||
// parameter `fn_ptr`. | ||
// | ||
// Because `fn_ptr` is possibly visible outside this translation unit, the | ||
// compiler must compile all the functions which are stored. | ||
|
||
// Test kernel for cuda::ptx::fence_proxy_alias. | ||
// It never calls the wrapper; it only writes the wrapper's address through the | ||
// kernel parameter `fn_ptr`. | ||
// | ||
// fn_ptr: output cursor; one void* slot is written and the local copy of the | ||
//         pointer is advanced for each stored overload (one overload here). | ||
__global__ void test_fence_proxy_alias(void** fn_ptr) | ||
{ | ||
// The body is only present when __cccl_ptx_isa is at least 750. | ||
#if __cccl_ptx_isa >= 750 | ||
// NV_IF_TARGET / NV_PROVIDES_SM_70 are defined outside this file; presumably they | ||
// restrict the parenthesized statement to targets providing SM_70 - confirm. | ||
NV_IF_TARGET(NV_PROVIDES_SM_70, | ||
( | ||
// fence.proxy.alias; // 4. | ||
// The static_cast picks the overload with signature void(); its address is | ||
// stored as void* and fn_ptr is post-incremented. | ||
* fn_ptr++ = reinterpret_cast<void*>(static_cast<void (*)()>(cuda::ptx::fence_proxy_alias));)); | ||
#endif // __cccl_ptx_isa >= 750 | ||
} |
Oops, something went wrong.