Skip to content

Commit

Permalink
Fix mbarrier.init addressing (#1636)
Browse files Browse the repository at this point in the history
* Fix mbarrier.init addressing
  • Loading branch information
ahendriksen authored Apr 16, 2024
1 parent 285cb15 commit cefa6b8
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions libcudacxx/docs/ptx/instructions/mbarrier.init.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
- PTX ISA: [`mbarrier.init`](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init)

| C++ | PTX |
| [(0)](#0-mbarrier_init) `cuda::ptx::mbarrier_init`| `mbarrier.init.b64` |
| [(0)](#0-mbarrier_init) `cuda::ptx::mbarrier_init`| `mbarrier.init.shared.b64` |


### [(0)](#0-mbarrier_init) `mbarrier_init`
{: .no_toc }
```cuda
// mbarrier.init.b64 [addr], count; // PTX ISA 70, SM_80
// mbarrier.init.shared.b64 [addr], count; // PTX ISA 70, SM_80
template <typename=void>
__device__ static inline void mbarrier_init(
uint64_t* addr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX
// 9.7.12.15.9. Parallel Synchronization and Communication Instructions: mbarrier.init
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init
/*
// mbarrier.init.b64 [addr], count; // PTX ISA 70, SM_80
// mbarrier.init.shared.b64 [addr], count; // PTX ISA 70, SM_80
template <typename=void>
__device__ static inline void mbarrier_init(
uint64_t* addr,
Expand All @@ -46,7 +46,7 @@ _CCCL_DEVICE static inline void mbarrier_init(_CUDA_VSTD::uint64_t* __addr, cons
{
NV_IF_ELSE_TARGET(
NV_PROVIDES_SM_80,
(asm("mbarrier.init.b64 [%0], %1;"
(asm("mbarrier.init.shared.b64 [%0], %1;"
:
: "r"(__as_ptr_smem(__addr)), "r"(__count)
: "memory");),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ __global__ void test_mbarrier_init(void** fn_ptr)
#if __cccl_ptx_isa >= 700
NV_IF_TARGET(NV_PROVIDES_SM_80,
(
// mbarrier.init.b64 [addr], count;
// mbarrier.init.shared.b64 [addr], count;
* fn_ptr++ = reinterpret_cast<void*>(
static_cast<void (*)(uint64_t*, const uint32_t&)>(cuda::ptx::mbarrier_init));));
#endif // __cccl_ptx_isa >= 700
Expand Down

0 comments on commit cefa6b8

Please sign in to comment.