Skip to content

Commit

Permalink
Add missing non-volatile atomic overloads. (#1582)
Browse files Browse the repository at this point in the history
* Update codegen to output non-volatile atomic backends

* Switch atomic interface decorator to `_LIBCUDACXX_HOST_DEVICE`

* Add tests and fixes for volatile/non-volatile atomic CAS overloads.

* Add atomic codegen tests.

* Currently only enabled if FileCheck is found.
* Maybe add a preset to enable building this specifically?
* TODO: Maybe generate the tests.

* Create non-volatile overloads for all base atomic functions.

* Add `_LIBCUDACXX_FALLTHROUGH` to every atomic backend switch.

* Make atomic_ref APIs non-volatile

* Delete tests for volatile atomic_ref
  • Loading branch information
wmaxey authored Apr 10, 2024
1 parent eefcca8 commit bff5288
Show file tree
Hide file tree
Showing 17 changed files with 3,913 additions and 690 deletions.
60 changes: 30 additions & 30 deletions libcudacxx/codegen/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ int main() {
{ "bitwise", std::map<std::string, std::string>{ { "", ".b" } } },
{ "arithmetic", std::map<std::string, std::string>{ { "u", ".u" }, { "s", ".s" }, { "f", ".f" } } } };

std::vector<std::string> cv_qualifier{ "volatile "/*, ""*/ };
std::vector<std::string> cv_qualifier{ "volatile ", "" };

std::ofstream out("atomic_cuda_generated.h");

Expand Down Expand Up @@ -107,20 +107,20 @@ int main() {
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); break;\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_ACQUIRE:\n";
out << " case __ATOMIC_ACQ_REL:\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQ_REL: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_RELEASE: " << fencename("acq_rel"s, s.first) << "(); break;\n";
out << " case __ATOMIC_RELAXED: break;\n";
out << " default: assert(0);\n";
out << " }\n";
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST:\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_ACQUIRE:\n";
out << " case __ATOMIC_ACQ_REL:\n";
out << " case __ATOMIC_SEQ_CST: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQ_REL: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_RELEASE: __cuda_membar_" << s.first << "(); break;\n";
out << " case __ATOMIC_RELAXED: break;\n";
out << " default: assert(0);\n";
Expand All @@ -146,17 +146,17 @@ int main() {
out << " NV_DISPATCH_TARGET(\n";
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_load_acquire_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " case __ATOMIC_RELAXED: __cuda_load_relaxed_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " default: assert(0);\n";
out << " }\n";
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST: __cuda_membar_" << s.first << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: __cuda_membar_" << s.first << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_load_volatile_" << sz << "_" << s.first << "(__ptr, __tmp); __cuda_membar_" << s.first << "(); break;\n";
out << " case __ATOMIC_RELAXED: __cuda_load_volatile_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " default: assert(0);\n";
Expand Down Expand Up @@ -184,15 +184,15 @@ int main() {
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_RELEASE: __cuda_store_release_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "();\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_RELAXED: __cuda_store_relaxed_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " default: assert(0);\n";
out << " }\n";
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_RELEASE:\n";
out << " case __ATOMIC_SEQ_CST: __cuda_membar_" << s.first << "();\n";
out << " case __ATOMIC_RELEASE: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_SEQ_CST: __cuda_membar_" << s.first << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_RELAXED: __cuda_store_volatile_" << sz << "_" << s.first << "(__ptr, __tmp); break;\n";
out << " default: assert(0);\n";
out << " }\n";
Expand Down Expand Up @@ -246,8 +246,8 @@ int main() {
out << " NV_DISPATCH_TARGET(\n";
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__stronger_order_cuda(__success_memorder, __failure_memorder)) {\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_compare_exchange_acquire_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); break;\n";
out << " case __ATOMIC_ACQ_REL: __cuda_compare_exchange_acq_rel_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_compare_exchange_release_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); break;\n";
Expand All @@ -257,9 +257,9 @@ int main() {
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__stronger_order_cuda(__success_memorder, __failure_memorder)) {\n";
out << " case __ATOMIC_SEQ_CST:\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_compare_exchange_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); __cuda_membar_" << s.first << "(); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_membar_" << s.first << "(); __cuda_compare_exchange_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); break;\n";
out << " case __ATOMIC_RELAXED: __cuda_compare_exchange_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __old, __old_tmp, __tmp); break;\n";
Expand Down Expand Up @@ -307,8 +307,8 @@ int main() {
out << " NV_DISPATCH_TARGET(\n";
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_" << rmw.first << "_acquire_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_ACQ_REL: __cuda_" << rmw.first << "_acq_rel_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_" << rmw.first << "_release_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
Expand All @@ -318,9 +318,9 @@ int main() {
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST:\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_" << rmw.first << "_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); __cuda_membar_" << s.first << "(); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_membar_" << s.first << "(); __cuda_" << rmw.first << "_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_RELAXED: __cuda_" << rmw.first << "_volatile_" << type.first << sz << "_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
Expand Down Expand Up @@ -355,8 +355,8 @@ int main() {
out << " NV_DISPATCH_TARGET(\n";
out << " NV_PROVIDES_SM_70, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: " << fencename("sc"s, s.first) << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_fetch_add_acquire_u64_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_ACQ_REL: __cuda_fetch_add_acq_rel_u64_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_fetch_add_release_u64_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
Expand All @@ -365,9 +365,9 @@ int main() {
out << " ),\n";
out << " NV_IS_DEVICE, (\n";
out << " switch (__memorder) {\n";
out << " case __ATOMIC_SEQ_CST:\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "();\n";
out << " case __ATOMIC_CONSUME:\n";
out << " case __ATOMIC_SEQ_CST: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQ_REL: __cuda_membar_" << s.first << "(); _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_CONSUME: _LIBCUDACXX_FALLTHROUGH();\n";
out << " case __ATOMIC_ACQUIRE: __cuda_fetch_add_volatile_u64_" << s.first << "(__ptr, __tmp, __tmp); __cuda_membar_" << s.first << "(); break;\n";
out << " case __ATOMIC_RELEASE: __cuda_membar_" << s.first << "(); __cuda_fetch_add_volatile_u64_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
out << " case __ATOMIC_RELAXED: __cuda_fetch_add_volatile_u64_" << s.first << "(__ptr, __tmp, __tmp); break;\n";
Expand Down
30 changes: 2 additions & 28 deletions libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,21 +149,18 @@ struct atomic_ref
_LIBCUDACXX_HOST_DEVICE
constexpr atomic_ref(_Tp& __d) noexcept : __base(__d) {}

_LIBCUDACXX_HOST_DEVICE
_Tp operator=(_Tp __d) const volatile noexcept
{__base::store(__d); return __d;}
_LIBCUDACXX_HOST_DEVICE
_Tp operator=(_Tp __d) const noexcept
{__base::store(__d); return __d;}

_LIBCUDACXX_HOST_DEVICE
_Tp fetch_max(const _Tp & __op, memory_order __m = memory_order_seq_cst) const volatile noexcept
_Tp fetch_max(const _Tp & __op, memory_order __m = memory_order_seq_cst) const noexcept
{
return std::__detail::__cxx_atomic_fetch_max(&this->__a_, __op, __m);
}

_LIBCUDACXX_HOST_DEVICE
_Tp fetch_min(const _Tp & __op, memory_order __m = memory_order_seq_cst) const volatile noexcept
_Tp fetch_min(const _Tp & __op, memory_order __m = memory_order_seq_cst) const noexcept
{
return std::__detail::__cxx_atomic_fetch_min(&this->__a_, __op, __m);
}
Expand All @@ -180,53 +177,30 @@ struct atomic_ref<_Tp*, _Sco>
_LIBCUDACXX_HOST_DEVICE
constexpr atomic_ref(_Tp*& __d) noexcept : __base(__d) {}

_LIBCUDACXX_HOST_DEVICE
_Tp* operator=(_Tp* __d) const volatile noexcept
{__base::store(__d); return __d;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator=(_Tp* __d) const noexcept
{__base::store(__d); return __d;}

_LIBCUDACXX_HOST_DEVICE
_Tp* fetch_add(ptrdiff_t __op,
memory_order __m = memory_order_seq_cst) const volatile noexcept
{return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
_LIBCUDACXX_HOST_DEVICE
_Tp* fetch_add(ptrdiff_t __op,
memory_order __m = memory_order_seq_cst) const noexcept
{return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
_LIBCUDACXX_HOST_DEVICE
_Tp* fetch_sub(ptrdiff_t __op,
memory_order __m = memory_order_seq_cst) const volatile noexcept
{return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
_LIBCUDACXX_HOST_DEVICE
_Tp* fetch_sub(ptrdiff_t __op,
memory_order __m = memory_order_seq_cst) const noexcept
{return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}

_LIBCUDACXX_HOST_DEVICE
_Tp* operator++(int) const volatile noexcept {return fetch_add(1);}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator++(int) const noexcept {return fetch_add(1);}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator--(int) const volatile noexcept {return fetch_sub(1);}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator--(int) const noexcept {return fetch_sub(1);}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator++() const volatile noexcept {return fetch_add(1) + 1;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator++() const noexcept {return fetch_add(1) + 1;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator--() const volatile noexcept {return fetch_sub(1) - 1;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator--() const noexcept {return fetch_sub(1) - 1;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator+=(ptrdiff_t __op) const volatile noexcept {return fetch_add(__op) + __op;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator+=(ptrdiff_t __op) const noexcept {return fetch_add(__op) + __op;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator-=(ptrdiff_t __op) const volatile noexcept {return fetch_sub(__op) - __op;}
_LIBCUDACXX_HOST_DEVICE
_Tp* operator-=(ptrdiff_t __op) const noexcept {return fetch_sub(__op) - __op;}
};

Expand Down
24 changes: 0 additions & 24 deletions libcudacxx/include/cuda/std/detail/libcxx/include/atomic
Original file line number Diff line number Diff line change
Expand Up @@ -1883,8 +1883,6 @@ template <class _Tp>
_LIBCUDACXX_INLINE_VISIBILITY
explicit atomic_ref(_Tp& __ref) : __base(__ref) {}

_LIBCUDACXX_INLINE_VISIBILITY
_Tp operator=(_Tp __v) const noexcept {__base::store(__v); return __v;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp operator=(_Tp __v) const volatile noexcept {__base::store(__v); return __v;}
};
Expand All @@ -1907,49 +1905,27 @@ template <class _Tp>

_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator=(_Tp* __v) const noexcept {__base::store(__v); return __v;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator=(_Tp* __v) const volatile noexcept {__base::store(__v); return __v;}

_LIBCUDACXX_INLINE_VISIBILITY
_Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
const volatile noexcept
{return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
const noexcept
{return __cxx_atomic_fetch_add(&this->__a_, __op, __m);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
const volatile noexcept
{return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst)
const noexcept
{return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);}

_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator++(int) const volatile noexcept {return fetch_add(1);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator++(int) const noexcept {return fetch_add(1);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator--(int) const volatile noexcept {return fetch_sub(1);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator--(int) const noexcept {return fetch_sub(1);}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator++() const volatile noexcept {return fetch_add(1) + 1;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator++() const noexcept {return fetch_add(1) + 1;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator--() const volatile noexcept {return fetch_sub(1) - 1;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator--() const noexcept {return fetch_sub(1) - 1;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator+=(ptrdiff_t __op) const volatile noexcept {return fetch_add(__op) + __op;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator+=(ptrdiff_t __op) const noexcept {return fetch_add(__op) + __op;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator-=(ptrdiff_t __op) const volatile noexcept {return fetch_sub(__op) - __op;}
_LIBCUDACXX_INLINE_VISIBILITY
_Tp* operator-=(ptrdiff_t __op) const noexcept {return fetch_sub(__op) - __op;}
};

Expand Down
Loading

0 comments on commit bff5288

Please sign in to comment.