From 4a0addc88c74475d71d2dee198b972fb37f23052 Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Sat, 19 Oct 2024 17:39:30 +0200 Subject: [PATCH] Cleanup threading support (#2507) * Drop `_LIBCUDACXX_THREAD_ABI_VISIBILITY` it is always defined as `_LIBCUDACXX_HIDE_FROM_ABI` * Drop `_LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS` It is never defined outside of `__FreeBSD__` * Drop `thread_if` * Drop `__libcpp_thread_favorite_barrier_index` * Drop `_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE` It is always defined * Drop `_LIBCUDACXX_HAS_NO_PLATFORM_WAIT` It is always defined and only used once * Drop `_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL` * Move macro definition out of function declaration * Move threading_support * Split into the different threading mechanisms * Disentangle `_LIBCUDACXX_HAS_THREAD_API_EXTERNAL` from other backends * Fix missing qualifiers and attributes * Silence an ICC warning about `__libcpp_thread_id_equal` * Drop more unused functions from pthread * Move to `__thread` subfolder --- .../cuda/experimental/__async/stop_token.cuh | 2 +- .../cuda/std/__atomic/wait/notify_wait.h | 4 +- .../include/cuda/std/__atomic/wait/polling.h | 4 +- .../include/cuda/std/__barrier/barrier.h | 5 +- .../cuda/std/__semaphore/atomic_semaphore.h | 4 +- .../cuda/std/__thread/threading_support.h | 109 +++ .../std/__thread/threading_support_cuda.h | 48 ++ .../std/__thread/threading_support_external.h | 41 + .../std/__thread/threading_support_pthread.h | 184 ++++ .../std/__thread/threading_support_win32.h | 89 ++ .../cuda/std/detail/libcxx/include/__config | 8 - .../detail/libcxx/include/__threading_support | 795 ------------------ 12 files changed, 481 insertions(+), 812 deletions(-) create mode 100644 libcudacxx/include/cuda/std/__thread/threading_support.h create mode 100644 libcudacxx/include/cuda/std/__thread/threading_support_cuda.h create mode 100644 libcudacxx/include/cuda/std/__thread/threading_support_external.h create mode 100644 
libcudacxx/include/cuda/std/__thread/threading_support_pthread.h create mode 100644 libcudacxx/include/cuda/std/__thread/threading_support_win32.h delete mode 100644 libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support diff --git a/cudax/include/cuda/experimental/__async/stop_token.cuh b/cudax/include/cuda/experimental/__async/stop_token.cuh index 237585af218..e4a4574c32c 100644 --- a/cudax/include/cuda/experimental/__async/stop_token.cuh +++ b/cudax/include/cuda/experimental/__async/stop_token.cuh @@ -21,9 +21,9 @@ # pragma system_header #endif // no system header +#include #include #include -#include #include #include diff --git a/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h b/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h index f7dc51987ab..b79b22adad6 100644 --- a/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h +++ b/libcudacxx/include/cuda/std/__atomic/wait/notify_wait.h @@ -72,11 +72,11 @@ _LIBCUDACXX_HIDE_FROM_ABI void __atomic_wait( } if (__i < 12) { - __libcpp_thread_yield_processor(); + _CUDA_VSTD::__libcpp_thread_yield_processor(); } else { - __libcpp_thread_yield(); + _CUDA_VSTD::__libcpp_thread_yield(); } } while (__nonatomic_compare_equal(__atomic_load_dispatch(__a, __order, _Sco{}), __val)) diff --git a/libcudacxx/include/cuda/std/__atomic/wait/polling.h b/libcudacxx/include/cuda/std/__atomic/wait/polling.h index 8fe5f24b6db..cbb1a73a4b8 100644 --- a/libcudacxx/include/cuda/std/__atomic/wait/polling.h +++ b/libcudacxx/include/cuda/std/__atomic/wait/polling.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include _LIBCUDACXX_BEGIN_NAMESPACE_STD @@ -53,7 +53,7 @@ template _CCCL_HOST_DEVICE void __atomic_try_wait_slow_fallback( _Tp const volatile* __a, __atomic_underlying_remove_cv_t<_Tp> __val, memory_order __order, _Sco) { - __libcpp_thread_poll_with_backoff(__atomic_poll_tester<_Tp, _Sco>(__a, __val, __order)); + _CUDA_VSTD::__libcpp_thread_poll_with_backoff(__atomic_poll_tester<_Tp, _Sco>(__a, 
__val, __order)); } _LIBCUDACXX_END_NAMESPACE_STD diff --git a/libcudacxx/include/cuda/std/__barrier/barrier.h b/libcudacxx/include/cuda/std/__barrier/barrier.h index 5956a49d24e..491998132a8 100644 --- a/libcudacxx/include/cuda/std/__barrier/barrier.h +++ b/libcudacxx/include/cuda/std/__barrier/barrier.h @@ -192,11 +192,12 @@ class __barrier_base<__empty_completion, _Sco> } _LIBCUDACXX_HIDE_FROM_ABI void wait(arrival_token&& __phase) const { - __libcpp_thread_poll_with_backoff(__barrier_poll_tester_phase<__barrier_base>(this, _CUDA_VSTD::move(__phase))); + _CUDA_VSTD::__libcpp_thread_poll_with_backoff( + __barrier_poll_tester_phase<__barrier_base>(this, _CUDA_VSTD::move(__phase))); } _LIBCUDACXX_HIDE_FROM_ABI void wait_parity(bool __parity) const { - __libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity)); + _CUDA_VSTD::__libcpp_thread_poll_with_backoff(__barrier_poll_tester_parity<__barrier_base>(this, __parity)); } _LIBCUDACXX_HIDE_FROM_ABI void arrive_and_wait() { diff --git a/libcudacxx/include/cuda/std/__semaphore/atomic_semaphore.h b/libcudacxx/include/cuda/std/__semaphore/atomic_semaphore.h index c34745e8b8a..78013392630 100644 --- a/libcudacxx/include/cuda/std/__semaphore/atomic_semaphore.h +++ b/libcudacxx/include/cuda/std/__semaphore/atomic_semaphore.h @@ -74,7 +74,7 @@ class __atomic_semaphore _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time) { - return __libcpp_thread_poll_with_backoff( + return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( [this]() { ptrdiff_t const __old = __count.load(memory_order_acquire); return __old != 0 && __fetch_sub_if_slow(__old); @@ -157,7 +157,7 @@ class __atomic_semaphore<_Sco, 1> _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time) { - return __libcpp_thread_poll_with_backoff( + return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( [this]() { return try_acquire(); }, diff 
--git a/libcudacxx/include/cuda/std/__thread/threading_support.h b/libcudacxx/include/cuda/std/__thread/threading_support.h new file mode 100644 index 00000000000..b131dbf0f94 --- /dev/null +++ b/libcudacxx/include/cuda/std/__thread/threading_support.h @@ -0,0 +1,109 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_H +#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_LIBCUDACXX_HAS_NO_THREADS) + +# include + +# if defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) +# include +# endif // _LIBCUDACXX_HAS_THREAD_API_EXTERNAL + +# if defined(_LIBCUDACXX_HAS_THREAD_API_CUDA) +# include +# elif defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) +# include +# elif defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) +# include +# else // ^^^ _LIBCUDACXX_HAS_THREAD_API_WIN32 ^^^ / vvv Unknown Thread API vvv +# error "Unknown Thread API" +# endif // Unknown Thread API + +_CCCL_PUSH_MACROS + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +# define _LIBCUDACXX_POLLING_COUNT 16 + +# if defined(__aarch64__) +# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("yield" :::);) +# elif defined(__x86_64__) +# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("pause" :::);) +# else // ^^^ __x86_64__ ^^^ / vvv !__x86_64__ vvv +# define __LIBCUDACXX_ASM_THREAD_YIELD (;) +# endif // !__x86_64__ + +_LIBCUDACXX_HIDE_FROM_ABI void 
__libcpp_thread_yield_processor() +{ + NV_IF_TARGET(NV_IS_HOST, __LIBCUDACXX_ASM_THREAD_YIELD) +} + +template +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_thread_poll_with_backoff( + _Fn&& __f, _CUDA_VSTD::chrono::nanoseconds __max = _CUDA_VSTD::chrono::nanoseconds::zero()) +{ + _CUDA_VSTD::chrono::high_resolution_clock::time_point const __start = + _CUDA_VSTD::chrono::high_resolution_clock::now(); + for (int __count = 0;;) + { + if (__f()) + { + return true; + } + if (__count < _LIBCUDACXX_POLLING_COUNT) + { + if (__count > (_LIBCUDACXX_POLLING_COUNT >> 1)) + { + _CUDA_VSTD::__libcpp_thread_yield_processor(); + } + __count += 1; + continue; + } + _CUDA_VSTD::chrono::high_resolution_clock::duration const __elapsed = + _CUDA_VSTD::chrono::high_resolution_clock::now() - __start; + if (__max != _CUDA_VSTD::chrono::nanoseconds::zero() && __max < __elapsed) + { + return false; + } + _CUDA_VSTD::chrono::nanoseconds const __step = __elapsed / 4; + if (__step >= _CUDA_VSTD::chrono::milliseconds(1)) + { + _CUDA_VSTD::__libcpp_thread_sleep_for(_CUDA_VSTD::chrono::milliseconds(1)); + } + else if (__step >= _CUDA_VSTD::chrono::microseconds(10)) + { + _CUDA_VSTD::__libcpp_thread_sleep_for(__step); + } + else + { + _CUDA_VSTD::__libcpp_thread_yield(); + } + } +} + +_LIBCUDACXX_END_NAMESPACE_STD + +_CCCL_POP_MACROS + +#endif // !_LIBCUDACXX_HAS_NO_THREADS + +#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_H diff --git a/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h b/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h new file mode 100644 index 00000000000..c361b0f7e06 --- /dev/null +++ b/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h @@ -0,0 +1,48 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H +#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_CUDA) + +# include +# include + +_CCCL_PUSH_MACROS + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield() {} + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(_CUDA_VSTD::chrono::nanoseconds __ns) +{ + NV_IF_TARGET(NV_IS_DEVICE, + (auto const __step = __ns.count(); assert(__step < numeric_limits::max()); + asm volatile("nanosleep.u32 %0;" ::"r"((unsigned) __step) + :);)) +} + +_LIBCUDACXX_END_NAMESPACE_STD + +_CCCL_POP_MACROS + +#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_CUDA + +#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_CUDA_H diff --git a/libcudacxx/include/cuda/std/__thread/threading_support_external.h b/libcudacxx/include/cuda/std/__thread/threading_support_external.h new file mode 100644 index 00000000000..639e117355c --- /dev/null +++ b/libcudacxx/include/cuda/std/__thread/threading_support_external.h @@ -0,0 +1,41 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H +#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) + +# include + +_CCCL_PUSH_MACROS + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield(); + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(_CUDA_VSTD::chrono::nanoseconds __ns); + +_LIBCUDACXX_END_NAMESPACE_STD + +_CCCL_POP_MACROS + +#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_EXTERNAL + +#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_EXTERNAL_H diff --git a/libcudacxx/include/cuda/std/__thread/threading_support_pthread.h b/libcudacxx/include/cuda/std/__thread/threading_support_pthread.h new file mode 100644 index 00000000000..4b1af8c7bc2 --- /dev/null +++ b/libcudacxx/include/cuda/std/__thread/threading_support_pthread.h @@ -0,0 +1,184 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_PTHREAD_H +#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_PTHREAD_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) + +# include +# include + +# include +# include +# include +# include +# if defined(__APPLE__) +# include +# endif // __APPLE__ +# if defined(__linux__) +# include +# include +# include +# endif // __linux__ + +_CCCL_PUSH_MACROS + +typedef ::timespec __libcpp_timespec_t; + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// Mutex +typedef pthread_mutex_t __libcpp_mutex_t; +# define _LIBCUDACXX_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + +typedef pthread_mutex_t __libcpp_recursive_mutex_t; + +// Condition Variable +typedef pthread_cond_t __libcpp_condvar_t; +# define _LIBCUDACXX_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER + +// Semaphore +# if defined(__APPLE__) +typedef dispatch_semaphore_t __libcpp_semaphore_t; +# define _LIBCUDACXX_SEMAPHORE_MAX numeric_limits::max() +# else // ^^^ __APPLE__ ^^^ / vvv !__APPLE__ vvv +typedef sem_t __libcpp_semaphore_t; +# define _LIBCUDACXX_SEMAPHORE_MAX SEM_VALUE_MAX +# endif // !__APPLE__ + +// Execute once +typedef pthread_once_t __libcpp_exec_once_flag; +# define _LIBCUDACXX_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT + +// Thread id +typedef pthread_t __libcpp_thread_id; + +// Thread +# define _LIBCUDACXX_NULL_THREAD 0U + +typedef pthread_t __libcpp_thread_t; + +// Thread Local Storage +typedef pthread_key_t __libcpp_tls_key; + +# define _LIBCUDACXX_TLS_DESTRUCTOR_CC + +_LIBCUDACXX_HIDE_FROM_ABI __libcpp_timespec_t 
__libcpp_to_timespec(const _CUDA_VSTD::chrono::nanoseconds& __ns) +{ + using namespace chrono; + seconds __s = duration_cast(__ns); + __libcpp_timespec_t __ts; + typedef decltype(__ts.tv_sec) ts_sec; + constexpr ts_sec __ts_sec_max = numeric_limits::max(); + + if (__s.count() < __ts_sec_max) + { + __ts.tv_sec = static_cast(__s.count()); + __ts.tv_nsec = static_cast((__ns - __s).count()); + } + else + { + __ts.tv_sec = __ts_sec_max; + __ts.tv_nsec = 999999999; // (10^9 - 1) + } + return __ts; +} + +// Semaphore +# if defined(__APPLE__) + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) +{ + return (*__sem = dispatch_semaphore_create(__init)) != nullptr; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) +{ + dispatch_release(*__sem); + return true; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) +{ + dispatch_semaphore_signal(*__sem); + return true; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) +{ + return dispatch_semaphore_wait(*__sem, DISPATCH_TIME_FOREVER) == 0; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool +__libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, _CUDA_VSTD::chrono::nanoseconds const& __ns) +{ + return dispatch_semaphore_wait(*__sem, dispatch_time(DISPATCH_TIME_NOW, __ns.count())) == 0; +} + +# else // ^^^ __APPLE__ ^^^ / vvv !__APPLE__ vvv + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) +{ + return sem_init(__sem, 0, __init) == 0; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) +{ + return sem_destroy(__sem) == 0; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) +{ + return sem_post(__sem) == 0; +} + +_LIBCUDACXX_HIDE_FROM_ABI bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) +{ + return sem_wait(__sem) == 0; +} + 
+_LIBCUDACXX_HIDE_FROM_ABI bool +__libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, _CUDA_VSTD::chrono::nanoseconds const& __ns) +{ + __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns); + return sem_timedwait(__sem, &__ts) == 0; +} + +# endif // !__APPLE__ + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield() +{ + sched_yield(); +} + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(_CUDA_VSTD::chrono::nanoseconds __ns) +{ + __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns); + while (nanosleep(&__ts, &__ts) == -1 && errno == EINTR) + ; +} + +_LIBCUDACXX_END_NAMESPACE_STD + +_CCCL_POP_MACROS + +#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_PTHREAD + +#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_PTHREAD_H diff --git a/libcudacxx/include/cuda/std/__thread/threading_support_win32.h b/libcudacxx/include/cuda/std/__thread/threading_support_win32.h new file mode 100644 index 00000000000..ff8bd6a35fe --- /dev/null +++ b/libcudacxx/include/cuda/std/__thread/threading_support_win32.h @@ -0,0 +1,89 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCUDACXX___THREAD_THREADING_SUPPORT_WIN32_H +#define _LIBCUDACXX___THREAD_THREADING_SUPPORT_WIN32_H + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#if !defined(_LIBCUDACXX_HAS_NO_THREADS) && defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) + +# include + +# include +# include + +_CCCL_PUSH_MACROS + +_LIBCUDACXX_BEGIN_NAMESPACE_STD + +// Mutex +typedef void* __libcpp_mutex_t; +# define _LIBCUDACXX_MUTEX_INITIALIZER 0 + +# if defined(_M_IX86) || defined(__i386__) || defined(_M_ARM) || defined(__arm__) +typedef void* __libcpp_recursive_mutex_t[6]; +# elif defined(_M_AMD64) || defined(__x86_64__) || defined(_M_ARM64) || defined(__aarch64__) +typedef void* __libcpp_recursive_mutex_t[5]; +# else +# error Unsupported architecture +# endif + +// Condition Variable +typedef void* __libcpp_condvar_t; +# define _LIBCUDACXX_CONDVAR_INITIALIZER 0 + +// Semaphore +typedef void* __libcpp_semaphore_t; + +// Execute Once +typedef void* __libcpp_exec_once_flag; +# define _LIBCUDACXX_EXEC_ONCE_INITIALIZER 0 + +// Thread ID +typedef long __libcpp_thread_id; + +// Thread +# define _LIBCUDACXX_NULL_THREAD 0U + +typedef void* __libcpp_thread_t; + +// Thread Local Storage +typedef long __libcpp_tls_key; + +# define _LIBCUDACXX_TLS_DESTRUCTOR_CC __stdcall + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield() +{ + SwitchToThread(); +} + +_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_sleep_for(chrono::nanoseconds __ns) +{ + using namespace chrono; + // round up to the nearest millisecond + milliseconds __ms = duration_cast(__ns + chrono::nanoseconds(999999)); + Sleep(static_cast(__ms.count())); +} + 
+_LIBCUDACXX_END_NAMESPACE_STD + +_CCCL_POP_MACROS + +#endif // !_LIBCUDACXX_HAS_NO_THREADS && _LIBCUDACXX_HAS_THREAD_API_WIN32 + +#endif // _LIBCUDACXX___THREAD_THREADING_SUPPORT_WIN32_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config index b1afb184af2..d91df148713 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__config +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__config @@ -513,14 +513,6 @@ typedef __char32_t char32_t; # define _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK # endif // _LIBCUDACXX_HAS_NO_MONOTONIC_CLOCK -# ifndef _LIBCUDACXX_HAS_NO_PLATFORM_WAIT -# define _LIBCUDACXX_HAS_NO_PLATFORM_WAIT -# endif // _LIBCUDACXX_HAS_NO_PLATFORM_WAIT - -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -# define _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE -# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - # ifndef _LIBCUDACXX_HAS_NO_WCHAR_H # define _LIBCUDACXX_HAS_NO_WCHAR_H # endif // _LIBCUDACXX_HAS_NO_WCHAR_H diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support b/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support deleted file mode 100644 index 5240ff7702e..00000000000 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__threading_support +++ /dev/null @@ -1,795 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCUDACXX_THREADING_SUPPORT -#define _LIBCUDACXX_THREADING_SUPPORT - -#include - -#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -#endif // no system header - -#include -#include -#include -#include - -_CCCL_PUSH_MACROS - -#if defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) -# ifndef __cuda_std__ -# include <__external_threading> -# else -# define _LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_HIDE_FROM_ABI -# endif -#elif !defined(_LIBCUDACXX_HAS_NO_THREADS) - -# if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) -# include -# include -# include -# if defined(__APPLE__) -# include -# endif -# if defined(__linux__) -# include -# include -# include -# endif -# endif - -# if defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) -# include -# include -# endif - -# if defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL) -# define _LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_HIDE_FROM_ABI -# else -# define _LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_HIDE_FROM_ABI -# endif - -# if defined(__FreeBSD__) && defined(_CCCL_COMPILER_CLANG) && _CCCL_HAS_ATTRIBUTE(no_thread_safety_analysis) -# define _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) -# else -# define _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS -# endif - -typedef ::timespec __libcpp_timespec_t; -#endif // !defined(_LIBCUDACXX_HAS_NO_THREADS) - -_LIBCUDACXX_BEGIN_NAMESPACE_STD - -#if !defined(_LIBCUDACXX_HAS_NO_THREADS) - -# define _LIBCUDACXX_POLLING_COUNT 16 - -_LIBCUDACXX_HIDE_FROM_ABI void __libcpp_thread_yield_processor(){ -# if defined(__aarch64__) -# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("yield" 
:::);) -# elif defined(__x86_64__) -# define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("pause" :::);) -# else -# define __LIBCUDACXX_ASM_THREAD_YIELD (;) -# endif - NV_IF_TARGET(NV_IS_HOST, __LIBCUDACXX_ASM_THREAD_YIELD)} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY void __libcpp_thread_yield(); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void __libcpp_thread_sleep_for(chrono::nanoseconds __ns); - -template -_LIBCUDACXX_THREAD_ABI_VISIBILITY bool -__libcpp_thread_poll_with_backoff(_Fn&& __f, chrono::nanoseconds __max = chrono::nanoseconds::zero()); - -# if defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) -// Mutex -typedef pthread_mutex_t __libcpp_mutex_t; -# define _LIBCUDACXX_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER - -typedef pthread_mutex_t __libcpp_recursive_mutex_t; - -// Condition Variable -typedef pthread_cond_t __libcpp_condvar_t; -# define _LIBCUDACXX_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER - -// Semaphore -# if defined(__APPLE__) -typedef dispatch_semaphore_t __libcpp_semaphore_t; -# define _LIBCUDACXX_SEMAPHORE_MAX numeric_limits::max() -# else -typedef sem_t __libcpp_semaphore_t; -# define _LIBCUDACXX_SEMAPHORE_MAX SEM_VALUE_MAX -# endif - -// Execute once -typedef pthread_once_t __libcpp_exec_once_flag; -# define _LIBCUDACXX_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT - -// Thread id -typedef pthread_t __libcpp_thread_id; - -// Thread -# define _LIBCUDACXX_NULL_THREAD 0U - -typedef pthread_t __libcpp_thread_t; - -// Thread Local Storage -typedef pthread_key_t __libcpp_tls_key; - -# define _LIBCUDACXX_TLS_DESTRUCTOR_CC -# elif !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) -// Mutex -typedef void* __libcpp_mutex_t; -# define _LIBCUDACXX_MUTEX_INITIALIZER 0 - -# if defined(_M_IX86) || defined(__i386__) || defined(_M_ARM) || defined(__arm__) -typedef void* __libcpp_recursive_mutex_t[6]; -# elif defined(_M_AMD64) || defined(__x86_64__) || defined(_M_ARM64) || defined(__aarch64__) -typedef void* __libcpp_recursive_mutex_t[5]; -# else -# error Unsupported architecture -# 
endif - -// Condition Variable -typedef void* __libcpp_condvar_t; -# define _LIBCUDACXX_CONDVAR_INITIALIZER 0 - -// Semaphore -typedef void* __libcpp_semaphore_t; - -// Execute Once -typedef void* __libcpp_exec_once_flag; -# define _LIBCUDACXX_EXEC_ONCE_INITIALIZER 0 - -// Thread ID -typedef long __libcpp_thread_id; - -// Thread -# define _LIBCUDACXX_NULL_THREAD 0U - -typedef void* __libcpp_thread_t; - -// Thread Local Storage -typedef long __libcpp_tls_key; - -# define _LIBCUDACXX_TLS_DESTRUCTOR_CC __stdcall -# endif // !defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) && !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) - -# if !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_timespec_t __libcpp_to_timespec(const chrono::nanoseconds& __ns); - -// Mutex -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int -__libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS bool -__libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int -__libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS bool -__libcpp_mutex_trylock(__libcpp_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_mutex_destroy(__libcpp_mutex_t* __m); - -// Condition variable -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int 
__libcpp_condvar_signal(__libcpp_condvar_t* __cv); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int -__libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY _LIBCUDACXX_NO_THREAD_SAFETY_ANALYSIS int -__libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); - -// Semaphore -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns); - -// Execute once -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_execute_once(__libcpp_exec_once_flag* flag, void (*init_routine)()); - -// Thread id -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2); - -// Thread -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_isnull(const __libcpp_thread_t* __t); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_thread_create(__libcpp_thread_t* __t, void* (*__func)(void*), void* __arg); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_thread_id __libcpp_thread_get_current_id(); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t* __t); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int 
__libcpp_thread_join(__libcpp_thread_t* __t); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_thread_detach(__libcpp_thread_t* __t); - -// Thread local storage -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_tls_create(__libcpp_tls_key* __key, void(_LIBCUDACXX_TLS_DESTRUCTOR_CC* __at_exit)(void*)); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void* __libcpp_tls_get(__libcpp_tls_key __key); - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_tls_set(__libcpp_tls_key __key, void* __p); - -# endif // !defined(_LIBCUDACXX_HAS_THREAD_API_EXTERNAL) - -# if !defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL) - -# if defined(_LIBCUDACXX_HAS_THREAD_API_CUDA) - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void __libcpp_thread_yield() {} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void __libcpp_thread_sleep_for(chrono::nanoseconds __ns) -{ - NV_IF_TARGET(NV_IS_DEVICE, - (auto const __step = __ns.count(); assert(__step < numeric_limits::max()); - asm volatile("nanosleep.u32 %0;" ::"r"((unsigned) __step) - :);)) -} - -# elif defined(_LIBCUDACXX_HAS_THREAD_API_PTHREAD) - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_timespec_t __libcpp_to_timespec(const chrono::nanoseconds& __ns) -{ - using namespace chrono; - seconds __s = duration_cast(__ns); - __libcpp_timespec_t __ts; - typedef decltype(__ts.tv_sec) ts_sec; - constexpr ts_sec __ts_sec_max = numeric_limits::max(); - - if (__s.count() < __ts_sec_max) - { - __ts.tv_sec = static_cast(__s.count()); - __ts.tv_nsec = static_cast((__ns - __s).count()); - } - else - { - __ts.tv_sec = __ts_sec_max; - __ts.tv_nsec = 999999999; // (10^9 - 1) - } - return __ts; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t* __m) -{ - pthread_mutexattr_t attr; - int __ec = pthread_mutexattr_init(&attr); - if (__ec) - { - return __ec; - } - __ec = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); - if (__ec) - { - pthread_mutexattr_destroy(&attr); - return 
__ec; - } - __ec = pthread_mutex_init(__m, &attr); - if (__ec) - { - pthread_mutexattr_destroy(&attr); - return __ec; - } - __ec = pthread_mutexattr_destroy(&attr); - if (__ec) - { - pthread_mutex_destroy(__m); - return __ec; - } - return 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t* __m) -{ - return pthread_mutex_lock(__m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t* __m) -{ - return pthread_mutex_trylock(__m) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_unlock(__libcpp_mutex_t* __m) -{ - return pthread_mutex_unlock(__m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t* __m) -{ - return pthread_mutex_destroy(__m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_mutex_lock(__libcpp_mutex_t* __m) -{ - return pthread_mutex_lock(__m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_mutex_trylock(__libcpp_mutex_t* __m) -{ - return pthread_mutex_trylock(__m) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_mutex_unlock(__libcpp_mutex_t* __m) -{ - return pthread_mutex_unlock(__m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_mutex_destroy(__libcpp_mutex_t* __m) -{ - return pthread_mutex_destroy(__m); -} - -// Condition Variable -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_signal(__libcpp_condvar_t* __cv) -{ - return pthread_cond_signal(__cv); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_broadcast(__libcpp_condvar_t* __cv) -{ - return pthread_cond_broadcast(__cv); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m) -{ - return pthread_cond_wait(__cv, __m); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_timedwait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m, __libcpp_timespec_t* __ts) -{ - return pthread_cond_timedwait(__cv, 
__m, __ts); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv) -{ - return pthread_cond_destroy(__cv); -} - -// Semaphore -# if defined(__APPLE__) - -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) -{ - return (*__sem = dispatch_semaphore_create(__init)) != nullptr; -} - -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) -{ - dispatch_release(*__sem); - return true; -} - -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) -{ - dispatch_semaphore_signal(*__sem); - return true; -} - -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) -{ - return dispatch_semaphore_wait(*__sem, DISPATCH_TIME_FOREVER) == 0; -} - -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns) -{ - return dispatch_semaphore_wait(*__sem, dispatch_time(DISPATCH_TIME_NOW, __ns.count())) == 0; -} - -# else - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) -{ - return sem_init(__sem, 0, __init) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) -{ - return sem_destroy(__sem) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) -{ - return sem_post(__sem) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) -{ - return sem_wait(__sem) == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns) -{ - __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns); - return sem_timedwait(__sem, &__ts) == 0; -} - -# endif //__APPLE__ - -// Execute once -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_execute_once(__libcpp_exec_once_flag* flag, void (*init_routine)()) -{ - return pthread_once(flag, init_routine); -} - -// Thread id -// Returns non-zero if the thread ids are equal, 
otherwise 0 -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_equal(__libcpp_thread_id t1, __libcpp_thread_id t2) -{ - return pthread_equal(t1, t2) != 0; -} - -// Returns non-zero if t1 < t2, otherwise 0 -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_id_less(__libcpp_thread_id t1, __libcpp_thread_id t2) -{ - return t1 < t2; -} - -// Thread -_LIBCUDACXX_THREAD_ABI_VISIBILITY -bool __libcpp_thread_isnull(const __libcpp_thread_t* __t) -{ - return *__t == 0; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_thread_create(__libcpp_thread_t* __t, void* (*__func)(void*), void* __arg) -{ - return pthread_create(__t, 0, __func, __arg); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_thread_id __libcpp_thread_get_current_id() -{ - return pthread_self(); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t* __t) -{ - return *__t; -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_thread_join(__libcpp_thread_t* __t) -{ - return pthread_join(*__t, 0); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_thread_detach(__libcpp_thread_t* __t) -{ - return pthread_detach(*__t); -} - -// Thread local storage -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_tls_create(__libcpp_tls_key* __key, void (*__at_exit)(void*)) -{ - return pthread_key_create(__key, __at_exit); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void* __libcpp_tls_get(__libcpp_tls_key __key) -{ - return pthread_getspecific(__key); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -int __libcpp_tls_set(__libcpp_tls_key __key, void* __p) -{ - return pthread_setspecific(__key, __p); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void __libcpp_thread_yield() -{ - sched_yield(); -} - -_LIBCUDACXX_THREAD_ABI_VISIBILITY -void __libcpp_thread_sleep_for(chrono::nanoseconds __ns) -{ - __libcpp_timespec_t __ts = __libcpp_to_timespec(__ns); - while (nanosleep(&__ts, &__ts) == -1 && errno == EINTR) - ; -} - -# if defined(__linux__) && 
!defined(_LIBCUDACXX_HAS_NO_PLATFORM_WAIT) - -# define _LIBCUDACXX_HAS_PLATFORM_WAIT - -typedef int __libcpp_platform_wait_t; - -template -struct __libcpp_platform_wait_uses_type -{ - enum - { - __value = is_same<__remove_cv_t<_Tp>, __libcpp_platform_wait_t>::value - }; -}; - -template ::__value, int>::type = 1> -void __libcpp_platform_wait(_Tp const* ptr, _Tp val, void const* timeout) -{ - syscall(SYS_futex, ptr, FUTEX_WAIT_PRIVATE, val, timeout, 0, 0); -} - -template ::__value, int>::type = 1> -void __libcpp_platform_wake(_Tp const* ptr, bool all) -{ - syscall(SYS_futex, ptr, FUTEX_WAKE_PRIVATE, all ? INT_MAX : 1, 0, 0, 0); -} - -# endif // defined(__linux__) && !defined(_LIBCUDACXX_HAS_NO_PLATFORM_WAIT) - -# elif defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) - -void __libcpp_thread_yield() -{ - SwitchToThread(); -} - -void __libcpp_thread_sleep_for(chrono::nanoseconds __ns) -{ - using namespace chrono; - // round-up to the nearest milisecond - milliseconds __ms = duration_cast(__ns + chrono::nanoseconds(999999)); - Sleep(static_cast(__ms.count())); -} - -# endif // defined(_LIBCUDACXX_HAS_THREAD_API_WIN32) - -# endif // !defined(_LIBCUDACXX_HAS_THREAD_LIBRARY_EXTERNAL) || defined(_LIBCUDACXX_BUILDING_THREAD_LIBRARY_EXTERNAL) - -template -_LIBCUDACXX_THREAD_ABI_VISIBILITY bool __libcpp_thread_poll_with_backoff(_Fn&& __f, chrono::nanoseconds __max) -{ - chrono::high_resolution_clock::time_point const __start = chrono::high_resolution_clock::now(); - for (int __count = 0;;) - { - if (__f()) - { - return true; - } - if (__count < _LIBCUDACXX_POLLING_COUNT) - { - if (__count > (_LIBCUDACXX_POLLING_COUNT >> 1)) - { - __libcpp_thread_yield_processor(); - } - __count += 1; - continue; - } - chrono::high_resolution_clock::duration const __elapsed = chrono::high_resolution_clock::now() - __start; - if (__max != chrono::nanoseconds::zero() && __max < __elapsed) - { - return false; - } - chrono::nanoseconds const __step = __elapsed / 4; - if (__step >= chrono::milliseconds(1)) 
- { - __libcpp_thread_sleep_for(chrono::milliseconds(1)); - } - else if (__step >= chrono::microseconds(10)) - { - __libcpp_thread_sleep_for(__step); - } - else - { - __libcpp_thread_yield(); - } - } -} - -# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - -struct alignas(64) __libcpp_contention_t -{ -# if defined(_LIBCUDACXX_HAS_PLATFORM_WAIT) - ptrdiff_t __waiters = 0; - __libcpp_platform_wait_t __version = 0; -# else - ptrdiff_t __credit = 0; - __libcpp_mutex_t __mutex = _LIBCUDACXX_MUTEX_INITIALIZER; - __libcpp_condvar_t __condvar = _LIBCUDACXX_CONDVAR_INITIALIZER; -# endif -}; - -_LIBCUDACXX_HIDE_FROM_ABI __libcpp_contention_t* __libcpp_contention_state(void const volatile* p) noexcept; - -# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - -# ifndef __cuda_std__ - -class _CCCL_TYPE_VISIBILITY_DEFAULT thread; -class _CCCL_TYPE_VISIBILITY_DEFAULT __thread_id; - -namespace this_thread -{ - -_LIBCUDACXX_HIDE_FROM_ABI __thread_id get_id() noexcept; - -} // namespace this_thread - -template <> -struct hash<__thread_id>; - -class _CCCL_TYPE_VISIBILITY_DEFAULT __thread_id -{ - // FIXME: pthread_t is a pointer on Darwin but a long on Linux. - // NULL is the no-thread value on Darwin. Someone needs to check - // on other platforms. We assume 0 works everywhere for now. 
- __libcpp_thread_id __id_; - -public: - _LIBCUDACXX_HIDE_FROM_ABI __thread_id() noexcept - : __id_(0) - {} - - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator==(__thread_id __x, __thread_id __y) noexcept - { // don't pass id==0 to underlying routines - if (__x.__id_ == 0) - { - return __y.__id_ == 0; - } - if (__y.__id_ == 0) - { - return false; - } - return __libcpp_thread_id_equal(__x.__id_, __y.__id_); - } - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator!=(__thread_id __x, __thread_id __y) noexcept - { - return !(__x == __y); - } - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator<(__thread_id __x, __thread_id __y) noexcept - { // id==0 is always less than any other thread_id - if (__x.__id_ == 0) - { - return __y.__id_ != 0; - } - if (__y.__id_ == 0) - { - return false; - } - return __libcpp_thread_id_less(__x.__id_, __y.__id_); - } - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator<=(__thread_id __x, __thread_id __y) noexcept - { - return !(__y < __x); - } - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator>(__thread_id __x, __thread_id __y) noexcept - { - return __y < __x; - } - friend _LIBCUDACXX_HIDE_FROM_ABI bool operator>=(__thread_id __x, __thread_id __y) noexcept - { - return !(__x < __y); - } - - _LIBCUDACXX_HIDE_FROM_ABI void __reset() - { - __id_ = 0; - } - -# ifndef __cuda_std__ - template - friend _LIBCUDACXX_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& - operator<<(basic_ostream<_CharT, _Traits>& __os, __thread_id __id); -# endif - -private: - _LIBCUDACXX_HIDE_FROM_ABI __thread_id(__libcpp_thread_id __id) - : __id_(__id) - {} - - friend __thread_id this_thread::get_id() noexcept; - friend class _CCCL_TYPE_VISIBILITY_DEFAULT thread; - friend struct _CCCL_TYPE_VISIBILITY_DEFAULT hash<__thread_id>; -}; - -namespace this_thread -{ - -_LIBCUDACXX_HIDE_FROM_ABI __thread_id get_id() noexcept -{ - return __libcpp_thread_get_current_id(); -} - -} // namespace this_thread - -# endif // __cuda_std__ - -#endif // !_LIBCUDACXX_HAS_NO_THREADS - 
-_LIBCUDACXX_END_NAMESPACE_STD - -_CCCL_POP_MACROS - -#endif // _LIBCUDACXX_THREADING_SUPPORT