diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config index b2babea4617..3b9bac23ba4 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__config +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__config @@ -1152,8 +1152,14 @@ typedef __char32_t char32_t; #endif #endif // _LIBCUDACXX_HAS_NO_LONG_DOUBLE +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 +#if !__has_include() +# define _LIBCUDACXX_HAS_NO_NVFP16 +#endif +#endif + #ifndef _LIBCUDACXX_HAS_NO_NVBF16 -#if defined(CUB_DISABLE_BF16_SUPPORT) +#if defined(CUB_DISABLE_BF16_SUPPORT) || !__has_include() # define _LIBCUDACXX_HAS_NO_NVBF16 #endif #endif // _LIBCUDACXX_HAS_NO_NVBF16 diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__type_traits/promote.h b/libcudacxx/include/cuda/std/detail/libcxx/include/__type_traits/promote.h index 40825c79674..d4fce2e81a9 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__type_traits/promote.h +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__type_traits/promote.h @@ -20,7 +20,9 @@ #include "../cstddef" #if defined(__cuda_std__) && defined(_LIBCUDACXX_CUDACC) +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 #include +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 #include #endif @@ -41,7 +43,9 @@ struct __numeric_type { _LIBCUDACXX_INLINE_VISIBILITY static void __test(...); #if defined(__cuda_std__) && defined(_LIBCUDACXX_CUDACC) +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 _LIBCUDACXX_INLINE_VISIBILITY static __half __test(__half); +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 _LIBCUDACXX_INLINE_VISIBILITY static __nv_bfloat16 __test(__nv_bfloat16); #endif diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/cmath b/libcudacxx/include/cuda/std/detail/libcxx/include/cmath index a26a92cac79..d77b3f11e94 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/cmath +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/cmath @@ -321,7 +321,9 @@ long double truncl(long double x); #endif // __cuda_std__ #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 #include +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 #include #endif @@ -621,6 +623,7 @@ hypot(_A1 __lcpp_x, _A2 __lcpp_y, _A3 __lcpp_z) noexcept #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half sin(__half __v) { @@ -651,6 +654,7 @@ __half sin(__half __v) }) ) } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -663,11 +667,13 @@ __nv_bfloat16 sin(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half sinh(__half __v) { return __half(_CUDA_VSTD::sinh(float(__v))); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -677,6 +683,7 @@ __nv_bfloat16 sinh(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half cos(__half __v) { @@ -702,6 +709,7 @@ __half cos(__half __v) }) ) } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -714,11 +722,13 @@ __nv_bfloat16 cos(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half cosh(__half __v) { return __half(_CUDA_VSTD::cosh(float(__v))); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -728,6 +738,7 @@ __nv_bfloat16 cosh(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half exp(__half __v) { @@ -753,6 +764,7 @@ __half exp(__half __v) }) ) } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -765,11 +777,13 @@ __nv_bfloat16 exp(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY bool signbit(__half __v) { return ::signbit(__half2float(__v)); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -779,11 +793,13 @@ bool signbit(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half atan2(__half __x, __half __y) { return __half(_CUDA_VSTD::atan2(float(__x), float(__y))); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -793,6 +809,7 @@ __nv_bfloat16 atan2(__nv_bfloat16 __x, __nv_bfloat16 __y) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half log(__half __x) { @@ -817,6 +834,7 @@ __half log(__half __x) }) ) } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -829,6 +847,7 @@ __nv_bfloat16 log(__nv_bfloat16 __x) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half sqrt(__half __x) { @@ -837,6 +856,7 @@ __half sqrt(__half __x) (return __half(_CUDA_VSTD::sqrt(float(__x)));) ) } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -873,6 +893,7 @@ __constexpr_isnan(_A1 __lcpp_x) noexcept } #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY bool __constexpr_isnan(__half __x) noexcept { @@ -884,6 +905,7 @@ bool isnan(__half __v) { return __constexpr_isnan(__v); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -925,6 +947,7 @@ __constexpr_isinf(_A1 __lcpp_x) noexcept } #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY bool __constexpr_isinf(__half __x) noexcept { #if _LIBCUDACXX_STD_VER >= 20 @@ -935,6 +958,7 @@ bool __constexpr_isinf(__half __x) noexcept { return __hisinf(__x) != 0; #endif } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -949,11 +973,13 @@ bool __constexpr_isinf(__nv_bfloat16 __x) noexcept { } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY bool isinf(__half __v) { return __constexpr_isinf(__v); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -963,11 +989,13 @@ bool isinf(__nv_bfloat16 __v) } #endif +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half hypot(__half __x, __half __y) { return __half(_CUDA_VSTD::hypot(float(__x), float(__y))); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -1001,6 +1029,7 @@ __constexpr_isfinite(_A1 __lcpp_x) noexcept } #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY bool __constexpr_isfinite(__half __x) noexcept { return !__constexpr_isnan(__x) && !__constexpr_isinf(__x); @@ -1011,6 +1040,7 @@ bool isfinite(__half __v) { return __constexpr_isfinite(__v); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -1061,6 +1091,7 @@ _LIBCUDACXX_CONSTEXPR_AFTER_CXX11 typename __enable_if_t::val #endif // !_MSC_VER #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half __constexpr_copysign(__half __x, __half __y) noexcept { @@ -1072,6 +1103,7 @@ __half copysign(__half __x, __half __y) { return __constexpr_copysign(__x, __y); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -1119,6 +1151,7 @@ _LIBCUDACXX_CONSTEXPR_AFTER_CXX11 double __constexpr_fabs(_Tp __x) noexcept { #endif // !_MSC_VER #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half __constexpr_fabs(__half __x) noexcept { @@ -1136,6 +1169,7 @@ __half abs(__half __x) { return __constexpr_fabs(__x); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY @@ -1218,11 +1252,13 @@ __constexpr_fmax(_Tp __x, _Up __y) noexcept { #endif // !_MSC_VER #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 inline _LIBCUDACXX_INLINE_VISIBILITY __half __constexpr_fmax(__half __x, __half __y) noexcept { return __hmax(__x, __y); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 inline _LIBCUDACXX_INLINE_VISIBILITY diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/complex b/libcudacxx/include/cuda/std/detail/libcxx/include/complex index 74e497dc91f..6df4a9b6bd0 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/complex +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/complex @@ -296,7 +296,9 @@ template struct __is_complex_float { static constexpr auto value = is_floating_point<_Tp>::value #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 || is_same<_Tp, __half>::value +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 || is_same<_Tp, __nv_bfloat16>::value #endif @@ -419,6 +421,7 @@ template<> class complex; #endif // _LIBCUDACXX_HAS_COMPLEX_LONG_DOUBLE #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 template<> class _LIBCUDACXX_TEMPLATE_VIS _LIBCUDACXX_COMPLEX_ALIGNAS(alignof(__half2)) complex<__half> { @@ -518,6 +521,7 @@ public: return *this; } }; +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 template<> @@ -633,8 +637,10 @@ public: _LIBCUDACXX_INLINE_VISIBILITY constexpr complex(float __re = 0.0f, float __im = 0.0f) : __re_(__re), __im_(__im) {} #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<__half> & __c) : __re_(__c.real()), __im_(__c.imag()) {} +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<__nv_bfloat16> & __c) : __re_(__c.real()), __im_(__c.imag()) {} @@ -738,8 +744,10 @@ public: _LIBCUDACXX_INLINE_VISIBILITY constexpr complex(double __re = 0.0, double __im = 0.0) : __re_(__re), __im_(__im) {} #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<__half> & __c) : __re_(__c.real()), __im_(__c.imag()) {} +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 _LIBCUDACXX_INLINE_VISIBILITY complex(const complex<__nv_bfloat16> & __c) : __re_(__c.real()), __im_(__c.imag()) {} @@ -1035,7 +1043,7 @@ template struct __has_vector_type : _CUDA_VSTD::false_type {}; template -struct __has_vector_type<_Tp, _CUDA_VSTD::void_t::__type>> : _CUDA_VSTD::true_type {}; +struct __has_vector_type<_Tp, _CUDA_VSTD::__void_t::__type>> : _CUDA_VSTD::true_type {}; template struct __abcd_results @@ -1078,8 +1086,10 @@ __complex_piecewise_mul(_Tp __x1, _Tp __y1, _Tp __x2, _Tp __y2) } #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 template<> struct __type_to_vector<__half> { using __type = __half2; }; +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 template<> struct __type_to_vector<__nv_bfloat16> { using __type = __nv_bfloat162; }; @@ -1480,12 +1490,14 @@ struct __libcpp_complex_overload_traits<_Tp, false, true> #ifdef __cuda_std__ // __half and __nvbfloat +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 template <> struct __libcpp_complex_overload_traits<__half, false, false> { typedef __half _ValueType; typedef complex<__half> _ComplexType; }; +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 template <> @@ -1591,6 +1603,7 @@ arg(_Tp __re) } #ifdef __cuda_std__ +#ifndef _LIBCUDACXX_HAS_NO_NVFP16 template inline _LIBCUDACXX_INLINE_VISIBILITY __enable_if_t< @@ -1601,6 +1614,7 @@ arg(_Tp __re) { return _CUDA_VSTD::atan2f(__half(0), __re); } +#endif #ifndef _LIBCUDACXX_HAS_NO_NVBF16 template