diff --git a/c2h/include/c2h/generators.h b/c2h/include/c2h/generators.h index 20036088fa8..62f169e9e21 100644 --- a/c2h/include/c2h/generators.h +++ b/c2h/include/c2h/generators.h @@ -35,7 +35,24 @@ #include #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA -# include // for +# if defined(_CCCL_HAS_NVFP16) +# include +# endif // _CCCL_HAS_NVFP16 + +# if defined(_CCCL_HAS_NVBF16) +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP + +# if _CCCL_CUDACC_AT_LEAST(11, 8) +// cuda_fp8.h resets default for C4127, so we have to guard the inclusion +_CCCL_DIAG_PUSH +# include +_CCCL_DIAG_POP +# endif // _CCCL_CUDACC_AT_LEAST(11, 8) +# endif // _CCCL_HAS_NVBF16 + # if defined(__CUDA_FP8_TYPES_EXIST__) namespace std { diff --git a/cub/cub/detail/fast_modulo_division.cuh b/cub/cub/detail/fast_modulo_division.cuh index aa2ffd371c0..24b3204801d 100644 --- a/cub/cub/detail/fast_modulo_division.cuh +++ b/cub/cub/detail/fast_modulo_division.cuh @@ -37,6 +37,9 @@ # pragma system_header #endif // no system header +#include // implicit_prom_t +#include // CUB_IS_INT128_ENABLED + #include // cuda::std::ceil_div #include // std::has_single_bit #include // CHAR_BIT @@ -44,9 +47,6 @@ #include // numeric_limits #include // std::is_integral -#include "cub/detail/type_traits.cuh" // implicit_prom_t -#include "cub/util_type.cuh" // CUB_IS_INT128_ENABLED - #if defined(CCCL_ENABLE_DEVICE_ASSERTIONS) _CCCL_NV_DIAG_SUPPRESS(186) // pointless comparison of unsigned integer with zero #endif // CCCL_ENABLE_DEVICE_ASSERTIONS diff --git a/cub/cub/thread/thread_operators.cuh b/cub/cub/thread/thread_operators.cuh index 45d2446188f..05f2d6a41f6 100644 --- a/cub/cub/thread/thread_operators.cuh +++ b/cub/cub/thread/thread_operators.cuh @@ -56,6 +56,17 @@ #include // cuda::std::common_type #include // cuda::std::forward +#if defined(_CCCL_HAS_NVFP16) +# include +#endif // _CCCL_HAS_NVFP16 + +#if defined(_CCCL_HAS_NVBF16) +_CCCL_DIAG_PUSH 
+_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP +#endif // _CCCL_HAS_NVBF16 + CUB_NAMESPACE_BEGIN // TODO(bgruber): deprecate in C++17 with a note: "replace by decltype(cuda::std::not_fn(EqualityOp{}))" diff --git a/cub/cub/thread/thread_reduce.cuh b/cub/cub/thread/thread_reduce.cuh index d4b4a89fdfd..f384d907b34 100644 --- a/cub/cub/thread/thread_reduce.cuh +++ b/cub/cub/thread/thread_reduce.cuh @@ -54,6 +54,17 @@ #include // uint16_t #include // cuda::std::plus +#if defined(_CCCL_HAS_NVFP16) +# include +#endif // _CCCL_HAS_NVFP16 + +#if defined(_CCCL_HAS_NVBF16) +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP +#endif // _CCCL_HAS_NVBF16 + CUB_NAMESPACE_BEGIN //! @rst diff --git a/cub/cub/util_type.cuh b/cub/cub/util_type.cuh index f062ebc4ae9..5bda9dfe98f 100644 --- a/cub/cub/util_type.cuh +++ b/cub/cub/util_type.cuh @@ -50,7 +50,16 @@ #include #include +#if defined(_CCCL_HAS_NVFP16) +# include +#endif // _CCCL_HAS_NVFP16 + #if defined(_CCCL_HAS_NVBF16) +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP + # if _CCCL_CUDACC_AT_LEAST(11, 8) // cuda_fp8.h resets default for C4127, so we have to guard the inclusion _CCCL_DIAG_PUSH diff --git a/libcudacxx/include/cuda/std/__cccl/extended_floating_point.h b/libcudacxx/include/cuda/std/__cccl/extended_floating_point.h index 9d3c835c464..d135f406702 100644 --- a/libcudacxx/include/cuda/std/__cccl/extended_floating_point.h +++ b/libcudacxx/include/cuda/std/__cccl/extended_floating_point.h @@ -39,15 +39,4 @@ # endif #endif // !_CCCL_HAS_NVBF16 -#if defined(_CCCL_HAS_NVFP16) -# include -#endif // _CCCL_HAS_NVFP16 - -#if defined(_CCCL_HAS_NVBF16) -_CCCL_DIAG_PUSH -_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") -# include -_CCCL_DIAG_POP -#endif // _CCCL_HAS_NVFP16 - #endif // __CCCL_EXTENDED_FLOATING_POINT_H diff --git a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h 
b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h index dcc4330e107..bb1afa4225b 100644 --- a/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h +++ b/libcudacxx/include/cuda/std/__type_traits/is_extended_floating_point.h @@ -22,6 +22,17 @@ #include +#if defined(_LIBCUDACXX_HAS_NVFP16) +# include +#endif // _LIBCUDACXX_HAS_NVFP16 + +#if defined(_LIBCUDACXX_HAS_NVBF16) +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP +#endif // _LIBCUDACXX_HAS_NVBF16 + _LIBCUDACXX_BEGIN_NAMESPACE_STD template @@ -39,8 +50,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v #endif // !_CCCL_NO_VARIABLE_TEMPLATES #if defined(_LIBCUDACXX_HAS_NVFP16) -# include - template <> struct __is_extended_floating_point<__half> : true_type {}; @@ -52,11 +61,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__half> = true; #endif // _LIBCUDACXX_HAS_NVFP16 #if defined(_LIBCUDACXX_HAS_NVBF16) -_CCCL_DIAG_PUSH -_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") -# include -_CCCL_DIAG_POP - template <> struct __is_extended_floating_point<__nv_bfloat16> : true_type {}; diff --git a/libcudacxx/include/cuda/std/__type_traits/promote.h b/libcudacxx/include/cuda/std/__type_traits/promote.h index 01b06989513..daa545c5fa1 100644 --- a/libcudacxx/include/cuda/std/__type_traits/promote.h +++ b/libcudacxx/include/cuda/std/__type_traits/promote.h @@ -28,6 +28,7 @@ #ifdef _LIBCUDACXX_HAS_NVFP16 # include #endif // _LIBCUDACXX_HAS_NVFP16 + #ifdef _LIBCUDACXX_HAS_NVBF16 _CCCL_DIAG_PUSH _CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") diff --git a/thrust/thrust/system/cuda/detail/sort.h b/thrust/thrust/system/cuda/detail/sort.h index a582cf2f3c6..3de3d5492f7 100644 --- a/thrust/thrust/system/cuda/detail/sort.h +++ b/thrust/thrust/system/cuda/detail/sort.h @@ -60,6 +60,17 @@ # include +# if defined(_CCCL_HAS_NVFP16) +# include +# endif // _CCCL_HAS_NVFP16 + +# if defined(_CCCL_HAS_NVBF16) +_CCCL_DIAG_PUSH 
+_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function") +# include +_CCCL_DIAG_POP +# endif // _CCCL_HAS_NVBF16 + THRUST_NAMESPACE_BEGIN namespace cuda_cub {