Skip to content

Commit

Permalink
Do not include extended floating point headers if they are not needed
Browse files Browse the repository at this point in the history
  • Loading branch information
miscco authored Nov 25, 2024
1 parent efee771 commit dc920c9
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 22 deletions.
19 changes: 18 additions & 1 deletion c2h/include/c2h/generators.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,24 @@
#include <c2h/vector.h>

#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
# include <cub/util_type.cuh> // for <cuda_fp8.h>
# if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
# endif // _CCCL_HAS_NVFP16

# if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP

# if _CCCL_CUDACC_AT_LEAST(11, 8)
// cuda_fp8.h resets default for C4127, so we have to guard the inclusion
_CCCL_DIAG_PUSH
# include <cuda_fp8.h>
_CCCL_DIAG_POP
# endif // _CCCL_CUDACC_AT_LEAST(11, 8)
# endif // _CCCL_HAS_NVBF16

# if defined(__CUDA_FP8_TYPES_EXIST__)
namespace std
{
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/detail/fast_modulo_division.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,16 @@
# pragma system_header
#endif // no system header

#include <cub/detail/type_traits.cuh> // implicit_prom_t
#include <cub/util_type.cuh> // CUB_IS_INT128_ENABLED

#include <cuda/cmath> // cuda::std::ceil_div
#include <cuda/std/bit> // std::has_single_bit
#include <cuda/std/climits> // CHAR_BIT
#include <cuda/std/cstdint> // uint64_t
#include <cuda/std/limits> // numeric_limits
#include <cuda/std/type_traits> // std::is_integral

#include "cub/detail/type_traits.cuh" // implicit_prom_t
#include "cub/util_type.cuh" // CUB_IS_INT128_ENABLED

#if defined(CCCL_ENABLE_DEVICE_ASSERTIONS)
_CCCL_NV_DIAG_SUPPRESS(186) // pointless comparison of unsigned integer with zero
#endif // CCCL_ENABLE_DEVICE_ASSERTIONS
Expand Down
11 changes: 11 additions & 0 deletions cub/cub/thread/thread_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@
#include <cuda/std/type_traits> // cuda::std::common_type
#include <cuda/std/utility> // cuda::std::forward

#if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
#endif // _CCCL_HAS_NVFP16

#if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP
#endif // _CCCL_HAS_NVBF16

CUB_NAMESPACE_BEGIN

// TODO(bgruber): deprecate in C++17 with a note: "replace by decltype(cuda::std::not_fn(EqualityOp{}))"
Expand Down
11 changes: 11 additions & 0 deletions cub/cub/thread/thread_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@
#include <cuda/std/cstdint> // uint16_t
#include <cuda/std/functional> // cuda::std::plus

#if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
#endif // _CCCL_HAS_NVFP16

#if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP
#endif // _CCCL_HAS_NVBF16

CUB_NAMESPACE_BEGIN

//! @rst
Expand Down
9 changes: 9 additions & 0 deletions cub/cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,16 @@
#include <cuda/std/limits>
#include <cuda/std/type_traits>

#if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
#endif // _CCCL_HAS_NVFP16

#if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP

# if _CCCL_CUDACC_AT_LEAST(11, 8)
// cuda_fp8.h resets default for C4127, so we have to guard the inclusion
_CCCL_DIAG_PUSH
Expand Down
11 changes: 0 additions & 11 deletions libcudacxx/include/cuda/std/__cccl/extended_floating_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,4 @@
# endif
#endif // !_CCCL_HAS_NVBF16

#if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
#endif // _CCCL_HAS_NVFP16

#if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP
#endif // _CCCL_HAS_NVBF16

#endif // __CCCL_EXTENDED_FLOATING_POINT_H
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@

#include <cuda/std/__type_traits/integral_constant.h>

#if defined(_LIBCUDACXX_HAS_NVFP16)
# include <cuda_fp16.h>
#endif // _LIBCUDACXX_HAS_NVFP16

#if defined(_LIBCUDACXX_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP
#endif // _LIBCUDACXX_HAS_NVBF16

_LIBCUDACXX_BEGIN_NAMESPACE_STD

template <class _Tp>
Expand All @@ -39,8 +50,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v
#endif // !_CCCL_NO_VARIABLE_TEMPLATES

#if defined(_LIBCUDACXX_HAS_NVFP16)
# include <cuda_fp16.h>

// Explicit specialization of __is_extended_floating_point for __half:
// derives from true_type, so the trait reports true for this type.
// Only compiled when _LIBCUDACXX_HAS_NVFP16 is defined (enclosing #if).
template <>
struct __is_extended_floating_point<__half> : true_type
{};
Expand All @@ -52,11 +61,6 @@ _CCCL_INLINE_VAR constexpr bool __is_extended_floating_point_v<__half> = true;
#endif // _LIBCUDACXX_HAS_NVFP16

#if defined(_LIBCUDACXX_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP

// Explicit specialization of __is_extended_floating_point for __nv_bfloat16:
// derives from true_type, so the trait reports true for this type.
// Only compiled when _LIBCUDACXX_HAS_NVBF16 is defined (enclosing #if).
template <>
struct __is_extended_floating_point<__nv_bfloat16> : true_type
{};
Expand Down
1 change: 1 addition & 0 deletions libcudacxx/include/cuda/std/__type_traits/promote.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#ifdef _LIBCUDACXX_HAS_NVFP16
# include <cuda_fp16.h>
#endif // _LIBCUDACXX_HAS_NVFP16

#ifdef _LIBCUDACXX_HAS_NVBF16
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
Expand Down
11 changes: 11 additions & 0 deletions thrust/thrust/system/cuda/detail/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,17 @@

# include <cstdint>

# if defined(_CCCL_HAS_NVFP16)
# include <cuda_fp16.h>
# endif // _CCCL_HAS_NVFP16

# if defined(_CCCL_HAS_NVBF16)
_CCCL_DIAG_PUSH
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
# include <cuda_bf16.h>
_CCCL_DIAG_POP
# endif // _CCCL_HAS_NVBF16

THRUST_NAMESPACE_BEGIN
namespace cuda_cub
{
Expand Down

0 comments on commit dc920c9

Please sign in to comment.