Skip to content

Commit af0a8bb

Browse files
authored
Forward declare half types in cuda::ptx (#2981)
1 parent d68714d commit af0a8bb

File tree

1 file changed

+6
-10
lines changed

1 file changed

+6
-10
lines changed

libcudacxx/include/cuda/__ptx/instructions/cp_reduce_async_bulk.h

+6 -10
Original file line number | Diff line number | Diff line change
@@ -28,16 +28,12 @@
28 28

29 29
#include <nv/target> // __CUDA_MINIMUM_ARCH__ and friends
30 30

31-
#if defined(_LIBCUDACXX_HAS_NVFP16)
32-
# include <cuda_fp16.h>
33-
#endif // _LIBCUDACXX_HAS_NVFP16
34-
35-
#if defined(_LIBCUDACXX_HAS_NVBF16)
36-
_CCCL_DIAG_PUSH
37-
_CCCL_DIAG_SUPPRESS_CLANG("-Wunused-function")
38-
# include <cuda_bf16.h>
39-
_CCCL_DIAG_POP
40-
#endif // _LIBCUDACXX_HAS_NVBF16
31+
// Forward-declare __half and __nv_bfloat16. The cuda_fp16.h and cuda_bf16.h are
32+
// expensive to include. The APIs use only pointers, so we do not have to define
33+
// the types. If the user wants to use these types, it is their responsibility
34+
// to include the headers.
35+
struct __half;
36+
struct __nv_bfloat16;
41 37

42 38
_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX
43 39

0 commit comments

Comments
 (0)