Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BACKPORT]: Ensure that headers in <cuda/*> can be built with a C++ only compiler (#3472) #3651

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions libcudacxx/include/cuda/__ptx/ptx_helper_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include <cuda/std/cstddef>
#include <cuda/std/cstdint>

#if _CCCL_HAS_CUDA_COMPILER

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_PTX

/*************************************************************
Expand Down Expand Up @@ -101,23 +103,25 @@ inline _CCCL_DEVICE _Tp* __from_ptr_gmem(_CUDA_VSTD::size_t __ptr)
template <typename _Tp>
inline _CCCL_DEVICE _CUDA_VSTD::uint32_t __as_b32(_Tp __val)
{
#if _CCCL_STD_VER >= 2017
# if _CCCL_STD_VER >= 2017
static_assert(sizeof(_Tp) == 4, "");
#endif // _CCCL_STD_VER >= 2017
# endif // _CCCL_STD_VER >= 2017
// Consider using std::bitcast
return *reinterpret_cast<_CUDA_VSTD::uint32_t*>(&__val);
}

template <typename _Tp>
inline _CCCL_DEVICE _CUDA_VSTD::uint64_t __as_b64(_Tp __val)
{
#if _CCCL_STD_VER >= 2017
# if _CCCL_STD_VER >= 2017
static_assert(sizeof(_Tp) == 8, "");
#endif // _CCCL_STD_VER >= 2017
# endif // _CCCL_STD_VER >= 2017
// Consider using std::bitcast
return *reinterpret_cast<_CUDA_VSTD::uint64_t*>(&__val);
}

_LIBCUDACXX_END_NAMESPACE_CUDA_PTX

#endif // _CCCL_HAS_CUDA_COMPILER

#endif // _CUDA_PTX_HELPER_FUNCTIONS_H_
3 changes: 2 additions & 1 deletion libcudacxx/include/cuda/discard_memory
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@
# pragma system_header
#endif // no system header

#include <cuda/std/cstddef>
#include <cuda/std/cstdint>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, size_t __nbytes) noexcept
inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, _CUDA_VSTD::size_t __nbytes) noexcept
{
// The discard PTX instruction is only available with PTX ISA 7.4 and later
#if __cccl_ptx_isa < 740ULL
Expand Down
2 changes: 2 additions & 0 deletions libcudacxx/include/cuda/pipeline
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@
# pragma system_header
#endif // no system header

#include <cuda/__memcpy_async/completion_mechanism.h>
#include <cuda/__memcpy_async/memcpy_async_barrier.h>
#include <cuda/atomic>
#include <cuda/barrier>
#include <cuda/std/chrono>
Expand Down
22 changes: 8 additions & 14 deletions libcudacxx/include/cuda/std/__exception/cuda_error.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@
# pragma system_header
#endif // no system header

#if _CCCL_CUDA_COMPILER(CLANG)
# include <cuda_runtime_api.h>
#endif // _CCCL_CUDA_COMPILER(CLANG)

#include <cuda/std/__exception/terminate.h>

#if !_CCCL_COMPILER(NVRTC)
Expand All @@ -40,8 +36,7 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA
/**
* @brief Exception thrown when a CUDA error is encountered.
*/
#if _CCCL_HAS_CUDA_COMPILER
# ifndef _CCCL_NO_EXCEPTIONS
#ifndef _CCCL_NO_EXCEPTIONS
class cuda_error : public ::std::runtime_error
{
private:
Expand All @@ -50,37 +45,36 @@ class cuda_error : public ::std::runtime_error
char __buffer[256];
};

static char* __format_cuda_error(::cudaError_t __status, const char* __msg, char* __msg_buffer) noexcept
static char* __format_cuda_error(const int __status, const char* __msg, char* __msg_buffer) noexcept
{
::snprintf(__msg_buffer, 256, "cudaError %d: %s", __status, __msg);
return __msg_buffer;
}

public:
cuda_error(::cudaError_t __status, const char* __msg, __msg_storage __msg_buffer = {0}) noexcept
cuda_error(const int __status, const char* __msg, __msg_storage __msg_buffer = {0}) noexcept
: ::std::runtime_error(__format_cuda_error(__status, __msg, __msg_buffer.__buffer))
{}
};

_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(::cudaError_t __status, const char* __msg)
_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(const int __status, const char* __msg)
{
NV_IF_ELSE_TARGET(NV_IS_HOST,
(throw ::cuda::cuda_error(__status, __msg);),
((void) __status; (void) __msg; _CUDA_VSTD_NOVERSION::terminate();))
}
# else // ^^^ !_CCCL_NO_EXCEPTIONS ^^^ / vvv _CCCL_NO_EXCEPTIONS vvv
#else // ^^^ !_CCCL_NO_EXCEPTIONS ^^^ / vvv _CCCL_NO_EXCEPTIONS vvv
class cuda_error
{
public:
_LIBCUDACXX_HIDE_FROM_ABI cuda_error(::cudaError_t, const char*) noexcept {}
_LIBCUDACXX_HIDE_FROM_ABI cuda_error(const int, const char*) noexcept {}
};

_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(::cudaError_t, const char*)
_CCCL_NORETURN _LIBCUDACXX_HIDE_FROM_ABI void __throw_cuda_error(const int, const char*)
{
_CUDA_VSTD_NOVERSION::terminate();
}
# endif // _CCCL_NO_EXCEPTIONS
#endif // _CCCL_CUDA_COMPILER
#endif // _CCCL_NO_EXCEPTIONS

_LIBCUDACXX_END_NAMESPACE_CUDA

Expand Down
8 changes: 5 additions & 3 deletions libcudacxx/include/cuda/std/detail/__access_property
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,21 @@
* (v. August 20, 2021)
*/

#include <cuda_runtime_api.h>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

namespace __detail_ap
{

_CCCL_HOST_DEVICE constexpr uint32_t __ap_floor_log2(uint32_t __x)
{
return (__x == 1 | __x == 0) ? 0 : 1 + __ap_floor_log2(__x >> 1);
return ((__x == 1) || (__x == 0)) ? 0 : 1 + __ap_floor_log2(__x >> 1);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: MSVC was warning about those lines, and I believe the logical operators (`||` / `&&`) are much clearer than the bitwise ones (`|` / `&`) given that we are dealing with booleans

}

_CCCL_HOST_DEVICE constexpr uint32_t __ap_ceil_log2(uint32_t __x)
{
return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1;
return ((__x == 1) || (__x == 0)) ? 0 : __ap_floor_log2(__x - 1) + 1;
}

_CCCL_HOST_DEVICE constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept
Expand Down Expand Up @@ -429,7 +431,7 @@ _CCCL_HOST_DEVICE constexpr std::uint64_t __block(
cudaAccessProperty __hit_prop,
cudaAccessProperty __miss_prop = cudaAccessPropertyNormal)
{
return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes)
return (__total_bytes <= (size_t{0xFFFFFFFF}) && __total_bytes != 0 && __hit_bytes <= __total_bytes)
? __sm_80::__block_descriptor_builder(
reinterpret_cast<std::uintptr_t>(__ptr),
__hit_bytes,
Expand Down
15 changes: 9 additions & 6 deletions libcudacxx/include/cuda/std/detail/__annotated_ptr
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,16 @@ namespace __detail_ap
template <typename _Property>
_CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop)
{
#if _CCCL_HAS_CUDA_COMPILER
if (std::is_same<_Property, access_property::shared>::value == true)
{
bool __b = __isShared(__ptr);
_CCCL_ASSERT(__b, "");
#if defined(_CCCL_BUILTIN_ASSUME)
# if defined(_CCCL_BUILTIN_ASSUME)
_CCCL_BUILTIN_ASSUME(__b);
#else // ^^^ _CCCL_BUILTIN_ASSUME ^^^ / vvv !_CCCL_BUILTIN_ASSUME vvv
# else // ^^^ _CCCL_BUILTIN_ASSUME ^^^ / vvv !_CCCL_BUILTIN_ASSUME vvv
(void) __b;
#endif // !_CCCL_BUILTIN_ASSUME
# endif // !_CCCL_BUILTIN_ASSUME
}
else if (std::is_same<_Property, access_property::global>::value == true
|| std::is_same<_Property, access_property::normal>::value == true
Expand All @@ -155,12 +156,13 @@ _CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop)
{
bool __b = __isGlobal(__ptr);
_CCCL_ASSERT(__b, "");
#if defined(_CCCL_BUILTIN_ASSUME)
# if defined(_CCCL_BUILTIN_ASSUME)
_CCCL_BUILTIN_ASSUME(__b);
#else // ^^^ !_CCCL_BUILTIN_ASSUME ^^^ / vvv _CCCL_BUILTIN_ASSUME vvv
# else // ^^^ !_CCCL_BUILTIN_ASSUME ^^^ / vvv _CCCL_BUILTIN_ASSUME vvv
(void) __b;
#endif // !_CCCL_BUILTIN_ASSUME
# endif // !_CCCL_BUILTIN_ASSUME
}
#endif // _CCCL_HAS_CUDA_COMPILER

return __ptr;
}
Expand All @@ -174,6 +176,7 @@ _CCCL_DEVICE void* __associate_descriptor(void* __ptr, __Prop __prop)
template <>
inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, std::uint64_t __prop)
{
(void) __prop;
NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80, (return __nv_associate_access_property(__ptr, __prop);), (return __ptr;))
}

Expand Down
5 changes: 2 additions & 3 deletions libcudacxx/include/cuda/stream_ref
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ private:
} // cuda
*/

#include <cuda_runtime_api.h>
// cuda_runtime_api needs to come first

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
Expand All @@ -51,6 +48,8 @@ private:
# pragma system_header
#endif // no system header

#include <cuda_runtime_api.h>

#include <cuda/std/__cuda/api_wrapper.h>
#include <cuda/std/__exception/cuda_error.h>
#include <cuda/std/cstddef>
Expand Down
15 changes: 14 additions & 1 deletion libcudacxx/test/public_headers_host_only/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@
# without anything else but also pretends to be a std header
add_custom_target(libcudacxx.test.public_headers_host_only)

if ("NVHPC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
find_package(NVHPC)
else()
find_package(CUDAToolkit)
endif()

# Grep all public headers
file(GLOB public_headers_host_only
LIST_DIRECTORIES false
RELATIVE "${libcudacxx_SOURCE_DIR}/include/"
CONFIGURE_DEPENDS
"${libcudacxx_SOURCE_DIR}/include/cuda/std/*"
"${libcudacxx_SOURCE_DIR}/include/cuda/*"
)

# mdspan is currently not supported on msvc outside of C++20
Expand Down Expand Up @@ -36,6 +42,13 @@ function(libcudacxx_add_std_header_test header)
target_compile_definitions(headertest_std_${header_name} PRIVATE CCCL_SUPPRESS_MSVC2017_DEPRECATION_WARNING)
endif()

# We want to ensure that we can build headers within <cuda/> with a host compiler but we need cuda_runtime_api.h
if ("NVHPC" STREQUAL "${CMAKE_CXX_COMPILER_ID}")
target_link_libraries(headertest_std_${header_name} NVHPC::CUDART)
else()
target_link_libraries(headertest_std_${header_name} CUDA::cudart)
endif()

add_dependencies(libcudacxx.test.public_headers_host_only headertest_std_${header_name})
endfunction()

Expand Down