Skip to content

Commit e10c75d

Browse files
fbusatomiscco
authored andcommitted
Expose parts of <cuda/std/memory> (NVIDIA#2502)
This exposes some parts of `<memory>` that are frequently used and safe to use everywhere. We do not expose some features like allocators and smart pointers until we are sure that they are usefull and properly implemented. Co-authored-by: Michael Schellenberger Costa <[email protected]>
1 parent dca8e39 commit e10c75d

29 files changed

+329
-77
lines changed

docs/libcudacxx/standard_api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ Feature availability:
7474
they need to be used similar to type traits as language concepts
7575
are not available.
7676

77+
- C++20 ``std::assume_aligned`` in ``<memory>`` is available in C++11.
78+
7779
- C++20 ``<ranges>`` are available in C++17.
7880

7981
- all ``<ranges>`` concepts are available in C++17. However, they

docs/libcudacxx/standard_api/utility_library.rst

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Utility Library
1010
utility_library/bitset
1111
utility_library/expected
1212
utility_library/functional
13+
utility_library/memory
1314
utility_library/optional
1415
utility_library/tuple
1516
utility_library/type_traits
@@ -34,6 +35,9 @@ the information about the individual features for details.
3435
- Optional value with error channel
3536
- CCCL 2.3.0 / CUDA 12.4
3637
* - :ref:`libcudacxx-standard-api-utility-functional`
38+
- ``std::assume_aligned``
39+
- CCCL 2.9.0 / CUDA 12.9
40+
* - :ref:`libcudacxx-standard-api-utility-memory`
3741
- Function objects and function wrappers
3842
- libcu++ 1.1.0 / CCCL 2.0.0 / CUDA 11.2
3943
* - :ref:`libcudacxx-standard-api-utility-optional`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
.. _libcudacxx-standard-api-utility-memory:
2+
3+
<cuda/std/memory>
4+
===================
5+
6+
Provided functionalities
7+
------------------------
8+
9+
- ``cuda::std::addressof``. See the C++ documentation of `std::addressof <https://en.cppreference.com/w/cpp/memory/addressof>`_
10+
- ``cuda::std::align``. See the C++ documentation of `std::align <https://en.cppreference.com/w/cpp/memory/align>`_
11+
- ``cuda::std::assume_aligned``. See the C++ documentation of `std::assume_aligned <https://en.cppreference.com/w/cpp/memory/assume_aligned>`_
12+
- Uninitialized memory algorithms. See the C++ documentation `<https://en.cppreference.com/w/cpp/memory>`_
13+
14+
Extensions
15+
----------
16+
17+
- Most features are available from C++11 onwards.
18+
- ``cuda::std::addressof`` is constexpr from C++11 on if compiler support is available
19+
- ``cuda::std::assume_aligned`` is constexpr from C++14 on
20+
21+
Restrictions
22+
------------
23+
24+
- `construct_at` and is only available in C++20 as that is explicitly mentioned in the standard
25+
- The specialized memory algorithms are not parallel

libcudacxx/include/cuda/std/__cccl/builtin.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,17 @@
8484

8585
#if _CCCL_HAS_BUILTIN(__array_extent)
8686
# define _CCCL_BUILTIN_ARRAY_EXTENT(...) __array_extent(__VA_ARGS__)
87-
#endif // _CCCL_HAS_BUILTIN(array_extent)
87+
#endif // _CCCL_HAS_BUILTIN(__array_extent)
88+
89+
#if _CCCL_HAS_BUILTIN(__builtin_assume_aligned) || (defined(_CCCL_COMPILER_MSVC) && _CCCL_MSVC_VERSION >= 1923) \
90+
|| defined(_CCCL_COMPILER_GCC)
91+
# define _CCCL_BUILTIN_ASSUME_ALIGNED(...) __builtin_assume_aligned(__VA_ARGS__)
92+
#endif // _CCCL_HAS_BUILTIN(__builtin_assume_aligned)
93+
94+
// NVCC below 11.2 treats this as a host only function
95+
#if defined(_CCCL_CUDACC_BELOW_11_2)
96+
# undef _CCCL_BUILTIN_ASSUME_ALIGNED
97+
#endif // _CCCL_CUDACC_BELOW_11_2
8898

8999
// nvhpc has a bug where it supports __builtin_addressof but does not mark it via _CCCL_CHECK_BUILTIN
90100
#if _CCCL_CHECK_BUILTIN(builtin_addressof) || (defined(_CCCL_COMPILER_GCC) && _CCCL_GCC_VERSION >= 70000) \
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// -*- C++ -*-
2+
//===----------------------------------------------------------------------===//
3+
//
4+
// Part of libcu++, the C++ Standard Library for your entire system,
5+
// under the Apache License v2.0 with LLVM Exceptions.
6+
// See https://llvm.org/LICENSE.txt for license information.
7+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
#ifndef _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H
13+
#define _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H
14+
15+
#include <cuda/std/detail/__config>
16+
17+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
18+
# pragma GCC system_header
19+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
20+
# pragma clang system_header
21+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
22+
# pragma system_header
23+
#endif // no system header
24+
25+
#include <cuda/std/__bit/has_single_bit.h>
26+
#include <cuda/std/__type_traits/is_constant_evaluated.h>
27+
#include <cuda/std/cstddef> // size_t
28+
#include <cuda/std/cstdint> // uintptr_t
29+
30+
_LIBCUDACXX_BEGIN_NAMESPACE_STD
31+
32+
template <size_t _Align, class _Tp>
33+
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Tp* assume_aligned(_Tp* __ptr) noexcept
34+
{
35+
static_assert(_CUDA_VSTD::has_single_bit(_Align), "std::assume_aligned requires the alignment to be a power of 2!");
36+
#if defined(_CCCL_BUILTIN_IS_CONSTANT_EVALUATED) && defined(_CCCL_BUILTIN_ASSUME_ALIGNED)
37+
if (!_CCCL_BUILTIN_IS_CONSTANT_EVALUATED())
38+
{
39+
# if !defined(_CCCL_COMPILER_MSVC) // MSVC checks within the builtin
40+
_CCCL_ASSERT(reinterpret_cast<uintptr_t>(__ptr) % _Align == 0, "Alignment assumption is violated");
41+
# endif // !_CCCL_COMPILER_MSVC
42+
return static_cast<_Tp*>(_CCCL_BUILTIN_ASSUME_ALIGNED(__ptr, _Align));
43+
}
44+
else
45+
#endif // _CCCL_BUILTIN_IS_CONSTANT_EVALUATED && _CCCL_BUILTIN_ASSUME_ALIGNED
46+
{
47+
return __ptr;
48+
}
49+
}
50+
51+
_LIBCUDACXX_END_NAMESPACE_STD
52+
53+
#endif // _LIBCUDACXX___MEMORY_ASSUME_ALIGNED_H

libcudacxx/include/cuda/std/__memory/construct_at.h

+11-19
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
55
// See https://llvm.org/LICENSE.txt for license information.
66
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7-
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
7+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES
88
//
99
//===----------------------------------------------------------------------===//
1010

@@ -185,8 +185,8 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _ForwardIterator __destroy(_Forw
185185

186186
_CCCL_EXEC_CHECK_DISABLE
187187
template <class _Tp,
188-
__enable_if_t<!is_array<_Tp>::value, int> = 0,
189-
__enable_if_t<!is_trivially_destructible<_Tp>::value, int> = 0>
188+
__enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int> = 0,
189+
__enable_if_t<!_CCCL_TRAIT(is_trivially_destructible, _Tp), int> = 0>
190190
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
191191
{
192192
_CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
@@ -195,22 +195,20 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
195195

196196
_CCCL_EXEC_CHECK_DISABLE
197197
template <class _Tp,
198-
__enable_if_t<!is_array<_Tp>::value, int> = 0,
199-
__enable_if_t<is_trivially_destructible<_Tp>::value, int> = 0>
198+
__enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int> = 0,
199+
__enable_if_t<_CCCL_TRAIT(is_trivially_destructible, _Tp), int> = 0>
200200
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
201201
{
202202
_CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
203203
(void) __loc;
204204
}
205205

206-
#if _CCCL_STD_VER >= 2020
207-
template <class _Tp, __enable_if_t<is_array<_Tp>::value, int> = 0>
208-
_LIBCUDACXX_HIDE_FROM_ABI constexpr void __destroy_at(_Tp* __loc)
206+
template <class _Tp, __enable_if_t<_CCCL_TRAIT(is_array, _Tp), int> = 0>
207+
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 void __destroy_at(_Tp* __loc)
209208
{
210209
_CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
211210
_CUDA_VSTD::__destroy(_CUDA_VSTD::begin(*__loc), _CUDA_VSTD::end(*__loc));
212211
}
213-
#endif // _CCCL_STD_VER >= 2020
214212

215213
template <class _ForwardIterator>
216214
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _ForwardIterator
@@ -235,22 +233,18 @@ __reverse_destroy(_BidirectionalIterator __first, _BidirectionalIterator __last)
235233
return __last;
236234
}
237235

238-
#if _CCCL_STD_VER >= 2017
239-
240-
template <class _Tp, enable_if_t<!is_array_v<_Tp>, int> = 0>
241-
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc) noexcept
236+
template <class _Tp, __enable_if_t<!_CCCL_TRAIT(is_array, _Tp), int> = 0>
237+
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc)
242238
{
243239
_CCCL_ASSERT(__loc != nullptr, "null pointer given to destroy_at");
244240
__loc->~_Tp();
245241
}
246242

247-
# if _CCCL_STD_VER >= 2020
248-
template <class _Tp, enable_if_t<is_array_v<_Tp>, int> = 0>
249-
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc) noexcept
243+
template <class _Tp, __enable_if_t<_CCCL_TRAIT(is_array, _Tp), int> = 0>
244+
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy_at(_Tp* __loc)
250245
{
251246
_CUDA_VSTD::__destroy_at(__loc);
252247
}
253-
# endif // _CCCL_STD_VER >= 2020
254248

255249
template <class _ForwardIterator>
256250
_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 void destroy(_ForwardIterator __first, _ForwardIterator __last) noexcept
@@ -268,8 +262,6 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 _ForwardIterator destroy_n(_Forw
268262
return __first;
269263
}
270264

271-
#endif // _CCCL_STD_VER >= 2017
272-
273265
_LIBCUDACXX_END_NAMESPACE_STD
274266

275267
#endif // _LIBCUDACXX___MEMORY_CONSTRUCT_AT_H

libcudacxx/include/cuda/std/__memory_

+5-16
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
#ifndef _CUDA_STD_MEMORY
12-
#define _CUDA_STD_MEMORY
11+
#ifndef _CUDA_STD___MEMORY_
12+
#define _CUDA_STD___MEMORY_
1313

1414
#include <cuda/std/detail/__config>
1515

@@ -21,25 +21,14 @@
2121
# pragma system_header
2222
#endif // no system header
2323

24-
#include <cuda/std/__memory/addressof.h>
25-
#include <cuda/std/__memory/align.h>
2624
#include <cuda/std/__memory/allocate_at_least.h>
2725
#include <cuda/std/__memory/allocation_guard.h>
2826
#include <cuda/std/__memory/allocator.h>
2927
#include <cuda/std/__memory/allocator_arg_t.h>
3028
#include <cuda/std/__memory/allocator_traits.h>
31-
#include <cuda/std/__memory/construct_at.h>
32-
#include <cuda/std/__memory/pointer_traits.h>
33-
#include <cuda/std/__memory/uninitialized_algorithms.h>
29+
#include <cuda/std/__memory/temporary_buffer.h>
3430
#include <cuda/std/__memory/unique_ptr.h>
3531
#include <cuda/std/__memory/uses_allocator.h>
32+
#include <cuda/std/memory>
3633

37-
// standard-mandated includes
38-
#include <cuda/std/version>
39-
40-
// [memory.syn]
41-
#ifndef _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
42-
# include <cuda/std/compare>
43-
#endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
44-
45-
#endif // _CUDA_STD_MEMORY
34+
#endif // _CUDA_STD___MEMORY_

libcudacxx/include/cuda/std/memory

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _CUDA_STD_MEMORY
12+
#define _CUDA_STD_MEMORY
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__memory/addressof.h>
25+
#include <cuda/std/__memory/align.h>
26+
#include <cuda/std/__memory/assume_aligned.h>
27+
#include <cuda/std/__memory/construct_at.h>
28+
#include <cuda/std/__memory/pointer_traits.h>
29+
#include <cuda/std/__memory/uninitialized_algorithms.h>
30+
31+
// standard-mandated includes
32+
#include <cuda/std/version>
33+
34+
// [memory.syn]
35+
#ifndef _LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
36+
# include <cuda/std/compare>
37+
#endif // !_LIBCUDACXX_HAS_NO_SPACESHIP_OPERATOR
38+
39+
#endif // _CUDA_STD_MEMORY

libcudacxx/test/libcudacxx/std/containers/sequences/array/size_and_alignment.pass.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
TEST_NV_DIAG_SUPPRESS(cuda_demote_unsupported_floating_point)
3030

3131
#if defined(TEST_COMPILER_MSVC)
32-
# pragma warning(disable : 4324)
32+
# pragma warning(disable : 4324) // structure was padded due to alignment specifier
3333
#endif // TEST_COMPILER_MSVC
3434

3535
template <class T, cuda::std::size_t Size>

libcudacxx/test/libcudacxx/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#include "test_macros.h"
2424

2525
#if defined(TEST_COMPILER_MSVC)
26-
# pragma warning(disable : 4324)
26+
# pragma warning(disable : 4324) // structure was padded due to alignment specifier
2727
#endif // TEST_COMPILER_MSVC
2828

2929
#ifdef TEST_HAS_NO_ALIGNED_ALLOCATION

libcudacxx/test/libcudacxx/std/utilities/memory/ptr.align/align.pass.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111

1212
// void* align(size_t alignment, size_t size, void*& ptr, size_t& space);
1313

14-
#include <cuda/std/__memory_>
1514
#include <cuda/std/cassert>
1615
#include <cuda/std/cstddef>
16+
#include <cuda/std/memory>
1717

1818
#include "test_macros.h"
1919

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
// #include <memory>
11+
12+
// template<size_t N, class T>
13+
// [[nodiscard]] constexpr T* assume_aligned(T* ptr);
14+
15+
// UNSUPPORTED: nvrtc
16+
// nvrtc currently compiles the test with a warning
17+
18+
#include <cuda/std/memory>
19+
20+
__host__ __device__ void f()
21+
{
22+
int* p = nullptr;
23+
cuda::std::assume_aligned<4>(p); // expected-warning {{ignoring return value of function declared with 'nodiscard'
24+
// attribute}}
25+
}
26+
27+
int main(int, char**)
28+
{
29+
return 0;
30+
}

0 commit comments

Comments
 (0)