-
Notifications
You must be signed in to change notification settings - Fork 188
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CUDAX] Add initial bits of copy_bytes and fill_bytes (#2608)
- Loading branch information
Showing
11 changed files
with
496 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef __CUDAX_ALGORITHM_COMMON | ||
#define __CUDAX_ALGORITHM_COMMON | ||
|
||
#include <cuda/__cccl_config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__ranges/concepts.h> | ||
#include <cuda/std/__type_traits/is_convertible.h> | ||
#include <cuda/std/span> | ||
|
||
#include <cuda/experimental/__launch/launch_transform.cuh> | ||
|
||
namespace cuda::experimental | ||
{ | ||
// `__valid_copy_fill_argument<_Tp>` is the constraint used by `copy_bytes` and `fill_bytes` | ||
// on their source/destination arguments. Two definitions are provided, selected below. | ||
#if _CCCL_STD_VER >= 2020 && defined(_CCCL_SPAN_USES_RANGES) | ||
//! Satisfied when the type obtained from `detail::__as_copy_arg_t<_Tp>` models | ||
//! `contiguous_range`. | ||
template <typename _Tp> | ||
concept __valid_copy_fill_argument = _CUDA_VRANGES::contiguous_range<detail::__as_copy_arg_t<_Tp>>; | ||
|
||
#else | ||
//! Primary template: by default `_Tp` is not considered convertible to a span. | ||
//! The second (defaulted) parameter only exists so the specialization below can use SFINAE. | ||
template <typename _Tp, typename = int> | ||
inline constexpr bool __convertible_to_span = false; | ||
|
||
//! Specialization chosen when `_Tp` is convertible to a `span` of | ||
//! `decay_t<_Tp>::value_type`. If `decay_t<_Tp>` has no `value_type` member the | ||
//! specialization is discarded and the primary template (`false`) is used. | ||
template <typename _Tp> | ||
inline constexpr bool __convertible_to_span< | ||
_Tp, | ||
_CUDA_VSTD::enable_if_t< | ||
_CUDA_VSTD::is_convertible_v<_Tp, _CUDA_VSTD::span<typename _CUDA_VSTD::decay_t<_Tp>::value_type>>, | ||
int>> = true; | ||
|
||
//! Variable-template form of the constraint: true when the `detail::__as_copy_arg_t<_Tp>` | ||
//! type models `contiguous_range`, or when `_Tp` itself is convertible to a span. | ||
template <typename _Tp> | ||
inline constexpr bool __valid_copy_fill_argument = | ||
_CUDA_VRANGES::contiguous_range<detail::__as_copy_arg_t<_Tp>> || __convertible_to_span<_Tp>; | ||
|
||
#endif | ||
|
||
} // namespace cuda::experimental | ||
#endif //__CUDAX_ALGORITHM_COMMON |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef __CUDAX_ALGORITHM_COPY | ||
#define __CUDAX_ALGORITHM_COPY | ||
|
||
#include <cuda/__cccl_config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__concepts/__concept_macros.h> | ||
|
||
#include <cuda/experimental/__algorithm/common.cuh> | ||
#include <cuda/experimental/__stream/stream_ref.cuh> | ||
|
||
namespace cuda::experimental | ||
{ | ||
|
||
template <typename _SrcTy, typename _DstTy> | ||
void __copy_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_SrcTy> __src, _CUDA_VSTD::span<_DstTy> __dst) | ||
{ | ||
static_assert(!_CUDA_VSTD::is_const_v<_DstTy>, "Copy destination can't be const"); | ||
static_assert(_CUDA_VSTD::is_trivially_copyable_v<_SrcTy> && _CUDA_VSTD::is_trivially_copyable_v<_DstTy>); | ||
|
||
if (__src.size_bytes() > __dst.size_bytes()) | ||
{ | ||
_CUDA_VSTD::__throw_invalid_argument("Copy destination is too small to fit the source data"); | ||
} | ||
|
||
// TODO pass copy direction hint once we have span with properties | ||
_CCCL_TRY_CUDA_API( | ||
::cudaMemcpyAsync, | ||
"Failed to perform a copy", | ||
__dst.data(), | ||
__src.data(), | ||
__src.size_bytes(), | ||
cudaMemcpyDefault, | ||
__stream.get()); | ||
} | ||
|
||
//! @brief Launches a bytewise memory copy from source to destination into the provided stream. | ||
//! | ||
//! Both source and destination need to either be a `contiguous_range` or | ||
//! implicitly/launch transform to one. | ||
//! Both source and destination types are required to be trivially copyable. | ||
//! | ||
//! This call might be synchronous if either source or destination is pageable host memory. | ||
//! It will be synchronous if both source and destination are located in host memory. | ||
//! | ||
//! If the source holds more bytes than the destination, the error is reported through | ||
//! `_CUDA_VSTD::__throw_invalid_argument` before any copy is enqueued. | ||
//! | ||
//! @param __stream Stream that the copy should be inserted into | ||
//! @param __src Source to copy from | ||
//! @param __dst Destination to copy into | ||
_LIBCUDACXX_TEMPLATE(typename _SrcTy, typename _DstTy) | ||
_LIBCUDACXX_REQUIRES(__valid_copy_fill_argument<_SrcTy> _LIBCUDACXX_AND __valid_copy_fill_argument<_DstTy>) | ||
void copy_bytes(stream_ref __stream, _SrcTy&& __src, _DstTy&& __dst) | ||
{ | ||
// Each argument is launch-transformed, cast to its `__as_copy_arg_t` type and wrapped in a | ||
// span directly inside the call expression. | ||
// NOTE(review): presumably this keeps any temporary returned by `__launch_transform` alive | ||
// until `__copy_bytes_impl` returns - confirm before hoisting these into named locals. | ||
__copy_bytes_impl( | ||
__stream, | ||
_CUDA_VSTD::span(static_cast<detail::__as_copy_arg_t<_SrcTy>>( | ||
detail::__launch_transform(__stream, _CUDA_VSTD::forward<_SrcTy>(__src)))), | ||
_CUDA_VSTD::span(static_cast<detail::__as_copy_arg_t<_DstTy>>( | ||
detail::__launch_transform(__stream, _CUDA_VSTD::forward<_DstTy>(__dst))))); | ||
} | ||
|
||
} // namespace cuda::experimental | ||
#endif // __CUDAX_ALGORITHM_COPY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef __CUDAX_ALGORITHM_FILL | ||
#define __CUDAX_ALGORITHM_FILL | ||
|
||
#include <cuda/__cccl_config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__concepts/__concept_macros.h> | ||
|
||
#include <cuda/experimental/__algorithm/common.cuh> | ||
#include <cuda/experimental/__stream/stream_ref.cuh> | ||
|
||
namespace cuda::experimental | ||
{ | ||
|
||
//! @brief Inserts an asynchronous bytewise fill of the memory covered by `__dst` into `__stream`. | ||
//! | ||
//! @param __stream Stream that the fill is inserted into | ||
//! @param __dst Span over the memory to fill; its element type must be non-const and trivially copyable | ||
//! @param __value Value written to every byte of `__dst` | ||
//! | ||
//! A failing `cudaMemsetAsync` is reported through `_CCCL_TRY_CUDA_API`. | ||
template <typename _DstTy, ::std::size_t _DstSize> | ||
void __fill_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_DstTy, _DstSize> __dst, uint8_t __value) | ||
{ | ||
  static_assert(!_CUDA_VSTD::is_const_v<_DstTy>, "Fill destination can't be const"); | ||
  static_assert(_CUDA_VSTD::is_trivially_copyable_v<_DstTy>); | ||
|
||
  // Name the start address and the byte count of the destination up front. | ||
  auto* const __first = __dst.data(); | ||
  const auto __nbytes = __dst.size_bytes(); | ||
|
||
  // TODO do a host callback if not device accessible? | ||
  _CCCL_TRY_CUDA_API(::cudaMemsetAsync, "Failed to perform a fill", __first, __value, __nbytes, __stream.get()); | ||
} | ||
|
||
//! @brief Launches an operation to bytewise fill the memory into the provided stream. | ||
//! | ||
//! Destination needs to either be a `contiguous_range` or implicitly/launch transform | ||
//! into one. It can't reside in pageable host memory. | ||
//! Destination type is required to be trivially copyable. | ||
//! | ||
//! @param __stream Stream that the fill should be inserted into | ||
//! @param __dst Destination memory to fill | ||
//! @param __value Value to fill into every byte in the destination | ||
_LIBCUDACXX_TEMPLATE(typename _DstTy) | ||
_LIBCUDACXX_REQUIRES(__valid_copy_fill_argument<_DstTy>) | ||
void fill_bytes(stream_ref __stream, _DstTy&& __dst, uint8_t __value) | ||
{ | ||
// The destination is launch-transformed, cast to its `__as_copy_arg_t` type and wrapped in a | ||
// span directly inside the call expression. | ||
// NOTE(review): presumably this keeps any temporary returned by `__launch_transform` alive | ||
// until `__fill_bytes_impl` returns - confirm before hoisting it into a named local. | ||
__fill_bytes_impl(__stream, | ||
_CUDA_VSTD::span(static_cast<detail::__as_copy_arg_t<_DstTy>>( | ||
detail::__launch_transform(__stream, _CUDA_VSTD::forward<_DstTy>(__dst)))), | ||
__value); | ||
} | ||
|
||
} // namespace cuda::experimental | ||
#endif // __CUDAX_ALGORITHM_FILL |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef __CUDAX_ALGORITHM__ | ||
#define __CUDAX_ALGORITHM__ | ||
|
||
#include <cuda/experimental/__algorithm/copy.cuh> | ||
#include <cuda/experimental/__algorithm/fill.cuh> | ||
|
||
#endif // __CUDAX_ALGORITHM__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.