diff --git a/cudax/include/cuda/experimental/__algorithm/common.cuh b/cudax/include/cuda/experimental/__algorithm/common.cuh new file mode 100644 index 00000000000..9dd891f7b28 --- /dev/null +++ b/cudax/include/cuda/experimental/__algorithm/common.cuh @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_ALGORITHM_COMMON +#define __CUDAX_ALGORITHM_COMMON + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include +#include +#include + +#include + +namespace cuda::experimental +{ +#if _CCCL_STD_VER >= 2020 && defined(_CCCL_SPAN_USES_RANGES) +template +concept __valid_copy_fill_argument = _CUDA_VRANGES::contiguous_range>; + +#else +template +inline constexpr bool __convertible_to_span = false; + +template +inline constexpr bool __convertible_to_span< + _Tp, + _CUDA_VSTD::enable_if_t< + _CUDA_VSTD::is_convertible_v<_Tp, _CUDA_VSTD::span::value_type>>, + int>> = true; + +template +inline constexpr bool __valid_copy_fill_argument = + _CUDA_VRANGES::contiguous_range> || __convertible_to_span<_Tp>; + +#endif + +} // namespace cuda::experimental +#endif //__CUDAX_ALGORITHM_COMMON diff --git a/cudax/include/cuda/experimental/__algorithm/copy.cuh b/cudax/include/cuda/experimental/__algorithm/copy.cuh new file mode 100644 index 00000000000..9054bf0ea5e --- /dev/null +++ 
b/cudax/include/cuda/experimental/__algorithm/copy.cuh @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_ALGORITHM_COPY +#define __CUDAX_ALGORITHM_COPY + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#include +#include + +namespace cuda::experimental +{ + +template +void __copy_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_SrcTy> __src, _CUDA_VSTD::span<_DstTy> __dst) +{ + static_assert(!_CUDA_VSTD::is_const_v<_DstTy>, "Copy destination can't be const"); + static_assert(_CUDA_VSTD::is_trivially_copyable_v<_SrcTy> && _CUDA_VSTD::is_trivially_copyable_v<_DstTy>); + + if (__src.size_bytes() > __dst.size_bytes()) + { + _CUDA_VSTD::__throw_invalid_argument("Copy destination is too small to fit the source data"); + } + + // TODO pass copy direction hint once we have span with properties + _CCCL_TRY_CUDA_API( + ::cudaMemcpyAsync, + "Failed to perform a copy", + __dst.data(), + __src.data(), + __src.size_bytes(), + cudaMemcpyDefault, + __stream.get()); +} + +//! @brief Launches a bytewise memory copy from source to destination into the provided stream. +//! +//! Both source and destination need to either be a `contiguous_range` or +//! implicitly/launch transform to one. +//! Both source and destination types are required to be trivially copyable. +//! +//! 
This call might be synchronous if either source or destination is pageable host memory. +//! It will be synchronous if both source and destination are located in host memory. +//! +//! @param __stream Stream that the copy should be inserted into +//! @param __src Source to copy from +//! @param __dst Destination to copy into +_LIBCUDACXX_TEMPLATE(typename _SrcTy, typename _DstTy) +_LIBCUDACXX_REQUIRES(__valid_copy_fill_argument<_SrcTy> _LIBCUDACXX_AND __valid_copy_fill_argument<_DstTy>) +void copy_bytes(stream_ref __stream, _SrcTy&& __src, _DstTy&& __dst) +{ + __copy_bytes_impl( + __stream, + _CUDA_VSTD::span(static_cast>( + detail::__launch_transform(__stream, _CUDA_VSTD::forward<_SrcTy>(__src)))), + _CUDA_VSTD::span(static_cast>( + detail::__launch_transform(__stream, _CUDA_VSTD::forward<_DstTy>(__dst))))); +} + +} // namespace cuda::experimental +#endif // __CUDAX_ALGORITHM_COPY diff --git a/cudax/include/cuda/experimental/__algorithm/fill.cuh b/cudax/include/cuda/experimental/__algorithm/fill.cuh new file mode 100644 index 00000000000..4fef2777e87 --- /dev/null +++ b/cudax/include/cuda/experimental/__algorithm/fill.cuh @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_ALGORITHM_FILL +#define __CUDAX_ALGORITHM_FILL + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +#include + +#include +#include + +namespace cuda::experimental +{ + +template +void __fill_bytes_impl(stream_ref __stream, _CUDA_VSTD::span<_DstTy, _DstSize> __dst, uint8_t __value) +{ + static_assert(!_CUDA_VSTD::is_const_v<_DstTy>, "Fill destination can't be const"); + static_assert(_CUDA_VSTD::is_trivially_copyable_v<_DstTy>); + + // TODO do a host callback if not device accessible? + _CCCL_TRY_CUDA_API( + ::cudaMemsetAsync, "Failed to perform a fill", __dst.data(), __value, __dst.size_bytes(), __stream.get()); +} + +//! @brief Launches an operation to bytewise fill the memory into the provided stream. +//! +//! Destination needs to either be a `contiguous_range` or implicitly/launch transform +//! into one. It can't reside in pageable host memory. +//! Destination type is required to be trivially copyable. +//! +//! @param __stream Stream that the fill should be inserted into +//! @param __dst Destination memory to fill +//! 
@param __value Value to fill into every byte in the destination +_LIBCUDACXX_TEMPLATE(typename _DstTy) +_LIBCUDACXX_REQUIRES(__valid_copy_fill_argument<_DstTy>) +void fill_bytes(stream_ref __stream, _DstTy&& __dst, uint8_t __value) +{ + __fill_bytes_impl(__stream, + _CUDA_VSTD::span(static_cast>( + detail::__launch_transform(__stream, _CUDA_VSTD::forward<_DstTy>(__dst)))), + __value); +} + +} // namespace cuda::experimental +#endif // __CUDAX_ALGORITHM_FILL diff --git a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh index e5fc07b2252..d9679c41575 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh @@ -100,21 +100,24 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch. //! @pre The buffer must have the cuda::mr::device_accessible property. - _CCCL_NODISCARD_FRIEND _CUDA_VSTD::span<_Tp> - __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept + template + _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { - static_assert(_CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>, - "The buffer must be device accessible to be passed to `launch`"); + // TODO add auto synchronization return {__self.__get_data(), __self.size()}; } //! @brief Causes the buffer to be treated as a span when passed to cudax::launch //! @pre The buffer must have the cuda::mr::device_accessible property. 
- _CCCL_NODISCARD_FRIEND _CUDA_VSTD::span + template + _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span)( + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { - static_assert(_CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>, - "The buffer must be device accessible to be passed to `launch`"); + // TODO add auto synchronization return {__self.__get_data(), __self.size()}; } diff --git a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh index 221da6a6e0e..c57e4bc5eac 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh @@ -90,21 +90,21 @@ private: //! @brief Causes the buffer to be treated as a span when passed to cudax::launch. //! @pre The buffer must have the cuda::mr::device_accessible property. - _CCCL_NODISCARD_FRIEND _CUDA_VSTD::span<_Tp> - __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept + template + _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { - static_assert(_CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>, - "The buffer must be device accessible to be passed to `launch`"); return {__self.__get_data(), __self.size()}; } //! @brief Causes the buffer to be treated as a span when passed to cudax::launch //! @pre The buffer must have the cuda::mr::device_accessible property. 
- _CCCL_NODISCARD_FRIEND _CUDA_VSTD::span - __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept + template + _CCCL_NODISCARD_FRIEND auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(_CUDA_VSTD::span)( + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>) { - static_assert(_CUDA_VSTD::__is_included_in_v<_CUDA_VMR::device_accessible, _Properties...>, - "The buffer must be device accessible to be passed to `launch`"); return {__self.__get_data(), __self.size()}; } diff --git a/cudax/include/cuda/experimental/__launch/launch_transform.cuh b/cudax/include/cuda/experimental/__launch/launch_transform.cuh index 15ebce61169..284cde01124 100644 --- a/cudax/include/cuda/experimental/__launch/launch_transform.cuh +++ b/cudax/include/cuda/experimental/__launch/launch_transform.cuh @@ -59,19 +59,29 @@ template using __launch_transform_result_t = decltype(__fn{}(::cuda::stream_ref{}, _CUDA_VSTD::declval<_Arg>())); template -struct __as_kernel_arg +struct __as_copy_arg { - using type = _CUDA_VSTD::decay_t<__launch_transform_result_t<_Arg>>; + using type = __launch_transform_result_t<_Arg>; }; +// Copy needs to know if original value is a reference template -struct __as_kernel_arg< - _Arg, - _CUDA_VSTD::void_t>::__as_kernel_arg>> +struct __as_copy_arg<_Arg, + _CUDA_VSTD::void_t>::__as_kernel_arg>> { using type = typename _CUDA_VSTD::decay_t<__launch_transform_result_t<_Arg>>::__as_kernel_arg; }; +template +using __as_copy_arg_t = typename detail::__as_copy_arg<_Arg>::type; + +// While kernel argument can't be a reference +template +struct __as_kernel_arg +{ + using type = _CUDA_VSTD::decay_t::type>; +}; + _CCCL_GLOBAL_CONSTANT __fn __launch_transform{}; } // namespace detail diff --git a/cudax/include/cuda/experimental/algorithm.cuh b/cudax/include/cuda/experimental/algorithm.cuh new file mode 100644 index 
00000000000..0de41b72012 --- /dev/null +++ b/cudax/include/cuda/experimental/algorithm.cuh @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#ifndef __CUDAX_ALGORITHM__ +#define __CUDAX_ALGORITHM__ + +#include +#include + +#endif // __CUDAX_ALGORITHM__ diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 7e71e758d52..38f826c0b4e 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -122,6 +122,12 @@ foreach(cn_target IN LISTS cudax_TARGETS) cudax_add_catch2_test(test_target green_context ${cn_target} green_context/green_ctx_smoke.cu ) + + cudax_add_catch2_test(test_target algorithm ${cn_target} + algorithm/fill.cu + algorithm/copy.cu + ) + endforeach() # FIXME: Enable MSVC diff --git a/cudax/test/algorithm/common.cuh b/cudax/test/algorithm/common.cuh new file mode 100644 index 00000000000..2789a1f4802 --- /dev/null +++ b/cudax/test/algorithm/common.cuh @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __ALGORITHM_COMMON__ +#define __ALGORITHM_COMMON__ + +#include + +#include +#include +#include + +#include +#include + +inline constexpr uint8_t fill_byte = 1; +inline constexpr uint32_t buffer_size = 42; + +inline int get_expected_value(uint8_t pattern_byte) +{ + int result; + memset(&result, pattern_byte, sizeof(int)); + return result; +} + +template +void check_result_and_erase(cudax::stream_ref stream, Result&& result, uint8_t pattern_byte = fill_byte) +{ + int expected = get_expected_value(pattern_byte); + + stream.wait(); + for (int& i : result) + { + CUDAX_REQUIRE(i == expected); + i = 0; + } +} + +namespace cuda::experimental +{ + +// Need a type that goes through all launch_transform steps, but is not a contiguous_range +struct weird_buffer +{ + const cuda::mr::pinned_memory_resource& resource; + int* data; + std::size_t size; + + weird_buffer(const cuda::mr::pinned_memory_resource& res, std::size_t s) + : resource(res) + , data((int*) res.allocate(s * sizeof(int))) + , size(s) + {} + + ~weird_buffer() + { + resource.deallocate(data, size); + } + + weird_buffer(const weird_buffer&) = delete; + weird_buffer(weird_buffer&&) = delete; + + struct transform_result + { + int* data; + std::size_t size; + + using __as_kernel_arg = cuda::std::span; + + operator cuda::std::span() + { + return {data, size}; + } + }; + + _CCCL_NODISCARD_FRIEND transform_result __cudax_launch_transform(cuda::stream_ref, const weird_buffer& self) noexcept + { + return {self.data, self.size}; + } +}; + +static_assert(std::is_same_v, cuda::std::span>); + +} // namespace cuda::experimental + +#endif // __ALGORITHM_COMMON__ diff --git a/cudax/test/algorithm/copy.cu b/cudax/test/algorithm/copy.cu new file mode 100644 index 00000000000..0066f4feba9 --- /dev/null +++ b/cudax/test/algorithm/copy.cu @@ -0,0 +1,105 @@ +//===----------------------------------------------------------------------===// 
+// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "common.cuh" + +TEST_CASE("Copy", "[data_manipulation]") +{ + cudax::stream _stream; + + SECTION("Device resource") + { + cudax::mr::async_memory_resource device_resource; + std::vector host_vector(buffer_size); + + { + cudax::uninitialized_async_buffer buffer(device_resource, _stream, buffer_size); + cudax::fill_bytes(_stream, buffer, fill_byte); + + cudax::copy_bytes(_stream, buffer, host_vector); + check_result_and_erase(_stream, host_vector); + + cudax::copy_bytes(_stream, std::move(buffer), host_vector); + check_result_and_erase(_stream, host_vector); + } + { + cudax::uninitialized_async_buffer not_yet_const_buffer( + device_resource, _stream, buffer_size); + cudax::fill_bytes(_stream, not_yet_const_buffer, fill_byte); + + const auto& const_buffer = not_yet_const_buffer; + + cudax::copy_bytes(_stream, const_buffer, host_vector); + check_result_and_erase(_stream, host_vector); + + cudax::copy_bytes(_stream, const_buffer, cuda::std::span(host_vector)); + check_result_and_erase(_stream, host_vector); + } + } + + SECTION("Host and managed resource") + { + cuda::mr::managed_memory_resource managed_resource; + cuda::mr::pinned_memory_resource host_resource; + + { + cudax::uninitialized_buffer host_buffer(host_resource, buffer_size); + cudax::uninitialized_buffer device_buffer(managed_resource, buffer_size); + + cudax::fill_bytes(_stream, host_buffer, fill_byte); + + cudax::copy_bytes(_stream, host_buffer, device_buffer); + check_result_and_erase(_stream, device_buffer); + + cudax::copy_bytes(_stream, cuda::std::span(host_buffer), device_buffer); + 
check_result_and_erase(_stream, device_buffer); + } + + { + cudax::uninitialized_buffer not_yet_const_host_buffer(host_resource, buffer_size); + cudax::uninitialized_buffer device_buffer(managed_resource, buffer_size); + cudax::fill_bytes(_stream, not_yet_const_host_buffer, fill_byte); + + const auto& const_host_buffer = not_yet_const_host_buffer; + + cudax::copy_bytes(_stream, const_host_buffer, device_buffer); + check_result_and_erase(_stream, device_buffer); + + cudax::copy_bytes(_stream, cuda::std::span(const_host_buffer), device_buffer); + check_result_and_erase(_stream, device_buffer); + } + } + SECTION("Launch transform") + { + cuda::mr::pinned_memory_resource host_resource; + cudax::weird_buffer input(host_resource, buffer_size); + cudax::weird_buffer output(host_resource, buffer_size); + + memset(input.data, fill_byte, input.size * sizeof(int)); + + cudax::copy_bytes(_stream, input, output); + check_result_and_erase(_stream, cuda::std::span(output.data, output.size)); + } + + SECTION("Asymetric size") + { + cuda::mr::pinned_memory_resource host_resource; + cudax::uninitialized_buffer host_buffer(host_resource, 1); + cudax::fill_bytes(_stream, host_buffer, fill_byte); + + ::std::vector vec(buffer_size, 0xbeef); + + cudax::copy_bytes(_stream, host_buffer, vec); + _stream.wait(); + + CUDAX_REQUIRE(vec[0] == get_expected_value(fill_byte)); + CUDAX_REQUIRE(vec[1] == 0xbeef); + } +} diff --git a/cudax/test/algorithm/fill.cu b/cudax/test/algorithm/fill.cu new file mode 100644 index 00000000000..7111aa848f3 --- /dev/null +++ b/cudax/test/algorithm/fill.cu @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of CUDA Experimental in CUDA C++ Core Libraries, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// +//===----------------------------------------------------------------------===// + +#include "common.cuh" + +TEST_CASE("Fill", "[data_manipulation]") +{ + cudax::stream _stream; + SECTION("Host resource") + { + cuda::mr::pinned_memory_resource host_resource; + cudax::uninitialized_buffer buffer(host_resource, buffer_size); + + cudax::fill_bytes(_stream, buffer, fill_byte); + + check_result_and_erase(_stream, cuda::std::span(buffer)); + } + + SECTION("Device resource") + { + cuda::mr::device_memory_resource device_resource; + cudax::uninitialized_buffer buffer(device_resource, buffer_size); + cudax::fill_bytes(_stream, buffer, fill_byte); + + std::vector host_vector(42); + CUDART(cudaMemcpyAsync( + host_vector.data(), buffer.data(), buffer.size() * sizeof(int), cudaMemcpyDefault, _stream.get())); + + check_result_and_erase(_stream, host_vector); + } + SECTION("Launch transform") + { + cuda::mr::pinned_memory_resource host_resource; + cudax::weird_buffer buffer(host_resource, buffer_size); + + cudax::fill_bytes(_stream, buffer, fill_byte); + check_result_and_erase(_stream, cuda::std::span(buffer.data, buffer.size)); + } +}