From 82cff38a1c41394b50386f254f6227d9ba5c5d88 Mon Sep 17 00:00:00 2001
From: Eric Niebler <eniebler@nvidia.com>
Date: Tue, 17 Dec 2024 13:58:45 -0800
Subject: [PATCH] new type-erased memory resources (#2824)

---
 .../uninitialized_async_buffer.cuh            |    9 +-
 .../__container/uninitialized_buffer.cuh      |    9 +-
 .../__memory_resource/any_resource.cuh        | 1009 +++++++++++++----
 .../__memory_resource/device_memory_pool.cuh  |    2 +-
 .../device_memory_resource.cuh                |    6 +-
 .../managed_memory_resource.cuh               |   22 +-
 .../pinned_memory_resource.cuh                |   10 +-
 .../__memory_resource/shared_resource.cuh     |    2 +-
 .../__utility/basic_any/basic_any_base.cuh    |   13 +-
 .../__utility/basic_any/conversions.cuh       |  126 +-
 .../__utility/basic_any/interfaces.cuh        |    2 +-
 .../__utility/basic_any/semiregular.cuh       |   81 ++
 cudax/test/containers/uninitialized_buffer.cu |   27 +-
 .../memory_resource/any_async_resource.cu     |   24 +-
 cudax/test/memory_resource/any_resource.cu    |  156 ++-
 .../memory_resource/device_memory_resource.cu |   85 +-
 .../managed_memory_resource.cu                |   32 +-
 .../memory_resource/pinned_memory_resource.cu |   34 +-
 cudax/test/memory_resource/shared_resource.cu |    8 +-
 cudax/test/memory_resource/test_resource.cuh  |   26 +-
 .../extended_api/memory_resource.rst          |    2 +-
 .../include/cuda/__memory_resource/resource.h |    8 +-
 .../cuda/__memory_resource/resource_ref.h     |   50 +-
 23 files changed, 1293 insertions(+), 450 deletions(-)
diff --git a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh
index d6259a7c077..3f55084dc63 100644
--- a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh
+++ b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh
@@ -22,7 +22,6 @@
 #endif // no system header
 
 #include <cuda/__memory_resource/properties.h>
-#include <cuda/__memory_resource/resource_ref.h>
 #include <cuda/std/__memory/align.h>
 #include <cuda/std/__new/launder.h>
 #include <cuda/std/__type_traits/type_set.h>
@@ -118,7 +117,7 @@ private:
   _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
   __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept
     _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)(
-      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<mr::device_accessible, _Properties...>)
+      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
   {
     // TODO add auto synchronization
     return {__self.__get_data(), __self.size()};
@@ -130,7 +129,7 @@ private:
   _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
   __cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept
     _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<const _Tp>)(
-      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<mr::device_accessible, _Properties...>)
+      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
   {
     // TODO add auto synchronization
     return {__self.__get_data(), __self.size()};
@@ -287,7 +286,7 @@ public:
   _CCCL_HIDE_FROM_ABI uninitialized_async_buffer __replace_allocation(const size_t __count)
   {
     // Create a new buffer with a reference to the stored memory resource and swap allocation information
-    uninitialized_async_buffer __ret{_CUDA_VMR::async_resource_ref<_Properties...>{__mr_}, __stream_, __count};
+    uninitialized_async_buffer __ret{async_resource_ref<_Properties...>{__mr_}, __stream_, __count};
     _CUDA_VSTD::swap(__count_, __ret.__count_);
     _CUDA_VSTD::swap(__buf_, __ret.__buf_);
     return __ret;
@@ -295,7 +294,7 @@ public:
 };
 
 template <class _Tp>
-using uninitialized_async_device_buffer = uninitialized_async_buffer<_Tp, mr::device_accessible>;
+using uninitialized_async_device_buffer = uninitialized_async_buffer<_Tp, device_accessible>;
 
 } // namespace cuda::experimental
 
diff --git a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh
index 74af402ca51..55168b38805 100644
--- a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh
+++ b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh
@@ -22,7 +22,6 @@
 #endif // no system header
 
 #include <cuda/__memory_resource/properties.h>
-#include <cuda/__memory_resource/resource_ref.h>
 #include <cuda/std/__memory/align.h>
 #include <cuda/std/__new/launder.h>
 #include <cuda/std/__type_traits/type_set.h>
@@ -108,7 +107,7 @@ private:
   _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
   __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept
     _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)(
-      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<mr::device_accessible, _Properties...>)
+      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
   {
     return {__self.__get_data(), __self.size()};
   }
@@ -119,7 +118,7 @@ private:
   _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto
   __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept
     _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<const _Tp>)(
-      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<mr::device_accessible, _Properties...>)
+      _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v<device_accessible, _Properties...>)
   {
     return {__self.__get_data(), __self.size()};
   }
@@ -252,7 +251,7 @@ public:
   _CCCL_HIDE_FROM_ABI uninitialized_buffer __replace_allocation(const size_t __count)
   {
     // Create a new buffer with a reference to the stored memory resource and swap allocation information
-    uninitialized_buffer __ret{_CUDA_VMR::resource_ref<_Properties...>{__mr_}, __count};
+    uninitialized_buffer __ret{resource_ref<_Properties...>{__mr_}, __count};
     _CUDA_VSTD::swap(__count_, __ret.__count_);
     _CUDA_VSTD::swap(__buf_, __ret.__buf_);
     return __ret;
@@ -260,7 +259,7 @@ public:
 };
 
 template <class _Tp>
-using uninitialized_device_buffer = uninitialized_buffer<_Tp, mr::device_accessible>;
+using uninitialized_device_buffer = uninitialized_buffer<_Tp, device_accessible>;
 
 } // namespace cuda::experimental
 
diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
index 8eb5687925e..5836016c31e 100644
--- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
@@ -41,260 +41,801 @@
 #include <cuda/__memory_resource/resource.h>
 #include <cuda/__memory_resource/resource_ref.h>
 #include <cuda/std/__concepts/concept_macros.h>
-#include <cuda/std/__new_>
-#include <cuda/std/__type_traits/is_constructible.h>
-#include <cuda/std/__type_traits/is_nothrow_constructible.h>
-#include <cuda/std/__type_traits/is_same.h>
-#include <cuda/std/__type_traits/remove_cvref.h>
-#include <cuda/std/__type_traits/type_set.h>
-#include <cuda/std/__utility/exchange.h>
 #include <cuda/std/__utility/forward.h>
-#include <cuda/std/__utility/in_place.h>
+#include <cuda/std/optional>
 
-namespace cuda::experimental
-{
-template <class _Ty, class _Uy = _CUDA_VSTD::remove_cvref_t<_Ty>>
-_CCCL_INLINE_VAR constexpr bool __is_basic_any_resource = false;
-
-//! @rst
-//! .. _cudax-memory-resource-basic-any-resource:
-//!
-//! Base class for a type erased owning wrapper around a memory resource
-//! ---------------------------------------------------------------------
-//!
-//! ``basic_any_resource`` abstracts the differences between a resource and an async resource away, allowing efficient
-//! interoperability between the two.
-//!
-//! @endrst
-template <_CUDA_VMR::_AllocType _Alloc_type, class... _Properties>
-class basic_any_resource
-    : public _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>
-    , private _CUDA_VMR::_Filtered_vtable<_Properties...>
-{
-private:
-  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
-                "The properties of cuda::experimental::basic_any_resource must contain at least one execution "
-                "space property!");
+#include <cuda/experimental/__utility/basic_any.cuh>
 
-  template <_CUDA_VMR::_AllocType, class...>
-  friend class basic_any_resource;
+_CCCL_PUSH_MACROS
+#undef interface
 
-  template <class...>
-  friend struct _CUDA_VMR::_Resource_vtable;
-
-  using __vtable = _CUDA_VMR::_Filtered_vtable<_Properties...>;
+namespace cuda::experimental
+{
+#ifndef _CCCL_DOXYGEN_INVOKED // Do not document this
 
-  //! @brief Validates that a set of \c _OtherProperties... is a superset of \c _Properties... .
-  template <class... _OtherProperties>
-  static constexpr bool __properties_match =
-    _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>;
+template <class _Property>
+using __property_result_t _CCCL_NODEBUG_ALIAS = _CUDA_VSTD::__type_call1< //
+  _CUDA_VSTD::conditional_t<cuda::property_with_value<_Property>,
+                            _CUDA_VSTD::__type_quote1<__property_value_t>,
+                            _CUDA_VSTD::__type_always<void>>,
+  _Property>;
 
-  //! @brief Validates that a passed in \c _Resource satisfies the \c resource or \c async_resource concept respectively
-  //! as well as all properties in \c _Properties... .
-  template <class _Resource>
-  static constexpr bool __valid_resource =
-    _Alloc_type == _CUDA_VMR::_AllocType::_Async
-      ? _CUDA_VMR::async_resource_with<_Resource, _Properties...>
-      : _CUDA_VMR::resource_with<_Resource, _Properties...>;
-
-public:
-  //! @brief Constructs a \c basic_any_resource from a type that satisfies the \c resource or \c async_resource
-  //! concept as well as all properties.
-  //! @param __res The resource to be wrapped within the \c basic_any_resource.
-  _CCCL_TEMPLATE(class _Resource, class __resource_t = _CUDA_VSTD::remove_cvref_t<_Resource>)
-  _CCCL_REQUIRES((!__is_basic_any_resource<_Resource>) _CCCL_AND __valid_resource<__resource_t>)
-  basic_any_resource(_Resource&& __res) noexcept
-      : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(
-          nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, __resource_t>)
-      , __vtable(__vtable::template _Create<__resource_t>())
+template <class _Property>
+struct __with_property
+{
+  template <class _Ty>
+  _CUDAX_PUBLIC_API static auto __get_property(const _Ty& __obj) //
+    -> __property_result_t<_Property>
   {
-    if constexpr (_CUDA_VMR::_IsSmall<__resource_t>())
+    if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>)
     {
-      ::new (static_cast<void*>(this->__object.__buf_)) __resource_t(_CUDA_VSTD::forward<_Resource>(__res));
+      return get_property(__obj, _Property());
     }
     else
     {
-      this->__object.__ptr_ = new __resource_t(_CUDA_VSTD::forward<_Resource>(__res));
+      return void();
     }
   }
 
-  //! @brief Constructs a \c basic_any_resource wrapping an object of type \c _Resource that
-  //! is constructed from \c __args... . \c _Resource must satisfy the  \c resource or \c async_resource
-  //! concept, and it must provide all properties in \c _Properties... .
-  //! @param __args The arguments used to construct the instance of \c _Resource to be wrapped within the
-  //! \c basic_any_resource.
-  _CCCL_TEMPLATE(class _Resource, class... _Args)
-  _CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::is_constructible, _Resource, _Args...) _CCCL_AND __valid_resource<_Resource>)
-  basic_any_resource(_CUDA_VSTD::in_place_type_t<_Resource>, _Args&&... __args) noexcept
-      : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(
-          nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, _Resource>)
-      , __vtable(__vtable::template _Create<_Resource>())
+  template <class...>
+  struct __iproperty : interface<__iproperty>
   {
-    if constexpr (_CUDA_VMR::_IsSmall<_Resource>())
+    _CUDAX_HOST_API friend auto
+    get_property([[maybe_unused]] const __iproperty& __obj, _Property) -> __property_result_t<_Property>
     {
-      ::new (static_cast<void*>(this->__object.__buf_)) _Resource(_CUDA_VSTD::forward<_Args>(__args)...);
+      if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>)
+      {
+        return __cudax::virtcall<&__get_property<__iproperty>>(&__obj);
+      }
+      else
+      {
+        return void();
+      }
     }
-    else
-    {
-      this->__object.__ptr_ = new _Resource(_CUDA_VSTD::forward<_Args>(__args)...);
-    }
-  }
 
-  //! @brief Conversion from a \c basic_any_resource with the same set of properties but in a different order.
-  //! This constructor also handles conversion from \c any_async_resource to \c any_resource
-  //! @param __other The other \c basic_any_resource.
-  _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _OtherAllocType, class... _OtherProperties)
-  _CCCL_REQUIRES(
-    (_CUDA_VSTD::_IsNotSame<basic_any_resource, basic_any_resource<_OtherAllocType, _OtherProperties...>>::value)
-      _CCCL_AND(_OtherAllocType == _Alloc_type || _OtherAllocType == _CUDA_VMR::_AllocType::_Async)
-        _CCCL_AND __properties_match<_OtherProperties...>)
-  basic_any_resource(basic_any_resource<_OtherAllocType, _OtherProperties...> __other) noexcept
-      : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(
-          nullptr, _CUDA_VSTD::exchange(__other.__static_vtable, nullptr))
-      , __vtable(__other)
-  {
-    _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object");
-    this->__static_vtable->__move_fn(&this->__object, &__other.__object);
-  }
+    template <class _Ty>
+    using overrides _CCCL_NODEBUG_ALIAS = overrides_for<_Ty, _CUDAX_FNPTR_CONSTANT_WAR(&__get_property<_Ty>)>;
+  };
+};
+
+template <class _Property>
+using __iproperty = typename __with_property<_Property>::template __iproperty<>;
+
+template <class... _Properties>
+using __iproperty_set = iset<__iproperty<_Properties>...>;
+
+// Wrap the calls of the allocate_async and deallocate_async member functions
+// because of NVBUG#4967486
+template <class _Resource>
+_CUDAX_PUBLIC_API auto __allocate_async(_Resource& __mr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
+  -> decltype(__mr.allocate_async(__bytes, __alignment, __stream))
+{
+  return __mr.allocate_async(__bytes, __alignment, __stream);
+}
+
+template <class _Resource>
+_CUDAX_PUBLIC_API auto
+__deallocate_async(_Resource& __mr, void* __pv, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
+  -> decltype(__mr.deallocate_async(__pv, __bytes, __alignment, __stream))
+{
+  __mr.deallocate_async(__pv, __bytes, __alignment, __stream);
+}
 
-  //! @brief Move-constructs a \c basic_any_resource from another one, taking ownership of the stored resource.
-  //! @param __other The other \c basic_any_resource.
-  basic_any_resource(basic_any_resource&& __other) noexcept
-      : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(
-          nullptr, _CUDA_VSTD::exchange(__other.__static_vtable, nullptr))
-      , __vtable(__other)
+template <class...>
+struct __ibasic_resource : interface<__ibasic_resource>
+{
+  _CUDAX_PUBLIC_API void* allocate(size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t))
   {
-    _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object");
-    this->__static_vtable->__move_fn(&this->__object, &__other.__object);
+    return __cudax::virtcall<&__ibasic_resource::allocate>(this, __bytes, __alignment);
   }
 
-  //! @brief Move-assigns another \c basic_any_resource, taking ownership of the stored resource.
-  //! @param __other The other \c basic_any_resource.
-  basic_any_resource& operator=(basic_any_resource&& __other) noexcept
+  _CUDAX_PUBLIC_API void deallocate(void* __pv, size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t))
   {
-    if (this->__static_vtable != nullptr)
-    {
-      this->__static_vtable->__destroy_fn(&this->__object);
-      this->__static_vtable = nullptr;
-    }
+    return __cudax::virtcall<&__ibasic_resource::deallocate>(this, __pv, __bytes, __alignment);
+  }
 
-    if (__other.__static_vtable != nullptr)
-    {
-      this->__static_vtable = _CUDA_VSTD::exchange(__other.__static_vtable, nullptr);
-      this->__static_vtable->__move_fn(&this->__object, &__other.__object);
-    }
+  template <class _Ty>
+  using overrides _CCCL_NODEBUG_ALIAS =
+    overrides_for<_Ty, _CUDAX_FNPTR_CONSTANT_WAR(&_Ty::allocate), _CUDAX_FNPTR_CONSTANT_WAR(&_Ty::deallocate)>;
+};
 
-    return *this;
+template <class...>
+struct __ibasic_async_resource : interface<__ibasic_async_resource>
+{
+  _CUDAX_PUBLIC_API void* allocate_async(size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
+  {
+    return __cudax::virtcall<&__allocate_async<__ibasic_async_resource>>(this, __bytes, __alignment, __stream);
   }
 
-  //! @brief Copy-constructs a \c basic_any_resource from another one.
-  //! @param __other The other \c basic_any_resource.
-  basic_any_resource(const basic_any_resource& __other)
-      : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(nullptr, __other.__static_vtable)
-      , __vtable(__other)
+  _CUDAX_PUBLIC_API void* allocate_async(size_t __bytes, ::cuda::stream_ref __stream)
   {
-    _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object");
-    this->__static_vtable->__copy_fn(&this->__object, &__other.__object);
+    return __cudax::virtcall<&__allocate_async<__ibasic_async_resource>>(
+      this, __bytes, alignof(_CUDA_VSTD::max_align_t), __stream);
   }
 
-  //! @brief Copy-assigns another \c basic_any_resource.
-  //! @param __other The other \c basic_any_resource.
-  basic_any_resource& operator=(const basic_any_resource& __other)
+  _CUDAX_PUBLIC_API void deallocate_async(void* __pv, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)
   {
-    return this == &__other ? *this : operator=(basic_any_resource(__other));
+    return __cudax::virtcall<&__deallocate_async<__ibasic_async_resource>>(this, __pv, __bytes, __alignment, __stream);
   }
 
-  //! @brief Destroys the stored resource
-  ~basic_any_resource() noexcept
+  _CUDAX_PUBLIC_API void deallocate_async(void* __pv, size_t __bytes, ::cuda::stream_ref __stream)
   {
-    if (this->__static_vtable != nullptr)
-    {
-      this->__static_vtable->__destroy_fn(&this->__object);
-    }
+    return __cudax::virtcall<&__deallocate_async<__ibasic_async_resource>>(
+      this, __pv, __bytes, alignof(_CUDA_VSTD::max_align_t), __stream);
   }
 
-  //! @brief Converts a \c basic_any_resource to a \c resource_ref with a potential subset of properties.
-  //! @return The \c resource_ref to this resource.
-  _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _OtherAllocType, class... _OtherProperties)
-  _CCCL_REQUIRES((_OtherAllocType == _CUDA_VMR::_AllocType::_Default || _OtherAllocType == _Alloc_type) _CCCL_AND(
-    _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_Properties...>, _OtherProperties...>))
-  operator _CUDA_VMR::basic_resource_ref<_OtherAllocType, _OtherProperties...>() noexcept
+  template <class _Ty>
+  using overrides _CCCL_NODEBUG_ALIAS =
+    overrides_for<_Ty,
+                  _CUDAX_FNPTR_CONSTANT_WAR(&__allocate_async<_Ty>),
+                  _CUDAX_FNPTR_CONSTANT_WAR(&__deallocate_async<_Ty>)>;
+};
+
+// This is the pseudo-virtual override for getting an old-style vtable pointer
+// from a new-style basic_any resource type. It is used below by
+// __iresource_ref_conversions.
+template <class _Resource>
+_CUDAX_PUBLIC_API const _CUDA_VMR::_Alloc_vtable* __get_resource_vptr(_Resource&) noexcept
+{
+  if constexpr (_CUDA_VMR::async_resource<_Resource>)
   {
-    return _CUDA_VMR::_Resource_ref_helper::_Construct<_Alloc_type, _OtherProperties...>(
-      this->_Get_object(), this->__static_vtable, static_cast<const __vtable&>(*this));
+    return &_CUDA_VMR::__alloc_vtable<_CUDA_VMR::_AllocType::_Async, _CUDA_VMR::_WrapperType::_Reference, _Resource>;
   }
-
-  //! @brief Swaps a \c basic_any_resource with another one.
-  //! @param __other The other \c basic_any_resource.
-  void swap(basic_any_resource& __other) noexcept
+  else if constexpr (_CUDA_VMR::resource<_Resource>)
   {
-    auto __tmp = _CUDA_VSTD::move(__other);
-    __other    = _CUDA_VSTD::move(*this);
-    *this      = _CUDA_VSTD::move(__tmp);
+    return &_CUDA_VMR::__alloc_vtable<_CUDA_VMR::_AllocType::_Default, _CUDA_VMR::_WrapperType::_Reference, _Resource>;
   }
-
-  //! @brief Equality comparison between two \c basic_any_resource
-  //! @param __rhs The other \c basic_any_resource
-  //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
-  //! the result of that equality comparison. Otherwise returns false.
-  _CCCL_NODISCARD bool operator==(const basic_any_resource& __rhs) const
+  else
   {
-    return (this->__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn)
-        && this->__static_vtable->__equal_fn(this->_Get_object(), __rhs._Get_object());
+    // This branch is taken when called from the thunk of an unspecialized
+    // interface; e.g., `icat<>` rather than `icat<ialley_cat<>>`. The thunks of
+    // unspecialized interfaces are never called, they just need to exist. The
+    // function pointer will be used as a key to look up the proper override.
+    _CCCL_UNREACHABLE();
   }
+}
 
-  //! @brief Equality comparison between two \c basic_any_resource
-  //! @param __rhs The other \c basic_any_resource
-  //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
-  //! the result of that equality comparison. Otherwise returns false.
-  _CCCL_TEMPLATE(class... _OtherProperties)
-  _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
-                   _CCCL_AND __properties_match<_OtherProperties...>)
-  _CCCL_NODISCARD bool operator==(const basic_any_resource<_Alloc_type, _OtherProperties...>& __rhs) const
+_CCCL_DIAG_PUSH
+_CCCL_DIAG_SUPPRESS_GCC("-Wunused-but-set-parameter")
+
+// Given a list of properties and a basic_any vptr, build a _Resource_vtable
+// for the properties as cuda::mr::basic_resource_ref expects.
+template <class _VPtr, class... _Properties>
+_CUDAX_HOST_API auto __make_resource_vtable(_VPtr __vptr, _CUDA_VMR::_Resource_vtable<_Properties...>*) noexcept
+  -> _CUDA_VMR::_Resource_vtable<_Properties...>
+{
+  return {__vptr->__query_interface(__iproperty<_Properties>())->__fn_...};
+}
+
+_CCCL_DIAG_POP
+
+// This interface provides the any_[async_]resource types with a conversion
+// to the old cuda::mr::basic_resource_ref types.
+template <class... _Super>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES __iresource_ref_conversions
+    : interface<__iresource_ref_conversions>
+    , _CUDA_VMR::_Resource_ref_base
+{
+  using __self_t = basic_any_from_t<__iresource_ref_conversions&>;
+
+  template <class _Property>
+  using __iprop = __rebind_interface<__iproperty<_Property>, _Super...>;
+
+  template <_CUDA_VMR::_AllocType _Alloc_type>
+  using __iresource = __rebind_interface<
+    _CUDA_VSTD::
+      conditional_t<_Alloc_type == _CUDA_VMR::_AllocType::_Default, __ibasic_resource<>, __ibasic_async_resource<>>,
+    _Super...>;
+
+  _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _Alloc_type, class... _Properties)
+  _CCCL_REQUIRES(_CUDA_VSTD::derived_from<__self_t, __iresource<_Alloc_type>>
+                 && (_CUDA_VSTD::derived_from<__self_t, __iprop<_Properties>> && ...))
+  operator _CUDA_VMR::basic_resource_ref<_Alloc_type, _Properties...>()
   {
-    return (this->__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn)
-        && this->__static_vtable->__equal_fn(this->_Get_object(), __rhs._Get_object());
+    auto& __self = __cudax::basic_any_from(*this);
+    auto* __vptr = __cudax::virtcall<&__get_resource_vptr<__iresource_ref_conversions>>(this);
+    auto* __vtag = static_cast<_CUDA_VMR::_Filtered_vtable<_Properties...>*>(nullptr);
+    auto __props = __cudax::__make_resource_vtable(__basic_any_access::__get_vptr(__self), __vtag);
+
+    return _CUDA_VMR::_Resource_ref_helper::_Construct<_Alloc_type, _Properties...>(
+      __basic_any_access::__get_optr(__self),
+      static_cast<const _CUDA_VMR::_Vtable_store<_Alloc_type>*>(__vptr),
+      __props);
   }
 
-  //! @brief Inequality comparison between two \c basic_any_resource
-  //! @param __rhs The other \c basic_any_resource
-  //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
-  //! the inverse result of that equality comparison. Otherwise returns true.
-  _CCCL_NODISCARD bool operator!=(const basic_any_resource& __rhs) const
+  template <class _Resource>
+  using overrides = overrides_for<_Resource, _CUDAX_FNPTR_CONSTANT_WAR(&__get_resource_vptr<_Resource>)>;
+};
+
+template <class... _Properties>
+using __iresource _CCCL_NODEBUG_ALIAS =
+  iset<__ibasic_resource<>,
+       __iproperty_set<_Properties...>,
+       __iresource_ref_conversions<>,
+       icopyable<>,
+       iequality_comparable<>>;
+
+template <class... _Properties>
+using __iasync_resource _CCCL_NODEBUG_ALIAS = iset<__iresource<_Properties...>, __ibasic_async_resource<>>;
+
+template <class _Property>
+using __try_property_result_t =
+  _CUDA_VSTD::conditional_t<!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>, //
+                            _CUDA_VSTD::optional<__property_result_t<_Property>>, //
+                            bool>;
+
+template <class _Derived>
+struct __with_try_get_property
+{
+  template <class _Property>
+  _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto
+  try_get_property(const _Derived& __self, _Property) noexcept -> __try_property_result_t<_Property>
   {
-    return !(*this == __rhs);
+    auto __prop = __cudax::dynamic_any_cast<const __iproperty<_Property>*>(&__self);
+    if constexpr (_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>)
+    {
+      return __prop != nullptr;
+    }
+    else if (__prop)
+    {
+      return get_property(*__prop, _Property{});
+    }
+    else
+    {
+      return _CUDA_VSTD::nullopt;
+    }
   }
+};
+
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_async_resource;
 
-  //! @brief Inequality comparison between two \c basic_any_resource
-  //! @param __rhs The other \c basic_any_resource
-  //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
-  //! the inverse result of that equality comparison. Otherwise returns true.
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES async_resource_ref;
+
+// `any_resource` wraps any given resource that satisfies the required
+// properties. It owns the contained resource, taking care of construction /
+// destruction. This makes it especially suited for use in e.g. container types
+// that need to ensure that the lifetime of the container exceeds the lifetime
+// of the memory resource used to allocate the storage
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_resource
+    : basic_any<__iresource<_Properties...>>
+    , __with_try_get_property<any_resource<_Properties...>>
+{
+private:
+  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
+                "The properties of cuda::experimental::any_resource must contain at least one execution space "
+                "property!");
+  using __base_t = __cudax::basic_any<__iresource<_Properties...>>;
+  using __base_t::interface;
+
+public:
+  // any_async_resource is convertible to any_resource
   _CCCL_TEMPLATE(class... _OtherProperties)
-  _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
-                   _CCCL_AND __properties_match<_OtherProperties...>)
-  _CCCL_NODISCARD bool operator!=(const basic_any_resource<_Alloc_type, _OtherProperties...>& __rhs) const
+  _CCCL_REQUIRES((_CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__type_set<_OtherProperties...>, _Properties...>) )
+  any_resource(__cudax::any_async_resource<_OtherProperties...> __other) noexcept
+      : __base_t(_CUDA_VSTD::move(__other.__base()))
+  {}
+
+  // Inherit other constructors from basic_any
+  using __base_t::__base_t;
+};
+
+// ``any_async_resource`` wraps any given async_resource that satisfies the
+// required properties. It owns the contained resource, taking care of
+// construction / destruction. This makes it especially suited for use in e.g.
+// container types that need to ensure that the lifetime of the container
+// exceeds the lifetime of the memory resource used to allocate the storage
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_async_resource
+    : basic_any<__iasync_resource<_Properties...>>
+    , __with_try_get_property<any_async_resource<_Properties...>>
+{
+private:
+  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
+                "The properties of cuda::experimental::any_async_resource must contain at least one execution space "
+                "property!");
+
+  template <class...>
+  friend struct any_resource;
+
+  using __base_t = __cudax::basic_any<__iasync_resource<_Properties...>>;
+  using __base_t::interface;
+
+  __base_t& __base() noexcept
   {
-    return !(*this == __rhs);
+    return *this;
   }
 
-  //! @brief Forwards the stateless properties
-  _CCCL_TEMPLATE(class _Property)
-  _CCCL_REQUIRES((!property_with_value<_Property>) _CCCL_AND(_CUDA_VSTD::__is_included_in_v<_Property, _Properties...>))
-  friend void get_property(const basic_any_resource&, _Property) noexcept {}
+public:
+  // Inherit constructors from basic_any
+  using __base_t::__base_t;
+};
 
-  //! @brief Forwards the stateful properties
-  _CCCL_TEMPLATE(class _Property)
-  _CCCL_REQUIRES(property_with_value<_Property> _CCCL_AND(_CUDA_VSTD::__is_included_in_v<_Property, _Properties...>))
-  _CCCL_NODISCARD_FRIEND __property_value_t<_Property> get_property(const basic_any_resource& __res, _Property) noexcept
+//! @brief Type erased wrapper around a `resource` that satisfies \tparam _Properties
+//! @tparam _Properties The properties that any resource wrapped within the `resource_ref` needs to satisfy
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES resource_ref
+    : basic_any<__iresource<_Properties...>&>
+    , __with_try_get_property<resource_ref<_Properties...>>
+{
+private:
+  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
+                "The properties of cuda::experimental::resource_ref must contain at least one execution space "
+                "property!");
+  using __base_t = __cudax::basic_any<__iresource<_Properties...>&>;
+  using __base_t::interface;
+
+public:
+  // async_resource_ref is convertible to resource_ref
+  _CCCL_TEMPLATE(class... _OtherProperties)
+  _CCCL_REQUIRES((_CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__type_set<_OtherProperties...>, _Properties...>) )
+  resource_ref(__cudax::async_resource_ref<_OtherProperties...> __other) noexcept
+      : __base_t(__other.__base())
+  {}
+
+  // Conversions from the resource_ref types in cuda::mr is not supported.
+  template <class... _OtherProperties>
+  resource_ref(_CUDA_VMR::resource_ref<_OtherProperties...>) = delete;
+
+  template <class... _OtherProperties>
+  resource_ref(_CUDA_VMR::async_resource_ref<_OtherProperties...>) = delete;
+
+  // Inherit other constructors from basic_any
+  using __base_t::__base_t;
+};
+
+//! @brief Type erased wrapper around a `async_resource` that satisfies \tparam _Properties
+//! @tparam _Properties The properties that any async resource wrapped within the `async_resource_ref` needs to satisfy
+template <class... _Properties>
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES async_resource_ref
+    : basic_any<__iasync_resource<_Properties...>&>
+    , __with_try_get_property<async_resource_ref<_Properties...>>
+{
+private:
+  static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>,
+                "The properties of cuda::experimental::async_resource_ref must contain at least one execution space "
+                "property!");
+
+  template <class...>
+  friend struct resource_ref;
+
+  using __base_t = __cudax::basic_any<__iasync_resource<_Properties...>&>;
+  using __base_t::interface;
+
+  __base_t& __base() noexcept
   {
-    _CUDA_VMR::_Property_vtable<_Property> const& __prop = __res;
-    return __prop.__property_fn(__res._Get_object());
+    return *this;
   }
+
+public:
+  // Conversions from the resource_ref types in cuda::mr is not supported.
+  template <class... _OtherProperties>
+  async_resource_ref(_CUDA_VMR::async_resource_ref<_OtherProperties...>) = delete;
+
+  // Inherit other constructors from basic_any
+  using __base_t::__base_t;
 };
 
-//! @brief Checks whether a passed in type is a specialization of basic_any_resource
-template <class _Ty, _CUDA_VMR::_AllocType _Alloc_type, class... _Properties>
-_CCCL_INLINE_VAR constexpr bool __is_basic_any_resource<_Ty, basic_any_resource<_Alloc_type, _Properties...>> = true;
+_CCCL_TEMPLATE(class... _Properties, class _Resource)
+_CCCL_REQUIRES(mr::resource_with<_Resource, _Properties...>)
+resource_ref<_Properties...> __as_resource_ref(_Resource& __mr) noexcept
+{
+  return resource_ref<_Properties...>(__mr);
+}
+
+template <class... _Properties>
+resource_ref<_Properties...> __as_resource_ref(resource_ref<_Properties...> const __mr) noexcept
+{
+  return __mr;
+}
+
+template <class... _Properties>
+resource_ref<_Properties...> __as_resource_ref(async_resource_ref<_Properties...> const __mr) noexcept
+{
+  return __mr;
+}
+
+template <class... _Properties, mr::_AllocType _Alloc_type>
+mr::resource_ref<_Properties...>
+__as_resource_ref(mr::basic_resource_ref<_Alloc_type, _Properties...> const __mr) noexcept
+{
+  return __mr;
+}
+
+#else // ^^^ !_CCCL_DOXYGEN_INVOKED ^^^ / vvv _CCCL_DOXYGEN_INVOKED vvv
+
+enum class _ResourceKind
+{
+  _Synchronous,
+  _Asynchronous
+};
+
+//! @rst
+//! Type erased wrapper around a `resource` or an `async_resource`
+//! --------------------------------------------------------------
+//!
+//! ``basic_any_resource`` wraps any given :ref:`resource
+//! <libcudacxx-extended-api-memory-resources-resource>` that satisfies the
+//! required properties. It owns the contained resource, taking care of
+//! construction / destruction. This makes it especially suited for use in e.g.
+//! container types that need to ensure that the lifetime of the container
+//! exceeds the lifetime of the memory resource used to allocate the storage
+//!
+//! ``basic_any_resource`` models the ``cuda::std::regular`` concept.
+//! @endrst
+//!
+//! @tparam _Kind Either `_ResourceKind::_Synchronous` for `any_resource`, or
+//! `_ResourceKind::_Asynchronous` for `any_async_resource`.
+//! @tparam _Properties A pack of property types that a memory resource must
+//! provide in order to be storable in instances of this `basic_any_resource`
+//! type.
+//!
+//! @sa any_resource
+//! @sa any_async_resource
+//! @sa resource_ref
+//! @sa async_resource_ref
+template <_ResourceKind _Kind, class... _Properties>
+class basic_any_resource
+{
+public:
+  //! @brief Constructs a \c basic_any_resource with no value
+  //! @post `has_value()` is `false`
+  basic_any_resource() noexcept;
+
+  //! @brief Move constructs a \c basic_any_resource
+  //! @post `has_value()` is `true` if `__other` had a value prior to the move,
+  //! and `false` otherwise. `__other.has_value()` is `false`.
+  basic_any_resource(basic_any_resource&& __other) noexcept;
+
+  //! @brief Copy constructs a \c basic_any_resource
+  //! @post `has_value()` is the same as `__other.has_value()`.
+  basic_any_resource(const basic_any_resource& __other);
+
+  //! @brief Constructs a \c basic_any_resource from a type that satisfies the
+  //! \c resource concept.
+  //! and that supports all of the specified properties.
+  //! @param __res The resource to be wrapped by the \c basic_any_resource.
+  //! @pre \c _Resource is not a specialization of \c basic_any_resource or
+  //! \c basic_resource_ref, or a type derived from such.
+  //! @pre `resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous,
+  //! `async_resource_with<_Resource, _Properties...>` is `true`.
+  //! @post `has_value()` is `true`
+  template <class _Resource>
+  basic_any_resource(_Resource __res);
+
+  //! @brief Conversion from a type-erased resource with a superset of the
+  //! required properties.
+  //! @param __res The object to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  //! @post `has_value()` is equal to `__res.has_value()`
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_any_resource(basic_any_resource<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Deep copy from a type-erased resource reference with a superset
+  //! of the required properties.
+  //!
+  //! The object to which \c __res refers is copied into `*this`.
+  //! @param __res The reference to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_any_resource(basic_resource_ref<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Move assigns a \c basic_any_resource
+  //! @post `has_value()` is `true` if `__other` had a value prior to the move,
+  //! and `false` otherwise.
+  //! @post `__other.has_value()` is `false`.
+  basic_any_resource& operator=(basic_any_resource&& __other) noexcept;
+
+  //! @brief Copy assigns a \c basic_any_resource
+  //! @post `has_value()` is the same as `__other.has_value()`.
+  basic_any_resource& operator=(const basic_any_resource& __other);
+
+  //! @brief Assigns from a type that satisfies the \c resource concept and that
+  //! supports all of the specified properties.
+  //! @param __res The resource to be wrapped within the \c basic_any_resource
+  //! @pre \c _Resource is not a specialization of \c basic_any_resource or
+  //! \c basic_resource_ref, or a type derived from such.
+  //! @pre `resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous,
+  //! `async_resource_with<_Resource, _Properties...>` is `true`.
+  //! @post `has_value()` is `true`
+  template <class _Resource>
+  basic_any_resource& operator=(_Resource __res);
+
+  //! @brief Assignment from a type-erased resource with a superset of the
+  //! required properties.
+  //! @param __res The object to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  //! @post `has_value()` is equal to `__res.has_value()`.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_any_resource& operator=(basic_any_resource<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Deep copy from a type-erased resource reference with a superset of
+  //! the required properties.
+  //! @param __res The type-erased resource reference to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  //! @post `has_value()` is `true`.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_any_resource& operator=(basic_resource_ref<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Equality comparison between two type-erased memory resource
+  //! @param __rhs The type-erased resource to compare with `*this`.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`.
+  //! @return `true` if both resources hold objects of the same type and those
+  //! objects compare equal, and `false` otherwise.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  [[nodiscard]] bool operator==(const basic_any_resource<_OtherKind, _OtherProperties...>& __rhs) const;
+
+  //! @brief Equality comparison between `*this` and a type-erased resource
+  //! reference.
+  //! @param __rhs The type-erased resource reference to compare with `*this`.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`.
+  //! @return `true` if \c __rhs refers to an object of the same type as that
+  //! wrapped by `*this` and those objects compare equal; `false` otherwise.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  [[nodiscard]] bool operator==(const basic_resource_ref<_OtherKind, _OtherProperties...>& __rhs) const;
+
+  //! @brief Calls `allocate` on the wrapped object with the specified
+  //! arguments.
+  //! @pre `has_value()` is `true`.
+  //! @return `obj.allocate(__size, __align)`, where `obj` is the wrapped
+  //! object.
+  [[nodiscard]] void* allocate(size_t __size, size_t __align = alignof(cuda::std::max_align_t));
+
+  //! @brief Calls `deallocate` on the wrapped object with the specified
+  //! arguments.
+  //! @pre `has_value()` is `true`.
+  //! @pre `__pv` must be a pointer that was previously returned by a call to \c
+  //! allocate on the object wrapped by `*this`.
+  //! @return `obj.deallocate(__pv, __size, __align)`, where `obj` is the
+  //! wrapped object.
+  void deallocate(void* __pv, size_t __size, size_t __align = alignof(cuda::std::max_align_t));
+
+  //! @brief Calls `allocate_async` on the wrapped object with the specified
+  //! arguments.
+  //! @pre `_Kind` is `_ResourceKind::_Asynchronous`.
+  //! @pre `has_value()` is `true`.
+  //! @return `obj.allocate_async(__size, __align, __stream)`, where `obj` is
+  //! the wrapped object.
+  //! @warning The returned pointer is not valid until `__stream` has been
+  //! synchronized.
+  [[nodiscard]] void* allocate_async(size_t __size, size_t __align, cuda::stream_ref __stream);
+
+  //! @brief Equivalent to `allocate_async(__size,
+  //! alignof(_CUDA_VSTD::max_align_t), __stream)`.
+  [[nodiscard]] void* allocate_async(size_t __size, cuda::stream_ref __stream);
+
+  //! @brief Calls `deallocate_async` on the wrapped object with the specified
+  //! arguments.
+  //! @pre `_Kind` is `_ResourceKind::_Asynchronous`.
+  //! @pre `has_value()` is `true`.
+  //! @pre `__pv` must be a pointer that was previously returned by a call to
+  //! \c allocate_async on the object wrapped by `*this`.
+  //! @return `obj.deallocate_async(__pv, __size, __align, __stream)`, where
+  //! `obj` is the wrapped object.
+  void deallocate_async(void* __pv, size_t __size, size_t __align, cuda::stream_ref __stream);
+
+  //! @brief Equivalent to `deallocate_async(__pv, __size,
+  //! alignof(_CUDA_VSTD::max_align_t), __stream)`.
+  void deallocate_async(void* __pv, size_t __size, cuda::stream_ref __stream);
+
+  //! @brief Checks if `*this` holds a value.
+  //! @return `true` if `*this` holds a value; `false` otherwise.
+  [[nodiscard]] bool has_value() const noexcept;
+
+  //! @brief Resets `*this` to the empty state.
+  //! @post `has_value() == false`
+  void reset() noexcept;
+
+  //! @return A reference to the \c type_info object for the wrapped
+  //! resource, or `typeid(void)` if `has_value()` is `false`.
+  [[nodiscard]] const cuda::std::type_info& type() const noexcept;
+
+  //! @brief Forwards a property query to the type-erased object.
+  //! @param __res The \c basic_any_resource object
+  //! @param __prop The property to query
+  //! @pre The type \c _Property is one of the types in the pack
+  //! `_Properties...`.
+  //! @return The result of calling `get_property(__obj, __prop)`, where `__obj`
+  //! is the type-erased object stored in `__res`.
+  template <class _Property>
+  friend decltype(auto) get_property(const basic_any_resource& __res, _Property __prop) noexcept;
+
+  //! @brief Attempts to forward a property query to the type-erased object and
+  //! returns a _`boolean-testable`_ object that contains the result, if any.
+  //!
+  //! @tparam _Property
+  //! @param __res The \c basic_any_resource object
+  //! @param __prop The property to query
+  //! @pre `has_value()` is `true`.
+  //! @return
+  //! Let:
+  //!   - \c obj be the wrapped object.
+  //!   - \c ValueType be the associated value type of \c __prop.
+  //!   - \c ReturnType be \c bool if \c ValueType is \c void. Otherwise,
+  //!     \c ReturnType is \c cuda::std::optional<ValueType>.
+  //!   - \c _OtherProperties be the pack of type parameters of the
+  //!     \c basic_any_resource object that first type-erased \c obj. [_Note:_
+  //!     `_OtherProperties` is different than `_Properties` when \c *this is
+  //!     the result of a conversion from a different \c basic_any type. -- end
+  //!     note]
+  //!   .
+  //! `try_get_property(__res, __prop)` has type \c ReturnType. If \c _Property
+  //! is not in the pack \c _OtherProperties, returns `ReturnType()`.
+  //! Otherwise:
+  //!   - Returns \c true if \c ValueType is \c void.
+  //!   - Returns `ReturnType(get_property(obj, __prop))` otherwise.
+  template <class _Property>
+  friend auto try_get_property(const basic_any_resource& __res, _Property __prop) noexcept;
+};
+
+//! @brief Type erased wrapper around a reference to an object that satisfies
+//! the \c resource concept and that provides the requested \c _Properties.
+//! @tparam _Properties The properties that any resource wrapped within the
+//! `basic_resource_ref` needs to provide.
+//!
+//! ``basic_resource_ref`` models the ``cuda::std::copyable`` and
+//! ``cuda::std::equality_comparable`` concepts.
+template <_ResourceKind _Kind, class... _Properties>
+class basic_resource_ref
+{
+public:
+  //! @brief Copy constructs a \c basic_resource_ref
+  //! @post `*this` and `__other` both refer to the same resource object.
+  basic_resource_ref(const basic_resource_ref& __other);
+
+  //! @brief Constructs a \c basic_resource_ref from a reference to a type that
+  //! satisfies the \c resource concept and that supports all of the specified
+  //! properties.
+  //! @param __res The resource reference to be wrapped.
+  //! @pre `resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous,
+  //! `async_resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c __res refers to a specialization of \c basic_any_resource or
+  //! a type derived from such, `__res.has_value()` is `true`.
+  template <class _Resource>
+  basic_resource_ref(_Resource& __res);
+
+  //! @brief Conversion from type-erased resource reference with a superset
+  //! of the required properties.
+  //! @param __res The other type-erased resource reference to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  //! @post `*this` and `__res` both refer to the same resource object.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_resource_ref(basic_resource_ref<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Rebinds `*this` to refer to the object to which `__other` refers.
+  //! @post `*this` and `__other` both refer to the same resource object.
+  basic_resource_ref& operator=(const basic_resource_ref& __other);
+
+  //! @brief Rebinds the wrapped reference to an object whose type satisfies the
+  //! \c resource concept and that supports all of the specified properties.
+  //! @param __res The reference to the resource to be wrapped by the \c
+  //! basic_resource_ref.
+  //! @pre `resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous,
+  //! `async_resource_with<_Resource, _Properties...>` is `true`.
+  //! @pre If \c __res refers to a specialization of \c basic_any_resource or a
+  //! type derived from such, `__res.has_value()` is `true`.
+  template <class _Resource>
+  basic_resource_ref& operator=(_Resource& __res);
+
+  //! @brief Rebinds `*this` to refer to the object to which `__other` refers.
+  //! @param __res The other type-erased resource reference to copy from.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_OtherProperties...` is a superset of `_Properties...`.
+  //! @post `*this` and `__res` both refer to the same resource object.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  basic_resource_ref& operator=(basic_resource_ref<_OtherKind, _OtherProperties...> __res);
+
+  //! @brief Equality comparison between two type-erased resource references.
+  //! @param __rhs The other type-erased resource reference.
+  //! @pre \c _OtherKind is equal to either \c _Kind or
+  //! \c _ResourceKind::_Asynchronous.
+  //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`.
+  //! @return `true` if both resources refer to objects of the same type and
+  //! those objects compare equal. Otherwise, returns `false`.
+  template <_ResourceKind _OtherKind, class... _OtherProperties>
+  [[nodiscard]] bool operator==(const basic_resource_ref<_OtherKind, _OtherProperties...>& __rhs) const;
+
+  //! @brief Calls `allocate` on the wrapped reference with the specified
+  //! arguments.
+  //! @return `obj.allocate(__size, __align)`, where `obj` is the wrapped
+  //! reference.
+  [[nodiscard]] void* allocate(size_t __size, size_t __align = alignof(cuda::std::max_align_t));
+
+  //! @brief Calls `deallocate` on the wrapped reference with the specified
+  //! arguments.
+  //! @pre `__pv` must be a pointer that was previously returned by a call to
+  //! \c allocate on the object referenced by `*this`.
+  //! @return `obj.deallocate(__pv, __size, __align)`, where `obj` is the
+  //! wrapped reference.
+  void deallocate(void* __pv, size_t __size, size_t __align = alignof(cuda::std::max_align_t));
+
+  //! @brief Calls `allocate_async` on the wrapped reference with the specified
+  //! arguments.
+  //! @pre `_Kind` is `_ResourceKind::_Asynchronous`.
+  //! @return `obj.allocate_async(__size, __align, __stream)`, where `obj` is
+  //! the wrapped reference.
+  //! @warning The returned pointer is not valid until `__stream` has been
+  //! synchronized.
+  [[nodiscard]] void* allocate_async(size_t __size, size_t __align, cuda::stream_ref __stream);
+
+  //! @brief Equivalent to `allocate_async(__size,
+  //! alignof(_CUDA_VSTD::max_align_t), __stream)`.
+  [[nodiscard]] void* allocate_async(size_t __size, cuda::stream_ref __stream);
+
+  //! @brief Calls `deallocate_async` on the wrapped reference with the specified
+  //! arguments.
+  //! @pre `_Kind` is `_ResourceKind::_Asynchronous`.
+  //! @pre `__pv` must be a pointer that was previously returned by a call to \c
+  //! allocate_async on the object referenced by `*this`.
+  //! @return `obj.deallocate_async(__pv, __size, __align, __stream)`, where
+  //! `obj` is the wrapped reference.
+  void deallocate_async(void* __pv, size_t __size, size_t __align, cuda::stream_ref __stream);
+
+  //! @brief Equivalent to `deallocate_async(__pv, __size,
+  //! alignof(_CUDA_VSTD::max_align_t), __stream)`.
+  void deallocate_async(void* __pv, size_t __size, cuda::stream_ref __stream);
+
+  //! @return A reference to the \c type_info object for the type of the object
+  //! to which `*this` refers.
+  [[nodiscard]] const cuda::std::type_info& type() const noexcept;
+
+  //! @brief Forwards a property query to the type-erased reference.
+  //! @tparam _Property
+  //! @param __res The \c basic_resource_ref object
+  //! @param __prop The property to query
+  //! @pre \c _Property is a type in `_Properties...`.
+  //! @return The result of calling `get_property(__obj, __prop)`, where `__obj`
+  //! is the type-erased reference stored in `__res`.
+  template <class _Property>
+  friend decltype(auto) get_property(const basic_resource_ref& __res, _Property __prop) noexcept;
+
+  //! @brief Attempts to forward a property query to the type-erased object and
+  //! returns a _`boolean-testable`_ object that contains the result, if any.
+  //!
+  //! @tparam _Property
+  //! @param __res The \c any_resource object
+  //! @param __prop The property to query
+  //! @pre `has_value()` is `true`.
+  //! @return
+  //! Let:
+  //!   - \c obj be the wrapped reference.
+  //!   - \c ValueType be the associated value type of \c __prop.
+  //!   - \c ReturnType be \c bool if \c ValueType is \c void. Otherwise,
+  //!     \c ReturnType is \c cuda::std::optional<ValueType>.
+  //!   - \c _OtherProperties be the pack of type parameters of the wrapper type
+  //!     that first type-erased \c obj. [_Note:_ `_OtherProperties` is
+  //!     different than `_Properties` when \c *this is the result of an
+  //!     interface-narrowing conversion. -- end note]
+  //!   .
+  //! `try_get_property(__res, __prop)` has type \c ReturnType. If \c _Property
+  //! is not in the pack \c _OtherProperties, returns `ReturnType()`.
+  //! Otherwise:
+  //!   - Returns \c true if \c ValueType is \c void.
+  //!   - Returns `ReturnType(get_property(obj, __prop))` otherwise.
+  template <class _Property>
+  friend auto try_get_property(const basic_resource_ref& __res, _Property __prop) noexcept;
+};
 
 //! @rst
 //! .. _cudax-memory-resource-any-resource:
@@ -302,29 +843,53 @@ _CCCL_INLINE_VAR constexpr bool __is_basic_any_resource<_Ty, basic_any_resource<
 //! Type erased wrapper around a `resource`
 //! ----------------------------------------
 //!
-//! ``any_resource`` wraps any given :ref:`resource <libcudacxx-extended-api-memory-resources-resource>` that
-//! satisfies the required properties. It owns the contained resource, taking care of construction / destruction.
-//! This makes it especially suited for use in e.g. container types that need to ensure that the lifetime of the
-//! container exceeds the lifetime of the memory resource used to allocate the storage
+//! ``any_resource`` wraps any given :ref:`resource
+//! <libcudacxx-extended-api-memory-resources-resource>` that satisfies the
+//! required properties. It owns the contained resource, taking care of
+//! construction / destruction. This makes it especially suited for use in e.g.
+//! container types that need to ensure that the lifetime of the container
+//! exceeds the lifetime of the memory resource used to allocate the storage
+//!
+//! ``any_resource`` models the ``cuda::std::regular`` concept.
 //!
 //! @endrst
 template <class... _Properties>
-using any_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Default, _Properties...>;
+using any_resource = basic_any_resource<_ResourceKind::_Synchronous, _Properties...>;
 
 //! @rst
 //! .. _cudax-memory-resource-any-async-resource:
 //!
 //! Type erased wrapper around an `async_resource`
-//! -----------------------------------------------
+//! ----------------------------------------------
+//!
+//! ``any_async_resource`` wraps any given :ref:`async_resource
+//! <libcudacxx-extended-api-memory-resources-resource>` that satisfies the
+//! required properties. It owns the contained resource, taking care of
+//! construction / destruction. This makes it especially suited for use in e.g.
+//! container types that need to ensure that the lifetime of the container
+//! exceeds the lifetime of the memory resource used to allocate the storage
 //!
-//! ``any_async_resource`` wraps any given :ref:`async resource <libcudacxx-extended-api-memory-resources-resource>`
-//! that satisfies the required properties. It owns the contained resource, taking care of construction / destruction.
-//! This makes it especially suited for use in e.g. container types that need to ensure that the lifetime of the
-//! container exceeds the lifetime of the memory resource used to allocate the storage
+//! ``any_async_resource`` models the ``cuda::std::regular`` concept.
 //!
 //! @endrst
 template <class... _Properties>
-using any_async_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Properties...>;
+using any_async_resource = basic_any_resource<_ResourceKind::_Asynchronous, _Properties...>;
+
+//! @brief Type erased wrapper around a `resource` that satisfies \c
+//! _Properties.
+//! @tparam _Properties The properties that any resource wrapped within the
+//! `resource_ref` needs to satisfy
+template <class... _Properties>
+using resource_ref = basic_resource_ref<_ResourceKind::_Synchronous, _Properties...>;
+
+//! @brief Type erased wrapper around a `async_resource` that satisfies \c
+//! _Properties
+//! @tparam _Properties The properties that any async resource wrapped within
+//! the `async_resource_ref` needs to satisfy
+template <class... _Properties>
+using async_resource_ref = basic_resource_ref<_ResourceKind::_Asynchronous, _Properties...>;
+
+#endif // _CCCL_DOXYGEN_INVOKED
 
 //! @rst
 //! .. _cudax-memory-resource-make-any-resource:
@@ -332,11 +897,14 @@ using any_async_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Pr
 //! Factory function for `any_resource` objects
 //! -------------------------------------------
 //!
-//! ``make_any_resource`` constructs an :ref:`any_resource <cudax-memory-resource-any-resource>` object that wraps a
-//! newly constructed instance of the given resource type. The resource type must satisfy the ``cuda::mr::resource``
-//! concept and provide all of the properties specified in the template parameter pack.
+//! ``make_any_resource`` constructs an :ref:`any_resource
+//! <cudax-memory-resource-any-resource>` object that wraps a newly constructed
+//! instance of the given resource type. The resource type must satisfy the
+//! ``cuda::mr::resource`` concept and provide all of the properties specified
+//! in the template parameter pack.
 //!
-//! @param __args The arguments used to construct the instance of the resource type.
+//! @param __args The arguments used to construct the instance of the resource
+//! type.
 //!
 //! @endrst
 template <class _Resource, class... _Properties, class... _Args>
@@ -344,7 +912,7 @@ auto make_any_resource(_Args&&... __args) -> any_resource<_Properties...>
 {
   static_assert(_CUDA_VMR::resource<_Resource>, "_Resource does not satisfy the cuda::mr::resource concept");
   static_assert(_CUDA_VMR::resource_with<_Resource, _Properties...>,
-                "Resource does not satisfy the required properties");
+                "The provided _Resource type does not support the requested properties");
   return any_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...};
 }
 
@@ -354,11 +922,14 @@ auto make_any_resource(_Args&&... __args) -> any_resource<_Properties...>
 //! Factory function for `any_async_resource` objects
 //! -------------------------------------------------
 //!
-//! ``make_any_async_resource`` constructs an :ref:`any_async_resource <cudax-memory-resource-any-async-resource>`
-//! object that wraps a newly constructed instance of the given resource type. The resource type must satisfy the
-//! ``cuda::mr::async_resource`` concept and provide all of the properties specified in the template parameter pack.
+//! ``make_any_async_resource`` constructs an :ref:`any_async_resource
+//! <cudax-memory-resource-any-async-resource>` object that wraps a newly
+//! constructed instance of the given resource type. The resource type must
+//! satisfy the ``cuda::mr::async_resource`` concept and provide all of the
+//! properties specified in the template parameter pack.
 //!
-//! @param __args The arguments used to construct the instance of the resource type.
+//! @param __args The arguments used to construct the instance of the resource
+//! type.
 //!
 //! @endrst
 template <class _Resource, class... _Properties, class... _Args>
@@ -367,10 +938,12 @@ auto make_any_async_resource(_Args&&... __args) -> any_async_resource<_Propertie
   static_assert(_CUDA_VMR::async_resource<_Resource>,
                 "_Resource does not satisfy the cuda::mr::async_resource concept");
   static_assert(_CUDA_VMR::async_resource_with<_Resource, _Properties...>,
-                "Resource does not satisfy the required properties");
+                "The provided _Resource type does not support the requested properties");
   return any_async_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...};
 }
 
 } // namespace cuda::experimental
 
-#endif //_CUDAX__MEMORY_RESOURCE_ANY_RESOURCE_H
+_CCCL_POP_MACROS
+
+#endif // _CUDAX__MEMORY_RESOURCE_ANY_RESOURCE_H
diff --git a/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh
index 431e210de84..9dce7ef8f86 100644
--- a/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh
@@ -31,12 +31,12 @@
 
 #  include <cuda/__memory_resource/get_property.h>
 #  include <cuda/__memory_resource/properties.h>
-#  include <cuda/__memory_resource/resource_ref.h>
 #  include <cuda/std/__cuda/api_wrapper.h>
 #  include <cuda/std/__new_>
 #  include <cuda/std/span>
 #  include <cuda/stream_ref>
 
+#  include <cuda/experimental/__memory_resource/any_resource.cuh>
 #  include <cuda/experimental/__stream/stream.cuh>
 
 #  if _CCCL_STD_VER >= 2014
diff --git a/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh
index 7dce44ecbaa..2f727fd5dc1 100644
--- a/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh
@@ -31,7 +31,6 @@
 
 #  include <cuda/__memory_resource/get_property.h>
 #  include <cuda/__memory_resource/properties.h>
-#  include <cuda/__memory_resource/resource_ref.h>
 #  include <cuda/std/__concepts/concept_macros.h>
 #  include <cuda/std/__cuda/api_wrapper.h>
 #  include <cuda/std/__new_>
@@ -39,6 +38,7 @@
 #  include <cuda/stream_ref>
 
 #  include <cuda/experimental/__device/device_ref.cuh>
+#  include <cuda/experimental/__memory_resource/any_resource.cuh>
 #  include <cuda/experimental/__memory_resource/device_memory_pool.cuh>
 #  include <cuda/experimental/__memory_resource/properties.cuh>
 #  include <cuda/experimental/__stream/stream.cuh>
@@ -317,8 +317,8 @@ private:
   {
     if constexpr (has_property<_Resource, device_accessible>)
     {
-      return _CUDA_VMR::resource_ref<device_accessible>{const_cast<device_memory_resource*>(this)}
-          == _CUDA_VMR::resource_ref<device_accessible>{const_cast<_Resource&>(__rhs)};
+      return resource_ref<device_accessible>{*const_cast<device_memory_resource*>(this)}
+          == __cudax::__as_resource_ref<device_accessible>(const_cast<_Resource&>(__rhs));
     }
     else
     {
diff --git a/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh
index a7c286ab644..b583450d414 100644
--- a/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh
@@ -28,11 +28,11 @@
 #include <cuda/__memory_resource/get_property.h>
 #include <cuda/__memory_resource/properties.h>
 #include <cuda/__memory_resource/resource.h>
-#include <cuda/__memory_resource/resource_ref.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__cuda/api_wrapper.h>
 #include <cuda/std/detail/libcxx/include/stdexcept>
 
+#include <cuda/experimental/__memory_resource/any_resource.cuh>
 #include <cuda/experimental/__memory_resource/properties.cuh>
 
 //! @file
@@ -165,15 +165,15 @@ private:
   template <class _Resource>
   _CCCL_NODISCARD bool __equal_to(_Resource const& __rhs) const noexcept
   {
-    if constexpr (has_property<_Resource, mr::device_accessible>)
+    if constexpr (has_property<_Resource, device_accessible>)
     {
-      return _CUDA_VMR::resource_ref<mr::device_accessible>{const_cast<managed_memory_resource*>(this)}
-          == _CUDA_VMR::resource_ref<mr::device_accessible>{const_cast<_Resource&>(__rhs)};
+      return resource_ref<device_accessible>{*const_cast<managed_memory_resource*>(this)}
+          == __cudax::__as_resource_ref<device_accessible>(const_cast<_Resource&>(__rhs));
     }
-    else if constexpr (has_property<_Resource, mr::host_accessible>)
+    else if constexpr (has_property<_Resource, host_accessible>)
     {
-      return _CUDA_VMR::resource_ref<mr::host_accessible>{const_cast<managed_memory_resource*>(this)}
-          == _CUDA_VMR::resource_ref<mr::host_accessible>{const_cast<_Resource&>(__rhs)};
+      return resource_ref<host_accessible>{*const_cast<managed_memory_resource*>(this)}
+          == __cudax::__as_resource_ref<host_accessible>(const_cast<_Resource&>(__rhs));
     }
     else
     {
@@ -224,9 +224,9 @@ public:
 #  endif // _CCCL_STD_VER <= 2017
 
   //! @brief Enables the \c device_accessible property
-  friend constexpr void get_property(managed_memory_resource const&, mr::device_accessible) noexcept {}
+  friend constexpr void get_property(managed_memory_resource const&, device_accessible) noexcept {}
   //! @brief Enables the \c host_accessible property
-  friend constexpr void get_property(managed_memory_resource const&, mr::host_accessible) noexcept {}
+  friend constexpr void get_property(managed_memory_resource const&, host_accessible) noexcept {}
 #endif // _CCCL_DOXYGEN_INVOKED
 
   //! @brief Checks whether the passed in alignment is valid
@@ -236,8 +236,8 @@ public:
         && (_CUDA_VMR::default_cuda_malloc_alignment % __alignment == 0);
   }
 };
-static_assert(_CUDA_VMR::async_resource_with<managed_memory_resource, mr::device_accessible>, "");
-static_assert(_CUDA_VMR::async_resource_with<managed_memory_resource, mr::host_accessible>, "");
+static_assert(_CUDA_VMR::async_resource_with<managed_memory_resource, device_accessible>, "");
+static_assert(_CUDA_VMR::async_resource_with<managed_memory_resource, host_accessible>, "");
 
 } // namespace cuda::experimental
 
diff --git a/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh
index fa3d2c44f8a..68ccaf2344b 100644
--- a/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh
@@ -29,11 +29,11 @@
 #include <cuda/__memory_resource/get_property.h>
 #include <cuda/__memory_resource/properties.h>
 #include <cuda/__memory_resource/resource.h>
-#include <cuda/__memory_resource/resource_ref.h>
 #include <cuda/std/__concepts/concept_macros.h>
 #include <cuda/std/__cuda/api_wrapper.h>
 #include <cuda/std/detail/libcxx/include/stdexcept>
 
+#include <cuda/experimental/__memory_resource/any_resource.cuh>
 #include <cuda/experimental/__memory_resource/properties.cuh>
 
 //! @file
@@ -166,13 +166,13 @@ public:
   {
     if constexpr (has_property<_Resource, device_accessible>)
     {
-      return _CUDA_VMR::resource_ref<device_accessible>{const_cast<pinned_memory_resource*>(this)}
-          == _CUDA_VMR::resource_ref<device_accessible>{const_cast<_Resource&>(__rhs)};
+      return resource_ref<device_accessible>{*const_cast<pinned_memory_resource*>(this)}
+          == __cudax::__as_resource_ref<device_accessible>(const_cast<_Resource&>(__rhs));
     }
     else if constexpr (has_property<_Resource, host_accessible>)
     {
-      return _CUDA_VMR::resource_ref<host_accessible>{const_cast<pinned_memory_resource*>(this)}
-          == _CUDA_VMR::resource_ref<host_accessible>{const_cast<_Resource&>(__rhs)};
+      return resource_ref<host_accessible>{*const_cast<pinned_memory_resource*>(this)}
+          == __cudax::__as_resource_ref<host_accessible>(const_cast<_Resource&>(__rhs));
     }
     else
     {
diff --git a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh
index 03bae6c4b0f..4c71f5cfe2e 100644
--- a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh
@@ -121,7 +121,7 @@ struct shared_resource
   //! held by this \c shared_resource object is released, while the reference held by \c __other
   //! is transferred to this object.
   //! @param __other The \c shared_resource object to move from.
-  /// @post \c __other is left in a valid but unspecified state.
+  //! @post \c __other is left in a valid but unspecified state.
   shared_resource& operator=(shared_resource&& __other) noexcept
   {
     if (this != &__other)
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh
index 248c3b05e01..b5b4e622a6f 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh
@@ -43,12 +43,23 @@ _CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface>&) -> basic_any<_I
 template <class _Interface>
 _CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface> const&) -> basic_any<_Interface> const&;
 
+#if _CCCL_COMPILER(CLANG, <, 12) || _CCCL_COMPILER(GCC, <, 11)
+// Older versions of clang and gcc need help disambiguating between
+// basic_any<__ireference<I>> and basic_any<I&>.
+template <class _Interface>
+_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&>&&) -> basic_any<_Interface&>&&;
+template <class _Interface>
+_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&>&) -> basic_any<_Interface&>&;
+template <class _Interface>
+_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&> const&) -> basic_any<_Interface&> const&;
+#endif
+
 // clang-format off
 template <class _Tp>
 _CCCL_CONCEPT __is_basic_any =
   _CCCL_REQUIRES_EXPR((_Tp), _Tp& __value)
   (
-    __is_basic_any_test(__value)
+    __cudax::__is_basic_any_test(__value)
   );
 // clang-format on
 
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh b/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh
index b447d27c244..412604c6601 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh
@@ -23,9 +23,10 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__type_traits/conditional.h>
+#include <cuda/std/__concepts/convertible_to.h>
 #include <cuda/std/__type_traits/is_convertible.h>
 #include <cuda/std/__type_traits/remove_reference.h>
+#include <cuda/std/__type_traits/type_list.h>
 
 #include <cuda/experimental/__utility/basic_any/basic_any_fwd.cuh>
 #include <cuda/experimental/__utility/basic_any/interfaces.cuh>
@@ -43,97 +44,118 @@ namespace cuda::experimental
 //! cvref qualified basic_any types to archetype types, and then using
 //! the built-in language rules to determine if the conversion is valid.
 //!
-struct __immovable_archetype
+template <bool _Movable, bool _Copyable>
+struct __archetype;
+
+// Archetype for interfaces that extend neither imovable nor icopyable
+template <>
+struct __archetype<false, false> // immovable archetype
 {
-  __immovable_archetype()                             = default;
-  __immovable_archetype(__immovable_archetype&&)      = delete;
-  __immovable_archetype(const __immovable_archetype&) = delete;
+  __archetype()                   = default;
+  __archetype(__archetype&&)      = delete;
+  __archetype(const __archetype&) = delete;
 
   template <class _Value>
-  _CUDAX_HOST_API __immovable_archetype(_Value) noexcept;
+  _CUDAX_HOST_API __archetype(_Value) noexcept;
   template <class _Value>
-  _CUDAX_HOST_API __immovable_archetype(_Value*) = delete;
+  _CUDAX_HOST_API __archetype(_Value*) = delete;
 };
 
-struct __movable_archetype : __immovable_archetype
+// Archetype for interfaces that extend imovable but not icopyable
+template <>
+struct __archetype<true, false> : __archetype<false, false> // movable archetype
 {
-  __movable_archetype() = default;
-  _CUDAX_HOST_API __movable_archetype(__movable_archetype&&) noexcept;
-  __movable_archetype(const __movable_archetype&) = delete;
+  __archetype() = default;
+  _CUDAX_HOST_API __archetype(__archetype&&) noexcept;
+  __archetype(const __archetype&) = delete;
 };
 
-struct __copyable_archetype : __movable_archetype
+// Archetype for interfaces that extend icopyable
+template <>
+struct __archetype<true, true> : __archetype<true, false>
 {
-  __copyable_archetype() = default;
-  _CUDAX_HOST_API __copyable_archetype(__copyable_archetype const&);
+  __archetype() = default;
+  _CUDAX_HOST_API __archetype(__archetype const&);
 };
 
 template <class _Interface>
-using _archetype_base = _CUDA_VSTD::conditional_t<
-  extension_of<_Interface, icopyable<>>,
-  __copyable_archetype,
-  _CUDA_VSTD::conditional_t<extension_of<_Interface, imovable<>>, __movable_archetype, __immovable_archetype>>;
+using __archetype_t _CCCL_NODEBUG_ALIAS =
+  __archetype<extension_of<_Interface, imovable<>>, extension_of<_Interface, icopyable<>>>;
+
+// Strip top-level cv- and ref-qualifiers from pointer types:
+template <class _Ty>
+auto __normalize(_Ty&&) -> _Ty
+{}
+template <class _Ty>
+auto __normalize(_Ty*) -> _Ty*
+{}
 
+template <class _Ty>
+using __normalize_t _CCCL_NODEBUG_ALIAS = decltype(__cudax::__normalize(declval<_Ty>()));
+
+// Used to map a basic_any specialization to a normalized interface type:
+template <class _Ty>
+extern _CUDA_VSTD::__undefined<_Ty> __interface_from;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>&&) -> _Interface;
+extern _Interface __interface_from<basic_any<_Interface>>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>>&&) -> _Interface;
+extern _Interface __interface_from<basic_any<__ireference<_Interface>>>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>&) -> _Interface&;
+extern _Interface& __interface_from<basic_any<_Interface>&>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<_Interface> const&) -> _Interface const&;
+extern _Interface const& __interface_from<basic_any<_Interface> const&>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>*) -> _Interface*;
+extern _Interface* __interface_from<basic_any<_Interface>*>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<_Interface> const*) -> _Interface const*;
+extern _Interface const* __interface_from<basic_any<_Interface> const*>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>>*) -> _Interface*;
+extern _Interface* __interface_from<basic_any<__ireference<_Interface>>*>;
 template <class _Interface>
-_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>> const*) -> _Interface*;
+extern _Interface* __interface_from<basic_any<__ireference<_Interface>> const*>;
 
+// Used to map a normalized interface type to an archetype for conversion testing:
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(_Interface&&) -> _archetype_base<_Interface>;
+extern __archetype_t<_Interface> __as_archetype;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(_Interface&) -> _archetype_base<_Interface>&;
+extern __archetype_t<_Interface>& __as_archetype<_Interface&>;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(_Interface const&) -> _archetype_base<_Interface> const&;
+extern __archetype_t<_Interface> const& __as_archetype<_Interface const&>;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(_Interface*) -> _archetype_base<_Interface>*;
+extern __archetype_t<_Interface>* __as_archetype<_Interface*>;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(_Interface const*) -> _archetype_base<_Interface> const*;
+extern __archetype_t<_Interface> const* __as_archetype<_Interface const*>;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(__ireference<_Interface>) -> _archetype_base<_Interface>&;
+extern __archetype_t<_Interface>& __as_archetype<__ireference<_Interface>>;
 template <class _Interface>
-_CUDAX_HOST_API auto __as_archetype(__ireference<_Interface const>) -> _archetype_base<_Interface> const&;
+extern __archetype_t<_Interface> const& __as_archetype<__ireference<_Interface const>>;
 
-template <class _Interface>
-_CUDAX_HOST_API auto __as_immovable(_Interface&&) -> __immovable_archetype;
-template <class _Interface>
-_CUDAX_HOST_API auto __as_immovable(_Interface&) -> __immovable_archetype&;
-template <class _Interface>
-_CUDAX_HOST_API auto __as_immovable(_Interface const&) -> __immovable_archetype const&;
-template <class _Interface>
-_CUDAX_HOST_API auto __as_immovable(_Interface*) -> __immovable_archetype*;
-template <class _Interface>
-_CUDAX_HOST_API auto __as_immovable(_Interface const*) -> __immovable_archetype const*;
+// Used to map an archetype to an immovable archetype
+template <class _Archetype>
+extern __archetype<false, false> __as_immovable;
+template <class _Archetype>
+extern __archetype<false, false>& __as_immovable<_Archetype&>;
+template <class _Archetype>
+extern __archetype<false, false> const& __as_immovable<_Archetype const&>;
+template <class _Archetype>
+extern __archetype<false, false>* __as_immovable<_Archetype*>;
+template <class _Archetype>
+extern __archetype<false, false> const* __as_immovable<_Archetype const*>;
 
-template <class CvAny>
-using __normalized_interface_of _CCCL_NODEBUG_ALIAS = decltype(__cudax::__interface_from(declval<CvAny>()));
+template <class _CvAny>
+using __normalized_interface_of _CCCL_NODEBUG_ALIAS = __normalize_t<decltype(__interface_from<__normalize_t<_CvAny>>)>;
 
-template <class CvAny>
-using __src_archetype_of _CCCL_NODEBUG_ALIAS =
-  decltype(__cudax::__as_archetype(__cudax::__interface_from(declval<CvAny>())));
+template <class _CvAny>
+using __src_archetype_of _CCCL_NODEBUG_ALIAS = decltype(__as_archetype<__normalized_interface_of<_CvAny>>);
 
-template <class CvAny>
-using __dst_archetype_of _CCCL_NODEBUG_ALIAS =
-  decltype(__cudax::__as_immovable(__cudax::__as_archetype(__cudax::__interface_from(declval<CvAny>()))));
+template <class _CvAny>
+using __dst_archetype_of _CCCL_NODEBUG_ALIAS = decltype(__as_immovable<__src_archetype_of<_CvAny>>);
 
 // If the archetypes are implicitly convertible, then it is possible to
 // dynamically cast from the source to the destination. The cast may fail,
 // but at least it is possible.
 template <class _SrcCvAny, class _DstCvAny>
 _CCCL_CONCEPT __any_castable_to =
-  _CUDA_VSTD::is_convertible_v<__src_archetype_of<_SrcCvAny>, __dst_archetype_of<_DstCvAny>>;
+  _CUDA_VSTD::convertible_to<__src_archetype_of<_SrcCvAny>, __dst_archetype_of<_DstCvAny>>;
 
 // If the archetypes are implicitly convertible **and** the source interface
 // is an extension of the destination one, then it is possible to implicitly
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
index 71a0506330f..435e43ee699 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
@@ -164,7 +164,7 @@ template <class _Tp>
 _CCCL_CONCEPT __is_interface =
   _CCCL_REQUIRES_EXPR((_Tp), _Tp& __value)
   (
-    __is_interface_test(__value)
+    __cudax::__is_interface_test(__value)
   );
 // clang-format on
 
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh b/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh
index 13759edfbcd..140770f015f 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh
@@ -22,9 +22,13 @@
 #endif // no system header
 
 #include <cuda/std/__concepts/concept_macros.h>
+#include <cuda/std/__concepts/convertible_to.h>
 #include <cuda/std/__concepts/copyable.h>
 #include <cuda/std/__concepts/equality_comparable.h>
 #include <cuda/std/__concepts/movable.h>
+#include <cuda/std/__memory/addressof.h>
+#include <cuda/std/__type_traits/decay.h>
+#include <cuda/std/__type_traits/is_same.h>
 #include <cuda/std/__type_traits/type_identity.h>
 #include <cuda/std/__utility/typeid.h>
 #include <cuda/std/__utility/unreachable.h>
@@ -114,6 +118,13 @@ __equal_fn(_Tp const& __self, _CUDA_VSTD::__type_info_ref __type, void const* __
   return false;
 }
 
+_CCCL_TEMPLATE(class _From, class _To)
+_CCCL_REQUIRES(_CUDA_VSTD::convertible_to<_From, _To>)
+_CCCL_NODISCARD _CUDAX_PUBLIC_API _To __conversion_fn(_CUDA_VSTD::type_identity_t<_From> __self)
+{
+  return static_cast<_To>(static_cast<_From&&>(__self));
+}
+
 //!
 //! semi-regular interfaces
 //!
@@ -251,6 +262,76 @@ template <class... _Super>
 struct iequality_comparable : iequality_comparable_base
 {};
 
+struct self; // a nice placeholder type
+
+template <class _CvSelf, class _To>
+struct __iconvertible_to
+{
+  static_assert(_CUDA_VSTD::is_same_v<_CUDA_VSTD::decay_t<_CvSelf>, self>,
+                "The first template parameter to iconvertible_to must be the placeholder type "
+                "cuda::experimental::self, possibly with cv- and/or ref-qualifiers");
+};
+
+template <class _To>
+struct __iconvertible_to<self&&, _To>
+{
+  template <class>
+  struct __always_false : _CUDA_VSTD::false_type
+  {};
+
+  static_assert(__always_false<_To>::value, "rvalue-qualified conversion operations are not yet supported");
+};
+
+template <class _To>
+struct __iconvertible_to<self, _To>
+{
+  template <class...>
+  struct __interface_ : interface<__interface_>
+  {
+    _CCCL_NODISCARD _CUDAX_HOST_API operator _To()
+    {
+      return __cudax::virtcall<__conversion_fn<__interface_, _To>>(this);
+    }
+
+    template <class _From>
+    using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From, _To>)>;
+  };
+};
+
+template <class _To>
+struct __iconvertible_to<self&, _To>
+{
+  template <class...>
+  struct __interface_ : interface<__interface_>
+  {
+    _CCCL_NODISCARD _CUDAX_HOST_API operator _To() &
+    {
+      return __cudax::virtcall<&__conversion_fn<__interface_&, _To>>(this);
+    }
+
+    template <class _From>
+    using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From&, _To>)>;
+  };
+};
+
+template <class _To>
+struct __iconvertible_to<self const&, _To>
+{
+  template <class...>
+  struct __interface_ : interface<__interface_>
+  {
+    _CCCL_NODISCARD _CUDAX_HOST_API operator _To() const&
+    {
+      return __cudax::virtcall<&__conversion_fn<__interface_ const&, _To>>(this);
+    }
+
+    template <class _From>
+    using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From const&, _To>)>;
+  };
+};
+
+template <class _From, class _To>
+using iconvertible_to _CCCL_NODEBUG_ALIAS = typename __iconvertible_to<_From, _To>::template __interface_<>;
 } // namespace cuda::experimental
 
 _CCCL_POP_MACROS
diff --git a/cudax/test/containers/uninitialized_buffer.cu b/cudax/test/containers/uninitialized_buffer.cu
index aadd16253fe..3e5c48c0eff 100644
--- a/cudax/test/containers/uninitialized_buffer.cu
+++ b/cudax/test/containers/uninitialized_buffer.cu
@@ -23,7 +23,6 @@
 #include <cuda/experimental/stream.cuh>
 
 #include "testing.cuh"
-#include <catch2/catch.hpp>
 
 struct do_not_construct
 {
@@ -61,6 +60,19 @@ constexpr int get_property(const cudax::device_memory_resource&, my_property)
   return 42;
 }
 
+__global__ void kernel(_CUDA_VSTD::span<int> data)
+{
+  // Touch the memory to be sure it's accessible
+  CUDAX_CHECK(data.size() == 1024);
+  data[0] = 42;
+}
+
+__global__ void const_kernel(_CUDA_VSTD::span<const int> data)
+{
+  // Touch the memory to be sure it's accessible
+  CUDAX_CHECK(data.size() == 1024);
+}
+
 TEMPLATE_TEST_CASE(
   "uninitialized_buffer", "[container]", char, short, int, long, long long, float, double, do_not_construct)
 {
@@ -204,19 +216,6 @@ TEMPLATE_TEST_CASE(
   }
 }
 
-__global__ void kernel(_CUDA_VSTD::span<int> data)
-{
-  // Touch the memory to be sure it's accessible
-  CUDAX_CHECK(data.size() == 1024);
-  data[0] = 42;
-}
-
-__global__ void const_kernel(_CUDA_VSTD::span<const int> data)
-{
-  // Touch the memory to be sure it's accessible
-  CUDAX_CHECK(data.size() == 1024);
-}
-
 TEST_CASE("uninitialized_buffer is usable with cudax::launch", "[container]")
 {
   SECTION("non-const")
diff --git a/cudax/test/memory_resource/any_async_resource.cu b/cudax/test/memory_resource/any_async_resource.cu
index 89c28b8a279..c491c9efa21 100644
--- a/cudax/test/memory_resource/any_async_resource.cu
+++ b/cudax/test/memory_resource/any_async_resource.cu
@@ -14,18 +14,20 @@
 #include <catch2/catch.hpp>
 #include <testing.cuh>
 
+#ifndef __CUDA_ARCH__
+
 TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resource]", big_resource, small_resource)
 {
   using TestResource = TestType;
-  static_assert(cuda::mr::resource_with<TestResource, cuda::mr::host_accessible>);
-  constexpr bool is_big = sizeof(TestResource) > sizeof(cuda::mr::_AnyResourceStorage);
+  static_assert(cuda::mr::resource_with<TestResource, cudax::host_accessible>);
+  constexpr bool is_big = sizeof(TestResource) > cudax::__default_buffer_size;
 
   SECTION("construct and destruct")
   {
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_async_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_async_resource<cudax::host_accessible> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -44,7 +46,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_async_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_async_resource<cudax::host_accessible> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -79,7 +81,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_async_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_async_resource<cudax::host_accessible> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -108,7 +110,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
     CHECK(this->counts == expected);
     {
       cudax::stream stream{};
-      cudax::any_async_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_async_resource<cudax::host_accessible> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -135,13 +137,13 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
   {
     Counts expected{};
     {
-      cudax::any_async_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_async_resource<cudax::host_accessible> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
       CHECK(this->counts == expected);
 
-      cuda::mr::resource_ref<cuda::mr::host_accessible> ref = mr;
+      cudax::resource_ref<cudax::host_accessible> ref = mr;
 
       CHECK(this->counts == expected);
       auto* ptr = ref.allocate(bytes(100), align(8));
@@ -165,8 +167,8 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_async_resource<cuda::mr::host_accessible> mr =
-        cudax::make_any_async_resource<TestResource, cuda::mr::host_accessible>(42, this);
+      cudax::any_async_resource<cudax::host_accessible> mr =
+        cudax::make_any_async_resource<TestResource, cudax::host_accessible>(42, this);
       expected.new_count += is_big;
       ++expected.object_count;
       CHECK(this->counts == expected);
@@ -178,3 +180,5 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou
   // Reset the counters:
   this->counts = Counts();
 }
+
+#endif // __CUDA_ARCH__
diff --git a/cudax/test/memory_resource/any_resource.cu b/cudax/test/memory_resource/any_resource.cu
index c013785f32f..c7aae10fea5 100644
--- a/cudax/test/memory_resource/any_resource.cu
+++ b/cudax/test/memory_resource/any_resource.cu
@@ -8,27 +8,37 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <cuda/memory_resource>
+
 #include <cuda/experimental/memory_resource.cuh>
 
 #include "test_resource.cuh"
-#include <catch2/catch.hpp>
 #include <testing.cuh>
 
+static_assert(cuda::has_property<cudax::any_resource<cudax::host_accessible, get_data>, cudax::host_accessible>);
+static_assert(cuda::has_property<cudax::any_resource<cudax::host_accessible, get_data>, get_data>);
+static_assert(!cuda::has_property<cudax::any_resource<cudax::host_accessible, get_data>, cudax::device_accessible>);
+
+struct unused_property
+{};
+
 TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", big_resource, small_resource)
 {
   using TestResource    = TestType;
-  constexpr bool is_big = sizeof(TestResource) > sizeof(cuda::mr::_AnyResourceStorage);
+  constexpr bool is_big = sizeof(TestResource) > cudax::__default_buffer_size;
 
   SECTION("construct and destruct")
   {
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_resource<cudax::host_accessible, get_data> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
       CHECK(this->counts == expected);
+      CHECK(get_property(mr, get_data{}) == 42);
+      get_property(mr, cudax::host_accessible{});
     }
     expected.delete_count += is_big;
     --expected.object_count;
@@ -43,7 +53,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]",
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_resource<cudax::host_accessible, get_data> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -54,14 +64,14 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]",
       ++expected.copy_count;
       ++expected.object_count;
       CHECK(this->counts == expected);
-      CHECK(mr == mr2);
+      CHECK((mr == mr2));
       ++expected.equal_to_count;
       CHECK(this->counts == expected);
 
       auto mr3 = std::move(mr);
       expected.move_count += !is_big; // for big resources, move is a pointer swap
       CHECK(this->counts == expected);
-      CHECK(mr2 == mr3);
+      CHECK((mr2 == mr3));
       ++expected.equal_to_count;
       CHECK(this->counts == expected);
     }
@@ -78,7 +88,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]",
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_resource<cudax::host_accessible, get_data> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
@@ -101,17 +111,69 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]",
   // Reset the counters:
   this->counts = Counts();
 
-  SECTION("conversion to resource_ref")
+  SECTION("equality comparable")
+  {
+    Counts expected{};
+    CHECK(this->counts == expected);
+    {
+      cudax::managed_memory_resource managed1{}, managed2{};
+      CHECK(managed1 == managed2);
+      cudax::any_resource<cudax::device_accessible> mr{managed1};
+      CHECK(mr == managed1);
+    }
+    CHECK(this->counts == expected);
+  }
+
+  // Reset the counters:
+  this->counts = Counts();
+
+  SECTION("conversion from any_resource to cudax::resource_ref")
+  {
+    Counts expected{};
+    {
+      cudax::any_resource<cudax::host_accessible, cudax::device_accessible, get_data> mr{TestResource{42, this}};
+      expected.new_count += is_big;
+      ++expected.object_count;
+      ++expected.move_count;
+      CHECK(this->counts == expected);
+
+      // conversion from any_resource to cuda::mr::resource_ref:
+      cuda::mr::resource_ref<cudax::host_accessible, cudax::device_accessible, get_data> ref = mr;
+
+      // conversion from any_resource to cuda::mr::resource_ref with narrowing:
+      cuda::mr::resource_ref<cudax::host_accessible, get_data> ref2 = mr;
+      CHECK(get_property(ref2, get_data{}) == 42);
+
+      CHECK(this->counts == expected);
+      auto* ptr = ref.allocate(bytes(100), align(8));
+      CHECK(ptr == this);
+      ++expected.allocate_count;
+      CHECK(this->counts == expected);
+      ref.deallocate(ptr, bytes(0), align(0));
+      ++expected.deallocate_count;
+      CHECK(this->counts == expected);
+    }
+    expected.delete_count += is_big;
+    --expected.object_count;
+    CHECK(this->counts == expected);
+  }
+
+  SECTION("conversion from any_resource to cuda::mr::resource_ref")
   {
     Counts expected{};
     {
-      cudax::any_resource<cuda::mr::host_accessible> mr{TestResource{42, this}};
+      cudax::any_resource<cudax::host_accessible, cudax::device_accessible, get_data> mr{TestResource{42, this}};
       expected.new_count += is_big;
       ++expected.object_count;
       ++expected.move_count;
       CHECK(this->counts == expected);
 
-      cuda::mr::resource_ref<cuda::mr::host_accessible> ref = mr;
+      // conversion from any_resource to cuda::mr::resource_ref:
+      cuda::mr::resource_ref<cudax::host_accessible, cudax::device_accessible, get_data> ref = mr;
+
+      // conversion from any_resource to cuda::mr::resource_ref with narrowing:
+      cuda::mr::resource_ref<cudax::host_accessible, get_data> ref2 = mr;
+      CHECK(get_property(ref2, get_data{}) == 42);
 
       CHECK(this->counts == expected);
       auto* ptr = ref.allocate(bytes(100), align(8));
@@ -130,13 +192,83 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]",
   // Reset the counters:
   this->counts = Counts();
 
+  SECTION("conversion from resource_ref to any_resource")
+  {
+    Counts expected{};
+    {
+      TestResource test{42, this};
+      ++expected.object_count;
+      cudax::resource_ref<cudax::host_accessible, get_data> ref{test};
+      CHECK(this->counts == expected);
+
+      cudax::any_resource<cudax::host_accessible, get_data> mr = ref;
+      expected.new_count += is_big;
+      ++expected.object_count;
+      ++expected.copy_count;
+      CHECK(this->counts == expected);
+
+      auto* ptr = ref.allocate(bytes(100), align(8));
+      CHECK(ptr == this);
+      ++expected.allocate_count;
+      CHECK(this->counts == expected);
+      ref.deallocate(ptr, bytes(0), align(0));
+      ++expected.deallocate_count;
+      CHECK(this->counts == expected);
+    }
+    expected.delete_count += is_big;
+    expected.object_count -= 2;
+    CHECK(this->counts == expected);
+  }
+
+  // Reset the counters:
+  this->counts = Counts();
+
+  SECTION("test slicing off of properties")
+  {
+    Counts expected{};
+    CHECK(this->counts == expected);
+    {
+      cudax::any_resource<cudax::host_accessible, cudax::device_accessible, get_data> mr{TestResource{42, this}};
+      expected.new_count += is_big;
+      ++expected.object_count;
+      ++expected.move_count;
+      CHECK(this->counts == expected);
+
+      cudax::any_resource<cudax::device_accessible, get_data> mr2 = mr;
+      expected.new_count += is_big;
+      ++expected.object_count;
+      ++expected.copy_count;
+      CHECK(this->counts == expected);
+
+      CHECK(get_property(mr2, get_data{}) == 42);
+      auto data = try_get_property(mr2, get_data{});
+      static_assert(cuda::std::is_same_v<decltype(data), cuda::std::optional<int>>);
+      CHECK(data.has_value());
+      CHECK(data.value() == 42);
+
+      auto host = try_get_property(mr2, cudax::host_accessible{});
+      static_assert(cuda::std::is_same_v<decltype(host), bool>);
+      CHECK(host);
+
+      auto unused = try_get_property(mr2, unused_property{});
+      static_assert(cuda::std::is_same_v<decltype(unused), bool>);
+      CHECK(!unused);
+    }
+    expected.delete_count += 2 * is_big;
+    expected.object_count -= 2;
+    CHECK(this->counts == expected);
+  }
+
+  // Reset the counters:
+  this->counts = Counts();
+
   SECTION("make_any_resource")
   {
     Counts expected{};
     CHECK(this->counts == expected);
     {
-      cudax::any_resource<cuda::mr::host_accessible> mr =
-        cudax::make_any_resource<TestResource, cuda::mr::host_accessible>(42, this);
+      cudax::any_resource<cudax::host_accessible, get_data> mr =
+        cudax::make_any_resource<TestResource, cudax::host_accessible, get_data>(42, this);
       expected.new_count += is_big;
       ++expected.object_count;
       CHECK(this->counts == expected);
diff --git a/cudax/test/memory_resource/device_memory_resource.cu b/cudax/test/memory_resource/device_memory_resource.cu
index 6c364f67e50..aefbb8b1bf7 100644
--- a/cudax/test/memory_resource/device_memory_resource.cu
+++ b/cudax/test/memory_resource/device_memory_resource.cu
@@ -87,7 +87,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
                        current_device);
   }
 
-  using async_resource = cuda::experimental::device_memory_resource;
+  using async_resource = cudax::device_memory_resource;
   SECTION("Default construction")
   {
     {
@@ -99,7 +99,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
     void* ptr{nullptr};
     _CCCL_TRY_CUDA_API(
       ::cudaMallocAsync,
-      "Failed to allocate with pool passed to cuda::experimental::device_memory_resource",
+      "Failed to allocate with pool passed to cudax::device_memory_resource",
       &ptr,
       42,
       current_default_pool,
@@ -107,10 +107,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
     CHECK(ptr != nullptr);
 
     _CCCL_ASSERT_CUDA_API(
-      ::cudaFreeAsync,
-      "Failed to deallocate with pool passed to cuda::experimental::device_memory_resource",
-      ptr,
-      ::cudaStream_t{0});
+      ::cudaFreeAsync, "Failed to deallocate with pool passed to cudax::device_memory_resource", ptr, ::cudaStream_t{0});
   }
 
   SECTION("Construct from mempool handle")
@@ -133,7 +130,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
     void* ptr{nullptr};
     _CCCL_TRY_CUDA_API(
       ::cudaMallocAsync,
-      "Failed to allocate with pool passed to cuda::experimental::device_memory_resource",
+      "Failed to allocate with pool passed to cudax::device_memory_resource",
       &ptr,
       42,
       current_default_pool,
@@ -141,18 +138,15 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
     CHECK(ptr != nullptr);
 
     _CCCL_ASSERT_CUDA_API(
-      ::cudaFreeAsync,
-      "Failed to deallocate with pool passed to cuda::experimental::device_memory_resource",
-      ptr,
-      ::cudaStream_t{0});
+      ::cudaFreeAsync, "Failed to deallocate with pool passed to cudax::device_memory_resource", ptr, ::cudaStream_t{0});
   }
 
   SECTION("Construct with initial pool size")
   {
-    cuda::experimental::memory_pool_properties props = {
+    cudax::memory_pool_properties props = {
       42,
     };
-    cuda::experimental::device_memory_pool pool{current_device, props};
+    cudax::device_memory_pool pool{current_device, props};
     async_resource from_initial_pool_size{pool};
 
     ::cudaMemPool_t get = from_initial_pool_size.get();
@@ -170,11 +164,11 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
 
   SECTION("Construct with release threshold")
   {
-    cuda::experimental::memory_pool_properties props = {
+    cudax::memory_pool_properties props = {
       42,
       20,
     };
-    cuda::experimental::device_memory_pool pool{current_device, props};
+    cudax::device_memory_pool pool{current_device, props};
     async_resource with_threshold{pool};
 
     ::cudaMemPool_t get = with_threshold.get();
@@ -194,12 +188,12 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]")
 #if _CCCL_CUDACC_AT_LEAST(11, 2)
   SECTION("Construct with allocation handle")
   {
-    cuda::experimental::memory_pool_properties props = {
+    cudax::memory_pool_properties props = {
       42,
       20,
-      cuda::experimental::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor,
+      cudax::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor,
     };
-    cuda::experimental::device_memory_pool pool{current_device, props};
+    cudax::device_memory_pool pool{current_device, props};
     async_resource with_allocation_handle{pool};
 
     ::cudaMemPool_t get = with_allocation_handle.get();
@@ -228,7 +222,7 @@ static void ensure_device_ptr(void* ptr)
 
 TEST_CASE("device_memory_resource allocation", "[memory_resource]")
 {
-  cuda::experimental::device_memory_resource res{};
+  cudax::device_memory_resource res{};
 
   { // allocate / deallocate
     auto* ptr = res.allocate(42);
@@ -374,13 +368,13 @@ struct resource
 
   template <AccessibilityType Accessibilty2                                         = Accessibility,
             cuda::std::enable_if_t<Accessibilty2 == AccessibilityType::Device, int> = 0>
-  friend void get_property(const resource&, cuda::mr::device_accessible) noexcept
+  friend void get_property(const resource&, cudax::device_accessible) noexcept
   {}
 };
 static_assert(cuda::mr::resource<resource<AccessibilityType::Host>>, "");
-static_assert(!cuda::mr::resource_with<resource<AccessibilityType::Host>, cuda::mr::device_accessible>, "");
+static_assert(!cuda::mr::resource_with<resource<AccessibilityType::Host>, cudax::device_accessible>, "");
 static_assert(cuda::mr::resource<resource<AccessibilityType::Device>>, "");
-static_assert(cuda::mr::resource_with<resource<AccessibilityType::Device>, cuda::mr::device_accessible>, "");
+static_assert(cuda::mr::resource_with<resource<AccessibilityType::Device>, cudax::device_accessible>, "");
 
 template <AccessibilityType Accessibility>
 struct async_resource : public resource<Accessibility>
@@ -392,10 +386,9 @@ struct async_resource : public resource<Accessibility>
   void deallocate_async(void*, size_t, size_t, cuda::stream_ref) {}
 };
 static_assert(cuda::mr::async_resource<async_resource<AccessibilityType::Host>>, "");
-static_assert(!cuda::mr::async_resource_with<async_resource<AccessibilityType::Host>, cuda::mr::device_accessible>, "");
+static_assert(!cuda::mr::async_resource_with<async_resource<AccessibilityType::Host>, cudax::device_accessible>, "");
 static_assert(cuda::mr::async_resource<async_resource<AccessibilityType::Device>>, "");
-static_assert(cuda::mr::async_resource_with<async_resource<AccessibilityType::Device>, cuda::mr::device_accessible>,
-              "");
+static_assert(cuda::mr::async_resource_with<async_resource<AccessibilityType::Device>, cudax::device_accessible>, "");
 
 TEST_CASE("device_memory_resource comparison", "[memory_resource]")
 {
@@ -404,10 +397,10 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]")
     _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device with cudaGetDevice.", &current_device);
   }
 
-  cuda::experimental::device_memory_resource first{};
+  cudax::device_memory_resource first{};
   { // comparison against a plain device_memory_resource
-    cuda::experimental::device_memory_resource second{};
-    CHECK(first == second);
+    cudax::device_memory_resource second{};
+    CHECK((first == second));
     CHECK(!(first != second));
   }
 
@@ -421,27 +414,27 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]")
       pool_properties.location.id   = current_device;
       _CCCL_TRY_CUDA_API(::cudaMemPoolCreate, "Failed to call cudaMemPoolCreate", &cuda_pool_handle, &pool_properties);
     }
-    cuda::experimental::device_memory_resource second{cuda_pool_handle};
-    CHECK(first != second);
+    cudax::device_memory_resource second{cuda_pool_handle};
+    CHECK((first != second));
     CHECK(!(first == second));
   }
 
   { // comparison against a device_memory_resource wrapped inside a resource_ref<device_accessible>
-    cuda::experimental::device_memory_resource second{};
-    cuda::mr::resource_ref<cuda::mr::device_accessible> second_ref{second};
-    CHECK(first == second_ref);
+    cudax::device_memory_resource second{};
+    cudax::resource_ref<cudax::device_accessible> second_ref{second};
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
   { // comparison against a device_memory_resource wrapped inside a async_resource_ref
-    cuda::experimental::device_memory_resource second{};
-    cuda::mr::async_resource_ref<cuda::mr::device_accessible> second_ref{second};
+    cudax::device_memory_resource second{};
+    cudax::async_resource_ref<cudax::device_accessible> second_ref{second};
 
-    CHECK(first == second_ref);
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
@@ -449,28 +442,28 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]")
     resource<AccessibilityType::Host> host_resource{};
     resource<AccessibilityType::Device> device_resource{};
     CHECK(!(first == host_resource));
-    CHECK(first != host_resource);
+    CHECK((first != host_resource));
     CHECK(!(first == device_resource));
-    CHECK(first != device_resource);
+    CHECK((first != device_resource));
 
     CHECK(!(host_resource == first));
-    CHECK(host_resource != first);
+    CHECK((host_resource != first));
     CHECK(!(device_resource == first));
-    CHECK(device_resource != first);
+    CHECK((device_resource != first));
   }
 
   { // comparison against a different resource through resource_ref
     async_resource<AccessibilityType::Host> host_async_resource{};
     async_resource<AccessibilityType::Device> device_async_resource{};
     CHECK(!(first == host_async_resource));
-    CHECK(first != host_async_resource);
+    CHECK((first != host_async_resource));
     CHECK(!(first == device_async_resource));
-    CHECK(first != device_async_resource);
+    CHECK((first != device_async_resource));
 
     CHECK(!(host_async_resource == first));
-    CHECK(host_async_resource != first);
+    CHECK((host_async_resource != first));
     CHECK(!(device_async_resource == first));
-    CHECK(device_async_resource != first);
+    CHECK((device_async_resource != first));
   }
 }
 
diff --git a/cudax/test/memory_resource/managed_memory_resource.cu b/cudax/test/memory_resource/managed_memory_resource.cu
index 0c71ef24a91..1c5836192ba 100644
--- a/cudax/test/memory_resource/managed_memory_resource.cu
+++ b/cudax/test/memory_resource/managed_memory_resource.cu
@@ -214,32 +214,32 @@ TEST_CASE("managed_memory_resource comparison", "[memory_resource]")
   managed_resource first{};
   { // comparison against a plain managed_memory_resource
     managed_resource second{};
-    CHECK(first == second);
+    CHECK((first == second));
     CHECK(!(first != second));
   }
 
   { // comparison against a plain managed_memory_resource with a different pool
     managed_resource second{cudaMemAttachHost};
-    CHECK(first != second);
+    CHECK((first != second));
     CHECK(!(first == second));
   }
 
   { // comparison against a managed_memory_resource wrapped inside a resource_ref<device_accessible>
     managed_resource second{};
-    cuda::mr::resource_ref<cuda::mr::device_accessible> second_ref{second};
-    CHECK(first == second_ref);
+    cuda::mr::resource_ref<cudax::device_accessible> second_ref{second};
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
   { // comparison against a managed_memory_resource wrapped inside a async_resource_ref
     managed_resource second{};
-    cuda::mr::async_resource_ref<cuda::mr::device_accessible> second_ref{second};
+    cuda::mr::async_resource_ref<cudax::device_accessible> second_ref{second};
 
-    CHECK(first == second_ref);
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
@@ -247,27 +247,27 @@ TEST_CASE("managed_memory_resource comparison", "[memory_resource]")
     resource<AccessibilityType::Host> host_resource{};
     resource<AccessibilityType::Device> device_resource{};
     CHECK(!(first == host_resource));
-    CHECK(first != host_resource);
+    CHECK((first != host_resource));
     CHECK(!(first == device_resource));
-    CHECK(first != device_resource);
+    CHECK((first != device_resource));
 
     CHECK(!(host_resource == first));
-    CHECK(host_resource != first);
+    CHECK((host_resource != first));
     CHECK(!(device_resource == first));
-    CHECK(device_resource != first);
+    CHECK((device_resource != first));
   }
 
   { // comparison against a different managed_resource through resource_ref
     resource<AccessibilityType::Host> host_async_resource{};
     resource<AccessibilityType::Device> device_async_resource{};
     CHECK(!(first == host_async_resource));
-    CHECK(first != host_async_resource);
+    CHECK((first != host_async_resource));
     CHECK(!(first == device_async_resource));
-    CHECK(first != device_async_resource);
+    CHECK((first != device_async_resource));
 
     CHECK(!(host_async_resource == first));
-    CHECK(host_async_resource != first);
+    CHECK((host_async_resource != first));
     CHECK(!(device_async_resource == first));
-    CHECK(device_async_resource != first);
+    CHECK((device_async_resource != first));
   }
 }
diff --git a/cudax/test/memory_resource/pinned_memory_resource.cu b/cudax/test/memory_resource/pinned_memory_resource.cu
index 3a834d3d051..4240491c6a3 100644
--- a/cudax/test/memory_resource/pinned_memory_resource.cu
+++ b/cudax/test/memory_resource/pinned_memory_resource.cu
@@ -17,6 +17,7 @@
 
 #include <stdexcept>
 
+#include "cuda/__memory_resource/resource_ref.h"
 #include <catch2/catch.hpp>
 #include <utility.cuh>
 
@@ -215,32 +216,33 @@ TEST_CASE("pinned_memory_resource comparison", "[memory_resource]")
   pinned_resource first{};
   { // comparison against a plain pinned_memory_resource
     pinned_resource second{};
-    CHECK(first == second);
+    CHECK((first == second));
     CHECK(!(first != second));
   }
 
   { // comparison against a plain pinned_memory_resource with a different pool
     pinned_resource second{cudaMemAttachHost};
-    CHECK(first != second);
+    CHECK((first != second));
     CHECK(!(first == second));
   }
 
   { // comparison against a pinned_memory_resource wrapped inside a resource_ref<device_accessible>
     pinned_resource second{};
-    cuda::mr::resource_ref<cuda::mr::device_accessible> second_ref{second};
-    CHECK(first == second_ref);
+    cuda::mr::resource_ref<cudax::device_accessible> const second_ref{second};
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
   { // comparison against a pinned_memory_resource wrapped inside a async_resource_ref
     pinned_resource second{};
-    cuda::mr::async_resource_ref<cuda::mr::device_accessible> second_ref{second};
+    // cuda::mr::async_resource_ref<cudax::device_accessible> second_ref{second};
+    cudax::async_resource_ref<cudax::device_accessible> second_ref{second};
 
-    CHECK(first == second_ref);
+    CHECK((first == second_ref));
     CHECK(!(first != second_ref));
-    CHECK(second_ref == first);
+    CHECK((second_ref == first));
     CHECK(!(second_ref != first));
   }
 
@@ -248,27 +250,27 @@ TEST_CASE("pinned_memory_resource comparison", "[memory_resource]")
     resource<AccessibilityType::Host> host_resource{};
     resource<AccessibilityType::Device> device_resource{};
     CHECK(!(first == host_resource));
-    CHECK(first != host_resource);
+    CHECK((first != host_resource));
     CHECK(!(first == device_resource));
-    CHECK(first != device_resource);
+    CHECK((first != device_resource));
 
     CHECK(!(host_resource == first));
-    CHECK(host_resource != first);
+    CHECK((host_resource != first));
     CHECK(!(device_resource == first));
-    CHECK(device_resource != first);
+    CHECK((device_resource != first));
   }
 
   { // comparison against a different pinned_resource through resource_ref
     resource<AccessibilityType::Host> host_async_resource{};
     resource<AccessibilityType::Device> device_async_resource{};
     CHECK(!(first == host_async_resource));
-    CHECK(first != host_async_resource);
+    CHECK((first != host_async_resource));
     CHECK(!(first == device_async_resource));
-    CHECK(first != device_async_resource);
+    CHECK((first != device_async_resource));
 
     CHECK(!(host_async_resource == first));
-    CHECK(host_async_resource != first);
+    CHECK((host_async_resource != first));
     CHECK(!(device_async_resource == first));
-    CHECK(device_async_resource != first);
+    CHECK((device_async_resource != first));
   }
 }
diff --git a/cudax/test/memory_resource/shared_resource.cu b/cudax/test/memory_resource/shared_resource.cu
index 4711498b0fe..cd279ab0b9e 100644
--- a/cudax/test/memory_resource/shared_resource.cu
+++ b/cudax/test/memory_resource/shared_resource.cu
@@ -105,7 +105,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource
       ++expected.object_count;
       CHECK(this->counts == expected);
 
-      cuda::mr::resource_ref<cuda::mr::host_accessible> ref = mr;
+      cudax::resource_ref<cudax::host_accessible> ref = mr;
 
       CHECK(this->counts == expected);
       auto* ptr = ref.allocate(bytes(100), align(8));
@@ -129,7 +129,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource
     align(alignof(int) * 4);
     {
       bytes(42 * sizeof(int));
-      cudax::uninitialized_buffer<int, cuda::mr::host_accessible> buffer{
+      cudax::uninitialized_buffer<int, cudax::host_accessible> buffer{
         cudax::shared_resource<TestResource>(42, this), 42};
       ++expected.object_count;
       ++expected.allocate_count;
@@ -139,7 +139,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource
       {
         // accounting for new storage
         bytes(1337 * sizeof(int));
-        cudax::uninitialized_buffer<int, cuda::mr::host_accessible> other_buffer{buffer.get_memory_resource(), 1337};
+        cudax::uninitialized_buffer<int, cudax::host_accessible> other_buffer{buffer.get_memory_resource(), 1337};
         ++expected.allocate_count;
         CHECK(this->counts == expected);
       }
@@ -151,7 +151,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource
 
       {
         // Moving the resource should not do anything
-        cudax::uninitialized_buffer<int, cuda::mr::host_accessible> third_buffer = ::cuda::std::move(buffer);
+        cudax::uninitialized_buffer<int, cudax::host_accessible> third_buffer = ::cuda::std::move(buffer);
         CHECK(this->counts == expected);
       }
 
diff --git a/cudax/test/memory_resource/test_resource.cuh b/cudax/test/memory_resource/test_resource.cuh
index 524ad12089e..75cd9b665b2 100644
--- a/cudax/test/memory_resource/test_resource.cuh
+++ b/cudax/test/memory_resource/test_resource.cuh
@@ -2,6 +2,9 @@
 
 #include <cuda_runtime_api.h>
 
+#include <cuda/experimental/memory_resource.cuh>
+#include <cuda/experimental/stream.cuh>
+
 #include <cstddef>
 #include <cstdint>
 
@@ -90,6 +93,11 @@ inline thread_local Counts* test_fixture_::counts_ = nullptr;
 template <class>
 using test_fixture = test_fixture_;
 
+struct get_data
+{
+  using value_type = int;
+};
+
 template <class T>
 struct test_resource
 {
@@ -128,6 +136,14 @@ struct test_resource
     --fixture->counts.object_count;
   }
 
+  test_resource& operator=(test_resource other) noexcept
+  {
+    other._assert_valid();
+    ::cuda::std::swap(data, other.data);
+    ::cuda::std::swap(fixture, other.fixture);
+    return *this;
+  }
+
   void* allocate(std::size_t bytes, std::size_t align)
   {
     _assert_valid();
@@ -198,11 +214,15 @@ struct test_resource
     return ::operator delete(pv);
   }
 
-  friend constexpr void get_property(const test_resource&, cuda::mr::host_accessible) noexcept {}
+  friend constexpr void get_property(const test_resource&, cudax::host_accessible) noexcept {}
+  friend constexpr int get_property(const test_resource& self, get_data) noexcept
+  {
+    return self.data;
+  }
 };
 
 using big_resource   = test_resource<uintptr_t>;
 using small_resource = test_resource<unsigned int>;
 
-static_assert(sizeof(big_resource) > sizeof(cuda::mr::_AnyResourceStorage));
-static_assert(sizeof(small_resource) <= sizeof(cuda::mr::_AnyResourceStorage));
+static_assert(sizeof(big_resource) > cuda::experimental::__default_buffer_size);
+static_assert(sizeof(small_resource) <= cuda::experimental::__default_buffer_size);
diff --git a/docs/libcudacxx/extended_api/memory_resource.rst b/docs/libcudacxx/extended_api/memory_resource.rst
index 5a8118f69a3..c06cc3c118a 100644
--- a/docs/libcudacxx/extended_api/memory_resource.rst
+++ b/docs/libcudacxx/extended_api/memory_resource.rst
@@ -36,7 +36,7 @@ At a high level, the header provides:
      - Concepts that provide proper constraints for arbitrary memory resources.
      - CCCL 2.2.0 / CUDA 12.3
    * - :ref:`cuda::mr::{async}_resource_ref <libcudacxx-extended-api-memory-resources-resource-ref>`
-     - A type-erased memory resource wrapper that enables consumers to specify properties of resources that they expect.
+     - A non-owning type-erased memory resource wrapper that enables consumers to specify properties of resources that they expect.
      - CCCL 2.2.0 / CUDA 12.3
 
 These features are an evolution of `std::pmr::memory_resource <https://en.cppreference.com/w/cpp/header/memory_resource>`__
diff --git a/libcudacxx/include/cuda/__memory_resource/resource.h b/libcudacxx/include/cuda/__memory_resource/resource.h
index 42f9668338a..733a8fdde8b 100644
--- a/libcudacxx/include/cuda/__memory_resource/resource.h
+++ b/libcudacxx/include/cuda/__memory_resource/resource.h
@@ -52,9 +52,9 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
 template <class _Resource>
 _CCCL_CONCEPT resource =
   _CCCL_REQUIRES_EXPR((_Resource), _Resource& __res, void* __ptr, size_t __bytes, size_t __alignment)(
-    requires(_CUDA_VSTD::equality_comparable<_Resource>),
     _Same_as(void*) __res.allocate(__bytes, __alignment), //
-    _Same_as(void) __res.deallocate(__ptr, __bytes, __alignment));
+    _Same_as(void) __res.deallocate(__ptr, __bytes, __alignment),
+    requires(_CUDA_VSTD::equality_comparable<_Resource>));
 
 //! @brief The \c async_resource concept verifies that a type Resource satisfies the basic requirements of a
 //! memory resource and additionally supports stream ordered allocations
@@ -74,9 +74,9 @@ _CCCL_CONCEPT resource =
 template <class _Resource>
 _CCCL_CONCEPT async_resource = _CCCL_REQUIRES_EXPR(
   (_Resource), _Resource& __res, void* __ptr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)(
-  requires(resource<_Resource>),
   _Same_as(void*) __res.allocate_async(__bytes, __alignment, __stream),
-  _Same_as(void) __res.deallocate_async(__ptr, __bytes, __alignment, __stream));
+  _Same_as(void) __res.deallocate_async(__ptr, __bytes, __alignment, __stream),
+  requires(resource<_Resource>));
 
 //! @brief The \c resource_with concept verifies that a type Resource satisfies the `resource` concept and
 //! also satisfies all the provided Properties
diff --git a/libcudacxx/include/cuda/__memory_resource/resource_ref.h b/libcudacxx/include/cuda/__memory_resource/resource_ref.h
index 81831720349..40f2c9d0ba8 100644
--- a/libcudacxx/include/cuda/__memory_resource/resource_ref.h
+++ b/libcudacxx/include/cuda/__memory_resource/resource_ref.h
@@ -147,7 +147,7 @@ struct _Resource_vtable_builder
   using __wrapper_type = _CUDA_VSTD::integral_constant<_WrapperType, _Wrapper_type>;
 
   template <class _Resource, class _Property>
-  static __property_value_t<_Property> _Get_property(void* __res) noexcept
+  static __property_value_t<_Property> _Get_property(const void* __res) noexcept
   {
     return get_property(*static_cast<const _Resource*>(__res), _Property{});
   }
@@ -292,7 +292,7 @@ struct _Resource_vtable_builder
 };
 
 template <class _Property>
-using __property_fn_t = __property_value_t<_Property> (*)(void*);
+using __property_fn_t = __property_value_t<_Property> (*)(const void*);
 
 template <class _Property>
 struct _Property_vtable
@@ -374,8 +374,11 @@ using _Filtered_vtable = typename _Filtered<_Properties...>::_Filtered_vtable::_
 template <_WrapperType _Wrapper_type>
 using __alloc_object_storage_t = _CUDA_VSTD::_If<_Wrapper_type == _WrapperType::_Reference, void*, _AnyResourceStorage>;
 
+struct _Resource_ref_base
+{};
+
 template <class _Vtable, _WrapperType _Wrapper_type>
-struct _Alloc_base
+struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES _Alloc_base : _Resource_ref_base
 {
   static_assert(_CUDA_VSTD::is_base_of_v<_Alloc_vtable, _Vtable>, "");
 
@@ -445,19 +448,8 @@ struct _Async_alloc_base : public _Alloc_base<_Vtable, _Wrapper_type>
   }
 };
 
-template <class _VTable, _WrapperType _Wrapper_type>
-constexpr bool _Is_resource_base_fn(const _Alloc_base<_VTable, _Wrapper_type>*) noexcept
-{
-  return true;
-}
-
-constexpr bool _Is_resource_base_fn(...) noexcept
-{
-  return false;
-}
-
 template <class _Resource>
-_CCCL_CONCEPT _Is_resource_base = _Is_resource_base_fn(static_cast<_Resource*>(nullptr));
+_CCCL_CONCEPT _Is_resource_ref = _CUDA_VSTD::convertible_to<_Resource&, _Resource_ref_base>;
 
 template <_AllocType _Alloc_type, _WrapperType _Wrapper_type>
 using _Resource_base =
@@ -522,7 +514,7 @@ class basic_resource_ref
   //! as well as all properties
   //! @param __res The resource to be wrapped within the \c basic_resource_ref
   _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type)
-  _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default)
+  _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default)
                    _CCCL_AND resource_with<_Resource, _Properties...>)
   basic_resource_ref(_Resource& __res) noexcept
       : _Resource_base<_Alloc_type, _WrapperType::_Reference>(
@@ -534,7 +526,7 @@ class basic_resource_ref
   //! properties. This ignores the async interface of the passed in resource
   //! @param __res The resource to be wrapped within the \c resource_ref
   _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type)
-  _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async)
+  _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async)
                    _CCCL_AND async_resource_with<_Resource, _Properties...>)
   basic_resource_ref(_Resource& __res) noexcept
       : _Resource_base<_Alloc_type, _WrapperType::_Reference>(
@@ -546,7 +538,7 @@ class basic_resource_ref
   //! as well as all properties
   //! @param __res Pointer to a resource to be wrapped within the \c basic_resource_ref
   _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type)
-  _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default)
+  _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default)
                    _CCCL_AND resource_with<_Resource, _Properties...>)
   basic_resource_ref(_Resource* __res) noexcept
       : _Resource_base<_Alloc_type, _WrapperType::_Reference>(
@@ -558,7 +550,7 @@ class basic_resource_ref
   //! properties. This ignores the async interface of the passed in resource
   //! @param __res Pointer to a resource to be wrapped within the \c resource_ref
   _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type)
-  _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async)
+  _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async)
                    _CCCL_AND async_resource_with<_Resource, _Properties...>)
   basic_resource_ref(_Resource* __res) noexcept
       : _Resource_base<_Alloc_type, _WrapperType::_Reference>(
@@ -587,9 +579,18 @@ class basic_resource_ref
   {}
 
   //! @brief Equality comparison between two \c basic_resource_ref
-  //! @param __rhs The other \c basic_resource_ref
+  //! @param __lhs The first \c basic_resource_ref
+  //! @param __rhs The second \c basic_resource_ref
   //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
   //! the result of that equality comparison. Otherwise returns false.
+  _CCCL_NODISCARD_FRIEND bool operator==(const basic_resource_ref& __lhs, const basic_resource_ref& __rhs)
+  {
+    // BUGBUG: comparing function pointers like this can lead to false negatives:
+    return (__lhs.__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn)
+        && __lhs.__static_vtable->__equal_fn(__lhs.__object, __rhs.__object);
+  }
+
+  //! @overload
   _CCCL_TEMPLATE(class... _OtherProperties)
   _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
                    _CCCL_AND __properties_match<_OtherProperties...>)
@@ -601,9 +602,16 @@ class basic_resource_ref
   }
 
   //! @brief Inequality comparison between two \c basic_resource_ref
-  //! @param __rhs The other \c basic_resource_ref
+  //! @param __lhs The first \c basic_resource_ref
+  //! @param __rhs The second \c basic_resource_ref
   //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns
   //! the inverse result of that equality comparison. Otherwise returns true.
+  _CCCL_NODISCARD_FRIEND bool operator!=(const basic_resource_ref& __lhs, const basic_resource_ref& __rhs)
+  {
+    return !(__lhs == __rhs);
+  }
+
+  //! @overload
   _CCCL_TEMPLATE(class... _OtherProperties)
   _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties))
                    _CCCL_AND __properties_match<_OtherProperties...>)