From 82cff38a1c41394b50386f254f6227d9ba5c5d88 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Tue, 17 Dec 2024 13:58:45 -0800 Subject: [PATCH] new type-erased memory resources (#2824) --- .../uninitialized_async_buffer.cuh | 9 +- .../__container/uninitialized_buffer.cuh | 9 +- .../__memory_resource/any_resource.cuh | 1009 +++++++++++++---- .../__memory_resource/device_memory_pool.cuh | 2 +- .../device_memory_resource.cuh | 6 +- .../managed_memory_resource.cuh | 22 +- .../pinned_memory_resource.cuh | 10 +- .../__memory_resource/shared_resource.cuh | 2 +- .../__utility/basic_any/basic_any_base.cuh | 13 +- .../__utility/basic_any/conversions.cuh | 126 +- .../__utility/basic_any/interfaces.cuh | 2 +- .../__utility/basic_any/semiregular.cuh | 81 ++ cudax/test/containers/uninitialized_buffer.cu | 27 +- .../memory_resource/any_async_resource.cu | 24 +- cudax/test/memory_resource/any_resource.cu | 156 ++- .../memory_resource/device_memory_resource.cu | 85 +- .../managed_memory_resource.cu | 32 +- .../memory_resource/pinned_memory_resource.cu | 34 +- cudax/test/memory_resource/shared_resource.cu | 8 +- cudax/test/memory_resource/test_resource.cuh | 26 +- .../extended_api/memory_resource.rst | 2 +- .../include/cuda/__memory_resource/resource.h | 8 +- .../cuda/__memory_resource/resource_ref.h | 50 +- 23 files changed, 1293 insertions(+), 450 deletions(-) diff --git a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh index d6259a7c077..3f55084dc63 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_async_buffer.cuh @@ -22,7 +22,6 @@ #endif // no system header #include -#include #include #include #include @@ -118,7 +117,7 @@ private: _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_async_buffer& __self) noexcept _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( - _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) { // TODO add auto synchronization return {__self.__get_data(), __self.size()}; @@ -130,7 +129,7 @@ private: _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_async_buffer& __self) noexcept _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span)( - _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) { // TODO add auto synchronization return {__self.__get_data(), __self.size()}; @@ -287,7 +286,7 @@ public: _CCCL_HIDE_FROM_ABI uninitialized_async_buffer __replace_allocation(const size_t __count) { // Create a new buffer with a reference to the stored memory resource and swap allocation information - uninitialized_async_buffer __ret{_CUDA_VMR::async_resource_ref<_Properties...>{__mr_}, __stream_, __count}; + uninitialized_async_buffer __ret{async_resource_ref<_Properties...>{__mr_}, __stream_, __count}; _CUDA_VSTD::swap(__count_, __ret.__count_); _CUDA_VSTD::swap(__buf_, __ret.__buf_); return __ret; @@ -295,7 +294,7 @@ public: }; template -using uninitialized_async_device_buffer = uninitialized_async_buffer<_Tp, mr::device_accessible>; +using uninitialized_async_device_buffer = uninitialized_async_buffer<_Tp, device_accessible>; } // namespace cuda::experimental diff --git a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh index 74af402ca51..55168b38805 100644 --- a/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh +++ b/cudax/include/cuda/experimental/__container/uninitialized_buffer.cuh @@ -22,7 +22,6 @@ #endif // no system header #include -#include #include #include #include @@ -108,7 +107,7 @@ private: _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto __cudax_launch_transform(::cuda::stream_ref, uninitialized_buffer& __self) noexcept _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span<_Tp>)( - _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) { return {__self.__get_data(), __self.size()}; } @@ -119,7 +118,7 @@ private: _CCCL_NODISCARD_FRIEND _CCCL_HIDE_FROM_ABI auto __cudax_launch_transform(::cuda::stream_ref, const uninitialized_buffer& __self) noexcept _CCCL_TRAILING_REQUIRES(_CUDA_VSTD::span)( - _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) + _CUDA_VSTD::same_as<_Tp, _Tp2>&& _CUDA_VSTD::__is_included_in_v) { return {__self.__get_data(), __self.size()}; } @@ -252,7 +251,7 @@ public: _CCCL_HIDE_FROM_ABI uninitialized_buffer __replace_allocation(const size_t __count) { // Create a new buffer with a reference to the stored memory resource and swap allocation information - uninitialized_buffer __ret{_CUDA_VMR::resource_ref<_Properties...>{__mr_}, __count}; + uninitialized_buffer __ret{resource_ref<_Properties...>{__mr_}, __count}; _CUDA_VSTD::swap(__count_, __ret.__count_); _CUDA_VSTD::swap(__buf_, __ret.__buf_); return __ret; @@ -260,7 +259,7 @@ public: }; template -using uninitialized_device_buffer = uninitialized_buffer<_Tp, mr::device_accessible>; +using uninitialized_device_buffer = uninitialized_buffer<_Tp, device_accessible>; } // namespace cuda::experimental diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh index 8eb5687925e..5836016c31e 100644 --- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh @@ -41,260 +41,801 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include +#include -namespace cuda::experimental -{ -template > -_CCCL_INLINE_VAR constexpr bool __is_basic_any_resource = false; - -//! @rst -//! .. _cudax-memory-resource-basic-any-resource: -//! -//! Base class for a type erased owning wrapper around a memory resource -//! --------------------------------------------------------------------- -//! -//! ``basic_any_resource`` abstracts the differences between a resource and an async resource away, allowing efficient -//! interoperability between the two. -//! -//! @endrst -template <_CUDA_VMR::_AllocType _Alloc_type, class... _Properties> -class basic_any_resource - : public _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning> - , private _CUDA_VMR::_Filtered_vtable<_Properties...> -{ -private: - static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, - "The properties of cuda::experimental::basic_any_resource must contain at least one execution " - "space property!"); +#include - template <_CUDA_VMR::_AllocType, class...> - friend class basic_any_resource; +_CCCL_PUSH_MACROS +#undef interface - template - friend struct _CUDA_VMR::_Resource_vtable; - - using __vtable = _CUDA_VMR::_Filtered_vtable<_Properties...>; +namespace cuda::experimental +{ +#ifndef _CCCL_DOXYGEN_INVOKED // Do not document this - //! @brief Validates that a set of \c _OtherProperties... is a superset of \c _Properties... . - template - static constexpr bool __properties_match = - _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_OtherProperties...>, _Properties...>; +template +using __property_result_t _CCCL_NODEBUG_ALIAS = _CUDA_VSTD::__type_call1< // + _CUDA_VSTD::conditional_t, + _CUDA_VSTD::__type_quote1<__property_value_t>, + _CUDA_VSTD::__type_always>, + _Property>; - //! @brief Validates that a passed in \c _Resource satisfies the \c resource or \c async_resource concept respectively - //! as well as all properties in \c _Properties... . - template - static constexpr bool __valid_resource = - _Alloc_type == _CUDA_VMR::_AllocType::_Async - ? _CUDA_VMR::async_resource_with<_Resource, _Properties...> - : _CUDA_VMR::resource_with<_Resource, _Properties...>; - -public: - //! @brief Constructs a \c basic_any_resource from a type that satisfies the \c resource or \c async_resource - //! concept as well as all properties. - //! @param __res The resource to be wrapped within the \c basic_any_resource. - _CCCL_TEMPLATE(class _Resource, class __resource_t = _CUDA_VSTD::remove_cvref_t<_Resource>) - _CCCL_REQUIRES((!__is_basic_any_resource<_Resource>) _CCCL_AND __valid_resource<__resource_t>) - basic_any_resource(_Resource&& __res) noexcept - : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( - nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, __resource_t>) - , __vtable(__vtable::template _Create<__resource_t>()) +template +struct __with_property +{ + template + _CUDAX_PUBLIC_API static auto __get_property(const _Ty& __obj) // + -> __property_result_t<_Property> { - if constexpr (_CUDA_VMR::_IsSmall<__resource_t>()) + if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) { - ::new (static_cast(this->__object.__buf_)) __resource_t(_CUDA_VSTD::forward<_Resource>(__res)); + return get_property(__obj, _Property()); } else { - this->__object.__ptr_ = new __resource_t(_CUDA_VSTD::forward<_Resource>(__res)); + return void(); } } - //! @brief Constructs a \c basic_any_resource wrapping an object of type \c _Resource that - //! is constructed from \c __args... . \c _Resource must satisfy the \c resource or \c async_resource - //! concept, and it must provide all properties in \c _Properties... . - //! @param __args The arguments used to construct the instance of \c _Resource to be wrapped within the - //! \c basic_any_resource. - _CCCL_TEMPLATE(class _Resource, class... _Args) - _CCCL_REQUIRES(_CCCL_TRAIT(_CUDA_VSTD::is_constructible, _Resource, _Args...) _CCCL_AND __valid_resource<_Resource>) - basic_any_resource(_CUDA_VSTD::in_place_type_t<_Resource>, _Args&&... __args) noexcept - : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( - nullptr, &_CUDA_VMR::__alloc_vtable<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning, _Resource>) - , __vtable(__vtable::template _Create<_Resource>()) + template + struct __iproperty : interface<__iproperty> { - if constexpr (_CUDA_VMR::_IsSmall<_Resource>()) + _CUDAX_HOST_API friend auto + get_property([[maybe_unused]] const __iproperty& __obj, _Property) -> __property_result_t<_Property> { - ::new (static_cast(this->__object.__buf_)) _Resource(_CUDA_VSTD::forward<_Args>(__args)...); + if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) + { + return __cudax::virtcall<&__get_property<__iproperty>>(&__obj); + } + else + { + return void(); + } } - else - { - this->__object.__ptr_ = new _Resource(_CUDA_VSTD::forward<_Args>(__args)...); - } - } - //! @brief Conversion from a \c basic_any_resource with the same set of properties but in a different order. - //! This constructor also handles conversion from \c any_async_resource to \c any_resource - //! @param __other The other \c basic_any_resource. - _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _OtherAllocType, class... _OtherProperties) - _CCCL_REQUIRES( - (_CUDA_VSTD::_IsNotSame>::value) - _CCCL_AND(_OtherAllocType == _Alloc_type || _OtherAllocType == _CUDA_VMR::_AllocType::_Async) - _CCCL_AND __properties_match<_OtherProperties...>) - basic_any_resource(basic_any_resource<_OtherAllocType, _OtherProperties...> __other) noexcept - : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( - nullptr, _CUDA_VSTD::exchange(__other.__static_vtable, nullptr)) - , __vtable(__other) - { - _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object"); - this->__static_vtable->__move_fn(&this->__object, &__other.__object); - } + template + using overrides _CCCL_NODEBUG_ALIAS = overrides_for<_Ty, _CUDAX_FNPTR_CONSTANT_WAR(&__get_property<_Ty>)>; + }; +}; + +template +using __iproperty = typename __with_property<_Property>::template __iproperty<>; + +template +using __iproperty_set = iset<__iproperty<_Properties>...>; + +// Wrap the calls of the allocate_async and deallocate_async member functions +// because of NVBUG#4967486 +template +_CUDAX_PUBLIC_API auto __allocate_async(_Resource& __mr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) + -> decltype(__mr.allocate_async(__bytes, __alignment, __stream)) +{ + return __mr.allocate_async(__bytes, __alignment, __stream); +} + +template +_CUDAX_PUBLIC_API auto +__deallocate_async(_Resource& __mr, void* __pv, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) + -> decltype(__mr.deallocate_async(__pv, __bytes, __alignment, __stream)) +{ + __mr.deallocate_async(__pv, __bytes, __alignment, __stream); +} - //! @brief Move-constructs a \c basic_any_resource from another one, taking ownership of the stored resource. - //! @param __other The other \c basic_any_resource. - basic_any_resource(basic_any_resource&& __other) noexcept - : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>( - nullptr, _CUDA_VSTD::exchange(__other.__static_vtable, nullptr)) - , __vtable(__other) +template +struct __ibasic_resource : interface<__ibasic_resource> +{ + _CUDAX_PUBLIC_API void* allocate(size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) { - _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object"); - this->__static_vtable->__move_fn(&this->__object, &__other.__object); + return __cudax::virtcall<&__ibasic_resource::allocate>(this, __bytes, __alignment); } - //! @brief Move-assigns another \c basic_any_resource, taking ownership of the stored resource. - //! @param __other The other \c basic_any_resource. - basic_any_resource& operator=(basic_any_resource&& __other) noexcept + _CUDAX_PUBLIC_API void deallocate(void* __pv, size_t __bytes, size_t __alignment = alignof(_CUDA_VSTD::max_align_t)) { - if (this->__static_vtable != nullptr) - { - this->__static_vtable->__destroy_fn(&this->__object); - this->__static_vtable = nullptr; - } + return __cudax::virtcall<&__ibasic_resource::deallocate>(this, __pv, __bytes, __alignment); + } - if (__other.__static_vtable != nullptr) - { - this->__static_vtable = _CUDA_VSTD::exchange(__other.__static_vtable, nullptr); - this->__static_vtable->__move_fn(&this->__object, &__other.__object); - } + template + using overrides _CCCL_NODEBUG_ALIAS = + overrides_for<_Ty, _CUDAX_FNPTR_CONSTANT_WAR(&_Ty::allocate), _CUDAX_FNPTR_CONSTANT_WAR(&_Ty::deallocate)>; +}; - return *this; +template +struct __ibasic_async_resource : interface<__ibasic_async_resource> +{ + _CUDAX_PUBLIC_API void* allocate_async(size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) + { + return __cudax::virtcall<&__allocate_async<__ibasic_async_resource>>(this, __bytes, __alignment, __stream); } - //! @brief Copy-constructs a \c basic_any_resource from another one. - //! @param __other The other \c basic_any_resource. - basic_any_resource(const basic_any_resource& __other) - : _CUDA_VMR::_Resource_base<_Alloc_type, _CUDA_VMR::_WrapperType::_Owning>(nullptr, __other.__static_vtable) - , __vtable(__other) + _CUDAX_PUBLIC_API void* allocate_async(size_t __bytes, ::cuda::stream_ref __stream) { - _CCCL_ASSERT(this->__static_vtable != nullptr, "copying from a moved-from object"); - this->__static_vtable->__copy_fn(&this->__object, &__other.__object); + return __cudax::virtcall<&__allocate_async<__ibasic_async_resource>>( + this, __bytes, alignof(_CUDA_VSTD::max_align_t), __stream); } - //! @brief Copy-assigns another \c basic_any_resource. - //! @param __other The other \c basic_any_resource. - basic_any_resource& operator=(const basic_any_resource& __other) + _CUDAX_PUBLIC_API void deallocate_async(void* __pv, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream) { - return this == &__other ? *this : operator=(basic_any_resource(__other)); + return __cudax::virtcall<&__deallocate_async<__ibasic_async_resource>>(this, __pv, __bytes, __alignment, __stream); } - //! @brief Destroys the stored resource - ~basic_any_resource() noexcept + _CUDAX_PUBLIC_API void deallocate_async(void* __pv, size_t __bytes, ::cuda::stream_ref __stream) { - if (this->__static_vtable != nullptr) - { - this->__static_vtable->__destroy_fn(&this->__object); - } + return __cudax::virtcall<&__deallocate_async<__ibasic_async_resource>>( + this, __pv, __bytes, alignof(_CUDA_VSTD::max_align_t), __stream); } - //! @brief Converts a \c basic_any_resource to a \c resource_ref with a potential subset of properties. - //! @return The \c resource_ref to this resource. - _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _OtherAllocType, class... _OtherProperties) - _CCCL_REQUIRES((_OtherAllocType == _CUDA_VMR::_AllocType::_Default || _OtherAllocType == _Alloc_type) _CCCL_AND( - _CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__make_type_set<_Properties...>, _OtherProperties...>)) - operator _CUDA_VMR::basic_resource_ref<_OtherAllocType, _OtherProperties...>() noexcept + template + using overrides _CCCL_NODEBUG_ALIAS = + overrides_for<_Ty, + _CUDAX_FNPTR_CONSTANT_WAR(&__allocate_async<_Ty>), + _CUDAX_FNPTR_CONSTANT_WAR(&__deallocate_async<_Ty>)>; +}; + +// This is the pseudo-virtual override for getting an old-style vtable pointer +// from a new-style basic_any resource type. It is used below by +// __iresource_ref_conversions. +template +_CUDAX_PUBLIC_API const _CUDA_VMR::_Alloc_vtable* __get_resource_vptr(_Resource&) noexcept +{ + if constexpr (_CUDA_VMR::async_resource<_Resource>) { - return _CUDA_VMR::_Resource_ref_helper::_Construct<_Alloc_type, _OtherProperties...>( - this->_Get_object(), this->__static_vtable, static_cast(*this)); + return &_CUDA_VMR::__alloc_vtable<_CUDA_VMR::_AllocType::_Async, _CUDA_VMR::_WrapperType::_Reference, _Resource>; } - - //! @brief Swaps a \c basic_any_resource with another one. - //! @param __other The other \c basic_any_resource. - void swap(basic_any_resource& __other) noexcept + else if constexpr (_CUDA_VMR::resource<_Resource>) { - auto __tmp = _CUDA_VSTD::move(__other); - __other = _CUDA_VSTD::move(*this); - *this = _CUDA_VSTD::move(__tmp); + return &_CUDA_VMR::__alloc_vtable<_CUDA_VMR::_AllocType::_Default, _CUDA_VMR::_WrapperType::_Reference, _Resource>; } - - //! @brief Equality comparison between two \c basic_any_resource - //! @param __rhs The other \c basic_any_resource - //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns - //! the result of that equality comparison. Otherwise returns false. - _CCCL_NODISCARD bool operator==(const basic_any_resource& __rhs) const + else { - return (this->__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn) - && this->__static_vtable->__equal_fn(this->_Get_object(), __rhs._Get_object()); + // This branch is taken when called from the thunk of an unspecialized + // interface; e.g., `icat<>` rather than `icat>`. The thunks of + // unspecialized interfaces are never called, they just need to exist. The + // function pointer will be used as a key to look up the proper override. + _CCCL_UNREACHABLE(); } +} - //! @brief Equality comparison between two \c basic_any_resource - //! @param __rhs The other \c basic_any_resource - //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns - //! the result of that equality comparison. Otherwise returns false. - _CCCL_TEMPLATE(class... _OtherProperties) - _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties)) - _CCCL_AND __properties_match<_OtherProperties...>) - _CCCL_NODISCARD bool operator==(const basic_any_resource<_Alloc_type, _OtherProperties...>& __rhs) const +_CCCL_DIAG_PUSH +_CCCL_DIAG_SUPPRESS_GCC("-Wunused-but-set-parameter") + +// Given a list of properties and a basic_any vptr, build a _Resource_vtable +// for the properties as cuda::mr::basic_resource_ref expects. +template +_CUDAX_HOST_API auto __make_resource_vtable(_VPtr __vptr, _CUDA_VMR::_Resource_vtable<_Properties...>*) noexcept + -> _CUDA_VMR::_Resource_vtable<_Properties...> +{ + return {__vptr->__query_interface(__iproperty<_Properties>())->__fn_...}; +} + +_CCCL_DIAG_POP + +// This interface provides the any_[async_]resource types with a conversion +// to the old cuda::mr::basic_resource_ref types. +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES __iresource_ref_conversions + : interface<__iresource_ref_conversions> + , _CUDA_VMR::_Resource_ref_base +{ + using __self_t = basic_any_from_t<__iresource_ref_conversions&>; + + template + using __iprop = __rebind_interface<__iproperty<_Property>, _Super...>; + + template <_CUDA_VMR::_AllocType _Alloc_type> + using __iresource = __rebind_interface< + _CUDA_VSTD:: + conditional_t<_Alloc_type == _CUDA_VMR::_AllocType::_Default, __ibasic_resource<>, __ibasic_async_resource<>>, + _Super...>; + + _CCCL_TEMPLATE(_CUDA_VMR::_AllocType _Alloc_type, class... _Properties) + _CCCL_REQUIRES(_CUDA_VSTD::derived_from<__self_t, __iresource<_Alloc_type>> + && (_CUDA_VSTD::derived_from<__self_t, __iprop<_Properties>> && ...)) + operator _CUDA_VMR::basic_resource_ref<_Alloc_type, _Properties...>() { - return (this->__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn) - && this->__static_vtable->__equal_fn(this->_Get_object(), __rhs._Get_object()); + auto& __self = __cudax::basic_any_from(*this); + auto* __vptr = __cudax::virtcall<&__get_resource_vptr<__iresource_ref_conversions>>(this); + auto* __vtag = static_cast<_CUDA_VMR::_Filtered_vtable<_Properties...>*>(nullptr); + auto __props = __cudax::__make_resource_vtable(__basic_any_access::__get_vptr(__self), __vtag); + + return _CUDA_VMR::_Resource_ref_helper::_Construct<_Alloc_type, _Properties...>( + __basic_any_access::__get_optr(__self), + static_cast*>(__vptr), + __props); } - //! @brief Inequality comparison between two \c basic_any_resource - //! @param __rhs The other \c basic_any_resource - //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns - //! the inverse result of that equality comparison. Otherwise returns true. - _CCCL_NODISCARD bool operator!=(const basic_any_resource& __rhs) const + template + using overrides = overrides_for<_Resource, _CUDAX_FNPTR_CONSTANT_WAR(&__get_resource_vptr<_Resource>)>; +}; + +template +using __iresource _CCCL_NODEBUG_ALIAS = + iset<__ibasic_resource<>, + __iproperty_set<_Properties...>, + __iresource_ref_conversions<>, + icopyable<>, + iequality_comparable<>>; + +template +using __iasync_resource _CCCL_NODEBUG_ALIAS = iset<__iresource<_Properties...>, __ibasic_async_resource<>>; + +template +using __try_property_result_t = + _CUDA_VSTD::conditional_t, void>, // + _CUDA_VSTD::optional<__property_result_t<_Property>>, // + bool>; + +template +struct __with_try_get_property +{ + template + _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto + try_get_property(const _Derived& __self, _Property) noexcept -> __try_property_result_t<_Property> { - return !(*this == __rhs); + auto __prop = __cudax::dynamic_any_cast*>(&__self); + if constexpr (_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) + { + return __prop != nullptr; + } + else if (__prop) + { + return get_property(*__prop, _Property{}); + } + else + { + return _CUDA_VSTD::nullopt; + } } +}; + +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_async_resource; - //! @brief Inequality comparison between two \c basic_any_resource - //! @param __rhs The other \c basic_any_resource - //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns - //! the inverse result of that equality comparison. Otherwise returns true. +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES async_resource_ref; + +// `any_resource` wraps any given resource that satisfies the required +// properties. It owns the contained resource, taking care of construction / +// destruction. This makes it especially suited for use in e.g. container types +// that need to ensure that the lifetime of the container exceeds the lifetime +// of the memory resource used to allocate the storage +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_resource + : basic_any<__iresource<_Properties...>> + , __with_try_get_property> +{ +private: + static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, + "The properties of cuda::experimental::any_resource must contain at least one execution space " + "property!"); + using __base_t = __cudax::basic_any<__iresource<_Properties...>>; + using __base_t::interface; + +public: + // any_async_resource is convertible to any_resource _CCCL_TEMPLATE(class... _OtherProperties) - _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties)) - _CCCL_AND __properties_match<_OtherProperties...>) - _CCCL_NODISCARD bool operator!=(const basic_any_resource<_Alloc_type, _OtherProperties...>& __rhs) const + _CCCL_REQUIRES((_CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__type_set<_OtherProperties...>, _Properties...>) ) + any_resource(__cudax::any_async_resource<_OtherProperties...> __other) noexcept + : __base_t(_CUDA_VSTD::move(__other.__base())) + {} + + // Inherit other constructors from basic_any + using __base_t::__base_t; +}; + +// ``any_async_resource`` wraps any given async_resource that satisfies the +// required properties. It owns the contained resource, taking care of +// construction / destruction. This makes it especially suited for use in e.g. +// container types that need to ensure that the lifetime of the container +// exceeds the lifetime of the memory resource used to allocate the storage +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES any_async_resource + : basic_any<__iasync_resource<_Properties...>> + , __with_try_get_property> +{ +private: + static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, + "The properties of cuda::experimental::any_async_resource must contain at least one execution space " + "property!"); + + template + friend struct any_resource; + + using __base_t = __cudax::basic_any<__iasync_resource<_Properties...>>; + using __base_t::interface; + + __base_t& __base() noexcept { - return !(*this == __rhs); + return *this; } - //! @brief Forwards the stateless properties - _CCCL_TEMPLATE(class _Property) - _CCCL_REQUIRES((!property_with_value<_Property>) _CCCL_AND(_CUDA_VSTD::__is_included_in_v<_Property, _Properties...>)) - friend void get_property(const basic_any_resource&, _Property) noexcept {} +public: + // Inherit constructors from basic_any + using __base_t::__base_t; +}; - //! @brief Forwards the stateful properties - _CCCL_TEMPLATE(class _Property) - _CCCL_REQUIRES(property_with_value<_Property> _CCCL_AND(_CUDA_VSTD::__is_included_in_v<_Property, _Properties...>)) - _CCCL_NODISCARD_FRIEND __property_value_t<_Property> get_property(const basic_any_resource& __res, _Property) noexcept +//! @brief Type erased wrapper around a `resource` that satisfies \tparam _Properties +//! @tparam _Properties The properties that any resource wrapped within the `resource_ref` needs to satisfy +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES resource_ref + : basic_any<__iresource<_Properties...>&> + , __with_try_get_property> +{ +private: + static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, + "The properties of cuda::experimental::resource_ref must contain at least one execution space " + "property!"); + using __base_t = __cudax::basic_any<__iresource<_Properties...>&>; + using __base_t::interface; + +public: + // async_resource_ref is convertible to resource_ref + _CCCL_TEMPLATE(class... _OtherProperties) + _CCCL_REQUIRES((_CUDA_VSTD::__type_set_contains_v<_CUDA_VSTD::__type_set<_OtherProperties...>, _Properties...>) ) + resource_ref(__cudax::async_resource_ref<_OtherProperties...> __other) noexcept + : __base_t(__other.__base()) + {} + + // Conversions from the resource_ref types in cuda::mr is not supported. + template + resource_ref(_CUDA_VMR::resource_ref<_OtherProperties...>) = delete; + + template + resource_ref(_CUDA_VMR::async_resource_ref<_OtherProperties...>) = delete; + + // Inherit other constructors from basic_any + using __base_t::__base_t; +}; + +//! @brief Type erased wrapper around a `async_resource` that satisfies \tparam _Properties +//! @tparam _Properties The properties that any async resource wrapped within the `async_resource_ref` needs to satisfy +template +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES async_resource_ref + : basic_any<__iasync_resource<_Properties...>&> + , __with_try_get_property> +{ +private: + static_assert(_CUDA_VMR::__contains_execution_space_property<_Properties...>, + "The properties of cuda::experimental::async_resource_ref must contain at least one execution space " + "property!"); + + template + friend struct resource_ref; + + using __base_t = __cudax::basic_any<__iasync_resource<_Properties...>&>; + using __base_t::interface; + + __base_t& __base() noexcept { - _CUDA_VMR::_Property_vtable<_Property> const& __prop = __res; - return __prop.__property_fn(__res._Get_object()); + return *this; } + +public: + // Conversions from the resource_ref types in cuda::mr is not supported. + template + async_resource_ref(_CUDA_VMR::async_resource_ref<_OtherProperties...>) = delete; + + // Inherit other constructors from basic_any + using __base_t::__base_t; }; -//! @brief Checks whether a passed in type is a specialization of basic_any_resource -template -_CCCL_INLINE_VAR constexpr bool __is_basic_any_resource<_Ty, basic_any_resource<_Alloc_type, _Properties...>> = true; +_CCCL_TEMPLATE(class... _Properties, class _Resource) +_CCCL_REQUIRES(mr::resource_with<_Resource, _Properties...>) +resource_ref<_Properties...> __as_resource_ref(_Resource& __mr) noexcept +{ + return resource_ref<_Properties...>(__mr); +} + +template +resource_ref<_Properties...> __as_resource_ref(resource_ref<_Properties...> const __mr) noexcept +{ + return __mr; +} + +template +resource_ref<_Properties...> __as_resource_ref(async_resource_ref<_Properties...> const __mr) noexcept +{ + return __mr; +} + +template +mr::resource_ref<_Properties...> +__as_resource_ref(mr::basic_resource_ref<_Alloc_type, _Properties...> const __mr) noexcept +{ + return __mr; +} + +#else // ^^^ !_CCCL_DOXYGEN_INVOKED ^^^ / vvv _CCCL_DOXYGEN_INVOKED vvv + +enum class _ResourceKind +{ + _Synchronous, + _Asynchronous +}; + +//! @rst +//! Type erased wrapper around a `resource` or an `async_resource` +//! -------------------------------------------------------------- +//! +//! ``basic_any_resource`` wraps any given :ref:`resource +//! ` that satisfies the +//! required properties. It owns the contained resource, taking care of +//! construction / destruction. This makes it especially suited for use in e.g. +//! container types that need to ensure that the lifetime of the container +//! exceeds the lifetime of the memory resource used to allocate the storage +//! +//! ``basic_any_resource`` models the ``cuda::std::regular`` concept. +//! @endrst +//! +//! @tparam _Kind Either `_ResourceKind::_Synchronous` for `any_resource`, or +//! `_ResourceKind::_Asynchronous` for `any_async_resource`. +//! @tparam _Properties A pack of property types that a memory resource must +//! provide in order to be storable in instances of this `basic_any_resource` +//! type. +//! +//! @sa any_resource +//! @sa any_async_resource +//! @sa resource_ref +//! @sa async_resource_ref +template <_ResourceKind _Kind, class... _Properties> +class basic_any_resource +{ +public: + //! @brief Constructs a \c basic_any_resource with no value + //! @post `has_value()` is `false` + basic_any_resource() noexcept; + + //! @brief Move constructs a \c basic_any_resource + //! @post `has_value()` is `true` if `__other` had a value prior to the move, + //! and `false` otherwise. `__other.has_value()` is `false`. + basic_any_resource(basic_any_resource&& __other) noexcept; + + //! @brief Copy constructs a \c basic_any_resource + //! @post `has_value()` is the same as `__other.has_value()`. + basic_any_resource(const basic_any_resource& __other); + + //! @brief Constructs a \c basic_any_resource from a type that satisfies the + //! \c resource concept. + //! and that supports all of the specified properties. + //! @param __res The resource to be wrapped by the \c basic_any_resource. + //! @pre \c _Resource is not a specialization of \c basic_any_resource or + //! \c basic_resource_ref, or a type derived from such. + //! @pre `resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous, + //! `async_resource_with<_Resource, _Properties...>` is `true`. + //! @post `has_value()` is `true` + template + basic_any_resource(_Resource __res); + + //! @brief Conversion from a type-erased resource with a superset of the + //! required properties. + //! @param __res The object to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + //! @post `has_value()` is equal to `__res.has_value()` + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_any_resource(basic_any_resource<_OtherKind, _OtherProperties...> __res); + + //! @brief Deep copy from a type-erased resource reference with a superset + //! of the required properties. + //! + //! The object to which \c __res refers is copied into `*this`. + //! @param __res The reference to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_any_resource(basic_resource_ref<_OtherKind, _OtherProperties...> __res); + + //! @brief Move assigns a \c basic_any_resource + //! @post `has_value()` is `true` if `__other` had a value prior to the move, + //! and `false` otherwise. + //! @post `__other.has_value()` is `false`. + basic_any_resource& operator=(basic_any_resource&& __other) noexcept; + + //! @brief Copy assigns a \c basic_any_resource + //! @post `has_value()` is the same as `__other.has_value()`. + basic_any_resource& operator=(const basic_any_resource& __other); + + //! @brief Assigns from a type that satisfies the \c resource concept and that + //! supports all of the specified properties. + //! @param __res The resource to be wrapped within the \c basic_any_resource + //! @pre \c _Resource is not a specialization of \c basic_any_resource or + //! \c basic_resource_ref, or a type derived from such. + //! @pre `resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous, + //! `async_resource_with<_Resource, _Properties...>` is `true`. + //! @post `has_value()` is `true` + template + basic_any_resource& operator=(_Resource __res); + + //! @brief Assignment from a type-erased resource with a superset of the + //! required properties. + //! @param __res The object to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + //! @post `has_value()` is equal to `__res.has_value()`. + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_any_resource& operator=(basic_any_resource<_OtherKind, _OtherProperties...> __res); + + //! @brief Deep copy from a type-erased resource reference with a superset of + //! the required properties. + //! @param __res The type-erased resource reference to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + //! @post `has_value()` is `true`. + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_any_resource& operator=(basic_resource_ref<_OtherKind, _OtherProperties...> __res); + + //! @brief Equality comparison between two type-erased memory resource + //! @param __rhs The type-erased resource to compare with `*this`. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`. + //! @return `true` if both resources hold objects of the same type and those + //! objects compare equal, and `false` otherwise. + template <_ResourceKind _OtherKind, class... _OtherProperties> + [[nodiscard]] bool operator==(const basic_any_resource<_OtherKind, _OtherProperties...>& __rhs) const; + + //! @brief Equality comparison between `*this` and a type-erased resource + //! reference. + //! @param __rhs The type-erased resource reference to compare with `*this`. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`. + //! @return `true` if \c __rhs refers to an object of the same type as that + //! wrapped by `*this` and those objects compare equal; `false` otherwise. + template <_ResourceKind _OtherKind, class... _OtherProperties> + [[nodiscard]] bool operator==(const basic_resource_ref<_OtherKind, _OtherProperties...>& __rhs) const; + + //! @brief Calls `allocate` on the wrapped object with the specified + //! arguments. + //! @pre `has_value()` is `true`. + //! @return `obj.allocate(__size, __align)`, where `obj` is the wrapped + //! object. + [[nodiscard]] void* allocate(size_t __size, size_t __align = alignof(cuda::std::max_align_t)); + + //! @brief Calls `deallocate` on the wrapped object with the specified + //! arguments. + //! @pre `has_value()` is `true`. + //! @pre `__pv` must be a pointer that was previously returned by a call to \c + //! allocate on the object wrapped by `*this`. + //! @return `obj.deallocate(__pv, __size, __align)`, where `obj` is the + //! wrapped object. + void deallocate(void* __pv, size_t __size, size_t __align = alignof(cuda::std::max_align_t)); + + //! @brief Calls `allocate_async` on the wrapped object with the specified + //! arguments. + //! @pre `_Kind` is `_ResourceKind::_Asynchronous`. + //! @pre `has_value()` is `true`. + //! @return `obj.allocate_async(__size, __align, __stream)`, where `obj` is + //! the wrapped object. + //! @warning The returned pointer is not valid until `__stream` has been + //! synchronized. + [[nodiscard]] void* allocate_async(size_t __size, size_t __align, cuda::stream_ref __stream); + + //! @brief Equivalent to `allocate_async(__size, + //! alignof(_CUDA_VSTD::max_align_t), __stream)`. + [[nodiscard]] void* allocate_async(size_t __size, cuda::stream_ref __stream); + + //! @brief Calls `deallocate_async` on the wrapped object with the specified + //! arguments. + //! @pre `_Kind` is `_ResourceKind::_Asynchronous`. + //! @pre `has_value()` is `true`. + //! @pre `__pv` must be a pointer that was previously returned by a call to + //! \c allocate_async on the object wrapped by `*this`. + //! @return `obj.deallocate_async(__pv, __size, __align, __stream)`, where + //! `obj` is the wrapped object. + void deallocate_async(void* __pv, size_t __size, size_t __align, cuda::stream_ref __stream); + + //! @brief Equivalent to `deallocate_async(__pv, __size, + //! alignof(_CUDA_VSTD::max_align_t), __stream)`. + void deallocate_async(void* __pv, size_t __size, cuda::stream_ref __stream); + + //! @brief Checks if `*this` holds a value. + //! @return `true` if `*this` holds a value; `false` otherwise. + [[nodiscard]] bool has_value() const noexcept; + + //! @brief Resets `*this` to the empty state. + //! @post `has_value() == false` + void reset() noexcept; + + //! @return A reference to the \c type_info object for the wrapped + //! resource, or `typeid(void)` if `has_value()` is `false`. + [[nodiscard]] const cuda::std::type_info& type() const noexcept; + + //! @brief Forwards a property query to the type-erased object. + //! @param __res The \c basic_any_resource object + //! @param __prop The property to query + //! @pre The type \c _Property is one of the types in the pack + //! `_Properties...`. + //! @return The result of calling `get_property(__obj, __prop)`, where `__obj` + //! is the type-erased object stored in `__res`. + template + friend decltype(auto) get_property(const basic_any_resource& __res, _Property __prop) noexcept; + + //! @brief Attempts to forward a property query to the type-erased object and + //! returns a _`boolean-testable`_ object that contains the result, if any. + //! + //! @tparam _Property + //! @param __res The \c basic_any_resource object + //! @param __prop The property to query + //! @pre `has_value()` is `true`. + //! @return + //! Let: + //! - \c obj be the wrapped object. + //! - \c ValueType be the associated value type of \c __prop. + //! - \c ReturnType be \c bool if \c ValueType is \c void. Otherwise, + //! \c ReturnType is \c cuda::std::optional. + //! - \c _OtherProperties be the pack of type parameters of the + //! \c basic_any_resource object that first type-erased \c obj. [_Note:_ + //! `_OtherProperties` is different than `_Properties` when \c *this is + //! the result of a conversion from a different \c basic_any type. -- end + //! note] + //! . + //! `try_get_property(__res, __prop)` has type \c ReturnType. If \c _Property + //! is not in the pack \c _OtherProperties, returns `ReturnType()`. + //! Otherwise: + //! - Returns \c true if \c ValueType is \c void. + //! - Returns `ReturnType(get_property(obj, __prop))` otherwise. + template + friend auto try_get_property(const basic_any_resource& __res, _Property __prop) noexcept; +}; + +//! @brief Type erased wrapper around a reference to an object that satisfies +//! the \c resource concept and that provides the requested \c _Properties. +//! @tparam _Properties The properties that any resource wrapped within the +//! `basic_resource_ref` needs to provide. +//! +//! ``basic_resource_ref`` models the ``cuda::std::copyable`` and +//! ``cuda::std::equality_comparable`` concepts. +template <_ResourceKind _Kind, class... _Properties> +class basic_resource_ref +{ +public: + //! @brief Copy constructs a \c basic_resource_ref + //! @post `*this` and `__other` both refer to the same resource object. + basic_resource_ref(const basic_resource_ref& __other); + + //! @brief Constructs a \c basic_resource_ref from a reference to a type that + //! satisfies the \c resource concept and that supports all of the specified + //! properties. + //! @param __res The resource reference to be wrapped. + //! @pre `resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous, + //! `async_resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c __res refers to a specialization of \c basic_any_resource or + //! a type derived from such, `__res.has_value()` is `true`. + template + basic_resource_ref(_Resource& __res); + + //! @brief Conversion from type-erased resource reference with a superset + //! of the required properties. + //! @param __res The other type-erased resource reference to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + //! @post `*this` and `__res` both refer to the same resource object. + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_resource_ref(basic_resource_ref<_OtherKind, _OtherProperties...> __res); + + //! @brief Rebinds `*this` to refer to the object to which `__other` refers. + //! @post `*this` and `__other` both refer to the same resource object. + basic_resource_ref& operator=(const basic_resource_ref& __other); + + //! @brief Rebinds the wrapped reference to an object whose type satisfies the + //! \c resource concept and that supports all of the specified properties. + //! @param __res The reference to the resource to be wrapped by the \c + //! basic_resource_ref. + //! @pre `resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c _Kind is \c _ResourceKind::_Asynchronous, + //! `async_resource_with<_Resource, _Properties...>` is `true`. + //! @pre If \c __res refers to a specialization of \c basic_any_resource or a + //! type derived from such, `__res.has_value()` is `true`. + template + basic_resource_ref& operator=(_Resource& __res); + + //! @brief Rebinds `*this` to refer to the object to which `__other` refers. + //! @param __res The other type-erased resource reference to copy from. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_OtherProperties...` is a superset of `_Properties...`. + //! @post `*this` and `__res` both refer to the same resource object. + template <_ResourceKind _OtherKind, class... _OtherProperties> + basic_resource_ref& operator=(basic_resource_ref<_OtherKind, _OtherProperties...> __res); + + //! @brief Equality comparison between two type-erased resource references. + //! @param __rhs The other type-erased resource reference. + //! @pre \c _OtherKind is equal to either \c _Kind or + //! \c _ResourceKind::_Asynchronous. + //! @pre The set `_Properties...` is equal to the set `_OtherProperties...`. + //! @return `true` if both resources refer to objects of the same type and + //! those objects compare equal. Otherwise, returns `false`. + template <_ResourceKind _OtherKind, class... _OtherProperties> + [[nodiscard]] bool operator==(const basic_resource_ref<_OtherKind, _OtherProperties...>& __rhs) const; + + //! @brief Calls `allocate` on the wrapped reference with the specified + //! arguments. + //! @return `obj.allocate(__size, __align)`, where `obj` is the wrapped + //! reference. + [[nodiscard]] void* allocate(size_t __size, size_t __align = alignof(cuda::std::max_align_t)); + + //! @brief Calls `deallocate` on the wrapped reference with the specified + //! arguments. + //! @pre `__pv` must be a pointer that was previously returned by a call to + //! \c allocate on the object referenced by `*this`. + //! @return `obj.deallocate(__pv, __size, __align)`, where `obj` is the + //! wrapped reference. + void deallocate(void* __pv, size_t __size, size_t __align = alignof(cuda::std::max_align_t)); + + //! @brief Calls `allocate_async` on the wrapped reference with the specified + //! arguments. + //! @pre `_Kind` is `_ResourceKind::_Asynchronous`. + //! @return `obj.allocate_async(__size, __align, __stream)`, where `obj` is + //! the wrapped reference. + //! @warning The returned pointer is not valid until `__stream` has been + //! synchronized. + [[nodiscard]] void* allocate_async(size_t __size, size_t __align, cuda::stream_ref __stream); + + //! @brief Equivalent to `allocate_async(__size, + //! alignof(_CUDA_VSTD::max_align_t), __stream)`. + [[nodiscard]] void* allocate_async(size_t __size, cuda::stream_ref __stream); + + //! @brief Calls `deallocate_async` on the wrapped reference with the specified + //! arguments. + //! @pre `_Kind` is `_ResourceKind::_Asynchronous`. + //! @pre `__pv` must be a pointer that was previously returned by a call to \c + //! allocate_async on the object referenced by `*this`. + //! @return `obj.deallocate_async(__pv, __size, __align, __stream)`, where + //! `obj` is the wrapped reference. + void deallocate_async(void* __pv, size_t __size, size_t __align, cuda::stream_ref __stream); + + //! @brief Equivalent to `deallocate_async(__pv, __size, + //! alignof(_CUDA_VSTD::max_align_t), __stream)`. + void deallocate_async(void* __pv, size_t __size, cuda::stream_ref __stream); + + //! @return A reference to the \c type_info object for the type of the object + //! to which `*this` refers. + [[nodiscard]] const cuda::std::type_info& type() const noexcept; + + //! @brief Forwards a property query to the type-erased reference. + //! @tparam _Property + //! @param __res The \c basic_resource_ref object + //! @param __prop The property to query + //! @pre \c _Property is a type in `_Properties...`. + //! @return The result of calling `get_property(__obj, __prop)`, where `__obj` + //! is the type-erased reference stored in `__res`. + template + friend decltype(auto) get_property(const basic_resource_ref& __res, _Property __prop) noexcept; + + //! @brief Attempts to forward a property query to the type-erased object and + //! returns a _`boolean-testable`_ object that contains the result, if any. + //! + //! @tparam _Property + //! @param __res The \c any_resource object + //! @param __prop The property to query + //! @pre `has_value()` is `true`. + //! @return + //! Let: + //! - \c obj be the wrapped reference. + //! - \c ValueType be the associated value type of \c __prop. + //! - \c ReturnType be \c bool if \c ValueType is \c void. Otherwise, + //! \c ReturnType is \c cuda::std::optional. + //! - \c _OtherProperties be the pack of type parameters of the wrapper type + //! that first type-erased \c obj. [_Note:_ `_OtherProperties` is + //! different than `_Properties` when \c *this is the result of an + //! interface-narrowing conversion. -- end note] + //! . + //! `try_get_property(__res, __prop)` has type \c ReturnType. If \c _Property + //! is not in the pack \c _OtherProperties, returns `ReturnType()`. + //! Otherwise: + //! - Returns \c true if \c ValueType is \c void. + //! - Returns `ReturnType(get_property(obj, __prop))` otherwise. + template + friend auto try_get_property(const basic_resource_ref& __res, _Property __prop) noexcept; +}; //! @rst //! .. _cudax-memory-resource-any-resource: @@ -302,29 +843,53 @@ _CCCL_INLINE_VAR constexpr bool __is_basic_any_resource<_Ty, basic_any_resource< //! Type erased wrapper around a `resource` //! ---------------------------------------- //! -//! ``any_resource`` wraps any given :ref:`resource ` that -//! satisfies the required properties. It owns the contained resource, taking care of construction / destruction. -//! This makes it especially suited for use in e.g. container types that need to ensure that the lifetime of the -//! container exceeds the lifetime of the memory resource used to allocate the storage +//! ``any_resource`` wraps any given :ref:`resource +//! ` that satisfies the +//! required properties. It owns the contained resource, taking care of +//! construction / destruction. This makes it especially suited for use in e.g. +//! container types that need to ensure that the lifetime of the container +//! exceeds the lifetime of the memory resource used to allocate the storage +//! +//! ``any_resource`` models the ``cuda::std::regular`` concept. //! //! @endrst template -using any_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Default, _Properties...>; +using any_resource = basic_any_resource<_ResourceKind::_Synchronous, _Properties...>; //! @rst //! .. _cudax-memory-resource-any-async-resource: //! //! Type erased wrapper around an `async_resource` -//! ----------------------------------------------- +//! ---------------------------------------------- +//! +//! ``any_async_resource`` wraps any given :ref:`async_resource +//! ` that satisfies the +//! required properties. It owns the contained resource, taking care of +//! construction / destruction. This makes it especially suited for use in e.g. +//! container types that need to ensure that the lifetime of the container +//! exceeds the lifetime of the memory resource used to allocate the storage //! -//! ``any_async_resource`` wraps any given :ref:`async resource ` -//! that satisfies the required properties. It owns the contained resource, taking care of construction / destruction. -//! This makes it especially suited for use in e.g. container types that need to ensure that the lifetime of the -//! container exceeds the lifetime of the memory resource used to allocate the storage +//! ``any_async_resource`` models the ``cuda::std::regular`` concept. //! //! @endrst template -using any_async_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Properties...>; +using any_async_resource = basic_any_resource<_ResourceKind::_Asynchronous, _Properties...>; + +//! @brief Type erased wrapper around a `resource` that satisfies \c +//! _Properties. +//! @tparam _Properties The properties that any resource wrapped within the +//! `resource_ref` needs to satisfy +template +using resource_ref = basic_resource_ref<_ResourceKind::_Synchronous, _Properties...>; + +//! @brief Type erased wrapper around a `async_resource` that satisfies \c +//! _Properties +//! @tparam _Properties The properties that any async resource wrapped within +//! the `async_resource_ref` needs to satisfy +template +using async_resource_ref = basic_resource_ref<_ResourceKind::_Asynchronous, _Properties...>; + +#endif // _CCCL_DOXYGEN_INVOKED //! @rst //! .. _cudax-memory-resource-make-any-resource: @@ -332,11 +897,14 @@ using any_async_resource = basic_any_resource<_CUDA_VMR::_AllocType::_Async, _Pr //! Factory function for `any_resource` objects //! ------------------------------------------- //! -//! ``make_any_resource`` constructs an :ref:`any_resource ` object that wraps a -//! newly constructed instance of the given resource type. The resource type must satisfy the ``cuda::mr::resource`` -//! concept and provide all of the properties specified in the template parameter pack. +//! ``make_any_resource`` constructs an :ref:`any_resource +//! ` object that wraps a newly constructed +//! instance of the given resource type. The resource type must satisfy the +//! ``cuda::mr::resource`` concept and provide all of the properties specified +//! in the template parameter pack. //! -//! @param __args The arguments used to construct the instance of the resource type. +//! @param __args The arguments used to construct the instance of the resource +//! type. //! //! @endrst template @@ -344,7 +912,7 @@ auto make_any_resource(_Args&&... __args) -> any_resource<_Properties...> { static_assert(_CUDA_VMR::resource<_Resource>, "_Resource does not satisfy the cuda::mr::resource concept"); static_assert(_CUDA_VMR::resource_with<_Resource, _Properties...>, - "Resource does not satisfy the required properties"); + "The provided _Resource type does not support the requested properties"); return any_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...}; } @@ -354,11 +922,14 @@ auto make_any_resource(_Args&&... __args) -> any_resource<_Properties...> //! Factory function for `any_async_resource` objects //! ------------------------------------------------- //! -//! ``make_any_async_resource`` constructs an :ref:`any_async_resource ` -//! object that wraps a newly constructed instance of the given resource type. The resource type must satisfy the -//! ``cuda::mr::async_resource`` concept and provide all of the properties specified in the template parameter pack. +//! ``make_any_async_resource`` constructs an :ref:`any_async_resource +//! ` object that wraps a newly +//! constructed instance of the given resource type. The resource type must +//! satisfy the ``cuda::mr::async_resource`` concept and provide all of the +//! properties specified in the template parameter pack. //! -//! @param __args The arguments used to construct the instance of the resource type. +//! @param __args The arguments used to construct the instance of the resource +//! type. //! //! @endrst template @@ -367,10 +938,12 @@ auto make_any_async_resource(_Args&&... __args) -> any_async_resource<_Propertie static_assert(_CUDA_VMR::async_resource<_Resource>, "_Resource does not satisfy the cuda::mr::async_resource concept"); static_assert(_CUDA_VMR::async_resource_with<_Resource, _Properties...>, - "Resource does not satisfy the required properties"); + "The provided _Resource type does not support the requested properties"); return any_async_resource<_Properties...>{_CUDA_VSTD::in_place_type<_Resource>, _CUDA_VSTD::forward<_Args>(__args)...}; } } // namespace cuda::experimental -#endif //_CUDAX__MEMORY_RESOURCE_ANY_RESOURCE_H +_CCCL_POP_MACROS + +#endif // _CUDAX__MEMORY_RESOURCE_ANY_RESOURCE_H diff --git a/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh index 431e210de84..9dce7ef8f86 100644 --- a/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh @@ -31,12 +31,12 @@ # include # include -# include # include # include # include # include +# include # include # if _CCCL_STD_VER >= 2014 diff --git a/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh index 7dce44ecbaa..2f727fd5dc1 100644 --- a/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh @@ -31,7 +31,6 @@ # include # include -# include # include # include # include @@ -39,6 +38,7 @@ # include # include +# include # include # include # include @@ -317,8 +317,8 @@ private: { if constexpr (has_property<_Resource, device_accessible>) { - return _CUDA_VMR::resource_ref{const_cast(this)} - == _CUDA_VMR::resource_ref{const_cast<_Resource&>(__rhs)}; + return resource_ref{*const_cast(this)} + == __cudax::__as_resource_ref(const_cast<_Resource&>(__rhs)); } else { diff --git a/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh index a7c286ab644..b583450d414 100644 --- a/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/managed_memory_resource.cuh @@ -28,11 +28,11 @@ #include #include #include -#include #include #include #include +#include #include //! @file @@ -165,15 +165,15 @@ private: template _CCCL_NODISCARD bool __equal_to(_Resource const& __rhs) const noexcept { - if constexpr (has_property<_Resource, mr::device_accessible>) + if constexpr (has_property<_Resource, device_accessible>) { - return _CUDA_VMR::resource_ref{const_cast(this)} - == _CUDA_VMR::resource_ref{const_cast<_Resource&>(__rhs)}; + return resource_ref{*const_cast(this)} + == __cudax::__as_resource_ref(const_cast<_Resource&>(__rhs)); } - else if constexpr (has_property<_Resource, mr::host_accessible>) + else if constexpr (has_property<_Resource, host_accessible>) { - return _CUDA_VMR::resource_ref{const_cast(this)} - == _CUDA_VMR::resource_ref{const_cast<_Resource&>(__rhs)}; + return resource_ref{*const_cast(this)} + == __cudax::__as_resource_ref(const_cast<_Resource&>(__rhs)); } else { @@ -224,9 +224,9 @@ public: # endif // _CCCL_STD_VER <= 2017 //! @brief Enables the \c device_accessible property - friend constexpr void get_property(managed_memory_resource const&, mr::device_accessible) noexcept {} + friend constexpr void get_property(managed_memory_resource const&, device_accessible) noexcept {} //! @brief Enables the \c host_accessible property - friend constexpr void get_property(managed_memory_resource const&, mr::host_accessible) noexcept {} + friend constexpr void get_property(managed_memory_resource const&, host_accessible) noexcept {} #endif // _CCCL_DOXYGEN_INVOKED //! @brief Checks whether the passed in alignment is valid @@ -236,8 +236,8 @@ public: && (_CUDA_VMR::default_cuda_malloc_alignment % __alignment == 0); } }; -static_assert(_CUDA_VMR::async_resource_with, ""); -static_assert(_CUDA_VMR::async_resource_with, ""); +static_assert(_CUDA_VMR::async_resource_with, ""); +static_assert(_CUDA_VMR::async_resource_with, ""); } // namespace cuda::experimental diff --git a/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh index fa3d2c44f8a..68ccaf2344b 100644 --- a/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/pinned_memory_resource.cuh @@ -29,11 +29,11 @@ #include #include #include -#include #include #include #include +#include #include //! @file @@ -166,13 +166,13 @@ public: { if constexpr (has_property<_Resource, device_accessible>) { - return _CUDA_VMR::resource_ref{const_cast(this)} - == _CUDA_VMR::resource_ref{const_cast<_Resource&>(__rhs)}; + return resource_ref{*const_cast(this)} + == __cudax::__as_resource_ref(const_cast<_Resource&>(__rhs)); } else if constexpr (has_property<_Resource, host_accessible>) { - return _CUDA_VMR::resource_ref{const_cast(this)} - == _CUDA_VMR::resource_ref{const_cast<_Resource&>(__rhs)}; + return resource_ref{*const_cast(this)} + == __cudax::__as_resource_ref(const_cast<_Resource&>(__rhs)); } else { diff --git a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh index 03bae6c4b0f..4c71f5cfe2e 100644 --- a/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/shared_resource.cuh @@ -121,7 +121,7 @@ struct shared_resource //! held by this \c shared_resource object is released, while the reference held by \c __other //! is transferred to this object. //! @param __other The \c shared_resource object to move from. - /// @post \c __other is left in a valid but unspecified state. + //! @post \c __other is left in a valid but unspecified state. shared_resource& operator=(shared_resource&& __other) noexcept { if (this != &__other) diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh index 248c3b05e01..b5b4e622a6f 100644 --- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh +++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_base.cuh @@ -43,12 +43,23 @@ _CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface>&) -> basic_any<_I template _CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface> const&) -> basic_any<_Interface> const&; +#if _CCCL_COMPILER(CLANG, <, 12) || _CCCL_COMPILER(GCC, <, 11) +// Older versions of clang and gcc need help disambiguating between +// basic_any<__ireference> and basic_any. +template +_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&>&&) -> basic_any<_Interface&>&&; +template +_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&>&) -> basic_any<_Interface&>&; +template +_CUDAX_HOST_API auto __is_basic_any_test(basic_any<_Interface&> const&) -> basic_any<_Interface&> const&; +#endif + // clang-format off template _CCCL_CONCEPT __is_basic_any = _CCCL_REQUIRES_EXPR((_Tp), _Tp& __value) ( - __is_basic_any_test(__value) + __cudax::__is_basic_any_test(__value) ); // clang-format on diff --git a/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh b/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh index b447d27c244..412604c6601 100644 --- a/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh +++ b/cudax/include/cuda/experimental/__utility/basic_any/conversions.cuh @@ -23,9 +23,10 @@ # pragma system_header #endif // no system header -#include +#include #include #include +#include #include #include @@ -43,97 +44,118 @@ namespace cuda::experimental //! cvref qualified basic_any types to archetype types, and then using //! the built-in language rules to determine if the conversion is valid. //! -struct __immovable_archetype +template +struct __archetype; + +// Archetype for interfaces that extend neither imovable nor icopyable +template <> +struct __archetype // immovable archetype { - __immovable_archetype() = default; - __immovable_archetype(__immovable_archetype&&) = delete; - __immovable_archetype(const __immovable_archetype&) = delete; + __archetype() = default; + __archetype(__archetype&&) = delete; + __archetype(const __archetype&) = delete; template - _CUDAX_HOST_API __immovable_archetype(_Value) noexcept; + _CUDAX_HOST_API __archetype(_Value) noexcept; template - _CUDAX_HOST_API __immovable_archetype(_Value*) = delete; + _CUDAX_HOST_API __archetype(_Value*) = delete; }; -struct __movable_archetype : __immovable_archetype +// Archetype for interfaces that extend imovable but not icopyable +template <> +struct __archetype : __archetype // movable archetype { - __movable_archetype() = default; - _CUDAX_HOST_API __movable_archetype(__movable_archetype&&) noexcept; - __movable_archetype(const __movable_archetype&) = delete; + __archetype() = default; + _CUDAX_HOST_API __archetype(__archetype&&) noexcept; + __archetype(const __archetype&) = delete; }; -struct __copyable_archetype : __movable_archetype +// Archetype for interfaces that extend icopyable +template <> +struct __archetype : __archetype { - __copyable_archetype() = default; - _CUDAX_HOST_API __copyable_archetype(__copyable_archetype const&); + __archetype() = default; + _CUDAX_HOST_API __archetype(__archetype const&); }; template -using _archetype_base = _CUDA_VSTD::conditional_t< - extension_of<_Interface, icopyable<>>, - __copyable_archetype, - _CUDA_VSTD::conditional_t>, __movable_archetype, __immovable_archetype>>; +using __archetype_t _CCCL_NODEBUG_ALIAS = + __archetype>, extension_of<_Interface, icopyable<>>>; + +// Strip top-level cv- and ref-qualifiers from pointer types: +template +auto __normalize(_Ty&&) -> _Ty +{} +template +auto __normalize(_Ty*) -> _Ty* +{} +template +using __normalize_t _CCCL_NODEBUG_ALIAS = decltype(__cudax::__normalize(declval<_Ty>())); + +// Used to map a basic_any specialization to a normalized interface type: +template +extern _CUDA_VSTD::__undefined<_Ty> __interface_from; template -_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>&&) -> _Interface; +extern _Interface __interface_from>; template -_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>>&&) -> _Interface; +extern _Interface __interface_from>>; template -_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>&) -> _Interface&; +extern _Interface& __interface_from&>; template -_CUDAX_HOST_API auto __interface_from(basic_any<_Interface> const&) -> _Interface const&; +extern _Interface const& __interface_from const&>; template -_CUDAX_HOST_API auto __interface_from(basic_any<_Interface>*) -> _Interface*; +extern _Interface* __interface_from*>; template -_CUDAX_HOST_API auto __interface_from(basic_any<_Interface> const*) -> _Interface const*; +extern _Interface const* __interface_from const*>; template -_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>>*) -> _Interface*; +extern _Interface* __interface_from>*>; template -_CUDAX_HOST_API auto __interface_from(basic_any<__ireference<_Interface>> const*) -> _Interface*; +extern _Interface* __interface_from> const*>; +// Used to map a normalized interface type to an archetype for conversion testing: template -_CUDAX_HOST_API auto __as_archetype(_Interface&&) -> _archetype_base<_Interface>; +extern __archetype_t<_Interface> __as_archetype; template -_CUDAX_HOST_API auto __as_archetype(_Interface&) -> _archetype_base<_Interface>&; +extern __archetype_t<_Interface>& __as_archetype<_Interface&>; template -_CUDAX_HOST_API auto __as_archetype(_Interface const&) -> _archetype_base<_Interface> const&; +extern __archetype_t<_Interface> const& __as_archetype<_Interface const&>; template -_CUDAX_HOST_API auto __as_archetype(_Interface*) -> _archetype_base<_Interface>*; +extern __archetype_t<_Interface>* __as_archetype<_Interface*>; template -_CUDAX_HOST_API auto __as_archetype(_Interface const*) -> _archetype_base<_Interface> const*; +extern __archetype_t<_Interface> const* __as_archetype<_Interface const*>; template -_CUDAX_HOST_API auto __as_archetype(__ireference<_Interface>) -> _archetype_base<_Interface>&; +extern __archetype_t<_Interface>& __as_archetype<__ireference<_Interface>>; template -_CUDAX_HOST_API auto __as_archetype(__ireference<_Interface const>) -> _archetype_base<_Interface> const&; +extern __archetype_t<_Interface> const& __as_archetype<__ireference<_Interface const>>; -template -_CUDAX_HOST_API auto __as_immovable(_Interface&&) -> __immovable_archetype; -template -_CUDAX_HOST_API auto __as_immovable(_Interface&) -> __immovable_archetype&; -template -_CUDAX_HOST_API auto __as_immovable(_Interface const&) -> __immovable_archetype const&; -template -_CUDAX_HOST_API auto __as_immovable(_Interface*) -> __immovable_archetype*; -template -_CUDAX_HOST_API auto __as_immovable(_Interface const*) -> __immovable_archetype const*; +// Used to map an archetype to an immovable archetype +template +extern __archetype __as_immovable; +template +extern __archetype& __as_immovable<_Archetype&>; +template +extern __archetype const& __as_immovable<_Archetype const&>; +template +extern __archetype* __as_immovable<_Archetype*>; +template +extern __archetype const* __as_immovable<_Archetype const*>; -template -using __normalized_interface_of _CCCL_NODEBUG_ALIAS = decltype(__cudax::__interface_from(declval())); +template +using __normalized_interface_of _CCCL_NODEBUG_ALIAS = __normalize_t>)>; -template -using __src_archetype_of _CCCL_NODEBUG_ALIAS = - decltype(__cudax::__as_archetype(__cudax::__interface_from(declval()))); +template +using __src_archetype_of _CCCL_NODEBUG_ALIAS = decltype(__as_archetype<__normalized_interface_of<_CvAny>>); -template -using __dst_archetype_of _CCCL_NODEBUG_ALIAS = - decltype(__cudax::__as_immovable(__cudax::__as_archetype(__cudax::__interface_from(declval())))); +template +using __dst_archetype_of _CCCL_NODEBUG_ALIAS = decltype(__as_immovable<__src_archetype_of<_CvAny>>); // If the archetypes are implicitly convertible, then it is possible to // dynamically cast from the source to the destination. The cast may fail, // but at least it is possible. template _CCCL_CONCEPT __any_castable_to = - _CUDA_VSTD::is_convertible_v<__src_archetype_of<_SrcCvAny>, __dst_archetype_of<_DstCvAny>>; + _CUDA_VSTD::convertible_to<__src_archetype_of<_SrcCvAny>, __dst_archetype_of<_DstCvAny>>; // If the archetypes are implicitly convertible **and** the source interface // is an extension of the destination one, then it is possible to implicitly diff --git a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh index 71a0506330f..435e43ee699 100644 --- a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh +++ b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh @@ -164,7 +164,7 @@ template _CCCL_CONCEPT __is_interface = _CCCL_REQUIRES_EXPR((_Tp), _Tp& __value) ( - __is_interface_test(__value) + __cudax::__is_interface_test(__value) ); // clang-format on diff --git a/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh b/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh index 13759edfbcd..140770f015f 100644 --- a/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh +++ b/cudax/include/cuda/experimental/__utility/basic_any/semiregular.cuh @@ -22,9 +22,13 @@ #endif // no system header #include +#include #include #include #include +#include +#include +#include #include #include #include @@ -114,6 +118,13 @@ __equal_fn(_Tp const& __self, _CUDA_VSTD::__type_info_ref __type, void const* __ return false; } +_CCCL_TEMPLATE(class _From, class _To) +_CCCL_REQUIRES(_CUDA_VSTD::convertible_to<_From, _To>) +_CCCL_NODISCARD _CUDAX_PUBLIC_API _To __conversion_fn(_CUDA_VSTD::type_identity_t<_From> __self) +{ + return static_cast<_To>(static_cast<_From&&>(__self)); +} + //! //! semi-regular interfaces //! @@ -251,6 +262,76 @@ template struct iequality_comparable : iequality_comparable_base {}; +struct self; // a nice placeholder type + +template +struct __iconvertible_to +{ + static_assert(_CUDA_VSTD::is_same_v<_CUDA_VSTD::decay_t<_CvSelf>, self>, + "The first template parameter to iconvertible_to must be the placeholder type " + "cuda::experimental::self, possibly with cv- and/or ref-qualifiers"); +}; + +template +struct __iconvertible_to +{ + template + struct __always_false : _CUDA_VSTD::false_type + {}; + + static_assert(__always_false<_To>::value, "rvalue-qualified conversion operations are not yet supported"); +}; + +template +struct __iconvertible_to +{ + template + struct __interface_ : interface<__interface_> + { + _CCCL_NODISCARD _CUDAX_HOST_API operator _To() + { + return __cudax::virtcall<__conversion_fn<__interface_, _To>>(this); + } + + template + using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From, _To>)>; + }; +}; + +template +struct __iconvertible_to +{ + template + struct __interface_ : interface<__interface_> + { + _CCCL_NODISCARD _CUDAX_HOST_API operator _To() & + { + return __cudax::virtcall<&__conversion_fn<__interface_&, _To>>(this); + } + + template + using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From&, _To>)>; + }; +}; + +template +struct __iconvertible_to +{ + template + struct __interface_ : interface<__interface_> + { + _CCCL_NODISCARD _CUDAX_HOST_API operator _To() const& + { + return __cudax::virtcall<&__conversion_fn<__interface_ const&, _To>>(this); + } + + template + using overrides = overrides_for<_From, _CUDAX_FNPTR_CONSTANT_WAR(&__conversion_fn<_From const&, _To>)>; + }; +}; + +template +using iconvertible_to _CCCL_NODEBUG_ALIAS = typename __iconvertible_to<_From, _To>::template __interface_<>; } // namespace cuda::experimental _CCCL_POP_MACROS diff --git a/cudax/test/containers/uninitialized_buffer.cu b/cudax/test/containers/uninitialized_buffer.cu index aadd16253fe..3e5c48c0eff 100644 --- a/cudax/test/containers/uninitialized_buffer.cu +++ b/cudax/test/containers/uninitialized_buffer.cu @@ -23,7 +23,6 @@ #include #include "testing.cuh" -#include struct do_not_construct { @@ -61,6 +60,19 @@ constexpr int get_property(const cudax::device_memory_resource&, my_property) return 42; } +__global__ void kernel(_CUDA_VSTD::span data) +{ + // Touch the memory to be sure it's accessible + CUDAX_CHECK(data.size() == 1024); + data[0] = 42; +} + +__global__ void const_kernel(_CUDA_VSTD::span data) +{ + // Touch the memory to be sure it's accessible + CUDAX_CHECK(data.size() == 1024); +} + TEMPLATE_TEST_CASE( "uninitialized_buffer", "[container]", char, short, int, long, long long, float, double, do_not_construct) { @@ -204,19 +216,6 @@ TEMPLATE_TEST_CASE( } } -__global__ void kernel(_CUDA_VSTD::span data) -{ - // Touch the memory to be sure it's accessible - CUDAX_CHECK(data.size() == 1024); - data[0] = 42; -} - -__global__ void const_kernel(_CUDA_VSTD::span data) -{ - // Touch the memory to be sure it's accessible - CUDAX_CHECK(data.size() == 1024); -} - TEST_CASE("uninitialized_buffer is usable with cudax::launch", "[container]") { SECTION("non-const") diff --git a/cudax/test/memory_resource/any_async_resource.cu b/cudax/test/memory_resource/any_async_resource.cu index 89c28b8a279..c491c9efa21 100644 --- a/cudax/test/memory_resource/any_async_resource.cu +++ b/cudax/test/memory_resource/any_async_resource.cu @@ -14,18 +14,20 @@ #include #include +#ifndef __CUDA_ARCH__ + TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resource]", big_resource, small_resource) { using TestResource = TestType; - static_assert(cuda::mr::resource_with); - constexpr bool is_big = sizeof(TestResource) > sizeof(cuda::mr::_AnyResourceStorage); + static_assert(cuda::mr::resource_with); + constexpr bool is_big = sizeof(TestResource) > cudax::__default_buffer_size; SECTION("construct and destruct") { Counts expected{}; CHECK(this->counts == expected); { - cudax::any_async_resource mr{TestResource{42, this}}; + cudax::any_async_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -44,7 +46,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou Counts expected{}; CHECK(this->counts == expected); { - cudax::any_async_resource mr{TestResource{42, this}}; + cudax::any_async_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -79,7 +81,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou Counts expected{}; CHECK(this->counts == expected); { - cudax::any_async_resource mr{TestResource{42, this}}; + cudax::any_async_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -108,7 +110,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou CHECK(this->counts == expected); { cudax::stream stream{}; - cudax::any_async_resource mr{TestResource{42, this}}; + cudax::any_async_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -135,13 +137,13 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou { Counts expected{}; { - cudax::any_async_resource mr{TestResource{42, this}}; + cudax::any_async_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; CHECK(this->counts == expected); - cuda::mr::resource_ref ref = mr; + cudax::resource_ref ref = mr; CHECK(this->counts == expected); auto* ptr = ref.allocate(bytes(100), align(8)); @@ -165,8 +167,8 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou Counts expected{}; CHECK(this->counts == expected); { - cudax::any_async_resource mr = - cudax::make_any_async_resource(42, this); + cudax::any_async_resource mr = + cudax::make_any_async_resource(42, this); expected.new_count += is_big; ++expected.object_count; CHECK(this->counts == expected); @@ -178,3 +180,5 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_async_resource", "[container][resou // Reset the counters: this->counts = Counts(); } + +#endif // __CUDA_ARCH__ diff --git a/cudax/test/memory_resource/any_resource.cu b/cudax/test/memory_resource/any_resource.cu index c013785f32f..c7aae10fea5 100644 --- a/cudax/test/memory_resource/any_resource.cu +++ b/cudax/test/memory_resource/any_resource.cu @@ -8,27 +8,37 @@ // //===----------------------------------------------------------------------===// +#include + #include #include "test_resource.cuh" -#include #include +static_assert(cuda::has_property, cudax::host_accessible>); +static_assert(cuda::has_property, get_data>); +static_assert(!cuda::has_property, cudax::device_accessible>); + +struct unused_property +{}; + TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", big_resource, small_resource) { using TestResource = TestType; - constexpr bool is_big = sizeof(TestResource) > sizeof(cuda::mr::_AnyResourceStorage); + constexpr bool is_big = sizeof(TestResource) > cudax::__default_buffer_size; SECTION("construct and destruct") { Counts expected{}; CHECK(this->counts == expected); { - cudax::any_resource mr{TestResource{42, this}}; + cudax::any_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; CHECK(this->counts == expected); + CHECK(get_property(mr, get_data{}) == 42); + get_property(mr, cudax::host_accessible{}); } expected.delete_count += is_big; --expected.object_count; @@ -43,7 +53,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", Counts expected{}; CHECK(this->counts == expected); { - cudax::any_resource mr{TestResource{42, this}}; + cudax::any_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -54,14 +64,14 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", ++expected.copy_count; ++expected.object_count; CHECK(this->counts == expected); - CHECK(mr == mr2); + CHECK((mr == mr2)); ++expected.equal_to_count; CHECK(this->counts == expected); auto mr3 = std::move(mr); expected.move_count += !is_big; // for big resources, move is a pointer swap CHECK(this->counts == expected); - CHECK(mr2 == mr3); + CHECK((mr2 == mr3)); ++expected.equal_to_count; CHECK(this->counts == expected); } @@ -78,7 +88,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", Counts expected{}; CHECK(this->counts == expected); { - cudax::any_resource mr{TestResource{42, this}}; + cudax::any_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; @@ -101,17 +111,69 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", // Reset the counters: this->counts = Counts(); - SECTION("conversion to resource_ref") + SECTION("equality comparable") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::managed_memory_resource managed1{}, managed2{}; + CHECK(managed1 == managed2); + cudax::any_resource mr{managed1}; + CHECK(mr == managed1); + } + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("conversion from any_resource to cudax::resource_ref") + { + Counts expected{}; + { + cudax::any_resource mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + + // conversion from any_resource to cuda::mr::resource_ref: + cuda::mr::resource_ref ref = mr; + + // conversion from any_resource to cuda::mr::resource_ref with narrowing: + cuda::mr::resource_ref ref2 = mr; + CHECK(get_property(ref2, get_data{}) == 42); + + CHECK(this->counts == expected); + auto* ptr = ref.allocate(bytes(100), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + ref.deallocate(ptr, bytes(0), align(0)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + --expected.object_count; + CHECK(this->counts == expected); + } + + SECTION("conversion from any_resource to cuda::mr::resource_ref") { Counts expected{}; { - cudax::any_resource mr{TestResource{42, this}}; + cudax::any_resource mr{TestResource{42, this}}; expected.new_count += is_big; ++expected.object_count; ++expected.move_count; CHECK(this->counts == expected); - cuda::mr::resource_ref ref = mr; + // conversion from any_resource to cuda::mr::resource_ref: + cuda::mr::resource_ref ref = mr; + + // conversion from any_resource to cuda::mr::resource_ref with narrowing: + cuda::mr::resource_ref ref2 = mr; + CHECK(get_property(ref2, get_data{}) == 42); CHECK(this->counts == expected); auto* ptr = ref.allocate(bytes(100), align(8)); @@ -130,13 +192,83 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "any_resource", "[container][resource]", // Reset the counters: this->counts = Counts(); + SECTION("conversion from resource_ref to any_resource") + { + Counts expected{}; + { + TestResource test{42, this}; + ++expected.object_count; + cudax::resource_ref ref{test}; + CHECK(this->counts == expected); + + cudax::any_resource mr = ref; + expected.new_count += is_big; + ++expected.object_count; + ++expected.copy_count; + CHECK(this->counts == expected); + + auto* ptr = ref.allocate(bytes(100), align(8)); + CHECK(ptr == this); + ++expected.allocate_count; + CHECK(this->counts == expected); + ref.deallocate(ptr, bytes(0), align(0)); + ++expected.deallocate_count; + CHECK(this->counts == expected); + } + expected.delete_count += is_big; + expected.object_count -= 2; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + + SECTION("test slicing off of properties") + { + Counts expected{}; + CHECK(this->counts == expected); + { + cudax::any_resource mr{TestResource{42, this}}; + expected.new_count += is_big; + ++expected.object_count; + ++expected.move_count; + CHECK(this->counts == expected); + + cudax::any_resource mr2 = mr; + expected.new_count += is_big; + ++expected.object_count; + ++expected.copy_count; + CHECK(this->counts == expected); + + CHECK(get_property(mr2, get_data{}) == 42); + auto data = try_get_property(mr2, get_data{}); + static_assert(cuda::std::is_same_v>); + CHECK(data.has_value()); + CHECK(data.value() == 42); + + auto host = try_get_property(mr2, cudax::host_accessible{}); + static_assert(cuda::std::is_same_v); + CHECK(host); + + auto unused = try_get_property(mr2, unused_property{}); + static_assert(cuda::std::is_same_v); + CHECK(!unused); + } + expected.delete_count += 2 * is_big; + expected.object_count -= 2; + CHECK(this->counts == expected); + } + + // Reset the counters: + this->counts = Counts(); + SECTION("make_any_resource") { Counts expected{}; CHECK(this->counts == expected); { - cudax::any_resource mr = - cudax::make_any_resource(42, this); + cudax::any_resource mr = + cudax::make_any_resource(42, this); expected.new_count += is_big; ++expected.object_count; CHECK(this->counts == expected); diff --git a/cudax/test/memory_resource/device_memory_resource.cu b/cudax/test/memory_resource/device_memory_resource.cu index 6c364f67e50..aefbb8b1bf7 100644 --- a/cudax/test/memory_resource/device_memory_resource.cu +++ b/cudax/test/memory_resource/device_memory_resource.cu @@ -87,7 +87,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") current_device); } - using async_resource = cuda::experimental::device_memory_resource; + using async_resource = cudax::device_memory_resource; SECTION("Default construction") { { @@ -99,7 +99,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") void* ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocAsync, - "Failed to allocate with pool passed to cuda::experimental::device_memory_resource", + "Failed to allocate with pool passed to cudax::device_memory_resource", &ptr, 42, current_default_pool, @@ -107,10 +107,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") CHECK(ptr != nullptr); _CCCL_ASSERT_CUDA_API( - ::cudaFreeAsync, - "Failed to deallocate with pool passed to cuda::experimental::device_memory_resource", - ptr, - ::cudaStream_t{0}); + ::cudaFreeAsync, "Failed to deallocate with pool passed to cudax::device_memory_resource", ptr, ::cudaStream_t{0}); } SECTION("Construct from mempool handle") @@ -133,7 +130,7 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") void* ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocAsync, - "Failed to allocate with pool passed to cuda::experimental::device_memory_resource", + "Failed to allocate with pool passed to cudax::device_memory_resource", &ptr, 42, current_default_pool, @@ -141,18 +138,15 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") CHECK(ptr != nullptr); _CCCL_ASSERT_CUDA_API( - ::cudaFreeAsync, - "Failed to deallocate with pool passed to cuda::experimental::device_memory_resource", - ptr, - ::cudaStream_t{0}); + ::cudaFreeAsync, "Failed to deallocate with pool passed to cudax::device_memory_resource", ptr, ::cudaStream_t{0}); } SECTION("Construct with initial pool size") { - cuda::experimental::memory_pool_properties props = { + cudax::memory_pool_properties props = { 42, }; - cuda::experimental::device_memory_pool pool{current_device, props}; + cudax::device_memory_pool pool{current_device, props}; async_resource from_initial_pool_size{pool}; ::cudaMemPool_t get = from_initial_pool_size.get(); @@ -170,11 +164,11 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") SECTION("Construct with release threshold") { - cuda::experimental::memory_pool_properties props = { + cudax::memory_pool_properties props = { 42, 20, }; - cuda::experimental::device_memory_pool pool{current_device, props}; + cudax::device_memory_pool pool{current_device, props}; async_resource with_threshold{pool}; ::cudaMemPool_t get = with_threshold.get(); @@ -194,12 +188,12 @@ TEST_CASE("device_memory_resource construction", "[memory_resource]") #if _CCCL_CUDACC_AT_LEAST(11, 2) SECTION("Construct with allocation handle") { - cuda::experimental::memory_pool_properties props = { + cudax::memory_pool_properties props = { 42, 20, - cuda::experimental::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor, + cudax::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor, }; - cuda::experimental::device_memory_pool pool{current_device, props}; + cudax::device_memory_pool pool{current_device, props}; async_resource with_allocation_handle{pool}; ::cudaMemPool_t get = with_allocation_handle.get(); @@ -228,7 +222,7 @@ static void ensure_device_ptr(void* ptr) TEST_CASE("device_memory_resource allocation", "[memory_resource]") { - cuda::experimental::device_memory_resource res{}; + cudax::device_memory_resource res{}; { // allocate / deallocate auto* ptr = res.allocate(42); @@ -374,13 +368,13 @@ struct resource template = 0> - friend void get_property(const resource&, cuda::mr::device_accessible) noexcept + friend void get_property(const resource&, cudax::device_accessible) noexcept {} }; static_assert(cuda::mr::resource>, ""); -static_assert(!cuda::mr::resource_with, cuda::mr::device_accessible>, ""); +static_assert(!cuda::mr::resource_with, cudax::device_accessible>, ""); static_assert(cuda::mr::resource>, ""); -static_assert(cuda::mr::resource_with, cuda::mr::device_accessible>, ""); +static_assert(cuda::mr::resource_with, cudax::device_accessible>, ""); template struct async_resource : public resource @@ -392,10 +386,9 @@ struct async_resource : public resource void deallocate_async(void*, size_t, size_t, cuda::stream_ref) {} }; static_assert(cuda::mr::async_resource>, ""); -static_assert(!cuda::mr::async_resource_with, cuda::mr::device_accessible>, ""); +static_assert(!cuda::mr::async_resource_with, cudax::device_accessible>, ""); static_assert(cuda::mr::async_resource>, ""); -static_assert(cuda::mr::async_resource_with, cuda::mr::device_accessible>, - ""); +static_assert(cuda::mr::async_resource_with, cudax::device_accessible>, ""); TEST_CASE("device_memory_resource comparison", "[memory_resource]") { @@ -404,10 +397,10 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]") _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device with cudaGetDevice.", ¤t_device); } - cuda::experimental::device_memory_resource first{}; + cudax::device_memory_resource first{}; { // comparison against a plain device_memory_resource - cuda::experimental::device_memory_resource second{}; - CHECK(first == second); + cudax::device_memory_resource second{}; + CHECK((first == second)); CHECK(!(first != second)); } @@ -421,27 +414,27 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]") pool_properties.location.id = current_device; _CCCL_TRY_CUDA_API(::cudaMemPoolCreate, "Failed to call cudaMemPoolCreate", &cuda_pool_handle, &pool_properties); } - cuda::experimental::device_memory_resource second{cuda_pool_handle}; - CHECK(first != second); + cudax::device_memory_resource second{cuda_pool_handle}; + CHECK((first != second)); CHECK(!(first == second)); } { // comparison against a device_memory_resource wrapped inside a resource_ref - cuda::experimental::device_memory_resource second{}; - cuda::mr::resource_ref second_ref{second}; - CHECK(first == second_ref); + cudax::device_memory_resource second{}; + cudax::resource_ref second_ref{second}; + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } { // comparison against a device_memory_resource wrapped inside a async_resource_ref - cuda::experimental::device_memory_resource second{}; - cuda::mr::async_resource_ref second_ref{second}; + cudax::device_memory_resource second{}; + cudax::async_resource_ref second_ref{second}; - CHECK(first == second_ref); + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } @@ -449,28 +442,28 @@ TEST_CASE("device_memory_resource comparison", "[memory_resource]") resource host_resource{}; resource device_resource{}; CHECK(!(first == host_resource)); - CHECK(first != host_resource); + CHECK((first != host_resource)); CHECK(!(first == device_resource)); - CHECK(first != device_resource); + CHECK((first != device_resource)); CHECK(!(host_resource == first)); - CHECK(host_resource != first); + CHECK((host_resource != first)); CHECK(!(device_resource == first)); - CHECK(device_resource != first); + CHECK((device_resource != first)); } { // comparison against a different resource through resource_ref async_resource host_async_resource{}; async_resource device_async_resource{}; CHECK(!(first == host_async_resource)); - CHECK(first != host_async_resource); + CHECK((first != host_async_resource)); CHECK(!(first == device_async_resource)); - CHECK(first != device_async_resource); + CHECK((first != device_async_resource)); CHECK(!(host_async_resource == first)); - CHECK(host_async_resource != first); + CHECK((host_async_resource != first)); CHECK(!(device_async_resource == first)); - CHECK(device_async_resource != first); + CHECK((device_async_resource != first)); } } diff --git a/cudax/test/memory_resource/managed_memory_resource.cu b/cudax/test/memory_resource/managed_memory_resource.cu index 0c71ef24a91..1c5836192ba 100644 --- a/cudax/test/memory_resource/managed_memory_resource.cu +++ b/cudax/test/memory_resource/managed_memory_resource.cu @@ -214,32 +214,32 @@ TEST_CASE("managed_memory_resource comparison", "[memory_resource]") managed_resource first{}; { // comparison against a plain managed_memory_resource managed_resource second{}; - CHECK(first == second); + CHECK((first == second)); CHECK(!(first != second)); } { // comparison against a plain managed_memory_resource with a different pool managed_resource second{cudaMemAttachHost}; - CHECK(first != second); + CHECK((first != second)); CHECK(!(first == second)); } { // comparison against a managed_memory_resource wrapped inside a resource_ref managed_resource second{}; - cuda::mr::resource_ref second_ref{second}; - CHECK(first == second_ref); + cuda::mr::resource_ref second_ref{second}; + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } { // comparison against a managed_memory_resource wrapped inside a async_resource_ref managed_resource second{}; - cuda::mr::async_resource_ref second_ref{second}; + cuda::mr::async_resource_ref second_ref{second}; - CHECK(first == second_ref); + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } @@ -247,27 +247,27 @@ TEST_CASE("managed_memory_resource comparison", "[memory_resource]") resource host_resource{}; resource device_resource{}; CHECK(!(first == host_resource)); - CHECK(first != host_resource); + CHECK((first != host_resource)); CHECK(!(first == device_resource)); - CHECK(first != device_resource); + CHECK((first != device_resource)); CHECK(!(host_resource == first)); - CHECK(host_resource != first); + CHECK((host_resource != first)); CHECK(!(device_resource == first)); - CHECK(device_resource != first); + CHECK((device_resource != first)); } { // comparison against a different managed_resource through resource_ref resource host_async_resource{}; resource device_async_resource{}; CHECK(!(first == host_async_resource)); - CHECK(first != host_async_resource); + CHECK((first != host_async_resource)); CHECK(!(first == device_async_resource)); - CHECK(first != device_async_resource); + CHECK((first != device_async_resource)); CHECK(!(host_async_resource == first)); - CHECK(host_async_resource != first); + CHECK((host_async_resource != first)); CHECK(!(device_async_resource == first)); - CHECK(device_async_resource != first); + CHECK((device_async_resource != first)); } } diff --git a/cudax/test/memory_resource/pinned_memory_resource.cu b/cudax/test/memory_resource/pinned_memory_resource.cu index 3a834d3d051..4240491c6a3 100644 --- a/cudax/test/memory_resource/pinned_memory_resource.cu +++ b/cudax/test/memory_resource/pinned_memory_resource.cu @@ -17,6 +17,7 @@ #include +#include "cuda/__memory_resource/resource_ref.h" #include #include @@ -215,32 +216,33 @@ TEST_CASE("pinned_memory_resource comparison", "[memory_resource]") pinned_resource first{}; { // comparison against a plain pinned_memory_resource pinned_resource second{}; - CHECK(first == second); + CHECK((first == second)); CHECK(!(first != second)); } { // comparison against a plain pinned_memory_resource with a different pool pinned_resource second{cudaMemAttachHost}; - CHECK(first != second); + CHECK((first != second)); CHECK(!(first == second)); } { // comparison against a pinned_memory_resource wrapped inside a resource_ref pinned_resource second{}; - cuda::mr::resource_ref second_ref{second}; - CHECK(first == second_ref); + cuda::mr::resource_ref const second_ref{second}; + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } { // comparison against a pinned_memory_resource wrapped inside a async_resource_ref pinned_resource second{}; - cuda::mr::async_resource_ref second_ref{second}; + // cuda::mr::async_resource_ref second_ref{second}; + cudax::async_resource_ref second_ref{second}; - CHECK(first == second_ref); + CHECK((first == second_ref)); CHECK(!(first != second_ref)); - CHECK(second_ref == first); + CHECK((second_ref == first)); CHECK(!(second_ref != first)); } @@ -248,27 +250,27 @@ TEST_CASE("pinned_memory_resource comparison", "[memory_resource]") resource host_resource{}; resource device_resource{}; CHECK(!(first == host_resource)); - CHECK(first != host_resource); + CHECK((first != host_resource)); CHECK(!(first == device_resource)); - CHECK(first != device_resource); + CHECK((first != device_resource)); CHECK(!(host_resource == first)); - CHECK(host_resource != first); + CHECK((host_resource != first)); CHECK(!(device_resource == first)); - CHECK(device_resource != first); + CHECK((device_resource != first)); } { // comparison against a different pinned_resource through resource_ref resource host_async_resource{}; resource device_async_resource{}; CHECK(!(first == host_async_resource)); - CHECK(first != host_async_resource); + CHECK((first != host_async_resource)); CHECK(!(first == device_async_resource)); - CHECK(first != device_async_resource); + CHECK((first != device_async_resource)); CHECK(!(host_async_resource == first)); - CHECK(host_async_resource != first); + CHECK((host_async_resource != first)); CHECK(!(device_async_resource == first)); - CHECK(device_async_resource != first); + CHECK((device_async_resource != first)); } } diff --git a/cudax/test/memory_resource/shared_resource.cu b/cudax/test/memory_resource/shared_resource.cu index 4711498b0fe..cd279ab0b9e 100644 --- a/cudax/test/memory_resource/shared_resource.cu +++ b/cudax/test/memory_resource/shared_resource.cu @@ -105,7 +105,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource ++expected.object_count; CHECK(this->counts == expected); - cuda::mr::resource_ref ref = mr; + cudax::resource_ref ref = mr; CHECK(this->counts == expected); auto* ptr = ref.allocate(bytes(100), align(8)); @@ -129,7 +129,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource align(alignof(int) * 4); { bytes(42 * sizeof(int)); - cudax::uninitialized_buffer buffer{ + cudax::uninitialized_buffer buffer{ cudax::shared_resource(42, this), 42}; ++expected.object_count; ++expected.allocate_count; @@ -139,7 +139,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource { // accounting for new storage bytes(1337 * sizeof(int)); - cudax::uninitialized_buffer other_buffer{buffer.get_memory_resource(), 1337}; + cudax::uninitialized_buffer other_buffer{buffer.get_memory_resource(), 1337}; ++expected.allocate_count; CHECK(this->counts == expected); } @@ -151,7 +151,7 @@ TEMPLATE_TEST_CASE_METHOD(test_fixture, "shared_resource", "[container][resource { // Moving the resource should not do anything - cudax::uninitialized_buffer third_buffer = ::cuda::std::move(buffer); + cudax::uninitialized_buffer third_buffer = ::cuda::std::move(buffer); CHECK(this->counts == expected); } diff --git a/cudax/test/memory_resource/test_resource.cuh b/cudax/test/memory_resource/test_resource.cuh index 524ad12089e..75cd9b665b2 100644 --- a/cudax/test/memory_resource/test_resource.cuh +++ b/cudax/test/memory_resource/test_resource.cuh @@ -2,6 +2,9 @@ #include +#include +#include + #include #include @@ -90,6 +93,11 @@ inline thread_local Counts* test_fixture_::counts_ = nullptr; template using test_fixture = test_fixture_; +struct get_data +{ + using value_type = int; +}; + template struct test_resource { @@ -128,6 +136,14 @@ struct test_resource --fixture->counts.object_count; } + test_resource& operator=(test_resource other) noexcept + { + other._assert_valid(); + ::cuda::std::swap(data, other.data); + ::cuda::std::swap(fixture, other.fixture); + return *this; + } + void* allocate(std::size_t bytes, std::size_t align) { _assert_valid(); @@ -198,11 +214,15 @@ struct test_resource return ::operator delete(pv); } - friend constexpr void get_property(const test_resource&, cuda::mr::host_accessible) noexcept {} + friend constexpr void get_property(const test_resource&, cudax::host_accessible) noexcept {} + friend constexpr int get_property(const test_resource& self, get_data) noexcept + { + return self.data; + } }; using big_resource = test_resource; using small_resource = test_resource; -static_assert(sizeof(big_resource) > sizeof(cuda::mr::_AnyResourceStorage)); -static_assert(sizeof(small_resource) <= sizeof(cuda::mr::_AnyResourceStorage)); +static_assert(sizeof(big_resource) > cuda::experimental::__default_buffer_size); +static_assert(sizeof(small_resource) <= cuda::experimental::__default_buffer_size); diff --git a/docs/libcudacxx/extended_api/memory_resource.rst b/docs/libcudacxx/extended_api/memory_resource.rst index 5a8118f69a3..c06cc3c118a 100644 --- a/docs/libcudacxx/extended_api/memory_resource.rst +++ b/docs/libcudacxx/extended_api/memory_resource.rst @@ -36,7 +36,7 @@ At a high level, the header provides: - Concepts that provide proper constraints for arbitrary memory resources. - CCCL 2.2.0 / CUDA 12.3 * - :ref:`cuda::mr::{async}_resource_ref ` - - A type-erased memory resource wrapper that enables consumers to specify properties of resources that they expect. + - A non-owning type-erased memory resource wrapper that enables consumers to specify properties of resources that they expect. - CCCL 2.2.0 / CUDA 12.3 These features are an evolution of `std::pmr::memory_resource `__ diff --git a/libcudacxx/include/cuda/__memory_resource/resource.h b/libcudacxx/include/cuda/__memory_resource/resource.h index 42f9668338a..733a8fdde8b 100644 --- a/libcudacxx/include/cuda/__memory_resource/resource.h +++ b/libcudacxx/include/cuda/__memory_resource/resource.h @@ -52,9 +52,9 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR template _CCCL_CONCEPT resource = _CCCL_REQUIRES_EXPR((_Resource), _Resource& __res, void* __ptr, size_t __bytes, size_t __alignment)( - requires(_CUDA_VSTD::equality_comparable<_Resource>), _Same_as(void*) __res.allocate(__bytes, __alignment), // - _Same_as(void) __res.deallocate(__ptr, __bytes, __alignment)); + _Same_as(void) __res.deallocate(__ptr, __bytes, __alignment), + requires(_CUDA_VSTD::equality_comparable<_Resource>)); //! @brief The \c async_resource concept verifies that a type Resource satisfies the basic requirements of a //! memory resource and additionally supports stream ordered allocations @@ -74,9 +74,9 @@ _CCCL_CONCEPT resource = template _CCCL_CONCEPT async_resource = _CCCL_REQUIRES_EXPR( (_Resource), _Resource& __res, void* __ptr, size_t __bytes, size_t __alignment, ::cuda::stream_ref __stream)( - requires(resource<_Resource>), _Same_as(void*) __res.allocate_async(__bytes, __alignment, __stream), - _Same_as(void) __res.deallocate_async(__ptr, __bytes, __alignment, __stream)); + _Same_as(void) __res.deallocate_async(__ptr, __bytes, __alignment, __stream), + requires(resource<_Resource>)); //! @brief The \c resource_with concept verifies that a type Resource satisfies the `resource` concept and //! also satisfies all the provided Properties diff --git a/libcudacxx/include/cuda/__memory_resource/resource_ref.h b/libcudacxx/include/cuda/__memory_resource/resource_ref.h index 81831720349..40f2c9d0ba8 100644 --- a/libcudacxx/include/cuda/__memory_resource/resource_ref.h +++ b/libcudacxx/include/cuda/__memory_resource/resource_ref.h @@ -147,7 +147,7 @@ struct _Resource_vtable_builder using __wrapper_type = _CUDA_VSTD::integral_constant<_WrapperType, _Wrapper_type>; template - static __property_value_t<_Property> _Get_property(void* __res) noexcept + static __property_value_t<_Property> _Get_property(const void* __res) noexcept { return get_property(*static_cast(__res), _Property{}); } @@ -292,7 +292,7 @@ struct _Resource_vtable_builder }; template -using __property_fn_t = __property_value_t<_Property> (*)(void*); +using __property_fn_t = __property_value_t<_Property> (*)(const void*); template struct _Property_vtable @@ -374,8 +374,11 @@ using _Filtered_vtable = typename _Filtered<_Properties...>::_Filtered_vtable::_ template <_WrapperType _Wrapper_type> using __alloc_object_storage_t = _CUDA_VSTD::_If<_Wrapper_type == _WrapperType::_Reference, void*, _AnyResourceStorage>; +struct _Resource_ref_base +{}; + template -struct _Alloc_base +struct _LIBCUDACXX_DECLSPEC_EMPTY_BASES _Alloc_base : _Resource_ref_base { static_assert(_CUDA_VSTD::is_base_of_v<_Alloc_vtable, _Vtable>, ""); @@ -445,19 +448,8 @@ struct _Async_alloc_base : public _Alloc_base<_Vtable, _Wrapper_type> } }; -template -constexpr bool _Is_resource_base_fn(const _Alloc_base<_VTable, _Wrapper_type>*) noexcept -{ - return true; -} - -constexpr bool _Is_resource_base_fn(...) noexcept -{ - return false; -} - template -_CCCL_CONCEPT _Is_resource_base = _Is_resource_base_fn(static_cast<_Resource*>(nullptr)); +_CCCL_CONCEPT _Is_resource_ref = _CUDA_VSTD::convertible_to<_Resource&, _Resource_ref_base>; template <_AllocType _Alloc_type, _WrapperType _Wrapper_type> using _Resource_base = @@ -522,7 +514,7 @@ class basic_resource_ref //! as well as all properties //! @param __res The resource to be wrapped within the \c basic_resource_ref _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type) - _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default) + _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default) _CCCL_AND resource_with<_Resource, _Properties...>) basic_resource_ref(_Resource& __res) noexcept : _Resource_base<_Alloc_type, _WrapperType::_Reference>( @@ -534,7 +526,7 @@ class basic_resource_ref //! properties. This ignores the async interface of the passed in resource //! @param __res The resource to be wrapped within the \c resource_ref _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type) - _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async) + _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async) _CCCL_AND async_resource_with<_Resource, _Properties...>) basic_resource_ref(_Resource& __res) noexcept : _Resource_base<_Alloc_type, _WrapperType::_Reference>( @@ -546,7 +538,7 @@ class basic_resource_ref //! as well as all properties //! @param __res Pointer to a resource to be wrapped within the \c basic_resource_ref _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type) - _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default) + _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Default) _CCCL_AND resource_with<_Resource, _Properties...>) basic_resource_ref(_Resource* __res) noexcept : _Resource_base<_Alloc_type, _WrapperType::_Reference>( @@ -558,7 +550,7 @@ class basic_resource_ref //! properties. This ignores the async interface of the passed in resource //! @param __res Pointer to a resource to be wrapped within the \c resource_ref _CCCL_TEMPLATE(class _Resource, _AllocType _Alloc_type2 = _Alloc_type) - _CCCL_REQUIRES((!_Is_resource_base<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async) + _CCCL_REQUIRES((!_Is_resource_ref<_Resource>) _CCCL_AND(_Alloc_type2 == _AllocType::_Async) _CCCL_AND async_resource_with<_Resource, _Properties...>) basic_resource_ref(_Resource* __res) noexcept : _Resource_base<_Alloc_type, _WrapperType::_Reference>( @@ -587,9 +579,18 @@ class basic_resource_ref {} //! @brief Equality comparison between two \c basic_resource_ref - //! @param __rhs The other \c basic_resource_ref + //! @param __lhs The first \c basic_resource_ref + //! @param __rhs The second \c basic_resource_ref //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns //! the result of that equality comparison. Otherwise returns false. + _CCCL_NODISCARD_FRIEND bool operator==(const basic_resource_ref& __lhs, const basic_resource_ref& __rhs) + { + // BUGBUG: comparing function pointers like this can lead to false negatives: + return (__lhs.__static_vtable->__equal_fn == __rhs.__static_vtable->__equal_fn) + && __lhs.__static_vtable->__equal_fn(__lhs.__object, __rhs.__object); + } + + //! @overload _CCCL_TEMPLATE(class... _OtherProperties) _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties)) _CCCL_AND __properties_match<_OtherProperties...>) @@ -601,9 +602,16 @@ class basic_resource_ref } //! @brief Inequality comparison between two \c basic_resource_ref - //! @param __rhs The other \c basic_resource_ref + //! @param __lhs The first \c basic_resource_ref + //! @param __rhs The second \c basic_resource_ref //! @return Checks whether both resources have the same equality function stored in their vtable and if so returns //! the inverse result of that equality comparison. Otherwise returns true. + _CCCL_NODISCARD_FRIEND bool operator!=(const basic_resource_ref& __lhs, const basic_resource_ref& __rhs) + { + return !(__lhs == __rhs); + } + + //! @overload _CCCL_TEMPLATE(class... _OtherProperties) _CCCL_REQUIRES((sizeof...(_Properties) == sizeof...(_OtherProperties)) _CCCL_AND __properties_match<_OtherProperties...>)