diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5eb43784063..fff9035a2bb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,7 +20,20 @@ repos:
     rev: v17.0.6
     hooks:
       - id: clang-format
-        types_or: [c, c++, cuda]
+        types_or: [file]
+        files: |
+          (?x)^(
+            ^.*\.c$|
+            ^.*\.cpp$|
+            ^.*\.cu$|
+            ^.*\.cuh$|
+            ^.*\.cxx$|
+            ^.*\.h$|
+            ^.*\.hpp$|
+            ^.*\.inl$|
+            ^.*\.mm$|
+            ^libcudacxx/include/.*/[^.]*$
+          )
         args: ["-fallback-style=none", "-style=file", "-i"]
 
 default_language_version:
diff --git a/libcudacxx/include/cuda/annotated_ptr b/libcudacxx/include/cuda/annotated_ptr
index bd9f26ad591..f5e04e56623 100644
--- a/libcudacxx/include/cuda/annotated_ptr
+++ b/libcudacxx/include/cuda/annotated_ptr
@@ -3,50 +3,128 @@
  *
  * NVIDIA SOFTWARE LICENSE
  *
- * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”).
+ * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the
+ * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”).
  *
- * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users.
+ * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used.
+ * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By
+ * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of
+ * this license, and you take legal and financial responsibility for the actions of your permitted users.
  *
- * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions.
+ * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law,
+ * regulation or generally accepted practices or guidelines in the relevant jurisdictions.
  *
- * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license.
+ * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install
+ * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this
+ * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under
+ * this license.
  *
  * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant:
- *          a.
The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. 
You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. 
NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. 
TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. 
All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. 
By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. 
August 20, 2021) */ @@ -71,56 +149,96 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -class access_property { - private: - std::uint64_t __descriptor = 0; - - public: - struct shared {}; - struct global {}; - struct persisting { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyPersisting; - } - }; - struct streaming { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyStreaming; - } - }; - struct normal { - _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept { - return cudaAccessProperty::cudaAccessPropertyNormal; - } - }; - - _CCCL_HOST_DEVICE constexpr access_property(global) noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} - _CCCL_HOST_DEVICE constexpr access_property() noexcept : __descriptor(__detail_ap::__sm_80::__interleave_normal()) {} - constexpr access_property(access_property const&) noexcept = default; - access_property& operator=(const access_property& other) noexcept = default; - - _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction) : __descriptor(__detail_ap::__interleave(normal{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction) : __descriptor(__detail_ap::__interleave(streaming{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction)) {} - _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming) : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{})) {} - - _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept : access_property(normal{}, 1.0) {} - _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept : access_property(streaming{}, 1.0) {} - _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept : access_property(persisting{}, 1.0) {} - - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{})) {} - _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming) - : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{})) {} - - _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept { return __descriptor; } +class access_property +{ +private: + std::uint64_t __descriptor = 0; + +public: + struct shared + {}; + struct global + {}; + struct persisting + { + _CCCL_HOST_DEVICE constexpr 
operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyPersisting; + } + }; + struct streaming + { + _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyStreaming; + } + }; + struct normal + { + _CCCL_HOST_DEVICE constexpr operator cudaAccessProperty() const noexcept + { + return cudaAccessProperty::cudaAccessPropertyNormal; + } + }; + + _CCCL_HOST_DEVICE constexpr access_property(global) noexcept + : __descriptor(__detail_ap::__sm_80::__interleave_normal()) + {} + _CCCL_HOST_DEVICE constexpr access_property() noexcept + : __descriptor(__detail_ap::__sm_80::__interleave_normal()) + {} + constexpr access_property(access_property const&) noexcept = default; + access_property& operator=(const access_property& other) noexcept = default; + + _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction) + : __descriptor(__detail_ap::__interleave(normal{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(streaming, float __fraction) + : __descriptor(__detail_ap::__interleave(streaming{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction) + : __descriptor(__detail_ap::__interleave(persisting{}, __fraction)) + {} + _CCCL_HOST_DEVICE constexpr access_property(normal, float __fraction, streaming) + : __descriptor(__detail_ap::__interleave(normal{}, __fraction, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting, float __fraction, streaming) + : __descriptor(__detail_ap::__interleave(persisting{}, __fraction, streaming{})) + {} + + _CCCL_HOST_DEVICE constexpr access_property(normal) noexcept + : access_property(normal{}, 1.0) + {} + _CCCL_HOST_DEVICE constexpr access_property(streaming) noexcept + : access_property(streaming{}, 1.0) + {} + _CCCL_HOST_DEVICE constexpr access_property(persisting) noexcept + : access_property(persisting{}, 1.0) + {} + + _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{})) + {} + _CCCL_HOST_DEVICE constexpr access_property(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, normal, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, normal{}, streaming{})) + {} + _CCCL_HOST_DEVICE constexpr access_property( + void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, persisting, streaming) + : __descriptor(__detail_ap::__block(__ptr, __hit_bytes, __total_bytes, persisting{}, streaming{})) + {} + + _CCCL_HOST_DEVICE constexpr explicit operator std::uint64_t() const noexcept + { + return __descriptor; + } }; _LIBCUDACXX_END_NAMESPACE_CUDA @@ -130,195 +248,201 @@ _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA template -_CCCL_HOST_DEVICE -_Tp* associate_access_property(_Tp* __ptr, _Property __prop) { +_CCCL_HOST_DEVICE _Tp* associate_access_property(_Tp* __ptr, _Property __prop) +{ static_assert( - std::is_same<_Property, 
access_property>::value || - std::is_same<_Property, access_property::persisting>::value || - std::is_same<_Property, access_property::streaming>::value || - std::is_same<_Property, access_property::normal>::value || - std::is_same<_Property, access_property::global>::value || - std::is_same<_Property, access_property::shared>::value - , "property is not convertible to cuda::access_property"); + std::is_same<_Property, access_property>::value || std::is_same<_Property, access_property::persisting>::value + || std::is_same<_Property, access_property::streaming>::value + || std::is_same<_Property, access_property::normal>::value + || std::is_same<_Property, access_property::global>::value + || std::is_same<_Property, access_property::shared>::value, + "property is not convertible to cuda::access_property"); return __detail_ap::__associate(__ptr, __prop); } template -_CCCL_HOST_DEVICE -void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept { - NV_IF_TARGET(NV_PROVIDES_SM_80,( - if (!__isGlobal((void*)__ptr)) return; - - char* __p = reinterpret_cast(const_cast(__ptr)); - static constexpr std::size_t _LINE_SIZE = 128; - std::size_t __nbytes = static_cast(__shape); - std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; - __end /= _LINE_SIZE; - - //Apply to all 128 bytes aligned cache lines inclusive of __p - for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { - asm volatile ("prefetch.global.L2::evict_last [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); - } - )) +_CCCL_HOST_DEVICE void +apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::persisting __prop) noexcept +{ + NV_IF_TARGET( + NV_PROVIDES_SM_80, + (if (!__isGlobal((void*) __ptr)) return; + + char* __p = reinterpret_cast(const_cast(__ptr)); + static constexpr std::size_t _LINE_SIZE = 128; + std::size_t __nbytes = static_cast(__shape); + std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; + __end /= _LINE_SIZE; + + // Apply to all 128 bytes aligned cache lines inclusive of __p + for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { + asm volatile("prefetch.global.L2::evict_last [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); + })) } template -_CCCL_HOST_DEVICE -void apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept { - NV_IF_TARGET(NV_PROVIDES_SM_80,( - if (!__isGlobal((void*)__ptr)) return; - - char* __p = reinterpret_cast(const_cast(__ptr)); - static constexpr std::size_t _LINE_SIZE = 128; - std::size_t __nbytes = static_cast(__shape); - std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? __nbytes + _LINE_SIZE : __nbytes; - __end /= _LINE_SIZE; - - //Apply to all 128 bytes aligned cache lines inclusive of __p - for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { - asm volatile ("prefetch.global.L2::evict_normal [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); - } - )) +_CCCL_HOST_DEVICE void +apply_access_property(const volatile void* __ptr, const _Shape __shape, access_property::normal __prop) noexcept +{ + NV_IF_TARGET( + NV_PROVIDES_SM_80, + (if (!__isGlobal((void*) __ptr)) return; + + char* __p = reinterpret_cast(const_cast(__ptr)); + static constexpr std::size_t _LINE_SIZE = 128; + std::size_t __nbytes = static_cast(__shape); + std::size_t __end = ((std::uintptr_t)(__p + __nbytes) % _LINE_SIZE) ? 
__nbytes + _LINE_SIZE : __nbytes; + __end /= _LINE_SIZE; + + // Apply to all 128 bytes aligned cache lines inclusive of __p + for (std::size_t __i = 0; __i < __end; __i += _LINE_SIZE) { + asm volatile("prefetch.global.L2::evict_normal [%0];" ::"l"(__p + (__i * _LINE_SIZE)) :); + })) } -template -class annotated_ptr: public __detail_ap::__annotated_ptr_base<_Property> { - public: - using value_type = _Tp; - using size_type = std::size_t; - using reference = value_type&; - using pointer = value_type*; - using const_pointer = value_type const*; - using difference_type = std::ptrdiff_t; - - private: - using __self = annotated_ptr<_Tp, _Property>; - - // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable. - pointer __repr = (pointer)((size_type)nullptr); - - _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const { - NV_IF_TARGET(NV_IS_DEVICE,( - if (!__skip_prop) { - return static_cast(this->__apply_prop(const_cast(static_cast(__repr + __n)))); - } - )) - return __repr + __n; - } - _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const { - return __get(__skip_prop, __n); - } - - public: - _CCCL_HOST_DEVICE pointer operator->() const { - return __get(); - } - - _CCCL_HOST_DEVICE reference operator*() const { - return *__get(); - } - - _CCCL_HOST_DEVICE reference operator[](difference_type __n) const { - return *__offset(__n); - } - - _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const { - return __repr - o.__repr; - } - - constexpr annotated_ptr() noexcept = default; - constexpr annotated_ptr(annotated_ptr const&) noexcept = default; - // No constexpr for c11 as the method can't be const - _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default; - - _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p) +template +class annotated_ptr : public __detail_ap::__annotated_ptr_base<_Property> +{ +public: + using value_type = _Tp; + using size_type = std::size_t; + using reference = value_type&; + using pointer = value_type*; + using const_pointer = value_type const*; + using difference_type = std::ptrdiff_t; + +private: + using __self = annotated_ptr<_Tp, _Property>; + + // Converting from a 64-bit to 32-bit shared pointer and maybe back just for storage might or might not be profitable. 
+ pointer __repr = (pointer) ((size_type) nullptr); + + _CCCL_HOST_DEVICE pointer __get(bool __skip_prop = false, difference_type __n = 0) const + { + NV_IF_TARGET(NV_IS_DEVICE, (if (!__skip_prop) { + return static_cast( + this->__apply_prop(const_cast(static_cast(__repr + __n)))); + })) + return __repr + __n; + } + _CCCL_HOST_DEVICE pointer __offset(difference_type __n, bool __skip_prop = false) const + { + return __get(__skip_prop, __n); + } + +public: + _CCCL_HOST_DEVICE pointer operator->() const + { + return __get(); + } + + _CCCL_HOST_DEVICE reference operator*() const + { + return *__get(); + } + + _CCCL_HOST_DEVICE reference operator[](difference_type __n) const + { + return *__offset(__n); + } + + _CCCL_HOST_DEVICE constexpr difference_type operator-(annotated_ptr o) const + { + return __repr - o.__repr; + } + + constexpr annotated_ptr() noexcept = default; + constexpr annotated_ptr(annotated_ptr const&) noexcept = default; + // No constexpr for c11 as the method can't be const + _CCCL_CONSTEXPR_CXX14 annotated_ptr& operator=(annotated_ptr const& other) noexcept = default; + + _CCCL_HOST_DEVICE explicit annotated_ptr(pointer __p) : __repr(__p) - { - NV_IF_TARGET(NV_IS_DEVICE,( - _LIBCUDACXX_DEBUG_ASSERT((std::is_same<_Property, shared>::value && __isShared(__p) || __isGlobal(__p)), ""); - )) - } - - template - _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop) - : __detail_ap::__annotated_ptr_base<_Property>(static_cast(access_property(__prop))), __repr(__p) - { - static_assert(std::is_same<_Property, access_property>::value, - "This method requires annotated_ptr"); - static_assert(std::is_same<_RuntimeProperty, access_property::global>::value || - std::is_same<_RuntimeProperty, access_property::normal>::value || - std::is_same<_RuntimeProperty, access_property::streaming>::value || - std::is_same<_RuntimeProperty, access_property::persisting>::value || - std::is_same<_RuntimeProperty, access_property>::value, - "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property"); - NV_IF_TARGET(NV_IS_DEVICE,( - _LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), ""); - )) - } - - template - _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp,_Prop>& __other); - - _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept { - return __repr != nullptr; - } - - _CCCL_HOST_DEVICE pointer get() const noexcept { - constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value; - return __is_shared ? 
__repr : &(*annotated_ptr(__repr)); - } - - _CCCL_HOST_DEVICE _Property __property() const noexcept { - return this->__get_property(); - } + { + NV_IF_TARGET( + NV_IS_DEVICE, + (_LIBCUDACXX_DEBUG_ASSERT((std::is_same<_Property, shared>::value && __isShared(__p) || __isGlobal(__p)), "");)) + } + + template + _CCCL_HOST_DEVICE annotated_ptr(pointer __p, _RuntimeProperty __prop) + : __detail_ap::__annotated_ptr_base<_Property>(static_cast(access_property(__prop))) + , __repr(__p) + { + static_assert(std::is_same<_Property, access_property>::value, + "This method requires annotated_ptr"); + static_assert( + std::is_same<_RuntimeProperty, access_property::global>::value + || std::is_same<_RuntimeProperty, access_property::normal>::value + || std::is_same<_RuntimeProperty, access_property::streaming>::value + || std::is_same<_RuntimeProperty, access_property::persisting>::value + || std::is_same<_RuntimeProperty, access_property>::value, + "This method requires RuntimeProperty=global|normal|streaming|persisting|access_property"); + NV_IF_TARGET(NV_IS_DEVICE, (_LIBCUDACXX_DEBUG_ASSERT((__isGlobal(__p) == true), "");)) + } + + template + _CCCL_HOST_DEVICE annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other); + + _CCCL_HOST_DEVICE constexpr explicit operator bool() const noexcept + { + return __repr != nullptr; + } + + _CCCL_HOST_DEVICE pointer get() const noexcept + { + constexpr bool __is_shared = std::is_same<_Property, access_property::shared>::value; + return __is_shared ? __repr : &(*annotated_ptr(__repr)); + } + + _CCCL_HOST_DEVICE _Property __property() const noexcept + { + return this->__get_property(); + } }; - -template -template -_CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp,_Prop>& __other) - : __detail_ap::__annotated_ptr_base<_Property>(__other.__property()), __repr(__other.get()) +template +template +_CCCL_HOST_DEVICE annotated_ptr<_Tp, _Property>::annotated_ptr(const annotated_ptr<_TTp, _Prop>& __other) + : __detail_ap::__annotated_ptr_base<_Property>(__other.__property()) + , __repr(__other.get()) { static_assert(std::is_assignable::value, "pointer must be assignable from other pointer"); - static_assert((std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value) || - std::is_same<_Property, _Prop>::value, "Property must be either access_property or other property, and both properties must have same address space"); + static_assert( + (std::is_same<_Property, access_property>::value && !std::is_same<_Prop, access_property::shared>::value) + || std::is_same<_Property, _Prop>::value, + "Property must be either access_property or other property, and both properties must have same address space"); // note: precondition "__other.__rep must be compatible with _Property" currently always holds } -template -_CCCL_HOST_DEVICE -void memcpy_async(_Dst* __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void memcpy_async(_Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(__dst, &(*__src), __shape, __sync); } -template -_CCCL_HOST_DEVICE -void memcpy_async(annotated_ptr<_Dst,_DstProperty> __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync){ +template +_CCCL_HOST_DEVICE void memcpy_async( + annotated_ptr<_Dst, _DstProperty> __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(&(*__dst), &(*__src), __shape, __sync); } 
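(Illustrative usage sketch, not part of this diff: the annotated_ptr overloads of memcpy_async above let a hinted global pointer be passed straight to memcpy_async. The kernel, buffer, and tile names below are hypothetical; it assumes sm_80 or newer, that gsrc/gdst point to global memory, and a launch with at most 256 threads per block.)

#include <cuda/annotated_ptr>
#include <cuda/barrier>

__global__ void stage_tile(const float* gsrc, float* gdst)
{
  __shared__ alignas(16) float tile[256];
#pragma nv_diag_suppress static_var_with_dynamic_init
  __shared__ cuda::barrier<cuda::thread_scope_block> bar;
  if (threadIdx.x == 0)
  {
    init(&bar, blockDim.x); // ADL finds cuda::barrier's init()
  }
  __syncthreads();

  // Reads through `src` carry the streaming (evict-first) L2 hint.
  cuda::annotated_ptr<const float, cuda::access_property::streaming> src(gsrc + blockIdx.x * blockDim.x);

  // Uses the memcpy_async(_Dst*, annotated_ptr<_Src, _SrcProperty>, _Shape, _Sync&) overload above.
  cuda::memcpy_async(tile, src, sizeof(float) * blockDim.x, bar);
  bar.arrive_and_wait();

  gdst[blockIdx.x * blockDim.x + threadIdx.x] = 2.0f * tile[threadIdx.x];
}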
-template -_CCCL_HOST_DEVICE -void memcpy_async(const _Group & __group, - _Dst * __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void +memcpy_async(const _Group& __group, _Dst* __dst, annotated_ptr<_Src, _SrcProperty> __src, _Shape __shape, _Sync& __sync) +{ memcpy_async(__group, __dst, &(*__src), __shape, __sync); } -template -_CCCL_HOST_DEVICE -void memcpy_async(const _Group & __group, - annotated_ptr<_Dst,_DstProperty> __dst, - annotated_ptr<_Src,_SrcProperty> __src, - _Shape __shape, _Sync & __sync) { +template +_CCCL_HOST_DEVICE void memcpy_async( + const _Group& __group, + annotated_ptr<_Dst, _DstProperty> __dst, + annotated_ptr<_Src, _SrcProperty> __src, + _Shape __shape, + _Sync& __sync) +{ memcpy_async(__group, &(*__dst), &(*__src), __shape, __sync); } diff --git a/libcudacxx/include/cuda/barrier b/libcudacxx/include/cuda/barrier index e19684cfece..99117dde90b 100644 --- a/libcudacxx/include/cuda/barrier +++ b/libcudacxx/include/cuda/barrier @@ -21,8 +21,8 @@ # pragma system_header #endif // no system header -#include #include +#include // Forward-declare CUtensorMap for use in cp_async_bulk_tensor_* PTX wrapping // functions. These functions take a pointer to CUtensorMap, so do not need to @@ -54,175 +54,185 @@ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE_EXPERIMENTAL #ifdef __cccl_lib_experimental_ctk12_cp_async_exposure // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk -inline _CCCL_DEVICE -void cp_async_bulk_global_to_shared(void *__dest, const void *__src, _CUDA_VSTD::uint32_t __size, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_global_to_shared( + void* __dest, const void* __src, _CUDA_VSTD::uint32_t __size, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); - _LIBCUDACXX_DEBUG_ASSERT(__isShared(__dest), "Destination must be shared memory address."); - _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__src), "Source must be global memory address."); - - _CUDA_VPTX::cp_async_bulk( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __src, __size, - ::cuda::device::barrier_native_handle(__bar)); + _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); + _LIBCUDACXX_DEBUG_ASSERT(__isShared(__dest), "Destination must be shared memory address."); + _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__src), "Source must be global memory address."); + + _CUDA_VPTX::cp_async_bulk( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __src, + __size, + ::cuda::device::barrier_native_handle(__bar)); } - // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk -inline _CCCL_DEVICE -void cp_async_bulk_shared_to_global(void *__dest, const void * __src, _CUDA_VSTD::uint32_t __size) +inline _CCCL_DEVICE void cp_async_bulk_shared_to_global(void* __dest, const void* __src, _CUDA_VSTD::uint32_t __size) { - _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); - _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__dest), "Destination must be global memory address."); - _LIBCUDACXX_DEBUG_ASSERT(__isShared(__src), "Source must be shared memory address."); + _LIBCUDACXX_DEBUG_ASSERT(__size % 16 == 0, "Size must be multiple of 16."); + _LIBCUDACXX_DEBUG_ASSERT(__isGlobal(__dest), "Destination must be global memory address."); + 
_LIBCUDACXX_DEBUG_ASSERT(__isShared(__src), "Source must be shared memory address."); - _CUDA_VPTX::cp_async_bulk( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __dest, __src, __size); + _CUDA_VPTX::cp_async_bulk(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __dest, __src, __size); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_1d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_1d_global_to_shared( + void* __dest, const CUtensorMap* __tensor_map, int __c0, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_2d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_2d_global_to_shared( + void* __dest, const CUtensorMap* __tensor_map, int __c0, int __c1, ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_3d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_3d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_4d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, int __c2, int __c3, 
::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_4d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + int __c3, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_5d_global_to_shared( - void *__dest, const CUtensorMap *__tensor_map , int __c0, int __c1, int __c2, int __c3, int __c4, ::cuda::barrier<::cuda::thread_scope_block> &__bar) +inline _CCCL_DEVICE void cp_async_bulk_tensor_5d_global_to_shared( + void* __dest, + const CUtensorMap* __tensor_map, + int __c0, + int __c1, + int __c2, + int __c3, + int __c4, + ::cuda::barrier<::cuda::thread_scope_block>& __bar) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; - - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_cluster, _CUDA_VPTX::space_global, - __dest, __tensor_map, __coords, - ::cuda::device::barrier_native_handle(__bar)); + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4}; + + _CUDA_VPTX::cp_async_bulk_tensor( + _CUDA_VPTX::space_cluster, + _CUDA_VPTX::space_global, + __dest, + __tensor_map, + __coords, + ::cuda::device::barrier_native_handle(__bar)); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_1d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, const void *__src) +inline _CCCL_DEVICE void +cp_async_bulk_tensor_1d_shared_to_global(const CUtensorMap* __tensor_map, int __c0, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0}; + const _CUDA_VSTD::int32_t __coords[]{__c0}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_2d_shared_to_global( - const CUtensorMap *__tensor_map, int __c0, int __c1, const void *__src) +inline _CCCL_DEVICE void +cp_async_bulk_tensor_2d_shared_to_global(const CUtensorMap* __tensor_map, int __c0, int __c1, const void* __src) { - const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; + const _CUDA_VSTD::int32_t __coords[]{__c0, __c1}; - _CUDA_VPTX::cp_async_bulk_tensor( - _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, - __tensor_map, __coords, __src); + _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src); } // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor -inline _CCCL_DEVICE -void cp_async_bulk_tensor_3d_shared_to_global( - 
const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, const void *__src)
+inline _CCCL_DEVICE void cp_async_bulk_tensor_3d_shared_to_global(
+  const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, const void* __src)
 {
-    const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2};
+  const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2};
 
-    _CUDA_VPTX::cp_async_bulk_tensor(
-        _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared,
-        __tensor_map, __coords, __src);
+  _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src);
 }
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor
-inline _CCCL_DEVICE
-void cp_async_bulk_tensor_4d_shared_to_global(
-    const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, int __c3, const void *__src)
+inline _CCCL_DEVICE void cp_async_bulk_tensor_4d_shared_to_global(
+  const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, int __c3, const void* __src)
 {
-    const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3};
+  const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3};
 
-    _CUDA_VPTX::cp_async_bulk_tensor(
-        _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared,
-        __tensor_map, __coords, __src);
+  _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src);
 }
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-tensor
-inline _CCCL_DEVICE
-void cp_async_bulk_tensor_5d_shared_to_global(
-    const CUtensorMap *__tensor_map, int __c0, int __c1, int __c2, int __c3, int __c4, const void *__src)
+inline _CCCL_DEVICE void cp_async_bulk_tensor_5d_shared_to_global(
+  const CUtensorMap* __tensor_map, int __c0, int __c1, int __c2, int __c3, int __c4, const void* __src)
 {
-    const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4};
+  const _CUDA_VSTD::int32_t __coords[]{__c0, __c1, __c2, __c3, __c4};
 
-    _CUDA_VPTX::cp_async_bulk_tensor(
-        _CUDA_VPTX::space_global, _CUDA_VPTX::space_shared,
-        __tensor_map, __coords, __src);
+  _CUDA_VPTX::cp_async_bulk_tensor(_CUDA_VPTX::space_global, _CUDA_VPTX::space_shared, __tensor_map, __coords, __src);
 }
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar
-inline _CCCL_DEVICE
-void fence_proxy_async_shared_cta() {
-    _CUDA_VPTX::fence_proxy_async(_CUDA_VPTX::space_shared);
+inline _CCCL_DEVICE void fence_proxy_async_shared_cta()
+{
+  _CUDA_VPTX::fence_proxy_async(_CUDA_VPTX::space_shared);
 }
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-commit-group
-inline _CCCL_DEVICE
-void cp_async_bulk_commit_group()
+inline _CCCL_DEVICE void cp_async_bulk_commit_group()
 {
-    _CUDA_VPTX::cp_async_bulk_commit_group();
+  _CUDA_VPTX::cp_async_bulk_commit_group();
 }
 
 // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-cp-async-bulk-wait-group
 template
-inline _CCCL_DEVICE
-void cp_async_bulk_wait_group_read()
+inline _CCCL_DEVICE void cp_async_bulk_wait_group_read()
 {
   static_assert(__n_prior <= 63, "cp_async_bulk_wait_group_read: waiting for more than 63 groups is not supported.");
   _CUDA_VPTX::cp_async_bulk_wait_group_read(_CUDA_VPTX::n32_t<__n_prior>{});
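(Illustrative sketch, not part of this diff: how the experimental cuda::device::experimental bulk-copy helpers in the cuda/barrier hunk above are typically combined with a block-scoped barrier on sm_90, following the pattern documented in the CUDA programming guide. The kernel and buffer names are hypothetical; gdata is assumed to be a 16-byte aligned global buffer with at least 1024 ints per block.)

#include <cuda/barrier>

namespace cde       = cuda::device::experimental;
using block_barrier = cuda::barrier<cuda::thread_scope_block>;

__global__ void bulk_add_one(int* gdata)
{
  __shared__ alignas(16) int smem[1024];
#pragma nv_diag_suppress static_var_with_dynamic_init
  __shared__ block_barrier bar;
  if (threadIdx.x == 0)
  {
    init(&bar, blockDim.x);
    cde::fence_proxy_async_shared_cta(); // make the initialized barrier visible to the async proxy
  }
  __syncthreads();

  block_barrier::arrival_token token;
  if (threadIdx.x == 0)
  {
    // One thread issues the bulk copy and accounts for the expected transaction bytes.
    cde::cp_async_bulk_global_to_shared(smem, gdata + blockIdx.x * 1024, sizeof(smem), bar);
    token = cuda::device::barrier_arrive_tx(bar, 1, sizeof(smem));
  }
  else
  {
    token = bar.arrive();
  }
  bar.wait(cuda::std::move(token));

  for (int i = threadIdx.x; i < 1024; i += blockDim.x)
  {
    smem[i] += 1;
  }

  cde::fence_proxy_async_shared_cta(); // order shared-memory writes before the bulk copy out
  __syncthreads();

  if (threadIdx.x == 0)
  {
    cde::cp_async_bulk_shared_to_global(gdata + blockIdx.x * 1024, smem, sizeof(smem));
    cde::cp_async_bulk_commit_group();
    cde::cp_async_bulk_wait_group_read<0>();
  }
}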
diff --git a/libcudacxx/include/cuda/discard_memory b/libcudacxx/include/cuda/discard_memory
index cc4963874ae..6da2ea209c4 100644
--- a/libcudacxx/include/cuda/discard_memory
+++ b/libcudacxx/include/cuda/discard_memory
@@ -36,14 +36,14 @@ inline _CCCL_HOST_DEVICE void discard_memory(volatile void* __ptr, size_t __nbyt
     NV_PROVIDES_SM_80,
     (if (!__isGlobal((void*) __ptr)) return;
 
-     char* __p = reinterpret_cast<char*>(const_cast<void*>(__ptr));
-     char* const __end_p = __p + __nbytes;
+     char* __p           = reinterpret_cast<char*>(const_cast<void*>(__ptr));
+     char* const __end_p = __p + __nbytes;
 
      static constexpr size_t _LINE_SIZE = 128;
 
      // Trim the first block and last block if they're not 128 bytes aligned
-     size_t __misalignment = reinterpret_cast<uintptr_t>(__p) % _LINE_SIZE;
-     char* __start_aligned = __misalignment == 0 ? __p : __p + (_LINE_SIZE - __misalignment);
-     char* const __end_aligned = __end_p - (reinterpret_cast<uintptr_t>(__end_p) % _LINE_SIZE);
+     size_t __misalignment     = reinterpret_cast<uintptr_t>(__p) % _LINE_SIZE;
+     char* __start_aligned     = __misalignment == 0 ? __p : __p + (_LINE_SIZE - __misalignment);
+     char* const __end_aligned = __end_p - (reinterpret_cast<uintptr_t>(__end_p) % _LINE_SIZE);
 
      while (__start_aligned < __end_aligned) {
        asm volatile("discard.global.L2 [%0], 128;" ::"l"(__start_aligned) :);
diff --git a/libcudacxx/include/cuda/functional b/libcudacxx/include/cuda/functional
index 7820c8352cc..d88472f50a0 100644
--- a/libcudacxx/include/cuda/functional
+++ b/libcudacxx/include/cuda/functional
@@ -4,50 +4,128 @@
  *
  * NVIDIA SOFTWARE LICENSE
  *
- * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”).
+ * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the
+ * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”).
  *
- * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users.
+ * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used.
+ * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By
+ * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of
+ * this license, and you take legal and financial responsibility for the actions of your permitted users.
  *
- * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions.
+ * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law,
+ * regulation or generally accepted practices or guidelines in the relevant jurisdictions.
  *
- * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license.
+ * 1. LICENSE.
Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. 
You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. - * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. - * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. - * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. - * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. - * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. - * - * 9. LIMITATIONS OF LIABILITY. 
TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. - * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. - * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. - * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. - * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. 
and that you are not otherwise prohibited from receiving the SOFTWARE. - * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. - * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * + * 4. PRE-RELEASE. 
SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. + * + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. + * + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. + * + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. + * + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. + * + * 10. TERMINATION. 
Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. + * + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. + * + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. + * + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * + * 15. ENTIRE AGREEMENT. 
This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ @@ -65,101 +143,83 @@ # pragma system_header #endif // no system header -#include +#include +#include #include +#include #include -#include -#include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA namespace __detail { template -class __return_type_wrapper { - private: +class __return_type_wrapper +{ +private: _DecayFn __fn_; - public: +public: __return_type_wrapper() = delete; template , _DecayFn>::value>> - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - explicit __return_type_wrapper(_Fn &&__fn) noexcept - : __fn_(_CUDA_VSTD::forward<_Fn>(__fn)) {} + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 explicit __return_type_wrapper(_Fn&& __fn) noexcept + : __fn_(_CUDA_VSTD::forward<_Fn>(__fn)) + {} template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) & noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) & noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of<_DecayFn&, _As...>::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of<_DecayFn&, _As...>::type>::value, + "Return type shall match the proclaimed one exactly"); #endif return _CUDA_VSTD::__invoke(__fn_, _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) && noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) && noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of<_DecayFn, _As...>::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of<_DecayFn, _As...>::type>::value, + "Return type shall match the proclaimed one exactly"); #endif - return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), - _CUDA_VSTD::forward<_As>(__as)...); + return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... __as) const& noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) const& noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of::type>::value, + "Return type shall match the proclaimed one exactly"); #endif return _CUDA_VSTD::__invoke(__fn_, _CUDA_VSTD::forward<_As>(__as)...); } template - _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 - _Ret operator()(_As&&... 
__as) const&& noexcept { + _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Ret operator()(_As&&... __as) const&& noexcept + { #if !defined(__NVCC__) || defined(__CUDA_ARCH__) - static_assert( - _CUDA_VSTD::is_same< - _Ret, - typename _CUDA_VSTD::__invoke_of::type - >::value, - "Return type shall match the proclaimed one exactly"); + static_assert(_CUDA_VSTD::is_same<_Ret, typename _CUDA_VSTD::__invoke_of::type>::value, + "Return type shall match the proclaimed one exactly"); #endif - return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), - _CUDA_VSTD::forward<_As>(__as)...); + return _CUDA_VSTD::__invoke(_CUDA_VSTD::move(__fn_), _CUDA_VSTD::forward<_As>(__as)...); } }; -} // __detail +} // namespace __detail template -inline _LIBCUDACXX_INLINE_VISIBILITY -__detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>> -proclaim_return_type(_Fn&& __fn) noexcept { - return __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>>( - _CUDA_VSTD::forward<_Fn>(__fn)); +inline _LIBCUDACXX_INLINE_VISIBILITY __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>> +proclaim_return_type(_Fn&& __fn) noexcept +{ + return __detail::__return_type_wrapper<_Ret, _CUDA_VSTD::__decay_t<_Fn>>(_CUDA_VSTD::forward<_Fn>(__fn)); } _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource index a138995aa5f..4c23140f8db 100644 --- a/libcudacxx/include/cuda/memory_resource +++ b/libcudacxx/include/cuda/memory_resource @@ -80,17 +80,18 @@ class resource_ref { */ // clang-format on -# include // cuda_runtime_api needs to come first - -# include "__cccl_config" - -# if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) -# pragma GCC system_header -# elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) -# pragma clang system_header -# elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) -# pragma system_header -# endif // no system header +#include +// cuda_runtime_api needs to come first + +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header #include #include diff --git a/libcudacxx/include/cuda/pipeline b/libcudacxx/include/cuda/pipeline index 509dfd65cbe..583a6fb6c72 100644 --- a/libcudacxx/include/cuda/pipeline +++ b/libcudacxx/include/cuda/pipeline @@ -3,50 +3,128 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. 
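For context, a minimal sketch of how the proclaim_return_type facility from <cuda/functional> (reformatted above) is used; the kernel, lambda, and variable names are illustrative assumptions.

#include <cuda/functional>

__device__ int result;

__global__ void proclaim_sketch()
{
  // Wrap a callable so its return type is stated explicitly; on device, the
  // wrapper static_asserts that the actual invocation result matches exactly.
  auto twice = cuda::proclaim_return_type<int>([](int x) { return x * 2; });
  result = twice(21); // 42
}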
+ * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. 
As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. 
SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. 
“Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. 
Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. 
Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. 
If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ @@ -63,532 +141,563 @@ # pragma system_header #endif // no system header -#include #include +#include #include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - // Forward declaration in barrier of pipeline - enum class pipeline_role { - producer, - consumer - }; - - template - struct __pipeline_stage { - barrier<_Scope> __produced; - barrier<_Scope> __consumed; - }; - - template - class pipeline_shared_state { - public: - pipeline_shared_state() = default; - pipeline_shared_state(const pipeline_shared_state &) = delete; - pipeline_shared_state(pipeline_shared_state &&) = delete; - pipeline_shared_state & operator=(pipeline_shared_state &&) = delete; - pipeline_shared_state & operator=(const pipeline_shared_state &) = delete; - - private: - __pipeline_stage<_Scope> __stages[_Stages_count]; - atomic __refcount; - - template - friend class pipeline; - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, size_t __producer_count); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, pipeline_role __role); - }; - - struct __pipeline_asm_helper { - _CCCL_DEVICE - static inline uint32_t __lane_id() - { - NV_IF_ELSE_TARGET( - NV_IS_DEVICE, - ( - uint32_t __lane_id; - asm volatile ("mov.u32 %0, %%laneid;" : "=r"(__lane_id)); - return __lane_id; - ), - ( - return 0; - ) - ) - } - }; - - template - class pipeline { - public: - pipeline(pipeline &&) = default; - pipeline(const pipeline &) = delete; - pipeline & operator=(pipeline &&) = delete; - pipeline & operator=(const pipeline &) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY - ~pipeline() - { - if (__active) { - (void)quit(); - } - } - - _LIBCUDACXX_INLINE_VISIBILITY - bool quit() - { - bool __elected; - uint32_t __sub_count; -NV_IF_TARGET(NV_IS_DEVICE, - const uint32_t __match_mask = __match_any_sync(__activemask(), reinterpret_cast(__shared_state_get_refcount())); - const uint32_t __elected_id = __ffs(__match_mask) - 1; - __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); - __sub_count = __popc(__match_mask); -, - __elected = true; - __sub_count = 1; -) - bool __released = false; - if (__elected) { - const uint32_t __old = __shared_state_get_refcount()->fetch_sub(__sub_count); - 
const bool __last = (__old == __sub_count); - if (__last) { - for (uint8_t __stage = 0; __stage < __stages_count; ++__stage) { - __shared_state_get_stage(__stage)->__produced.~barrier(); - __shared_state_get_stage(__stage)->__consumed.~barrier(); - } - __released = true; - } - } - __active = false; - return __released; - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_acquire() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__head)->__consumed; - __stage_barrier.wait_parity(__consumed_phase_parity); - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_commit() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__head)->__produced; - (void)__memcpy_completion_impl::__defer(__completion_mechanism::__async_group, __single_thread_group{}, 0, __stage_barrier); - (void)__stage_barrier.arrive(); - if (++__head == __stages_count) { - __head = 0; - __consumed_phase_parity = !__consumed_phase_parity; - } - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_wait() - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__tail)->__produced; - __stage_barrier.wait_parity(__produced_phase_parity); - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_release() - { - (void)__shared_state_get_stage(__tail)->__consumed.arrive(); - if (++__tail == __stages_count) { - __tail = 0; - __produced_phase_parity = !__produced_phase_parity; - } - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period> & __duration) - { - barrier<_Scope> & __stage_barrier = __shared_state_get_stage(__tail)->__produced; - return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( - _CUDA_VSTD::__barrier_poll_tester_parity>( - &__stage_barrier, - __produced_phase_parity), - _CUDA_VSTD::chrono::duration_cast<_CUDA_VSTD::chrono::nanoseconds>(__duration) - ); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration> & __time_point) - { - return consumer_wait_for(__time_point - _Clock::now()); - } - - private: - uint8_t __head : 8; - uint8_t __tail : 8; - const uint8_t __stages_count : 8; - bool __consumed_phase_parity : 1; - bool __produced_phase_parity : 1; - bool __active : 1; - // TODO: Remove partitioned on next ABI break - const bool __partitioned : 1; - char * const __shared_state; - - - _LIBCUDACXX_INLINE_VISIBILITY - pipeline(char * __shared_state, uint8_t __stages_count, bool __partitioned) - : __head(0) - , __tail(0) - , __stages_count(__stages_count) - , __consumed_phase_parity(true) - , __produced_phase_parity(false) - , __active(true) - , __partitioned(__partitioned) - , __shared_state(__shared_state) - {} - - _LIBCUDACXX_INLINE_VISIBILITY - __pipeline_stage<_Scope> * __shared_state_get_stage(uint8_t __stage) - { - ptrdiff_t __stage_offset = __stage * sizeof(__pipeline_stage<_Scope>); - return reinterpret_cast<__pipeline_stage<_Scope>*>(__shared_state + __stage_offset); - } - - _LIBCUDACXX_INLINE_VISIBILITY - atomic * __shared_state_get_refcount() +// Forward declaration in barrier of pipeline +enum class pipeline_role +{ + producer, + consumer +}; + +template +struct __pipeline_stage +{ + barrier<_Scope> __produced; + barrier<_Scope> __consumed; +}; + +template +class pipeline_shared_state +{ +public: + pipeline_shared_state() = default; + pipeline_shared_state(const pipeline_shared_state&) = delete; + pipeline_shared_state(pipeline_shared_state&&) = delete; + pipeline_shared_state& operator=(pipeline_shared_state&&) = delete; + 
pipeline_shared_state& operator=(const pipeline_shared_state&) = delete; + +private: + __pipeline_stage<_Scope> __stages[_Stages_count]; + atomic __refcount; + + template + friend class pipeline; + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + size_t __producer_count); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + pipeline_role __role); +}; + +struct __pipeline_asm_helper +{ + _CCCL_DEVICE static inline uint32_t __lane_id() + { + NV_IF_ELSE_TARGET( + NV_IS_DEVICE, + (uint32_t __lane_id; asm volatile("mov.u32 %0, %%laneid;" + : "=r"(__lane_id)); + return __lane_id;), + (return 0;)) + } +}; + +template +class pipeline +{ +public: + pipeline(pipeline&&) = default; + pipeline(const pipeline&) = delete; + pipeline& operator=(pipeline&&) = delete; + pipeline& operator=(const pipeline&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY ~pipeline() + { + if (__active) + { + (void) quit(); + } + } + + _LIBCUDACXX_INLINE_VISIBILITY bool quit() + { + bool __elected; + uint32_t __sub_count; + NV_IF_TARGET( + NV_IS_DEVICE, + const uint32_t __match_mask = + __match_any_sync(__activemask(), reinterpret_cast(__shared_state_get_refcount())); + const uint32_t __elected_id = __ffs(__match_mask) - 1; + __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); + __sub_count = __popc(__match_mask); + , __elected = true; + __sub_count = 1;) + bool __released = false; + if (__elected) + { + const uint32_t __old = __shared_state_get_refcount()->fetch_sub(__sub_count); + const bool __last = (__old == __sub_count); + if (__last) + { + for (uint8_t __stage = 0; __stage < __stages_count; ++__stage) { - ptrdiff_t __refcount_offset = __stages_count * sizeof(__pipeline_stage<_Scope>); - return reinterpret_cast*>(__shared_state + __refcount_offset); + __shared_state_get_stage(__stage)->__produced.~barrier(); + __shared_state_get_stage(__stage)->__consumed.~barrier(); } - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, size_t __producer_count); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state, pipeline_role __role); - }; - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state) + __released = true; + } + } + __active = false; + return __released; + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_acquire() + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__head)->__consumed; + __stage_barrier.wait_parity(__consumed_phase_parity); + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_commit() + { + barrier<_Scope>& 
__stage_barrier = __shared_state_get_stage(__head)->__produced; + (void) __memcpy_completion_impl::__defer( + __completion_mechanism::__async_group, __single_thread_group{}, 0, __stage_barrier); + (void) __stage_barrier.arrive(); + if (++__head == __stages_count) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __group_size); - init(&__shared_state->__stages[__stage].__produced, __group_size); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, false); + __head = 0; + __consumed_phase_parity = !__consumed_phase_parity; } - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state, size_t __producer_count) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_wait() + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__tail)->__produced; + __stage_barrier.wait_parity(__produced_phase_parity); + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_release() + { + (void) __shared_state_get_stage(__tail)->__consumed.arrive(); + if (++__tail == __stages_count) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - const size_t __consumer_count = __group_size - __producer_count; - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __consumer_count); - init(&__shared_state->__stages[__stage].__produced, __producer_count); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); + __tail = 0; + __produced_phase_parity = !__produced_phase_parity; } - - template - _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Scope> make_pipeline(const _Group & __group, pipeline_shared_state<_Scope, _Stages_count> * __shared_state, pipeline_role __role) + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __duration) + { + barrier<_Scope>& __stage_barrier = __shared_state_get_stage(__tail)->__produced; + return _CUDA_VSTD::__libcpp_thread_poll_with_backoff( + _CUDA_VSTD::__barrier_poll_tester_parity>(&__stage_barrier, __produced_phase_parity), + _CUDA_VSTD::chrono::duration_cast<_CUDA_VSTD::chrono::nanoseconds>(__duration)); + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool + consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time_point) + { + return consumer_wait_for(__time_point - _Clock::now()); + } + +private: + uint8_t __head : 8; + uint8_t __tail : 8; + const uint8_t __stages_count : 8; + bool __consumed_phase_parity : 1; + bool __produced_phase_parity : 1; + bool __active : 1; + // TODO: Remove partitioned on next ABI break + const bool __partitioned : 1; + char* const __shared_state; + + _LIBCUDACXX_INLINE_VISIBILITY pipeline(char* __shared_state, uint8_t __stages_count, bool __partitioned) + : __head(0) + , __tail(0) + , __stages_count(__stages_count) + , __consumed_phase_parity(true) + , __produced_phase_parity(false) + , 
__active(true) + , __partitioned(__partitioned) + , __shared_state(__shared_state) + {} + + _LIBCUDACXX_INLINE_VISIBILITY __pipeline_stage<_Scope>* __shared_state_get_stage(uint8_t __stage) + { + ptrdiff_t __stage_offset = __stage * sizeof(__pipeline_stage<_Scope>); + return reinterpret_cast<__pipeline_stage<_Scope>*>(__shared_state + __stage_offset); + } + + _LIBCUDACXX_INLINE_VISIBILITY atomic* __shared_state_get_refcount() + { + ptrdiff_t __refcount_offset = __stages_count * sizeof(__pipeline_stage<_Scope>); + return reinterpret_cast*>(__shared_state + __refcount_offset); + } + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + size_t __producer_count); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> + make_pipeline(const _Group& __group, + pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state, + pipeline_role __role); +}; + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> +make_pipeline(const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - const uint32_t __group_size = static_cast(__group.size()); - const uint32_t __thread_rank = static_cast(__group.thread_rank()); - - if (__thread_rank == 0) { - __shared_state->__refcount.store(0, std::memory_order_relaxed); - } - __group.sync(); - - if (__role == pipeline_role::producer) { - bool __elected; - uint32_t __add_count; -NV_IF_TARGET(NV_IS_DEVICE, - const uint32_t __match_mask = __match_any_sync(__activemask(), reinterpret_cast(&__shared_state->__refcount)); - const uint32_t __elected_id = __ffs(__match_mask) - 1; - __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); - __add_count = __popc(__match_mask); -, - __elected = true; - __add_count = 1; -) - if (__elected) { - (void)__shared_state->__refcount.fetch_add(__add_count, std::memory_order_relaxed); - } - } - __group.sync(); - - if (__thread_rank == 0) { - const uint32_t __producer_count = __shared_state->__refcount.load(std::memory_order_relaxed); - const uint32_t __consumer_count = __group_size - __producer_count; - for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) { - init(&__shared_state->__stages[__stage].__consumed, __consumer_count); - init(&__shared_state->__stages[__stage].__produced, __producer_count); - } - __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); - } - __group.sync(); - - return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); + init(&__shared_state->__stages[__stage].__consumed, __group_size); + init(&__shared_state->__stages[__stage].__produced, __group_size); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, false); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> make_pipeline( + const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state, 
size_t __producer_count) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + const size_t __consumer_count = __group_size - __producer_count; + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) + { + init(&__shared_state->__stages[__stage].__consumed, __consumer_count); + init(&__shared_state->__stages[__stage].__produced, __producer_count); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY pipeline<_Scope> +make_pipeline(const _Group& __group, pipeline_shared_state<_Scope, _Stages_count>* __shared_state, pipeline_role __role) +{ + const uint32_t __group_size = static_cast(__group.size()); + const uint32_t __thread_rank = static_cast(__group.thread_rank()); + + if (__thread_rank == 0) + { + __shared_state->__refcount.store(0, std::memory_order_relaxed); + } + __group.sync(); + + if (__role == pipeline_role::producer) + { + bool __elected; + uint32_t __add_count; + NV_IF_TARGET( + NV_IS_DEVICE, + const uint32_t __match_mask = + __match_any_sync(__activemask(), reinterpret_cast(&__shared_state->__refcount)); + const uint32_t __elected_id = __ffs(__match_mask) - 1; + __elected = (__pipeline_asm_helper::__lane_id() == __elected_id); + __add_count = __popc(__match_mask); + , __elected = true; + __add_count = 1;) + if (__elected) + { + (void) __shared_state->__refcount.fetch_add(__add_count, std::memory_order_relaxed); } + } + __group.sync(); + + if (__thread_rank == 0) + { + const uint32_t __producer_count = __shared_state->__refcount.load(std::memory_order_relaxed); + const uint32_t __consumer_count = __group_size - __producer_count; + for (uint8_t __stage = 0; __stage < _Stages_count; ++__stage) + { + init(&__shared_state->__stages[__stage].__consumed, __consumer_count); + init(&__shared_state->__stages[__stage].__produced, __producer_count); + } + __shared_state->__refcount.store(__group_size, std::memory_order_relaxed); + } + __group.sync(); + + return pipeline<_Scope>(reinterpret_cast(__shared_state->__stages), _Stages_count, true); +} _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE - template - _CCCL_DEVICE - void __pipeline_consumer_wait(pipeline & __pipeline); +template +_CCCL_DEVICE void __pipeline_consumer_wait(pipeline& __pipeline); - _CCCL_DEVICE - inline void __pipeline_consumer_wait(pipeline & __pipeline, uint8_t __prior); +_CCCL_DEVICE inline void __pipeline_consumer_wait(pipeline& __pipeline, uint8_t __prior); _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - template<> - class pipeline { - public: - pipeline(pipeline &&) = default; - pipeline(const pipeline &) = delete; - pipeline & operator=(pipeline &&) = delete; - pipeline & operator=(const pipeline &) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY - ~pipeline() {} - - _LIBCUDACXX_INLINE_VISIBILITY - bool quit() - { - return true; - } - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_acquire() {} - - _LIBCUDACXX_INLINE_VISIBILITY - void producer_commit() - { -NV_IF_TARGET(NV_PROVIDES_SM_80, - asm volatile ("cp.async.commit_group;"); - ++__head; -) - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_wait() - { -NV_IF_TARGET(NV_PROVIDES_SM_80, - if (__head == __tail) { - return; - } - - const uint8_t __prior = __head - __tail - 1; - 
device::__pipeline_consumer_wait(*this, __prior); - ++__tail; -) - } - - _LIBCUDACXX_INLINE_VISIBILITY - void consumer_release() {} - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period> & __duration) - { - (void)__duration; - consumer_wait(); - return true; - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - bool consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration> & __time_point) - { - (void)__time_point; - consumer_wait(); - return true; - } - - private: - uint8_t __head; - uint8_t __tail; - - _LIBCUDACXX_INLINE_VISIBILITY - pipeline() - : __head(0) - , __tail(0) - {} - - friend _LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline(); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_consumer_wait_prior(pipeline & __pipeline); - - template - friend _LIBCUDACXX_INLINE_VISIBILITY - pipeline<_Pipeline_scope> __make_pipeline(const _Group & __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count> * __shared_state); - }; +template <> +class pipeline +{ +public: + pipeline(pipeline&&) = default; + pipeline(const pipeline&) = delete; + pipeline& operator=(pipeline&&) = delete; + pipeline& operator=(const pipeline&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY ~pipeline() {} + + _LIBCUDACXX_INLINE_VISIBILITY bool quit() + { + return true; + } + + _LIBCUDACXX_INLINE_VISIBILITY void producer_acquire() {} + + _LIBCUDACXX_INLINE_VISIBILITY void producer_commit() + { + NV_IF_TARGET(NV_PROVIDES_SM_80, asm volatile("cp.async.commit_group;"); ++__head;) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_wait() + { + NV_IF_TARGET( + NV_PROVIDES_SM_80, + if (__head == __tail) { return; } + + const uint8_t __prior = __head - __tail - 1; + device::__pipeline_consumer_wait(*this, __prior); + ++__tail;) + } + + _LIBCUDACXX_INLINE_VISIBILITY void consumer_release() {} + + template + _LIBCUDACXX_INLINE_VISIBILITY bool consumer_wait_for(const _CUDA_VSTD::chrono::duration<_Rep, _Period>& __duration) + { + (void) __duration; + consumer_wait(); + return true; + } + + template + _LIBCUDACXX_INLINE_VISIBILITY bool + consumer_wait_until(const _CUDA_VSTD::chrono::time_point<_Clock, _Duration>& __time_point) + { + (void) __time_point; + consumer_wait(); + return true; + } + +private: + uint8_t __head; + uint8_t __tail; + + _LIBCUDACXX_INLINE_VISIBILITY pipeline() + : __head(0) + , __tail(0) + {} + + friend _LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline(); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY void pipeline_consumer_wait_prior(pipeline& __pipeline); + + template + friend _LIBCUDACXX_INLINE_VISIBILITY pipeline<_Pipeline_scope> __make_pipeline( + const _Group& __group, pipeline_shared_state<_Pipeline_scope, _Pipeline_stages_count>* __shared_state); +}; _LIBCUDACXX_END_NAMESPACE_CUDA _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_DEVICE - template - _CCCL_DEVICE - void __pipeline_consumer_wait(pipeline & __pipeline) - { - (void)__pipeline; -NV_IF_TARGET(NV_PROVIDES_SM_80, - constexpr uint8_t __max_prior = 8; - - asm volatile ("cp.async.wait_group %0;" - : - : "n"(_Prior < __max_prior ? 
_Prior : __max_prior)); -) - } - - _CCCL_DEVICE - inline void __pipeline_consumer_wait(pipeline & __pipeline, uint8_t __prior) - { - switch (__prior) { - case 0: device::__pipeline_consumer_wait<0>(__pipeline); break; - case 1: device::__pipeline_consumer_wait<1>(__pipeline); break; - case 2: device::__pipeline_consumer_wait<2>(__pipeline); break; - case 3: device::__pipeline_consumer_wait<3>(__pipeline); break; - case 4: device::__pipeline_consumer_wait<4>(__pipeline); break; - case 5: device::__pipeline_consumer_wait<5>(__pipeline); break; - case 6: device::__pipeline_consumer_wait<6>(__pipeline); break; - case 7: device::__pipeline_consumer_wait<7>(__pipeline); break; - default: device::__pipeline_consumer_wait<8>(__pipeline); break; - } - } +template +_CCCL_DEVICE void __pipeline_consumer_wait(pipeline& __pipeline) +{ + (void) __pipeline; + NV_IF_TARGET(NV_PROVIDES_SM_80, constexpr uint8_t __max_prior = 8; + + asm volatile("cp.async.wait_group %0;" + : + : "n"(_Prior < __max_prior ? _Prior : __max_prior));) +} + +_CCCL_DEVICE inline void __pipeline_consumer_wait(pipeline& __pipeline, uint8_t __prior) +{ + switch (__prior) + { + case 0: + device::__pipeline_consumer_wait<0>(__pipeline); + break; + case 1: + device::__pipeline_consumer_wait<1>(__pipeline); + break; + case 2: + device::__pipeline_consumer_wait<2>(__pipeline); + break; + case 3: + device::__pipeline_consumer_wait<3>(__pipeline); + break; + case 4: + device::__pipeline_consumer_wait<4>(__pipeline); + break; + case 5: + device::__pipeline_consumer_wait<5>(__pipeline); + break; + case 6: + device::__pipeline_consumer_wait<6>(__pipeline); + break; + case 7: + device::__pipeline_consumer_wait<7>(__pipeline); + break; + default: + device::__pipeline_consumer_wait<8>(__pipeline); + break; + } +} _LIBCUDACXX_END_NAMESPACE_CUDA_DEVICE _LIBCUDACXX_BEGIN_NAMESPACE_CUDA - _LIBCUDACXX_INLINE_VISIBILITY - inline pipeline make_pipeline() - { - return pipeline(); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_consumer_wait_prior(pipeline & __pipeline) - { - NV_IF_TARGET(NV_PROVIDES_SM_80, - device::__pipeline_consumer_wait<_Prior>(__pipeline); - __pipeline.__tail = __pipeline.__head - _Prior; - ) - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - void pipeline_producer_commit(pipeline & __pipeline, barrier<_Scope> & __barrier) - { - (void)__pipeline; - NV_IF_TARGET(NV_PROVIDES_SM_80,( - (void)__memcpy_completion_impl::__defer(__completion_mechanism::__async_group, __single_thread_group{}, 0, __barrier); - )); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment __memcpy_async_pipeline(_Group const & __group, _Tp * __destination, _Tp const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - // 1. Set the completion mechanisms that can be used. - // - // Do not (yet) allow async_bulk_group completion. Do not allow - // mbarrier_complete_tx completion, even though it may be possible if - // the pipeline has stage barriers in shared memory. - _CUDA_VSTD::uint32_t __allowed_completions = _CUDA_VSTD::uint32_t(__completion_mechanism::__async_group); - - // Alignment: Use the maximum of the alignment of _Tp and that of a possible cuda::aligned_size_t. - constexpr _CUDA_VSTD::size_t __size_align = __get_size_align<_Size>::align; - constexpr _CUDA_VSTD::size_t __align = (alignof(_Tp) < __size_align) ? __size_align : alignof(_Tp); - // Cast to char pointers. We don't need the type for alignment anymore and - // erasing the types reduces the number of instantiations of down-stream - // functions. 
- char * __dest_char = reinterpret_cast(__destination); - char const * __src_char = reinterpret_cast(__source); - - // 2. Issue actual copy instructions. - auto __cm = __dispatch_memcpy_async<__align>(__group, __dest_char, __src_char, __size, __allowed_completions); - - // 3. No need to synchronize with copy instructions. - return __memcpy_completion_impl::__defer(__cm, __group, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, _Type * __destination, _Type const * __source, std::size_t __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); - } - - template _Alignment) ? alignof(_Type) : _Alignment> - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, _Type * __destination, _Type const * __source, aligned_size_t<_Alignment> __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Type * __destination, _Type const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__single_thread_group{}, __destination, __source, __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, void * __destination, void const * __source, std::size_t __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(_Group const & __group, void * __destination, void const * __source, aligned_size_t<_Alignment> __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } - - template - _LIBCUDACXX_INLINE_VISIBILITY - async_contract_fulfillment memcpy_async(void * __destination, void const * __source, _Size __size, pipeline<_Scope> & __pipeline) { - return __memcpy_async_pipeline(__single_thread_group{}, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); - } +_LIBCUDACXX_INLINE_VISIBILITY inline pipeline make_pipeline() +{ + return pipeline(); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void pipeline_consumer_wait_prior(pipeline& __pipeline) +{ + NV_IF_TARGET(NV_PROVIDES_SM_80, device::__pipeline_consumer_wait<_Prior>(__pipeline); + __pipeline.__tail = __pipeline.__head - _Prior;) +} + +template +_LIBCUDACXX_INLINE_VISIBILITY void +pipeline_producer_commit(pipeline& __pipeline, barrier<_Scope>& __barrier) +{ + (void) __pipeline; + NV_IF_TARGET(NV_PROVIDES_SM_80, + ((void) __memcpy_completion_impl::__defer( + __completion_mechanism::__async_group, __single_thread_group{}, 0, __barrier);)); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment __memcpy_async_pipeline( + _Group const& __group, _Tp* __destination, _Tp const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + // 1. Set the completion mechanisms that can be used. + // + // Do not (yet) allow async_bulk_group completion. Do not allow + // mbarrier_complete_tx completion, even though it may be possible if + // the pipeline has stage barriers in shared memory. 
+ _CUDA_VSTD::uint32_t __allowed_completions = _CUDA_VSTD::uint32_t(__completion_mechanism::__async_group); + + // Alignment: Use the maximum of the alignment of _Tp and that of a possible cuda::aligned_size_t. + constexpr _CUDA_VSTD::size_t __size_align = __get_size_align<_Size>::align; + constexpr _CUDA_VSTD::size_t __align = (alignof(_Tp) < __size_align) ? __size_align : alignof(_Tp); + // Cast to char pointers. We don't need the type for alignment anymore and + // erasing the types reduces the number of instantiations of down-stream + // functions. + char* __dest_char = reinterpret_cast(__destination); + char const* __src_char = reinterpret_cast(__source); + + // 2. Issue actual copy instructions. + auto __cm = __dispatch_memcpy_async<__align>(__group, __dest_char, __src_char, __size, __allowed_completions); + + // 3. No need to synchronize with copy instructions. + return __memcpy_completion_impl::__defer(__cm, __group, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, _Type* __destination, _Type const* __source, std::size_t __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); +} + +template _Alignment) ? alignof(_Type) : _Alignment> +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, + _Type* __destination, + _Type const* __source, + aligned_size_t<_Alignment> __size, + pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__group, __destination, __source, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment +memcpy_async(_Type* __destination, _Type const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline(__single_thread_group{}, __destination, __source, __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, void* __destination, void const* __source, std::size_t __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment memcpy_async( + _Group const& __group, + void* __destination, + void const* __source, + aligned_size_t<_Alignment> __size, + pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __group, reinterpret_cast(__destination), reinterpret_cast(__source), __size, __pipeline); +} + +template +_LIBCUDACXX_INLINE_VISIBILITY async_contract_fulfillment +memcpy_async(void* __destination, void const* __source, _Size __size, pipeline<_Scope>& __pipeline) +{ + return __memcpy_async_pipeline( + __single_thread_group{}, + reinterpret_cast(__destination), + reinterpret_cast(__source), + __size, + __pipeline); +} _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/__algorithm_ b/libcudacxx/include/cuda/std/__algorithm_ index 91c4160a8b5..2ec4ef668af 100644 --- a/libcudacxx/include/cuda/std/__algorithm_ +++ b/libcudacxx/include/cuda/std/__algorithm_ @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif 
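[Illustrative aside, not part of the diff above: a minimal sketch of how the producer/consumer interface reformatted in <cuda/pipeline> (pipeline_shared_state, make_pipeline, producer_acquire/producer_commit, consumer_wait/consumer_release, memcpy_async with a pipeline) is typically driven from a kernel. The kernel name, the single-stage configuration, and the trivial "double each element" compute step are placeholder assumptions; the launch is assumed to supply batch_size * sizeof(int) bytes of dynamic shared memory.]

#include <cooperative_groups.h>
#include <cuda/pipeline>

__global__ void scale_batches(int* out, const int* in, size_t batch_size, size_t batch_count)
{
  extern __shared__ int staging[]; // sized to batch_size * sizeof(int) at launch (assumption)

  auto block = cooperative_groups::this_thread_block();

  // Unified single-stage pipeline: every thread in the block acts as both
  // producer (issues the async copy) and consumer (waits on it, then computes).
  constexpr size_t stages = 1;
  __shared__ cuda::pipeline_shared_state<cuda::thread_scope_block, stages> state;
  auto pipe = cuda::make_pipeline(block, &state);

  for (size_t batch = 0; batch < batch_count; ++batch)
  {
    pipe.producer_acquire();
    cuda::memcpy_async(block, staging, in + batch * batch_size, sizeof(int) * batch_size, pipe);
    pipe.producer_commit();

    pipe.consumer_wait(); // the async copy into `staging` has completed past this point
    for (size_t i = block.thread_rank(); i < batch_size; i += block.size())
    {
      out[batch * batch_size + i] = 2 * staging[i];
    }
    pipe.consumer_release(); // allow `staging` to be overwritten by the next producer_acquire
  }
}

[With stages > 1 the same loop overlaps the copy for batch N+1 with the compute on batch N; the partitioned make_pipeline overloads shown above (producer_count / pipeline_role) split the block into dedicated producer and consumer threads instead.]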
// _CUDA_STD_ALGORITHM diff --git a/libcudacxx/include/cuda/std/__exception_ b/libcudacxx/include/cuda/std/__exception_ index e5aedc1d49d..a54c7a1f77a 100644 --- a/libcudacxx/include/cuda/std/__exception_ +++ b/libcudacxx/include/cuda/std/__exception_ @@ -8,15 +8,25 @@ // //===----------------------------------------------------------------------===// -#ifndef _CUDA_STD_NEW -#define _CUDA_STD_NEW +#ifndef _CUDA_STD_EXCEPTION +#define _CUDA_STD_EXCEPTION -#include "detail/__config" +#include -#include "detail/__pragma_push" +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header -#include "detail/libcxx/include/exception" +// clang-format off +#include -#include "detail/__pragma_pop" +#include -#endif // _CUDA_STD_NEW +#include +// clang-format on + +#endif // _CUDA_STD_EXCEPTION diff --git a/libcudacxx/include/cuda/std/__memory_ b/libcudacxx/include/cuda/std/__memory_ index 1bff78d6773..ee2af7fdb97 100644 --- a/libcudacxx/include/cuda/std/__memory_ +++ b/libcudacxx/include/cuda/std/__memory_ @@ -11,12 +11,22 @@ #ifndef _CUDA_STD_MEMORY #define _CUDA_STD_MEMORY -#include "detail/__config" +#include -#include "detail/__pragma_push" +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header -#include "detail/libcxx/include/memory" +// clang-format off +#include -#include "detail/__pragma_pop" +#include + +#include +// clang-format on #endif // _CUDA_STD_MEMORY diff --git a/libcudacxx/include/cuda/std/__new_ b/libcudacxx/include/cuda/std/__new_ index 3e8aefcdb6f..39550ac1080 100644 --- a/libcudacxx/include/cuda/std/__new_ +++ b/libcudacxx/include/cuda/std/__new_ @@ -11,12 +11,22 @@ #ifndef _CUDA_STD_NEW #define _CUDA_STD_NEW -#include "detail/__config" +#include -#include "detail/__pragma_push" +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header -#include "detail/libcxx/include/new" +// clang-format off +#include -#include "detail/__pragma_pop" +#include + +#include +// clang-format on #endif // _CUDA_STD_NEW diff --git a/libcudacxx/include/cuda/std/array b/libcudacxx/include/cuda/std/array index f0bd5785600..8190edc899d 100644 --- a/libcudacxx/include/cuda/std/array +++ b/libcudacxx/include/cuda/std/array @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_ARRAY diff --git a/libcudacxx/include/cuda/std/atomic b/libcudacxx/include/cuda/std/atomic index 0daab5f2cb5..3ec9392334f 100644 --- a/libcudacxx/include/cuda/std/atomic +++ b/libcudacxx/include/cuda/std/atomic @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif 
defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_ATOMIC diff --git a/libcudacxx/include/cuda/std/barrier b/libcudacxx/include/cuda/std/barrier index 415c3f80acf..6a29770a6fb 100644 --- a/libcudacxx/include/cuda/std/barrier +++ b/libcudacxx/include/cuda/std/barrier @@ -17,10 +17,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_BARRIER diff --git a/libcudacxx/include/cuda/std/bit b/libcudacxx/include/cuda/std/bit index 491b346c576..ca3e0ed6470 100644 --- a/libcudacxx/include/cuda/std/bit +++ b/libcudacxx/include/cuda/std/bit @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_BIT diff --git a/libcudacxx/include/cuda/std/cassert b/libcudacxx/include/cuda/std/cassert index af8af80e43d..5270a28dc93 100644 --- a/libcudacxx/include/cuda/std/cassert +++ b/libcudacxx/include/cuda/std/cassert @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CASSERT diff --git a/libcudacxx/include/cuda/std/cfloat b/libcudacxx/include/cuda/std/cfloat index 31a9f8e4e61..b27bc58c561 100644 --- a/libcudacxx/include/cuda/std/cfloat +++ b/libcudacxx/include/cuda/std/cfloat @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CFLOAT diff --git a/libcudacxx/include/cuda/std/chrono b/libcudacxx/include/cuda/std/chrono index f8d62efb4f6..5358250f98f 100644 --- a/libcudacxx/include/cuda/std/chrono +++ b/libcudacxx/include/cuda/std/chrono @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CHRONO diff --git a/libcudacxx/include/cuda/std/climits b/libcudacxx/include/cuda/std/climits index f7934b665a9..1cb0c9625ce 100644 --- a/libcudacxx/include/cuda/std/climits +++ b/libcudacxx/include/cuda/std/climits @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif 
defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CLIMITS diff --git a/libcudacxx/include/cuda/std/cmath b/libcudacxx/include/cuda/std/cmath index a6a05ef2430..e3022b2a7ac 100644 --- a/libcudacxx/include/cuda/std/cmath +++ b/libcudacxx/include/cuda/std/cmath @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CMATH diff --git a/libcudacxx/include/cuda/std/complex b/libcudacxx/include/cuda/std/complex index 7c8ea6b5b46..813a47e9afc 100644 --- a/libcudacxx/include/cuda/std/complex +++ b/libcudacxx/include/cuda/std/complex @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_COMPLEX diff --git a/libcudacxx/include/cuda/std/concepts b/libcudacxx/include/cuda/std/concepts index d3f9eb25dde..8807b10c1e5 100644 --- a/libcudacxx/include/cuda/std/concepts +++ b/libcudacxx/include/cuda/std/concepts @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CONCEPTS diff --git a/libcudacxx/include/cuda/std/cstddef b/libcudacxx/include/cuda/std/cstddef index 95aae77de22..eac158b4763 100644 --- a/libcudacxx/include/cuda/std/cstddef +++ b/libcudacxx/include/cuda/std/cstddef @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CSTDDEF diff --git a/libcudacxx/include/cuda/std/cstdint b/libcudacxx/include/cuda/std/cstdint index 22c0754e481..78c18424d24 100644 --- a/libcudacxx/include/cuda/std/cstdint +++ b/libcudacxx/include/cuda/std/cstdint @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CSTDINT diff --git a/libcudacxx/include/cuda/std/cstdlib b/libcudacxx/include/cuda/std/cstdlib index af85815be27..ba11ff9090a 100644 --- a/libcudacxx/include/cuda/std/cstdlib +++ b/libcudacxx/include/cuda/std/cstdlib @@ -11,12 +11,22 @@ #ifndef _CUDA_STD_CSTDLIB #define _CUDA_STD_CSTDLIB -#include "detail/__config" +#include -#include "detail/__pragma_push" +#if 
defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header -#include "detail/libcxx/include/cstdlib" +// clang-format off +#include -#include "detail/__pragma_pop" +#include + +#include +// clang-format on #endif // _CUDA_STD_CSTDLIB diff --git a/libcudacxx/include/cuda/std/ctime b/libcudacxx/include/cuda/std/ctime index d610c831077..38c957eba11 100644 --- a/libcudacxx/include/cuda/std/ctime +++ b/libcudacxx/include/cuda/std/ctime @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_CTIME diff --git a/libcudacxx/include/cuda/std/detail/__access_property b/libcudacxx/include/cuda/std/detail/__access_property index 7d9718503e9..c63ec342df9 100644 --- a/libcudacxx/include/cuda/std/detail/__access_property +++ b/libcudacxx/include/cuda/std/detail/__access_property @@ -3,325 +3,445 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. 
 * [NVIDIA SOFTWARE LICENSE text continues here; it is the same per-file license header reproduced earlier in this diff, rewrapped to the new column width.]
You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -namespace __detail_ap { +namespace __detail_ap +{ - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_floor_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : 1 + __ap_floor_log2(__x >> 1); - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_floor_log2(uint32_t __x) +{ + return (__x == 1 | __x == 0) ? 
0 : 1 + __ap_floor_log2(__x >> 1); +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_ceil_log2(uint32_t __x) { - return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_ceil_log2(uint32_t __x) +{ + return (__x == 1 | __x == 0) ? 0 : __ap_floor_log2(__x - 1) + 1; +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept { - return (__a < __b) ? __a : __b; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_min(uint32_t __a, uint32_t __b) noexcept +{ + return (__a < __b) ? __a : __b; +} - _CCCL_HOST_DEVICE - constexpr uint32_t __ap_max(uint32_t __a, uint32_t __b) noexcept { - return (__a > __b) ? __a : __b; - } +_CCCL_HOST_DEVICE constexpr uint32_t __ap_max(uint32_t __a, uint32_t __b) noexcept +{ + return (__a > __b) ? __a : __b; +} // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414 // Specifically search for 8.4 and 9.3 and above to guarantee uint64_t enum. -#if defined(_CCCL_COMPILER_GCC) && ( \ - ((_GNUC_VER < 804)) || \ - ((_GNUC_VER < 903)) \ - ) -# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +#if defined(_CCCL_COMPILER_GCC) && (((_GNUC_VER < 804)) || ((_GNUC_VER < 903))) +# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION #else -# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION : uint64_t +# define _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION : uint64_t #endif - namespace __sm_80 { - namespace __off { - enum __l2_cop_off_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _L2_EVICT_NORMAL = 0, - _L2_EVICT_FIRST = 1, - }; - } // namespace __off - - namespace __on { - enum __l2_cop_on_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _L2_EVICT_NORMAL = 0, - _L2_EVICT_FIRST = 1, - _L2_EVICT_LAST = 2, - _L2_EVICT_NORMAL_DEMOTE = 3, - }; - } // namespace __on - - enum __l2_descriptor_mode_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _DESC_IMPLICIT = 0, - _DESC_INTERLEAVED = 2, - _DESC_BLOCK_TYPE = 3, - }; - - enum __l2_eviction_max_way_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _CUDA_AMPERE_MAX_L2_WAYS = std::uint32_t{16}, - }; - - enum __block_size_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION { - _BLOCKSIZE_4K = 0, - _BLOCKSIZE_8K = 1, - _BLOCKSIZE_16K = 2, - _BLOCKSIZE_32K = 3, - _BLOCKSIZE_64K = 4, - _BLOCKSIZE_128K = 5, - _BLOCKSIZE_256K = 6, - _BLOCKSIZE_512K = 7, - _BLOCKSIZE_1M = 8, - _BLOCKSIZE_2M = 9, - _BLOCKSIZE_4M = 10, - _BLOCKSIZE_8M = 11, - _BLOCKSIZE_16M = 12, - _BLOCKSIZE_32M = 13, - }; - - struct __block_desc_t { - uint64_t __ap_reserved : 37; - uint64_t __block_count: 7; - uint64_t __block_start: 7; - uint64_t __ap_reserved2 : 1; - __block_size_t __block_size : 4; - __off::__l2_cop_off_t __l2_cop_off : 1; - __on::__l2_cop_on_t __l2_cop_on : 2; - __l2_descriptor_mode_t __l2_descriptor_mode : 2; - uint64_t __l1_inv_dont_allocate : 1; - uint64_t __l2_sector_promote_256B : 1; - uint64_t __ap_reserved3 : 1; - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor_cexpr() const noexcept { - return - std::uint64_t(__ap_reserved) << 0 | - std::uint64_t(__block_count) << 37 | - std::uint64_t(__block_start) << 44 | - std::uint64_t(__ap_reserved2) << 51 | - std::uint64_t(__block_size) << 52 | - std::uint64_t(__l2_cop_off) << 56 | - std::uint64_t(__l2_cop_on) << 57 | - std::uint64_t(__l2_descriptor_mode) << 59 | - std::uint64_t(__l1_inv_dont_allocate) << 61 | - std::uint64_t(__l2_sector_promote_256B) << 62 | - std::uint64_t(__ap_reserved3) << 63; - } - - inline - _CCCL_HOST_DEVICE - std::uint64_t __get_descriptor_non_cexpr() const noexcept { return *reinterpret_cast(this); } - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor() 
const noexcept { +namespace __sm_80 +{ +namespace __off +{ +enum __l2_cop_off_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _L2_EVICT_NORMAL = 0, + _L2_EVICT_FIRST = 1, +}; +} // namespace __off + +namespace __on +{ +enum __l2_cop_on_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _L2_EVICT_NORMAL = 0, + _L2_EVICT_FIRST = 1, + _L2_EVICT_LAST = 2, + _L2_EVICT_NORMAL_DEMOTE = 3, +}; +} // namespace __on + +enum __l2_descriptor_mode_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _DESC_IMPLICIT = 0, + _DESC_INTERLEAVED = 2, + _DESC_BLOCK_TYPE = 3, +}; + +enum __l2_eviction_max_way_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _CUDA_AMPERE_MAX_L2_WAYS = std::uint32_t{16}, +}; + +enum __block_size_t _LIBCUDACXX_AP_ENUM_TYPE_ANNOTATION +{ + _BLOCKSIZE_4K = 0, + _BLOCKSIZE_8K = 1, + _BLOCKSIZE_16K = 2, + _BLOCKSIZE_32K = 3, + _BLOCKSIZE_64K = 4, + _BLOCKSIZE_128K = 5, + _BLOCKSIZE_256K = 6, + _BLOCKSIZE_512K = 7, + _BLOCKSIZE_1M = 8, + _BLOCKSIZE_2M = 9, + _BLOCKSIZE_4M = 10, + _BLOCKSIZE_8M = 11, + _BLOCKSIZE_16M = 12, + _BLOCKSIZE_32M = 13, +}; + +struct __block_desc_t +{ + uint64_t __ap_reserved : 37; + uint64_t __block_count : 7; + uint64_t __block_start : 7; + uint64_t __ap_reserved2 : 1; + __block_size_t __block_size : 4; + __off::__l2_cop_off_t __l2_cop_off : 1; + __on::__l2_cop_on_t __l2_cop_on : 2; + __l2_descriptor_mode_t __l2_descriptor_mode : 2; + uint64_t __l1_inv_dont_allocate : 1; + uint64_t __l2_sector_promote_256B : 1; + uint64_t __ap_reserved3 : 1; + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor_cexpr() const noexcept + { + return std::uint64_t(__ap_reserved) << 0 | std::uint64_t(__block_count) << 37 | std::uint64_t(__block_start) << 44 + | std::uint64_t(__ap_reserved2) << 51 | std::uint64_t(__block_size) << 52 | std::uint64_t(__l2_cop_off) << 56 + | std::uint64_t(__l2_cop_on) << 57 | std::uint64_t(__l2_descriptor_mode) << 59 + | std::uint64_t(__l1_inv_dont_allocate) << 61 | std::uint64_t(__l2_sector_promote_256B) << 62 + | std::uint64_t(__ap_reserved3) << 63; + } + + inline _CCCL_HOST_DEVICE std::uint64_t __get_descriptor_non_cexpr() const noexcept + { + return *reinterpret_cast(this); + } + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor() const noexcept + { #if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) - return cuda::std::is_constant_evaluated() ? - __get_descriptor_cexpr() : - __get_descriptor_non_cexpr(); + return cuda::std::is_constant_evaluated() ? 
__get_descriptor_cexpr() : __get_descriptor_non_cexpr(); #else - return __get_descriptor_cexpr(); + return __get_descriptor_cexpr(); #endif - } - }; - static_assert(sizeof(__block_desc_t) == 8, "__block_desc_t should be 8 bytes"); - static_assert(sizeof(__block_desc_t) == sizeof(std::uint64_t), ""); - static_assert( - __block_desc_t{(uint64_t)1, (uint64_t)1, (uint64_t)1, (uint64_t)1, __block_size_t::_BLOCKSIZE_8K, __off::_L2_EVICT_FIRST, __on::_L2_EVICT_FIRST, __l2_descriptor_mode_t::_DESC_INTERLEAVED, (uint64_t)1, (uint64_t)1, (uint64_t)1}.__get_descriptor() - == 0xF318102000000001, ""); - - /* Factory like struct to build a __block_desc_t due to constexpr C++11 - */ - struct __block_descriptor_builder { //variable declaration order matters == usage order - std::uint32_t __offset; - __block_size_t __block_size; - std::uint32_t __block_start, __end_hit; - std::uint32_t __block_count; - __off::__l2_cop_off_t __l2_cop_off; - __on::__l2_cop_on_t __l2_cop_on; - __l2_descriptor_mode_t __l2_descriptor_mode; - bool __l1_inv_dont_allocate, __l2_sector_promote_256B; - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_offset(std::size_t __total_bytes) { - return __ap_max(std::uint32_t{12}, static_cast(__ap_ceil_log2(static_cast(__total_bytes))) - std::uint32_t{7}); - } - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_block_start(std::uintptr_t __ptr, std::size_t __total_bytes) { - return static_cast(__ptr >> __calc_offset(static_cast(__total_bytes))); - } - - _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_end_hit(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes) { - return static_cast((__ptr + __hit_bytes + (std::uintptr_t{1} << (__calc_offset(static_cast(__total_bytes)))) - 1) >> __calc_offset(static_cast(__total_bytes))); - } - - _CCCL_HOST_DEVICE constexpr __block_descriptor_builder(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, __on::__l2_cop_on_t __hit_prop, __off::__l2_cop_off_t __miss_prop) - : __offset(__calc_offset(__total_bytes)) - , __block_size(static_cast<__block_size_t>(__calc_offset(__total_bytes) - std::uint32_t{12})) - , __block_start(__calc_block_start(__ptr, __total_bytes)) - , __end_hit(__calc_end_hit(__ptr, __hit_bytes, __total_bytes)) - , __block_count(__calc_end_hit(__ptr, __hit_bytes, __total_bytes) - __calc_block_start(__ptr, __total_bytes)) - , __l2_cop_off(__miss_prop) - , __l2_cop_on(__hit_prop) - , __l2_descriptor_mode(_DESC_BLOCK_TYPE) - , __l1_inv_dont_allocate(false) - , __l2_sector_promote_256B(false) - {} - - _CCCL_HOST_DEVICE - constexpr __block_desc_t __get_block() const noexcept { - return __block_desc_t { 0, __ap_min(std::uint32_t{0x7f}, __block_count), (__block_start & std::uint32_t{0x7f}), 0, __block_size, __l2_cop_off, __l2_cop_on, _DESC_BLOCK_TYPE, false, false, 0 }; - } - }; - static_assert(sizeof(std::uintptr_t) > 4, "std::uintptr_t needs at least 5 bytes for this code to work"); - - struct __interleave_descriptor_t { - uint64_t __ap_reserved : 52; - uint64_t __fraction : 4; - __off::__l2_cop_off_t __l2_cop_off : 1; - __on::__l2_cop_on_t __l2_cop_on : 2; - __l2_descriptor_mode_t __l2_descriptor_mode : 2; - uint64_t __l1_inv_dont_allocate : 1; - uint64_t __l2_sector_promote_256B : 1; - uint64_t __ap_reserved2 : 1; - - _CCCL_HOST_DEVICE - constexpr __interleave_descriptor_t( - __on::__l2_cop_on_t __hit_prop, - std::uint32_t __hit_ratio, - __off::__l2_cop_off_t __miss_prop) noexcept - : __ap_reserved(0x0), - __fraction(__hit_ratio), - __l2_cop_off(__miss_prop), - 
__l2_cop_on(__hit_prop), - __l2_descriptor_mode(_DESC_INTERLEAVED), - __l1_inv_dont_allocate(0x0), - __l2_sector_promote_256B(0x0), - __ap_reserved2(0x0) {} - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor_cexpr() const { - return - std::uint64_t(__ap_reserved) << 0 | - std::uint64_t(__fraction) << 52 | - std::uint64_t(__l2_cop_off) << 56 | - std::uint64_t(__l2_cop_on) << 57 | - std::uint64_t(__l2_descriptor_mode) << 59 | - std::uint64_t(__l1_inv_dont_allocate) << 61 | - std::uint64_t(__l2_sector_promote_256B) << 62 | - std::uint64_t(__ap_reserved2) << 63; - } - - inline - _CCCL_HOST_DEVICE - std::uint64_t __get_descriptor_non_cexpr() const noexcept { return *reinterpret_cast(this); } - - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __get_descriptor() const noexcept { + } +}; +static_assert(sizeof(__block_desc_t) == 8, "__block_desc_t should be 8 bytes"); +static_assert(sizeof(__block_desc_t) == sizeof(std::uint64_t), ""); +static_assert( + __block_desc_t{ + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1, + __block_size_t::_BLOCKSIZE_8K, + __off::_L2_EVICT_FIRST, + __on::_L2_EVICT_FIRST, + __l2_descriptor_mode_t::_DESC_INTERLEAVED, + (uint64_t) 1, + (uint64_t) 1, + (uint64_t) 1} + .__get_descriptor() + == 0xF318102000000001, + ""); + +/* Factory like struct to build a __block_desc_t due to constexpr C++11 + */ +struct __block_descriptor_builder +{ // variable declaration order matters == usage order + std::uint32_t __offset; + __block_size_t __block_size; + std::uint32_t __block_start, __end_hit; + std::uint32_t __block_count; + __off::__l2_cop_off_t __l2_cop_off; + __on::__l2_cop_on_t __l2_cop_on; + __l2_descriptor_mode_t __l2_descriptor_mode; + bool __l1_inv_dont_allocate, __l2_sector_promote_256B; + + _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_offset(std::size_t __total_bytes) + { + return __ap_max( + std::uint32_t{12}, + static_cast(__ap_ceil_log2(static_cast(__total_bytes))) - std::uint32_t{7}); + } + + _CCCL_HOST_DEVICE static constexpr std::uint32_t __calc_block_start(std::uintptr_t __ptr, std::size_t __total_bytes) + { + return static_cast(__ptr >> __calc_offset(static_cast(__total_bytes))); + } + + _CCCL_HOST_DEVICE static constexpr std::uint32_t + __calc_end_hit(std::uintptr_t __ptr, std::size_t __hit_bytes, std::size_t __total_bytes) + { + return static_cast( + (__ptr + __hit_bytes + (std::uintptr_t{1} << (__calc_offset(static_cast(__total_bytes)))) - 1) + >> __calc_offset(static_cast(__total_bytes))); + } + + _CCCL_HOST_DEVICE constexpr __block_descriptor_builder( + std::uintptr_t __ptr, + std::size_t __hit_bytes, + std::size_t __total_bytes, + __on::__l2_cop_on_t __hit_prop, + __off::__l2_cop_off_t __miss_prop) + : __offset(__calc_offset(__total_bytes)) + , __block_size(static_cast<__block_size_t>(__calc_offset(__total_bytes) - std::uint32_t{12})) + , __block_start(__calc_block_start(__ptr, __total_bytes)) + , __end_hit(__calc_end_hit(__ptr, __hit_bytes, __total_bytes)) + , __block_count(__calc_end_hit(__ptr, __hit_bytes, __total_bytes) - __calc_block_start(__ptr, __total_bytes)) + , __l2_cop_off(__miss_prop) + , __l2_cop_on(__hit_prop) + , __l2_descriptor_mode(_DESC_BLOCK_TYPE) + , __l1_inv_dont_allocate(false) + , __l2_sector_promote_256B(false) + {} + + _CCCL_HOST_DEVICE constexpr __block_desc_t __get_block() const noexcept + { + return __block_desc_t{ + 0, + __ap_min(std::uint32_t{0x7f}, __block_count), + (__block_start & std::uint32_t{0x7f}), + 0, + __block_size, + __l2_cop_off, + __l2_cop_on, + _DESC_BLOCK_TYPE, + false, + 
false, + 0}; + } +}; +static_assert(sizeof(std::uintptr_t) > 4, "std::uintptr_t needs at least 5 bytes for this code to work"); + +struct __interleave_descriptor_t +{ + uint64_t __ap_reserved : 52; + uint64_t __fraction : 4; + __off::__l2_cop_off_t __l2_cop_off : 1; + __on::__l2_cop_on_t __l2_cop_on : 2; + __l2_descriptor_mode_t __l2_descriptor_mode : 2; + uint64_t __l1_inv_dont_allocate : 1; + uint64_t __l2_sector_promote_256B : 1; + uint64_t __ap_reserved2 : 1; + + _CCCL_HOST_DEVICE constexpr __interleave_descriptor_t( + __on::__l2_cop_on_t __hit_prop, std::uint32_t __hit_ratio, __off::__l2_cop_off_t __miss_prop) noexcept + : __ap_reserved(0x0) + , __fraction(__hit_ratio) + , __l2_cop_off(__miss_prop) + , __l2_cop_on(__hit_prop) + , __l2_descriptor_mode(_DESC_INTERLEAVED) + , __l1_inv_dont_allocate(0x0) + , __l2_sector_promote_256B(0x0) + , __ap_reserved2(0x0) + {} + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor_cexpr() const + { + return std::uint64_t(__ap_reserved) << 0 | std::uint64_t(__fraction) << 52 | std::uint64_t(__l2_cop_off) << 56 + | std::uint64_t(__l2_cop_on) << 57 | std::uint64_t(__l2_descriptor_mode) << 59 + | std::uint64_t(__l1_inv_dont_allocate) << 61 | std::uint64_t(__l2_sector_promote_256B) << 62 + | std::uint64_t(__ap_reserved2) << 63; + } + + inline _CCCL_HOST_DEVICE std::uint64_t __get_descriptor_non_cexpr() const noexcept + { + return *reinterpret_cast(this); + } + + _CCCL_HOST_DEVICE constexpr std::uint64_t __get_descriptor() const noexcept + { #if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) - return cuda::std::is_constant_evaluated() ? - __get_descriptor_cexpr() : - __get_descriptor_non_cexpr(); + return cuda::std::is_constant_evaluated() ? __get_descriptor_cexpr() : __get_descriptor_non_cexpr(); #else - return __get_descriptor_cexpr(); + return __get_descriptor_cexpr(); #endif - } - }; - static_assert(sizeof(__interleave_descriptor_t) == 8, "__interleave_descriptor_t should be 8 bytes"); - static_assert(sizeof(__interleave_descriptor_t) == sizeof(std::uint64_t), ""); - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_normal() noexcept { - return 0x10F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_streaming() noexcept { - return 0x12F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_persisting() noexcept { - return 0x14F0000000000000; - } - - _CCCL_HOST_DEVICE - static constexpr std::uint64_t __interleave_normal_demote() noexcept { - return 0x16F0000000000000; - } - - } // namespace __sm_80 - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __interleave(cudaAccessProperty __hit_prop, float __hit_ratio, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) { - return __sm_80::__interleave_descriptor_t( - ((__hit_prop == cudaAccessPropertyNormal) ? __sm_80::__on::__l2_cop_on_t::_L2_EVICT_NORMAL_DEMOTE : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop)), - __ap_min((static_cast(__hit_ratio) * __sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS), static_cast(__sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS - 1)), - static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop) - ).__get_descriptor(); - } - - _CCCL_HOST_DEVICE - constexpr std::uint64_t __block(void* __ptr, std::size_t __hit_bytes, std::size_t __total_bytes, cudaAccessProperty __hit_prop, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) { - return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes) ? 
__sm_80::__block_descriptor_builder( - reinterpret_cast(__ptr), - __hit_bytes, - __total_bytes, - (__hit_prop == cudaAccessPropertyNormal) ? __sm_80::__on::_L2_EVICT_NORMAL_DEMOTE : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop), - static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop) - ).__get_block().__get_descriptor() - : __sm_80::__interleave_normal(); } +}; +static_assert(sizeof(__interleave_descriptor_t) == 8, "__interleave_descriptor_t should be 8 bytes"); +static_assert(sizeof(__interleave_descriptor_t) == sizeof(std::uint64_t), ""); + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_normal() noexcept +{ + return 0x10F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_streaming() noexcept +{ + return 0x12F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_persisting() noexcept +{ + return 0x14F0000000000000; +} + +_CCCL_HOST_DEVICE static constexpr std::uint64_t __interleave_normal_demote() noexcept +{ + return 0x16F0000000000000; +} + +} // namespace __sm_80 + +_CCCL_HOST_DEVICE constexpr std::uint64_t __interleave( + cudaAccessProperty __hit_prop, float __hit_ratio, cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) +{ + return __sm_80::__interleave_descriptor_t( + ((__hit_prop == cudaAccessPropertyNormal) ? __sm_80::__on::__l2_cop_on_t::_L2_EVICT_NORMAL_DEMOTE + : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop)), + __ap_min( + (static_cast(__hit_ratio) * __sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS), + static_cast(__sm_80::__l2_eviction_max_way_t::_CUDA_AMPERE_MAX_L2_WAYS - 1)), + static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop)) + .__get_descriptor(); +} + +_CCCL_HOST_DEVICE constexpr std::uint64_t __block( + void* __ptr, + std::size_t __hit_bytes, + std::size_t __total_bytes, + cudaAccessProperty __hit_prop, + cudaAccessProperty __miss_prop = cudaAccessPropertyNormal) +{ + return (__total_bytes <= (size_t{0xFFFFFFFF}) & __total_bytes != 0 & __hit_bytes <= __total_bytes) + ? __sm_80::__block_descriptor_builder( + reinterpret_cast(__ptr), + __hit_bytes, + __total_bytes, + (__hit_prop == cudaAccessPropertyNormal) + ? __sm_80::__on::_L2_EVICT_NORMAL_DEMOTE + : static_cast<__sm_80::__on::__l2_cop_on_t>(__hit_prop), + static_cast<__sm_80::__off::__l2_cop_off_t>(__miss_prop)) + .__get_block() + .__get_descriptor() + : __sm_80::__interleave_normal(); +} } // namespace __detail_ap _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/detail/__annotated_ptr b/libcudacxx/include/cuda/std/detail/__annotated_ptr index f1d4b166b6e..eb84a309f45 100644 --- a/libcudacxx/include/cuda/std/detail/__annotated_ptr +++ b/libcudacxx/include/cuda/std/detail/__annotated_ptr @@ -3,229 +3,327 @@ * * NVIDIA SOFTWARE LICENSE * - * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). + * This license is a legal agreement between you and NVIDIA Corporation ("NVIDIA") and governs your use of the + * NVIDIA/CUDA C++ Library software and materials provided hereunder (“SOFTWARE”). * - * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. 
By taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of this license, and you take legal and financial responsibility for the actions of your permitted users. + * This license can be accepted only by an adult of legal age of majority in the country in which the SOFTWARE is used. + * If you are under the legal age of majority, you must ask your parent or legal guardian to consent to this license. By + * taking delivery of the SOFTWARE, you affirm that you have reached the legal age of majority, you accept the terms of + * this license, and you take legal and financial responsibility for the actions of your permitted users. * - * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, regulation or generally accepted practices or guidelines in the relevant jurisdictions. + * You agree to use the SOFTWARE only for purposes that are permitted by (a) this license, and (b) any applicable law, + * regulation or generally accepted practices or guidelines in the relevant jurisdictions. * - * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under this license. + * 1. LICENSE. Subject to the terms of this license, NVIDIA grants you a non-exclusive limited license to: (a) install + * and use the SOFTWARE, and (b) distribute the SOFTWARE subject to the distribution requirements described in this + * license. NVIDIA reserves all rights, title and interest in and to the SOFTWARE not expressly granted to you under + * this license. * * 2. DISTRIBUTION REQUIREMENTS. These are the distribution requirements for you to exercise the distribution grant: - * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, including (without limitation) terms relating to the license grant and license restrictions and protection of NVIDIA’s intellectual property rights. - * b. You agree to notify NVIDIA in writing of any known or suspected distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms of your agreements with respect to distributed SOFTWARE. + * a. The terms under which you distribute the SOFTWARE must be consistent with the terms of this license, + * including (without limitation) terms relating to the license grant and license restrictions and protection of + * NVIDIA’s intellectual property rights. b. You agree to notify NVIDIA in writing of any known or suspected + * distribution or use of the SOFTWARE not in compliance with the requirements of this license, and to enforce the terms + * of your agreements with respect to distributed SOFTWARE. * * 3. LIMITATIONS. Your license to use the SOFTWARE is restricted as follows: * a. The SOFTWARE is licensed for you to develop applications only for use in systems with NVIDIA GPUs. - * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from any portion of the SOFTWARE or copies of the SOFTWARE. - * c. You may not modify or create derivative works of any portion of the SOFTWARE. - * d. 
You may not bypass, disable, or circumvent any technical measure, encryption, security, digital rights management or authentication mechanism in the SOFTWARE. - * e. You may not use the SOFTWARE in any manner that would cause it to become subject to an open source software license. As examples, licenses that require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. - * f. Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or application where the use or failure of the system or application can reasonably be expected to threaten or result in personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or damages arising from such uses. - * g. You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, and their respective employees, contractors, agents, officers and directors, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. + * b. You may not reverse engineer, decompile or disassemble, or remove copyright or other proprietary notices from + * any portion of the SOFTWARE or copies of the SOFTWARE. c. You may not modify or create derivative works of any + * portion of the SOFTWARE. d. You may not bypass, disable, or circumvent any technical measure, encryption, + * security, digital rights management or authentication mechanism in the SOFTWARE. e. You may not use the SOFTWARE + * in any manner that would cause it to become subject to an open source software license. As examples, licenses that + * require as a condition of use, modification, and/or distribution that the SOFTWARE be (i) disclosed or distributed in + * source code form; (ii) licensed for the purpose of making derivative works; or (iii) redistributable at no charge. f. + * Unless you have an agreement with NVIDIA for this purpose, you may not use the SOFTWARE with any system or + * application where the use or failure of the system or application can reasonably be expected to threaten or result in + * personal injury, death, or catastrophic loss. Examples include use in avionics, navigation, military, medical, life + * support or other life critical applications. NVIDIA does not design, test or manufacture the SOFTWARE for these + * critical uses and NVIDIA shall not be liable to you or any third party, in whole or in part, for any claims or + * damages arising from such uses. g. 
You agree to defend, indemnify and hold harmless NVIDIA and its affiliates, + * and their respective employees, contractors, agents, officers and directors, from and against any and all claims, + * damages, obligations, losses, liabilities, costs or debt, fines, restitutions and expenses (including but not limited + * to attorney’s fees and costs incident to establishing the right of indemnification) arising out of or related to use + * of the SOFTWARE outside of the scope of this Agreement, or not in compliance with its terms. * - * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in production or business-critical systems. + * 4. PRE-RELEASE. SOFTWARE versions identified as alpha, beta, preview, early access or otherwise as pre-release may + * not be fully functional, may contain errors or design flaws, and may have reduced or different security, privacy, + * availability, and reliability standards relative to commercial versions of NVIDIA software and materials. You may use + * a pre-release SOFTWARE version at your own risk, understanding that these versions are not intended for use in + * production or business-critical systems. * - * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time without notice, but is not obligated to support or update the SOFTWARE. + * 5. OWNERSHIP. The SOFTWARE and the related intellectual property rights therein are and will remain the sole and + * exclusive property of NVIDIA or its licensors. The SOFTWARE is copyrighted and protected by the laws of the United + * States and other countries, and international treaty provisions. NVIDIA may make changes to the SOFTWARE, at any time + * without notice, but is not obligated to support or update the SOFTWARE. * - * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is a conflict between the terms in this license and the license terms associated with a component, the license terms associated with the components control only to the extent necessary to resolve the conflict. + * 6. COMPONENTS UNDER OTHER LICENSES. The SOFTWARE may include NVIDIA or third-party components with separate legal + * notices or terms as may be described in proprietary notices accompanying the SOFTWARE. If and to the extent there is + * a conflict between the terms in this license and the license terms associated with a component, the license terms + * associated with the components control only to the extent necessary to resolve the conflict. * - * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. 
For any Feedback that you voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA will use Feedback at its choice. + * 7. FEEDBACK. You may, but don’t have to, provide to NVIDIA any Feedback. “Feedback” means any suggestions, bug fixes, + * enhancements, modifications, feature requests or other feedback regarding the SOFTWARE. For any Feedback that you + * voluntarily provide, you hereby grant NVIDIA and its affiliates a perpetual, non-exclusive, worldwide, irrevocable + * license to use, reproduce, modify, license, sublicense (through multiple tiers of sublicensees), and distribute + * (through multiple tiers of distributors) the Feedback without the payment of any royalties or fees to you. NVIDIA + * will use Feedback at its choice. * - * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. + * 8. NO WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING, BUT + * NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. NVIDIA DOES NOT + * WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS OR THAT THE OPERATION THEREOF WILL BE UNINTERRUPTED OR + * ERROR-FREE, OR THAT ALL ERRORS WILL BE CORRECTED. * - * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE POSSIBILITY OF SUCH DAMAGES. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS LIMIT. + * 9. LIMITATIONS OF LIABILITY. TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS AFFILIATES SHALL NOT BE LIABLE + * FOR ANY SPECIAL, INCIDENTAL, PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, PROJECT DELAYS, LOSS OF USE, + * LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION WITH + * THIS LICENSE OR THE USE OR PERFORMANCE OF THE SOFTWARE, WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON + * BREACH OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION + * OR THEORY OF LIABILITY, EVEN IF NVIDIA HAS PREVIOUSLY BEEN ADVISED OF, OR COULD REASONABLY HAVE FORESEEN, THE + * POSSIBILITY OF SUCH DAMAGES. 
IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES TOTAL CUMULATIVE LIABILITY UNDER OR ARISING + * OUT OF THIS LICENSE EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE + * OR EXTEND THIS LIMIT. * - * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail to comply with any term and condition of this license or if you commence or participate in any legal proceeding against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this license are not affected by the termination of this license. All provisions of this license will survive termination, except for the license granted to you. + * 10. TERMINATION. Your rights under this license will terminate automatically without notice from NVIDIA if you fail + * to comply with any term and condition of this license or if you commence or participate in any legal proceeding + * against NVIDIA with respect to the SOFTWARE. NVIDIA may terminate this license with advance written notice to you if + * NVIDIA decides to no longer provide the SOFTWARE in a country or, in NVIDIA’s sole discretion, the continued use of + * it is no longer commercially viable. Upon any termination of this license, you agree to promptly discontinue use of + * the SOFTWARE and destroy all copies in your possession or control. Your prior distributions in accordance with this + * license are not affected by the termination of this license. All provisions of this license will survive termination, + * except for the license granted to you. * - * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction over any dispute or claim arising out of this license. Notwithstanding this, you agree that NVIDIA shall still be allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. + * 11. APPLICABLE LAW. This license will be governed in all respects by the laws of the United States and of the State + * of Delaware as those laws are applied to contracts entered into and performed entirely within Delaware by Delaware + * residents, without regard to the conflicts of laws principles. The United Nations Convention on Contracts for the + * International Sale of Goods is specifically disclaimed. You agree to all terms of this Agreement in the English + * language. The state or federal courts residing in Santa Clara County, California shall have exclusive jurisdiction + * over any dispute or claim arising out of this license. 
Notwithstanding this, you agree that NVIDIA shall still be + * allowed to apply for injunctive remedies or an equivalent type of urgent legal relief in any jurisdiction. * - * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be void and of no effect. + * 12. NO ASSIGNMENT. This license and your rights and obligations thereunder may not be assigned by you by any means or + * operation of law without NVIDIA’s permission. Any attempted assignment not approved by NVIDIA in writing shall be + * void and of no effect. * - * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from receiving the SOFTWARE. + * 13. EXPORT. The SOFTWARE is subject to United States export laws and regulations. You agree that you will not ship, + * transfer or export the SOFTWARE into any country, or use the SOFTWARE in any manner, prohibited by the United States + * Bureau of Industry and Security or economic sanctions regulations administered by the U.S. Department of Treasury’s + * Office of Foreign Assets Control (OFAC), or any applicable export laws, restrictions or regulations. These laws + * include restrictions on destinations, end users and end use. By accepting this license, you confirm that you are not + * a resident or citizen of any country currently embargoed by the U.S. and that you are not otherwise prohibited from + * receiving the SOFTWARE. * - * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. + * 14. GOVERNMENT USE. The SOFTWARE has been developed entirely at private expense and is “commercial items” consisting + * of “commercial computer software” and “commercial computer software documentation” provided with RESTRICTED RIGHTS. + * Use, duplication or disclosure by the U.S. Government or a U.S. Government subcontractor is subject to the + * restrictions in this license pursuant to DFARS 227.7202-3(a) or as set forth in subparagraphs (b)(1) and (2) of the + * Commercial Computer Software - Restricted Rights clause at FAR 52.227-19, as applicable. Contractor/manufacturer is + * NVIDIA, 2788 San Tomas Expressway, Santa Clara, CA 95051. * - * 15. ENTIRE AGREEMENT. 
This license is the final, complete and exclusive agreement between the parties relating to the subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. This license may only be modified in a writing signed by an authorized representative of each party. + * 15. ENTIRE AGREEMENT. This license is the final, complete and exclusive agreement between the parties relating to the + * subject matter of this license and supersedes all prior or contemporaneous understandings and agreements relating to + * this subject matter, whether oral or written. If any court of competent jurisdiction determines that any provision of + * this license is illegal, invalid or unenforceable, the remaining provisions will remain in full force and effect. + * This license may only be modified in a writing signed by an authorized representative of each party. * * (v. August 20, 2021) */ _LIBCUDACXX_BEGIN_NAMESPACE_CUDA -namespace __detail_ap { +namespace __detail_ap +{ - template - _CCCL_DEVICE - void* __associate_address_space(void* __ptr, _Property __prop) { - if (std::is_same<_Property, access_property::shared>::value == true) { - bool __b = __isShared(__ptr); - _LIBCUDACXX_ASSERT(__b, ""); +template +_CCCL_DEVICE void* __associate_address_space(void* __ptr, _Property __prop) +{ + if (std::is_same<_Property, access_property::shared>::value == true) + { + bool __b = __isShared(__ptr); + _LIBCUDACXX_ASSERT(__b, ""); #if !defined(_CCCL_CUDACC_BELOW_11_2) - __builtin_assume(__b); + __builtin_assume(__b); #else // ^^^ !_CCCL_CUDACC_BELOW_11_2 ^^^ / vvv _CCCL_CUDACC_BELOW_11_2 vvv - (void)__b; + (void) __b; #endif // _CCCL_CUDACC_BELOW_11_2 - } else if (std::is_same<_Property, access_property::global>::value == true || - std::is_same<_Property, access_property::normal>::value == true || - std::is_same<_Property, access_property::persisting>::value == true || - std::is_same<_Property, access_property::streaming>::value == true || - std::is_same<_Property, access_property>::value) { - bool __b = __isGlobal(__ptr); - _LIBCUDACXX_ASSERT(__b, ""); + } + else if (std::is_same<_Property, access_property::global>::value == true + || std::is_same<_Property, access_property::normal>::value == true + || std::is_same<_Property, access_property::persisting>::value == true + || std::is_same<_Property, access_property::streaming>::value == true + || std::is_same<_Property, access_property>::value) + { + bool __b = __isGlobal(__ptr); + _LIBCUDACXX_ASSERT(__b, ""); #if !defined(_CCCL_CUDACC_BELOW_11_2) - __builtin_assume(__b); + __builtin_assume(__b); #else // ^^^ !_CCCL_CUDACC_BELOW_11_2 ^^^ / vvv _CCCL_CUDACC_BELOW_11_2 vvv - (void)__b; + (void) __b; #endif // _CCCL_CUDACC_BELOW_11_2 - } + } + + return __ptr; +} + +template +_CCCL_DEVICE void* __associate_descriptor(void* __ptr, __Prop __prop) +{ + return __associate_descriptor(__ptr, static_cast(access_property(__prop))); +} + +template <> +inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, std::uint64_t __prop) +{ + NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80, (return __nv_associate_access_property(__ptr, __prop);), (return __ptr;)) +} - return __ptr; +template <> +inline _CCCL_DEVICE void* __associate_descriptor(void* __ptr, access_property::shared) +{ + return __ptr; +} + +template 
+_CCCL_HOST_DEVICE _Type* __associate(_Type* __ptr, _Property __prop) +{ + NV_IF_ELSE_TARGET(NV_IS_DEVICE, + (return static_cast<_Type*>(__associate_descriptor( + __associate_address_space(const_cast(static_cast(__ptr)), __prop), __prop));), + (return __ptr;)) +} + +template +class __annotated_ptr_base +{ + using __error = typename _Property::__unknown_access_property_type; +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = 0; + + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::shared) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::shared{}); + } + _CCCL_HOST_DEVICE constexpr access_property::shared __get_property() const noexcept + { + return access_property::shared{}; } +}; - template - _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, __Prop __prop) { - return __associate_descriptor(__ptr, static_cast(access_property(__prop))); +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_normal(); + + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::global) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::global{}); + } + _CCCL_HOST_DEVICE constexpr access_property::global __get_property() const noexcept + { + return access_property::global{}; } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_normal_demote(); - template <> - inline _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, std::uint64_t __prop) { - NV_IF_ELSE_TARGET(NV_PROVIDES_SM_80,( - return __nv_associate_access_property(__ptr, __prop); - ),( - return __ptr; - )) + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::normal) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::normal{}); } + _CCCL_HOST_DEVICE constexpr access_property::normal __get_property() const noexcept + { + return access_property::normal{}; + } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_persisting(); - template<> - inline _CCCL_DEVICE - void* __associate_descriptor(void* __ptr, access_property::shared) { - return __ptr; + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::persisting) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::persisting{}); } + 
_CCCL_HOST_DEVICE constexpr access_property::persisting __get_property() const noexcept + { + return access_property::persisting{}; + } +}; + +template <> +class __annotated_ptr_base +{ +protected: + static constexpr std::uint64_t __prop = __sm_80::__interleave_streaming(); - template - _CCCL_HOST_DEVICE - _Type* __associate(_Type* __ptr, _Property __prop) { - NV_IF_ELSE_TARGET(NV_IS_DEVICE,( - return static_cast<_Type*>(__associate_descriptor( - __associate_address_space(const_cast(static_cast(__ptr)), __prop), - __prop)); - ),( - return __ptr; - )) + constexpr __annotated_ptr_base() noexcept = default; + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::streaming) noexcept {} + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, access_property::streaming{}); + } + _CCCL_HOST_DEVICE constexpr access_property::streaming __get_property() const noexcept + { + return access_property::streaming{}; } +}; +template <> +class __annotated_ptr_base +{ +protected: + std::uint64_t __prop; - template - class __annotated_ptr_base { - using __error = typename _Property::__unknown_access_property_type; - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = 0; - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::shared) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::shared{}); - } - _CCCL_HOST_DEVICE constexpr access_property::shared __get_property() const noexcept { - return access_property::shared{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_normal(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::global) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::global{}); - } - _CCCL_HOST_DEVICE constexpr access_property::global __get_property() const noexcept { - return access_property::global{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_normal_demote(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::normal) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::normal{}); - } - _CCCL_HOST_DEVICE constexpr access_property::normal __get_property() const noexcept { - return access_property::normal{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_persisting(); - - constexpr 
__annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::persisting) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::persisting{}); - } - _CCCL_HOST_DEVICE constexpr access_property::persisting __get_property() const noexcept { - return access_property::persisting{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - static constexpr std::uint64_t __prop = __sm_80::__interleave_streaming(); - - constexpr __annotated_ptr_base() noexcept = default; - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property::streaming) noexcept {} - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, access_property::streaming{}); - } - _CCCL_HOST_DEVICE constexpr access_property::streaming __get_property() const noexcept { - return access_property::streaming{}; - } - }; - - template<> - class __annotated_ptr_base { - protected: - std::uint64_t __prop; - - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base() noexcept : __prop(access_property()) {} - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(std::uint64_t __property) noexcept : __prop(__property) {} - _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property __property) noexcept - : __annotated_ptr_base(static_cast(__property)) {} - constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; - _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; - inline _CCCL_DEVICE void* __apply_prop(void* __p) const { - return __associate(__p, __prop); - } - _CCCL_HOST_DEVICE access_property __get_property() const noexcept { - return reinterpret_cast(const_cast(__prop)); - } - }; + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base() noexcept + : __prop(access_property()) + {} + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(std::uint64_t __property) noexcept + : __prop(__property) + {} + _CCCL_HOST_DEVICE constexpr __annotated_ptr_base(access_property __property) noexcept + : __annotated_ptr_base(static_cast(__property)) + {} + constexpr __annotated_ptr_base(__annotated_ptr_base const&) = default; + _CCCL_CONSTEXPR_CXX14 __annotated_ptr_base& operator=(const __annotated_ptr_base&) = default; + inline _CCCL_DEVICE void* __apply_prop(void* __p) const + { + return __associate(__p, __prop); + } + _CCCL_HOST_DEVICE access_property __get_property() const noexcept + { + return reinterpret_cast(const_cast(__prop)); + } +}; } // namespace __detail_ap _LIBCUDACXX_END_NAMESPACE_CUDA diff --git a/libcudacxx/include/cuda/std/detail/__config b/libcudacxx/include/cuda/std/detail/__config index f4fba1f24d6..aaa22d7cf6d 100644 --- a/libcudacxx/include/cuda/std/detail/__config +++ b/libcudacxx/include/cuda/std/detail/__config @@ -13,7 +13,7 @@ #include -#define _LIBCUDACXX_CUDA_API_VERSION CCCL_VERSION +#define _LIBCUDACXX_CUDA_API_VERSION CCCL_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_MAJOR CCCL_MAJOR_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_MINOR CCCL_MINOR_VERSION #define _LIBCUDACXX_CUDA_API_VERSION_PATCH CCCL_PATCH_VERSION diff --git a/libcudacxx/include/cuda/std/detail/__pragma_push 
b/libcudacxx/include/cuda/std/detail/__pragma_push index 5042010790d..e1a507cad04 100644 --- a/libcudacxx/include/cuda/std/detail/__pragma_push +++ b/libcudacxx/include/cuda/std/detail/__pragma_push @@ -8,5 +8,7 @@ // //===----------------------------------------------------------------------===// +// clang-format off #include #include +// clang-format on diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__assert b/libcudacxx/include/cuda/std/detail/libcxx/include/__assert index ad54f46dfd6..3568b3b746f 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__assert +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__assert @@ -27,28 +27,28 @@ // assertions through the Debug mode previously. // TODO: In LLVM 16, make it an error to define _LIBCUDACXX_DEBUG #if defined(_LIBCUDACXX_DEBUG) -# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 -# endif +# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS +# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 +# endif #endif // Automatically enable assertions when the debug mode is enabled. #if defined(_LIBCUDACXX_ENABLE_DEBUG_MODE) -# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 -# endif +# ifndef _LIBCUDACXX_ENABLE_ASSERTIONS +# define _LIBCUDACXX_ENABLE_ASSERTIONS 1 +# endif #endif #ifndef _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ENABLE_ASSERTIONS _LIBCUDACXX_ENABLE_ASSERTIONS_DEFAULT +# define _LIBCUDACXX_ENABLE_ASSERTIONS _LIBCUDACXX_ENABLE_ASSERTIONS_DEFAULT #endif #if _LIBCUDACXX_ENABLE_ASSERTIONS != 0 && _LIBCUDACXX_ENABLE_ASSERTIONS != 1 -# error "_LIBCUDACXX_ENABLE_ASSERTIONS must be set to 0 or 1" +# error "_LIBCUDACXX_ENABLE_ASSERTIONS must be set to 0 or 1" #endif #if _LIBCUDACXX_ENABLE_ASSERTIONS -# define _LIBCUDACXX_ASSERT(expression, message) \ +# define _LIBCUDACXX_ASSERT(expression, message) \ (_CCCL_DIAG_PUSH \ _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") \ __builtin_expect(static_cast(expression), 1) ? \ @@ -56,13 +56,11 @@ ::_CUDA_VSTD::__libcpp_verbose_abort("%s:%d: assertion %s failed: %s", __FILE__, __LINE__, #expression, message) _CCCL_DIAG_POP) #elif 0 // !defined(_LIBCUDACXX_ASSERTIONS_DISABLE_ASSUME) && __has_builtin(__builtin_assume) -# define _LIBCUDACXX_ASSERT(expression, message) \ - (_CCCL_DIAG_PUSH \ - _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") \ - __builtin_assume(static_cast(expression)) \ - _CCCL_DIAG_POP) +# define _LIBCUDACXX_ASSERT(expression, message) \ + (_CCCL_DIAG_PUSH _CCCL_DIAG_SUPPRESS_CLANG("-Wassume") __builtin_assume(static_cast(expression)) \ + _CCCL_DIAG_POP) #else -# define _LIBCUDACXX_ASSERT(expression, message) ((void)0) +# define _LIBCUDACXX_ASSERT(expression, message) ((void) 0) #endif #endif // _LIBCUDACXX___ASSERT diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__availability b/libcudacxx/include/cuda/std/detail/libcxx/include/__availability index 37ac58934ea..f89d2abf1a0 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__availability +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__availability @@ -63,226 +63,230 @@ // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability - // For backwards compatibility, allow users to define _LIBCUDACXX_DISABLE_AVAILABILITY // for a while. 
#if defined(_LIBCUDACXX_DISABLE_AVAILABILITY) -# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif +# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif #endif // Availability markup is disabled when building the library, or when the compiler // doesn't support the proper attributes. -#if defined(_LIBCUDACXX_BUILDING_LIBRARY) || \ - defined(_LIBCXXABI_BUILDING_LIBRARY) || \ - !__has_feature(attribute_availability_with_strict) || \ - !__has_feature(attribute_availability_in_templates) || \ - !__has_extension(pragma_clang_attribute_external_declaration) -# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif +#if defined(_LIBCUDACXX_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) \ + || !__has_feature(attribute_availability_with_strict) || !__has_feature(attribute_availability_in_templates) \ + || !__has_extension(pragma_clang_attribute_external_declaration) +# if !defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif #endif #if defined(_LIBCUDACXX_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) - // This controls the availability of std::shared_mutex and std::shared_timed_mutex, - // which were added to the dylib later. -# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX +// This controls the availability of std::shared_mutex and std::shared_timed_mutex, +// which were added to the dylib later. +# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex - // These macros control the availability of std::bad_optional_access and - // other exception types. These were put in the shared library to prevent - // code bloat from every user program defining the vtable for these exception - // types. - // - // Note that when exceptions are disabled, the methods that normally throw - // these exceptions can be used even on older deployment targets, but those - // methods will abort instead of throwing. -# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST - - // This controls the availability of std::uncaught_exceptions(). -# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS - - // This controls the availability of the sized version of ::operator delete, - // ::operator delete[], and their align_val_t variants, which were all added - // in C++17, and hence not present in early dylibs. -# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE - - // This controls the availability of the std::future_error exception. - // - // Note that when exceptions are disabled, the methods that normally throw - // std::future_error can be used even on older deployment targets, but those - // methods will abort instead of throwing. -# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR - - // This controls the availability of std::type_info's vtable. - // I can't imagine how using std::type_info can work at all if - // this isn't supported. -# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE - - // This controls the availability of std::locale::category members - // (e.g. std::locale::collate), which are defined in the dylib. 
-# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY - - // This controls the availability of atomic operations on std::shared_ptr - // (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared - // lock table located in the dylib. -# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR - - // These macros control the availability of all parts of that - // depend on something in the dylib. -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP +// These macros control the availability of std::bad_optional_access and +// other exception types. These were put in the shared library to prevent +// code bloat from every user program defining the vtable for these exception +// types. +// +// Note that when exceptions are disabled, the methods that normally throw +// these exceptions can be used even on older deployment targets, but those +// methods will abort instead of throwing. +# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST + +// This controls the availability of std::uncaught_exceptions(). +# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS + +// This controls the availability of the sized version of ::operator delete, +// ::operator delete[], and their align_val_t variants, which were all added +// in C++17, and hence not present in early dylibs. +# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE + +// This controls the availability of the std::future_error exception. +// +// Note that when exceptions are disabled, the methods that normally throw +// std::future_error can be used even on older deployment targets, but those +// methods will abort instead of throwing. +# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR + +// This controls the availability of std::type_info's vtable. +// I can't imagine how using std::type_info can work at all if +// this isn't supported. +# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE + +// This controls the availability of std::locale::category members +// (e.g. std::locale::collate), which are defined in the dylib. +# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY + +// This controls the availability of atomic operations on std::shared_ptr +// (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared +// lock table located in the dylib. +# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR + +// These macros control the availability of all parts of that +// depend on something in the dylib. +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem - // This controls the availability of floating-point std::to_chars functions. - // These overloads were added later than the integer overloads. -# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT +// This controls the availability of floating-point std::to_chars functions. +// These overloads were added later than the integer overloads. +# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT - // This controls the availability of the C++20 synchronization library, - // which requires shared library support for various operations - // (see libcxx/src/atomic.cpp). This includes , , - // , and notification functions on std::atomic. 
-# define _LIBCUDACXX_AVAILABILITY_SYNC +// This controls the availability of the C++20 synchronization library, +// which requires shared library support for various operations +// (see libcxx/src/atomic.cpp). This includes , , +// , and notification functions on std::atomic. +# define _LIBCUDACXX_AVAILABILITY_SYNC // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore - // This controls the availability of the C++20 format library. - // The library is in development and not ABI stable yet. P2216 is - // retroactively accepted in C++20. This paper contains ABI breaking - // changes. -# define _LIBCUDACXX_AVAILABILITY_FORMAT +// This controls the availability of the C++20 format library. +// The library is in development and not ABI stable yet. P2216 is +// retroactively accepted in C++20. This paper contains ABI breaking +// changes. +# define _LIBCUDACXX_AVAILABILITY_FORMAT // # define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format - // This controls whether the default verbose termination function is - // provided by the library. - // - // Note that when users provide their own custom function, it doesn't - // matter whether the dylib provides a default function, and the - // availability markup can actually give a false positive diagnostic - // (it will think that no function is provided, when in reality the - // user has provided their own). - // - // Users can pass -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED - // to the compiler to tell the library not to define its own verbose abort. - // Note that defining this macro but failing to define a custom function - // will lead to a load-time error on back-deployment targets, so it should - // be avoided. +// This controls whether the default verbose termination function is +// provided by the library. +// +// Note that when users provide their own custom function, it doesn't +// matter whether the dylib provides a default function, and the +// availability markup can actually give a false positive diagnostic +// (it will think that no function is provided, when in reality the +// user has provided their own). +// +// Users can pass -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED +// to the compiler to tell the library not to define its own verbose abort. +// Note that defining this macro but failing to define a custom function +// will lead to a load-time error on back-deployment targets, so it should +// be avoided. 
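The comment block above describes the -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED opt-out. As a rough sketch only, assuming a printf-style variadic signature inferred from the call made by _LIBCUDACXX_ASSERT earlier in this patch (namespace qualification omitted here; the call site spells it ::_CUDA_VSTD::__libcpp_verbose_abort), a user providing their own handler might write:

    // Hypothetical user translation unit, built with
    //   -D_LIBCUDACXX_AVAILABILITY_CUSTOM_VERBOSE_ABORT_PROVIDED
    // The exact declaration is an assumption, not taken from this patch.
    #include <cstdarg>
    #include <cstdio>
    #include <cstdlib>

    void __libcpp_verbose_abort(const char* __format, ...)
    {
      std::va_list __args;
      va_start(__args, __format);
      std::vfprintf(stderr, __format, __args);
      va_end(__args);
      std::abort();
    }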
// # define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #elif defined(__APPLE__) -# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex -# endif - - // Note: bad_optional_access & friends were not introduced in the matching - // macOS and iOS versions, so the version mismatch between macOS and others - // is intended. -# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macos,strict,introduced=10.13))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) -# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS \ - _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST \ - _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS - -# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - -# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE \ - __attribute__((availability(macos,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) - -# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR \ - __attribute__((availability(ios,strict,introduced=6.0))) - -# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR \ - __attribute__((availability(macos,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) - -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM \ - __attribute__((availability(macos,strict,introduced=10.15))) \ - __attribute__((availability(ios,strict,introduced=13.0))) \ - __attribute__((availability(tvos,strict,introduced=13.0))) \ - __attribute__((availability(watchos,strict,introduced=6.0))) -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), 
apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem -# endif - -# define _LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT \ - __attribute__((unavailable)) - -# define _LIBCUDACXX_AVAILABILITY_SYNC \ - __attribute__((availability(macos,strict,introduced=11.0))) \ - __attribute__((availability(ios,strict,introduced=14.0))) \ - __attribute__((availability(tvos,strict,introduced=14.0))) \ - __attribute__((availability(watchos,strict,introduced=7.0))) -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore -# endif - -# define _LIBCUDACXX_AVAILABILITY_FORMAT \ - __attribute__((unavailable)) -# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format - -# define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY +# define _LIBCUDACXX_AVAILABILITY_SHARED_MUTEX \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex +# endif + +// Note: bad_optional_access & friends were not introduced in the matching +// macOS and iOS versions, so the version mismatch between macOS and others +// is intended. 
+# define _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS \ + __attribute__((availability(macos, strict, introduced = 10.13))) \ + __attribute__((availability(ios, strict, introduced = 12.0))) \ + __attribute__((availability(tvos, strict, introduced = 12.0))) \ + __attribute__((availability(watchos, strict, introduced = 5.0))) +# define _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS + +# define _LIBCUDACXX_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) + +# define _LIBCUDACXX_AVAILABILITY_SIZED_NEW_DELETE \ + __attribute__((availability(macos, strict, introduced = 10.12))) \ + __attribute__((availability(ios, strict, introduced = 10.0))) \ + __attribute__((availability(tvos, strict, introduced = 10.0))) \ + __attribute__((availability(watchos, strict, introduced = 3.0))) + +# define _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR __attribute__((availability(ios, strict, introduced = 6.0))) + +# define _LIBCUDACXX_AVAILABILITY_TYPEINFO_VTABLE \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_LOCALE_CATEGORY \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_ATOMIC_SHARED_PTR \ + __attribute__((availability(macos, strict, introduced = 10.9))) \ + __attribute__((availability(ios, strict, introduced = 7.0))) + +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM \ + __attribute__((availability(macos, strict, introduced = 10.15))) \ + __attribute__((availability(ios, strict, introduced = 13.0))) \ + __attribute__((availability(tvos, strict, introduced = 13.0))) \ + __attribute__((availability(watchos, strict, introduced = 6.0))) +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_PUSH \ + _Pragma("clang attribute push(__attribute__((availability(macos,strict,introduced=10.15))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), " \ + "apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), " \ + "apply_to=any(function,record))") +# define _LIBCUDACXX_AVAILABILITY_FILESYSTEM_POP \ + _Pragma("clang attribute pop") _Pragma("clang attribute pop") _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem +# endif + +# define 
_LIBCUDACXX_AVAILABILITY_TO_CHARS_FLOATING_POINT __attribute__((unavailable)) + +# define _LIBCUDACXX_AVAILABILITY_SYNC \ + __attribute__((availability(macos, strict, introduced = 11.0))) \ + __attribute__((availability(ios, strict, introduced = 14.0))) \ + __attribute__((availability(tvos, strict, introduced = 14.0))) \ + __attribute__((availability(watchos, strict, introduced = 7.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) \ + || (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 140000) \ + || (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 140000) \ + || (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) \ + && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 70000) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore +# endif + +# define _LIBCUDACXX_AVAILABILITY_FORMAT __attribute__((unavailable)) +# define _LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format + +# define _LIBCUDACXX_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #else // ...New vendors can add availability markup here... -# error "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" +# error \ + "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" #endif @@ -290,15 +294,15 @@ // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. 
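To make the convenience macros just below concrete, here is a minimal sketch of how an availability marker is typically attached to a declaration; the helper shown is illustrative and not quoted from this patch. On Apple targets the marker expands to the strict __attribute__((availability(...))) chain defined above, and to nothing when vendor annotations are disabled.

    // Illustrative only: marking a throwing helper with an availability macro.
    _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS
    inline _LIBCUDACXX_INLINE_VISIBILITY void __throw_bad_optional_access()
    {
      // throws bad_optional_access, or terminates when
      // _LIBCUDACXX_NO_EXCEPTIONS is defined
    }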
#if defined(_LIBCUDACXX_NO_EXCEPTIONS) -# define _LIBCUDACXX_AVAILABILITY_FUTURE -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_FUTURE +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS #else -# define _LIBCUDACXX_AVAILABILITY_FUTURE _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCUDACXX_AVAILABILITY_FUTURE _LIBCUDACXX_AVAILABILITY_FUTURE_ERROR +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCUDACXX_AVAILABILITY_BAD_ANY_CAST +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCUDACXX_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCUDACXX_AVAILABILITY_BAD_VARIANT_ACCESS #endif #endif // _LIBCUDACXX___AVAILABILITY diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference b/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference index 4ce42eb4c6a..88325c3d5c9 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__bit_reference @@ -10,9 +10,9 @@ #ifndef _LIBCUDACXX___BIT_REFERENCE #define _LIBCUDACXX___BIT_REFERENCE -##include -#include +##include #include +#include #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -22,229 +22,259 @@ # pragma system_header #endif // no system header -_LIBCUDACXX_PUSH_MACROS + _LIBCUDACXX_PUSH_MACROS #include <__undef_macros> + _LIBCUDACXX_BEGIN_NAMESPACE_STD -_LIBCUDACXX_BEGIN_NAMESPACE_STD - -template class __bit_iterator; -template class __bit_const_reference; +template +class __bit_iterator; +template +class __bit_const_reference; template struct __has_storage_type { - static const bool value = false; + static const bool value = false; }; template ::value> class __bit_reference { - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__storage_pointer __storage_pointer; - - __storage_pointer __seg_; - __storage_type __mask_; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__storage_pointer __storage_pointer; - friend typename _Cp::__self; + __storage_pointer __seg_; + __storage_type __mask_; - friend class __bit_const_reference<_Cp>; - friend class __bit_iterator<_Cp, false>; -public: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference(const __bit_reference&) = default; + friend typename _Cp::__self; - _LIBCUDACXX_INLINE_VISIBILITY operator bool() const noexcept - {return static_cast(*__seg_ & __mask_);} - _LIBCUDACXX_INLINE_VISIBILITY bool operator ~() const noexcept - {return !static_cast(*this);} + friend class __bit_const_reference<_Cp>; + friend class __bit_iterator<_Cp, false>; - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference& operator=(bool __x) noexcept +public: + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference(const __bit_reference&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY operator bool() const noexcept + { + return static_cast(*__seg_ & __mask_); + } + _LIBCUDACXX_INLINE_VISIBILITY bool operator~() 
const noexcept + { + return !static_cast(*this); + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference& operator=(bool __x) noexcept + { + if (__x) { - if (__x) - *__seg_ |= __mask_; - else - *__seg_ &= ~__mask_; - return *this; + *__seg_ |= __mask_; } + else + { + *__seg_ &= ~__mask_; + } + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference& operator=(const __bit_reference& __x) noexcept + { + return operator=(static_cast(__x)); + } + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept + { + *__seg_ ^= __mask_; + } + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const noexcept + { + return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_))); + } - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference& operator=(const __bit_reference& __x) noexcept - {return operator=(static_cast(__x));} - - _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {*__seg_ ^= __mask_;} - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const noexcept - {return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_reference(__storage_pointer __s, __storage_type __m) noexcept - : __seg_(__s), __mask_(__m) {} + _LIBCUDACXX_INLINE_VISIBILITY __bit_reference(__storage_pointer __s, __storage_type __m) noexcept + : __seg_(__s) + , __mask_(__m) + {} }; template class __bit_reference<_Cp, false> -{ -}; +{}; template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, __bit_reference<_Cp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Cp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, __bit_reference<_Dp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Dp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(__bit_reference<_Cp> __x, bool& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, bool& __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void -swap(bool& __x, __bit_reference<_Cp> __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void swap(bool& __x, __bit_reference<_Cp> __y) noexcept { - bool __t = __x; - __x = __y; - __y = __t; + bool __t = __x; + __x = __y; + __y = __t; } template class __bit_const_reference { - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__const_storage_pointer __storage_pointer; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__const_storage_pointer __storage_pointer; + + __storage_pointer __seg_; + __storage_type __mask_; - __storage_pointer __seg_; - __storage_type __mask_; + friend typename _Cp::__self; + friend class __bit_iterator<_Cp, true>; - friend typename _Cp::__self; - friend class __bit_iterator<_Cp, true>; public: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_const_reference(const __bit_const_reference&) = default; + _LIBCUDACXX_INLINE_VISIBILITY __bit_const_reference(const __bit_const_reference&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY __bit_const_reference(const __bit_reference<_Cp>& __x) noexcept + : __seg_(__x.__seg_) + , __mask_(__x.__mask_) + {} - 
_LIBCUDACXX_INLINE_VISIBILITY - __bit_const_reference(const __bit_reference<_Cp>& __x) noexcept - : __seg_(__x.__seg_), __mask_(__x.__mask_) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr operator bool() const noexcept + { + return static_cast(*__seg_ & __mask_); + } - _LIBCUDACXX_INLINE_VISIBILITY constexpr operator bool() const noexcept - {return static_cast(*__seg_ & __mask_);} + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const noexcept + { + return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_))); + } - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const noexcept - {return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: - _LIBCUDACXX_INLINE_VISIBILITY - constexpr - __bit_const_reference(__storage_pointer __s, __storage_type __m) noexcept - : __seg_(__s), __mask_(__m) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bit_const_reference(__storage_pointer __s, __storage_type __m) noexcept + : __seg_(__s) + , __mask_(__m) + {} - __bit_const_reference& operator=(const __bit_const_reference&) = delete; + __bit_const_reference& operator=(const __bit_const_reference&) = delete; }; // find template -__bit_iterator<_Cp, _IsConst> -__find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) +__bit_iterator<_Cp, _IsConst> __find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first.__seg_ & __m; + if (__b) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); - if (__n == __dn) - return __first + __n; - __n -= __dn; - ++__first.__seg_; + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - if (*__first.__seg_) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(*__first.__seg_))); - // do last partial word - if (__n > 0) + if (__n == __dn) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return __first + __n; } - return _It(__first.__seg_, static_cast(__n)); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + if (*__first.__seg_) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(*__first.__seg_))); + } + } + // do last partial 
word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + if (__b) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + } + } + return _It(__first.__seg_, static_cast(__n)); } template -__bit_iterator<_Cp, _IsConst> -__find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) +__bit_iterator<_Cp, _IsConst> __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = ~*__first.__seg_ & __m; + if (__b) { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = ~*__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); - if (__n == __dn) - return __first + __n; - __n -= __dn; - ++__first.__seg_; + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + if (__n == __dn) { - __storage_type __b = ~*__first.__seg_; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return __first + __n; } - // do last partial word - if (__n > 0) + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __storage_type __b = ~*__first.__seg_; + if (__b) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = ~*__first.__seg_ & __m; - if (__b) - return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); } - return _It(__first.__seg_, static_cast(__n)); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = ~*__first.__seg_ & __m; + if (__b) + { + return _It(__first.__seg_, static_cast(_CUDA_VSTD::__libcpp_ctz(__b))); + } + } + return _It(__first.__seg_, static_cast(__n)); } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, _IsConst> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, _IsConst> find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { - if (static_cast(__value_)) - return __find_bool_true(__first, static_cast(__last - __first)); - return __find_bool_false(__first, static_cast(__last - __first)); + if (static_cast(__value_)) + { + return __find_bool_true(__first, static_cast(__last - __first)); + } + return __find_bool_false(__first, static_cast(__last - __first)); } // count @@ 
-253,627 +283,633 @@ template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - typedef typename _It::difference_type difference_type; - const int __bits_per_word = _It::__bits_per_word; - difference_type __r = 0; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_); - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); - } - return __r; + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + typedef typename _It::difference_type difference_type; + const int __bits_per_word = _It::__bits_per_word; + difference_type __r = 0; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __r = _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __r += _CUDA_VSTD::__libcpp_popcount(*__first.__seg_ & __m); + } + return __r; } template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, _IsConst> _It; - typedef typename _It::__storage_type __storage_type; - typedef typename _It::difference_type difference_type; - const int __bits_per_word = _It::__bits_per_word; - difference_type __r = 0; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_); - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); - } - return __r; + typedef __bit_iterator<_Cp, _IsConst> _It; + typedef typename _It::__storage_type __storage_type; + typedef typename _It::difference_type difference_type; + const int __bits_per_word = _It::__bits_per_word; + 
difference_type __r = 0; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __r = _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) + { + __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_); + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __r += _CUDA_VSTD::__libcpp_popcount(~*__first.__seg_ & __m); + } + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -typename __bit_iterator<_Cp, _IsConst>::difference_type +inline _LIBCUDACXX_INLINE_VISIBILITY typename __bit_iterator<_Cp, _IsConst>::difference_type count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { - if (static_cast(__value_)) - return __count_bool_true(__first, static_cast(__last - __first)); - return __count_bool_false(__first, static_cast(__last - __first)); + if (static_cast(__value_)) + { + return __count_bool_true(__first, static_cast(__last - __first)); + } + return __count_bool_false(__first, static_cast(__last - __first)); } // fill_n template -void -__fill_n_false(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) +void __fill_n_false(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, false> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - *__first.__seg_ &= ~__m; - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), 0, __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__first.__seg_ &= ~__m; - } + typedef __bit_iterator<_Cp, false> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + *__first.__seg_ &= ~__m; + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), 0, __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) + { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__first.__seg_ &= ~__m; + } } template -void -__fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) 
+void __fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { - typedef __bit_iterator<_Cp, false> _It; - typedef typename _It::__storage_type __storage_type; - const int __bits_per_word = _It::__bits_per_word; - // do first partial word - if (__first.__ctz_ != 0) - { - __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); - __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - *__first.__seg_ |= __m; - __n -= __dn; - ++__first.__seg_; - } - // do middle whole words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), -1, __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last partial word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - *__first.__seg_ |= __m; - } + typedef __bit_iterator<_Cp, false> _It; + typedef typename _It::__storage_type __storage_type; + const int __bits_per_word = _It::__bits_per_word; + // do first partial word + if (__first.__ctz_ != 0) + { + __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); + __storage_type __dn = _CUDA_VSTD::min(__clz_f, __n); + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + *__first.__seg_ |= __m; + __n -= __dn; + ++__first.__seg_; + } + // do middle whole words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memset(_CUDA_VSTD::__to_raw_pointer(__first.__seg_), -1, __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last partial word + if (__n > 0) + { + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__first.__seg_ |= __m; + } } template -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __value_) { - if (__n > 0) + if (__n > 0) + { + if (__value_) { - if (__value_) - __fill_n_true(__first, __n); - else - __fill_n_false(__first, __n); + __fill_n_true(__first, __n); } + else + { + __fill_n_false(__first, __n); + } + } } // fill template -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value_) { - _CUDA_VSTD::fill_n(__first, static_cast(__last - __first), __value_); + _CUDA_VSTD::fill_n(__first, static_cast(__last - __first), __value_); } // copy template -__bit_iterator<_Cp, false> -__copy_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) 
+ { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __nw = __n / __bits_per_word; + _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), + _CUDA_VSTD::__to_raw_pointer(__first.__seg_), + __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + __result.__seg_ += __nw; + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __nw = __n / __bits_per_word; - _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), - _CUDA_VSTD::__to_raw_pointer(__first.__seg_), - __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - __result.__seg_ += __nw; - // do last word - if (__n > 0) - { - __first.__seg_ += __nw; - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(__n); - } + __first.__seg_ += __nw; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(__n); } - return __result; + } + return __result; } template -__bit_iterator<_Cp, false> -__copy_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - static const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + static const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first.__seg_ & __m; + unsigned __clz_r = 
__bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + { + *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); + } + else + { + *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); + } + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) + { + __storage_type __b = *__first.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + ++__result.__seg_; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b >> __clz_r; + } + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); - else - *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) - { - __storage_type __b = *__first.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - ++__result.__seg_; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b >> __clz_r; - } - // do last word - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first.__seg_ & __m; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << __result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - 
*__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __dn; - __result.__ctz_ = static_cast(__n); - } - } + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first.__seg_ & __m; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __dn; + __result.__ctz_ = static_cast(__n); + } } - return __result; + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__first.__ctz_ == __result.__ctz_) - return __copy_aligned(__first, __last, __result); - return __copy_unaligned(__first, __last, __result); + if (__first.__ctz_ == __result.__ctz_) + { + return __copy_aligned(__first, __last, __result); + } + return __copy_unaligned(__first, __last, __result); } // copy_backward template -__bit_iterator<_Cp, false> -__copy_backward_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_backward_aligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__last.__ctz_ != 0) + { + difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz = __bits_per_word - __last.__ctz_; + __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); + __storage_type __b = *__last.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ == 0 || __n == 0 + // do middle words + __storage_type __nw = __n / __bits_per_word; + __result.__seg_ -= __nw; + __last.__seg_ -= __nw; + _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), + _CUDA_VSTD::__to_raw_pointer(__last.__seg_), + __nw * sizeof(__storage_type)); + __n -= __nw * __bits_per_word; + // do last word if (__n > 0) { - // do first word - if (__last.__ctz_ != 0) - { - difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); - __storage_type __b = *__last.__seg_ & __m; - 
*__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ == 0 || __n == 0 - // do middle words - __storage_type __nw = __n / __bits_per_word; - __result.__seg_ -= __nw; - __last.__seg_ -= __nw; - _CUDA_VSTD::memmove(_CUDA_VSTD::__to_raw_pointer(__result.__seg_), - _CUDA_VSTD::__to_raw_pointer(__last.__seg_), - __nw * sizeof(__storage_type)); - __n -= __nw * __bits_per_word; - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - *--__result.__seg_ &= ~__m; - *__result.__seg_ |= __b; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - } + __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + *--__result.__seg_ &= ~__m; + *__result.__seg_ |= __b; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); } - return __result; + } + return __result; } template -__bit_iterator<_Cp, false> -__copy_backward_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, - __bit_iterator<_Cp, false> __result) +__bit_iterator<_Cp, false> __copy_backward_unaligned( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - typedef __bit_iterator<_Cp, _IsConst> _In; - typedef typename _In::difference_type difference_type; - typedef typename _In::__storage_type __storage_type; - const int __bits_per_word = _In::__bits_per_word; - difference_type __n = __last - __first; - if (__n > 0) + typedef __bit_iterator<_Cp, _IsConst> _In; + typedef typename _In::difference_type difference_type; + typedef typename _In::__storage_type __storage_type; + const int __bits_per_word = _In::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__last.__ctz_ != 0) { - // do first word - if (__last.__ctz_ != 0) - { - difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); - __n -= __dn; - unsigned __clz_l = __bits_per_word - __last.__ctz_; - __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); - __storage_type __b = *__last.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min(__dn, static_cast(__result.__ctz_)); - if (__ddn > 0) - { - __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __last.__ctz_) - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - else - *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); - __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - } - if (__dn > 0) - { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - __last.__ctz_ -= __dn + __ddn; - *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); - } - // __last.__ctz_ = 0 - } - // __last.__ctz_ == 0 || __n == 0 - // __result.__ctz_ != 0 || __n == 0 - // do middle words - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __m = ~__storage_type(0) >> __clz_r; - for (; __n >= __bits_per_word; 
__n -= __bits_per_word) + difference_type __dn = _CUDA_VSTD::min(static_cast(__last.__ctz_), __n); + __n -= __dn; + unsigned __clz_l = __bits_per_word - __last.__ctz_; + __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); + __storage_type __b = *__last.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min(__dn, static_cast(__result.__ctz_)); + if (__ddn > 0) + { + __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __last.__ctz_) { - __storage_type __b = *--__last.__seg_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> __clz_r; - *--__result.__seg_ &= __m; - *__result.__seg_ |= __b << __result.__ctz_; + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); } - // do last word - if (__n > 0) + else { - __m = ~__storage_type(0) << (__bits_per_word - __n); - __storage_type __b = *--__last.__seg_ & __m; - __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__result.__ctz_)); - __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); - __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + - __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - // __result.__ctz_ == 0 - --__result.__seg_; - __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); - __m = ~__storage_type(0) << __result.__ctz_; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn)); - } + *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); } + __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; + } + if (__dn > 0) + { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); + __m = ~__storage_type(0) << __result.__ctz_; + *__result.__seg_ &= ~__m; + __last.__ctz_ -= __dn + __ddn; + *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); + } + // __last.__ctz_ = 0 + } + // __last.__ctz_ == 0 || __n == 0 + // __result.__ctz_ != 0 || __n == 0 + // do middle words + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __m = ~__storage_type(0) >> __clz_r; + for (; __n >= __bits_per_word; __n -= __bits_per_word) + { + __storage_type __b = *--__last.__seg_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> __clz_r; + *--__result.__seg_ &= __m; + *__result.__seg_ |= __b << __result.__ctz_; } - return __result; + // do last word + if (__n > 0) + { + __m = ~__storage_type(0) << (__bits_per_word - __n); + __storage_type __b = *--__last.__seg_ & __m; + __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__result.__ctz_)); + __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); + __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + // __result.__ctz_ == 0 + --__result.__seg_; + __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); + __m = ~__storage_type(0) << __result.__ctz_; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b << 
(__result.__ctz_ - (__bits_per_word - __n - __dn)); + } + } + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> -copy_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy_backward( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - if (__last.__ctz_ == __result.__ctz_) - return __copy_backward_aligned(__first, __last, __result); - return __copy_backward_unaligned(__first, __last, __result); + if (__last.__ctz_ == __result.__ctz_) + { + return __copy_backward_aligned(__first, __last, __result); + } + return __copy_backward_unaligned(__first, __last, __result); } // move template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> move(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return _CUDA_VSTD::copy(__first, __last, __result); + return _CUDA_VSTD::copy(__first, __last, __result); } // move_backward template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<_Cp, false> -move_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<_Cp, false> move_backward( + __bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { - return _CUDA_VSTD::copy_backward(__first, __last, __result); + return _CUDA_VSTD::copy_backward(__first, __last, __result); } // swap_ranges template -__bit_iterator<__C2, false> -__swap_ranges_aligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, - __bit_iterator<__C2, false> __result) +__bit_iterator<__C2, false> __swap_ranges_aligned( + __bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { - typedef __bit_iterator<__C1, false> _I1; - typedef typename _I1::difference_type difference_type; - typedef typename _I1::__storage_type __storage_type; - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<__C1, false> _I1; + typedef typename _I1::difference_type difference_type; + typedef typename _I1::__storage_type __storage_type; + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) + { + swap(*__first.__seg_, *__result.__seg_); + } + // do last word if (__n > 0) { - // do first word - 
if (__first.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) - swap(*__first.__seg_, *__result.__seg_); - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1; - *__first.__seg_ |= __b2; - __result.__ctz_ = static_cast(__n); - } + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1; + *__first.__seg_ |= __b2; + __result.__ctz_ = static_cast(__n); } - return __result; + } + return __result; } template -__bit_iterator<__C2, false> -__swap_ranges_unaligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, - __bit_iterator<__C2, false> __result) +__bit_iterator<__C2, false> __swap_ranges_unaligned( + __bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { - typedef __bit_iterator<__C1, false> _I1; - typedef typename _I1::difference_type difference_type; - typedef typename _I1::__storage_type __storage_type; - const int __bits_per_word = _I1::__bits_per_word; - difference_type __n = __last - __first; + typedef __bit_iterator<__C1, false> _I1; + typedef typename _I1::difference_type difference_type; + typedef typename _I1::__storage_type __storage_type; + const int __bits_per_word = _I1::__bits_per_word; + difference_type __n = __last - __first; + if (__n > 0) + { + // do first word + if (__first.__ctz_ != 0) + { + unsigned __clz_f = __bits_per_word - __first.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + if (__result.__ctz_ > __first.__ctz_) + { + unsigned __s = __result.__ctz_ - __first.__ctz_; + *__result.__seg_ |= __b1 << __s; + *__first.__seg_ |= __b2 >> __s; + } + else + { + unsigned __s = __first.__ctz_ - __result.__ctz_; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + } + __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); + __dn -= __ddn; 
+ if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + unsigned __s = __first.__ctz_ + __ddn; + *__result.__seg_ |= __b1 >> __s; + *__first.__seg_ |= __b2 << __s; + __result.__ctz_ = static_cast(__dn); + } + ++__first.__seg_; + // __first.__ctz_ = 0; + } + // __first.__ctz_ == 0; + // do middle words + __storage_type __m = ~__storage_type(0) << __result.__ctz_; + unsigned __clz_r = __bits_per_word - __result.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) + { + __storage_type __b1 = *__first.__seg_; + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ = __b2 >> __result.__ctz_; + ++__result.__seg_; + __b2 = *__result.__seg_ & ~__m; + *__result.__seg_ &= __m; + *__result.__seg_ |= __b1 >> __clz_r; + *__first.__seg_ |= __b2 << __clz_r; + } + // do last word if (__n > 0) { - // do first word - if (__first.__ctz_ != 0) - { - unsigned __clz_f = __bits_per_word - __first.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - if (__result.__ctz_ > __first.__ctz_) - { - unsigned __s = __result.__ctz_ - __first.__ctz_; - *__result.__seg_ |= __b1 << __s; - *__first.__seg_ |= __b2 >> __s; - } - else - { - unsigned __s = __first.__ctz_ - __result.__ctz_; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - } - __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - unsigned __s = __first.__ctz_ + __ddn; - *__result.__seg_ |= __b1 >> __s; - *__first.__seg_ |= __b2 << __s; - __result.__ctz_ = static_cast(__dn); - } - ++__first.__seg_; - // __first.__ctz_ = 0; - } - // __first.__ctz_ == 0; - // do middle words - __storage_type __m = ~__storage_type(0) << __result.__ctz_; - unsigned __clz_r = __bits_per_word - __result.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) - { - __storage_type __b1 = *__first.__seg_; - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ = __b2 >> __result.__ctz_; - ++__result.__seg_; - __b2 = *__result.__seg_ & ~__m; - *__result.__seg_ &= __m; - *__result.__seg_ |= __b1 >> __clz_r; - *__first.__seg_ |= __b2 << __clz_r; - } - // do last word - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b1 = *__first.__seg_ & __m; - *__first.__seg_ &= ~__m; - __storage_type __dn = _CUDA_VSTD::min<__storage_type>(__n, __clz_r); - __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - __storage_type __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 << __result.__ctz_; - *__first.__seg_ |= __b2 >> 
__result.__ctz_; - __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; - __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __b2 = *__result.__seg_ & __m; - *__result.__seg_ &= ~__m; - *__result.__seg_ |= __b1 >> __dn; - *__first.__seg_ |= __b2 << __dn; - __result.__ctz_ = static_cast(__n); - } - } + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b1 = *__first.__seg_ & __m; + *__first.__seg_ &= ~__m; + __storage_type __dn = _CUDA_VSTD::min<__storage_type>(__n, __clz_r); + __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + __storage_type __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 << __result.__ctz_; + *__first.__seg_ |= __b2 >> __result.__ctz_; + __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; + __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __b2 = *__result.__seg_ & __m; + *__result.__seg_ &= ~__m; + *__result.__seg_ |= __b1 >> __dn; + *__first.__seg_ |= __b2 << __dn; + __result.__ctz_ = static_cast(__n); + } } - return __result; + } + return __result; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -__bit_iterator<__C2, false> -swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __last1, - __bit_iterator<__C2, false> __first2) +inline _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator<__C2, false> swap_ranges( + __bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __last1, __bit_iterator<__C2, false> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return __swap_ranges_aligned(__first1, __last1, __first2); - return __swap_ranges_unaligned(__first1, __last1, __first2); + if (__first1.__ctz_ == __first2.__ctz_) + { + return __swap_ranges_aligned(__first1, __last1, __first2); + } + return __swap_ranges_unaligned(__first1, __last1, __first2); } // rotate @@ -881,413 +917,469 @@ swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __ template struct __bit_array { - typedef typename _Cp::difference_type difference_type; - typedef typename _Cp::__storage_type __storage_type; - typedef typename _Cp::__storage_pointer __storage_pointer; - typedef typename _Cp::iterator iterator; - static const unsigned __bits_per_word = _Cp::__bits_per_word; - static const unsigned _Np = 4; - - difference_type __size_; - __storage_type __word_[_Np]; - - _LIBCUDACXX_INLINE_VISIBILITY static difference_type capacity() - {return static_cast(_Np * __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY explicit __bit_array(difference_type __s) : __size_(__s) {} - _LIBCUDACXX_INLINE_VISIBILITY iterator begin() - { - return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]), 0); - } - _LIBCUDACXX_INLINE_VISIBILITY iterator end() - { - return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]) + __size_ / __bits_per_word, - static_cast(__size_ % __bits_per_word)); - } + typedef typename _Cp::difference_type difference_type; + typedef typename _Cp::__storage_type __storage_type; + typedef typename _Cp::__storage_pointer __storage_pointer; + typedef typename _Cp::iterator iterator; + static const unsigned __bits_per_word = _Cp::__bits_per_word; + static const unsigned _Np = 4; + + difference_type __size_; + __storage_type __word_[_Np]; + + _LIBCUDACXX_INLINE_VISIBILITY 
static difference_type capacity() + { + return static_cast(_Np * __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY explicit __bit_array(difference_type __s) + : __size_(__s) + {} + _LIBCUDACXX_INLINE_VISIBILITY iterator begin() + { + return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]), 0); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator end() + { + return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]) + __size_ / __bits_per_word, + static_cast(__size_ % __bits_per_word)); + } }; template __bit_iterator<_Cp, false> rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) { - typedef __bit_iterator<_Cp, false> _I1; - typedef typename _I1::difference_type difference_type; - difference_type __d1 = __middle - __first; - difference_type __d2 = __last - __middle; - _I1 __r = __first + __d2; - while (__d1 != 0 && __d2 != 0) + typedef __bit_iterator<_Cp, false> _I1; + typedef typename _I1::difference_type difference_type; + difference_type __d1 = __middle - __first; + difference_type __d2 = __last - __middle; + _I1 __r = __first + __d2; + while (__d1 != 0 && __d2 != 0) + { + if (__d1 <= __d2) { - if (__d1 <= __d2) - { - if (__d1 <= __bit_array<_Cp>::capacity()) - { - __bit_array<_Cp> __b(__d1); - _CUDA_VSTD::copy(__first, __middle, __b.begin()); - _CUDA_VSTD::copy(__b.begin(), __b.end(), _CUDA_VSTD::copy(__middle, __last, __first)); - break; - } - else - { - __bit_iterator<_Cp, false> __mp = _CUDA_VSTD::swap_ranges(__first, __middle, __middle); - __first = __middle; - __middle = __mp; - __d2 -= __d1; - } - } - else - { - if (__d2 <= __bit_array<_Cp>::capacity()) - { - __bit_array<_Cp> __b(__d2); - _CUDA_VSTD::copy(__middle, __last, __b.begin()); - _CUDA_VSTD::copy_backward(__b.begin(), __b.end(), _CUDA_VSTD::copy_backward(__first, __middle, __last)); - break; - } - else - { - __bit_iterator<_Cp, false> __mp = __first + __d2; - _CUDA_VSTD::swap_ranges(__first, __mp, __middle); - __first = __mp; - __d1 -= __d2; - } - } + if (__d1 <= __bit_array<_Cp>::capacity()) + { + __bit_array<_Cp> __b(__d1); + _CUDA_VSTD::copy(__first, __middle, __b.begin()); + _CUDA_VSTD::copy(__b.begin(), __b.end(), _CUDA_VSTD::copy(__middle, __last, __first)); + break; + } + else + { + __bit_iterator<_Cp, false> __mp = _CUDA_VSTD::swap_ranges(__first, __middle, __middle); + __first = __middle; + __middle = __mp; + __d2 -= __d1; + } + } + else + { + if (__d2 <= __bit_array<_Cp>::capacity()) + { + __bit_array<_Cp> __b(__d2); + _CUDA_VSTD::copy(__middle, __last, __b.begin()); + _CUDA_VSTD::copy_backward(__b.begin(), __b.end(), _CUDA_VSTD::copy_backward(__first, __middle, __last)); + break; + } + else + { + __bit_iterator<_Cp, false> __mp = __first + __d2; + _CUDA_VSTD::swap_ranges(__first, __mp, __middle); + __first = __mp; + __d1 -= __d2; + } } - return __r; + } + return __r; } // equal template -bool -__equal_unaligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, - __bit_iterator<_Cp, _IC2> __first2) +bool __equal_unaligned( + __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - typedef __bit_iterator<_Cp, _IC1> _It; - typedef typename _It::difference_type difference_type; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; - if (__n > 0) + typedef __bit_iterator<_Cp, _IC1> _It; + typedef typename _It::difference_type difference_type; + 
typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) + { + // do first word + if (__first1.__ctz_ != 0) { - // do first word - if (__first1.__ctz_ != 0) + unsigned __clz_f = __bits_per_word - __first1.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); + __storage_type __b = *__first1.__seg_ & __m; + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); + if (__first2.__ctz_ > __first1.__ctz_) + { + if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) { - unsigned __clz_f = __bits_per_word - __first1.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz_f), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __storage_type __b = *__first1.__seg_ & __m; - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __ddn = _CUDA_VSTD::min<__storage_type>(__dn, __clz_r); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); - if (__first2.__ctz_ > __first1.__ctz_) - { - if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) - return false; - } - else - { - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) - return false; - } - __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); - __dn -= __ddn; - if (__dn > 0) - { - __m = ~__storage_type(0) >> (__bits_per_word - __dn); - if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) - return false; - __first2.__ctz_ = static_cast(__dn); - } - ++__first1.__seg_; - // __first1.__ctz_ = 0; + return false; } - // __first1.__ctz_ == 0; - // do middle words - unsigned __clz_r = __bits_per_word - __first2.__ctz_; - __storage_type __m = ~__storage_type(0) << __first2.__ctz_; - for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) + } + else + { + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) { - __storage_type __b = *__first1.__seg_; - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - ++__first2.__seg_; - if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) - return false; + return false; } - // do last word - if (__n > 0) + } + __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); + __dn -= __ddn; + if (__dn > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __dn); + if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) { - __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__first1.__seg_ & __m; - __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); - __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); - if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) - return false; - __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; - __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); - __n -= __dn; - if (__n > 0) - { - __m = 
~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (__b >> __dn)) - return false; - } + return false; } + __first2.__ctz_ = static_cast(__dn); + } + ++__first1.__seg_; + // __first1.__ctz_ = 0; } - return true; + // __first1.__ctz_ == 0; + // do middle words + unsigned __clz_r = __bits_per_word - __first2.__ctz_; + __storage_type __m = ~__storage_type(0) << __first2.__ctz_; + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) + { + __storage_type __b = *__first1.__seg_; + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + { + return false; + } + ++__first2.__seg_; + if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) + { + return false; + } + } + // do last word + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__first1.__seg_ & __m; + __storage_type __dn = _CUDA_VSTD::min(__n, static_cast(__clz_r)); + __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); + if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) + { + return false; + } + __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; + __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); + __n -= __dn; + if (__n > 0) + { + __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (__b >> __dn)) + { + return false; + } + } + } + } + return true; } template -bool -__equal_aligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, - __bit_iterator<_Cp, _IC2> __first2) +bool __equal_aligned( + __bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - typedef __bit_iterator<_Cp, _IC1> _It; - typedef typename _It::difference_type difference_type; - typedef typename _It::__storage_type __storage_type; - static const int __bits_per_word = _It::__bits_per_word; - difference_type __n = __last1 - __first1; + typedef __bit_iterator<_Cp, _IC1> _It; + typedef typename _It::difference_type difference_type; + typedef typename _It::__storage_type __storage_type; + static const int __bits_per_word = _It::__bits_per_word; + difference_type __n = __last1 - __first1; + if (__n > 0) + { + // do first word + if (__first1.__ctz_ != 0) + { + unsigned __clz = __bits_per_word - __first1.__ctz_; + difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); + __n -= __dn; + __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + { + return false; + } + ++__first2.__seg_; + ++__first1.__seg_; + // __first1.__ctz_ = 0; + // __first2.__ctz_ = 0; + } + // __first1.__ctz_ == 0; + // __first2.__ctz_ == 0; + // do middle words + for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) + { + if (*__first2.__seg_ != *__first1.__seg_) + { + return false; + } + } + // do last word if (__n > 0) { - // do first word - if (__first1.__ctz_ != 0) - { - unsigned __clz = __bits_per_word - __first1.__ctz_; - difference_type __dn = _CUDA_VSTD::min(static_cast(__clz), __n); - __n -= __dn; - __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - ++__first2.__seg_; - ++__first1.__seg_; - // __first1.__ctz_ = 0; - // __first2.__ctz_ = 0; - } - // __first1.__ctz_ == 0; - // __first2.__ctz_ == 0; - // do middle words - for (; __n >= 
__bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) - if (*__first2.__seg_ != *__first1.__seg_) - return false; - // do last word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) - return false; - } + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) + { + return false; + } } - return true; + } + return true; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { - if (__first1.__ctz_ == __first2.__ctz_) - return __equal_aligned(__first1, __last1, __first2); - return __equal_unaligned(__first1, __last1, __first2); + if (__first1.__ctz_ == __first2.__ctz_) + { + return __equal_aligned(__first1, __last1, __first2); + } + return __equal_unaligned(__first1, __last1, __first2); } -template +template class __bit_iterator { public: - typedef typename _Cp::difference_type difference_type; - typedef bool value_type; - typedef __bit_iterator pointer; - typedef typename conditional<_IsConst, __bit_const_reference<_Cp>, __bit_reference<_Cp> >::type reference; - typedef random_access_iterator_tag iterator_category; + typedef typename _Cp::difference_type difference_type; + typedef bool value_type; + typedef __bit_iterator pointer; + typedef typename conditional<_IsConst, __bit_const_reference<_Cp>, __bit_reference<_Cp>>::type reference; + typedef random_access_iterator_tag iterator_category; private: - typedef typename _Cp::__storage_type __storage_type; - typedef typename conditional<_IsConst, typename _Cp::__const_storage_pointer, - typename _Cp::__storage_pointer>::type __storage_pointer; - static const unsigned __bits_per_word = _Cp::__bits_per_word; + typedef typename _Cp::__storage_type __storage_type; + typedef typename conditional<_IsConst, typename _Cp::__const_storage_pointer, typename _Cp::__storage_pointer>::type + __storage_pointer; + static const unsigned __bits_per_word = _Cp::__bits_per_word; - __storage_pointer __seg_; - unsigned __ctz_; + __storage_pointer __seg_; + unsigned __ctz_; public: - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator() noexcept + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator() noexcept #if _CCCL_STD_VER > 2011 - : __seg_(nullptr), __ctz_(0) + : __seg_(nullptr) + , __ctz_(0) #endif - {} - // avoid re-declaring a copy constructor for the non-const version. - using __type_for_copy_to_const = - _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; - - _LIBCUDACXX_INLINE_VISIBILITY - __bit_iterator(const __type_for_copy_to_const& __it) noexcept - : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - - _LIBCUDACXX_INLINE_VISIBILITY reference operator*() const noexcept - {return reference(__seg_, __storage_type(1) << __ctz_);} - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator++() - { - if (__ctz_ != __bits_per_word-1) - ++__ctz_; - else - { - __ctz_ = 0; - ++__seg_; - } - return *this; - } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator++(int) + {} + // avoid re-declaring a copy constructor for the non-const version. 
+ using __type_for_copy_to_const = _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator(const __type_for_copy_to_const& __it) noexcept + : __seg_(__it.__seg_) + , __ctz_(__it.__ctz_) + {} + + _LIBCUDACXX_INLINE_VISIBILITY reference operator*() const noexcept + { + return reference(__seg_, __storage_type(1) << __ctz_); + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator++() + { + if (__ctz_ != __bits_per_word - 1) { - __bit_iterator __tmp = *this; - ++(*this); - return __tmp; + ++__ctz_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator--() + else { - if (__ctz_ != 0) - --__ctz_; - else - { - __ctz_ = __bits_per_word - 1; - --__seg_; - } - return *this; + __ctz_ = 0; + ++__seg_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator--(int) + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator++(int) + { + __bit_iterator __tmp = *this; + ++(*this); + return __tmp; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator--() + { + if (__ctz_ != 0) { - __bit_iterator __tmp = *this; - --(*this); - return __tmp; + --__ctz_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator+=(difference_type __n) - { - if (__n >= 0) - __seg_ += (__n + __ctz_) / __bits_per_word; - else - __seg_ += static_cast(__n - __bits_per_word + __ctz_ + 1) - / static_cast(__bits_per_word); - __n &= (__bits_per_word - 1); - __ctz_ = static_cast((__n + __ctz_) % __bits_per_word); - return *this; - } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator-=(difference_type __n) + else { - return *this += -__n; + __ctz_ = __bits_per_word - 1; + --__seg_; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator+(difference_type __n) const + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator--(int) + { + __bit_iterator __tmp = *this; + --(*this); + return __tmp; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator+=(difference_type __n) + { + if (__n >= 0) { - __bit_iterator __t(*this); - __t += __n; - return __t; + __seg_ += (__n + __ctz_) / __bits_per_word; } - - _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator-(difference_type __n) const + else { - __bit_iterator __t(*this); - __t -= __n; - return __t; + __seg_ += static_cast(__n - __bits_per_word + __ctz_ + 1) + / static_cast(__bits_per_word); } - - _LIBCUDACXX_INLINE_VISIBILITY - friend __bit_iterator operator+(difference_type __n, const __bit_iterator& __it) {return __it + __n;} - - _LIBCUDACXX_INLINE_VISIBILITY - friend difference_type operator-(const __bit_iterator& __x, const __bit_iterator& __y) - {return (__x.__seg_ - __y.__seg_) * __bits_per_word + __x.__ctz_ - __y.__ctz_;} - - _LIBCUDACXX_INLINE_VISIBILITY reference operator[](difference_type __n) const {return *(*this + __n);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator==(const __bit_iterator& __x, const __bit_iterator& __y) - {return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_;} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator!=(const __bit_iterator& __x, const __bit_iterator& __y) - {return !(__x == __y);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<(const __bit_iterator& __x, const __bit_iterator& __y) - {return __x.__seg_ < __y.__seg_ || (__x.__seg_ == __y.__seg_ && __x.__ctz_ < __y.__ctz_);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>(const __bit_iterator& __x, const __bit_iterator& __y) - {return __y < __x;} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<=(const __bit_iterator& __x, 
const __bit_iterator& __y) - {return !(__y < __x);} - - _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>=(const __bit_iterator& __x, const __bit_iterator& __y) - {return !(__x < __y);} + __n &= (__bits_per_word - 1); + __ctz_ = static_cast((__n + __ctz_) % __bits_per_word); + return *this; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator& operator-=(difference_type __n) + { + return *this += -__n; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator+(difference_type __n) const + { + __bit_iterator __t(*this); + __t += __n; + return __t; + } + + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator operator-(difference_type __n) const + { + __bit_iterator __t(*this); + __t -= __n; + return __t; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend __bit_iterator operator+(difference_type __n, const __bit_iterator& __it) + { + return __it + __n; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend difference_type operator-(const __bit_iterator& __x, const __bit_iterator& __y) + { + return (__x.__seg_ - __y.__seg_) * __bits_per_word + __x.__ctz_ - __y.__ctz_; + } + + _LIBCUDACXX_INLINE_VISIBILITY reference operator[](difference_type __n) const + { + return *(*this + __n); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator==(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator!=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__x == __y); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __x.__seg_ < __y.__seg_ || (__x.__seg_ == __y.__seg_ && __x.__ctz_ < __y.__ctz_); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>(const __bit_iterator& __x, const __bit_iterator& __y) + { + return __y < __x; + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator<=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__y < __x); + } + + _LIBCUDACXX_INLINE_VISIBILITY friend bool operator>=(const __bit_iterator& __x, const __bit_iterator& __y) + { + return !(__x < __y); + } private: - _LIBCUDACXX_INLINE_VISIBILITY - __bit_iterator(__storage_pointer __s, unsigned __ctz) noexcept - : __seg_(__s), __ctz_(__ctz) {} - - friend typename _Cp::__self; - - friend class __bit_reference<_Cp>; - friend class __bit_const_reference<_Cp>; - friend class __bit_iterator<_Cp, true>; - template friend struct __bit_array; - template friend void __fill_n_false(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); - template friend void __fill_n_true(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); - template friend __bit_iterator<_Dp, false> __copy_aligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend __bit_iterator<_Dp, false> __copy_unaligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend __bit_iterator<_Dp, false> copy(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend __bit_iterator<_Dp, false> __copy_backward_aligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend __bit_iterator<_Dp, false> __copy_backward_unaligned(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend 
__bit_iterator<_Dp, false> copy_backward(__bit_iterator<_Dp, _IC> __first, - __bit_iterator<_Dp, _IC> __last, - __bit_iterator<_Dp, false> __result); - template friend __bit_iterator<__C2, false> __swap_ranges_aligned(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template friend __bit_iterator<__C2, false> __swap_ranges_unaligned(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template friend __bit_iterator<__C2, false> swap_ranges(__bit_iterator<__C1, false>, - __bit_iterator<__C1, false>, - __bit_iterator<__C2, false>); - template friend __bit_iterator<_Dp, false> rotate(__bit_iterator<_Dp, false>, - __bit_iterator<_Dp, false>, - __bit_iterator<_Dp, false>); - template friend bool __equal_aligned(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template friend bool __equal_unaligned(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template friend bool equal(__bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC1>, - __bit_iterator<_Dp, _IC2>); - template friend __bit_iterator<_Dp, _IC> __find_bool_true(__bit_iterator<_Dp, _IC>, - typename _Dp::size_type); - template friend __bit_iterator<_Dp, _IC> __find_bool_false(__bit_iterator<_Dp, _IC>, - typename _Dp::size_type); - template friend typename __bit_iterator<_Dp, _IC>::difference_type - __count_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); - template friend typename __bit_iterator<_Dp, _IC>::difference_type - __count_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + _LIBCUDACXX_INLINE_VISIBILITY __bit_iterator(__storage_pointer __s, unsigned __ctz) noexcept + : __seg_(__s) + , __ctz_(__ctz) + {} + + friend typename _Cp::__self; + + friend class __bit_reference<_Cp>; + friend class __bit_const_reference<_Cp>; + friend class __bit_iterator<_Cp, true>; + template + friend struct __bit_array; + template + friend void __fill_n_false(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + template + friend void __fill_n_true(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + template + friend __bit_iterator<_Dp, false> __copy_aligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<_Dp, false> __copy_unaligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<_Dp, false> + copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<_Dp, false> __copy_backward_aligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<_Dp, false> __copy_backward_unaligned( + __bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<_Dp, false> + copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); + template + friend __bit_iterator<__C2, false> + __swap_ranges_aligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template + friend __bit_iterator<__C2, false> + __swap_ranges_unaligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template + friend 
__bit_iterator<__C2, false> + swap_ranges(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); + template + friend __bit_iterator<_Dp, false> + rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); + template + friend bool __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template + friend bool __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template + friend bool equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); + template + friend __bit_iterator<_Dp, _IC> __find_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template + friend __bit_iterator<_Dp, _IC> __find_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template + friend typename __bit_iterator<_Dp, _IC>::difference_type + __count_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + template + friend typename __bit_iterator<_Dp, _IC>::difference_type + __count_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); }; _LIBCUDACXX_END_NAMESPACE_STD _LIBCUDACXX_POP_MACROS -#endif // _LIBCUDACXX___BIT_REFERENCE +#endif // _LIBCUDACXX___BIT_REFERENCE diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop b/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop index 27a9a68b4e6..5bd85a09940 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/__pragma_pop @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #if defined(_LIBCUDACXX_USE_PRAGMA_MSVC_WARNING) - #pragma warning(pop) +# pragma warning(pop) #endif #if defined(_LIBCUDACXX_POP_MACROS) - _LIBCUDACXX_POP_MACROS +_LIBCUDACXX_POP_MACROS #endif diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic b/libcudacxx/include/cuda/std/detail/libcxx/include/atomic index 298b69726f9..2d0a2e56af6 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/atomic +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/atomic @@ -556,9 +556,6 @@ void atomic_signal_fence(memory_order m) noexcept; # pragma system_header #endif // no system header -#include // all public C++ headers provide the assertion handler -#include -#include #include #include #include @@ -568,42 +565,42 @@ void atomic_signal_fence(memory_order m) noexcept; #include #include #include -#include #include #include +#include // all public C++ headers provide the assertion handler +#include +#include +#include #include #include #include #ifdef _LIBCUDACXX_HAS_NO_THREADS -# error is not supported on this single threaded system +# error is not supported on this single threaded system #endif #ifdef _LIBCUDACXX_HAS_NO_ATOMIC_HEADER -# error is not implemented +# error is not implemented #endif #ifdef _LIBCUDACXX_UNSUPPORTED_THREAD_API -# error " is not supported on this system" +# error " is not supported on this system" #endif #ifdef kill_dependency -# error C++ standard library is incompatible with +# error C++ standard library is incompatible with #endif -#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_consume || \ - __m == memory_order_acquire || \ - __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) \ + _LIBCUDACXX_DIAGNOSE_WARNING( \ + __m == memory_order_consume 
|| __m == memory_order_acquire || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") -#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || \ - __m == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) \ + _LIBCUDACXX_DIAGNOSE_WARNING(__m == memory_order_release || __m == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") -#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ - _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || \ - __f == memory_order_acq_rel, \ - "memory order argument to atomic operation is invalid") +#define _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__m, __f) \ + _LIBCUDACXX_DIAGNOSE_WARNING(__f == memory_order_release || __f == memory_order_acq_rel, \ + "memory order argument to atomic operation is invalid") #if defined(_LIBCUDACXX_HAS_MSVC_ATOMIC_IMPL) # include @@ -614,25 +611,25 @@ void atomic_signal_fence(memory_order m) noexcept; #endif #if !defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -#define ATOMIC_BOOL_LOCK_FREE 2 -#define ATOMIC_CHAR_LOCK_FREE 2 -#define ATOMIC_CHAR16_T_LOCK_FREE 2 -#define ATOMIC_CHAR32_T_LOCK_FREE 2 -#define ATOMIC_WCHAR_T_LOCK_FREE 2 -#define ATOMIC_SHORT_LOCK_FREE 2 -#define ATOMIC_INT_LOCK_FREE 2 -#define ATOMIC_LONG_LOCK_FREE 2 -#define ATOMIC_LLONG_LOCK_FREE 2 -#define ATOMIC_POINTER_LOCK_FREE 2 -#endif //!defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) +# define ATOMIC_BOOL_LOCK_FREE 2 +# define ATOMIC_CHAR_LOCK_FREE 2 +# define ATOMIC_CHAR16_T_LOCK_FREE 2 +# define ATOMIC_CHAR32_T_LOCK_FREE 2 +# define ATOMIC_WCHAR_T_LOCK_FREE 2 +# define ATOMIC_SHORT_LOCK_FREE 2 +# define ATOMIC_INT_LOCK_FREE 2 +# define ATOMIC_LONG_LOCK_FREE 2 +# define ATOMIC_LLONG_LOCK_FREE 2 +# define ATOMIC_POINTER_LOCK_FREE 2 +#endif //! defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) && !defined(__GCC_ATOMIC_BOOL_LOCK_FREE) #ifndef __ATOMIC_RELAXED -#define __ATOMIC_RELAXED 0 -#define __ATOMIC_CONSUME 1 -#define __ATOMIC_ACQUIRE 2 -#define __ATOMIC_RELEASE 3 -#define __ATOMIC_ACQ_REL 4 -#define __ATOMIC_SEQ_CST 5 +# define __ATOMIC_RELAXED 0 +# define __ATOMIC_CONSUME 1 +# define __ATOMIC_ACQUIRE 2 +# define __ATOMIC_RELEASE 3 +# define __ATOMIC_ACQ_REL 4 +# define __ATOMIC_SEQ_CST 5 #endif //__ATOMIC_RELAXED _LIBCUDACXX_BEGIN_NAMESPACE_STD @@ -640,20 +637,22 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD // Figure out what the underlying type for `memory_order` would be if it were // declared as an unscoped enum (accounting for -fshort-enums). Use this result // to pin the underlying type in C++20. 
-enum __legacy_memory_order { - __mo_relaxed, - __mo_consume, - __mo_acquire, - __mo_release, - __mo_acq_rel, - __mo_seq_cst +enum __legacy_memory_order +{ + __mo_relaxed, + __mo_consume, + __mo_acquire, + __mo_release, + __mo_acq_rel, + __mo_seq_cst }; typedef underlying_type<__legacy_memory_order>::type __memory_order_underlying_t; #if _CCCL_STD_VER > 2017 -enum class memory_order : __memory_order_underlying_t { +enum class memory_order : __memory_order_underlying_t +{ relaxed = __mo_relaxed, consume = __mo_consume, acquire = __mo_acquire, @@ -671,7 +670,8 @@ inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; #else -typedef enum memory_order { +typedef enum memory_order +{ memory_order_relaxed = __mo_relaxed, memory_order_consume = __mo_consume, memory_order_acquire = __mo_acquire, @@ -682,43 +682,48 @@ typedef enum memory_order { #endif // _CCCL_STD_VER > 2017 -template _LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) { +template +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) +{ #if defined(_CCCL_CUDA_COMPILER) - return __lhs == __rhs; + return __lhs == __rhs; #else - return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; + return memcmp(&__lhs, &__rhs, sizeof(_Tp)) == 0; #endif } static_assert((is_same::type, __memory_order_underlying_t>::value), - "unexpected underlying type for std::memory_order"); + "unexpected underlying type for std::memory_order"); -#if defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) || \ - defined(_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS) +#if defined(_LIBCUDACXX_HAS_GCC_ATOMIC_IMP) || defined(_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS) // [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because // the default operator= in an object is not volatile, a byte-by-byte copy // is required. -template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) { +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> +__cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) +{ __a_value = __val; } -template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t::value> -__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) { - volatile char* __to = reinterpret_cast(&__a_value); - volatile char* __end = __to + sizeof(_Tp); +template +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value> +__cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) +{ + volatile char* __to = reinterpret_cast(&__a_value); + volatile char* __end = __to + sizeof(_Tp); volatile const char* __from = reinterpret_cast(&__val); while (__to != __end) + { *__to++ = *__from++; + } } #endif // Headers are wrapped like so: (cuda::std::|std::)detail -namespace __detail { +namespace __detail +{ #if defined(_LIBCUDACXX_HAS_CUDA_ATOMIC_EXT) # include #endif @@ -733,91 +738,98 @@ namespace __detail { // TODO: Maybe support C11 atomics? 
// #include #endif // _LIBCUDACXX_HAS_GCC_ATOMIC_IMP, _LIBCUDACXX_HAS_C_ATOMIC_IMP -} +} // namespace __detail using __detail::__cxx_atomic_base_impl; -using __detail::__cxx_atomic_ref_base_impl; -using __detail::__cxx_atomic_thread_fence; -using __detail::__cxx_atomic_signal_fence; -using __detail::__cxx_atomic_load; -using __detail::__cxx_atomic_store; -using __detail::__cxx_atomic_exchange; -using __detail::__cxx_atomic_compare_exchange_weak; using __detail::__cxx_atomic_compare_exchange_strong; +using __detail::__cxx_atomic_compare_exchange_weak; +using __detail::__cxx_atomic_exchange; using __detail::__cxx_atomic_fetch_add; -using __detail::__cxx_atomic_fetch_sub; -using __detail::__cxx_atomic_fetch_or; using __detail::__cxx_atomic_fetch_and; +using __detail::__cxx_atomic_fetch_or; +using __detail::__cxx_atomic_fetch_sub; using __detail::__cxx_atomic_fetch_xor; +using __detail::__cxx_atomic_load; +using __detail::__cxx_atomic_ref_base_impl; +using __detail::__cxx_atomic_signal_fence; +using __detail::__cxx_atomic_store; +using __detail::__cxx_atomic_thread_fence; template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp kill_dependency(_Tp __y) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp kill_dependency(_Tp __y) noexcept { - return __y; + return __y; } #if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE +# define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE +# define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __CLANG_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __CLANG_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __CLANG_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __CLANG_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __CLANG_ATOMIC_POINTER_LOCK_FREE #elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE) -# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE -# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE -# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE -# define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE -# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE -# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE -# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE -# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE -# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE +# define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE +# define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +# define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE +# define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE +# define 
ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE +# define ATOMIC_SHORT_LOCK_FREE __GCC_ATOMIC_SHORT_LOCK_FREE +# define ATOMIC_INT_LOCK_FREE __GCC_ATOMIC_INT_LOCK_FREE +# define ATOMIC_LONG_LOCK_FREE __GCC_ATOMIC_LONG_LOCK_FREE +# define ATOMIC_LLONG_LOCK_FREE __GCC_ATOMIC_LLONG_LOCK_FREE +# define ATOMIC_POINTER_LOCK_FREE __GCC_ATOMIC_POINTER_LOCK_FREE #endif #ifdef _LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS -template -struct __cxx_atomic_lock_impl { - - _LIBCUDACXX_INLINE_VISIBILITY - __cxx_atomic_lock_impl() noexcept - : __a_value(), __a_lock(0) {} - _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit - __cxx_atomic_lock_impl(_Tp value) noexcept - : __a_value(value), __a_lock(0) {} +template +struct __cxx_atomic_lock_impl +{ + _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_lock_impl() noexcept + : __a_value() + , __a_lock(0) + {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_lock_impl(_Tp value) noexcept + : __a_value(value) + , __a_lock(0) + {} _Tp __a_value; mutable __cxx_atomic_base_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, _Sco> __a_lock; - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const volatile { - while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; + _LIBCUDACXX_INLINE_VISIBILITY void __lock() const volatile + { + while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) + /*spin*/; } - _LIBCUDACXX_INLINE_VISIBILITY void __lock() const { - while(1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) - /*spin*/; + _LIBCUDACXX_INLINE_VISIBILITY void __lock() const + { + while (1 == __cxx_atomic_exchange(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), memory_order_acquire)) + /*spin*/; } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const volatile { + _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const volatile + { __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); } - _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const { + _LIBCUDACXX_INLINE_VISIBILITY void __unlock() const + { __cxx_atomic_store(&__a_lock, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), memory_order_release); } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const volatile { + _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const volatile + { __lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a_value); __unlock(); return __old; } - _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const { + _LIBCUDACXX_INLINE_VISIBILITY _Tp __read() const + { __lock(); _Tp __old = __a_value; __unlock(); @@ -826,45 +838,47 @@ struct __cxx_atomic_lock_impl { }; template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) +{ __cxx_atomic_assign_volatile(__a->__a_value, __val); } template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_init(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val) +{ __a->__a_value = __val; } template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_store(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) +{ __a->__lock(); __cxx_atomic_assign_volatile(__a->__a_value, __val); __a->__unlock(); } 
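As an aside on the hunk above: __cxx_atomic_lock_impl emulates atomic access for types that are not lock-free by guarding a plain value with a spinlock built from an atomic flag. A minimal standalone sketch of the same idea follows (illustrative only; LockedAtomic and its members are hypothetical names, not libcu++ API, and it assumes nothing beyond <atomic>):

#include <atomic>

template <class T>
struct LockedAtomic
{
  T value{};
  mutable std::atomic_flag lock = ATOMIC_FLAG_INIT;

  // Spin until the flag is acquired; the release on clear() publishes writes
  // made while the lock was held.
  void acquire_lock() const
  {
    while (lock.test_and_set(std::memory_order_acquire))
    {
      /* spin */
    }
  }
  void release_lock() const
  {
    lock.clear(std::memory_order_release);
  }

  T load() const
  {
    acquire_lock();
    T old = value;
    release_lock();
    return old;
  }
  void store(T v)
  {
    acquire_lock();
    value = v;
    release_lock();
  }
  T exchange(T v)
  {
    acquire_lock();
    T old = value;
    value = v;
    release_lock();
    return old;
  }
};

Every operation goes through the same spinlock, so this emulation is correct but not lock-free; the surrounding code selects the lock-based path only when __cxx_is_always_lock_free reports that the type cannot be handled with native atomic instructions.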
template -_LIBCUDACXX_INLINE_VISIBILITY -void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_store(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __val, memory_order) +{ __a->__lock(); __a->__a_value = __val; __a->__unlock(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) +{ return __a->__read(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp __cxx_atomic_load(const __cxx_atomic_lock_impl<_Tp, _Sco>* __a, memory_order) +{ return __a->__read(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -873,77 +887,94 @@ _Tp __cxx_atomic_exchange(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp _ return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_exchange(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __value, memory_order) +{ __a->__lock(); - _Tp __old = __a->__a_value; + _Tp __old = __a->__a_value; __a->__a_value = __value; __a->__unlock(); return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_strong(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( + volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); _Tp __temp; __cxx_atomic_assign_volatile(__temp, __a->__a_value); bool __ret = __temp == *__expected; - if(__ret) + if (__ret) + { __cxx_atomic_assign_volatile(__a->__a_value, __value); + } else + { __cxx_atomic_assign_volatile(*__expected, __a->__a_value); + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_strong(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_strong( + __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); bool __ret = __a->__a_value == *__expected; - if(__ret) + if (__ret) + { __a->__a_value = __value; + } else + { *__expected = __a->__a_value; + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_weak(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( + volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); _Tp __temp; __cxx_atomic_assign_volatile(__temp, __a->__a_value); bool __ret = __temp == *__expected; - if(__ret) + if (__ret) + { __cxx_atomic_assign_volatile(__a->__a_value, 
__value); + } else + { __cxx_atomic_assign_volatile(*__expected, __a->__a_value); + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp* __expected, _Tp __value, memory_order, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY bool __cxx_atomic_compare_exchange_weak( + __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp* __expected, _Tp __value, memory_order, memory_order) +{ __a->__lock(); bool __ret = __a->__a_value == *__expected; - if(__ret) + if (__ret) + { __a->__a_value = __value; + } else + { *__expected = __a->__a_value; + } __a->__unlock(); return __ret; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -952,9 +983,9 @@ _Tp __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value += __delta; @@ -963,9 +994,9 @@ _Tp __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, - ptrdiff_t __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +__cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) +{ __a->__lock(); _Tp* __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -974,9 +1005,9 @@ _Tp* __cxx_atomic_fetch_add(volatile __cxx_atomic_lock_impl<_Tp*, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, - ptrdiff_t __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +__cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, ptrdiff_t __delta, memory_order) +{ __a->__lock(); _Tp* __old = __a->__a_value; __a->__a_value += __delta; @@ -985,9 +1016,9 @@ _Tp* __cxx_atomic_fetch_add(__cxx_atomic_lock_impl<_Tp*, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -996,9 +1027,9 @@ _Tp __cxx_atomic_fetch_sub(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Td __delta, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Td __delta, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value -= __delta; @@ -1007,9 +1038,9 @@ _Tp __cxx_atomic_fetch_sub(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp 
__pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1018,9 +1049,9 @@ _Tp __cxx_atomic_fetch_and(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value &= __pattern; @@ -1029,9 +1060,9 @@ _Tp __cxx_atomic_fetch_and(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1040,9 +1071,9 @@ _Tp __cxx_atomic_fetch_or(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value |= __pattern; @@ -1051,9 +1082,9 @@ _Tp __cxx_atomic_fetch_or(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old; __cxx_atomic_assign_volatile(__old, __a->__a_value); @@ -1062,9 +1093,9 @@ _Tp __cxx_atomic_fetch_xor(volatile __cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp __cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, - _Tp __pattern, memory_order) { +_LIBCUDACXX_INLINE_VISIBILITY _Tp +__cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, _Tp __pattern, memory_order) +{ __a->__lock(); _Tp __old = __a->__a_value; __a->__a_value ^= __pattern; @@ -1072,44 +1103,56 @@ _Tp __cxx_atomic_fetch_xor(__cxx_atomic_lock_impl<_Tp, _Sco>* __a, return __old; } -#if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +# if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) -template struct __cxx_is_always_lock_free { - enum { __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) }; }; +template +struct __cxx_is_always_lock_free +{ + enum + { + __value = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0) + }; +}; -#else +# else -template struct __cxx_is_always_lock_free { - enum { __value = sizeof(_Tp) <= 8 }; }; +template +struct __cxx_is_always_lock_free +{ + enum + { + __value = sizeof(_Tp) <= 8 + }; +}; -#endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) +# endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) template -struct __cxx_atomic_impl_conditional { - using type = __conditional_t<__cxx_is_always_lock_free<_Tp>::__value, - __cxx_atomic_base_impl<_Tp, _Sco>, - __cxx_atomic_lock_impl<_Tp, _Sco> >; +struct __cxx_atomic_impl_conditional +{ + using 
type = __conditional_t<__cxx_is_always_lock_free<_Tp>::__value, + __cxx_atomic_base_impl<_Tp, _Sco>, + __cxx_atomic_lock_impl<_Tp, _Sco>>; }; -template ::type > +template ::type> #else -template > +template > #endif //_LIBCUDACXX_ATOMIC_ONLY_USE_BUILTINS -struct __cxx_atomic_impl : public _Base { +struct __cxx_atomic_impl : public _Base +{ __cxx_atomic_impl() noexcept = default; _LIBCUDACXX_INLINE_VISIBILITY constexpr explicit __cxx_atomic_impl(_Tp value) noexcept - : _Base(value) {} + : _Base(value) + {} }; - -template -_LIBCUDACXX_INLINE_VISIBILITY -__cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) { - static_assert(sizeof(__cxx_atomic_impl<_Tp, _Sco>) == sizeof(_Tp),""); - static_assert(alignof(__cxx_atomic_impl<_Tp, _Sco>) == alignof(_Tp),""); - return (__cxx_atomic_impl<_Tp, _Sco>*)__inst; +template +_LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_impl<_Tp, _Sco>* __cxx_atomic_rebind(_Tp* __inst) +{ + static_assert(sizeof(__cxx_atomic_impl<_Tp, _Sco>) == sizeof(_Tp), ""); + static_assert(alignof(__cxx_atomic_impl<_Tp, _Sco>) == alignof(_Tp), ""); + return (__cxx_atomic_impl<_Tp, _Sco>*) __inst; } template @@ -1118,25 +1161,29 @@ using __cxx_atomic_ref_impl = __cxx_atomic_ref_base_impl<_Tp, _Sco>; #ifdef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE template , int _Sco = _Ty::__sco> -struct __cxx_atomic_poll_tester { - _Ty const volatile* __a; - _Tp __val; - memory_order __order; +struct __cxx_atomic_poll_tester +{ + _Ty const volatile* __a; + _Tp __val; + memory_order __order; - _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_poll_tester(_Ty const volatile* __a_, _Tp __val_, memory_order __order_) + _LIBCUDACXX_INLINE_VISIBILITY __cxx_atomic_poll_tester(_Ty const volatile* __a_, _Tp __val_, memory_order __order_) : __a(__a_) , __val(__val_) , __order(__order_) - {} + {} - _LIBCUDACXX_INLINE_VISIBILITY bool operator()() const { - return !(__cxx_atomic_load(__a, __order) == __val); - } + _LIBCUDACXX_INLINE_VISIBILITY bool operator()() const + { + return !(__cxx_atomic_load(__a, __order) == __val); + } }; template , int _Sco = _Ty::__sco> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow_fallback(_Ty const volatile* __a, _Tp __val, memory_order __order) { - __libcpp_thread_poll_with_backoff(__cxx_atomic_poll_tester<_Ty>(__a, __val, __order)); +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow_fallback(_Ty const volatile* __a, _Tp __val, memory_order __order) +{ + __libcpp_thread_poll_with_backoff(__cxx_atomic_poll_tester<_Ty>(__a, __val, __order)); } #endif @@ -1144,632 +1191,888 @@ _LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow_fallback(_Ty const #ifdef _LIBCUDACXX_HAS_PLATFORM_WAIT template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__version), (__libcpp_platform_wait_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)0, memory_order_relaxed)) - __libcpp_platform_wake(&__c->__version, true); -#endif +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__version), 
(__libcpp_platform_wait_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 0, memory_order_relaxed)) + { + __libcpp_platform_wake(&__c->__version, true); + } +# endif } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - __cxx_atomic_notify_all(__a); -} -template , int _Sco = _Ty::__sco, __enable_if_t::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp const __val, memory_order __order) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - auto const __version = __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__version), memory_order_relaxed); - if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - return; - if(sizeof(__libcpp_platform_wait_t) < 8) { - constexpr timespec __timeout = { 2, 0 }; // Hedge on rare 'int version' aliasing. - __libcpp_platform_wait(&__c->__version, __version, &__timeout); - } - else - __libcpp_platform_wait(&__c->__version, __version, nullptr); -#else - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); -#endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + __cxx_atomic_notify_all(__a); +} +template , + int _Sco = _Ty::__sco, + __enable_if_t::__value, int> = 1> +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp const __val, memory_order __order) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + auto const __version = __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__version), memory_order_relaxed); + if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + return; + } + if (sizeof(__libcpp_platform_wait_t) < 8) + { + constexpr timespec __timeout = {2, 0}; // Hedge on rare 'int version' aliasing. 
+ __libcpp_platform_wait(&__c->__version, __version, &__timeout); + } + else + { + __libcpp_platform_wait(&__c->__version, __version, nullptr); + } +# else + __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); +# endif // _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp __val, memory_order) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); -#endif - __libcpp_platform_wait((_Tp*)__a, __val, nullptr); -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - __cxx_atomic_fetch_sub(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t)1, memory_order_relaxed); -#endif +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp __val, memory_order) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_fetch_add(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); +# endif + __libcpp_platform_wait((_Tp*) __a, __val, nullptr); +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + __cxx_atomic_fetch_sub(__cxx_atomic_rebind<_Sco>(&__c->__waiters), (ptrdiff_t) 1, memory_order_relaxed); +# endif } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -#endif - __libcpp_platform_wake((_Tp*)__a, true); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) +# endif + __libcpp_platform_wake((_Tp*) __a, true); } template ::__value, int> = 1> -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { -#ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE - auto * const __c = __libcpp_contention_state(__a); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) -#endif - __libcpp_platform_wake((_Tp*)__a, false); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ +# ifndef _LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 != __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__waiters), memory_order_relaxed)) +# endif + __libcpp_platform_wake((_Tp*) __a, false); } #elif !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - auto * const __c = __libcpp_contention_state(__a); - 
__cxx_atomic_thread_fence(memory_order_seq_cst); - if(0 == __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__credit), memory_order_relaxed)) - return; - if(0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t)0, memory_order_relaxed)) { - __libcpp_mutex_lock(&__c->__mutex); - __libcpp_mutex_unlock(&__c->__mutex); - __libcpp_condvar_broadcast(&__c->__condvar); - } +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + auto* const __c = __libcpp_contention_state(__a); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (0 == __cxx_atomic_load(__cxx_atomic_rebind<_Sco>(&__c->__credit), memory_order_relaxed)) + { + return; + } + if (0 != __cxx_atomic_exchange(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 0, memory_order_relaxed)) + { + __libcpp_mutex_lock(&__c->__mutex); + __libcpp_mutex_unlock(&__c->__mutex); + __libcpp_condvar_broadcast(&__c->__condvar); + } } template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) { - __cxx_atomic_notify_all(__a); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a) +{ + __cxx_atomic_notify_all(__a); } template -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp const __val, memory_order __order) { - auto * const __c = __libcpp_contention_state(__a); - __libcpp_mutex_lock(&__c->__mutex); - __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t)1, memory_order_relaxed); - __cxx_atomic_thread_fence(memory_order_seq_cst); - if (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - __libcpp_condvar_wait(&__c->__condvar, &__c->__mutex); - __libcpp_mutex_unlock(&__c->__mutex); +_LIBCUDACXX_INLINE_VISIBILITY void +__cxx_atomic_try_wait_slow(__cxx_atomic_impl<_Tp, _Sco> const volatile* __a, _Tp const __val, memory_order __order) +{ + auto* const __c = __libcpp_contention_state(__a); + __libcpp_mutex_lock(&__c->__mutex); + __cxx_atomic_store(__cxx_atomic_rebind<_Sco>(&__c->__credit), (ptrdiff_t) 1, memory_order_relaxed); + __cxx_atomic_thread_fence(memory_order_seq_cst); + if (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + __libcpp_condvar_wait(&__c->__condvar, &__c->__mutex); + } + __libcpp_mutex_unlock(&__c->__mutex); } #else -template +template struct __atomic_wait_and_notify_supported -#if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 700 +# if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 700 : false_type -#else +# else : true_type -#endif +# endif {}; template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp __val, memory_order __order) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic wait operations are unsupported on Pascal"); - __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_try_wait_slow(_Ty const volatile* __a, _Tp __val, memory_order __order) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic wait operations are unsupported on Pascal"); + __cxx_atomic_try_wait_slow_fallback(__a, __val, __order); } template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(_Ty const volatile*) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic notify-one operations are unsupported on Pascal"); 
+_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_one(_Ty const volatile*) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, + "atomic notify-one operations are unsupported on Pascal"); } template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(_Ty const volatile*) { - static_assert(__atomic_wait_and_notify_supported<_Tp>::value, "atomic notify-all operations are unsupported on Pascal"); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_notify_all(_Ty const volatile*) +{ + static_assert(__atomic_wait_and_notify_supported<_Tp>::value, + "atomic notify-all operations are unsupported on Pascal"); } #endif // _LIBCUDACXX_HAS_PLATFORM_WAIT || !defined(_LIBCUDACXX_HAS_NO_THREAD_CONTENTION_TABLE) template > -_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(_Ty const volatile* __a, _Tp const __val, memory_order __order) { - for(int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) { - if(!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - return; - if(__i < 12) - __libcpp_thread_yield_processor(); - else - __libcpp_thread_yield(); +_LIBCUDACXX_INLINE_VISIBILITY void __cxx_atomic_wait(_Ty const volatile* __a, _Tp const __val, memory_order __order) +{ + for (int __i = 0; __i < _LIBCUDACXX_POLLING_COUNT; ++__i) + { + if (!__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + return; + } + if (__i < 12) + { + __libcpp_thread_yield_processor(); } - while(__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) - __cxx_atomic_try_wait_slow(__a, __val, __order); + else + { + __libcpp_thread_yield(); + } + } + while (__cxx_nonatomic_compare_equal(__cxx_atomic_load(__a, __order), __val)) + { + __cxx_atomic_try_wait_slow(__a, __val, __order); + } } template -struct __atomic_base_storage { - mutable _Storage __a_; +struct __atomic_base_storage +{ + mutable _Storage __a_; - __atomic_base_storage() = default; - __atomic_base_storage(const __atomic_base_storage&) = default; - __atomic_base_storage(__atomic_base_storage&&) = default; + __atomic_base_storage() = default; + __atomic_base_storage(const __atomic_base_storage&) = default; + __atomic_base_storage(__atomic_base_storage&&) = default; - __atomic_base_storage& operator=(const __atomic_base_storage&) = default; - __atomic_base_storage& operator=(__atomic_base_storage&&) = default; + __atomic_base_storage& operator=(const __atomic_base_storage&) = default; + __atomic_base_storage& operator=(__atomic_base_storage&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_storage(_Storage&& __a) noexcept : __a_(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_storage(_Storage&& __a) noexcept + : __a_(_CUDA_VSTD::forward<_Storage>(__a)) + {} }; template -struct __atomic_base_core : public __atomic_base_storage<_Tp, _Storage>{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = delete; - __atomic_base_core(__atomic_base_core&&) = delete; +struct __atomic_base_core : public __atomic_base_storage<_Tp, _Storage> +{ + __atomic_base_core() = default; + __atomic_base_core(const __atomic_base_core&) = delete; + __atomic_base_core(__atomic_base_core&&) = delete; - __atomic_base_core& operator=(const __atomic_base_core&) = delete; - __atomic_base_core& operator=(__atomic_base_core&&) = delete; + __atomic_base_core& operator=(const __atomic_base_core&) = delete; + __atomic_base_core& operator=(__atomic_base_core&&) = delete; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - 
__atomic_base_core(_Storage&& __a) noexcept : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept + : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); #endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const volatile noexcept - {return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));} - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const noexcept - {return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free();} - _LIBCUDACXX_INLINE_VISIBILITY - - void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void store(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const volatile noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == 
__m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); + } + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept + { + return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); + } + _LIBCUDACXX_INLINE_VISIBILITY + + void + store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) noexcept + 
{ + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) noexcept { - if(memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if(memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept - {__cxx_atomic_notify_all(&this->__a_);} - 
_LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept - {__cxx_atomic_notify_all(&this->__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } }; template -struct __atomic_base_core<_Tp, true, _Storage> : public __atomic_base_storage<_Tp, _Storage>{ - __atomic_base_core() = default; - __atomic_base_core(const __atomic_base_core&) = default; - __atomic_base_core(__atomic_base_core&&) = default; +struct __atomic_base_core<_Tp, true, _Storage> : public __atomic_base_storage<_Tp, _Storage> +{ + __atomic_base_core() = default; + __atomic_base_core(const __atomic_base_core&) = default; + __atomic_base_core(__atomic_base_core&&) = default; - __atomic_base_core& operator=(const __atomic_base_core&) = default; - __atomic_base_core& operator=(__atomic_base_core&&) = default; + __atomic_base_core& operator=(const __atomic_base_core&) = default; + __atomic_base_core& operator=(__atomic_base_core&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_core(_Storage&& __a) noexcept : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_core(_Storage&& __a) noexcept + : __atomic_base_storage<_Tp, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); + static constexpr bool is_always_lock_free = _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(sizeof(_Tp), 0); #endif // defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const volatile noexcept - {return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp));} - _LIBCUDACXX_INLINE_VISIBILITY - bool is_lock_free() const noexcept - {return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free();} - _LIBCUDACXX_INLINE_VISIBILITY - - void store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void store(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) - {__cxx_atomic_store(&this->__a_, __d, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp load(memory_order __m = memory_order_seq_cst) const noexcept - _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) - {return __cxx_atomic_load(&this->__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const volatile noexcept {return load();} - _LIBCUDACXX_INLINE_VISIBILITY - operator _Tp() const noexcept {return load();} - 
_LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_exchange(&this->__a_, __d, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const volatile noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __s, memory_order __f) const noexcept - _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) - {return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f);} - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_IS_LOCK_FREE(sizeof(_Tp)); + } + _LIBCUDACXX_INLINE_VISIBILITY bool is_lock_free() const noexcept + { + return static_cast<__atomic_base_core const volatile*>(this)->is_lock_free(); + } + _LIBCUDACXX_INLINE_VISIBILITY + + void + store(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void store(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) + { + __cxx_atomic_store(&this->__a_, __d, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const volatile noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp load(memory_order __m = memory_order_seq_cst) const noexcept + _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) + { + return __cxx_atomic_load(&this->__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const volatile noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY operator _Tp() const noexcept + { + return load(); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + return 
__cxx_atomic_exchange(&this->__a_, __d, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const + volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const + volatile noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) const noexcept + _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __s, __f); + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_weak(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const noexcept { - if(memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if(memory_order_release == __m) - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_weak(&this->__a_, &__e, __d, __m, __m); + } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const volatile noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else if (memory_order_release == __m) + { + return 
__cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); } - _LIBCUDACXX_INLINE_VISIBILITY - bool compare_exchange_strong(_Tp& __e, _Tp __d, - memory_order __m = memory_order_seq_cst) const noexcept { - if (memory_order_acq_rel == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); - else if (memory_order_release == __m) - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); - else - return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); } + } + _LIBCUDACXX_INLINE_VISIBILITY bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) const noexcept + { + if (memory_order_acq_rel == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_acquire); + } + else if (memory_order_release == __m) + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, memory_order_relaxed); + } + else + { + return __cxx_atomic_compare_exchange_strong(&this->__a_, &__e, __d, __m, __m); + } + } - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&this->__a_, __v, __m);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const noexcept - {__cxx_atomic_notify_one(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile noexcept - {__cxx_atomic_notify_all(&this->__a_);} - _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const noexcept - {__cxx_atomic_notify_all(&this->__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&this->__a_, __v, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const volatile noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() const noexcept + { + __cxx_atomic_notify_one(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const volatile noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() const noexcept + { + __cxx_atomic_notify_all(&this->__a_); + } }; template -struct __atomic_base_arithmetic : public __atomic_base_core<_Tp, _Cq, _Storage> { - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = delete; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = delete; - __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_arithmetic(_Storage&& __a) noexcept : __atomic_base_core<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, 
__m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) volatile noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) volatile noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() volatile noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() volatile noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) noexcept {return fetch_sub(__op) - __op;} +struct __atomic_base_arithmetic : public __atomic_base_core<_Tp, _Cq, _Storage> +{ + __atomic_base_arithmetic() = default; + __atomic_base_arithmetic(const __atomic_base_arithmetic&) = delete; + __atomic_base_arithmetic(__atomic_base_arithmetic&&) = delete; + + __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = delete; + __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept + : __atomic_base_core<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) volatile noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) volatile noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() volatile noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + 
_LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() volatile noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) noexcept + { + return fetch_sub(__op) - __op; + } }; template -struct __atomic_base_arithmetic<_Tp, true, _Storage> : public __atomic_base_core<_Tp, true, _Storage> { - __atomic_base_arithmetic() = default; - __atomic_base_arithmetic(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic(__atomic_base_arithmetic&&) = default; - - __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = default; - __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_arithmetic(_Storage&& __a) noexcept : __atomic_base_core<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) const volatile noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++(int) const noexcept {return fetch_add(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) const volatile noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--(int) const noexcept {return fetch_sub(_Tp(1));} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() const volatile noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator++() const noexcept {return fetch_add(_Tp(1)) + _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() const volatile noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator--() const noexcept {return fetch_sub(_Tp(1)) - _Tp(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) const volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator+=(_Tp __op) const noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) const volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator-=(_Tp __op) const noexcept {return fetch_sub(__op) - __op;} +struct __atomic_base_arithmetic<_Tp, true, _Storage> : public __atomic_base_core<_Tp, true, _Storage> +{ + __atomic_base_arithmetic() = default; + 
__atomic_base_arithmetic(const __atomic_base_arithmetic&) = default; + __atomic_base_arithmetic(__atomic_base_arithmetic&&) = default; + + __atomic_base_arithmetic& operator=(const __atomic_base_arithmetic&) = default; + __atomic_base_arithmetic& operator=(__atomic_base_arithmetic&&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_arithmetic(_Storage&& __a) noexcept + : __atomic_base_core<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const volatile noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++(int) const noexcept + { + return fetch_add(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const volatile noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--(int) const noexcept + { + return fetch_sub(_Tp(1)); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const volatile noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator++() const noexcept + { + return fetch_add(_Tp(1)) + _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const volatile noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator--() const noexcept + { + return fetch_sub(_Tp(1)) - _Tp(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator+=(_Tp __op) const noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator-=(_Tp __op) const noexcept + { + return fetch_sub(__op) - __op; + } }; template -struct __atomic_base_bitwise : public __atomic_base_arithmetic<_Tp, _Cq, _Storage> { - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise(__atomic_base_bitwise&&) = delete; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = delete; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = delete; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_bitwise(_Storage&& __a) noexcept : __atomic_base_arithmetic<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, 
memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) volatile noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) volatile noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) volatile noexcept {return fetch_xor(__op) ^ __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) noexcept {return fetch_xor(__op) ^ __op;} +struct __atomic_base_bitwise : public __atomic_base_arithmetic<_Tp, _Cq, _Storage> +{ + __atomic_base_bitwise() = default; + __atomic_base_bitwise(const __atomic_base_bitwise&) = delete; + __atomic_base_bitwise(__atomic_base_bitwise&&) = delete; + + __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = delete; + __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = delete; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept + : __atomic_base_arithmetic<_Tp, _Cq, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) volatile noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) volatile noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) volatile noexcept + { + return fetch_xor(__op) ^ __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) noexcept + { + return fetch_xor(__op) ^ __op; + } 
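// Illustrative note, not part of the patch: the reformatted __atomic_base_bitwise
// members above are behaviour-preserving. Each compound assignment re-applies the
// operation to the value returned by the corresponding fetch_* call, so it yields
// the *updated* value, while fetch_* itself yields the *previous* one. A minimal,
// hedged sketch of the user-visible semantics through cuda::std::atomic:
//
//   #include <cuda/std/atomic>
//
//   cuda::std::atomic<unsigned> mask{0b0001u};
//   unsigned before = mask.fetch_or(0b0100u); // before == 0b0001, mask now 0b0101
//   unsigned after  = (mask |= 0b1000u);      // after  == 0b1101, the new value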
}; template -struct __atomic_base_bitwise<_Tp, true, _Storage> : public __atomic_base_arithmetic<_Tp, true, _Storage> { - __atomic_base_bitwise() = default; - __atomic_base_bitwise(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise(__atomic_base_bitwise&&) = default; - - __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = default; - __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = default; - - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_bitwise(_Storage&& __a) noexcept : __atomic_base_arithmetic<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_and(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_or(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept - {return __cxx_atomic_fetch_xor(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) const volatile noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator&=(_Tp __op) const noexcept {return fetch_and(__op) & __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) const volatile noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator|=(_Tp __op) const noexcept {return fetch_or(__op) | __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) const volatile noexcept {return fetch_xor(__op) ^ __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator^=(_Tp __op) const noexcept {return fetch_xor(__op) ^ __op;} +struct __atomic_base_bitwise<_Tp, true, _Storage> : public __atomic_base_arithmetic<_Tp, true, _Storage> +{ + __atomic_base_bitwise() = default; + __atomic_base_bitwise(const __atomic_base_bitwise&) = default; + __atomic_base_bitwise(__atomic_base_bitwise&&) = default; + + __atomic_base_bitwise& operator=(const __atomic_base_bitwise&) = default; + __atomic_base_bitwise& operator=(__atomic_base_bitwise&&) = default; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_bitwise(_Storage&& __a) noexcept + : __atomic_base_arithmetic<_Tp, true, _Storage>(_CUDA_VSTD::forward<_Storage>(__a)) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_and(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_or(_Tp __op, 
memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_or(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_xor(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const volatile noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator&=(_Tp __op) const noexcept + { + return fetch_and(__op) & __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const volatile noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator|=(_Tp __op) const noexcept + { + return fetch_or(__op) | __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const volatile noexcept + { + return fetch_xor(__op) ^ __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator^=(_Tp __op) const noexcept + { + return fetch_xor(__op) ^ __op; + } }; template -using __atomic_select_base = __conditional_t::value, - __atomic_base_arithmetic<_Tp, _Cq, _Storage>, - __conditional_t::value, - __atomic_base_bitwise<_Tp, _Cq, _Storage>, - __atomic_base_core<_Tp, _Cq, _Storage> >>; +using __atomic_select_base = + __conditional_t::value, + __atomic_base_arithmetic<_Tp, _Cq, _Storage>, + __conditional_t::value, + __atomic_base_bitwise<_Tp, _Cq, _Storage>, + __atomic_base_core<_Tp, _Cq, _Storage>>>; template >> -struct __atomic_base : public _Base { - __atomic_base() = default; - __atomic_base(const __atomic_base&) = delete; - __atomic_base(__atomic_base&&) = delete; +struct __atomic_base : public _Base +{ + __atomic_base() = default; + __atomic_base(const __atomic_base&) = delete; + __atomic_base(__atomic_base&&) = delete; - __atomic_base& operator=(const __atomic_base&) = delete; - __atomic_base& operator=(__atomic_base&&) = delete; + __atomic_base& operator=(const __atomic_base&) = delete; + __atomic_base& operator=(__atomic_base&&) = delete; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base(const _Tp& __a) noexcept : - _Base(__cxx_atomic_impl<_Tp, _Sco>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base(const _Tp& __a) noexcept + : _Base(__cxx_atomic_impl<_Tp, _Sco>(__a)) + {} }; template >> -struct __atomic_base_ref : public _Base { - __atomic_base_ref() = default; - __atomic_base_ref(const __atomic_base_ref&) = default; - __atomic_base_ref(__atomic_base_ref&&) = default; +struct __atomic_base_ref : public _Base +{ + __atomic_base_ref() = default; + __atomic_base_ref(const __atomic_base_ref&) = default; + __atomic_base_ref(__atomic_base_ref&&) = default; - __atomic_base_ref& operator=(const __atomic_base_ref&) = default; - __atomic_base_ref& operator=(__atomic_base_ref&&) = default; + __atomic_base_ref& operator=(const __atomic_base_ref&) = default; + __atomic_base_ref& operator=(__atomic_base_ref&&) = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - __atomic_base_ref(_Tp& __a) noexcept : - _Base(__cxx_atomic_ref_impl<_Tp, _Sco>(__a)) {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr __atomic_base_ref(_Tp& __a) noexcept + : _Base(__cxx_atomic_ref_impl<_Tp, _Sco>(__a)) + {} }; #if defined(_LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE) @@ -1779,1059 +2082,918 @@ constexpr bool __atomic_base_core<_Tp, _Cq, _Storage>::is_always_lock_free; // atomic template -struct 
atomic - : public __atomic_base<_Tp> +struct atomic : public __atomic_base<_Tp> { - typedef __atomic_base<_Tp> __base; - using value_type = _Tp; + typedef __atomic_base<_Tp> __base; + using value_type = _Tp; - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY - constexpr atomic(_Tp __d) noexcept : __base(__d) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __d) volatile noexcept - {__base::store(__d); return __d;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __d) noexcept - {__base::store(__d); return __d;} + atomic() noexcept = default; + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp __d) noexcept + : __base(__d) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) volatile noexcept + { + __base::store(__d); + return __d; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __d) noexcept + { + __base::store(__d); + return __d; + } }; // atomic template -struct atomic<_Tp*> - : public __atomic_base<_Tp*> +struct atomic<_Tp*> : public __atomic_base<_Tp*> { - typedef __atomic_base<_Tp*> __base; - using value_type = _Tp*; + typedef __atomic_base<_Tp*> __base; + using value_type = _Tp*; - atomic() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY - constexpr atomic(_Tp* __d) noexcept : __base(__d) {} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __d) volatile noexcept - {__base::store(__d); return __d;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __d) noexcept - {__base::store(__d); return __d;} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - volatile noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - volatile noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) volatile noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) volatile noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() volatile noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() volatile noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) volatile noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) volatile noexcept {return fetch_sub(__op) - __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) noexcept {return fetch_sub(__op) - __op;} + atomic() noexcept = default; + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic(_Tp* __d) noexcept + : __base(__d) + {} + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* 
operator=(_Tp* __d) volatile noexcept + { + __base::store(__d); + return __d; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __d) noexcept + { + __base::store(__d); + return __d; + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) volatile noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) volatile noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() volatile noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() volatile noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) volatile noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) volatile noexcept + { + return fetch_sub(__op) - __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) noexcept + { + return fetch_sub(__op) - __op; + } }; // atomic_ref template - struct atomic_ref - : public __atomic_base_ref<_Tp> +struct atomic_ref : public __atomic_base_ref<_Tp> { - typedef __atomic_base_ref<_Tp> __base; - using value_type = _Tp; + typedef __atomic_base_ref<_Tp> __base; + using value_type = _Tp; - static constexpr size_t required_alignment = sizeof(_Tp); + static constexpr size_t required_alignment = sizeof(_Tp); - static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; + static constexpr bool is_always_lock_free = sizeof(_Tp) <= 8; - _LIBCUDACXX_INLINE_VISIBILITY - explicit atomic_ref(_Tp& __ref) : __base(__ref) {} + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp& __ref) + : __base(__ref) + {} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp operator=(_Tp __v) const volatile noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp operator=(_Tp __v) const volatile noexcept + { + __base::store(__v); + return __v; + } }; // atomic_ref template - struct atomic_ref<_Tp*> - : public __atomic_base_ref<_Tp*> +struct atomic_ref<_Tp*> : public __atomic_base_ref<_Tp*> { - typedef __atomic_base_ref<_Tp*> __base; - using value_type = _Tp*; + typedef __atomic_base_ref<_Tp*> __base; + using value_type = _Tp*; - static constexpr size_t required_alignment = sizeof(_Tp*); + static constexpr size_t required_alignment = sizeof(_Tp*); - static constexpr bool 
is_always_lock_free = sizeof(_Tp*) <= 8; + static constexpr bool is_always_lock_free = sizeof(_Tp*) <= 8; - _LIBCUDACXX_INLINE_VISIBILITY - explicit atomic_ref(_Tp*& __ref) : __base(__ref) {} + _LIBCUDACXX_INLINE_VISIBILITY explicit atomic_ref(_Tp*& __ref) + : __base(__ref) + {} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator=(_Tp* __v) const noexcept {__base::store(__v); return __v;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator=(_Tp* __v) const noexcept + { + __base::store(__v); + return __v; + } - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - const noexcept - {return __cxx_atomic_fetch_add(&this->__a_, __op, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) - const noexcept - {return __cxx_atomic_fetch_sub(&this->__a_, __op, __m);} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_add(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_add(&this->__a_, __op, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* fetch_sub(ptrdiff_t __op, memory_order __m = memory_order_seq_cst) const noexcept + { + return __cxx_atomic_fetch_sub(&this->__a_, __op, __m); + } - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++(int) const noexcept {return fetch_add(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--(int) const noexcept {return fetch_sub(1);} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator++() const noexcept {return fetch_add(1) + 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator--() const noexcept {return fetch_sub(1) - 1;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator+=(ptrdiff_t __op) const noexcept {return fetch_add(__op) + __op;} - _LIBCUDACXX_INLINE_VISIBILITY - _Tp* operator-=(ptrdiff_t __op) const noexcept {return fetch_sub(__op) - __op;} + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++(int) const noexcept + { + return fetch_add(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--(int) const noexcept + { + return fetch_sub(1); + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator++() const noexcept + { + return fetch_add(1) + 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator--() const noexcept + { + return fetch_sub(1) - 1; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator+=(ptrdiff_t __op) const noexcept + { + return fetch_add(__op) + __op; + } + _LIBCUDACXX_INLINE_VISIBILITY _Tp* operator-=(ptrdiff_t __op) const noexcept + { + return fetch_sub(__op) - __op; + } }; // atomic_is_lock_free template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const volatile atomic<_Tp>* __o) noexcept { - return __o->is_lock_free(); + return __o->is_lock_free(); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_is_lock_free(const atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_is_lock_free(const atomic<_Tp>* __o) noexcept { - return __o->is_lock_free(); + return __o->is_lock_free(); } // atomic_init template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - __cxx_atomic_init(&__o->__a_, __d); + __cxx_atomic_init(&__o->__a_, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_init(atomic<_Tp>* __o, _Tp __d) noexcept { - __cxx_atomic_init(&__o->__a_, __d); + 
__cxx_atomic_init(&__o->__a_, __d); } // atomic_store template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - __o->store(__d); + __o->store(__d); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store(atomic<_Tp>* __o, _Tp __d) noexcept { - __o->store(__d); + __o->store(__d); } // atomic_store_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) { - __o->store(__d, __m); + __o->store(__d, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -void -atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_store_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept _LIBCUDACXX_CHECK_STORE_MEMORY_ORDER(__m) { - __o->store(__d, __m); + __o->store(__d, __m); } // atomic_load template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load(const volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const volatile atomic<_Tp>* __o) noexcept { - return __o->load(); + return __o->load(); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load(const atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load(const atomic<_Tp>* __o) noexcept { - return __o->load(); + return __o->load(); } // atomic_load_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const volatile atomic<_Tp>* __o, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->load(__m); + return __o->load(__m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_load_explicit(const atomic<_Tp>* __o, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->load(__m); + return __o->load(__m); } // atomic_exchange template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(volatile atomic<_Tp>* __o, _Tp __d) noexcept { - return __o->exchange(__d); + return __o->exchange(__d); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange(atomic<_Tp>* __o, _Tp __d) noexcept { - return __o->exchange(__d); + return __o->exchange(__d); } // atomic_exchange_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(volatile atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept { - return __o->exchange(__d, __m); + return __o->exchange(__d, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp -atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp atomic_exchange_explicit(atomic<_Tp>* __o, _Tp __d, memory_order __m) noexcept { - return __o->exchange(__d, __m); + return 
__o->exchange(__d, __m); } // atomic_compare_exchange_weak template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_weak(*__e, __d); + return __o->compare_exchange_weak(*__e, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_weak(*__e, __d); + return __o->compare_exchange_weak(*__e, __d); } // atomic_compare_exchange_strong template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_strong(*__e, __d); + return __o->compare_exchange_strong(*__e, __d); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong(atomic<_Tp>* __o, _Tp* __e, _Tp __d) noexcept { - return __o->compare_exchange_strong(*__e, __d); + return __o->compare_exchange_strong(*__e, __d); } // atomic_compare_exchange_weak_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak_explicit(volatile atomic<_Tp>* __o, _Tp* __e, - _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_weak_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_weak(*__e, __d, __s, __f); + return __o->compare_exchange_weak(*__e, __d, __s, __f); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool +atomic_compare_exchange_weak_explicit(atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_weak(*__e, __d, __s, __f); + return __o->compare_exchange_weak(*__e, __d, __s, __f); } // atomic_compare_exchange_strong_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong_explicit(volatile atomic<_Tp>* __o, - _Tp* __e, _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + volatile atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_strong(*__e, __d, __s, __f); + return __o->compare_exchange_strong(*__e, __d, __s, __f); } template -_LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_compare_exchange_strong_explicit(atomic<_Tp>* __o, _Tp* __e, - _Tp __d, - memory_order __s, memory_order __f) noexcept +_LIBCUDACXX_INLINE_VISIBILITY bool atomic_compare_exchange_strong_explicit( + atomic<_Tp>* __o, _Tp* __e, _Tp __d, memory_order __s, memory_order __f) noexcept _LIBCUDACXX_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return __o->compare_exchange_strong(*__e, __d, __s, __f); + 
return __o->compare_exchange_strong(*__e, __d, __s, __f); } // atomic_wait template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait(const volatile atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept { - return __o->wait(__v); + return __o->wait(__v); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait(const atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_wait(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v) noexcept { - return __o->wait(__v); + return __o->wait(__v); } // atomic_wait_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait_explicit(const volatile atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const volatile atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->wait(__v, __m); + return __o->wait(__v, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_wait_explicit(const atomic<_Tp>* __o, - typename atomic<_Tp>::value_type __v, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void +atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __v, memory_order __m) noexcept _LIBCUDACXX_CHECK_LOAD_MEMORY_ORDER(__m) { - return __o->wait(__v, __m); + return __o->wait(__v, __m); } // atomic_notify_one template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(volatile atomic<_Tp>* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_one(atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_one(atomic<_Tp>* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } // atomic_notify_one template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(volatile atomic<_Tp>* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } template -_LIBCUDACXX_INLINE_VISIBILITY -void atomic_notify_all(atomic<_Tp>* __o) noexcept +_LIBCUDACXX_INLINE_VISIBILITY void atomic_notify_all(atomic<_Tp>* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } // atomic_fetch_add template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t 
__op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_add(__op); + return __o->fetch_add(__op); } // atomic_fetch_add_explicit template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_add_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_add_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_add_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_add(__op, __m); + return __o->fetch_add(__op, __m); } // atomic_fetch_sub template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(volatile atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub(atomic<_Tp*>* __o, ptrdiff_t __op) noexcept { - 
return __o->fetch_sub(__op); + return __o->fetch_sub(__op); } // atomic_fetch_sub_explicit template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template _LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - (is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, - _Tp -> -atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept + __enable_if_t<(is_integral<_Tp>::value && !is_same<_Tp, bool>::value) || is_floating_point<_Tp>::value, _Tp> + atomic_fetch_sub_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* -atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, - memory_order __m) noexcept +_LIBCUDACXX_INLINE_VISIBILITY _Tp* +atomic_fetch_sub_explicit(volatile atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -_Tp* +_LIBCUDACXX_INLINE_VISIBILITY _Tp* atomic_fetch_sub_explicit(atomic<_Tp*>* __o, ptrdiff_t __op, memory_order __m) noexcept { - return __o->fetch_sub(__op, __m); + return __o->fetch_sub(__op, __m); } // atomic_fetch_and template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_and(__op); + return __o->fetch_and(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_and(__op); + return __o->fetch_and(__op); } // atomic_fetch_and_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_and(__op, __m); + return __o->fetch_and(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_and_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_and(__op, __m); + return __o->fetch_and(__op, __m); } // atomic_fetch_or template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_or(__op); + return __o->fetch_or(__op); 
} template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_or(__op); + return __o->fetch_or(__op); } // atomic_fetch_or_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_or(__op, __m); + return __o->fetch_or(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_or_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_or(__op, __m); + return __o->fetch_or(__op, __m); } // atomic_fetch_xor template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor(volatile atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_xor(__op); + return __o->fetch_xor(__op); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor(atomic<_Tp>* __o, _Tp __op) noexcept { - return __o->fetch_xor(__op); + return __o->fetch_xor(__op); } // atomic_fetch_xor_explicit template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor_explicit(volatile atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_xor(__op, __m); + return __o->fetch_xor(__op, __m); } template -_LIBCUDACXX_INLINE_VISIBILITY -__enable_if_t -< - is_integral<_Tp>::value && !is_same<_Tp, bool>::value, - _Tp -> +_LIBCUDACXX_INLINE_VISIBILITY __enable_if_t::value && !is_same<_Tp, bool>::value, _Tp> atomic_fetch_xor_explicit(atomic<_Tp>* __o, _Tp __op, memory_order __m) noexcept { - return __o->fetch_xor(__op, __m); + return __o->fetch_xor(__op, __m); } // flag type and operations typedef struct atomic_flag { - __cxx_atomic_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, 0> __a_; - - _LIBCUDACXX_INLINE_VISIBILITY - bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept - {return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)==__cxx_atomic_load(&__a_, __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool test(memory_order __m = memory_order_seq_cst) const noexcept - {return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true)==__cxx_atomic_load(&__a_, __m);} - - _LIBCUDACXX_INLINE_VISIBILITY - bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept - {return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept - {return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void clear(memory_order __m = memory_order_seq_cst) volatile noexcept - 
{__cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void clear(memory_order __m = memory_order_seq_cst) noexcept - {__cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m);} + __cxx_atomic_impl<_LIBCUDACXX_ATOMIC_FLAG_TYPE, 0> __a_; + + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const volatile noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test(memory_order __m = memory_order_seq_cst) const noexcept + { + return _LIBCUDACXX_ATOMIC_FLAG_TYPE(true) == __cxx_atomic_load(&__a_, __m); + } + + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept + { + return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY bool test_and_set(memory_order __m = memory_order_seq_cst) noexcept + { + return __cxx_atomic_exchange(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(true), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) volatile noexcept + { + __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void clear(memory_order __m = memory_order_seq_cst) noexcept + { + __cxx_atomic_store(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(false), __m); + } #if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 - _LIBCUDACXX_INLINE_VISIBILITY - void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile noexcept - {__cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void wait(bool __v, memory_order __m = memory_order_seq_cst) const noexcept - {__cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_one() volatile noexcept - {__cxx_atomic_notify_one(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_one() noexcept - {__cxx_atomic_notify_one(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_all() volatile noexcept - {__cxx_atomic_notify_all(&__a_);} - _LIBCUDACXX_INLINE_VISIBILITY - void notify_all() noexcept - {__cxx_atomic_notify_all(&__a_);} + _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile noexcept + { + __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void wait(bool __v, memory_order __m = memory_order_seq_cst) const noexcept + { + __cxx_atomic_wait(&__a_, _LIBCUDACXX_ATOMIC_FLAG_TYPE(__v), __m); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() volatile noexcept + { + __cxx_atomic_notify_one(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_one() noexcept + { + __cxx_atomic_notify_one(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() volatile noexcept + { + __cxx_atomic_notify_all(&__a_); + } + _LIBCUDACXX_INLINE_VISIBILITY void notify_all() noexcept + { + __cxx_atomic_notify_all(&__a_); + } #endif - atomic_flag() noexcept = default; + atomic_flag() noexcept = default; - _LIBCUDACXX_INLINE_VISIBILITY constexpr - atomic_flag(bool __b) noexcept : __a_(__b) {} // EXTENSION + _LIBCUDACXX_INLINE_VISIBILITY constexpr atomic_flag(bool __b) noexcept + : __a_(__b) + {} // EXTENSION - atomic_flag(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) = delete; - atomic_flag& operator=(const atomic_flag&) volatile = delete; + atomic_flag(const atomic_flag&) = delete; + 
atomic_flag& operator=(const atomic_flag&) = delete; + atomic_flag& operator=(const atomic_flag&) volatile = delete; } atomic_flag; - -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test(const volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const volatile atomic_flag* __o) noexcept { - return __o->test(); + return __o->test(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test(const atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test(const atomic_flag* __o) noexcept { - return __o->test(); + return __o->test(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const volatile atomic_flag* __o, memory_order __m) noexcept { - return __o->test(__m); + return __o->test(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_explicit(const atomic_flag* __o, memory_order __m) noexcept { - return __o->test(__m); + return __o->test(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(volatile atomic_flag* __o) noexcept { - return __o->test_and_set(); + return __o->test_and_set(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set(atomic_flag* __o) noexcept { - return __o->test_and_set(); + return __o->test_and_set(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(volatile atomic_flag* __o, memory_order __m) noexcept { - return __o->test_and_set(__m); + return __o->test_and_set(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -bool -atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bool atomic_flag_test_and_set_explicit(atomic_flag* __o, memory_order __m) noexcept { - return __o->test_and_set(__m); + return __o->test_and_set(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(volatile atomic_flag* __o) noexcept { - __o->clear(); + __o->clear(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear(atomic_flag* __o) noexcept { - __o->clear(); + __o->clear(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(volatile atomic_flag* __o, memory_order __m) noexcept { - __o->clear(__m); + __o->clear(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_clear_explicit(atomic_flag* __o, memory_order __m) noexcept { - __o->clear(__m); + __o->clear(__m); } #if !defined(__CUDA_MINIMUM_ARCH__) || __CUDA_MINIMUM_ARCH__ >= 700 -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) noexcept { - __o->wait(__v); + __o->wait(__v); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void 
-atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_wait(const atomic_flag* __o, bool __v) noexcept { - __o->wait(__v); + __o->wait(__v); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait_explicit(const volatile atomic_flag* __o, - bool __v, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) noexcept { - __o->wait(__v, __m); + __o->wait(__v, __m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_wait_explicit(const atomic_flag* __o, - bool __v, memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void +atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) noexcept { - __o->wait(__v, __m); + __o->wait(__v, __m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_one(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(volatile atomic_flag* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_one(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_one(atomic_flag* __o) noexcept { - __o->notify_one(); + __o->notify_one(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_all(volatile atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(volatile atomic_flag* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_flag_notify_all(atomic_flag* __o) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_flag_notify_all(atomic_flag* __o) noexcept { - __o->notify_all(); + __o->notify_all(); } #endif // fences -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_thread_fence(memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_thread_fence(memory_order __m) noexcept { - __cxx_atomic_thread_fence(__m); + __cxx_atomic_thread_fence(__m); } -inline _LIBCUDACXX_INLINE_VISIBILITY -void -atomic_signal_fence(memory_order __m) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY void atomic_signal_fence(memory_order __m) noexcept { - __cxx_atomic_signal_fence(__m); + __cxx_atomic_signal_fence(__m); } // Atomics for standard typedef types -typedef atomic atomic_bool; -typedef atomic atomic_char; -typedef atomic atomic_schar; -typedef atomic atomic_uchar; -typedef atomic atomic_short; -typedef atomic atomic_ushort; -typedef atomic atomic_int; -typedef atomic atomic_uint; -typedef atomic atomic_long; -typedef atomic atomic_ulong; -typedef atomic atomic_llong; +typedef atomic atomic_bool; +typedef atomic atomic_char; +typedef atomic atomic_schar; +typedef atomic atomic_uchar; +typedef atomic atomic_short; +typedef atomic atomic_ushort; +typedef atomic atomic_int; +typedef atomic atomic_uint; +typedef atomic atomic_long; +typedef atomic atomic_ulong; +typedef atomic atomic_llong; typedef atomic atomic_ullong; -typedef atomic atomic_char16_t; -typedef atomic atomic_char32_t; -typedef atomic atomic_wchar_t; +typedef atomic atomic_char16_t; +typedef atomic atomic_char32_t; +typedef atomic atomic_wchar_t; -typedef atomic atomic_int_least8_t; -typedef atomic atomic_uint_least8_t; -typedef atomic atomic_int_least16_t; +typedef atomic atomic_int_least8_t; +typedef atomic atomic_uint_least8_t; +typedef atomic atomic_int_least16_t; typedef atomic atomic_uint_least16_t; -typedef atomic 
atomic_int_least32_t; +typedef atomic atomic_int_least32_t; typedef atomic atomic_uint_least32_t; -typedef atomic atomic_int_least64_t; +typedef atomic atomic_int_least64_t; typedef atomic atomic_uint_least64_t; -typedef atomic atomic_int_fast8_t; -typedef atomic atomic_uint_fast8_t; -typedef atomic atomic_int_fast16_t; +typedef atomic atomic_int_fast8_t; +typedef atomic atomic_uint_fast8_t; +typedef atomic atomic_int_fast16_t; typedef atomic atomic_uint_fast16_t; -typedef atomic atomic_int_fast32_t; +typedef atomic atomic_int_fast32_t; typedef atomic atomic_uint_fast32_t; -typedef atomic atomic_int_fast64_t; +typedef atomic atomic_int_fast64_t; typedef atomic atomic_uint_fast64_t; -typedef atomic< int8_t> atomic_int8_t; -typedef atomic atomic_uint8_t; -typedef atomic< int16_t> atomic_int16_t; +typedef atomic atomic_int8_t; +typedef atomic atomic_uint8_t; +typedef atomic atomic_int16_t; typedef atomic atomic_uint16_t; -typedef atomic< int32_t> atomic_int32_t; +typedef atomic atomic_int32_t; typedef atomic atomic_uint32_t; -typedef atomic< int64_t> atomic_int64_t; +typedef atomic atomic_int64_t; typedef atomic atomic_uint64_t; -typedef atomic atomic_intptr_t; +typedef atomic atomic_intptr_t; typedef atomic atomic_uintptr_t; -typedef atomic atomic_size_t; +typedef atomic atomic_size_t; typedef atomic atomic_ptrdiff_t; -typedef atomic atomic_intmax_t; +typedef atomic atomic_intmax_t; typedef atomic atomic_uintmax_t; static_assert(ATOMIC_INT_LOCK_FREE, "This library assumes atomic is lock-free."); -typedef atomic atomic_signed_lock_free; -typedef atomic atomic_unsigned_lock_free; +typedef atomic atomic_signed_lock_free; +typedef atomic atomic_unsigned_lock_free; -#define ATOMIC_FLAG_INIT {false} -#define ATOMIC_VAR_INIT(__v) {__v} +#define ATOMIC_FLAG_INIT \ + { \ + false \ + } +#define ATOMIC_VAR_INIT(__v) \ + { \ + __v \ + } _LIBCUDACXX_END_NAMESPACE_STD #include #include -#endif // _LIBCUDACXX_ATOMIC +#endif // _LIBCUDACXX_ATOMIC diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/bitset b/libcudacxx/include/cuda/std/detail/libcxx/include/bitset index c475bfb7d9f..ebf17ae02a2 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/bitset +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/bitset @@ -74,14 +74,10 @@ public: template basic_string > to_string(charT zero = charT('0'), charT one = charT('1')) const; template - basic_string, allocator > to_string(charT zero = charT('0'), charT one = charT('1')) const; - basic_string, allocator > to_string(char zero = '0', char one = '1') const; - size_t count() const noexcept; - constexpr size_t size() const noexcept; - bool operator==(const bitset& rhs) const noexcept; - bool operator!=(const bitset& rhs) const noexcept; - bool test(size_t pos) const; - bool all() const noexcept; + basic_string, allocator > to_string(charT zero = charT('0'), charT one = +charT('1')) const; basic_string, allocator > to_string(char zero = '0', char one = '1') +const; size_t count() const noexcept; constexpr size_t size() const noexcept; bool operator==(const bitset& rhs) const +noexcept; bool operator!=(const bitset& rhs) const noexcept; bool test(size_t pos) const; bool all() const noexcept; bool any() const noexcept; bool none() const noexcept; bitset operator<<(size_t pos) const noexcept; @@ -112,14 +108,14 @@ template struct hash>; */ -#include <__config> #include <__bit_reference> -#include +#include <__config> +#include <__functional_base> #include -#include -#include +#include #include -#include <__functional_base> +#include +#include 
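[Editor's aside, not part of the patch] The <atomic> hunk that ends above only reflows declarations; the atomic_flag free functions and the ATOMIC_FLAG_INIT/ATOMIC_VAR_INIT macros behave as before. A minimal sketch of how that interface is consumed, assuming the usual cuda::std namespace spelling (the lock object and the function are illustrative, not taken from the patch):

#include <cuda/std/atomic>

cuda::std::atomic_flag lock = ATOMIC_FLAG_INIT; // macro expands to {false}

void with_lock()
{
  // spin until the previous value was false, i.e. we acquired the flag
  while (cuda::std::atomic_flag_test_and_set_explicit(&lock, cuda::std::memory_order_acquire))
  {
  }
  // ... critical section ...
  cuda::std::atomic_flag_clear_explicit(&lock, cuda::std::memory_order_release);
}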
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) # pragma GCC system_header @@ -132,929 +128,901 @@ template struct hash>; _LIBCUDACXX_PUSH_MACROS #include <__undef_macros> - _LIBCUDACXX_BEGIN_NAMESPACE_STD template class __bitset; template -struct __has_storage_type<__bitset<_N_words, _Size> > +struct __has_storage_type<__bitset<_N_words, _Size>> { - static const bool value = true; + static const bool value = true; }; template class __bitset { public: - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef size_type __storage_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; + protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - __storage_type __first_[_N_words]; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept - {return reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept - {return const_reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept - {return iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept - {return const_iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - - _LIBCUDACXX_INLINE_VISIBILITY - void operator&=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator|=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator^=(const __bitset& __v) noexcept; - - void flip() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const - {return to_ulong(integral_constant());} - _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const - {return to_ullong(integral_constant());} - - bool all() const noexcept; - bool any() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept; + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + __storage_type __first_[_N_words]; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, 
true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept + { + return reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept + { + return const_reference(__first_ + __pos / __bits_per_word, __storage_type(1) << __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept + { + return iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept + { + return const_iterator(__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset& __v) noexcept; + + void flip() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const + { + return to_ulong(integral_constant < bool, _Size()); + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const + { + return to_ullong(integral_constant < bool, _Size()); + } + + bool all() const noexcept; + bool any() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept; + private: - unsigned long to_ulong(false_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long to_ulong(true_type) const; - unsigned long long to_ullong(false_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong(true_type) const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong(true_type, false_type) const; - unsigned long long to_ullong(true_type, true_type) const; + unsigned long to_ulong(false_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong(true_type) const; + unsigned long long to_ullong(false_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong(true_type) const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong(true_type, false_type) const; + unsigned long long to_ullong(true_type, true_type) const; }; template -inline constexpr -__bitset<_N_words, _Size>::__bitset() noexcept +inline constexpr __bitset<_N_words, _Size>::__bitset() noexcept : __first_{0} {} template -inline -constexpr -__bitset<_N_words, _Size>::__bitset(unsigned long long __v) noexcept +inline constexpr __bitset<_N_words, _Size>::__bitset(unsigned long long __v) noexcept #if __SIZEOF_SIZE_T__ == 8 : __first_{__v} #elif __SIZEOF_SIZE_T__ == 4 - : __first_{static_cast<__storage_type>(__v), - _Size >= 2 * __bits_per_word ? static_cast<__storage_type>(__v >> __bits_per_word) - : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} + : __first_{ + static_cast<__storage_type>(__v), + _Size >= 2 * __bits_per_word + ? 
static_cast<__storage_type>(__v >> __bits_per_word) + : static_cast<__storage_type>((__v >> __bits_per_word) & (__storage_type(1) << (_Size - __bits_per_word)) - 1)} #else -#error This constructor has not been ported to this platform +# error This constructor has not been ported to this platform #endif {} template -inline -void -__bitset<_N_words, _Size>::operator&=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator&=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] &= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] &= __v.__first_[__i]; + } } template -inline -void -__bitset<_N_words, _Size>::operator|=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator|=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] |= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] |= __v.__first_[__i]; + } } template -inline -void -__bitset<_N_words, _Size>::operator^=(const __bitset& __v) noexcept +inline void __bitset<_N_words, _Size>::operator^=(const __bitset& __v) noexcept { - for (size_type __i = 0; __i < _N_words; ++__i) - __first_[__i] ^= __v.__first_[__i]; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __first_[__i] ^= __v.__first_[__i]; + } } template -void -__bitset<_N_words, _Size>::flip() noexcept -{ - // do middle whole words - size_type __n = _Size; - __storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - *__p = ~*__p; - // do last partial word - if (__n > 0) - { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __storage_type __b = *__p & __m; - *__p &= ~__m; - *__p |= ~__b & __m; - } +void __bitset<_N_words, _Size>::flip() noexcept +{ + // do middle whole words + size_type __n = _Size; + __storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + *__p = ~*__p; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + __storage_type __b = *__p & __m; + *__p &= ~__m; + *__p |= ~__b & __m; + } } template -unsigned long -__bitset<_N_words, _Size>::to_ulong(false_type) const +unsigned long __bitset<_N_words, _Size>::to_ulong(false_type) const { - const_iterator __e = __make_iter(_Size); - const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true); - if (__i != __e) - __throw_overflow_error("bitset to_ulong overflow error"); + const_iterator __e = __make_iter(_Size); + const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long) * CHAR_BIT), __e, true); + if (__i != __e) + { + __throw_overflow_error("bitset to_ulong overflow error"); + } - return __first_[0]; + return __first_[0]; } template -inline -unsigned long -__bitset<_N_words, _Size>::to_ulong(true_type) const +inline unsigned long __bitset<_N_words, _Size>::to_ulong(true_type) const { - return __first_[0]; + return __first_[0]; } template -unsigned long long -__bitset<_N_words, _Size>::to_ullong(false_type) const +unsigned long long __bitset<_N_words, _Size>::to_ullong(false_type) const { - const_iterator __e = __make_iter(_Size); - const_iterator __i = _CUDA_VSTD::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true); - if (__i != __e) - __throw_overflow_error("bitset to_ullong overflow error"); + const_iterator __e = __make_iter(_Size); + const_iterator __i = 
_CUDA_VSTD::find(__make_iter(sizeof(unsigned long long) * CHAR_BIT), __e, true); + if (__i != __e) + { + __throw_overflow_error("bitset to_ullong overflow error"); + } - return to_ullong(true_type()); + return to_ullong(true_type()); } template -inline -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type) const +inline unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type) const { - return to_ullong(true_type(), integral_constant()); + return to_ullong(true_type(), integral_constant()); } template -inline -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type, false_type) const +inline unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type, false_type) const { - return __first_[0]; + return __first_[0]; } template -unsigned long long -__bitset<_N_words, _Size>::to_ullong(true_type, true_type) const -{ - unsigned long long __r = __first_[0]; - for (std::size_t __i = 1; __i < sizeof(unsigned long long) / sizeof(__storage_type); ++__i) - __r |= static_cast(__first_[__i]) << (sizeof(__storage_type) * CHAR_BIT); - return __r; +unsigned long long __bitset<_N_words, _Size>::to_ullong(true_type, true_type) const +{ + unsigned long long __r = __first_[0]; + for (std::size_t __i = 1; __i < sizeof(unsigned long long) / sizeof(__storage_type); ++__i) + { + __r |= static_cast(__first_[__i]) << (sizeof(__storage_type) * CHAR_BIT); + } + return __r; } template -bool -__bitset<_N_words, _Size>::all() const noexcept -{ - // do middle whole words - size_type __n = _Size; - __const_storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - if (~*__p) - return false; - // do last partial word - if (__n > 0) +bool __bitset<_N_words, _Size>::all() const noexcept +{ + // do middle whole words + size_type __n = _Size; + __const_storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + if (~*__p) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (~*__p & __m) - return false; + return false; } - return true; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if (~*__p & __m) + { + return false; + } + } + return true; } template -bool -__bitset<_N_words, _Size>::any() const noexcept -{ - // do middle whole words - size_type __n = _Size; - __const_storage_pointer __p = __first_; - for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) - if (*__p) - return true; - // do last partial word - if (__n > 0) +bool __bitset<_N_words, _Size>::any() const noexcept +{ + // do middle whole words + size_type __n = _Size; + __const_storage_pointer __p = __first_; + for (; __n >= __bits_per_word; ++__p, __n -= __bits_per_word) + { + if (*__p) { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - if (*__p & __m) - return true; + return true; } - return false; + } + // do last partial word + if (__n > 0) + { + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); + if (*__p & __m) + { + return true; + } + } + return false; } template -inline -size_t -__bitset<_N_words, _Size>::__hash_code() const noexcept -{ - size_t __h = 0; - for (size_type __i = 0; __i < _N_words; ++__i) - __h ^= __first_[__i]; - return __h; +inline size_t __bitset<_N_words, _Size>::__hash_code() const noexcept +{ + size_t __h = 0; + for (size_type __i = 0; __i < _N_words; ++__i) + { + __h ^= __first_[__i]; + } + return __h; } template class __bitset<1, _Size> { public: - typedef ptrdiff_t 
difference_type; - typedef size_t size_type; - typedef size_type __storage_type; -protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - __storage_type __first_; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept - {return reference(&__first_, __storage_type(1) << __pos);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept - {return const_reference(&__first_, __storage_type(1) << __pos);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t __pos) noexcept - {return iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept - {return const_iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word);} - - _LIBCUDACXX_INLINE_VISIBILITY - void operator&=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator|=(const __bitset& __v) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - void operator^=(const __bitset& __v) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY - void flip() noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long to_ulong() const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong() const; - - _LIBCUDACXX_INLINE_VISIBILITY - bool all() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool any() const noexcept; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept; +protected: + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + __storage_type __first_; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t __pos) noexcept + { + return reference(&__first_, __storage_type(1) << __pos); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t __pos) const noexcept + { + return const_reference(&__first_, __storage_type(1) << __pos); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator 
__make_iter(size_t __pos) noexcept + { + return iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t __pos) const noexcept + { + return const_iterator(&__first_ + __pos / __bits_per_word, __pos % __bits_per_word); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset& __v) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset& __v) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const; + + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept; }; template -inline constexpr -__bitset<1, _Size>::__bitset() noexcept +inline constexpr __bitset<1, _Size>::__bitset() noexcept : __first_(0) -{ -} +{} template -inline constexpr -__bitset<1, _Size>::__bitset(unsigned long long __v) noexcept - : __first_( - _Size == __bits_per_word ? static_cast<__storage_type>(__v) - : static_cast<__storage_type>(__v) & ((__storage_type(1) << _Size) - 1) - ) -{ -} +inline constexpr __bitset<1, _Size>::__bitset(unsigned long long __v) noexcept + : __first_(_Size == __bits_per_word ? static_cast<__storage_type>(__v) + : static_cast<__storage_type>(__v) & ((__storage_type(1) << _Size) - 1)) +{} template -inline -void -__bitset<1, _Size>::operator&=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator&=(const __bitset& __v) noexcept { - __first_ &= __v.__first_; + __first_ &= __v.__first_; } template -inline -void -__bitset<1, _Size>::operator|=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator|=(const __bitset& __v) noexcept { - __first_ |= __v.__first_; + __first_ |= __v.__first_; } template -inline -void -__bitset<1, _Size>::operator^=(const __bitset& __v) noexcept +inline void __bitset<1, _Size>::operator^=(const __bitset& __v) noexcept { - __first_ ^= __v.__first_; + __first_ ^= __v.__first_; } template -inline -void -__bitset<1, _Size>::flip() noexcept +inline void __bitset<1, _Size>::flip() noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - __first_ = ~__first_; - __first_ &= __m; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + __first_ = ~__first_; + __first_ &= __m; } template -inline -unsigned long -__bitset<1, _Size>::to_ulong() const +inline unsigned long __bitset<1, _Size>::to_ulong() const { - return __first_; + return __first_; } template -inline -unsigned long long -__bitset<1, _Size>::to_ullong() const +inline unsigned long long __bitset<1, _Size>::to_ullong() const { - return __first_; + return __first_; } template -inline -bool -__bitset<1, _Size>::all() const noexcept +inline bool __bitset<1, _Size>::all() const noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - return !(~__first_ & __m); + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + return !(~__first_ & __m); } template -inline -bool -__bitset<1, _Size>::any() const noexcept +inline bool __bitset<1, _Size>::any() const noexcept { - __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); - return __first_ & __m; + __storage_type __m = ~__storage_type(0) >> (__bits_per_word - _Size); + return __first_ & 
__m; } template -inline -size_t -__bitset<1, _Size>::__hash_code() const noexcept +inline size_t __bitset<1, _Size>::__hash_code() const noexcept { - return __first_; + return __first_; } template <> class __bitset<0, 0> { public: - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef size_type __storage_type; + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef size_type __storage_type; + protected: - typedef __bitset __self; - typedef __storage_type* __storage_pointer; - typedef const __storage_type* __const_storage_pointer; - static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); - - friend class __bit_reference<__bitset>; - friend class __bit_const_reference<__bitset>; - friend class __bit_iterator<__bitset, false>; - friend class __bit_iterator<__bitset, true>; - friend struct __bit_array<__bitset>; - - typedef __bit_reference<__bitset> reference; - typedef __bit_const_reference<__bitset> const_reference; - typedef __bit_iterator<__bitset, false> iterator; - typedef __bit_iterator<__bitset, true> const_iterator; - - _LIBCUDACXX_INLINE_VISIBILITY - constexpr __bitset() noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - explicit constexpr __bitset(unsigned long long) noexcept; - - _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t) noexcept - {return reference(0, 1);} - _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t) const noexcept - {return const_reference(0, 1);} - _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t) noexcept - {return iterator(0, 0);} - _LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t) const noexcept - {return const_iterator(0, 0);} - - _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset&) noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset&) noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset&) noexcept {} - - _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {} - - _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const {return 0;} - _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const {return 0;} - - _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept {return true;} - _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept {return false;} - - _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept {return 0;} + typedef __bitset __self; + typedef __storage_type* __storage_pointer; + typedef const __storage_type* __const_storage_pointer; + static const unsigned __bits_per_word = static_cast(sizeof(__storage_type) * CHAR_BIT); + + friend class __bit_reference<__bitset>; + friend class __bit_const_reference<__bitset>; + friend class __bit_iterator<__bitset, false>; + friend class __bit_iterator<__bitset, true>; + friend struct __bit_array<__bitset>; + + typedef __bit_reference<__bitset> reference; + typedef __bit_const_reference<__bitset> const_reference; + typedef __bit_iterator<__bitset, false> iterator; + typedef __bit_iterator<__bitset, true> const_iterator; + + _LIBCUDACXX_INLINE_VISIBILITY constexpr __bitset() noexcept; + _LIBCUDACXX_INLINE_VISIBILITY explicit constexpr __bitset(unsigned long long) noexcept; + + _LIBCUDACXX_INLINE_VISIBILITY reference __make_ref(size_t) noexcept + { + return reference(0, 1); + } + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference __make_ref(size_t) const noexcept + { + return const_reference(0, 1); + } + _LIBCUDACXX_INLINE_VISIBILITY iterator __make_iter(size_t) noexcept + { + return iterator(0, 0); + } + 
_LIBCUDACXX_INLINE_VISIBILITY const_iterator __make_iter(size_t) const noexcept + { + return const_iterator(0, 0); + } + + _LIBCUDACXX_INLINE_VISIBILITY void operator&=(const __bitset&) noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY void operator|=(const __bitset&) noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY void operator^=(const __bitset&) noexcept {} + + _LIBCUDACXX_INLINE_VISIBILITY void flip() noexcept {} + + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const + { + return 0; + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const + { + return 0; + } + + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept + { + return true; + } + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept + { + return false; + } + + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept + { + return 0; + } }; -inline -constexpr -__bitset<0, 0>::__bitset() noexcept -{ -} +inline constexpr __bitset<0, 0>::__bitset() noexcept {} -inline -constexpr -__bitset<0, 0>::__bitset(unsigned long long) noexcept -{ -} +inline constexpr __bitset<0, 0>::__bitset(unsigned long long) noexcept {} -template class _LIBCUDACXX_TEMPLATE_VIS bitset; -template struct hash >; +template +class _LIBCUDACXX_TEMPLATE_VIS bitset; +template +struct hash>; template class _LIBCUDACXX_TEMPLATE_VIS bitset : private __bitset<_Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1, _Size> { public: - static const unsigned __n_words = _Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1; - typedef __bitset<__n_words, _Size> base; + static const unsigned __n_words = _Size == 0 ? 0 : (_Size - 1) / (sizeof(size_t) * CHAR_BIT) + 1; + typedef __bitset<__n_words, _Size> base; public: - typedef typename base::reference reference; - typedef typename base::const_reference const_reference; - - // 23.3.5.1 constructors: - _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset() noexcept {} - _LIBCUDACXX_INLINE_VISIBILITY constexpr - bitset(unsigned long long __v) noexcept : base(__v) {} - template::value> > - explicit bitset(const _CharT* __str, - typename basic_string<_CharT>::size_type __n = basic_string<_CharT>::npos, - _CharT __zero = _CharT('0'), _CharT __one = _CharT('1')); - template - explicit bitset(const basic_string<_CharT,_Traits,_Allocator>& __str, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __pos = 0, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __n = - (basic_string<_CharT,_Traits,_Allocator>::npos), - _CharT __zero = _CharT('0'), _CharT __one = _CharT('1')); - - // 23.3.5.2 bitset operations: - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator&=(const bitset& __rhs) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator|=(const bitset& __rhs) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& operator^=(const bitset& __rhs) noexcept; - bitset& operator<<=(size_t __pos) noexcept; - bitset& operator>>=(size_t __pos) noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& set() noexcept; - bitset& set(size_t __pos, bool __val = true); - _LIBCUDACXX_INLINE_VISIBILITY - bitset& reset() noexcept; - bitset& reset(size_t __pos); - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator~() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset& flip() noexcept; - bitset& flip(size_t __pos); - - // element access: - _LIBCUDACXX_INLINE_VISIBILITY constexpr - const_reference operator[](size_t __p) const {return base::__make_ref(__p);} - _LIBCUDACXX_INLINE_VISIBILITY reference operator[](size_t __p) {return base::__make_ref(__p);} - _LIBCUDACXX_INLINE_VISIBILITY - unsigned 
long to_ulong() const; - _LIBCUDACXX_INLINE_VISIBILITY - unsigned long long to_ullong() const; - template - basic_string<_CharT, _Traits, _Allocator> to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - template - _LIBCUDACXX_INLINE_VISIBILITY - basic_string<_CharT, _Traits, allocator<_CharT> > to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - template - _LIBCUDACXX_INLINE_VISIBILITY - basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> > to_string(_CharT __zero = _CharT('0'), - _CharT __one = _CharT('1')) const; - _LIBCUDACXX_INLINE_VISIBILITY - basic_string, allocator > to_string(char __zero = '0', - char __one = '1') const; - _LIBCUDACXX_INLINE_VISIBILITY - size_t count() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY constexpr size_t size() const noexcept {return _Size;} - _LIBCUDACXX_INLINE_VISIBILITY - bool operator==(const bitset& __rhs) const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool operator!=(const bitset& __rhs) const noexcept; - bool test(size_t __pos) const; - _LIBCUDACXX_INLINE_VISIBILITY - bool all() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bool any() const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY bool none() const noexcept {return !any();} - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator<<(size_t __pos) const noexcept; - _LIBCUDACXX_INLINE_VISIBILITY - bitset operator>>(size_t __pos) const noexcept; + typedef typename base::reference reference; + typedef typename base::const_reference const_reference; + + // 23.3.5.1 constructors: + _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset() noexcept {} + _LIBCUDACXX_INLINE_VISIBILITY constexpr bitset(unsigned long long __v) noexcept + : base(__v) + {} + template ::value>> + explicit bitset(const _CharT* __str, + typename basic_string<_CharT>::size_type __n = basic_string<_CharT>::npos, + _CharT __zero = _CharT('0'), + _CharT __one = _CharT('1')); + template + explicit bitset(const basic_string<_CharT, _Traits, _Allocator>& __str, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __pos = 0, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __n = + (basic_string<_CharT, _Traits, _Allocator>::npos), + _CharT __zero = _CharT('0'), + _CharT __one = _CharT('1')); + + // 23.3.5.2 bitset operations: + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator&=(const bitset& __rhs) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator|=(const bitset& __rhs) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& operator^=(const bitset& __rhs) noexcept; + bitset& operator<<=(size_t __pos) noexcept; + bitset& operator>>=(size_t __pos) noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& set() noexcept; + bitset& set(size_t __pos, bool __val = true); + _LIBCUDACXX_INLINE_VISIBILITY bitset& reset() noexcept; + bitset& reset(size_t __pos); + _LIBCUDACXX_INLINE_VISIBILITY bitset operator~() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset& flip() noexcept; + bitset& flip(size_t __pos); + + // element access: + _LIBCUDACXX_INLINE_VISIBILITY constexpr const_reference operator[](size_t __p) const + { + return base::__make_ref(__p); + } + _LIBCUDACXX_INLINE_VISIBILITY reference operator[](size_t __p) + { + return base::__make_ref(__p); + } + _LIBCUDACXX_INLINE_VISIBILITY unsigned long to_ulong() const; + _LIBCUDACXX_INLINE_VISIBILITY unsigned long long to_ullong() const; + template + basic_string<_CharT, _Traits, _Allocator> to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + template + _LIBCUDACXX_INLINE_VISIBILITY 
basic_string<_CharT, _Traits, allocator<_CharT>> + to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + template + _LIBCUDACXX_INLINE_VISIBILITY basic_string<_CharT, char_traits<_CharT>, allocator<_CharT>> + to_string(_CharT __zero = _CharT('0'), _CharT __one = _CharT('1')) const; + _LIBCUDACXX_INLINE_VISIBILITY basic_string, allocator> + to_string(char __zero = '0', char __one = '1') const; + _LIBCUDACXX_INLINE_VISIBILITY size_t count() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY constexpr size_t size() const noexcept + { + return _Size; + } + _LIBCUDACXX_INLINE_VISIBILITY bool operator==(const bitset& __rhs) const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool operator!=(const bitset& __rhs) const noexcept; + bool test(size_t __pos) const; + _LIBCUDACXX_INLINE_VISIBILITY bool all() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool any() const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bool none() const noexcept + { + return !any(); + } + _LIBCUDACXX_INLINE_VISIBILITY bitset operator<<(size_t __pos) const noexcept; + _LIBCUDACXX_INLINE_VISIBILITY bitset operator>>(size_t __pos) const noexcept; private: + _LIBCUDACXX_INLINE_VISIBILITY size_t __hash_code() const noexcept + { + return base::__hash_code(); + } - _LIBCUDACXX_INLINE_VISIBILITY - size_t __hash_code() const noexcept {return base::__hash_code();} - - friend struct hash; + friend struct hash; }; template -template -bitset<_Size>::bitset(const _CharT* __str, - typename basic_string<_CharT>::size_type __n, - _CharT __zero, _CharT __one) +template +bitset<_Size>::bitset(const _CharT* __str, typename basic_string<_CharT>::size_type __n, _CharT __zero, _CharT __one) { - size_t __rlen = _CUDA_VSTD::min(__n, char_traits<_CharT>::length(__str)); - for (size_t __i = 0; __i < __rlen; ++__i) - if (__str[__i] != __zero && __str[__i] != __one) - __throw_invalid_argument("bitset string ctor has invalid argument"); - - size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); - size_t __i = 0; - for (; __i < _Mp; ++__i) + size_t __rlen = _CUDA_VSTD::min(__n, char_traits<_CharT>::length(__str)); + for (size_t __i = 0; __i < __rlen; ++__i) + { + if (__str[__i] != __zero && __str[__i] != __one) { - _CharT __c = __str[_Mp - 1 - __i]; - if (__c == __zero) - (*this)[__i] = false; - else - (*this)[__i] = true; + __throw_invalid_argument("bitset string ctor has invalid argument"); } - _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); + } + + size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); + size_t __i = 0; + for (; __i < _Mp; ++__i) + { + _CharT __c = __str[_Mp - 1 - __i]; + if (__c == __zero) + { + (*this)[__i] = false; + } + else + { + (*this)[__i] = true; + } + } + _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); } template -template -bitset<_Size>::bitset(const basic_string<_CharT,_Traits,_Allocator>& __str, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __pos, - typename basic_string<_CharT,_Traits,_Allocator>::size_type __n, - _CharT __zero, _CharT __one) -{ - if (__pos > __str.size()) - __throw_out_of_range("bitset string pos out of range"); - - size_t __rlen = _CUDA_VSTD::min(__n, __str.size() - __pos); - for (size_t __i = __pos; __i < __pos + __rlen; ++__i) - if (!_Traits::eq(__str[__i], __zero) && !_Traits::eq(__str[__i], __one)) - __throw_invalid_argument("bitset string ctor has invalid argument"); - - size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); - size_t __i = 0; - for (; __i < _Mp; ++__i) +template +bitset<_Size>::bitset( + const basic_string<_CharT, _Traits, 
_Allocator>& __str, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __pos, + typename basic_string<_CharT, _Traits, _Allocator>::size_type __n, + _CharT __zero, + _CharT __one) +{ + if (__pos > __str.size()) + { + __throw_out_of_range("bitset string pos out of range"); + } + + size_t __rlen = _CUDA_VSTD::min(__n, __str.size() - __pos); + for (size_t __i = __pos; __i < __pos + __rlen; ++__i) + { + if (!_Traits::eq(__str[__i], __zero) && !_Traits::eq(__str[__i], __one)) + { + __throw_invalid_argument("bitset string ctor has invalid argument"); + } + } + + size_t _Mp = _CUDA_VSTD::min(__rlen, _Size); + size_t __i = 0; + for (; __i < _Mp; ++__i) + { + _CharT __c = __str[__pos + _Mp - 1 - __i]; + if (_Traits::eq(__c, __zero)) { - _CharT __c = __str[__pos + _Mp - 1 - __i]; - if (_Traits::eq(__c, __zero)) - (*this)[__i] = false; - else - (*this)[__i] = true; + (*this)[__i] = false; } - _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); + else + { + (*this)[__i] = true; + } + } + _CUDA_VSTD::fill(base::__make_iter(__i), base::__make_iter(_Size), false); } template -inline -bitset<_Size>& -bitset<_Size>::operator&=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator&=(const bitset& __rhs) noexcept { - base::operator&=(__rhs); - return *this; + base::operator&=(__rhs); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::operator|=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator|=(const bitset& __rhs) noexcept { - base::operator|=(__rhs); - return *this; + base::operator|=(__rhs); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::operator^=(const bitset& __rhs) noexcept +inline bitset<_Size>& bitset<_Size>::operator^=(const bitset& __rhs) noexcept { - base::operator^=(__rhs); - return *this; + base::operator^=(__rhs); + return *this; } template -bitset<_Size>& -bitset<_Size>::operator<<=(size_t __pos) noexcept +bitset<_Size>& bitset<_Size>::operator<<=(size_t __pos) noexcept { - __pos = _CUDA_VSTD::min(__pos, _Size); - _CUDA_VSTD::copy_backward(base::__make_iter(0), base::__make_iter(_Size - __pos), base::__make_iter(_Size)); - _CUDA_VSTD::fill_n(base::__make_iter(0), __pos, false); - return *this; + __pos = _CUDA_VSTD::min(__pos, _Size); + _CUDA_VSTD::copy_backward(base::__make_iter(0), base::__make_iter(_Size - __pos), base::__make_iter(_Size)); + _CUDA_VSTD::fill_n(base::__make_iter(0), __pos, false); + return *this; } template -bitset<_Size>& -bitset<_Size>::operator>>=(size_t __pos) noexcept +bitset<_Size>& bitset<_Size>::operator>>=(size_t __pos) noexcept { - __pos = _CUDA_VSTD::min(__pos, _Size); - _CUDA_VSTD::copy(base::__make_iter(__pos), base::__make_iter(_Size), base::__make_iter(0)); - _CUDA_VSTD::fill_n(base::__make_iter(_Size - __pos), __pos, false); - return *this; + __pos = _CUDA_VSTD::min(__pos, _Size); + _CUDA_VSTD::copy(base::__make_iter(__pos), base::__make_iter(_Size), base::__make_iter(0)); + _CUDA_VSTD::fill_n(base::__make_iter(_Size - __pos), __pos, false); + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::set() noexcept +inline bitset<_Size>& bitset<_Size>::set() noexcept { - _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, true); - return *this; + _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, true); + return *this; } template -bitset<_Size>& -bitset<_Size>::set(size_t __pos, bool __val) +bitset<_Size>& bitset<_Size>::set(size_t __pos, bool __val) { - if (__pos >= _Size) - __throw_out_of_range("bitset set argument out of range"); + if 
(__pos >= _Size) + { + __throw_out_of_range("bitset set argument out of range"); + } - (*this)[__pos] = __val; - return *this; + (*this)[__pos] = __val; + return *this; } template -inline -bitset<_Size>& -bitset<_Size>::reset() noexcept +inline bitset<_Size>& bitset<_Size>::reset() noexcept { - _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, false); - return *this; + _CUDA_VSTD::fill_n(base::__make_iter(0), _Size, false); + return *this; } template -bitset<_Size>& -bitset<_Size>::reset(size_t __pos) +bitset<_Size>& bitset<_Size>::reset(size_t __pos) { - if (__pos >= _Size) - __throw_out_of_range("bitset reset argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset reset argument out of range"); + } - (*this)[__pos] = false; - return *this; + (*this)[__pos] = false; + return *this; } template -inline -bitset<_Size> -bitset<_Size>::operator~() const noexcept +inline bitset<_Size> bitset<_Size>::operator~() const noexcept { - bitset __x(*this); - __x.flip(); - return __x; + bitset __x(*this); + __x.flip(); + return __x; } template -inline -bitset<_Size>& -bitset<_Size>::flip() noexcept +inline bitset<_Size>& bitset<_Size>::flip() noexcept { - base::flip(); - return *this; + base::flip(); + return *this; } template -bitset<_Size>& -bitset<_Size>::flip(size_t __pos) +bitset<_Size>& bitset<_Size>::flip(size_t __pos) { - if (__pos >= _Size) - __throw_out_of_range("bitset flip argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset flip argument out of range"); + } - reference r = base::__make_ref(__pos); - r = ~r; - return *this; + reference r = base::__make_ref(__pos); + r = ~r; + return *this; } template -inline -unsigned long -bitset<_Size>::to_ulong() const +inline unsigned long bitset<_Size>::to_ulong() const { - return base::to_ulong(); + return base::to_ulong(); } template -inline -unsigned long long -bitset<_Size>::to_ullong() const +inline unsigned long long bitset<_Size>::to_ullong() const { - return base::to_ullong(); + return base::to_ullong(); } template template -basic_string<_CharT, _Traits, _Allocator> -bitset<_Size>::to_string(_CharT __zero, _CharT __one) const +basic_string<_CharT, _Traits, _Allocator> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero); - for (size_t __i = 0; __i < _Size; ++__i) + basic_string<_CharT, _Traits, _Allocator> __r(_Size, __zero); + for (size_t __i = 0; __i < _Size; ++__i) + { + if ((*this)[__i]) { - if ((*this)[__i]) - __r[_Size - 1 - __i] = __one; + __r[_Size - 1 - __i] = __one; } - return __r; + } + return __r; } template template -inline -basic_string<_CharT, _Traits, allocator<_CharT> > -bitset<_Size>::to_string(_CharT __zero, _CharT __one) const +inline basic_string<_CharT, _Traits, allocator<_CharT>> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - return to_string<_CharT, _Traits, allocator<_CharT> >(__zero, __one); + return to_string<_CharT, _Traits, allocator<_CharT>>(__zero, __one); } template template -inline -basic_string<_CharT, char_traits<_CharT>, allocator<_CharT> > +inline basic_string<_CharT, char_traits<_CharT>, allocator<_CharT>> bitset<_Size>::to_string(_CharT __zero, _CharT __one) const { - return to_string<_CharT, char_traits<_CharT>, allocator<_CharT> >(__zero, __one); + return to_string<_CharT, char_traits<_CharT>, allocator<_CharT>>(__zero, __one); } template -inline -basic_string, allocator > -bitset<_Size>::to_string(char __zero, char __one) const +inline basic_string, allocator> 
bitset<_Size>::to_string(char __zero, char __one) const { - return to_string, allocator >(__zero, __one); + return to_string, allocator>(__zero, __one); } template -inline -size_t -bitset<_Size>::count() const noexcept +inline size_t bitset<_Size>::count() const noexcept { - return static_cast(__count_bool_true(base::__make_iter(0), _Size)); + return static_cast(__count_bool_true(base::__make_iter(0), _Size)); } template -inline -bool -bitset<_Size>::operator==(const bitset& __rhs) const noexcept +inline bool bitset<_Size>::operator==(const bitset& __rhs) const noexcept { - return _CUDA_VSTD::equal(base::__make_iter(0), base::__make_iter(_Size), __rhs.__make_iter(0)); + return _CUDA_VSTD::equal(base::__make_iter(0), base::__make_iter(_Size), __rhs.__make_iter(0)); } template -inline -bool -bitset<_Size>::operator!=(const bitset& __rhs) const noexcept +inline bool bitset<_Size>::operator!=(const bitset& __rhs) const noexcept { - return !(*this == __rhs); + return !(*this == __rhs); } template -bool -bitset<_Size>::test(size_t __pos) const +bool bitset<_Size>::test(size_t __pos) const { - if (__pos >= _Size) - __throw_out_of_range("bitset test argument out of range"); + if (__pos >= _Size) + { + __throw_out_of_range("bitset test argument out of range"); + } - return (*this)[__pos]; + return (*this)[__pos]; } template -inline -bool -bitset<_Size>::all() const noexcept +inline bool bitset<_Size>::all() const noexcept { - return base::all(); + return base::all(); } template -inline -bool -bitset<_Size>::any() const noexcept +inline bool bitset<_Size>::any() const noexcept { - return base::any(); + return base::any(); } template -inline -bitset<_Size> -bitset<_Size>::operator<<(size_t __pos) const noexcept +inline bitset<_Size> bitset<_Size>::operator<<(size_t __pos) const noexcept { - bitset __r = *this; - __r <<= __pos; - return __r; + bitset __r = *this; + __r <<= __pos; + return __r; } template -inline -bitset<_Size> -bitset<_Size>::operator>>(size_t __pos) const noexcept +inline bitset<_Size> bitset<_Size>::operator>>(size_t __pos) const noexcept { - bitset __r = *this; - __r >>= __pos; - return __r; + bitset __r = *this; + __r >>= __pos; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator&(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r &= __y; - return __r; + bitset<_Size> __r = __x; + __r &= __y; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator|(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r |= __y; - return __r; + bitset<_Size> __r = __x; + __r |= __y; + return __r; } template -inline _LIBCUDACXX_INLINE_VISIBILITY -bitset<_Size> -operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept +inline _LIBCUDACXX_INLINE_VISIBILITY bitset<_Size> operator^(const bitset<_Size>& __x, const bitset<_Size>& __y) noexcept { - bitset<_Size> __r = __x; - __r ^= __y; - return __r; + bitset<_Size> __r = __x; + __r ^= __y; + return __r; } template -struct _LIBCUDACXX_TEMPLATE_VIS hash > - : public __unary_function, size_t> +struct _LIBCUDACXX_TEMPLATE_VIS hash> : public __unary_function, size_t> { - _LIBCUDACXX_INLINE_VISIBILITY - size_t operator()(const bitset<_Size>& __bs) const 
noexcept - {return __bs.__hash_code();} + _LIBCUDACXX_INLINE_VISIBILITY size_t operator()(const bitset<_Size>& __bs) const noexcept + { + return __bs.__hash_code(); + } }; template -basic_istream<_CharT, _Traits>& -operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x); +basic_istream<_CharT, _Traits>& operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Size>& __x); template -basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x); +basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Size>& __x); _LIBCUDACXX_END_NAMESPACE_STD _LIBCUDACXX_POP_MACROS -#endif // _LIBCUDACXX_BITSET +#endif // _LIBCUDACXX_BITSET diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/concepts b/libcudacxx/include/cuda/std/detail/libcxx/include/concepts index 15f041190c0..24995197262 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/concepts +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/concepts @@ -140,7 +140,6 @@ namespace std { # pragma system_header #endif // no system header -#include // all public C++ headers provide the assertion handler #include #include #include @@ -166,7 +165,7 @@ namespace std { #include #include #include - +#include // all public C++ headers provide the assertion handler #include #endif // _LIBCUDACXX_CONCEPTS diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/version b/libcudacxx/include/cuda/std/detail/libcxx/include/version index 9c81e18dcb2..08f33681920 100644 --- a/libcudacxx/include/cuda/std/detail/libcxx/include/version +++ b/libcudacxx/include/cuda/std/detail/libcxx/include/version @@ -10,7 +10,6 @@ #ifndef _LIBCUDACXX_VERSIONH #define _LIBCUDACXX_VERSIONH - /* version synopsis @@ -212,131 +211,131 @@ __cpp_lib_void_t 201411L // We need to define our own macros to not conflict with the host stl. 
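[Editor's aside, not part of the patch] The remainder of this <version> hunk only re-indents the __cccl_lib_* feature-test macros to clang-format's nested-# style; as far as the hunk shows, no macro name or value changes. Downstream code checks them in the usual way, for example (the macro name and value are copied from the definitions below; the guarded code is a placeholder):

#include <cuda/std/version>

#if defined(__cccl_lib_span) && __cccl_lib_span >= 202002L
// code that relies on cuda::std::span can be compiled here
#endif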
// At the same time we want bring in all feature test macros from host #if __has_include() // should be the smallest include possible -#include +# include #elif !defined(_CCCL_COMPILER_NVRTC) -#include // otherwise go for the smallest possible header +# include // otherwise go for the smallest possible header #endif #if _CCCL_STD_VER > 2011 -# define __cccl_lib_chrono_udls 201304L -# define __cccl_lib_complex_udls 201309L -#ifdef _LIBCUDACXX_IS_CONSTANT_EVALUATED -# define __cccl_lib_constexpr_complex 201711L -#endif -# define __cccl_lib_concepts 202002L -# define __cccl_lib_exchange_function 201304L -# define __cccl_lib_expected 202211L +# define __cccl_lib_chrono_udls 201304L +# define __cccl_lib_complex_udls 201309L +# ifdef _LIBCUDACXX_IS_CONSTANT_EVALUATED +# define __cccl_lib_constexpr_complex 201711L +# endif +# define __cccl_lib_concepts 202002L +# define __cccl_lib_exchange_function 201304L +# define __cccl_lib_expected 202211L // # define __cccl_lib_generic_associative_lookup 201304L -# define __cccl_lib_integer_sequence 201304L -# define __cccl_lib_integral_constant_callable 201304L -# define __cccl_lib_is_final 201402L -# define __cccl_lib_is_null_pointer 201309L -# define __cccl_lib_make_reverse_iterator 201402L +# define __cccl_lib_integer_sequence 201304L +# define __cccl_lib_integral_constant_callable 201304L +# define __cccl_lib_is_final 201402L +# define __cccl_lib_is_null_pointer 201309L +# define __cccl_lib_make_reverse_iterator 201402L // # define __cccl_lib_make_unique 201304L -# define __cccl_lib_null_iterators 201304L -# define __cccl_lib_optional 202110L +# define __cccl_lib_null_iterators 201304L +# define __cccl_lib_optional 202110L // # define __cccl_lib_quoted_string_io 201304L -# define __cccl_lib_result_of_sfinae 201210L -# define __cccl_lib_robust_nonmodifying_seq_ops 201304L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_result_of_sfinae 201210L +# define __cccl_lib_robust_nonmodifying_seq_ops 201304L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_shared_timed_mutex 201402L -# endif -# define __cccl_lib_span 202002L +# endif +# define __cccl_lib_span 202002L // # define __cccl_lib_string_udls 201304L -# define __cccl_lib_transformation_trait_aliases 201304L -# define __cccl_lib_transparent_operators 201210L -# define __cccl_lib_tuple_element_t 201402L -# define __cccl_lib_tuples_by_type 201304L +# define __cccl_lib_transformation_trait_aliases 201304L +# define __cccl_lib_transparent_operators 201210L +# define __cccl_lib_tuple_element_t 201402L +# define __cccl_lib_tuples_by_type 201304L #endif // _CCCL_STD_VER > 2011 #if _CCCL_STD_VER > 2014 -# if defined(_LIBCUDACXX_ADDRESSOF) -# define __cccl_lib_addressof_constexpr 201603L -# endif +# if defined(_LIBCUDACXX_ADDRESSOF) +# define __cccl_lib_addressof_constexpr 201603L +# endif // # define __cccl_lib_allocator_traits_is_always_equal 201411L // # define __cccl_lib_any 201606L -# define __cccl_lib_apply 201603L -# define __cccl_lib_array_constexpr 201603L -# define __cccl_lib_as_const 201510L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) -# define __cccl_lib_atomic_is_always_lock_free 201603L -# endif -# define __cccl_lib_bind_front 201907L -# define __cccl_lib_bool_constant 201505L +# define __cccl_lib_apply 201603L +# define __cccl_lib_array_constexpr 201603L +# define __cccl_lib_as_const 201510L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_atomic_is_always_lock_free 201603L +# endif +# define __cccl_lib_bind_front 201907L +# define 
__cccl_lib_bool_constant 201505L // # define __cccl_lib_boyer_moore_searcher 201603L -# define __cccl_lib_byte 201603L -# define __cccl_lib_chrono 201611L +# define __cccl_lib_byte 201603L +# define __cccl_lib_chrono 201611L // # define __cccl_lib_clamp 201603L // # define __cccl_lib_enable_shared_from_this 201603L // # define __cccl_lib_execution 201603L // # define __cccl_lib_filesystem 201703L -# define __cccl_lib_gcd_lcm 201606L -# define __cccl_lib_hardware_interference_size 201703L -# if defined(_LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS) -# define __cccl_lib_has_unique_object_representations 201606L -# endif -# define __cccl_lib_hypot 201603L +# define __cccl_lib_gcd_lcm 201606L +# define __cccl_lib_hardware_interference_size 201703L +# if defined(_LIBCUDACXX_HAS_UNIQUE_OBJECT_REPRESENTATIONS) +# define __cccl_lib_has_unique_object_representations 201606L +# endif +# define __cccl_lib_hypot 201603L // # define __cccl_lib_incomplete_container_elements 201505L -# define __cccl_lib_invoke 201411L -# if !defined(_LIBCUDACXX_HAS_NO_IS_AGGREGATE) -# define __cccl_lib_is_aggregate 201703L -# endif -# define __cccl_lib_is_invocable 201703L -# define __cccl_lib_is_swappable 201603L -# define __cccl_lib_launder 201606L -# define __cccl_lib_logical_traits 201510L -# define __cccl_lib_make_from_tuple 201606L +# define __cccl_lib_invoke 201411L +# if !defined(_LIBCUDACXX_HAS_NO_IS_AGGREGATE) +# define __cccl_lib_is_aggregate 201703L +# endif +# define __cccl_lib_is_invocable 201703L +# define __cccl_lib_is_swappable 201603L +# define __cccl_lib_launder 201606L +# define __cccl_lib_logical_traits 201510L +# define __cccl_lib_make_from_tuple 201606L // # define __cccl_lib_map_try_emplace 201411L // # define __cccl_lib_math_special_functions 201603L // # define __cccl_lib_memory_resource 201603L // # define __cccl_lib_node_extract 201606L // # define __cccl_lib_nonmember_container_access 201411L -# define __cccl_lib_not_fn 201603L +# define __cccl_lib_not_fn 201603L // # define __cccl_lib_parallel_algorithm 201603L // # define __cccl_lib_raw_memory_algorithms 201606L // # define __cccl_lib_sample 201603L // # define __cccl_lib_scoped_lock 201703L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_shared_mutex 201505L -# endif +# endif // # define __cccl_lib_shared_ptr_arrays 201611L // # define __cccl_lib_shared_ptr_weak_type 201606L // # define __cccl_lib_string_view 201606L // # define __cccl_lib_to_chars 201611L -# define __cccl_lib_type_trait_variable_templates 201510L -# define __cccl_lib_uncaught_exceptions 201411L -# define __cccl_lib_unordered_map_try_emplace 201411L -# define __cccl_lib_variant 201606L -# define __cccl_lib_void_t 201411L +# define __cccl_lib_type_trait_variable_templates 201510L +# define __cccl_lib_uncaught_exceptions 201411L +# define __cccl_lib_unordered_map_try_emplace 201411L +# define __cccl_lib_variant 201606L +# define __cccl_lib_void_t 201411L #endif // _CCCL_STD_VER > 2014 #if _CCCL_STD_VER > 2017 -# undef __cccl_lib_array_constexpr -# define __cccl_lib_array_constexpr 201811L +# undef __cccl_lib_array_constexpr +# define __cccl_lib_array_constexpr 201811L // # define __cccl_lib_assume_aligned 201811L -# define __cccl_lib_atomic_flag_test 201907L -# define __cccl_lib_atomic_float 201711L -# define __cccl_lib_atomic_lock_free_type_aliases 201907L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) -# define __cccl_lib_atomic_ref 201806L -#endif +# define __cccl_lib_atomic_flag_test 201907L +# define 
__cccl_lib_atomic_float 201711L +# define __cccl_lib_atomic_lock_free_type_aliases 201907L +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# define __cccl_lib_atomic_ref 201806L +# endif // # define __cccl_lib_atomic_shared_ptr 201711L -# define __cccl_lib_atomic_value_initialization 201911L -# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) -# define __cccl_lib_atomic_wait 201907L -# endif -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) -# define __cccl_lib_barrier 201907L -# endif -# define __cccl_lib_bit_cast 201806L -# define __cccl_lib_bitops 201907L -# define __cccl_lib_bounded_array_traits 201902L -# if !defined(_LIBCUDACXX_NO_HAS_CHAR8_T) -# define __cccl_lib_char8_t 201811L -# endif +# define __cccl_lib_atomic_value_initialization 201911L +# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) +# define __cccl_lib_atomic_wait 201907L +# endif +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) +# define __cccl_lib_barrier 201907L +# endif +# define __cccl_lib_bit_cast 201806L +# define __cccl_lib_bitops 201907L +# define __cccl_lib_bounded_array_traits 201902L +# if !defined(_LIBCUDACXX_NO_HAS_CHAR8_T) +# define __cccl_lib_char8_t 201811L +# endif // # define __cccl_lib_constexpr_algorithms 201806L // # define __cccl_lib_constexpr_dynamic_alloc 201907L -# define __cccl_lib_constexpr_functional 201907L +# define __cccl_lib_constexpr_functional 201907L // # define __cccl_lib_constexpr_iterator 201811L // # define __cccl_lib_constexpr_memory 201811L // # define __cccl_lib_constexpr_misc 201811L @@ -348,40 +347,41 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_constexpr_utility 201811L // # define __cccl_lib_constexpr_vector 201907L // # define __cccl_lib_coroutine 201902L -# if defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L && defined(__cpp_lib_destroying_delete) -# define __cccl_lib_destroying_delete 201806L -# endif +# if defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L \ + && defined(__cpp_lib_destroying_delete) +# define __cccl_lib_destroying_delete 201806L +# endif // # define __cccl_lib_endian 201907L // # define __cccl_lib_erase_if 201811L // # undef __cccl_lib_execution // # define __cccl_lib_execution 201902L -# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format) && !defined(_LIBCUDACXX_HAS_NO_INCOMPLETE_FORMAT) +# if !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_format) && !defined(_LIBCUDACXX_HAS_NO_INCOMPLETE_FORMAT) // # define __cccl_lib_format 202106L -# endif +# endif // # define __cccl_lib_generic_unordered_lookup 201811L // # define __cccl_lib_int_pow2 202002L // # define __cccl_lib_integer_comparison_functions 202002L // # define __cccl_lib_interpolate 201902L -# if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) -# define __cccl_lib_is_constant_evaluated 201811L -# endif +# if defined(_LIBCUDACXX_IS_CONSTANT_EVALUATED) +# define __cccl_lib_is_constant_evaluated 201811L +# endif // # define __cccl_lib_is_layout_compatible 201907L -# define __cccl_lib_is_nothrow_convertible 201806L +# define __cccl_lib_is_nothrow_convertible 201806L // # define __cccl_lib_is_pointer_interconvertible 201907L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) // # define __cccl_lib_jthread 201911L -# endif -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && 
!defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) +# endif +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) // # define __cccl_lib_latch 201907L -# endif +# endif // # define __cccl_lib_list_remove_return_type 201806L // # define __cccl_lib_math_constants 201907L // # define __cccl_lib_polymorphic_allocator 201902L // # define __cccl_lib_ranges 201811L // # define __cccl_lib_remove_cvref 201711L -# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) +# if !defined(_LIBCUDACXX_HAS_NO_THREADS) && !defined(_LIBCUDACXX_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) // # define __cccl_lib_semaphore 201907L -# endif +# endif // # undef __cccl_lib_shared_ptr_arrays // # define __cccl_lib_shared_ptr_arrays 201707L // # define __cccl_lib_shift 201806L @@ -396,7 +396,7 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_to_address 201711L // # define __cccl_lib_to_array 201907L // # define __cccl_lib_type_identity 201806L -# define __cccl_lib_unwrap_ref 201811L +# define __cccl_lib_unwrap_ref 201811L #endif // _CCCL_STD_VER > 2017 #if _CCCL_STD_VER > 2020 @@ -411,9 +411,9 @@ __cpp_lib_void_t 201411L // # undef __cccl_lib_constexpr_memory // # define __cccl_lib_constexpr_memory 202202L // # define __cccl_lib_constexpr_typeinfo 202106L -# define __cccl_lib_forward_like 202207L +# define __cccl_lib_forward_like 202207L // # define __cccl_lib_invoke_r 202106L -# define __cccl_lib_is_scoped_enum 202011L +# define __cccl_lib_is_scoped_enum 202011L // # define __cccl_lib_move_only_function 202110L // # define __cccl_lib_out_ptr 202106L // # define __cccl_lib_ranges_chunk 202202L @@ -430,8 +430,8 @@ __cpp_lib_void_t 201411L // # define __cccl_lib_stdatomic_h 202011L // # define __cccl_lib_string_contains 202011L // # define __cccl_lib_string_resize_and_overwrite 202110L -# define __cccl_lib_to_underlying 202102L -# define __cccl_lib_unreachable 202202L +# define __cccl_lib_to_underlying 202102L +# define __cccl_lib_unreachable 202202L #endif // _CCCL_STD_VER > 2020 diff --git a/libcudacxx/include/cuda/std/expected b/libcudacxx/include/cuda/std/expected index 9469e699f34..1448d03e6f5 100644 --- a/libcudacxx/include/cuda/std/expected +++ b/libcudacxx/include/cuda/std/expected @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif //_CUDA_STD_EXPECTED diff --git a/libcudacxx/include/cuda/std/functional b/libcudacxx/include/cuda/std/functional index 042e4b4b072..aa04b2b17ca 100644 --- a/libcudacxx/include/cuda/std/functional +++ b/libcudacxx/include/cuda/std/functional @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_FUNCTIONAL diff --git a/libcudacxx/include/cuda/std/initializer_list b/libcudacxx/include/cuda/std/initializer_list index 24296620e85..5bbda785a96 100644 --- a/libcudacxx/include/cuda/std/initializer_list +++ 
b/libcudacxx/include/cuda/std/initializer_list @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_INITIALIZER_LIST diff --git a/libcudacxx/include/cuda/std/iterator b/libcudacxx/include/cuda/std/iterator index 08568f18628..2bcceb7520e 100644 --- a/libcudacxx/include/cuda/std/iterator +++ b/libcudacxx/include/cuda/std/iterator @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_ITERATOR diff --git a/libcudacxx/include/cuda/std/latch b/libcudacxx/include/cuda/std/latch index fde9078ab4f..a01fd333116 100644 --- a/libcudacxx/include/cuda/std/latch +++ b/libcudacxx/include/cuda/std/latch @@ -8,19 +8,29 @@ // //===----------------------------------------------------------------------===// +#ifndef _CUDA_STD_LATCH +#define _CUDA_STD_LATCH + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 # error "CUDA synchronization primitives are only supported for sm_70 and up." #endif -#ifndef _CUDA_STD_LATCH -#define _CUDA_STD_LATCH - #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_LATCH diff --git a/libcudacxx/include/cuda/std/limits b/libcudacxx/include/cuda/std/limits index 16531da3da0..2ba9062d357 100644 --- a/libcudacxx/include/cuda/std/limits +++ b/libcudacxx/include/cuda/std/limits @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_LIMITS diff --git a/libcudacxx/include/cuda/std/mdspan b/libcudacxx/include/cuda/std/mdspan index e9522897ca0..1cc958bf22f 100644 --- a/libcudacxx/include/cuda/std/mdspan +++ b/libcudacxx/include/cuda/std/mdspan @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_MDSPAN diff --git a/libcudacxx/include/cuda/std/optional b/libcudacxx/include/cuda/std/optional index 5ecee7594fb..ca79559f6e4 100644 --- a/libcudacxx/include/cuda/std/optional +++ b/libcudacxx/include/cuda/std/optional @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif 
defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_OPTIONAL diff --git a/libcudacxx/include/cuda/std/ranges b/libcudacxx/include/cuda/std/ranges index 56a06f65071..f8ea2dfed26 100644 --- a/libcudacxx/include/cuda/std/ranges +++ b/libcudacxx/include/cuda/std/ranges @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif //_CUDA_RANGES diff --git a/libcudacxx/include/cuda/std/ratio b/libcudacxx/include/cuda/std/ratio index 97425f38d1e..b737209b0dc 100644 --- a/libcudacxx/include/cuda/std/ratio +++ b/libcudacxx/include/cuda/std/ratio @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_RATIO diff --git a/libcudacxx/include/cuda/std/semaphore b/libcudacxx/include/cuda/std/semaphore index 5b7efef48a3..453aa36d81d 100644 --- a/libcudacxx/include/cuda/std/semaphore +++ b/libcudacxx/include/cuda/std/semaphore @@ -8,19 +8,29 @@ // //===----------------------------------------------------------------------===// +#ifndef _CUDA_STD_SEMAPHORE +#define _CUDA_STD_SEMAPHORE + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 # error "CUDA synchronization primitives are only supported for sm_70 and up." 
#endif -#ifndef _CUDA_STD_SEMAPHORE -#define _CUDA_STD_SEMAPHORE - #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_SEMAPHORE diff --git a/libcudacxx/include/cuda/std/span b/libcudacxx/include/cuda/std/span index 0388da66871..c62e5a2e17c 100644 --- a/libcudacxx/include/cuda/std/span +++ b/libcudacxx/include/cuda/std/span @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_SPAN diff --git a/libcudacxx/include/cuda/std/tuple b/libcudacxx/include/cuda/std/tuple index ee870be346c..d0ebc45b85d 100644 --- a/libcudacxx/include/cuda/std/tuple +++ b/libcudacxx/include/cuda/std/tuple @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_TUPLE diff --git a/libcudacxx/include/cuda/std/type_traits b/libcudacxx/include/cuda/std/type_traits index 9eee9b7830a..fe7ddb3d80e 100644 --- a/libcudacxx/include/cuda/std/type_traits +++ b/libcudacxx/include/cuda/std/type_traits @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_TYPE_TRAITS diff --git a/libcudacxx/include/cuda/std/utility b/libcudacxx/include/cuda/std/utility index de2b78ca814..d5e3715dc6f 100644 --- a/libcudacxx/include/cuda/std/utility +++ b/libcudacxx/include/cuda/std/utility @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_UTILITY diff --git a/libcudacxx/include/cuda/std/variant b/libcudacxx/include/cuda/std/variant index 28d59fc012b..039e3ebf6d3 100644 --- a/libcudacxx/include/cuda/std/variant +++ b/libcudacxx/include/cuda/std/variant @@ -12,10 +12,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif //_CUDA_STD_VARIANT diff --git a/libcudacxx/include/cuda/std/version b/libcudacxx/include/cuda/std/version index 2d0cbbe9aab..9a3627f9bbf 100644 --- 
a/libcudacxx/include/cuda/std/version +++ b/libcudacxx/include/cuda/std/version @@ -13,10 +13,20 @@ #include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + +// clang-format off #include #include #include +// clang-format on #endif // _CUDA_STD_VERSION diff --git a/libcudacxx/include/cuda/stream_ref b/libcudacxx/include/cuda/stream_ref index 5c2ef3c3d8b..9bb23d3e2ef 100644 --- a/libcudacxx/include/cuda/stream_ref +++ b/libcudacxx/include/cuda/stream_ref @@ -38,7 +38,8 @@ private: } // cuda */ -#include // cuda_runtime_api needs to come first +#include +// cuda_runtime_api needs to come first #include @@ -50,9 +51,9 @@ private: # pragma system_header #endif // no system header -#include #include #include +#include _LIBCUDACXX_BEGIN_NAMESPACE_CUDA @@ -127,7 +128,10 @@ public: } /// Returns the wrapped `cudaStream_t` handle. - _CCCL_NODISCARD constexpr value_type get() const noexcept { return __stream; } + _CCCL_NODISCARD constexpr value_type get() const noexcept + { + return __stream; + } /** * \brief Synchronizes the wrapped stream. @@ -150,7 +154,8 @@ public: _CCCL_NODISCARD bool ready() const { const auto __result = ::cudaStreamQuery(get()); - if (__result == ::cudaErrorNotReady) { + if (__result == ::cudaErrorNotReady) + { return false; } switch (__result) diff --git a/libcudacxx/include/nv/detail/__preprocessor b/libcudacxx/include/nv/detail/__preprocessor index af9382bd13a..15fe84eabd7 100644 --- a/libcudacxx/include/nv/detail/__preprocessor +++ b/libcudacxx/include/nv/detail/__preprocessor @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #if defined(__GNUC__) -#pragma GCC system_header +# pragma GCC system_header #endif // For all compilers and dialects this header defines: @@ -24,95 +24,153 @@ #if defined(_NV_TARGET_CPP11) # define _NV_EVAL1(...) __VA_ARGS__ -# define _NV_EVAL(...) _NV_EVAL1(__VA_ARGS__) +# define _NV_EVAL(...) _NV_EVAL1(__VA_ARGS__) #else # define _NV_EVAL1(x) x -# define _NV_EVAL(x) _NV_EVAL1(x) +# define _NV_EVAL(x) _NV_EVAL1(x) #endif // C++11 -#define _NV_CONCAT_EVAL1(l, r) _NV_EVAL(l ## r) -#define _NV_CONCAT_EVAL(l, r) _NV_CONCAT_EVAL1(l, r) +#define _NV_CONCAT_EVAL1(l, r) _NV_EVAL(l##r) +#define _NV_CONCAT_EVAL(l, r) _NV_CONCAT_EVAL1(l, r) #define _NV_IF_0(t, f) f #define _NV_IF_1(t, f) t -#define _NV_IF_BIT(b) _NV_EVAL(_NV_IF_##b) -#define _NV_IF__EVAL(fn, t, f) _NV_EVAL(fn(t, f)) +#define _NV_IF_BIT(b) _NV_EVAL(_NV_IF_##b) +#define _NV_IF__EVAL(fn, t, f) _NV_EVAL(fn(t, f)) #define _NV_IF_EVAL(cond, t, f) _NV_IF__EVAL(_NV_IF_BIT(cond), t, f) #define _NV_IF1(cond, t, f) _NV_IF_EVAL(cond, t, f) -#define _NV_IF(cond, t, f) _NV_IF1(_NV_EVAL(cond), _NV_EVAL(t), _NV_EVAL(f)) +#define _NV_IF(cond, t, f) _NV_IF1(_NV_EVAL(cond), _NV_EVAL(t), _NV_EVAL(f)) #if defined(_NV_TARGET_CPP11) // The below mechanisms were derived from: https://gustedt.wordpress.com/2010/06/08/detect-empty-macro-arguments/ -#define _NV_ARG32(...) _NV_EVAL(_NV_ARG32_0(__VA_ARGS__)) -#define _NV_ARG32_0( \ - _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, \ - _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, ...) _31 - -#define _NV_HAS_COMMA(...) 
_NV_ARG32(__VA_ARGS__, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) - -#define _NV_TRIGGER_PARENTHESIS_(...) , - -#define _NV_ISEMPTY(...) \ - _NV_ISEMPTY0( \ - /* test if there is just one argument, eventually an empty \ - one */ \ - _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__)), \ - /* test if _TRIGGER_PARENTHESIS_ together with the argument \ - adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__)), \ - /* test if the argument together with a parenthesis \ - adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__ (/*empty*/))), \ - /* test if placing it between _TRIGGER_PARENTHESIS_ and the \ - parenthesis adds a comma */ \ - _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__ (/*empty*/))) \ - ) - -#define _NV_PASTE5(_0, _1, _2, _3, _4) _0 ## _1 ## _2 ## _3 ## _4 -#define _NV_ISEMPTY0(_0, _1, _2, _3) _NV_HAS_COMMA(_NV_PASTE5(_NV_IS_EMPTY_CASE_, _0, _1, _2, _3)) -#define _NV_IS_EMPTY_CASE_0001 , - - -#define _NV_REMOVE_PAREN(...) _NV_REMOVE_PAREN1(__VA_ARGS__) -#define _NV_REMOVE_PAREN1(...) _NV_STRIP_PAREN(_NV_IF(_NV_TEST_PAREN(__VA_ARGS__), (_NV_STRIP_PAREN(__VA_ARGS__)), (__VA_ARGS__))) - -#define _NV_STRIP_PAREN2(...) __VA_ARGS__ -#define _NV_STRIP_PAREN1(...) _NV_STRIP_PAREN2 __VA_ARGS__ -#define _NV_STRIP_PAREN(...) _NV_STRIP_PAREN1(__VA_ARGS__) - -#define _NV_TEST_PAREN(...) _NV_TEST_PAREN1(__VA_ARGS__) -#define _NV_TEST_PAREN1(...) _NV_TEST_PAREN2(_NV_TEST_PAREN_DUMMY __VA_ARGS__) -#define _NV_TEST_PAREN2(...) _NV_TEST_PAREN3(_NV_CONCAT_EVAL(_, __VA_ARGS__)) -#define _NV_TEST_PAREN3(...) _NV_EVAL(_NV_FIRST_ARG(__VA_ARGS__)) - -#define __NV_PAREN_YES 1 -#define __NV_PAREN_NO 0 - -#define _NV_TEST_PAREN_DUMMY(...) _NV_PAREN_YES -#define __NV_TEST_PAREN_DUMMY __NV_PAREN_NO, - -#define _NV_FIRST_ARG1(x, ...) x -#define _NV_FIRST_ARG(x, ...) _NV_FIRST_ARG1(x) - -#define _NV_REMOVE_FIRST_ARGS1(...) __VA_ARGS__ -#define _NV_REMOVE_FIRST_ARGS(x, ...) _NV_REMOVE_FIRST_ARGS1(__VA_ARGS__) - -#define _NV_NUM_ARGS(...) _NV_NUM_ARGS0(__VA_ARGS__) -#define _NV_NUM_ARGS0(...) _NV_EVAL(_NV_NUM_ARGS1(__VA_ARGS__)) -#define _NV_NUM_ARGS1(...) _NV_IF(_NV_ISEMPTY(__VA_ARGS__), 0, _NV_NUM_ARGS2(__VA_ARGS__)) -#define _NV_NUM_ARGS2(...) _NV_ARG32(__VA_ARGS__, \ - 31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16, \ - 15,14,13,12,11,10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define _NV_DISPATCH_N_IMPL1(name, ...) _NV_EVAL(name(__VA_ARGS__)) -#define _NV_DISPATCH_N_IMPL0(depth, name, ...) _NV_DISPATCH_N_IMPL1(_NV_CONCAT_EVAL(name, depth), __VA_ARGS__) -#define _NV_DISPATCH_N_IMPL(name, ...) _NV_DISPATCH_N_IMPL0(_NV_NUM_ARGS(__VA_ARGS__), name, __VA_ARGS__) -#define _NV_DISPATCH_N_ARY(name, ...) _NV_DISPATCH_N_IMPL(name, __VA_ARGS__) +# define _NV_ARG32(...) _NV_EVAL(_NV_ARG32_0(__VA_ARGS__)) +# define _NV_ARG32_0( \ + _0, \ + _1, \ + _2, \ + _3, \ + _4, \ + _5, \ + _6, \ + _7, \ + _8, \ + _9, \ + _10, \ + _11, \ + _12, \ + _13, \ + _14, \ + _15, \ + _16, \ + _17, \ + _18, \ + _19, \ + _20, \ + _21, \ + _22, \ + _23, \ + _24, \ + _25, \ + _26, \ + _27, \ + _28, \ + _29, \ + _30, \ + _31, \ + ...) \ + _31 + +# define _NV_HAS_COMMA(...) \ + _NV_ARG32(__VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) + +# define _NV_TRIGGER_PARENTHESIS_(...) , + +# define _NV_ISEMPTY(...) 
\ + _NV_ISEMPTY0(/* test if there is just one argument, eventually an empty \ + one */ \ + _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__)), /* test if _TRIGGER_PARENTHESIS_ together with the argument \ + adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__)), /* test if the argument together with \ + a parenthesis adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(__VA_ARGS__(/*empty*/))), /* test if placing it between _TRIGGER_PARENTHESIS_ \ + and the parenthesis adds a comma */ \ + _NV_EVAL(_NV_HAS_COMMA(_NV_TRIGGER_PARENTHESIS_ __VA_ARGS__(/*empty*/)))) + +# define _NV_PASTE5(_0, _1, _2, _3, _4) _0##_1##_2##_3##_4 +# define _NV_ISEMPTY0(_0, _1, _2, _3) _NV_HAS_COMMA(_NV_PASTE5(_NV_IS_EMPTY_CASE_, _0, _1, _2, _3)) +# define _NV_IS_EMPTY_CASE_0001 , + +# define _NV_REMOVE_PAREN(...) _NV_REMOVE_PAREN1(__VA_ARGS__) +# define _NV_REMOVE_PAREN1(...) \ + _NV_STRIP_PAREN(_NV_IF(_NV_TEST_PAREN(__VA_ARGS__), (_NV_STRIP_PAREN(__VA_ARGS__)), (__VA_ARGS__))) + +# define _NV_STRIP_PAREN2(...) __VA_ARGS__ +# define _NV_STRIP_PAREN1(...) _NV_STRIP_PAREN2 __VA_ARGS__ +# define _NV_STRIP_PAREN(...) _NV_STRIP_PAREN1(__VA_ARGS__) + +# define _NV_TEST_PAREN(...) _NV_TEST_PAREN1(__VA_ARGS__) +# define _NV_TEST_PAREN1(...) _NV_TEST_PAREN2(_NV_TEST_PAREN_DUMMY __VA_ARGS__) +# define _NV_TEST_PAREN2(...) _NV_TEST_PAREN3(_NV_CONCAT_EVAL(_, __VA_ARGS__)) +# define _NV_TEST_PAREN3(...) _NV_EVAL(_NV_FIRST_ARG(__VA_ARGS__)) + +# define __NV_PAREN_YES 1 +# define __NV_PAREN_NO 0 + +# define _NV_TEST_PAREN_DUMMY(...) _NV_PAREN_YES +# define __NV_TEST_PAREN_DUMMY __NV_PAREN_NO, + +# define _NV_FIRST_ARG1(x, ...) x +# define _NV_FIRST_ARG(x, ...) _NV_FIRST_ARG1(x) + +# define _NV_REMOVE_FIRST_ARGS1(...) __VA_ARGS__ +# define _NV_REMOVE_FIRST_ARGS(x, ...) _NV_REMOVE_FIRST_ARGS1(__VA_ARGS__) + +# define _NV_NUM_ARGS(...) _NV_NUM_ARGS0(__VA_ARGS__) +# define _NV_NUM_ARGS0(...) _NV_EVAL(_NV_NUM_ARGS1(__VA_ARGS__)) +# define _NV_NUM_ARGS1(...) _NV_IF(_NV_ISEMPTY(__VA_ARGS__), 0, _NV_NUM_ARGS2(__VA_ARGS__)) +# define _NV_NUM_ARGS2(...) \ + _NV_ARG32( \ + __VA_ARGS__, \ + 31, \ + 30, \ + 29, \ + 28, \ + 27, \ + 26, \ + 25, \ + 24, \ + 23, \ + 22, \ + 21, \ + 20, \ + 19, \ + 18, \ + 17, \ + 16, \ + 15, \ + 14, \ + 13, \ + 12, \ + 11, \ + 10, \ + 9, \ + 8, \ + 7, \ + 6, \ + 5, \ + 4, \ + 3, \ + 2, \ + 1, \ + 0) + +# define _NV_DISPATCH_N_IMPL1(name, ...) _NV_EVAL(name(__VA_ARGS__)) +# define _NV_DISPATCH_N_IMPL0(depth, name, ...) _NV_DISPATCH_N_IMPL1(_NV_CONCAT_EVAL(name, depth), __VA_ARGS__) +# define _NV_DISPATCH_N_IMPL(name, ...) _NV_DISPATCH_N_IMPL0(_NV_NUM_ARGS(__VA_ARGS__), name, __VA_ARGS__) +# define _NV_DISPATCH_N_ARY(name, ...) 
_NV_DISPATCH_N_IMPL(name, __VA_ARGS__) #endif // C++11 diff --git a/libcudacxx/include/nv/detail/__target_macros b/libcudacxx/include/nv/detail/__target_macros index 6d108021b41..59df8dfd188 100644 --- a/libcudacxx/include/nv/detail/__target_macros +++ b/libcudacxx/include/nv/detail/__target_macros @@ -14,42 +14,42 @@ #include #if defined(__GNUC__) -#pragma GCC system_header +# pragma GCC system_header #endif -# define _NV_TARGET_ARCH_TO_SELECTOR_350 nv::target::sm_35 -# define _NV_TARGET_ARCH_TO_SELECTOR_370 nv::target::sm_37 -# define _NV_TARGET_ARCH_TO_SELECTOR_500 nv::target::sm_50 -# define _NV_TARGET_ARCH_TO_SELECTOR_520 nv::target::sm_52 -# define _NV_TARGET_ARCH_TO_SELECTOR_530 nv::target::sm_53 -# define _NV_TARGET_ARCH_TO_SELECTOR_600 nv::target::sm_60 -# define _NV_TARGET_ARCH_TO_SELECTOR_610 nv::target::sm_61 -# define _NV_TARGET_ARCH_TO_SELECTOR_620 nv::target::sm_62 -# define _NV_TARGET_ARCH_TO_SELECTOR_700 nv::target::sm_70 -# define _NV_TARGET_ARCH_TO_SELECTOR_720 nv::target::sm_72 -# define _NV_TARGET_ARCH_TO_SELECTOR_750 nv::target::sm_75 -# define _NV_TARGET_ARCH_TO_SELECTOR_800 nv::target::sm_80 -# define _NV_TARGET_ARCH_TO_SELECTOR_860 nv::target::sm_86 -# define _NV_TARGET_ARCH_TO_SELECTOR_870 nv::target::sm_87 -# define _NV_TARGET_ARCH_TO_SELECTOR_890 nv::target::sm_89 -# define _NV_TARGET_ARCH_TO_SELECTOR_900 nv::target::sm_90 - -# define _NV_TARGET_ARCH_TO_SM_350 35 -# define _NV_TARGET_ARCH_TO_SM_370 37 -# define _NV_TARGET_ARCH_TO_SM_500 50 -# define _NV_TARGET_ARCH_TO_SM_520 52 -# define _NV_TARGET_ARCH_TO_SM_530 53 -# define _NV_TARGET_ARCH_TO_SM_600 60 -# define _NV_TARGET_ARCH_TO_SM_610 61 -# define _NV_TARGET_ARCH_TO_SM_620 62 -# define _NV_TARGET_ARCH_TO_SM_700 70 -# define _NV_TARGET_ARCH_TO_SM_720 72 -# define _NV_TARGET_ARCH_TO_SM_750 75 -# define _NV_TARGET_ARCH_TO_SM_800 80 -# define _NV_TARGET_ARCH_TO_SM_860 86 -# define _NV_TARGET_ARCH_TO_SM_870 87 -# define _NV_TARGET_ARCH_TO_SM_890 89 -# define _NV_TARGET_ARCH_TO_SM_900 90 +#define _NV_TARGET_ARCH_TO_SELECTOR_350 nv::target::sm_35 +#define _NV_TARGET_ARCH_TO_SELECTOR_370 nv::target::sm_37 +#define _NV_TARGET_ARCH_TO_SELECTOR_500 nv::target::sm_50 +#define _NV_TARGET_ARCH_TO_SELECTOR_520 nv::target::sm_52 +#define _NV_TARGET_ARCH_TO_SELECTOR_530 nv::target::sm_53 +#define _NV_TARGET_ARCH_TO_SELECTOR_600 nv::target::sm_60 +#define _NV_TARGET_ARCH_TO_SELECTOR_610 nv::target::sm_61 +#define _NV_TARGET_ARCH_TO_SELECTOR_620 nv::target::sm_62 +#define _NV_TARGET_ARCH_TO_SELECTOR_700 nv::target::sm_70 +#define _NV_TARGET_ARCH_TO_SELECTOR_720 nv::target::sm_72 +#define _NV_TARGET_ARCH_TO_SELECTOR_750 nv::target::sm_75 +#define _NV_TARGET_ARCH_TO_SELECTOR_800 nv::target::sm_80 +#define _NV_TARGET_ARCH_TO_SELECTOR_860 nv::target::sm_86 +#define _NV_TARGET_ARCH_TO_SELECTOR_870 nv::target::sm_87 +#define _NV_TARGET_ARCH_TO_SELECTOR_890 nv::target::sm_89 +#define _NV_TARGET_ARCH_TO_SELECTOR_900 nv::target::sm_90 + +#define _NV_TARGET_ARCH_TO_SM_350 35 +#define _NV_TARGET_ARCH_TO_SM_370 37 +#define _NV_TARGET_ARCH_TO_SM_500 50 +#define _NV_TARGET_ARCH_TO_SM_520 52 +#define _NV_TARGET_ARCH_TO_SM_530 53 +#define _NV_TARGET_ARCH_TO_SM_600 60 +#define _NV_TARGET_ARCH_TO_SM_610 61 +#define _NV_TARGET_ARCH_TO_SM_620 62 +#define _NV_TARGET_ARCH_TO_SM_700 70 +#define _NV_TARGET_ARCH_TO_SM_720 72 +#define _NV_TARGET_ARCH_TO_SM_750 75 +#define _NV_TARGET_ARCH_TO_SM_800 80 +#define _NV_TARGET_ARCH_TO_SM_860 86 +#define _NV_TARGET_ARCH_TO_SM_870 87 +#define _NV_TARGET_ARCH_TO_SM_890 89 +#define _NV_TARGET_ARCH_TO_SM_900 90 
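The _NV_TARGET_ARCH_TO_SELECTOR_### and _NV_TARGET_ARCH_TO_SM_### tables above are consumed by token pasting against __CUDA_ARCH__ (see the NV_TARGET_MINIMUM_SM_INTEGER definition in the NVCC branch of the next hunk). Below is a minimal, self-contained sketch of that pasting; it uses local copies of the _NV_EVAL/_NV_CONCAT_EVAL helpers and a hypothetical FAKE_CUDA_ARCH value in place of the real predefined macro, so it is an illustration only, not the header itself.

    // Sketch only: local copies of the helpers from nv/detail/__preprocessor,
    // driven by a stand-in FAKE_CUDA_ARCH instead of the real __CUDA_ARCH__.
    #define _NV_EVAL1(...) __VA_ARGS__
    #define _NV_EVAL(...)  _NV_EVAL1(__VA_ARGS__)
    #define _NV_CONCAT_EVAL1(l, r) _NV_EVAL(l##r)
    #define _NV_CONCAT_EVAL(l, r)  _NV_CONCAT_EVAL1(l, r)

    // Two entries of the mapping table, copied from the hunk above.
    #define _NV_TARGET_ARCH_TO_SM_700 70
    #define _NV_TARGET_ARCH_TO_SM_900 90

    #define FAKE_CUDA_ARCH 900 // pretend this is the sm_90 device pass

    // FAKE_CUDA_ARCH expands to 900 before ## pastes the tokens,
    // selecting _NV_TARGET_ARCH_TO_SM_900, which expands to 90.
    static_assert(_NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SM_, FAKE_CUDA_ARCH) == 90, "900 maps to SM 90");

    int main() { return 0; }

The two-level _NV_CONCAT_EVAL/_NV_CONCAT_EVAL1 indirection is what forces the architecture value to expand to its numeric form before the paste happens; a single-level ## would glue the literal token FAKE_CUDA_ARCH (or __CUDA_ARCH__) instead.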
// Only enable when compiling for CUDA/stdpar #if defined(_NV_COMPILER_NVCXX) && defined(_NVHPC_CUDA) @@ -71,22 +71,22 @@ # define _NV_TARGET_VAL_SM_89 nv::target::sm_89 # define _NV_TARGET_VAL_SM_90 nv::target::sm_90 -# define _NV_TARGET___NV_IS_HOST nv::target::is_host +# define _NV_TARGET___NV_IS_HOST nv::target::is_host # define _NV_TARGET___NV_IS_DEVICE nv::target::is_device # define _NV_TARGET___NV_ANY_TARGET (nv::target::any_target) -# define _NV_TARGET___NV_NO_TARGET (nv::target::no_target) +# define _NV_TARGET___NV_NO_TARGET (nv::target::no_target) # if defined(NV_TARGET_SM_INTEGER_LIST) # define NV_TARGET_MINIMUM_SM_SELECTOR _NV_FIRST_ARG(NV_TARGET_SM_SELECTOR_LIST) -# define NV_TARGET_MINIMUM_SM_INTEGER _NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST) -# define __CUDA_MINIMUM_ARCH__ _NV_CONCAT_EVAL(_NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST), 0) +# define NV_TARGET_MINIMUM_SM_INTEGER _NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST) +# define __CUDA_MINIMUM_ARCH__ _NV_CONCAT_EVAL(_NV_FIRST_ARG(NV_TARGET_SM_INTEGER_LIST), 0) # endif # define _NV_TARGET_PROVIDES(q) nv::target::provides(q) # define _NV_TARGET_IS_EXACTLY(q) nv::target::is_exactly(q) -#elif defined(_NV_COMPILER_NVCC) || defined (_NV_COMPILER_CLANG_CUDA) +#elif defined(_NV_COMPILER_NVCC) || defined(_NV_COMPILER_CLANG_CUDA) # define _NV_TARGET_VAL_SM_35 350 # define _NV_TARGET_VAL_SM_37 370 @@ -106,10 +106,10 @@ # define _NV_TARGET_VAL_SM_90 900 # if defined(__CUDA_ARCH__) -# define _NV_TARGET_VAL __CUDA_ARCH__ +# define _NV_TARGET_VAL __CUDA_ARCH__ # define NV_TARGET_MINIMUM_SM_SELECTOR _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SELECTOR_, __CUDA_ARCH__) -# define NV_TARGET_MINIMUM_SM_INTEGER _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SM_, __CUDA_ARCH__) -# define __CUDA_MINIMUM_ARCH__ __CUDA_ARCH__ +# define NV_TARGET_MINIMUM_SM_INTEGER _NV_CONCAT_EVAL(_NV_TARGET_ARCH_TO_SM_, __CUDA_ARCH__) +# define __CUDA_MINIMUM_ARCH__ __CUDA_ARCH__ # endif # if defined(__CUDA_ARCH__) @@ -197,22 +197,22 @@ #define _NV_TARGET___NV_IS_EXACTLY_SM_89 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_89)) #define _NV_TARGET___NV_IS_EXACTLY_SM_90 (_NV_TARGET_IS_EXACTLY(_NV_TARGET_VAL_SM_90)) -#define NV_PROVIDES_SM_35 __NV_PROVIDES_SM_35 -#define NV_PROVIDES_SM_37 __NV_PROVIDES_SM_37 -#define NV_PROVIDES_SM_50 __NV_PROVIDES_SM_50 -#define NV_PROVIDES_SM_52 __NV_PROVIDES_SM_52 -#define NV_PROVIDES_SM_53 __NV_PROVIDES_SM_53 -#define NV_PROVIDES_SM_60 __NV_PROVIDES_SM_60 -#define NV_PROVIDES_SM_61 __NV_PROVIDES_SM_61 -#define NV_PROVIDES_SM_62 __NV_PROVIDES_SM_62 -#define NV_PROVIDES_SM_70 __NV_PROVIDES_SM_70 -#define NV_PROVIDES_SM_72 __NV_PROVIDES_SM_72 -#define NV_PROVIDES_SM_75 __NV_PROVIDES_SM_75 -#define NV_PROVIDES_SM_80 __NV_PROVIDES_SM_80 -#define NV_PROVIDES_SM_86 __NV_PROVIDES_SM_86 -#define NV_PROVIDES_SM_87 __NV_PROVIDES_SM_87 -#define NV_PROVIDES_SM_89 __NV_PROVIDES_SM_89 -#define NV_PROVIDES_SM_90 __NV_PROVIDES_SM_90 +#define NV_PROVIDES_SM_35 __NV_PROVIDES_SM_35 +#define NV_PROVIDES_SM_37 __NV_PROVIDES_SM_37 +#define NV_PROVIDES_SM_50 __NV_PROVIDES_SM_50 +#define NV_PROVIDES_SM_52 __NV_PROVIDES_SM_52 +#define NV_PROVIDES_SM_53 __NV_PROVIDES_SM_53 +#define NV_PROVIDES_SM_60 __NV_PROVIDES_SM_60 +#define NV_PROVIDES_SM_61 __NV_PROVIDES_SM_61 +#define NV_PROVIDES_SM_62 __NV_PROVIDES_SM_62 +#define NV_PROVIDES_SM_70 __NV_PROVIDES_SM_70 +#define NV_PROVIDES_SM_72 __NV_PROVIDES_SM_72 +#define NV_PROVIDES_SM_75 __NV_PROVIDES_SM_75 +#define NV_PROVIDES_SM_80 __NV_PROVIDES_SM_80 +#define NV_PROVIDES_SM_86 __NV_PROVIDES_SM_86 +#define NV_PROVIDES_SM_87 __NV_PROVIDES_SM_87 
+#define NV_PROVIDES_SM_89 __NV_PROVIDES_SM_89 +#define NV_PROVIDES_SM_90 __NV_PROVIDES_SM_90 #define NV_IS_EXACTLY_SM_35 __NV_IS_EXACTLY_SM_35 #define NV_IS_EXACTLY_SM_37 __NV_IS_EXACTLY_SM_37 @@ -235,11 +235,11 @@ // Will re-enable for nvcc below. #define NV_HAS_FEATURE_SM_90a NV_NO_TARGET -#define NV_IS_HOST __NV_IS_HOST -#define NV_IS_DEVICE __NV_IS_DEVICE +#define NV_IS_HOST __NV_IS_HOST +#define NV_IS_DEVICE __NV_IS_DEVICE -#define NV_ANY_TARGET __NV_ANY_TARGET -#define NV_NO_TARGET __NV_NO_TARGET +#define NV_ANY_TARGET __NV_ANY_TARGET +#define NV_NO_TARGET __NV_NO_TARGET // Platform invoke mechanisms #if defined(_NV_COMPILER_NVCXX) && defined(_NVHPC_CUDA) @@ -249,11 +249,9 @@ # define _NV_BLOCK_EXPAND(...) _NV_REMOVE_PAREN(__VA_ARGS__) # define _NV_TARGET_IF(cond, t, ...) \ - (if target _NV_ARCH_COND(cond) { \ - _NV_BLOCK_EXPAND(t) \ - } else { _NV_BLOCK_EXPAND(__VA_ARGS__) }) + (if target _NV_ARCH_COND(cond) { _NV_BLOCK_EXPAND(t) } else {_NV_BLOCK_EXPAND(__VA_ARGS__)}) -#elif defined(_NV_COMPILER_NVCC) || defined (_NV_COMPILER_CLANG_CUDA) +#elif defined(_NV_COMPILER_NVCC) || defined(_NV_COMPILER_CLANG_CUDA) # if (_NV_TARGET___NV_IS_EXACTLY_SM_35) # define _NV_TARGET_BOOL___NV_IS_EXACTLY_SM_35 1 @@ -353,7 +351,7 @@ // Re-enable sm_90a support in nvcc. # undef NV_HAS_FEATURE_SM_90a -# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a +# define NV_HAS_FEATURE_SM_90a __NV_HAS_FEATURE_SM_90a # if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900) && defined(__CUDA_ARCH_FEAT_SM90_ALL)) # define _NV_TARGET_BOOL___NV_HAS_FEATURE_SM_90a 1 # else @@ -369,7 +367,7 @@ # endif # define _NV_TARGET_BOOL___NV_ANY_TARGET 1 -# define _NV_TARGET_BOOL___NV_NO_TARGET 0 +# define _NV_TARGET_BOOL___NV_NO_TARGET 0 // NVCC Greater than stuff @@ -470,18 +468,24 @@ # endif # define _NV_ARCH_COND_CAT1(cond) _NV_TARGET_BOOL_##cond -# define _NV_ARCH_COND_CAT(cond) _NV_EVAL(_NV_ARCH_COND_CAT1(cond)) +# define _NV_ARCH_COND_CAT(cond) _NV_EVAL(_NV_ARCH_COND_CAT1(cond)) -# define _NV_TARGET_EMPTY_PARAM ; +# define _NV_TARGET_EMPTY_PARAM ; # if defined(_NV_TARGET_CPP11) -# define _NV_BLOCK_EXPAND(...) { _NV_REMOVE_PAREN(__VA_ARGS__) } -# define _NV_TARGET_IF(cond, t, ...) _NV_IF( _NV_ARCH_COND_CAT(cond), t, __VA_ARGS__) +# define _NV_BLOCK_EXPAND(...) \ + { \ + _NV_REMOVE_PAREN(__VA_ARGS__) \ + } +# define _NV_TARGET_IF(cond, t, ...) 
_NV_IF(_NV_ARCH_COND_CAT(cond), t, __VA_ARGS__) # else // = 201103L) || \ - (defined(_MSC_VER) && _MSVC_LANG >= 201103L)) +#if (!defined(__ibmxl__)) \ + && ((defined(__cplusplus) && __cplusplus >= 201103L) || (defined(_MSC_VER) && _MSVC_LANG >= 201103L)) # define _NV_TARGET_CPP11 #endif - // Hide `if target` support from NVRTC #if defined(_NV_TARGET_CPP11) && !defined(__CUDACC_RTC__) -#if defined(_NV_COMPILER_NVCXX) -# define _NV_BITSET_ATTRIBUTE [[nv::__target_bitset]] -#else -# define _NV_BITSET_ATTRIBUTE -#endif +# if defined(_NV_COMPILER_NVCXX) +# define _NV_BITSET_ATTRIBUTE [[nv::__target_bitset]] +# else +# define _NV_BITSET_ATTRIBUTE +# endif + +namespace nv +{ +namespace target +{ +namespace detail +{ + +typedef unsigned long long base_int_t; + +// No host specialization +constexpr base_int_t all_hosts = 1; + +// NVIDIA GPUs +constexpr base_int_t sm_35_bit = 1 << 1; +constexpr base_int_t sm_37_bit = 1 << 2; +constexpr base_int_t sm_50_bit = 1 << 3; +constexpr base_int_t sm_52_bit = 1 << 4; +constexpr base_int_t sm_53_bit = 1 << 5; +constexpr base_int_t sm_60_bit = 1 << 6; +constexpr base_int_t sm_61_bit = 1 << 7; +constexpr base_int_t sm_62_bit = 1 << 8; +constexpr base_int_t sm_70_bit = 1 << 9; +constexpr base_int_t sm_72_bit = 1 << 10; +constexpr base_int_t sm_75_bit = 1 << 11; +constexpr base_int_t sm_80_bit = 1 << 12; +constexpr base_int_t sm_86_bit = 1 << 13; +constexpr base_int_t sm_87_bit = 1 << 14; +constexpr base_int_t sm_89_bit = 1 << 15; +constexpr base_int_t sm_90_bit = 1 << 16; +constexpr base_int_t all_devices = + sm_35_bit | sm_37_bit | sm_50_bit | sm_52_bit | sm_53_bit | sm_60_bit | sm_61_bit | sm_62_bit | sm_70_bit | sm_72_bit + | sm_75_bit | sm_80_bit | sm_86_bit | sm_87_bit | sm_89_bit | sm_90_bit; + +// Store a set of targets as a set of bits +struct _NV_BITSET_ATTRIBUTE target_description +{ + base_int_t targets; + + constexpr target_description(base_int_t a) + : targets(a) + {} +}; + +// The type of the user-visible names of the NVIDIA GPU targets +enum class sm_selector : base_int_t +{ + sm_35 = 35, + sm_37 = 37, + sm_50 = 50, + sm_52 = 52, + sm_53 = 53, + sm_60 = 60, + sm_61 = 61, + sm_62 = 62, + sm_70 = 70, + sm_72 = 72, + sm_75 = 75, + sm_80 = 80, + sm_86 = 86, + sm_87 = 87, + sm_89 = 89, + sm_90 = 90, +}; + +constexpr base_int_t toint(sm_selector a) +{ + return static_cast(a); +} + +constexpr base_int_t bitexact(sm_selector a) +{ + return toint(a) == 35 ? sm_35_bit + : toint(a) == 37 ? sm_37_bit + : toint(a) == 50 ? sm_50_bit + : toint(a) == 52 ? sm_52_bit + : toint(a) == 53 ? sm_53_bit + : toint(a) == 60 ? sm_60_bit + : toint(a) == 61 ? sm_61_bit + : toint(a) == 62 ? sm_62_bit + : toint(a) == 70 ? sm_70_bit + : toint(a) == 72 ? sm_72_bit + : toint(a) == 75 ? sm_75_bit + : toint(a) == 80 ? sm_80_bit + : toint(a) == 86 ? sm_86_bit + : toint(a) == 87 ? sm_87_bit + : toint(a) == 89 ? sm_89_bit + : toint(a) == 90 ? sm_90_bit + : 0; +} + +constexpr base_int_t bitrounddown(sm_selector a) +{ + return toint(a) >= 90 ? sm_90_bit + : toint(a) >= 89 ? sm_89_bit + : toint(a) >= 87 ? sm_87_bit + : toint(a) >= 86 ? sm_86_bit + : toint(a) >= 80 ? sm_80_bit + : toint(a) >= 75 ? sm_75_bit + : toint(a) >= 72 ? sm_72_bit + : toint(a) >= 70 ? sm_70_bit + : toint(a) >= 62 ? sm_62_bit + : toint(a) >= 61 ? sm_61_bit + : toint(a) >= 60 ? sm_60_bit + : toint(a) >= 53 ? sm_53_bit + : toint(a) >= 52 ? sm_52_bit + : toint(a) >= 50 ? sm_50_bit + : toint(a) >= 37 ? sm_37_bit + : toint(a) >= 35 ? 
sm_35_bit + : 0; +} + +// Public API for NVIDIA GPUs + +constexpr target_description is_exactly(sm_selector a) +{ + return target_description(bitexact(a)); +} + +constexpr target_description provides(sm_selector a) +{ + return target_description(~(bitrounddown(a) - 1) & all_devices); +} + +// Boolean operations on target sets + +constexpr target_description operator&&(target_description a, target_description b) +{ + return target_description(a.targets & b.targets); +} + +constexpr target_description operator||(target_description a, target_description b) +{ + return target_description(a.targets | b.targets); +} -namespace nv { - namespace target { - namespace detail { - - typedef unsigned long long base_int_t; - - // No host specialization - constexpr base_int_t all_hosts = 1; - - // NVIDIA GPUs - constexpr base_int_t sm_35_bit = 1 << 1; - constexpr base_int_t sm_37_bit = 1 << 2; - constexpr base_int_t sm_50_bit = 1 << 3; - constexpr base_int_t sm_52_bit = 1 << 4; - constexpr base_int_t sm_53_bit = 1 << 5; - constexpr base_int_t sm_60_bit = 1 << 6; - constexpr base_int_t sm_61_bit = 1 << 7; - constexpr base_int_t sm_62_bit = 1 << 8; - constexpr base_int_t sm_70_bit = 1 << 9; - constexpr base_int_t sm_72_bit = 1 << 10; - constexpr base_int_t sm_75_bit = 1 << 11; - constexpr base_int_t sm_80_bit = 1 << 12; - constexpr base_int_t sm_86_bit = 1 << 13; - constexpr base_int_t sm_87_bit = 1 << 14; - constexpr base_int_t sm_89_bit = 1 << 15; - constexpr base_int_t sm_90_bit = 1 << 16; - constexpr base_int_t all_devices = - sm_35_bit | sm_37_bit | - sm_50_bit | sm_52_bit | sm_53_bit | - sm_60_bit | sm_61_bit | sm_62_bit | - sm_70_bit | sm_72_bit | sm_75_bit | - sm_80_bit | sm_86_bit | sm_87_bit | - sm_89_bit | sm_90_bit; - - // Store a set of targets as a set of bits - struct _NV_BITSET_ATTRIBUTE target_description { - base_int_t targets; - - constexpr target_description(base_int_t a) : targets(a) { } - }; - - // The type of the user-visible names of the NVIDIA GPU targets - enum class sm_selector : base_int_t { - sm_35 = 35, sm_37 = 37, - sm_50 = 50, sm_52 = 52, sm_53 = 53, - sm_60 = 60, sm_61 = 61, sm_62 = 62, - sm_70 = 70, sm_72 = 72, sm_75 = 75, - sm_80 = 80, sm_86 = 86, sm_87 = 87, - sm_89 = 89, sm_90 = 90, - }; - - constexpr base_int_t toint(sm_selector a) { - return static_cast(a); - } - - constexpr base_int_t bitexact(sm_selector a) { - return toint(a) == 35 ? sm_35_bit : - toint(a) == 37 ? sm_37_bit : - toint(a) == 50 ? sm_50_bit : - toint(a) == 52 ? sm_52_bit : - toint(a) == 53 ? sm_53_bit : - toint(a) == 60 ? sm_60_bit : - toint(a) == 61 ? sm_61_bit : - toint(a) == 62 ? sm_62_bit : - toint(a) == 70 ? sm_70_bit : - toint(a) == 72 ? sm_72_bit : - toint(a) == 75 ? sm_75_bit : - toint(a) == 80 ? sm_80_bit : - toint(a) == 86 ? sm_86_bit : - toint(a) == 87 ? sm_87_bit : - toint(a) == 89 ? sm_89_bit : - toint(a) == 90 ? sm_90_bit : 0; - } - - constexpr base_int_t bitrounddown(sm_selector a) { - return toint(a) >= 90 ? sm_90_bit : - toint(a) >= 89 ? sm_89_bit : - toint(a) >= 87 ? sm_87_bit : - toint(a) >= 86 ? sm_86_bit : - toint(a) >= 80 ? sm_80_bit : - toint(a) >= 75 ? sm_75_bit : - toint(a) >= 72 ? sm_72_bit : - toint(a) >= 70 ? sm_70_bit : - toint(a) >= 62 ? sm_62_bit : - toint(a) >= 61 ? sm_61_bit : - toint(a) >= 60 ? sm_60_bit : - toint(a) >= 53 ? sm_53_bit : - toint(a) >= 52 ? sm_52_bit : - toint(a) >= 50 ? sm_50_bit : - toint(a) >= 37 ? sm_37_bit : - toint(a) >= 35 ? 
sm_35_bit : 0; - } - - // Public API for NVIDIA GPUs - - constexpr target_description is_exactly(sm_selector a) { - return target_description(bitexact(a)); - } - - constexpr target_description provides(sm_selector a) { - return target_description(~(bitrounddown(a) - 1) & all_devices); - } - - // Boolean operations on target sets - - constexpr target_description operator&&(target_description a, - target_description b) { - return target_description(a.targets & b.targets); - } - - constexpr target_description operator||(target_description a, - target_description b) { - return target_description(a.targets | b.targets); - } - - constexpr target_description operator!(target_description a) { - return target_description(~a.targets & (all_devices | all_hosts)); - } - } - - using detail::target_description; - using detail::sm_selector; - - // The predicates for basic host/device selection - constexpr target_description is_host = - target_description(detail::all_hosts); - constexpr target_description is_device = - target_description(detail::all_devices); - constexpr target_description any_target = - target_description(detail::all_hosts | detail::all_devices); - constexpr target_description no_target = - target_description(0); - - // The public names for NVIDIA GPU architectures - constexpr sm_selector sm_35 = sm_selector::sm_35; - constexpr sm_selector sm_37 = sm_selector::sm_37; - constexpr sm_selector sm_50 = sm_selector::sm_50; - constexpr sm_selector sm_52 = sm_selector::sm_52; - constexpr sm_selector sm_53 = sm_selector::sm_53; - constexpr sm_selector sm_60 = sm_selector::sm_60; - constexpr sm_selector sm_61 = sm_selector::sm_61; - constexpr sm_selector sm_62 = sm_selector::sm_62; - constexpr sm_selector sm_70 = sm_selector::sm_70; - constexpr sm_selector sm_72 = sm_selector::sm_72; - constexpr sm_selector sm_75 = sm_selector::sm_75; - constexpr sm_selector sm_80 = sm_selector::sm_80; - constexpr sm_selector sm_86 = sm_selector::sm_86; - constexpr sm_selector sm_87 = sm_selector::sm_87; - constexpr sm_selector sm_89 = sm_selector::sm_89; - constexpr sm_selector sm_90 = sm_selector::sm_90; - - using detail::is_exactly; - using detail::provides; - } +constexpr target_description operator!(target_description a) +{ + return target_description(~a.targets & (all_devices | all_hosts)); } +} // namespace detail + +using detail::sm_selector; +using detail::target_description; + +// The predicates for basic host/device selection +constexpr target_description is_host = target_description(detail::all_hosts); +constexpr target_description is_device = target_description(detail::all_devices); +constexpr target_description any_target = target_description(detail::all_hosts | detail::all_devices); +constexpr target_description no_target = target_description(0); + +// The public names for NVIDIA GPU architectures +constexpr sm_selector sm_35 = sm_selector::sm_35; +constexpr sm_selector sm_37 = sm_selector::sm_37; +constexpr sm_selector sm_50 = sm_selector::sm_50; +constexpr sm_selector sm_52 = sm_selector::sm_52; +constexpr sm_selector sm_53 = sm_selector::sm_53; +constexpr sm_selector sm_60 = sm_selector::sm_60; +constexpr sm_selector sm_61 = sm_selector::sm_61; +constexpr sm_selector sm_62 = sm_selector::sm_62; +constexpr sm_selector sm_70 = sm_selector::sm_70; +constexpr sm_selector sm_72 = sm_selector::sm_72; +constexpr sm_selector sm_75 = sm_selector::sm_75; +constexpr sm_selector sm_80 = sm_selector::sm_80; +constexpr sm_selector sm_86 = sm_selector::sm_86; +constexpr sm_selector sm_87 = sm_selector::sm_87; 
+constexpr sm_selector sm_89 = sm_selector::sm_89; +constexpr sm_selector sm_90 = sm_selector::sm_90; + +using detail::is_exactly; +using detail::provides; +} // namespace target +} // namespace nv #endif // C++11 && !defined(__CUDACC_RTC__)
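To make the bit arithmetic in target_description easier to verify, the following standalone sketch mirrors is_exactly and provides for a small subset of the architectures above: provides keeps every architecture bit at or above the requested one by clearing the bits below the rounded-down bit, while is_exactly keeps only the single matching bit. The names here are local stand-ins for illustration, not the real <nv/target> definitions.

    // Standalone sketch of the nv::target bitset semantics shown in the hunk above;
    // local stand-ins only, not the real nv/detail/__target_macros header.
    using base_int_t = unsigned long long;

    constexpr base_int_t sm_70_bit = 1 << 9;
    constexpr base_int_t sm_75_bit = 1 << 11;
    constexpr base_int_t sm_80_bit = 1 << 12;
    constexpr base_int_t sm_90_bit = 1 << 16;
    // Subset of all_devices, enough for the sketch.
    constexpr base_int_t all_devices = sm_70_bit | sm_75_bit | sm_80_bit | sm_90_bit;

    // One architecture -> exactly one bit (mirrors bitexact/is_exactly).
    constexpr base_int_t is_exactly(base_int_t bit)
    {
      return bit;
    }

    // Everything at or above `bit`: clear the bits strictly below it (mirrors provides).
    constexpr base_int_t provides(base_int_t bit)
    {
      return ~(bit - 1) & all_devices;
    }

    // provides(sm_80) includes sm_90 but excludes sm_75; is_exactly(sm_80) is just sm_80.
    static_assert((provides(sm_80_bit) & sm_90_bit) != 0, "sm_90 satisfies provides(sm_80)");
    static_assert((provides(sm_80_bit) & sm_75_bit) == 0, "sm_75 does not satisfy provides(sm_80)");
    static_assert(is_exactly(sm_80_bit) == sm_80_bit, "exact match keeps only one bit");

    int main() { return 0; }

In the real header the operators &&, || and ! then compose these masks, and host execution is represented by the separate all_hosts bit, which is how any_target (all_hosts | all_devices) and no_target (0) are expressed.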