-
Notifications
You must be signed in to change notification settings - Fork 190
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
344 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _LIBCUDACXX___BIT_BYTESWAP_H | ||
#define _LIBCUDACXX___BIT_BYTESWAP_H | ||
|
||
#include <cuda/std/detail/__config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__bit/has_single_bit.h> | ||
#include <cuda/std/__concepts/concept_macros.h> | ||
#include <cuda/std/__type_traits/is_constant_evaluated.h> | ||
#include <cuda/std/__type_traits/is_integral.h> | ||
#include <cuda/std/__type_traits/make_unsigned.h> | ||
#include <cuda/std/climits> | ||
#include <cuda/std/cstdint> | ||
|
||
#if _CCCL_COMPILER(MSVC) | ||
# include <intrin.h> | ||
#endif // _CCCL_COMPILER(MSVC) | ||
|
||
_LIBCUDACXX_BEGIN_NAMESPACE_STD | ||
|
||
class __byteswap_impl | ||
{ | ||
template <class _Half, class _Full> | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Full __impl_recursive(_Full __val) noexcept | ||
{ | ||
static_assert(sizeof(_Full) == sizeof(_Half) * 2, "Invalid half type passed to __bytswap_impl"); | ||
|
||
return static_cast<_Full>(__impl(static_cast<_Half>(__val >> CHAR_BIT * sizeof(_Half)))) | ||
| (static_cast<_Full>(__impl(static_cast<_Half>(__val))) << CHAR_BIT * sizeof(_Half)); | ||
} | ||
|
||
#if _CCCL_HAS_CUDA_COMPILER | ||
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_DEVICE static uint32_t __impl_device(uint32_t __val) noexcept | ||
{ | ||
uint32_t __result; | ||
asm("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(__result) : "r"(__val)); | ||
return __result; | ||
} | ||
|
||
_CCCL_NODISCARD _CCCL_HIDE_FROM_ABI _CCCL_DEVICE static uint64_t __impl_device(uint64_t __val) noexcept | ||
{ | ||
uint32_t __hi; | ||
uint32_t __lo; | ||
asm("mov.b64 {%0, %1}, %2;" : "=r"(__hi), "=r"(__lo) : "l"(__val)); | ||
asm("prmt.b32 %0, %0, 0, 0x0123;" : "+r"(__hi)); | ||
asm("prmt.b32 %0, %0, 0, 0x0123;" : "+r"(__lo)); | ||
|
||
uint64_t __result; | ||
asm("mov.b64 %0, {%1, %2};" : "=l"(__result) : "r"(__lo), "r"(__hi)); | ||
|
||
return __result; | ||
} | ||
#endif // _CCCL_HAS_CUDA_COMPILER | ||
|
||
public: | ||
template <class _Tp> | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _CCCL_CONSTEXPR_CXX14 _Tp __impl(_Tp __val) noexcept | ||
{ | ||
_Tp __result{}; | ||
for (size_t __i{}; __i < sizeof(__val); ++__i) | ||
{ | ||
__result <<= CHAR_BIT; | ||
__result |= (__val >> (__i * CHAR_BIT)) & static_cast<_Tp>(UCHAR_MAX); | ||
} | ||
return __result; | ||
} | ||
|
||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr uint16_t __impl(uint16_t __val) noexcept | ||
{ | ||
#if defined(_CCCL_BUILTIN_BSWAP16) | ||
return _CCCL_BUILTIN_BSWAP16(__val); | ||
#else // ^^^ _CCCL_BUILTIN_BSWAP16 ^^^ / vvv !_CCCL_BUILTIN_BSWAP16 vvv | ||
# if _CCCL_STD_VER >= 2014 && _CCCL_COMPILER(MSVC) | ||
if (!_CUDA_VSTD::__cccl_default_is_constant_evaluated()) | ||
{ | ||
NV_IF_TARGET(NV_IS_HOST, return _byteswap_ushort(__val);) | ||
} | ||
# endif // _CCCL_STD_VER >= 2014 && _CCCL_COMPILER(MSVC) | ||
return (__val << CHAR_BIT) | (__val >> CHAR_BIT); | ||
#endif // !_CCCL_BUILTIN_BSWAP16 | ||
} | ||
|
||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr uint32_t __impl(uint32_t __val) noexcept | ||
{ | ||
#if defined(_CCCL_BUILTIN_BSWAP32) | ||
return _CCCL_BUILTIN_BSWAP32(__val); | ||
#else // ^^^ _CCCL_BUILTIN_BSWAP32 ^^^ / vvv !_CCCL_BUILTIN_BSWAP32 vvv | ||
# if _CCCL_STD_VER >= 2014 | ||
if (!_CUDA_VSTD::__cccl_default_is_constant_evaluated()) | ||
{ | ||
# if _CCCL_COMPILER(MSVC) | ||
NV_IF_TARGET(NV_IS_HOST, return _byteswap_ulong(__val);) | ||
# endif // _CCCL_COMPILER(MSVC) | ||
NV_IF_TARGET(NV_IS_DEVICE, return __impl_device(__val);) | ||
} | ||
# endif // _CCCL_STD_VER >= 2014 | ||
return __impl_recursive<uint16_t>(__val); | ||
#endif // !_CCCL_BUILTIN_BSWAP32 | ||
} | ||
|
||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr uint64_t __impl(uint64_t __val) noexcept | ||
{ | ||
#if defined(_CCCL_BUILTIN_BSWAP64) | ||
return _CCCL_BUILTIN_BSWAP64(__val); | ||
#else // ^^^ _CCCL_BUILTIN_BSWAP64 ^^^ / vvv !_CCCL_BUILTIN_BSWAP64 vvv | ||
# if _CCCL_STD_VER >= 2014 | ||
if (!_CUDA_VSTD::__cccl_default_is_constant_evaluated()) | ||
{ | ||
# if _CCCL_COMPILER(MSVC) | ||
NV_IF_TARGET(NV_IS_HOST, return _byteswap_uint64(__val);) | ||
# endif // _CCCL_COMPILER(MSVC) | ||
NV_IF_TARGET(NV_IS_DEVICE, return __impl_device(__val);) | ||
} | ||
# endif // _CCCL_STD_VER >= 2014 | ||
return __impl_recursive<uint32_t>(__val); | ||
#endif // !_CCCL_BUILTIN_BSWAP64 | ||
} | ||
|
||
#if !defined(_LIBCUDACXX_HAS_NO_INT128) | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr __uint128_t __impl(__uint128_t __val) noexcept | ||
{ | ||
# if defined(_CCCL_BUILTIN_BSWAP128) | ||
return _CCCL_BUILTIN_BSWAP128(__val); | ||
# else // ^^^ _CCCL_BUILTIN_BSWAP128 ^^^ / vvv !_CCCL_BUILTIN_BSWAP128 vvv | ||
return __impl_recursive<uint64_t>(__val); | ||
# endif // !_CCCL_BUILTIN_BSWAP128 | ||
} | ||
#endif // !_LIBCUDACXX_HAS_NO_INT128 | ||
}; | ||
|
||
_CCCL_TEMPLATE(class _Integer) | ||
_CCCL_REQUIRES(_CCCL_TRAIT(is_integral, _Integer) _CCCL_AND(sizeof(_Integer) == 1)) | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Integer byteswap(_Integer __val) noexcept | ||
{ | ||
return __val; | ||
} | ||
|
||
_CCCL_TEMPLATE(class _Integer) | ||
_CCCL_REQUIRES(_CCCL_TRAIT(is_integral, _Integer) _CCCL_AND(sizeof(_Integer) > 1) | ||
_CCCL_AND(has_single_bit(sizeof(_Integer)))) | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Integer byteswap(_Integer __val) noexcept | ||
{ | ||
return static_cast<_Integer>(__byteswap_impl::__impl(_CUDA_VSTD::__to_unsigned_like(__val))); | ||
} | ||
|
||
_CCCL_TEMPLATE(class _Integer) | ||
_CCCL_REQUIRES(_CCCL_TRAIT(is_integral, _Integer) _CCCL_AND(sizeof(_Integer) > 1) | ||
_CCCL_AND(!has_single_bit(sizeof(_Integer)))) | ||
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 _Integer byteswap(_Integer __val) noexcept | ||
{ | ||
return static_cast<_Integer>(__byteswap_impl::__impl(_CUDA_VSTD::__to_unsigned_like(__val))); | ||
} | ||
|
||
_LIBCUDACXX_END_NAMESPACE_STD | ||
|
||
#endif // _LIBCUDACXX___BIT_BYTESWAP_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
143 changes: 143 additions & 0 deletions
143
libcudacxx/test/libcudacxx/std/numerics/bit/byteswap.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <cuda/std/bit> | ||
#include <cuda/std/cassert> | ||
#include <cuda/std/cstddef> | ||
#include <cuda/std/cstdint> | ||
#include <cuda/std/utility> | ||
|
||
#include "test_macros.h" | ||
|
||
template <class T, class = void> | ||
struct has_byteswap : cuda::std::false_type | ||
{}; | ||
|
||
template <class T> | ||
struct has_byteswap<T, cuda::std::void_t<decltype(cuda::std::byteswap(cuda::std::declval<T>()))>> : cuda::std::true_type | ||
{}; | ||
|
||
enum class Byte : cuda::std::uint8_t | ||
{ | ||
}; | ||
|
||
static_assert(!has_byteswap<void*>::value, ""); | ||
static_assert(!has_byteswap<float>::value, ""); | ||
static_assert(!has_byteswap<char[2]>::value, ""); | ||
static_assert(!has_byteswap<Byte>::value, ""); | ||
|
||
template <class T> | ||
struct MakeUnsigned | ||
{ | ||
using type = cuda::std::make_unsigned_t<T>; | ||
}; | ||
|
||
template <> | ||
struct MakeUnsigned<bool> | ||
{ | ||
using type = bool; | ||
}; | ||
|
||
template <class T> | ||
__host__ __device__ TEST_CONSTEXPR_CXX14 void test_num(T in, T expected) | ||
{ | ||
using U = typename MakeUnsigned<T>::type; | ||
|
||
assert(static_cast<U>(cuda::std::byteswap(in)) == static_cast<U>(expected)); | ||
ASSERT_SAME_TYPE(decltype(cuda::std::byteswap(in)), decltype(in)); | ||
ASSERT_NOEXCEPT(cuda::std::byteswap(in)); | ||
} | ||
|
||
template <class T> | ||
struct TestData | ||
{ | ||
T in; | ||
T expected; | ||
}; | ||
|
||
template <class T> | ||
__host__ __device__ TEST_CONSTEXPR_CXX14 TestData<T> get_test_data() | ||
{ | ||
switch (sizeof(T)) | ||
{ | ||
case 2: | ||
return {static_cast<T>(0x1234), static_cast<T>(0x3412)}; | ||
case 4: | ||
return {static_cast<T>(0x60AF8503), static_cast<T>(0x0385AF60)}; | ||
case 8: | ||
return {static_cast<T>(0xABCDFE9477936406), static_cast<T>(0x0664937794FECDAB)}; | ||
default: | ||
assert(false); | ||
cuda::std::unreachable(); | ||
} | ||
} | ||
|
||
template <class T> | ||
__host__ __device__ TEST_CONSTEXPR_CXX14 void test_implementation_defined_size() | ||
{ | ||
const auto test_data = get_test_data<T>(); | ||
test_num<T>(test_data.in, test_data.expected); | ||
} | ||
|
||
__host__ __device__ TEST_CONSTEXPR_CXX14 bool test() | ||
{ | ||
test_num<cuda::std::uint8_t>(0xAB, 0xAB); | ||
test_num<cuda::std::uint16_t>(0xCDEF, 0xEFCD); | ||
test_num<cuda::std::uint32_t>(0x01234567, 0x67452301); | ||
test_num<cuda::std::uint64_t>(0x0123456789ABCDEF, 0xEFCDAB8967452301); | ||
|
||
test_num<cuda::std::int8_t>(static_cast<cuda::std::int8_t>(0xAB), static_cast<cuda::std::int8_t>(0xAB)); | ||
test_num<cuda::std::int16_t>(static_cast<cuda::std::int16_t>(0xCDEF), static_cast<cuda::std::int16_t>(0xEFCD)); | ||
test_num<cuda::std::int32_t>(0x01234567, 0x67452301); | ||
// requires static_cast to silence integer conversion resulted in a change of sign warning | ||
test_num<cuda::std::int64_t>( | ||
static_cast<cuda::std::int64_t>(0x0123456789ABCDEF), static_cast<cuda::std::int64_t>(0xEFCDAB8967452301)); | ||
|
||
#if !defined(TEST_HAS_NO_INT128_T) | ||
const auto in = static_cast<__uint128_t>(0x0123456789ABCDEF) << 64 | 0x13579BDF02468ACE; | ||
const auto expected = static_cast<__uint128_t>(0xCE8A4602DF9B5713) << 64 | 0xEFCDAB8967452301; | ||
test_num<__uint128_t>(in, expected); | ||
test_num<__int128_t>(in, expected); | ||
#endif // !defined(TEST_HAS_NO_INT128_T) | ||
|
||
test_num<bool>(true, true); | ||
test_num<bool>(false, false); | ||
test_num<char>(static_cast<char>(0xCD), static_cast<char>(0xCD)); | ||
test_num<unsigned char>(0xEF, 0xEF); | ||
test_num<signed char>(0x45, 0x45); | ||
#if TEST_STD_VER >= 2020 | ||
test_num<char8_t>(0xAB, 0xAB); | ||
#endif // TEST_STD_VER >= 2020 | ||
test_num<char16_t>(0xABCD, 0xCDAB); | ||
test_num<char32_t>(0xABCDEF01, 0x01EFCDAB); | ||
#ifndef TEST_HAS_NO_WIDE_CHARACTERS | ||
test_implementation_defined_size<wchar_t>(); | ||
#endif | ||
|
||
test_implementation_defined_size<short>(); | ||
test_implementation_defined_size<unsigned short>(); | ||
test_implementation_defined_size<int>(); | ||
test_implementation_defined_size<unsigned int>(); | ||
test_implementation_defined_size<long>(); | ||
test_implementation_defined_size<unsigned long>(); | ||
test_implementation_defined_size<long long>(); | ||
test_implementation_defined_size<unsigned long long>(); | ||
return true; | ||
} | ||
|
||
int main(int, char**) | ||
{ | ||
test(); | ||
#if TEST_STD_VER >= 2014 | ||
static_assert(test(), ""); | ||
#endif // TEST_STD_VER >= 2014 | ||
|
||
return 0; | ||
} |