Skip to content

Commit

Permalink
replace constexpr implementations with generic
Browse files Browse the repository at this point in the history
  • Loading branch information
davebayer committed Jan 20, 2025
1 parent 980a29f commit 708597b
Showing 1 changed file with 71 additions and 119 deletions.
190 changes: 71 additions & 119 deletions libcudacxx/include/cuda/std/__numeric/saturation_arithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ class __add_sat
return __result;
}

public:
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
{
if constexpr (_CCCL_TRAIT(is_signed, _Tp))
{
Expand All @@ -83,7 +84,6 @@ class __add_sat
}
}

public:
# if defined(_CCCL_BUILTIN_ADD_OVERFLOW)
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
Expand Down Expand Up @@ -243,27 +243,6 @@ class __add_sat
return __impl_generic(__x, __y);
}
# endif // _CCCL_HAS_CUDA_COMPILER

// Saturating addition written with plain comparisons only.
// Returns __x + __y, or numeric_limits<_Tp>::max() / min() when the bound checks below
// show that the sum would pass the corresponding limit.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
{
  constexpr _Tp __upper = _CUDA_VSTD::numeric_limits<_Tp>::max();

  if constexpr (_CCCL_TRAIT(is_signed, _Tp))
  {
    constexpr _Tp __lower = _CUDA_VSTD::numeric_limits<_Tp>::min();

    if (__y > 0)
    {
      // Positive addend: only the upper limit can be crossed.
      if (__x > __upper - __y)
      {
        return __upper;
      }
    }
    else if (__y < 0)
    {
      // Negative addend: only the lower limit can be crossed.
      if (__x < __lower - __y)
      {
        return __lower;
      }
    }
    return __x + __y;
  }
  else
  {
    // Unsigned: the sum can only exceed the upper limit.
    if (__x > __upper - __y)
    {
      return __upper;
    }
    return __x + __y;
  }
}
};

_CCCL_TEMPLATE(class _Tp)
Expand All @@ -279,7 +258,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y
NV_IF_ELSE_TARGET(
NV_IS_HOST, (return __add_sat::__impl_host<_Up>(__x, __y);), (return __add_sat::__impl_device<_Up>(__x, __y);))
}
return __add_sat::__impl_constexpr<_Up>(__x, __y);
return __add_sat::__impl_generic<_Up>(__x, __y);
# endif // !_CCCL_BUILTIN_ADD_OVERFLOW
}

Expand All @@ -303,8 +282,9 @@ class __sub_sat
return __result;
}

public:
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
{
if constexpr (_CCCL_TRAIT(is_signed, _Tp))
{
Expand All @@ -322,7 +302,6 @@ class __sub_sat
}
}

public:
# if defined(_CCCL_BUILTIN_SUB_OVERFLOW)
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
Expand Down Expand Up @@ -464,27 +443,6 @@ class __sub_sat
return __impl_generic(__x, __y);
}
# endif // _CCCL_HAS_CUDA_COMPILER

// Saturating subtraction written with plain comparisons only.
// Returns __x - __y, or numeric_limits<_Tp>::max() / min() when the bound checks below
// show that the difference would pass the corresponding limit.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
{
  if constexpr (_CCCL_TRAIT(is_signed, _Tp))
  {
    constexpr _Tp __upper = _CUDA_VSTD::numeric_limits<_Tp>::max();
    constexpr _Tp __lower = _CUDA_VSTD::numeric_limits<_Tp>::min();

    if (__y < 0)
    {
      // Subtracting a negative value moves the result upwards.
      if (__x > __upper + __y)
      {
        return __upper;
      }
    }
    else if (__y > 0)
    {
      // Subtracting a positive value moves the result downwards.
      if (__x < __lower + __y)
      {
        return __lower;
      }
    }
    return __x - __y;
  }
  else
  {
    // Unsigned: a subtrahend larger than the minuend clamps to min().
    if (__x < __y)
    {
      return _CUDA_VSTD::numeric_limits<_Tp>::min();
    }
    return __x - __y;
  }
}
};

_CCCL_TEMPLATE(class _Tp)
Expand All @@ -500,7 +458,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y
NV_IF_ELSE_TARGET(
NV_IS_HOST, (return __sub_sat::__impl_host<_Up>(__x, __y);), (return __sub_sat::__impl_device<_Up>(__x, __y);))
}
return __sub_sat::__impl_constexpr<_Up>(__x, __y);
return __sub_sat::__impl_generic<_Up>(__x, __y);
# endif // !_CCCL_BUILTIN_SUB_OVERFLOW
}

Expand All @@ -526,13 +484,74 @@ class __mul_sat
return __result;
}

public:
// Saturating multiplication built from comparisons and divisions only.
// The checks below return numeric_limits<_Tp>::max() or min() when the product would
// pass that limit, and __x * __y otherwise.
// NOTE(review): this span appears to show both the previous and the updated lines of the
// change (two declaration lines, and a forwarding `return` ahead of the inlined body) —
// confirm against the actual header which of them are live.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
{
return __impl_constexpr(__x, __y);
// A zero factor short-circuits to a value-initialized _Tp; this also keeps the divisions
// below from ever seeing a zero divisor.
if (__x == 0 || __y == 0)
{
return _Tp{};
}

if constexpr (_CCCL_TRAIT(is_signed, _Tp))
{
// Multiplying by -1 is a negation; min() is special-cased and maps to max().
if (__x == -1)
{
if (__y == _CUDA_VSTD::numeric_limits<_Tp>::min())
{
return _CUDA_VSTD::numeric_limits<_Tp>::max();
}
return -__y;
}

// Same handling with the operands swapped; after this point neither operand is -1.
if (__y == -1)
{
if (__x == _CUDA_VSTD::numeric_limits<_Tp>::min())
{
return _CUDA_VSTD::numeric_limits<_Tp>::max();
}
return -__x;
}

// Both positive: compare against max() / __y.
if (__x > 0 && __y > 0)
{
if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
{
return _CUDA_VSTD::numeric_limits<_Tp>::max();
}
}
// Both negative: the comparison direction is flipped because the divisor is negative.
else if (__x < 0 && __y < 0)
{
if (__x < _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
{
return _CUDA_VSTD::numeric_limits<_Tp>::max();
}
}
// Negative times positive: compare against min() / __y.
else if (__x < 0 && __y > 0)
{
if (__x < _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
{
return _CUDA_VSTD::numeric_limits<_Tp>::min();
}
}
// Remaining case (positive __x, negative __y, since zeros were handled above).
else
{
if (__x > _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
{
return _CUDA_VSTD::numeric_limits<_Tp>::min();
}
}
}
else
{
// Unsigned: only the upper limit can be passed.
if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
{
return _CUDA_VSTD::numeric_limits<_Tp>::max();
}
}
// None of the limit checks fired, so the plain product is returned.
return __x * __y;
}

public:
# if defined(_CCCL_BUILTIN_MUL_OVERFLOW)
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
Expand Down Expand Up @@ -727,73 +746,6 @@ class __mul_sat
return __impl_generic(__x, __y);
}
# endif // _CCCL_HAS_CUDA_COMPILER

// Saturating multiplication written with comparisons and divisions only.
// Returns __x * __y, or numeric_limits<_Tp>::max() / min() when the bound checks below
// show that the product would pass the corresponding limit.
template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
{
  // A zero factor gives zero and keeps the divisions below away from a zero divisor.
  if (__x == 0 || __y == 0)
  {
    return _Tp{};
  }

  if constexpr (_CCCL_TRAIT(is_signed, _Tp))
  {
    constexpr _Tp __upper = _CUDA_VSTD::numeric_limits<_Tp>::max();
    constexpr _Tp __lower = _CUDA_VSTD::numeric_limits<_Tp>::min();

    // A factor of -1 is a negation; min() is the one operand mapped to max() instead.
    if (__x == -1)
    {
      return (__y == __lower) ? __upper : -__y;
    }
    if (__y == -1)
    {
      return (__x == __lower) ? __upper : -__x;
    }

    // Neither operand is zero here, so "not negative" means strictly positive.
    const bool __x_negative = __x < 0;
    const bool __y_negative = __y < 0;

    if (__x_negative == __y_negative)
    {
      // Equal signs: only the upper limit can be passed. With a negative divisor the
      // comparison direction flips.
      const bool __too_large = __x_negative ? (__x < __upper / __y) : (__x > __upper / __y);
      if (__too_large)
      {
        return __upper;
      }
    }
    else
    {
      // Opposite signs: only the lower limit can be passed.
      const bool __too_small = __x_negative ? (__x < __lower / __y) : (__x > __lower / __y);
      if (__too_small)
      {
        return __lower;
      }
    }
  }
  else
  {
    constexpr _Tp __upper = _CUDA_VSTD::numeric_limits<_Tp>::max();
    if (__x > __upper / __y)
    {
      return __upper;
    }
  }
  return __x * __y;
}
};

_CCCL_TEMPLATE(class _Tp)
Expand All @@ -809,7 +761,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y
NV_IF_ELSE_TARGET(
NV_IS_HOST, (return __mul_sat::__impl_host<_Up>(__x, __y);), (return __mul_sat::__impl_device<_Up>(__x, __y);))
}
return __mul_sat::__impl_constexpr<_Up>(__x, __y);
return __mul_sat::__impl_generic<_Up>(__x, __y);
# endif // !_CCCL_BUILTIN_MUL_OVERFLOW
}

Expand Down

0 comments on commit 708597b

Please sign in to comment.