Replace the separate `__impl_constexpr` implementations with a constexpr `__impl_generic`

davebayer · Jan 20, 2025 · 708597b · 708597b
1 parent 980a29f
commit 708597b
Showing 1 changed file with 71 additions and 119 deletions.
diff --git a/libcudacxx/include/cuda/std/__numeric/saturation_arithmetic.h b/libcudacxx/include/cuda/std/__numeric/saturation_arithmetic.h
@@ -67,8 +67,9 @@ class __add_sat
     return __result;
   }
 
+public:
   template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
   {
     if constexpr (_CCCL_TRAIT(is_signed, _Tp))
     {
@@ -83,7 +84,6 @@ class __add_sat
     }
   }
 
-public:
 #  if defined(_CCCL_BUILTIN_ADD_OVERFLOW)
   template <class _Tp>
   _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
@@ -243,27 +243,6 @@ class __add_sat
     return __impl_generic(__x, __y);
   }
 #  endif // _CCCL_HAS_CUDA_COMPILER
-
-  template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
-  {
-    if constexpr (_CCCL_TRAIT(is_signed, _Tp))
-    {
-      if (__y > 0 && __x > _CUDA_VSTD::numeric_limits<_Tp>::max() - __y)
-      {
-        return _CUDA_VSTD::numeric_limits<_Tp>::max();
-      }
-      else if (__y < 0 && __x < _CUDA_VSTD::numeric_limits<_Tp>::min() - __y)
-      {
-        return _CUDA_VSTD::numeric_limits<_Tp>::min();
-      }
-      return __x + __y;
-    }
-    else
-    {
-      return (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() - __y) ? _CUDA_VSTD::numeric_limits<_Tp>::max() : __x + __y;
-    }
-  }
 };
 
 _CCCL_TEMPLATE(class _Tp)
@@ -279,7 +258,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp add_sat(_Tp __x, _Tp __y
     NV_IF_ELSE_TARGET(
       NV_IS_HOST, (return __add_sat::__impl_host<_Up>(__x, __y);), (return __add_sat::__impl_device<_Up>(__x, __y);))
   }
-  return __add_sat::__impl_constexpr<_Up>(__x, __y);
+  return __add_sat::__impl_generic<_Up>(__x, __y);
 #  endif // !_CCCL_BUILTIN_ADD_OVERFLOW
 }
 
@@ -303,8 +282,9 @@ class __sub_sat
     return __result;
   }
 
+public:
   template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
   {
     if constexpr (_CCCL_TRAIT(is_signed, _Tp))
     {
@@ -322,7 +302,6 @@ class __sub_sat
     }
   }
 
-public:
 #  if defined(_CCCL_BUILTIN_SUB_OVERFLOW)
   template <class _Tp>
   _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
@@ -464,27 +443,6 @@ class __sub_sat
     return __impl_generic(__x, __y);
   }
 #  endif // _CCCL_HAS_CUDA_COMPILER
-
-  template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
-  {
-    if constexpr (_CCCL_TRAIT(is_signed, _Tp))
-    {
-      if (__y < 0 && __x > _CUDA_VSTD::numeric_limits<_Tp>::max() + __y)
-      {
-        return _CUDA_VSTD::numeric_limits<_Tp>::max();
-      }
-      else if (__y > 0 && __x < _CUDA_VSTD::numeric_limits<_Tp>::min() + __y)
-      {
-        return _CUDA_VSTD::numeric_limits<_Tp>::min();
-      }
-      return __x - __y;
-    }
-    else
-    {
-      return (__y > __x) ? _CUDA_VSTD::numeric_limits<_Tp>::min() : __x - __y;
-    }
-  }
 };
 
 _CCCL_TEMPLATE(class _Tp)
@@ -500,7 +458,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp sub_sat(_Tp __x, _Tp __y
     NV_IF_ELSE_TARGET(
       NV_IS_HOST, (return __sub_sat::__impl_host<_Up>(__x, __y);), (return __sub_sat::__impl_device<_Up>(__x, __y);))
   }
-  return __sub_sat::__impl_constexpr<_Up>(__x, __y);
+  return __sub_sat::__impl_generic<_Up>(__x, __y);
 #  endif // !_CCCL_BUILTIN_SUB_OVERFLOW
 }
 
@@ -526,13 +484,74 @@ class __mul_sat
     return __result;
   }
 
+public:
   template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_generic(_Tp __x, _Tp __y) noexcept
   {
-    return __impl_constexpr(__x, __y);
+    if (__x == 0 || __y == 0)
+    {
+      return _Tp{};
+    }
+
+    if constexpr (_CCCL_TRAIT(is_signed, _Tp))
+    {
+      if (__x == -1)
+      {
+        if (__y == _CUDA_VSTD::numeric_limits<_Tp>::min())
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::max();
+        }
+        return -__y;
+      }
+
+      if (__y == -1)
+      {
+        if (__x == _CUDA_VSTD::numeric_limits<_Tp>::min())
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::max();
+        }
+        return -__x;
+      }
+
+      if (__x > 0 && __y > 0)
+      {
+        if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::max();
+        }
+      }
+      else if (__x < 0 && __y < 0)
+      {
+        if (__x < _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::max();
+        }
+      }
+      else if (__x < 0 && __y > 0)
+      {
+        if (__x < _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::min();
+        }
+      }
+      else
+      {
+        if (__x > _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
+        {
+          return _CUDA_VSTD::numeric_limits<_Tp>::min();
+        }
+      }
+    }
+    else
+    {
+      if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
+      {
+        return _CUDA_VSTD::numeric_limits<_Tp>::max();
+      }
+    }
+    return __x * __y;
   }
 
-public:
 #  if defined(_CCCL_BUILTIN_MUL_OVERFLOW)
   template <class _Tp>
   _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_builtin(_Tp __x, _Tp __y) noexcept
@@ -727,73 +746,6 @@ class __mul_sat
     return __impl_generic(__x, __y);
   }
 #  endif // _CCCL_HAS_CUDA_COMPILER
-
-  template <class _Tp>
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI static constexpr _Tp __impl_constexpr(_Tp __x, _Tp __y) noexcept
-  {
-    if (__x == 0 || __y == 0)
-    {
-      return _Tp{};
-    }
-
-    if constexpr (_CCCL_TRAIT(is_signed, _Tp))
-    {
-      if (__x == -1)
-      {
-        if (__y == _CUDA_VSTD::numeric_limits<_Tp>::min())
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::max();
-        }
-        return -__y;
-      }
-
-      if (__y == -1)
-      {
-        if (__x == _CUDA_VSTD::numeric_limits<_Tp>::min())
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::max();
-        }
-        return -__x;
-      }
-
-      if (__x > 0 && __y > 0)
-      {
-        if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::max();
-        }
-      }
-      else if (__x < 0 && __y < 0)
-      {
-        if (__x < _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::max();
-        }
-      }
-      else if (__x < 0 && __y > 0)
-      {
-        if (__x < _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::min();
-        }
-      }
-      else
-      {
-        if (__x > _CUDA_VSTD::numeric_limits<_Tp>::min() / __y)
-        {
-          return _CUDA_VSTD::numeric_limits<_Tp>::min();
-        }
-      }
-    }
-    else
-    {
-      if (__x > _CUDA_VSTD::numeric_limits<_Tp>::max() / __y)
-      {
-        return _CUDA_VSTD::numeric_limits<_Tp>::max();
-      }
-    }
-    return __x * __y;
-  }
 };
 
 _CCCL_TEMPLATE(class _Tp)
@@ -809,7 +761,7 @@ _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr _Tp mul_sat(_Tp __x, _Tp __y
     NV_IF_ELSE_TARGET(
       NV_IS_HOST, (return __mul_sat::__impl_host<_Up>(__x, __y);), (return __mul_sat::__impl_device<_Up>(__x, __y);))
   }
-  return __mul_sat::__impl_constexpr<_Up>(__x, __y);
+  return __mul_sat::__impl_generic<_Up>(__x, __y);
 #  endif // !_CCCL_BUILTIN_MUL_OVERFLOW
 }