Skip to content

Commit

Permalink
Implement more cmath functions to be usable on host and device (#3382)
Browse files Browse the repository at this point in the history
* Implement more cmath functions to be usable on host and device

* Implement math roots functions

* Implement exponential functions
  • Loading branch information
miscco authored Jan 16, 2025
1 parent 3155c4b commit 3267f42
Show file tree
Hide file tree
Showing 10 changed files with 3,103 additions and 131 deletions.
325 changes: 312 additions & 13 deletions libcudacxx/include/cuda/std/__cccl/builtin.h

Large diffs are not rendered by default.

611 changes: 611 additions & 0 deletions libcudacxx/include/cuda/std/__cmath/exponential_functions.h

Large diffs are not rendered by default.

10 changes: 0 additions & 10 deletions libcudacxx/include/cuda/std/__cmath/nvbf16.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 cosh(__nv_bfloat16 __v)
return __float2bfloat16(::coshf(__bfloat162float(__v)));
}

_LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 exp(__nv_bfloat16 __v)
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hexp(__v);), (return __float2bfloat16(::expf(__bfloat162float(__v)));))
}

_LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 hypot(__nv_bfloat16 __x, __nv_bfloat16 __y)
{
return __float2bfloat16(::hypotf(__bfloat162float(__x), __bfloat162float(__y)));
Expand All @@ -70,11 +65,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 atan2(__nv_bfloat16 __x, __nv_bfloat16 _
return __float2bfloat16(::atan2f(__bfloat162float(__x), __bfloat162float(__y)));
}

_LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 sqrt(__nv_bfloat16 __x)
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2bfloat16(::sqrtf(__bfloat162float(__x)));))
}

// floating point helper
_LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 __constexpr_copysign(__nv_bfloat16 __x, __nv_bfloat16 __y) noexcept
{
Expand Down
33 changes: 0 additions & 33 deletions libcudacxx/include/cuda/std/__cmath/nvfp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,34 +97,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __half cosh(__half __v)
return __float2half(::coshf(__half2float(__v)));
}

// clang-format off
_LIBCUDACXX_HIDE_FROM_ABI __half exp(__half __v)
{
NV_IF_ELSE_TARGET(NV_PROVIDES_SM_53, (
return ::hexp(__v);
), (
{
float __vf = __half2float(__v);
__vf = ::expf(__vf);
__half_raw __ret_repr = ::__float2half_rn(__vf);

uint16_t __repr = __half_raw(__v).x;
switch (__repr)
{
case 8057:
case 9679:
__ret_repr.x -= 1;
break;

default:;
}

return __ret_repr;
}
))
}
// clang-format on

_LIBCUDACXX_HIDE_FROM_ABI __half hypot(__half __x, __half __y)
{
return __float2half(::hypotf(__half2float(__x), __half2float(__y)));
Expand All @@ -135,11 +107,6 @@ _LIBCUDACXX_HIDE_FROM_ABI __half atan2(__half __x, __half __y)
return __float2half(::atan2f(__half2float(__x), __half2float(__y)));
}

_LIBCUDACXX_HIDE_FROM_ABI __half sqrt(__half __x)
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2half(::sqrtf(__half2float(__x)));))
}

// floating point helper
_LIBCUDACXX_HIDE_FROM_ABI __half __constexpr_copysign(__half __x, __half __y) noexcept
{
Expand Down
171 changes: 171 additions & 0 deletions libcudacxx/include/cuda/std/__cmath/roots.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___CMATH_ROOTS_H
#define _LIBCUDACXX___CMATH_ROOTS_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cmath/common.h>
#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_integral.h>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

// sqrt

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float sqrt(float __x) noexcept
{
#if defined(_CCCL_BUILTIN_SQRTF)
return _CCCL_BUILTIN_SQRTF(__x);
#else // ^^^ _CCCL_BUILTIN_SQRTF ^^^ // vvv !_CCCL_BUILTIN_SQRTF vvv
return ::sqrtf(__x);
#endif // !_CCCL_BUILTIN_SQRTF
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float sqrtf(float __x) noexcept
{
#if defined(_CCCL_BUILTIN_SQRTF)
return _CCCL_BUILTIN_SQRTF(__x);
#else // ^^^ _CCCL_BUILTIN_SQRTF ^^^ // vvv !_CCCL_BUILTIN_SQRTF vvv
return ::sqrtf(__x);
#endif // !_CCCL_BUILTIN_SQRTF
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double sqrt(double __x) noexcept
{
#if defined(_CCCL_BUILTIN_SQRT)
return _CCCL_BUILTIN_SQRT(__x);
#else // ^^^ _CCCL_BUILTIN_SQRT ^^^ // vvv !_CCCL_BUILTIN_SQRT vvv
return ::sqrt(__x);
#endif // !_CCCL_BUILTIN_SQRT
}

#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double sqrt(long double __x) noexcept
{
# if defined(_CCCL_BUILTIN_SQRTL)
return _CCCL_BUILTIN_SQRTL(__x);
# else // ^^^ _CCCL_BUILTIN_SQRTL ^^^ // vvv !_CCCL_BUILTIN_SQRTL vvv
return ::sqrtl(__x);
# endif // !_CCCL_BUILTIN_SQRTL
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double sqrtl(long double __x) noexcept
{
# if defined(_CCCL_BUILTIN_SQRTL)
return _CCCL_BUILTIN_SQRTL(__x);
# else // ^^^ _CCCL_BUILTIN_SQRTL ^^^ // vvv !_CCCL_BUILTIN_SQRTL vvv
return ::sqrtl(__x);
# endif // !_CCCL_BUILTIN_SQRTL
}
#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE

#if defined(_LIBCUDACXX_HAS_NVFP16)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __half sqrt(__half __x) noexcept
{
NV_IF_ELSE_TARGET(NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2half(_CUDA_VSTD::sqrt(__half2float(__x)));))
}
#endif // _LIBCUDACXX_HAS_NVFP16

#if defined(_LIBCUDACXX_HAS_NVBF16)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 sqrt(__nv_bfloat16 __x) noexcept
{
NV_IF_ELSE_TARGET(
NV_IS_DEVICE, (return ::hsqrt(__x);), (return __float2bfloat16(_CUDA_VSTD::sqrt(__bfloat162float(__x)));))
}
#endif // _LIBCUDACXX_HAS_NVBF16

template <class _Integer, enable_if_t<_CCCL_TRAIT(is_integral, _Integer), int> = 0>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double sqrt(_Integer __x) noexcept
{
return _CUDA_VSTD::sqrt((double) __x);
}

// cbrt

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float cbrt(float __x) noexcept
{
#if defined(_CCCL_BUILTIN_CBRTF)
return _CCCL_BUILTIN_CBRTF(__x);
#else // ^^^ _CCCL_BUILTIN_CBRTF ^^^ // vvv !_CCCL_BUILTIN_CBRTF vvv
return ::cbrtf(__x);
#endif // !_CCCL_BUILTIN_CBRTF
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI float cbrtf(float __x) noexcept
{
#if defined(_CCCL_BUILTIN_CBRTF)
return _CCCL_BUILTIN_CBRTF(__x);
#else // ^^^ _CCCL_BUILTIN_CBRTF ^^^ // vvv !_CCCL_BUILTIN_CBRTF vvv
return ::cbrtf(__x);
#endif // !_CCCL_BUILTIN_CBRTF
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double cbrt(double __x) noexcept
{
#if defined(_CCCL_BUILTIN_CBRT)
return _CCCL_BUILTIN_CBRT(__x);
#else // ^^^ _CCCL_BUILTIN_CBRT ^^^ // vvv !_CCCL_BUILTIN_CBRT vvv
return ::cbrt(__x);
#endif // !_CCCL_BUILTIN_CBRT
}

#if !defined(_LIBCUDACXX_HAS_NO_LONG_DOUBLE)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double cbrt(long double __x) noexcept
{
# if defined(_CCCL_BUILTIN_CBRTL)
return _CCCL_BUILTIN_CBRTL(__x);
# else // ^^^ _CCCL_BUILTIN_CBRTL ^^^ // vvv !_CCCL_BUILTIN_CBRTL vvv
return ::cbrtl(__x);
# endif // !_CCCL_BUILTIN_CBRTL
}

_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI long double cbrtl(long double __x) noexcept
{
# if defined(_CCCL_BUILTIN_CBRTL)
return _CCCL_BUILTIN_CBRTL(__x);
# else // ^^^ _CCCL_BUILTIN_CBRTL ^^^ // vvv !_CCCL_BUILTIN_CBRTL vvv
return ::cbrtl(__x);
# endif // !_CCCL_BUILTIN_CBRTL
}
#endif // !_LIBCUDACXX_HAS_NO_LONG_DOUBLE

#if defined(_LIBCUDACXX_HAS_NVFP16)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __half cbrt(__half __x) noexcept
{
return __float2half(_CUDA_VSTD::cbrt(__half2float(__x)));
}
#endif // _LIBCUDACXX_HAS_NVFP16

#if defined(_LIBCUDACXX_HAS_NVBF16)
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI __nv_bfloat16 cbrt(__nv_bfloat16 __x) noexcept
{
return __float2bfloat16(_CUDA_VSTD::cbrt(__bfloat162float(__x)));
}
#endif // _LIBCUDACXX_HAS_NVBF16

template <class _Integer, enable_if_t<_CCCL_TRAIT(is_integral, _Integer), int> = 0>
_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI double cbrt(_Integer __x) noexcept
{
return _CUDA_VSTD::cbrt((double) __x);
}

_LIBCUDACXX_END_NAMESPACE_STD

#endif // _LIBCUDACXX___CMATH_ROOTS_H
Loading

0 comments on commit 3267f42

Please sign in to comment.