Fix dint arithmetic

tgross35 · tgross35 · commit f9e145996b06 · 2025-02-12T06:55:04.000Z
diff --git a/src/math/log.rs b/src/math/log.rs
@@ -1,139 +1,22 @@
-/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* log(x)
- * Return the logarithm of x
- *
- * Method :
- *   1. Argument Reduction: find k and f such that
- *                      x = 2^k * (1+f),
- *         where  sqrt(2)/2 < 1+f < sqrt(2) .
- *
- *   2. Approximation of log(1+f).
- *      Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
- *               = 2s + 2/3 s**3 + 2/5 s**5 + .....,
- *               = 2s + s*R
- *      We use a special Remez algorithm on [0,0.1716] to generate
- *      a polynomial of degree 14 to approximate R The maximum error
- *      of this polynomial approximation is bounded by 2**-58.45. In
- *      other words,
- *                      2      4      6      8      10      12      14
- *          R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s  +Lg6*s  +Lg7*s
- *      (the values of Lg1 to Lg7 are listed in the program)
- *      and
- *          |      2          14          |     -58.45
- *          | Lg1*s +...+Lg7*s    -  R(z) | <= 2
- *          |                             |
- *      Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
- *      In order to guarantee error in log below 1ulp, we compute log
- *      by
- *              log(1+f) = f - s*(f - R)        (if f is not too large)
- *              log(1+f) = f - (hfsq - s*(hfsq+R)).     (better accuracy)
- *
- *      3. Finally,  log(x) = k*ln2 + log(1+f).
- *                          = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
- *         Here ln2 is split into two floating point number:
- *                      ln2_hi + ln2_lo,
- *         where n*ln2_hi is always exact for |n| < 2000.
- *
- * Special cases:
- *      log(x) is NaN with signal if x < 0 (including -INF) ;
- *      log(+INF) is +INF; log(0) is -INF with signal;
- *      log(NaN) is that NaN with no signal.
- *
- * Accuracy:
- *      according to an error analysis, the error is always less than
- *      1 ulp (unit in the last place).
- *
- * Constants:
- * The hexadecimal values are the intended ones for the following
- * constants. The decimal values may be used, provided that the
- * compiler will convert from decimal to binary accurately enough
- * to produce the hexadecimal values shown.
- */
-
-/*
-Copyright (c) 2022 INRIA and CERN.
-Authors: Paul Zimmermann and Tom Hubrecht.
-
-This file is part of the CORE-MATH project
-(https://core-math.gitlabpages.inria.fr/).
+/* SPDX-License-Identifier: MIT */
+/* origin: core-math/src/binary64/log/log.c
+ * Copyright (c) 2022 INRIA and CERN. Authors: Paul Zimmermann and Tom Hubrecht.
+ * Ported to Rust in 2025 by Trevor Gross.
  */
 
 use core::cmp::Ordering;
 
-const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */
-const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */
-const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
-const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
-const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
-const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
-const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
-const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
-const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+use super::support::{DInt, HInt, cold_path};
 
 /// The natural logarithm of `x` (f64).
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn log(mut x: f64) -> f64 {
-    if true {
-        return cr_log(x);
-    }
-
-    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
-
-    let mut ui = x.to_bits();
-    let mut hx: u32 = (ui >> 32) as u32;
-    let mut k: i32 = 0;
-
-    if (hx < 0x00100000) || ((hx >> 31) != 0) {
-        /* x < 2**-126  */
-        if ui << 1 == 0 {
-            return -1. / (x * x); /* log(+-0)=-inf */
-        }
-        if hx >> 31 != 0 {
-            return (x - x) / 0.0; /* log(-#) = NaN */
-        }
-        /* subnormal number, scale x up */
-        k -= 54;
-        x *= x1p54;
-        ui = x.to_bits();
-        hx = (ui >> 32) as u32;
-    } else if hx >= 0x7ff00000 {
-        return x;
-    } else if hx == 0x3ff00000 && ui << 32 == 0 {
-        return 0.;
-    }
-
-    /* reduce x into [sqrt(2)/2, sqrt(2)] */
-    hx += 0x3ff00000 - 0x3fe6a09e;
-    k += ((hx >> 20) as i32) - 0x3ff;
-    hx = (hx & 0x000fffff) + 0x3fe6a09e;
-    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
-    x = f64::from_bits(ui);
-
-    let f: f64 = x - 1.0;
-    let hfsq: f64 = 0.5 * f * f;
-    let s: f64 = f / (2.0 + f);
-    let z: f64 = s * s;
-    let w: f64 = z * z;
-    let t1: f64 = w * (LG2 + w * (LG4 + w * LG6));
-    let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7)));
-    let r: f64 = t2 + t1;
-    let dk: f64 = k as f64;
-    s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
+pub fn log(x: f64) -> f64 {
+    return cr_log(x);
 }
 
 fn cr_log(x: f64) -> f64 {
     let mut v = x.to_bits();
-    let mut e: i32 = (v >> 52).wrapping_sub(0x3ff) as i32;
+    let mut e: i32 = (v >> 52) as i32 - 0x3ff;
     if e >= 0x400 || e == -0x3ff {
         /* x <= 0 or NaN/Inf or subnormal */
         if x <= 0.0 {
@@ -152,14 +35,15 @@ fn cr_log(x: f64) -> f64 {
         if e == -0x3ff {
             /* subnormal */
             v = (f64::from_bits(v) * hf64!("0x1p52")).to_bits();
-            e = ((v >> 52) - 0x3ff - 52) as i32;
+            e = (v >> 52) as i32 - 0x3ff - 52;
         }
     }
     /* now x > 0 */
     /* normalize v in [1,2) */
     v = (0x3ffu64 << 52) | (v & 0xfffffffffffff);
     /* now x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023 */
-    if __builtin_expect(v == 0x3ff0000000000000u64 && e == 0, false) {
+    if v == 0x3ff0000000000000u64 && e == 0 {
+        cold_path();
         return 0.0;
     }
 
@@ -203,26 +87,26 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
     let r: f64 = INVERSE[i as usize - offset];
     let l1: f64 = LOG_INV[i as usize - offset].0;
     let l2: f64 = LOG_INV[i as usize - offset].1;
-    let z: f64 = __builtin_fma(r, y, -1.0); /* exact */
+    let z: f64 = fmaf64(r, y, -1.0); /* exact */
     /* evaluate P(z), for |z| < 0.00212097167968735 */
     let mut ph: f64; /* will hold the value of P(z)-z */
     let z2: f64 = z * z; /* |z2| < 4.5e-6 thus the rounding error on z2 is
     bounded by ulp(4.5e-6) = 2^-70. */
-    let p45: f64 = __builtin_fma(P[5], z, P[4]);
+    let p45: f64 = fmaf64(P[5], z, P[4]);
     /* |P[5]| < 0.167, |z| < 0.0022, |P[4]| < 0.21 thus |p45| < 0.22:
     the rounding (and total) error on p45 is bounded by ulp(0.22) = 2^-55 */
-    let p23: f64 = __builtin_fma(P[3], z, P[2]);
+    let p23: f64 = fmaf64(P[3], z, P[2]);
     /* |P[3]| < 0.26, |z| < 0.0022, |P[2]| < 0.34 thus |p23| < 0.35:
     the rounding (and total) error on p23 is bounded by ulp(0.35) = 2^-54 */
-    ph = __builtin_fma(p45, z2, p23);
+    ph = fmaf64(p45, z2, p23);
     /* |p45| < 0.22, |z2| < 4.5e-6, |p23| < 0.35 thus |ph| < 0.36:
     the rounding error on ph is bounded by ulp(0.36) = 2^-54.
     Adding the error on p45 multiplied by z2, that on z2 multiplied by p45,
     and that on p23 (ignoring low order errors), we get for the total error
     on ph the following bound:
     2^-54 + err(p45)*4.5e-6 + 0.22*err(z2) + err(p23) <
     2^-54 + 2^-55*4.5e-6 + 0.22*2^-70 + 2^-54 < 2^-52.99 */
-    ph = __builtin_fma(ph, z, P[1]);
+    ph = fmaf64(ph, z, P[1]);
     /* let ph0 be the value at input, and ph1 the value at output:
     |ph0| < 0.36, |z| < 0.0022, |P[1]| < 0.5 thus |ph1| < 0.501:
     the rounding error on ph1 is bounded by ulp(0.501) = 2^-53.
@@ -252,7 +136,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
     representable. */
 
     let ee: f64 = e as f64;
-    let (h, mut l) = fast_two_sum(__builtin_fma(ee, log2_h, l1), z);
+    let (h, mut l) = fast_two_sum(fmaf64(ee, log2_h, l1), z);
     /* here |hh+l1|+|z| <= 3275606777621385*2^-42 + 0.0022 < 745
     thus |h| < 745, and the additional error from the fast_two_sum() call is
     bounded by 2^-105*745 < 2^-95.4. */
@@ -265,7 +149,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
     error on ph + ... is bounded by ulp(2^-18.7) = 2^-71, which yields a
     cumulated error bound of 2^-71 + 2^-95 < 2^-70.99. */
 
-    l = __builtin_fma(ee, log2_l, l);
+    l = fmaf64(ee, log2_l, l);
     /* let l_in be the input value of *l, and l_out the output value.
     We have |l_in| < 2^-18.7 (from above)
     and |e*log2_l| <= 1074*0x1.ef35793c7673p-45
@@ -279,7 +163,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
        2^-69.32 from the rounding errors in the polynomial evaluation
        2^-95.4 from the fast_two_sum call
        2^-70.99 from the *l = ph + (*l + l2) instruction
-       2^-71 from the last __builtin_fma call.
+       2^-71 from the last fmaf64 call.
        This gives an absolute error bounded by < 2^-68.22.
     */
 
@@ -329,7 +213,8 @@ fn log_2(x: &DInt64) -> DInt64 {
 
     x.ex = x.ex - e;
 
-    let mut z = x.mul(&INVERSE_2[(i - 128) as usize]);
+    let inv2 = &INVERSE_2[(i - 128) as usize];
+    let mut z = x.mul(inv2);
 
     z = DInt64::M_ONE.add(&z);
 
@@ -408,7 +293,7 @@ impl DInt64 {
         /* For log, the result is always in the normal range,
         thus a->ex > -1023. Similarly, we cannot have a->ex > 1023. */
 
-        let e: u64 = ((self.ex as u64 + 1023) & 0x7ff) << 52;
+        let e: u64 = (((self.ex + 1023) & 0x7ff) as u64) << 52;
 
         return r_f * f64::from_bits(e);
     }
@@ -451,13 +336,13 @@ impl DInt64 {
             }
         }
 
-        let sign = self.sign != 0;
+        let sign = self.sign as u8;
         let mut c: u128;
 
         if (self.sign ^ other.sign) != 0 {
             c = ai - bi;
         } else {
-            c = ai + bi;
+            c = ai.wrapping_add(bi);
             if c != 0 {
                 c += c & 0x1;
                 c = (1u128 << 127) | (c >> 1);
@@ -466,14 +351,15 @@ impl DInt64 {
         }
 
         let ex: u64 = if (c >> 64) as u64 != 0 {
-            (c >> 64 as u64).leading_zeros() as u64
+            ((c >> 64) as u64).leading_zeros() as u64
         } else {
             64 + if (c & u64::MAX as u128) != 0 {
                 ((c & u64::MAX as u128) as u64).leading_zeros() as u64
             } else {
                 self.ex as u64
             }
         };
+        c <<= ex;
 
         Self::new(c, m_ex - ex as i64, sign as u64)
     }
@@ -487,11 +373,12 @@ impl DInt64 {
         // code uint64_t l = ((u128)(a->lo) * (u128)(b->lo)) >> 64; m.l += l; m.h +=
         // (m.l < l);
         let (m, ovf) = m1.overflowing_add(m2);
-        t += (ovf as u128) << 64;
-        t += m & ((u64::MAX as u128) << 64);
+        t = t.wrapping_add((ovf as u128) << 64);
+        t = t.wrapping_add(m.hi().widen());
+        // NOTE(review): adds m.hi() zero-extended (presumably m >> 64); the old code added m's high bits unshifted.
 
         // Ensure that r->hi starts with a 1
-        let ex: u64 = !((t >> (64 + 63)) as u64);
+        let ex: u64 = ((t >> (64 + 63)) == 0) as u64;
         if ex != 0 {
             t <<= 1;
         }
@@ -535,13 +422,30 @@ impl DInt64 {
     }
 
     fn cmp(&self, other: &Self) -> Ordering {
-        if self.ex != other.ex {
-            self.ex.cmp(&other.ex)
-        } else if self.hi != other.hi {
-            self.hi.cmp(&other.hi)
+        let cmp = |a, b| (a > b) as i32 - (a < b) as i32;
+        let cmpu = |a, b| (a > b) as i32 - (a < b) as i32;
+        let r = if cmp(self.ex, other.ex) != 0 {
+            cmp(self.ex, other.ex)
+        } else if cmpu(self.hi, other.hi) != 0 {
+            cmpu(self.hi, other.hi)
         } else {
-            self.lo.cmp(&other.lo)
+            cmpu(self.lo, other.lo)
+        };
+        if r == -1 {
+            Ordering::Less
+        } else if r == 0 {
+            Ordering::Equal
+        } else {
+            Ordering::Greater
         }
+
+        // if self.ex != other.ex {
+        //     self.ex.cmp(&other.ex)
+        // } else if self.hi != other.hi {
+        //     self.hi.cmp(&other.hi)
+        // } else {
+        //     self.lo.cmp(&other.lo)
+        // }
     }
 
     fn pow2(&self) -> Self {
@@ -593,10 +497,10 @@ impl DInt64 {
 // Extract both the significand and exponent of a double
 fn fast_extract(x: f64) -> (i64, u64) {
     let xi = x.to_bits();
-    let mut e = (xi >> 52) & 0x7ff;
+    let e = (xi >> 52) & 0x7ff;
     let m = (xi & (u64::MAX >> 12)) + (if e != 0 { 1u64 << 52 } else { 0 });
-    e = e - 0x3ff;
-    (e as i64, m)
+    let e = e as i64 - 0x3ff;
+    (e, m)
 }
 
 fn fast_two_sum(a: f64, b: f64) -> (f64, f64) {
@@ -619,22 +523,6 @@ fn fast_two_sum(a: f64, b: f64) -> (f64, f64) {
     |(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|) */
 }
 
-fn __builtin_expect<T>(v: T, _exp: T) -> T {
-    v
-}
-
-fn __builtin_fabs(x: f64) -> f64 {
-    unsafe { core::intrinsics::fabsf64(x) }
-}
-
-fn __builtin_copysign(x: f64, y: f64) -> f64 {
-    unsafe { core::intrinsics::copysignf64(x, y) }
-}
-
-fn __builtin_fma(x: f64, y: f64, z: f64) -> f64 {
-    unsafe { core::intrinsics::fmaf64(x, y, z) }
-}
-
 fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
     #[cfg(intrinsics_enabled)]
     {