Skip to content

Commit f9e1459

Browse files
committed
Fix dint arithmetic
1 parent 35fb10b commit f9e1459

File tree

1 file changed

+55
-167
lines changed

1 file changed

+55
-167
lines changed

src/math/log.rs

+55-167
Original file line numberDiff line numberDiff line change
@@ -1,139 +1,22 @@
1-
/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */
2-
/*
3-
* ====================================================
4-
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5-
*
6-
* Developed at SunSoft, a Sun Microsystems, Inc. business.
7-
* Permission to use, copy, modify, and distribute this
8-
* software is freely granted, provided that this notice
9-
* is preserved.
10-
* ====================================================
11-
*/
12-
/* log(x)
13-
* Return the logarithm of x
14-
*
15-
* Method :
16-
* 1. Argument Reduction: find k and f such that
17-
* x = 2^k * (1+f),
18-
* where sqrt(2)/2 < 1+f < sqrt(2) .
19-
*
20-
* 2. Approximation of log(1+f).
21-
* Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
22-
* = 2s + 2/3 s**3 + 2/5 s**5 + .....,
23-
* = 2s + s*R
24-
* We use a special Remez algorithm on [0,0.1716] to generate
25-
* a polynomial of degree 14 to approximate R The maximum error
26-
* of this polynomial approximation is bounded by 2**-58.45. In
27-
* other words,
28-
* 2 4 6 8 10 12 14
29-
* R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
30-
* (the values of Lg1 to Lg7 are listed in the program)
31-
* and
32-
* | 2 14 | -58.45
33-
* | Lg1*s +...+Lg7*s - R(z) | <= 2
34-
* | |
35-
* Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
36-
* In order to guarantee error in log below 1ulp, we compute log
37-
* by
38-
* log(1+f) = f - s*(f - R) (if f is not too large)
39-
* log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
40-
*
41-
* 3. Finally, log(x) = k*ln2 + log(1+f).
42-
* = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
43-
* Here ln2 is split into two floating point number:
44-
* ln2_hi + ln2_lo,
45-
* where n*ln2_hi is always exact for |n| < 2000.
46-
*
47-
* Special cases:
48-
* log(x) is NaN with signal if x < 0 (including -INF) ;
49-
* log(+INF) is +INF; log(0) is -INF with signal;
50-
* log(NaN) is that NaN with no signal.
51-
*
52-
* Accuracy:
53-
* according to an error analysis, the error is always less than
54-
* 1 ulp (unit in the last place).
55-
*
56-
* Constants:
57-
* The hexadecimal values are the intended ones for the following
58-
* constants. The decimal values may be used, provided that the
59-
* compiler will convert from decimal to binary accurately enough
60-
* to produce the hexadecimal values shown.
61-
*/
62-
63-
/*
64-
Copyright (c) 2022 INRIA and CERN.
65-
Authors: Paul Zimmermann and Tom Hubrecht.
66-
67-
This file is part of the CORE-MATH project
68-
(https://core-math.gitlabpages.inria.fr/).
1+
/* SPDX-License-Identifier: MIT */
2+
/* origin: core-math/src/binary64/cbrt/cbrt.c
3+
* Copyright (c) 2021-2022 Alexei Sibidanov.
4+
* Ported to Rust in 2025 by Trevor Gross.
695
*/
706

717
use core::cmp::Ordering;
728

73-
const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */
74-
const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */
75-
const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
76-
const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
77-
const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
78-
const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
79-
const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
80-
const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
81-
const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
9+
use super::support::{DInt, HInt, cold_path};
8210

8311
/// The natural logarithm of `x` (f64).
8412
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
85-
pub fn log(mut x: f64) -> f64 {
86-
if true {
87-
return cr_log(x);
88-
}
89-
90-
let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
91-
92-
let mut ui = x.to_bits();
93-
let mut hx: u32 = (ui >> 32) as u32;
94-
let mut k: i32 = 0;
95-
96-
if (hx < 0x00100000) || ((hx >> 31) != 0) {
97-
/* x < 2**-126 */
98-
if ui << 1 == 0 {
99-
return -1. / (x * x); /* log(+-0)=-inf */
100-
}
101-
if hx >> 31 != 0 {
102-
return (x - x) / 0.0; /* log(-#) = NaN */
103-
}
104-
/* subnormal number, scale x up */
105-
k -= 54;
106-
x *= x1p54;
107-
ui = x.to_bits();
108-
hx = (ui >> 32) as u32;
109-
} else if hx >= 0x7ff00000 {
110-
return x;
111-
} else if hx == 0x3ff00000 && ui << 32 == 0 {
112-
return 0.;
113-
}
114-
115-
/* reduce x into [sqrt(2)/2, sqrt(2)] */
116-
hx += 0x3ff00000 - 0x3fe6a09e;
117-
k += ((hx >> 20) as i32) - 0x3ff;
118-
hx = (hx & 0x000fffff) + 0x3fe6a09e;
119-
ui = ((hx as u64) << 32) | (ui & 0xffffffff);
120-
x = f64::from_bits(ui);
121-
122-
let f: f64 = x - 1.0;
123-
let hfsq: f64 = 0.5 * f * f;
124-
let s: f64 = f / (2.0 + f);
125-
let z: f64 = s * s;
126-
let w: f64 = z * z;
127-
let t1: f64 = w * (LG2 + w * (LG4 + w * LG6));
128-
let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7)));
129-
let r: f64 = t2 + t1;
130-
let dk: f64 = k as f64;
131-
s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
13+
pub fn log(x: f64) -> f64 {
14+
return cr_log(x);
13215
}
13316

13417
fn cr_log(x: f64) -> f64 {
13518
let mut v = x.to_bits();
136-
let mut e: i32 = (v >> 52).wrapping_sub(0x3ff) as i32;
19+
let mut e: i32 = (v >> 52) as i32 - 0x3ff;
13720
if e >= 0x400 || e == -0x3ff {
13821
/* x <= 0 or NaN/Inf or subnormal */
13922
if x <= 0.0 {
@@ -152,14 +35,15 @@ fn cr_log(x: f64) -> f64 {
15235
if e == -0x3ff {
15336
/* subnormal */
15437
v = (f64::from_bits(v) * hf64!("0x1p52")).to_bits();
155-
e = ((v >> 52) - 0x3ff - 52) as i32;
38+
e = (v >> 52) as i32 - 0x3ff - 52;
15639
}
15740
}
15841
/* now x > 0 */
15942
/* normalize v in [1,2) */
16043
v = (0x3ffu64 << 52) | (v & 0xfffffffffffff);
16144
/* now x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023 */
162-
if __builtin_expect(v == 0x3ff0000000000000u64 && e == 0, false) {
45+
if v == 0x3ff0000000000000u64 && e == 0 {
46+
cold_path();
16347
return 0.0;
16448
}
16549

@@ -203,26 +87,26 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
20387
let r: f64 = INVERSE[i as usize - offset];
20488
let l1: f64 = LOG_INV[i as usize - offset].0;
20589
let l2: f64 = LOG_INV[i as usize - offset].1;
206-
let z: f64 = __builtin_fma(r, y, -1.0); /* exact */
90+
let z: f64 = fmaf64(r, y, -1.0); /* exact */
20791
/* evaluate P(z), for |z| < 0.00212097167968735 */
20892
let mut ph: f64; /* will hold the value of P(z)-z */
20993
let z2: f64 = z * z; /* |z2| < 4.5e-6 thus the rounding error on z2 is
21094
bounded by ulp(4.5e-6) = 2^-70. */
211-
let p45: f64 = __builtin_fma(P[5], z, P[4]);
95+
let p45: f64 = fmaf64(P[5], z, P[4]);
21296
/* |P[5]| < 0.167, |z| < 0.0022, |P[4]| < 0.21 thus |p45| < 0.22:
21397
the rounding (and total) error on p45 is bounded by ulp(0.22) = 2^-55 */
214-
let p23: f64 = __builtin_fma(P[3], z, P[2]);
98+
let p23: f64 = fmaf64(P[3], z, P[2]);
21599
/* |P[3]| < 0.26, |z| < 0.0022, |P[2]| < 0.34 thus |p23| < 0.35:
216100
the rounding (and total) error on p23 is bounded by ulp(0.35) = 2^-54 */
217-
ph = __builtin_fma(p45, z2, p23);
101+
ph = fmaf64(p45, z2, p23);
218102
/* |p45| < 0.22, |z2| < 4.5e-6, |p23| < 0.35 thus |ph| < 0.36:
219103
the rounding error on ph is bounded by ulp(0.36) = 2^-54.
220104
Adding the error on p45 multiplied by z2, that on z2 multiplied by p45,
221105
and that on p23 (ignoring low order errors), we get for the total error
222106
on ph the following bound:
223107
2^-54 + err(p45)*4.5e-6 + 0.22*err(z2) + err(p23) <
224108
2^-54 + 2^-55*4.5e-6 + 0.22*2^-70 + 2^-54 < 2^-52.99 */
225-
ph = __builtin_fma(ph, z, P[1]);
109+
ph = fmaf64(ph, z, P[1]);
226110
/* let ph0 be the value at input, and ph1 the value at output:
227111
|ph0| < 0.36, |z| < 0.0022, |P[1]| < 0.5 thus |ph1| < 0.501:
228112
the rounding error on ph1 is bounded by ulp(0.501) = 2^-53.
@@ -252,7 +136,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
252136
representable. */
253137

254138
let ee: f64 = e as f64;
255-
let (h, mut l) = fast_two_sum(__builtin_fma(ee, log2_h, l1), z);
139+
let (h, mut l) = fast_two_sum(fmaf64(ee, log2_h, l1), z);
256140
/* here |hh+l1|+|z| <= 3275606777621385*2^-42 + 0.0022 < 745
257141
thus |h| < 745, and the additional error from the fast_two_sum() call is
258142
bounded by 2^-105*745 < 2^-95.4. */
@@ -265,7 +149,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
265149
error on ph + ... is bounded by ulp(2^-18.7) = 2^-71, which yields a
266150
cumulated error bound of 2^-71 + 2^-95 < 2^-70.99. */
267151

268-
l = __builtin_fma(ee, log2_l, l);
152+
l = fmaf64(ee, log2_l, l);
269153
/* let l_in be the input value of *l, and l_out the output value.
270154
We have |l_in| < 2^-18.7 (from above)
271155
and |e*log2_l| <= 1074*0x1.ef35793c7673p-45
@@ -279,7 +163,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
279163
2^-69.32 from the rounding errors in the polynomial evaluation
280164
2^-95.4 from the fast_two_sum call
281165
2^-70.99 from the *l = ph + (*l + l2) instruction
282-
2^-71 from the last __builtin_fma call.
166+
2^-71 from the last fmaf64 call.
283167
This gives an absolute error bounded by < 2^-68.22.
284168
*/
285169

@@ -329,7 +213,8 @@ fn log_2(x: &DInt64) -> DInt64 {
329213

330214
x.ex = x.ex - e;
331215

332-
let mut z = x.mul(&INVERSE_2[(i - 128) as usize]);
216+
let inv2 = &INVERSE_2[(i - 128) as usize];
217+
let mut z = x.mul(inv2);
333218

334219
z = DInt64::M_ONE.add(&z);
335220

@@ -408,7 +293,7 @@ impl DInt64 {
408293
/* For log, the result is always in the normal range,
409294
thus a->ex > -1023. Similarly, we cannot have a->ex > 1023. */
410295

411-
let e: u64 = ((self.ex as u64 + 1023) & 0x7ff) << 52;
296+
let e: u64 = (((self.ex + 1023) & 0x7ff) as u64) << 52;
412297

413298
return r_f * f64::from_bits(e);
414299
}
@@ -451,13 +336,13 @@ impl DInt64 {
451336
}
452337
}
453338

454-
let sign = self.sign != 0;
339+
let sign = self.sign as u8;
455340
let mut c: u128;
456341

457342
if (self.sign ^ other.sign) != 0 {
458343
c = ai - bi;
459344
} else {
460-
c = ai + bi;
345+
c = ai.wrapping_add(bi);
461346
if c != 0 {
462347
c += c & 0x1;
463348
c = (1u128 << 127) | (c >> 1);
@@ -466,14 +351,15 @@ impl DInt64 {
466351
}
467352

468353
let ex: u64 = if (c >> 64) as u64 != 0 {
469-
(c >> 64 as u64).leading_zeros() as u64
354+
((c >> 64) as u64).leading_zeros() as u64
470355
} else {
471356
64 + if (c & u64::MAX as u128) != 0 {
472357
((c & u64::MAX as u128) as u64).leading_zeros() as u64
473358
} else {
474359
self.ex as u64
475360
}
476361
};
362+
c <<= ex;
477363

478364
Self::new(c, m_ex - ex as i64, sign as u64)
479365
}
@@ -487,11 +373,12 @@ impl DInt64 {
487373
// code uint64_t l = ((u128)(a->lo) * (u128)(b->lo)) >> 64; m.l += l; m.h +=
488374
// (m.l < l);
489375
let (m, ovf) = m1.overflowing_add(m2);
490-
t += (ovf as u128) << 64;
491-
t += m & ((u64::MAX as u128) << 64);
376+
t = t.wrapping_add((ovf as u128) << 64);
377+
t = t.wrapping_add(m.hi().widen());
378+
// t = t.wrapping_add(m & ((u64::MAX as u128) << 64));
492379

493380
// Ensure that r->hi starts with a 1
494-
let ex: u64 = !((t >> (64 + 63)) as u64);
381+
let ex: u64 = ((t >> (64 + 63)) == 0) as u64;
495382
if ex != 0 {
496383
t <<= 1;
497384
}
@@ -535,13 +422,30 @@ impl DInt64 {
535422
}
536423

537424
fn cmp(&self, other: &Self) -> Ordering {
538-
if self.ex != other.ex {
539-
self.ex.cmp(&other.ex)
540-
} else if self.hi != other.hi {
541-
self.hi.cmp(&other.hi)
425+
let cmp = |a, b| (a > b) as i32 - (a < b) as i32;
426+
let cmpu = |a, b| (a > b) as i32 - (a < b) as i32;
427+
let r = if cmp(self.ex, other.ex) != 0 {
428+
cmp(self.ex, other.ex)
429+
} else if cmpu(self.hi, other.hi) != 0 {
430+
cmpu(self.hi, other.hi)
542431
} else {
543-
self.lo.cmp(&other.lo)
432+
cmpu(self.lo, other.lo)
433+
};
434+
if r == -1 {
435+
Ordering::Less
436+
} else if r == 0 {
437+
Ordering::Equal
438+
} else {
439+
Ordering::Greater
544440
}
441+
442+
// if self.ex != other.ex {
443+
// self.ex.cmp(&other.ex)
444+
// } else if self.hi != other.hi {
445+
// self.hi.cmp(&other.hi)
446+
// } else {
447+
// self.lo.cmp(&other.lo)
448+
// }
545449
}
546450

547451
fn pow2(&self) -> Self {
@@ -593,10 +497,10 @@ impl DInt64 {
593497
// Extract both the significand and exponent of a double
594498
fn fast_extract(x: f64) -> (i64, u64) {
595499
let xi = x.to_bits();
596-
let mut e = (xi >> 52) & 0x7ff;
500+
let e = (xi >> 52) & 0x7ff;
597501
let m = (xi & (u64::MAX >> 12)) + (if e != 0 { 1u64 << 52 } else { 0 });
598-
e = e - 0x3ff;
599-
(e as i64, m)
502+
let e = e as i64 - 0x3ff;
503+
(e, m)
600504
}
601505

602506
fn fast_two_sum(a: f64, b: f64) -> (f64, f64) {
@@ -619,22 +523,6 @@ fn fast_two_sum(a: f64, b: f64) -> (f64, f64) {
619523
|(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|) */
620524
}
621525

622-
fn __builtin_expect<T>(v: T, _exp: T) -> T {
623-
v
624-
}
625-
626-
fn __builtin_fabs(x: f64) -> f64 {
627-
unsafe { core::intrinsics::fabsf64(x) }
628-
}
629-
630-
fn __builtin_copysign(x: f64, y: f64) -> f64 {
631-
unsafe { core::intrinsics::copysignf64(x, y) }
632-
}
633-
634-
fn __builtin_fma(x: f64, y: f64, z: f64) -> f64 {
635-
unsafe { core::intrinsics::fmaf64(x, y, z) }
636-
}
637-
638526
fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
639527
#[cfg(intrinsics_enabled)]
640528
{

0 commit comments

Comments
 (0)