1
- /* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */
2
- /*
3
- * ====================================================
4
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5
- *
6
- * Developed at SunSoft, a Sun Microsystems, Inc. business.
7
- * Permission to use, copy, modify, and distribute this
8
- * software is freely granted, provided that this notice
9
- * is preserved.
10
- * ====================================================
11
- */
12
- /* log(x)
13
- * Return the logarithm of x
14
- *
15
- * Method :
16
- * 1. Argument Reduction: find k and f such that
17
- * x = 2^k * (1+f),
18
- * where sqrt(2)/2 < 1+f < sqrt(2) .
19
- *
20
- * 2. Approximation of log(1+f).
21
- * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s)
22
- * = 2s + 2/3 s**3 + 2/5 s**5 + .....,
23
- * = 2s + s*R
24
- * We use a special Remez algorithm on [0,0.1716] to generate
25
- * a polynomial of degree 14 to approximate R The maximum error
26
- * of this polynomial approximation is bounded by 2**-58.45. In
27
- * other words,
28
- * 2 4 6 8 10 12 14
29
- * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s
30
- * (the values of Lg1 to Lg7 are listed in the program)
31
- * and
32
- * | 2 14 | -58.45
33
- * | Lg1*s +...+Lg7*s - R(z) | <= 2
34
- * | |
35
- * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2.
36
- * In order to guarantee error in log below 1ulp, we compute log
37
- * by
38
- * log(1+f) = f - s*(f - R) (if f is not too large)
39
- * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy)
40
- *
41
- * 3. Finally, log(x) = k*ln2 + log(1+f).
42
- * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo)))
43
- * Here ln2 is split into two floating point number:
44
- * ln2_hi + ln2_lo,
45
- * where n*ln2_hi is always exact for |n| < 2000.
46
- *
47
- * Special cases:
48
- * log(x) is NaN with signal if x < 0 (including -INF) ;
49
- * log(+INF) is +INF; log(0) is -INF with signal;
50
- * log(NaN) is that NaN with no signal.
51
- *
52
- * Accuracy:
53
- * according to an error analysis, the error is always less than
54
- * 1 ulp (unit in the last place).
55
- *
56
- * Constants:
57
- * The hexadecimal values are the intended ones for the following
58
- * constants. The decimal values may be used, provided that the
59
- * compiler will convert from decimal to binary accurately enough
60
- * to produce the hexadecimal values shown.
61
- */
62
-
63
- /*
64
- Copyright (c) 2022 INRIA and CERN.
65
- Authors: Paul Zimmermann and Tom Hubrecht.
66
-
67
- This file is part of the CORE-MATH project
68
- (https://core-math.gitlabpages.inria.fr/).
1
+ /* SPDX-License-Identifier: MIT */
2
+ /* origin: core-math/src/binary64/cbrt/cbrt.c
3
+ * Copyright (c) 2021-2022 Alexei Sibidanov.
4
+ * Ported to Rust in 2025 by Trevor Gross.
69
5
*/
70
6
71
7
use core:: cmp:: Ordering ;
72
8
73
- const LN2_HI : f64 = 6.93147180369123816490e-01 ; /* 3fe62e42 fee00000 */
74
- const LN2_LO : f64 = 1.90821492927058770002e-10 ; /* 3dea39ef 35793c76 */
75
- const LG1 : f64 = 6.666666666666735130e-01 ; /* 3FE55555 55555593 */
76
- const LG2 : f64 = 3.999999999940941908e-01 ; /* 3FD99999 9997FA04 */
77
- const LG3 : f64 = 2.857142874366239149e-01 ; /* 3FD24924 94229359 */
78
- const LG4 : f64 = 2.222219843214978396e-01 ; /* 3FCC71C5 1D8E78AF */
79
- const LG5 : f64 = 1.818357216161805012e-01 ; /* 3FC74664 96CB03DE */
80
- const LG6 : f64 = 1.531383769920937332e-01 ; /* 3FC39A09 D078C69F */
81
- const LG7 : f64 = 1.479819860511658591e-01 ; /* 3FC2F112 DF3E5244 */
9
+ use super :: support:: { DInt , HInt , cold_path} ;
82
10
83
11
/// The natural logarithm of `x` (f64).
84
12
#[ cfg_attr( all( test, assert_no_panic) , no_panic:: no_panic) ]
85
- pub fn log ( mut x : f64 ) -> f64 {
86
- if true {
87
- return cr_log ( x) ;
88
- }
89
-
90
- let x1p54 = f64:: from_bits ( 0x4350000000000000 ) ; // 0x1p54 === 2 ^ 54
91
-
92
- let mut ui = x. to_bits ( ) ;
93
- let mut hx: u32 = ( ui >> 32 ) as u32 ;
94
- let mut k: i32 = 0 ;
95
-
96
- if ( hx < 0x00100000 ) || ( ( hx >> 31 ) != 0 ) {
97
- /* x < 2**-126 */
98
- if ui << 1 == 0 {
99
- return -1. / ( x * x) ; /* log(+-0)=-inf */
100
- }
101
- if hx >> 31 != 0 {
102
- return ( x - x) / 0.0 ; /* log(-#) = NaN */
103
- }
104
- /* subnormal number, scale x up */
105
- k -= 54 ;
106
- x *= x1p54;
107
- ui = x. to_bits ( ) ;
108
- hx = ( ui >> 32 ) as u32 ;
109
- } else if hx >= 0x7ff00000 {
110
- return x;
111
- } else if hx == 0x3ff00000 && ui << 32 == 0 {
112
- return 0. ;
113
- }
114
-
115
- /* reduce x into [sqrt(2)/2, sqrt(2)] */
116
- hx += 0x3ff00000 - 0x3fe6a09e ;
117
- k += ( ( hx >> 20 ) as i32 ) - 0x3ff ;
118
- hx = ( hx & 0x000fffff ) + 0x3fe6a09e ;
119
- ui = ( ( hx as u64 ) << 32 ) | ( ui & 0xffffffff ) ;
120
- x = f64:: from_bits ( ui) ;
121
-
122
- let f: f64 = x - 1.0 ;
123
- let hfsq: f64 = 0.5 * f * f;
124
- let s: f64 = f / ( 2.0 + f) ;
125
- let z: f64 = s * s;
126
- let w: f64 = z * z;
127
- let t1: f64 = w * ( LG2 + w * ( LG4 + w * LG6 ) ) ;
128
- let t2: f64 = z * ( LG1 + w * ( LG3 + w * ( LG5 + w * LG7 ) ) ) ;
129
- let r: f64 = t2 + t1;
130
- let dk: f64 = k as f64 ;
131
- s * ( hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
13
+ pub fn log ( x : f64 ) -> f64 {
14
+ return cr_log ( x) ;
132
15
}
133
16
134
17
fn cr_log ( x : f64 ) -> f64 {
135
18
let mut v = x. to_bits ( ) ;
136
- let mut e: i32 = ( v >> 52 ) . wrapping_sub ( 0x3ff ) as i32 ;
19
+ let mut e: i32 = ( v >> 52 ) as i32 - 0x3ff ;
137
20
if e >= 0x400 || e == -0x3ff {
138
21
/* x <= 0 or NaN/Inf or subnormal */
139
22
if x <= 0.0 {
@@ -152,14 +35,15 @@ fn cr_log(x: f64) -> f64 {
152
35
if e == -0x3ff {
153
36
/* subnormal */
154
37
v = ( f64:: from_bits ( v) * hf64 ! ( "0x1p52" ) ) . to_bits ( ) ;
155
- e = ( ( v >> 52 ) - 0x3ff - 52 ) as i32 ;
38
+ e = ( v >> 52 ) as i32 - 0x3ff - 52 ;
156
39
}
157
40
}
158
41
/* now x > 0 */
159
42
/* normalize v in [1,2) */
160
43
v = ( 0x3ffu64 << 52 ) | ( v & 0xfffffffffffff ) ;
161
44
/* now x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023 */
162
- if __builtin_expect ( v == 0x3ff0000000000000u64 && e == 0 , false ) {
45
+ if v == 0x3ff0000000000000u64 && e == 0 {
46
+ cold_path ( ) ;
163
47
return 0.0 ;
164
48
}
165
49
@@ -203,26 +87,26 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
203
87
let r: f64 = INVERSE [ i as usize - offset] ;
204
88
let l1: f64 = LOG_INV [ i as usize - offset] . 0 ;
205
89
let l2: f64 = LOG_INV [ i as usize - offset] . 1 ;
206
- let z: f64 = __builtin_fma ( r, y, -1.0 ) ; /* exact */
90
+ let z: f64 = fmaf64 ( r, y, -1.0 ) ; /* exact */
207
91
/* evaluate P(z), for |z| < 0.00212097167968735 */
208
92
let mut ph: f64 ; /* will hold the value of P(z)-z */
209
93
let z2: f64 = z * z; /* |z2| < 4.5e-6 thus the rounding error on z2 is
210
94
bounded by ulp(4.5e-6) = 2^-70. */
211
- let p45: f64 = __builtin_fma ( P [ 5 ] , z, P [ 4 ] ) ;
95
+ let p45: f64 = fmaf64 ( P [ 5 ] , z, P [ 4 ] ) ;
212
96
/* |P[5]| < 0.167, |z| < 0.0022, |P[4]| < 0.21 thus |p45| < 0.22:
213
97
the rounding (and total) error on p45 is bounded by ulp(0.22) = 2^-55 */
214
- let p23: f64 = __builtin_fma ( P [ 3 ] , z, P [ 2 ] ) ;
98
+ let p23: f64 = fmaf64 ( P [ 3 ] , z, P [ 2 ] ) ;
215
99
/* |P[3]| < 0.26, |z| < 0.0022, |P[2]| < 0.34 thus |p23| < 0.35:
216
100
the rounding (and total) error on p23 is bounded by ulp(0.35) = 2^-54 */
217
- ph = __builtin_fma ( p45, z2, p23) ;
101
+ ph = fmaf64 ( p45, z2, p23) ;
218
102
/* |p45| < 0.22, |z2| < 4.5e-6, |p23| < 0.35 thus |ph| < 0.36:
219
103
the rounding error on ph is bounded by ulp(0.36) = 2^-54.
220
104
Adding the error on p45 multiplied by z2, that on z2 multiplied by p45,
221
105
and that on p23 (ignoring low order errors), we get for the total error
222
106
on ph the following bound:
223
107
2^-54 + err(p45)*4.5e-6 + 0.22*err(z2) + err(p23) <
224
108
2^-54 + 2^-55*4.5e-6 + 0.22*2^-70 + 2^-54 < 2^-52.99 */
225
- ph = __builtin_fma ( ph, z, P [ 1 ] ) ;
109
+ ph = fmaf64 ( ph, z, P [ 1 ] ) ;
226
110
/* let ph0 be the value at input, and ph1 the value at output:
227
111
|ph0| < 0.36, |z| < 0.0022, |P[1]| < 0.5 thus |ph1| < 0.501:
228
112
the rounding error on ph1 is bounded by ulp(0.501) = 2^-53.
@@ -252,7 +136,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
252
136
representable. */
253
137
254
138
let ee: f64 = e as f64 ;
255
- let ( h, mut l) = fast_two_sum ( __builtin_fma ( ee, log2_h, l1) , z) ;
139
+ let ( h, mut l) = fast_two_sum ( fmaf64 ( ee, log2_h, l1) , z) ;
256
140
/* here |hh+l1|+|z| <= 3275606777621385*2^-42 + 0.0022 < 745
257
141
thus |h| < 745, and the additional error from the fast_two_sum() call is
258
142
bounded by 2^-105*745 < 2^-95.4. */
@@ -265,7 +149,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
265
149
error on ph + ... is bounded by ulp(2^-18.7) = 2^-71, which yields a
266
150
cumulated error bound of 2^-71 + 2^-95 < 2^-70.99. */
267
151
268
- l = __builtin_fma ( ee, log2_l, l) ;
152
+ l = fmaf64 ( ee, log2_l, l) ;
269
153
/* let l_in be the input value of *l, and l_out the output value.
270
154
We have |l_in| < 2^-18.7 (from above)
271
155
and |e*log2_l| <= 1074*0x1.ef35793c7673p-45
@@ -279,7 +163,7 @@ fn cr_log_fast(mut e: i32, v: u64) -> (f64, f64) {
279
163
2^-69.32 from the rounding errors in the polynomial evaluation
280
164
2^-95.4 from the fast_two_sum call
281
165
2^-70.99 from the *l = ph + (*l + l2) instruction
282
- 2^-71 from the last __builtin_fma call.
166
+ 2^-71 from the last fmaf64 call.
283
167
This gives an absolute error bounded by < 2^-68.22.
284
168
*/
285
169
@@ -329,7 +213,8 @@ fn log_2(x: &DInt64) -> DInt64 {
329
213
330
214
x. ex = x. ex - e;
331
215
332
- let mut z = x. mul ( & INVERSE_2 [ ( i - 128 ) as usize ] ) ;
216
+ let inv2 = & INVERSE_2 [ ( i - 128 ) as usize ] ;
217
+ let mut z = x. mul ( inv2) ;
333
218
334
219
z = DInt64 :: M_ONE . add ( & z) ;
335
220
@@ -408,7 +293,7 @@ impl DInt64 {
408
293
/* For log, the result is always in the normal range,
409
294
thus a->ex > -1023. Similarly, we cannot have a->ex > 1023. */
410
295
411
- let e: u64 = ( ( self . ex as u64 + 1023 ) & 0x7ff ) << 52 ;
296
+ let e: u64 = ( ( ( self . ex + 1023 ) & 0x7ff ) as u64 ) << 52 ;
412
297
413
298
return r_f * f64:: from_bits ( e) ;
414
299
}
@@ -451,13 +336,13 @@ impl DInt64 {
451
336
}
452
337
}
453
338
454
- let sign = self . sign != 0 ;
339
+ let sign = self . sign as u8 ;
455
340
let mut c: u128 ;
456
341
457
342
if ( self . sign ^ other. sign ) != 0 {
458
343
c = ai - bi;
459
344
} else {
460
- c = ai + bi ;
345
+ c = ai. wrapping_add ( bi ) ;
461
346
if c != 0 {
462
347
c += c & 0x1 ;
463
348
c = ( 1u128 << 127 ) | ( c >> 1 ) ;
@@ -466,14 +351,15 @@ impl DInt64 {
466
351
}
467
352
468
353
let ex: u64 = if ( c >> 64 ) as u64 != 0 {
469
- ( c >> 64 as u64 ) . leading_zeros ( ) as u64
354
+ ( ( c >> 64 ) as u64 ) . leading_zeros ( ) as u64
470
355
} else {
471
356
64 + if ( c & u64:: MAX as u128 ) != 0 {
472
357
( ( c & u64:: MAX as u128 ) as u64 ) . leading_zeros ( ) as u64
473
358
} else {
474
359
self . ex as u64
475
360
}
476
361
} ;
362
+ c <<= ex;
477
363
478
364
Self :: new ( c, m_ex - ex as i64 , sign as u64 )
479
365
}
@@ -487,11 +373,12 @@ impl DInt64 {
487
373
// code uint64_t l = ((u128)(a->lo) * (u128)(b->lo)) >> 64; m.l += l; m.h +=
488
374
// (m.l < l);
489
375
let ( m, ovf) = m1. overflowing_add ( m2) ;
490
- t += ( ovf as u128 ) << 64 ;
491
- t += m & ( ( u64:: MAX as u128 ) << 64 ) ;
376
+ t = t. wrapping_add ( ( ovf as u128 ) << 64 ) ;
377
+ t = t. wrapping_add ( m. hi ( ) . widen ( ) ) ;
378
+ // t = t.wrapping_add(m & ((u64::MAX as u128) << 64));
492
379
493
380
// Ensure that r->hi starts with a 1
494
- let ex: u64 = ! ( ( t >> ( 64 + 63 ) ) as u64 ) ;
381
+ let ex: u64 = ( ( t >> ( 64 + 63 ) ) == 0 ) as u64 ;
495
382
if ex != 0 {
496
383
t <<= 1 ;
497
384
}
@@ -535,13 +422,30 @@ impl DInt64 {
535
422
}
536
423
537
424
fn cmp ( & self , other : & Self ) -> Ordering {
538
- if self . ex != other. ex {
539
- self . ex . cmp ( & other. ex )
540
- } else if self . hi != other. hi {
541
- self . hi . cmp ( & other. hi )
425
+ let cmp = |a, b| ( a > b) as i32 - ( a < b) as i32 ;
426
+ let cmpu = |a, b| ( a > b) as i32 - ( a < b) as i32 ;
427
+ let r = if cmp ( self . ex , other. ex ) != 0 {
428
+ cmp ( self . ex , other. ex )
429
+ } else if cmpu ( self . hi , other. hi ) != 0 {
430
+ cmpu ( self . hi , other. hi )
542
431
} else {
543
- self . lo . cmp ( & other. lo )
432
+ cmpu ( self . lo , other. lo )
433
+ } ;
434
+ if r == -1 {
435
+ Ordering :: Less
436
+ } else if r == 0 {
437
+ Ordering :: Equal
438
+ } else {
439
+ Ordering :: Greater
544
440
}
441
+
442
+ // if self.ex != other.ex {
443
+ // self.ex.cmp(&other.ex)
444
+ // } else if self.hi != other.hi {
445
+ // self.hi.cmp(&other.hi)
446
+ // } else {
447
+ // self.lo.cmp(&other.lo)
448
+ // }
545
449
}
546
450
547
451
fn pow2 ( & self ) -> Self {
@@ -593,10 +497,10 @@ impl DInt64 {
593
497
// Extract both the significand and exponent of a double
594
498
fn fast_extract ( x : f64 ) -> ( i64 , u64 ) {
595
499
let xi = x. to_bits ( ) ;
596
- let mut e = ( xi >> 52 ) & 0x7ff ;
500
+ let e = ( xi >> 52 ) & 0x7ff ;
597
501
let m = ( xi & ( u64:: MAX >> 12 ) ) + ( if e != 0 { 1u64 << 52 } else { 0 } ) ;
598
- e = e - 0x3ff ;
599
- ( e as i64 , m)
502
+ let e = e as i64 - 0x3ff ;
503
+ ( e, m)
600
504
}
601
505
602
506
fn fast_two_sum ( a : f64 , b : f64 ) -> ( f64 , f64 ) {
@@ -619,22 +523,6 @@ fn fast_two_sum(a: f64, b: f64) -> (f64, f64) {
619
523
|(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|) */
620
524
}
621
525
622
- fn __builtin_expect < T > ( v : T , _exp : T ) -> T {
623
- v
624
- }
625
-
626
- fn __builtin_fabs ( x : f64 ) -> f64 {
627
- unsafe { core:: intrinsics:: fabsf64 ( x) }
628
- }
629
-
630
- fn __builtin_copysign ( x : f64 , y : f64 ) -> f64 {
631
- unsafe { core:: intrinsics:: copysignf64 ( x, y) }
632
- }
633
-
634
- fn __builtin_fma ( x : f64 , y : f64 , z : f64 ) -> f64 {
635
- unsafe { core:: intrinsics:: fmaf64 ( x, y, z) }
636
- }
637
-
638
526
fn fmaf64 ( x : f64 , y : f64 , z : f64 ) -> f64 {
639
527
#[ cfg( intrinsics_enabled) ]
640
528
{
0 commit comments