From b351893c5d691fa6edd164b1292ff3e94a302c1d Mon Sep 17 00:00:00 2001
From: Lokathor
Date: Wed, 7 Aug 2019 14:06:12 -0600
Subject: [PATCH 1/5] Improve sqrt/sqrtf if stable intrinsics allow

---
 src/math/sqrt.rs  | 252 +++++++++++++++++++++++++---------------------
 src/math/sqrtf.rs | 160 ++++++++++++++++-------------
 2 files changed, 224 insertions(+), 188 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 58cf00ed8..f01267da7 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -95,128 +95,146 @@ pub fn sqrt(x: f64) -> f64 {
             }
         }
     }
-    let mut z: f64;
-    let sign: Wrapping<u32> = Wrapping(0x80000000);
-    let mut ix0: i32;
-    let mut s0: i32;
-    let mut q: i32;
-    let mut m: i32;
-    let mut t: i32;
-    let mut i: i32;
-    let mut r: Wrapping<u32>;
-    let mut t1: Wrapping<u32>;
-    let mut s1: Wrapping<u32>;
-    let mut ix1: Wrapping<u32>;
-    let mut q1: Wrapping<u32>;
+    #[cfg(target_feature="sse2")]
+    {
+      // Note(Lokathor): If compile time settings allow, we just use SSE2, since
+      // the sqrt in `std` on these platforms also compiles down to an SSE2
+      // instruction.
+      #[cfg(target_arch="x86")]
+      use core::arch::x86::*;
+      #[cfg(target_arch="x86_64")]
+      use core::arch::x86_64::*;
+      unsafe {
+        let m = _mm_set_sd(x);
+        let m_sqrt = _mm_sqrt_pd(m);
+        _mm_cvtsd_f64(m_sqrt)
+      }
+    }
+    #[cfg(not(target_feature="sse2"))]
+    {
+      let mut z: f64;
+      let sign: Wrapping<u32> = Wrapping(0x80000000);
+      let mut ix0: i32;
+      let mut s0: i32;
+      let mut q: i32;
+      let mut m: i32;
+      let mut t: i32;
+      let mut i: i32;
+      let mut r: Wrapping<u32>;
+      let mut t1: Wrapping<u32>;
+      let mut s1: Wrapping<u32>;
+      let mut ix1: Wrapping<u32>;
+      let mut q1: Wrapping<u32>;
 
-    ix0 = (x.to_bits() >> 32) as i32;
-    ix1 = Wrapping(x.to_bits() as u32);
+      ix0 = (x.to_bits() >> 32) as i32;
+      ix1 = Wrapping(x.to_bits() as u32);
 
-    /* take care of Inf and NaN */
-    if (ix0 & 0x7ff00000) == 0x7ff00000 {
-        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-    }
-    /* take care of zero */
-    if ix0 <= 0 {
-        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-            return x; /* sqrt(+-0) = +-0 */
-        }
-        if ix0 < 0 {
-            return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-        }
-    }
-    /* normalize x */
-    m = ix0 >> 20;
-    if m == 0 {
-        /* subnormal x */
-        while ix0 == 0 {
-            m -= 21;
-            ix0 |= (ix1 >> 11).0 as i32;
-            ix1 <<= 21;
-        }
-        i = 0;
-        while (ix0 & 0x00100000) == 0 {
-            i += 1;
-            ix0 <<= 1;
-        }
-        m -= i - 1;
-        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-        ix1 = ix1 << i as usize;
-    }
-    m -= 1023; /* unbias exponent */
-    ix0 = (ix0 & 0x000fffff) | 0x00100000;
-    if (m & 1) == 1 {
-        /* odd m, double x to make it even */
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-    }
-    m >>= 1; /* m = [m/2] */
+      /* take care of Inf and NaN */
+      if (ix0 & 0x7ff00000) == 0x7ff00000 {
+        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+      }
+      /* take care of zero */
+      if ix0 <= 0 {
+        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
+          return x; /* sqrt(+-0) = +-0 */
+        }
+        if ix0 < 0 {
+          return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
+        }
+      }
+      /* normalize x */
+      m = ix0 >> 20;
+      if m == 0 {
+        /* subnormal x */
+        while ix0 == 0 {
+          m -= 21;
+          ix0 |= (ix1 >> 11).0 as i32;
+          ix1 <<= 21;
+        }
+        i = 0;
+        while (ix0 & 0x00100000) == 0 {
+          i += 1;
+          ix0 <<= 1;
+        }
+        m -= i - 1;
+        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
+        ix1 = ix1 << i as usize;
+      }
+      m -= 1023; /* unbias exponent */
+      ix0 = (ix0 & 0x000fffff) | 0x00100000;
+      if (m & 1) == 1 {
+        /* odd m, double x to make it even */
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+      }
+      m >>= 1; /* m = [m/2] */
 
-    /* 
generate sqrt(x) bit by bit */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs index 889b52581..a4d9ab53d 100644 --- a/src/math/sqrtf.rs +++ b/src/math/sqrtf.rs @@ -29,83 +29,101 @@ pub fn sqrtf(x: f32) -> f32 { } } } - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; + #[cfg(target_feature="sse")] + { + // Note(Lokathor): If compile time settings allow, we just use SSE, since + // the sqrt in `std` on these platforms also compiles down to an SSE + // instruction. 
+ #[cfg(target_arch="x86")] + use core::arch::x86::*; + #[cfg(target_arch="x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } + } + #[cfg(not(target_feature="sse"))] + { + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; - ix = x.to_bits() as i32; + ix = x.to_bits() as i32; - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) + } } From 519c5d6cdd6485eb7e14c8fbd66f29b4433d233e Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 7 Aug 2019 14:10:34 -0600 Subject: [PATCH 2/5] apply rustfmt --- src/math/sqrt.rs | 264 +++++++++++++++++++++++----------------------- src/math/sqrtf.rs | 170 ++++++++++++++--------------- 2 files changed, 217 insertions(+), 217 deletions(-) diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs index f01267da7..2fb1b24b7 100644 --- a/src/math/sqrt.rs +++ b/src/math/sqrt.rs @@ -95,146 +95,146 @@ pub fn sqrt(x: f64) -> f64 { } } } - #[cfg(target_feature="sse2")] + #[cfg(target_feature = "sse2")] { - // Note(Lokathor): If compile time settings allow, 
we just use SSE2, since
-      // the sqrt in `std` on these platforms also compiles down to an SSE2
-      // instruction.
-      #[cfg(target_arch="x86")]
-      use core::arch::x86::*;
-      #[cfg(target_arch="x86_64")]
-      use core::arch::x86_64::*;
-      unsafe {
-        let m = _mm_set_sd(x);
-        let m_sqrt = _mm_sqrt_pd(m);
-        _mm_cvtsd_f64(m_sqrt)
-      }
+        // Note(Lokathor): If compile time settings allow, we just use SSE2, since
+        // the sqrt in `std` on these platforms also compiles down to an SSE2
+        // instruction.
+        #[cfg(target_arch = "x86")]
+        use core::arch::x86::*;
+        #[cfg(target_arch = "x86_64")]
+        use core::arch::x86_64::*;
+        unsafe {
+            let m = _mm_set_sd(x);
+            let m_sqrt = _mm_sqrt_pd(m);
+            _mm_cvtsd_f64(m_sqrt)
+        }
     }
-    #[cfg(not(target_feature="sse2"))]
+    #[cfg(not(target_feature = "sse2"))]
     {
-      let mut z: f64;
-      let sign: Wrapping<u32> = Wrapping(0x80000000);
-      let mut ix0: i32;
-      let mut s0: i32;
-      let mut q: i32;
-      let mut m: i32;
-      let mut t: i32;
-      let mut i: i32;
-      let mut r: Wrapping<u32>;
-      let mut t1: Wrapping<u32>;
-      let mut s1: Wrapping<u32>;
-      let mut ix1: Wrapping<u32>;
-      let mut q1: Wrapping<u32>;
+        let mut z: f64;
+        let sign: Wrapping<u32> = Wrapping(0x80000000);
+        let mut ix0: i32;
+        let mut s0: i32;
+        let mut q: i32;
+        let mut m: i32;
+        let mut t: i32;
+        let mut i: i32;
+        let mut r: Wrapping<u32>;
+        let mut t1: Wrapping<u32>;
+        let mut s1: Wrapping<u32>;
+        let mut ix1: Wrapping<u32>;
+        let mut q1: Wrapping<u32>;
 
-      ix0 = (x.to_bits() >> 32) as i32;
-      ix1 = Wrapping(x.to_bits() as u32);
+        ix0 = (x.to_bits() >> 32) as i32;
+        ix1 = Wrapping(x.to_bits() as u32);
 
-      /* take care of Inf and NaN */
-      if (ix0 & 0x7ff00000) == 0x7ff00000 {
-        return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
-      }
-      /* take care of zero */
-      if ix0 <= 0 {
-        if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
-          return x; /* sqrt(+-0) = +-0 */
-        }
-        if ix0 < 0 {
-          return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
-        }
-      }
-      /* normalize x */
-      m = ix0 >> 20;
-      if m == 0 {
-        /* subnormal x */
-        while ix0 == 0 {
-          m -= 21;
-          ix0 |= (ix1 >> 11).0 as i32;
-          ix1 <<= 21;
-        }
-        i = 0;
-        while (ix0 & 0x00100000) == 0 {
-          i += 1;
-          ix0 <<= 1;
-        }
-        m -= i - 1;
-        ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
-        ix1 = ix1 << i as usize;
-      }
-      m -= 1023; /* unbias exponent */
-      ix0 = (ix0 & 0x000fffff) | 0x00100000;
-      if (m & 1) == 1 {
-        /* odd m, double x to make it even */
-        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-        ix1 += ix1;
-      }
-      m >>= 1; /* m = [m/2] */
+        /* take care of Inf and NaN */
+        if (ix0 & 0x7ff00000) == 0x7ff00000 {
+            return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+        }
+        /* take care of zero */
+        if ix0 <= 0 {
+            if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
+                return x; /* sqrt(+-0) = +-0 */
+            }
+            if ix0 < 0 {
+                return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
+            }
+        }
+        /* normalize x */
+        m = ix0 >> 20;
+        if m == 0 {
+            /* subnormal x */
+            while ix0 == 0 {
+                m -= 21;
+                ix0 |= (ix1 >> 11).0 as i32;
+                ix1 <<= 21;
+            }
+            i = 0;
+            while (ix0 & 0x00100000) == 0 {
+                i += 1;
+                ix0 <<= 1;
+            }
+            m -= i - 1;
+            ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
+            ix1 = ix1 << i as usize;
+        }
+        m -= 1023; /* unbias exponent */
+        ix0 = (ix0 & 0x000fffff) | 0x00100000;
+        if (m & 1) == 1 {
+            /* odd m, double x to make it even */
+            ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+            ix1 += ix1;
+        }
+        m >>= 1; /* m = [m/2] */
 
-      /* generate sqrt(x) bit by bit */
-      ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
-      ix1 += ix1;
-      q = 0; /* [q,q1] = sqrt(x) */
-      q1 = Wrapping(0);
-      s0 = 0;
-      s1 = Wrapping(0);
-      r = Wrapping(0x00200000); /* r = moving bit from right to left */
+        /* generate sqrt(x) bit by bit */
+        ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
+        ix1 += ix1;
+        q = 0; /* [q,q1] = sqrt(x) */
+        q1 = Wrapping(0);
+        s0 = 0;
+        s1 = Wrapping(0);
+        r = Wrapping(0x00200000); /* r = moving bit from right to 
left */ + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } } diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs index a4d9ab53d..5fe0a7744 100644 --- a/src/math/sqrtf.rs +++ b/src/math/sqrtf.rs @@ -29,101 +29,101 @@ pub fn sqrtf(x: f32) -> f32 { } } } - #[cfg(target_feature="sse")] + #[cfg(target_feature = "sse")] { - // Note(Lokathor): If compile time settings allow, we just use SSE, since - // the sqrt in `std` on these platforms also compiles down to an SSE - // instruction. - #[cfg(target_arch="x86")] - use core::arch::x86::*; - #[cfg(target_arch="x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) - } + // Note(Lokathor): If compile time settings allow, we just use SSE, since + // the sqrt in `std` on these platforms also compiles down to an SSE + // instruction. 
+ #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } } - #[cfg(not(target_feature="sse"))] + #[cfg(not(target_feature = "sse"))] { - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; - ix = x.to_bits() as i32; + ix = x.to_bits() as i32; - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ + } + } - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; + } + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ + ix += ix; + } + m >>= 1; /* m = [m/2] */ - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) } } From a9fa14dd6a2ce1264c7504ca1b05ab7cf8976329 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 7 Aug 2019 14:16:10 -0600 Subject: [PATCH 3/5] move use/const statements to a limited scope --- src/math/sqrt.rs | 7 ++++--- src/math/sqrtf.rs | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs index 2fb1b24b7..8a67cb18b 100644 --- a/src/math/sqrt.rs +++ 
b/src/math/sqrt.rs
@@ -77,9 +77,6 @@
  */
 
 use core::f64;
-use core::num::Wrapping;
-
-const TINY: f64 = 1.0e-300;
 
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrt(x: f64) -> f64 {
@@ -112,6 +109,10 @@ pub fn sqrt(x: f64) -> f64 {
     }
     #[cfg(not(target_feature = "sse2"))]
     {
+        use core::num::Wrapping;
+
+        const TINY: f64 = 1.0e-300;
+
         let mut z: f64;
         let sign: Wrapping<u32> = Wrapping(0x80000000);
         let mut ix0: i32;
diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index 5fe0a7744..cb3c1672e 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -13,8 +13,6 @@
  * ====================================================
  */
 
-const TINY: f32 = 1.0e-30;
-
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn sqrtf(x: f32) -> f32 {
     // On wasm32 we know that LLVM's intrinsic will compile to an optimized
@@ -46,6 +44,8 @@ pub fn sqrtf(x: f32) -> f32 {
     }
     #[cfg(not(target_feature = "sse"))]
     {
+        const TINY: f32 = 1.0e-30;
+
         let mut z: f32;
         let sign: i32 = 0x80000000u32 as i32;
         let mut ix: i32;

From a3d55e685dd332eadccbda822401d383ac5f536d Mon Sep 17 00:00:00 2001
From: Lokathor
Date: Thu, 8 Aug 2019 18:21:10 -0600
Subject: [PATCH 4/5] update comments

---
 src/math/sqrtf.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/math/sqrtf.rs b/src/math/sqrtf.rs
index cb3c1672e..1d5b78e84 100644
--- a/src/math/sqrtf.rs
+++ b/src/math/sqrtf.rs
@@ -29,9 +29,9 @@ pub fn sqrtf(x: f32) -> f32 {
     }
     #[cfg(target_feature = "sse")]
     {
-        // Note(Lokathor): If compile time settings allow, we just use SSE, since
-        // the sqrt in `std` on these platforms also compiles down to an SSE
-        // instruction.
+        // Note: This path is unlikely since LLVM will usually have already
+        // optimized sqrt calls into hardware instructions if sse is available,
+        // but if someone does end up here they'll appreciate the speed increase.
         #[cfg(target_arch = "x86")]
         use core::arch::x86::*;
         #[cfg(target_arch = "x86_64")]

From b0f666e1a218740db09d7d6efb6a610be470225c Mon Sep 17 00:00:00 2001
From: Lokathor
Date: Thu, 8 Aug 2019 18:21:18 -0600
Subject: [PATCH 5/5] update comments

---
 src/math/sqrt.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/math/sqrt.rs b/src/math/sqrt.rs
index 8a67cb18b..31afe3356 100644
--- a/src/math/sqrt.rs
+++ b/src/math/sqrt.rs
@@ -94,9 +94,9 @@ pub fn sqrt(x: f64) -> f64 {
     }
     #[cfg(target_feature = "sse2")]
     {
-        // Note(Lokathor): If compile time settings allow, we just use SSE2, since
-        // the sqrt in `std` on these platforms also compiles down to an SSE2
-        // instruction.
+        // Note: This path is unlikely since LLVM will usually have already
+        // optimized sqrt calls into hardware instructions if sse2 is available,
+        // but if someone does end up here they'll appreciate the speed increase.
         #[cfg(target_arch = "x86")]
         use core::arch::x86::*;
         #[cfg(target_arch = "x86_64")]
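
The net effect of the series on `sqrt` is the compile-time dispatch sketched
below. This is a minimal illustrative sketch, not text from the patches:
`software_sqrt` is a hypothetical stand-in for the bit-by-bit fallback that
the real file keeps inline, and the comments are paraphrased.

    pub fn sqrt(x: f64) -> f64 {
        #[cfg(target_feature = "sse2")]
        {
            #[cfg(target_arch = "x86")]
            use core::arch::x86::*;
            #[cfg(target_arch = "x86_64")]
            use core::arch::x86_64::*;
            // SAFETY: this block only compiles when the target statically
            // guarantees SSE2, so the intrinsics are always available, and
            // `_mm_sqrt_pd` produces the correctly rounded square root.
            unsafe { _mm_cvtsd_f64(_mm_sqrt_pd(_mm_set_sd(x))) }
        }
        #[cfg(not(target_feature = "sse2"))]
        {
            // Hypothetical name for the bit-by-bit software path.
            software_sqrt(x)
        }
    }

The same shape applies to `sqrtf`, using `target_feature = "sse"` and the
`_mm_set_ss`, `_mm_sqrt_ss`, and `_mm_cvtss_f32` intrinsics. Because `#[cfg]`
is resolved when the crate is compiled, exactly one of the two blocks exists
in the final function and no runtime feature check is emitted. That is also
why the comments added in patches 4 and 5 call the SSE path unlikely: with
SSE2 statically enabled, LLVM usually lowers `sqrt` calls straight to the
hardware instruction before this function is ever reached.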