Skip to content

Commit 226f3bc

Browse files
committed
Minor improvements to isqrt behind-the-scenes functions
* Eliminated generation of code that handles division-by-zero panics where the denominator can't be zero.
* Renamed `EVEN_BITMASK` to `EVEN_MAKING_BITMASK` for clarity.
* Simplified the computation that generates `EVEN_MAKING_BITMASK`.
* The `unsigned_fn` macro now has code that makes it clearer that recursion is happening when `n` fits in the next smaller type.
1 parent f4bbacc commit 226f3bc

File tree

1 file changed

+34
-4
lines changed

1 file changed

+34
-4
lines changed

library/core/src/num/int_sqrt.rs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -125,12 +125,11 @@ macro_rules! unsigned_fn {
125125
#[must_use = "this returns the result of the operation, \
126126
without modifying the original"]
127127
pub const fn $UnsignedT(mut n: $UnsignedT) -> $UnsignedT {
128-
let leading_zeros = n.leading_zeros();
129-
if leading_zeros >= <$HalfBitsT>::BITS {
128+
if n <= <$HalfBitsT>::MAX as $UnsignedT {
130129
$HalfBitsT(n as $HalfBitsT) as $UnsignedT
131130
} else {
132-
const EVEN_BITMASK: u32 = u32::MAX & !1;
133-
let normalization_shift = leading_zeros & EVEN_BITMASK;
131+
const EVEN_MAKING_BITMASK: u32 = !1;
132+
let normalization_shift = n.leading_zeros() & EVEN_MAKING_BITMASK;
134133
n <<= normalization_shift;
135134

136135
let s = $stages!(n);
@@ -155,6 +154,28 @@ macro_rules! first_stage {
155154
/// Generates a middle stage of the computation.
156155
macro_rules! middle_stage {
157156
($original_bits:literal, $ty:ty, $n:ident, $s:ident, $r:ident) => {{
157+
// SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
158+
// it to avoid generating code to handle division-by-zero panics in the
159+
// divisions below.
160+
//
161+
// If the original `$n` is zero, the top of the `unsigned_fn` macro
162+
// recurses instead of continuing to this point, so the original `$n`
163+
// wasn't a 0 if we've reached here.
164+
//
165+
// Then the `unsigned_fn` macro normalizes `$n` so that at least one of
166+
// the two most-significant bits is a 1.
167+
//
168+
// Then these stages take as many of the most-significant bits of `$n`
169+
// as will fit in this stage's type. For example, the stage that handles
170+
// `u32` deals with the 32 most-significant bits of `$n`. This means
171+
// that each stage has at least one 1 bit in `n`'s two most-significant
172+
// bits, making `n` nonzero.
173+
//
174+
// Then, the stage previous to this produces `$s` as the correct
175+
// integer square root for the previous type. Since it was taking the
176+
// integer square root of a nonzero number, `$s` will be nonzero.
177+
unsafe { crate::hint::assert_unchecked($s != 0) };
178+
158179
const N_SHIFT: u32 = $original_bits - <$ty>::BITS;
159180
let n = ($n >> N_SHIFT) as $ty;
160181

@@ -168,6 +189,7 @@ macro_rules! middle_stage {
168189
let denominator = ($s as $ty) << 1;
169190
let q = numerator / denominator;
170191
let u = numerator % denominator;
192+
171193
let mut s = ($s << QUARTER_BITS) as $ty + q;
172194
let (mut r, overflow) =
173195
((u << QUARTER_BITS) | (lo & LOWEST_QUARTER_1_BITS)).overflowing_sub(q * q);
@@ -182,13 +204,21 @@ macro_rules! middle_stage {
182204
/// Generates the last stage of the computation before denormalization.
183205
macro_rules! last_stage {
184206
($ty:ty, $n:ident, $s:ident, $r:ident) => {{
207+
// SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
208+
// it to avoid generating code to handle division-by-zero panics in the
209+
// divisions below.
210+
//
211+
// See the proof in the `middle_stage` macro above.
212+
unsafe { core::hint::assert_unchecked($s != 0) };
213+
185214
const HALF_BITS: u32 = <$ty>::BITS >> 1;
186215
const QUARTER_BITS: u32 = <$ty>::BITS >> 2;
187216
const LOWER_HALF_1_BITS: $ty = (1 << HALF_BITS) - 1;
188217

189218
let lo = $n & LOWER_HALF_1_BITS;
190219
let numerator = (($r as $ty) << QUARTER_BITS) | (lo >> QUARTER_BITS);
191220
let denominator = ($s as $ty) << 1;
221+
192222
let q = numerator / denominator;
193223
let mut s = ($s << QUARTER_BITS) as $ty + q;
194224
let (s_squared, overflow) = s.overflowing_mul(s);

0 commit comments

Comments
 (0)