@@ -125,12 +125,11 @@ macro_rules! unsigned_fn {
125
125
#[ must_use = "this returns the result of the operation, \
126
126
without modifying the original"]
127
127
pub const fn $UnsignedT( mut n: $UnsignedT) -> $UnsignedT {
128
- let leading_zeros = n. leading_zeros( ) ;
129
- if leading_zeros >= <$HalfBitsT>:: BITS {
128
+ if n <= <$HalfBitsT>:: MAX as $UnsignedT {
130
129
$HalfBitsT( n as $HalfBitsT) as $UnsignedT
131
130
} else {
132
- const EVEN_BITMASK : u32 = u32 :: MAX & !1 ;
133
- let normalization_shift = leading_zeros & EVEN_BITMASK ;
131
+ const EVEN_MAKING_BITMASK : u32 = !1 ;
132
+ let normalization_shift = n . leading_zeros( ) & EVEN_MAKING_BITMASK ;
134
133
n <<= normalization_shift;
135
134
136
135
let s = $stages!( n) ;
@@ -155,6 +154,28 @@ macro_rules! first_stage {
155
154
/// Generates a middle stage of the computation.
156
155
macro_rules! middle_stage {
157
156
( $original_bits: literal, $ty: ty, $n: ident, $s: ident, $r: ident) => { {
157
+ // SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
158
+ // it to avoid generating code to handle division-by-zero panics in the
159
+ // divisions below.
160
+ //
161
+ // If the original `$n` is zero, the top of the `unsigned_fn` macro
162
+ // recurses instead of continuing to this point, so the original `$n`
163
+ // wasn't a 0 if we've reached here.
164
+ //
165
+ // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
166
+ // the two most-significant bits is a 1.
167
+ //
168
+ // Then these stages take as many of the most-significant bits of `$n`
169
+ // as fit in this stage's type. For example, the stage that handles
170
+ // `u32` deals with the 32 most-significant bits of `$n`. This means
171
+ // that each stage has at least one 1 bit in `n`'s two most-significant
172
+ // bits, making `n` nonzero.
173
+ //
174
+ // Then, the stage previous to this produces `$s` as the correct
175
+ // integer square root for the previous type. Since it was taking the
176
+ // integer square root of a nonzero number, `$s` will be nonzero.
177
+ unsafe { crate :: hint:: assert_unchecked( $s != 0 ) } ;
178
+
158
179
const N_SHIFT : u32 = $original_bits - <$ty>:: BITS ;
159
180
let n = ( $n >> N_SHIFT ) as $ty;
160
181
@@ -168,6 +189,7 @@ macro_rules! middle_stage {
168
189
let denominator = ( $s as $ty) << 1 ;
169
190
let q = numerator / denominator;
170
191
let u = numerator % denominator;
192
+
171
193
let mut s = ( $s << QUARTER_BITS ) as $ty + q;
172
194
let ( mut r, overflow) =
173
195
( ( u << QUARTER_BITS ) | ( lo & LOWEST_QUARTER_1_BITS ) ) . overflowing_sub( q * q) ;
@@ -182,13 +204,21 @@ macro_rules! middle_stage {
182
204
/// Generates the last stage of the computation before denormalization.
183
205
macro_rules! last_stage {
184
206
( $ty: ty, $n: ident, $s: ident, $r: ident) => { {
207
+ // SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
208
+ // it to avoid generating code to handle division-by-zero panics in the
209
+ // divisions below.
210
+ //
211
+ // See the proof in the `middle_stage` macro above.
212
+ unsafe { core:: hint:: assert_unchecked( $s != 0 ) } ;
213
+
185
214
const HALF_BITS : u32 = <$ty>:: BITS >> 1 ;
186
215
const QUARTER_BITS : u32 = <$ty>:: BITS >> 2 ;
187
216
const LOWER_HALF_1_BITS : $ty = ( 1 << HALF_BITS ) - 1 ;
188
217
189
218
let lo = $n & LOWER_HALF_1_BITS ;
190
219
let numerator = ( ( $r as $ty) << QUARTER_BITS ) | ( lo >> QUARTER_BITS ) ;
191
220
let denominator = ( $s as $ty) << 1 ;
221
+
192
222
let q = numerator / denominator;
193
223
let mut s = ( $s << QUARTER_BITS ) as $ty + q;
194
224
let ( s_squared, overflow) = s. overflowing_mul( s) ;
0 commit comments