Skip to content

Commit 8aae21a

Browse files
committed
Add non-BMI forms of remaining x86 P-384 functions
1 parent 0060c2a commit 8aae21a

39 files changed: 5792 additions (+5792) and 7 deletions (-7).

arm/p384/bignum_cmul_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
// ----------------------------------------------------------------------------
2626

2727
.globl bignum_cmul_p384
28+
.globl bignum_cmul_p384_alt
2829
.text
2930
.balign 4
3031

@@ -55,6 +56,7 @@
5556

5657

5758
bignum_cmul_p384:
59+
bignum_cmul_p384_alt:
5860

5961
// First do the multiply, straightforwardly, getting [h; d5; ...; d0]
6062

arm/p384/bignum_deamont_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
// ----------------------------------------------------------------------------
2828

2929
.globl bignum_deamont_p384
30+
.globl bignum_deamont_p384_alt
3031
.text
3132
.balign 4
3233

@@ -88,6 +89,7 @@
8889
#define w x10
8990

9091
bignum_deamont_p384:
92+
bignum_deamont_p384_alt:
9193

9294
// Set up an initial window with the input x and an extra leading zero
9395

arm/p384/bignum_demont_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
// ----------------------------------------------------------------------------
2828

2929
.globl bignum_demont_p384
30+
.globl bignum_demont_p384_alt
3031
.text
3132
.balign 4
3233

@@ -88,6 +89,7 @@
8889
#define w x10
8990

9091
bignum_demont_p384:
92+
bignum_demont_p384_alt:
9193

9294
// Set up an initial window with the input x and an extra leading zero
9395

arm/p384/bignum_mod_n384.S

+2
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
// ----------------------------------------------------------------------------
2727

2828
.globl bignum_mod_n384
29+
.globl bignum_mod_n384_alt
2930
.text
3031
.balign 4
3132

@@ -73,6 +74,7 @@
7374
movk nn, n3, lsl #48
7475

7576
bignum_mod_n384:
77+
bignum_mod_n384_alt:
7678

7779
// If the input is already <= 5 words long, go to a trivial "copy" path
7880

arm/p384/bignum_mod_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
// ----------------------------------------------------------------------------
2525

2626
.globl bignum_mod_p384
27+
.globl bignum_mod_p384_alt
2728
.text
2829
.balign 4
2930

@@ -51,6 +52,7 @@
5152

5253

5354
bignum_mod_p384:
55+
bignum_mod_p384_alt:
5456

5557
// If the input is already <= 5 words long, go to a trivial "copy" path
5658

arm/p384/bignum_tomont_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
// ----------------------------------------------------------------------------
2525

2626
.globl bignum_tomont_p384
27+
.globl bignum_tomont_p384_alt
2728
.text
2829
.balign 4
2930

@@ -70,6 +71,7 @@
7071
adc d5, d5, t3
7172

7273
bignum_tomont_p384:
74+
bignum_tomont_p384_alt:
7375

7476
#define d0 x2
7577
#define d1 x3

arm/p384/bignum_triple_p384.S

+2
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
// ----------------------------------------------------------------------------
2828

2929
.globl bignum_triple_p384
30+
.globl bignum_triple_p384_alt
3031
.text
3132
.balign 4
3233

@@ -62,6 +63,7 @@
6263

6364

6465
bignum_triple_p384:
66+
bignum_triple_p384_alt:
6567

6668
// Load the inputs
6769

benchmarks/benchmark.c

+19
Original file line number | Diff line number | Diff line change
@@ -219,6 +219,8 @@ void call_bignum_cmul_p256k1_alt(void) repeat(bignum_cmul_p256k1_alt(b0,b1[0],b2
219219

220220
void call_bignum_cmul_p384(void) repeat(bignum_cmul_p384(b0,b1[0],b2))
221221

222+
void call_bignum_cmul_p384_alt(void) repeat(bignum_cmul_p384_alt(b0,b1[0],b2))
223+
222224
void call_bignum_cmul_p521(void) repeat(bignum_cmul_p521(b0,b1[0],b2))
223225

224226
void call_bignum_optneg_p256(void) repeat(bignum_optneg_p256(b0,b1[0],b2))
@@ -237,6 +239,8 @@ void call_bignum_deamont_p256k1(void) repeat(bignum_deamont_p256k1(b0,b1))
237239

238240
void call_bignum_deamont_p384(void) repeat(bignum_deamont_p384(b0,b1))
239241

242+
void call_bignum_deamont_p384_alt(void) repeat(bignum_deamont_p384_alt(b0,b1))
243+
240244
void call_bignum_deamont_p521(void) repeat(bignum_deamont_p521(b0,b1))
241245

242246
void call_bignum_demont_p256(void) repeat(bignum_demont_p256(b0,b1))
@@ -247,6 +251,8 @@ void call_bignum_demont_p256k1(void) repeat(bignum_demont_p256k1(b0,b1))
247251

248252
void call_bignum_demont_p384(void) repeat(bignum_demont_p384(b0,b1))
249253

254+
void call_bignum_demont_p384_alt(void) repeat(bignum_demont_p384_alt(b0,b1))
255+
250256
void call_bignum_demont_p521(void) repeat(bignum_demont_p521(b0,b1))
251257

252258
void call_bignum_tomont_p256(void) repeat(bignum_tomont_p256(b0,b1))
@@ -259,6 +265,8 @@ void call_bignum_tomont_p256k1_alt(void) repeat(bignum_tomont_p256k1_alt(b0,b1))
259265

260266
void call_bignum_tomont_p384(void) repeat(bignum_tomont_p384(b0,b1))
261267

268+
void call_bignum_tomont_p384_alt(void) repeat(bignum_tomont_p384_alt(b0,b1))
269+
262270
void call_bignum_tomont_p521(void) repeat(bignum_tomont_p521(b0,b1))
263271

264272
void call_bignum_double_p256(void) repeat(bignum_double_p256(b0,b1))
@@ -287,6 +295,8 @@ void call_bignum_triple_p256k1_alt(void) repeat(bignum_triple_p256k1_alt(b0,b1))
287295

288296
void call_bignum_triple_p384(void) repeat(bignum_triple_p384(b0,b1))
289297

298+
void call_bignum_triple_p384_alt(void) repeat(bignum_triple_p384_alt(b0,b1))
299+
290300
void call_bignum_triple_p521(void) repeat(bignum_triple_p521(b0,b1))
291301

292302
void call_bignum_montmul_p256(void) repeat(bignum_montmul_p256(b0,b1,b2))
@@ -435,9 +445,11 @@ void call_bignum_mod_p521_9(void) repeat(bignum_mod_p521_9(b0,b1))
435445
void call_bignum_mod_n256__8(void) repeat(bignum_mod_n256(b0,8,b1))
436446
void call_bignum_mod_n256_alt__8(void) repeat(bignum_mod_n256_alt(b0,8,b1))
437447
void call_bignum_mod_n384__12(void) repeat(bignum_mod_n384(b0,12,b1))
448+
void call_bignum_mod_n384_alt__12(void) repeat(bignum_mod_n384_alt(b0,12,b1))
438449
void call_bignum_mod_p256__8(void) repeat(bignum_mod_p256(b0,8,b1))
439450
void call_bignum_mod_p256_alt__8(void) repeat(bignum_mod_p256_alt(b0,8,b1))
440451
void call_bignum_mod_p384__12(void) repeat(bignum_mod_p384(b0,12,b1))
452+
void call_bignum_mod_p384_alt__12(void) repeat(bignum_mod_p384_alt(b0,12,b1))
441453

442454
void call_bignum_nonzero_4(void) repeat(bignum_nonzero_4(b0))
443455
void call_bignum_nonzero_6(void) repeat(bignum_nonzero_6(b0))
@@ -660,6 +672,7 @@ int main(void)
660672
timingtest(bmi,"bignum_cmul_p256k1",call_bignum_cmul_p256k1);
661673
timingtest(all,"bignum_cmul_p256k1_alt",call_bignum_cmul_p256k1_alt);
662674
timingtest(bmi,"bignum_cmul_p384",call_bignum_cmul_p384);
675+
timingtest(all,"bignum_cmul_p384_alt",call_bignum_cmul_p384_alt);
663676
timingtest(bmi,"bignum_cmul_p521",call_bignum_cmul_p521);
664677
timingtest(all,"bignum_coprime (4x4)",call_bignum_coprime__4_4);
665678
timingtest(all,"bignum_coprime (6x6)",call_bignum_coprime__6_6);
@@ -671,12 +684,14 @@ int main(void)
671684
timingtest(all,"bignum_deamont_p256_alt",call_bignum_deamont_p256_alt);
672685
timingtest(all,"bignum_deamont_p256k1",call_bignum_deamont_p256k1);
673686
timingtest(bmi,"bignum_deamont_p384",call_bignum_deamont_p384);
687+
timingtest(all,"bignum_deamont_p384_alt",call_bignum_deamont_p384_alt);
674688
timingtest(all,"bignum_deamont_p521",call_bignum_deamont_p521);
675689
timingtest(all,"bignum_demont (32 -> 32)" ,call_bignum_demont__32);
676690
timingtest(bmi,"bignum_demont_p256",call_bignum_demont_p256);
677691
timingtest(all,"bignum_demont_p256_alt",call_bignum_demont_p256_alt);
678692
timingtest(all,"bignum_demont_p256k1",call_bignum_demont_p256k1);
679693
timingtest(bmi,"bignum_demont_p384",call_bignum_demont_p384);
694+
timingtest(all,"bignum_demont_p384_alt",call_bignum_demont_p384_alt);
680695
timingtest(all,"bignum_demont_p521",call_bignum_demont_p521);
681696
timingtest(all,"bignum_digit (32 -> 1)",call_bignum_digit__32);
682697
timingtest(all,"bignum_digitsize (32)" ,call_bignum_digitsize__32);
@@ -720,13 +735,15 @@ int main(void)
720735
timingtest(all,"bignum_mod_n256_4",call_bignum_mod_n256_4);
721736
timingtest(all,"bignum_mod_n256k1_4",call_bignum_mod_n256k1_4);
722737
timingtest(bmi,"bignum_mod_n384 (12 -> 6)",call_bignum_mod_n384__12);
738+
timingtest(all,"bignum_mod_n384_alt (12 -> 6)",call_bignum_mod_n384_alt__12);
723739
timingtest(all,"bignum_mod_n384_6",call_bignum_mod_n384_6);
724740
timingtest(bmi,"bignum_mod_n521_9",call_bignum_mod_n521_9);
725741
timingtest(bmi,"bignum_mod_p256 (8 -> 4)",call_bignum_mod_p256__8);
726742
timingtest(all,"bignum_mod_p256_alt (8 -> 4)",call_bignum_mod_p256_alt__8);
727743
timingtest(all,"bignum_mod_p256_4",call_bignum_mod_p256_4);
728744
timingtest(all,"bignum_mod_p256k1_4",call_bignum_mod_p256k1_4);
729745
timingtest(bmi,"bignum_mod_p384 (12 -> 6)",call_bignum_mod_p384__12);
746+
timingtest(all,"bignum_mod_p384_alt (12 -> 6)",call_bignum_mod_p384_alt__12);
730747
timingtest(all,"bignum_mod_p384_6",call_bignum_mod_p384_6);
731748
timingtest(all,"bignum_mod_p521_9",call_bignum_mod_p521_9);
732749
timingtest(all,"bignum_modadd (32 -> 32)" ,call_bignum_modadd__32);
@@ -825,12 +842,14 @@ int main(void)
825842
timingtest(bmi,"bignum_tomont_p256k1",call_bignum_tomont_p256k1);
826843
timingtest(all,"bignum_tomont_p256k1_alt",call_bignum_tomont_p256k1_alt);
827844
timingtest(bmi,"bignum_tomont_p384",call_bignum_tomont_p384);
845+
timingtest(all,"bignum_tomont_p384_alt",call_bignum_tomont_p384_alt);
828846
timingtest(all,"bignum_tomont_p521",call_bignum_tomont_p521);
829847
timingtest(bmi,"bignum_triple_p256",call_bignum_triple_p256);
830848
timingtest(all,"bignum_triple_p256_alt",call_bignum_triple_p256_alt);
831849
timingtest(bmi,"bignum_triple_p256k1",call_bignum_triple_p256k1);
832850
timingtest(all,"bignum_triple_p256k1_alt",call_bignum_triple_p256k1_alt);
833851
timingtest(bmi,"bignum_triple_p384",call_bignum_triple_p384);
852+
timingtest(all,"bignum_triple_p384_alt",call_bignum_triple_p384_alt);
834853
timingtest(bmi,"bignum_triple_p521",call_bignum_triple_p521);
835854
timingtest(all,"word_bytereverse",call_word_bytereverse);
836855
timingtest(all,"word_clz",call_word_clz);

include/s2n-bignum-c89.h

+7
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ extern void bignum_cmul_p256k1_alt (uint64_t z[4], uint64_t c, uint64_t x[4]);
133133
/* Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced */
134134
/* Inputs c, x[6]; output z[6] */
135135
extern void bignum_cmul_p384 (uint64_t z[6], uint64_t c, uint64_t x[6]);
136+
extern void bignum_cmul_p384_alt (uint64_t z[6], uint64_t c, uint64_t x[6]);
136137

137138
/* Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced */
138139
/* Inputs c, x[9]; output z[9] */
@@ -166,6 +167,7 @@ extern void bignum_deamont_p256k1 (uint64_t z[4], uint64_t x[4]);
166167
/* Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 */
167168
/* Input x[6]; output z[6] */
168169
extern void bignum_deamont_p384 (uint64_t z[6], uint64_t x[6]);
170+
extern void bignum_deamont_p384_alt (uint64_t z[6], uint64_t x[6]);
169171

170172
/* Convert from almost-Montgomery form z := (x / 2^576) mod p_521 */
171173
/* Input x[9]; output z[9] */
@@ -187,6 +189,7 @@ extern void bignum_demont_p256k1 (uint64_t z[4], uint64_t x[4]);
187189
/* Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced */
188190
/* Input x[6]; output z[6] */
189191
extern void bignum_demont_p384 (uint64_t z[6], uint64_t x[6]);
192+
extern void bignum_demont_p384_alt (uint64_t z[6], uint64_t x[6]);
190193

191194
/* Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced */
192195
/* Input x[9]; output z[9] */
@@ -332,6 +335,7 @@ extern void bignum_mod_n256k1_4 (uint64_t z[4], uint64_t x[4]);
332335
/* Reduce modulo group order, z := x mod n_384 */
333336
/* Input x[k]; output z[6] */
334337
extern void bignum_mod_n384 (uint64_t z[6], uint64_t k, uint64_t *x);
338+
extern void bignum_mod_n384_alt (uint64_t z[6], uint64_t k, uint64_t *x);
335339

336340
/* Reduce modulo group order, z := x mod n_384 */
337341
/* Input x[6]; output z[6] */
@@ -357,6 +361,7 @@ extern void bignum_mod_p256k1_4 (uint64_t z[4], uint64_t x[4]);
357361
/* Reduce modulo field characteristic, z := x mod p_384 */
358362
/* Input x[k]; output z[6] */
359363
extern void bignum_mod_p384 (uint64_t z[6], uint64_t k, uint64_t *x);
364+
extern void bignum_mod_p384_alt (uint64_t z[6], uint64_t k, uint64_t *x);
360365

361366
/* Reduce modulo field characteristic, z := x mod p_384 */
362367
/* Input x[6]; output z[6] */
@@ -655,6 +660,7 @@ extern void bignum_tomont_p256k1_alt (uint64_t z[4], uint64_t x[4]);
655660
/* Convert to Montgomery form z := (2^384 * x) mod p_384 */
656661
/* Input x[6]; output z[6] */
657662
extern void bignum_tomont_p384 (uint64_t z[6], uint64_t x[6]);
663+
extern void bignum_tomont_p384_alt (uint64_t z[6], uint64_t x[6]);
658664

659665
/* Convert to Montgomery form z := (2^576 * x) mod p_521 */
660666
/* Input x[9]; output z[9] */
@@ -673,6 +679,7 @@ extern void bignum_triple_p256k1_alt (uint64_t z[4], uint64_t x[4]);
673679
/* Triple modulo p_384, z := (3 * x) mod p_384 */
674680
/* Input x[6]; output z[6] */
675681
extern void bignum_triple_p384 (uint64_t z[6], uint64_t x[6]);
682+
extern void bignum_triple_p384_alt (uint64_t z[6], uint64_t x[6]);
676683

677684
/* Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced */
678685
/* Input x[9]; output z[9] */

include/s2n-bignum.h

+7
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ extern void bignum_cmul_p256k1_alt (uint64_t z[static 4], uint64_t c, uint64_t x
132132
// Multiply by a single word modulo p_384, z := (c * x) mod p_384, assuming x reduced
133133
// Inputs c, x[6]; output z[6]
134134
extern void bignum_cmul_p384 (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]);
135+
extern void bignum_cmul_p384_alt (uint64_t z[static 6], uint64_t c, uint64_t x[static 6]);
135136

136137
// Multiply by a single word modulo p_521, z := (c * x) mod p_521, assuming x reduced
137138
// Inputs c, x[9]; output z[9]
@@ -165,6 +166,7 @@ extern void bignum_deamont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
165166
// Convert from almost-Montgomery form, z := (x / 2^384) mod p_384
166167
// Input x[6]; output z[6]
167168
extern void bignum_deamont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
169+
extern void bignum_deamont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
168170

169171
// Convert from almost-Montgomery form z := (x / 2^576) mod p_521
170172
// Input x[9]; output z[9]
@@ -186,6 +188,7 @@ extern void bignum_demont_p256k1 (uint64_t z[static 4], uint64_t x[static 4]);
186188
// Convert from Montgomery form z := (x / 2^384) mod p_384, assuming x reduced
187189
// Input x[6]; output z[6]
188190
extern void bignum_demont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
191+
extern void bignum_demont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
189192

190193
// Convert from Montgomery form z := (x / 2^576) mod p_521, assuming x reduced
191194
// Input x[9]; output z[9]
@@ -331,6 +334,7 @@ extern void bignum_mod_n256k1_4 (uint64_t z[static 4], uint64_t x[static 4]);
331334
// Reduce modulo group order, z := x mod n_384
332335
// Input x[k]; output z[6]
333336
extern void bignum_mod_n384 (uint64_t z[static 6], uint64_t k, uint64_t *x);
337+
extern void bignum_mod_n384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x);
334338

335339
// Reduce modulo group order, z := x mod n_384
336340
// Input x[6]; output z[6]
@@ -356,6 +360,7 @@ extern void bignum_mod_p256k1_4 (uint64_t z[static 4], uint64_t x[static 4]);
356360
// Reduce modulo field characteristic, z := x mod p_384
357361
// Input x[k]; output z[6]
358362
extern void bignum_mod_p384 (uint64_t z[static 6], uint64_t k, uint64_t *x);
363+
extern void bignum_mod_p384_alt (uint64_t z[static 6], uint64_t k, uint64_t *x);
359364

360365
// Reduce modulo field characteristic, z := x mod p_384
361366
// Input x[6]; output z[6]
@@ -654,6 +659,7 @@ extern void bignum_tomont_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]
654659
// Convert to Montgomery form z := (2^384 * x) mod p_384
655660
// Input x[6]; output z[6]
656661
extern void bignum_tomont_p384 (uint64_t z[static 6], uint64_t x[static 6]);
662+
extern void bignum_tomont_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
657663

658664
// Convert to Montgomery form z := (2^576 * x) mod p_521
659665
// Input x[9]; output z[9]
@@ -672,6 +678,7 @@ extern void bignum_triple_p256k1_alt (uint64_t z[static 4], uint64_t x[static 4]
672678
// Triple modulo p_384, z := (3 * x) mod p_384
673679
// Input x[6]; output z[6]
674680
extern void bignum_triple_p384 (uint64_t z[static 6], uint64_t x[static 6]);
681+
extern void bignum_triple_p384_alt (uint64_t z[static 6], uint64_t x[static 6]);
675682

676683
// Triple modulo p_521, z := (3 * x) mod p_521, assuming x reduced
677684
// Input x[9]; output z[9]

non_ct_functions.txt

+2
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,6 @@ p256/bignum_mod_n256_alto:
33
p256/bignum_mod_p256o:
44
p256/bignum_mod_p256_alto:
55
p384/bignum_mod_n384o:
6+
p384/bignum_mod_n384_alto:
67
p384/bignum_mod_p384o:
8+
p384/bignum_mod_p384_alto:

0 commit comments

Comments
 (0)