p256/arithmetic/scalar/
scalar64.rs

1//! 64-bit secp256r1 scalar field algorithms.
2
3use super::{MODULUS, MU};
4use crate::{
5    arithmetic::util::{adc, mac, sbb},
6    U256,
7};
8
9/// Barrett Reduction
10///
11/// The general algorithm is:
12/// ```text
13/// p = n = order of group
14/// b = 2^64 = 64bit machine word
15/// k = 4
16/// a \in [0, 2^512]
17/// mu := floor(b^{2k} / p)
18/// q1 := floor(a / b^{k - 1})
19/// q2 := q1 * mu
20/// q3 := <- floor(a / b^{k - 1})
21/// r1 := a mod b^{k + 1}
22/// r2 := q3 * m mod b^{k + 1}
23/// r := r1 - r2
24///
25/// if r < 0: r := r + b^{k + 1}
26/// while r >= p: do r := r - p (at most twice)
27/// ```
28///
29/// References:
30/// - Handbook of Applied Cryptography, Chapter 14
31///   Algorithm 14.42
32///   http://cacr.uwaterloo.ca/hac/about/chap14.pdf
33///
34/// - Efficient and Secure Elliptic Curve Cryptography Implementation of Curve P-256
35///   Algorithm 6) Barrett Reduction modulo p
36///   https://csrc.nist.gov/csrc/media/events/workshop-on-elliptic-curve-cryptography-standards/documents/papers/session6-adalier-mehmet.pdf
37#[inline]
38#[allow(clippy::too_many_arguments)]
39pub(super) const fn barrett_reduce(lo: U256, hi: U256) -> U256 {
40    let lo = lo.as_words();
41    let hi = hi.as_words();
42    let a0 = lo[0];
43    let a1 = lo[1];
44    let a2 = lo[2];
45    let a3 = lo[3];
46    let a4 = hi[0];
47    let a5 = hi[1];
48    let a6 = hi[2];
49    let a7 = hi[3];
50    let q1: [u64; 5] = [a3, a4, a5, a6, a7];
51    let q3 = q1_times_mu_shift_five(&q1);
52
53    let r1: [u64; 5] = [a0, a1, a2, a3, a4];
54    let r2: [u64; 5] = q3_times_n_keep_five(&q3);
55    let r: [u64; 5] = sub_inner_five(r1, r2);
56
57    // Result is in range (0, 3*n - 1),
58    // and 90% of the time, no subtraction will be needed.
59    let r = subtract_n_if_necessary(r[0], r[1], r[2], r[3], r[4]);
60    let r = subtract_n_if_necessary(r[0], r[1], r[2], r[3], r[4]);
61    U256::from_words([r[0], r[1], r[2], r[3]])
62}
63
64const fn q1_times_mu_shift_five(q1: &[u64; 5]) -> [u64; 5] {
65    // Schoolbook multiplication.
66
67    let (_w0, carry) = mac(0, q1[0], MU[0], 0);
68    let (w1, carry) = mac(0, q1[0], MU[1], carry);
69    let (w2, carry) = mac(0, q1[0], MU[2], carry);
70    let (w3, carry) = mac(0, q1[0], MU[3], carry);
71    let (w4, w5) = mac(0, q1[0], MU[4], carry);
72
73    let (_w1, carry) = mac(w1, q1[1], MU[0], 0);
74    let (w2, carry) = mac(w2, q1[1], MU[1], carry);
75    let (w3, carry) = mac(w3, q1[1], MU[2], carry);
76    let (w4, carry) = mac(w4, q1[1], MU[3], carry);
77    let (w5, w6) = mac(w5, q1[1], MU[4], carry);
78
79    let (_w2, carry) = mac(w2, q1[2], MU[0], 0);
80    let (w3, carry) = mac(w3, q1[2], MU[1], carry);
81    let (w4, carry) = mac(w4, q1[2], MU[2], carry);
82    let (w5, carry) = mac(w5, q1[2], MU[3], carry);
83    let (w6, w7) = mac(w6, q1[2], MU[4], carry);
84
85    let (_w3, carry) = mac(w3, q1[3], MU[0], 0);
86    let (w4, carry) = mac(w4, q1[3], MU[1], carry);
87    let (w5, carry) = mac(w5, q1[3], MU[2], carry);
88    let (w6, carry) = mac(w6, q1[3], MU[3], carry);
89    let (w7, w8) = mac(w7, q1[3], MU[4], carry);
90
91    let (_w4, carry) = mac(w4, q1[4], MU[0], 0);
92    let (w5, carry) = mac(w5, q1[4], MU[1], carry);
93    let (w6, carry) = mac(w6, q1[4], MU[2], carry);
94    let (w7, carry) = mac(w7, q1[4], MU[3], carry);
95    let (w8, w9) = mac(w8, q1[4], MU[4], carry);
96
97    // let q2 = [_w0, _w1, _w2, _w3, _w4, w5, w6, w7, w8, w9];
98    [w5, w6, w7, w8, w9]
99}
100
101const fn q3_times_n_keep_five(q3: &[u64; 5]) -> [u64; 5] {
102    // Schoolbook multiplication.
103
104    let modulus = MODULUS.as_words();
105
106    let (w0, carry) = mac(0, q3[0], modulus[0], 0);
107    let (w1, carry) = mac(0, q3[0], modulus[1], carry);
108    let (w2, carry) = mac(0, q3[0], modulus[2], carry);
109    let (w3, carry) = mac(0, q3[0], modulus[3], carry);
110    let (w4, _) = mac(0, q3[0], 0, carry);
111
112    let (w1, carry) = mac(w1, q3[1], modulus[0], 0);
113    let (w2, carry) = mac(w2, q3[1], modulus[1], carry);
114    let (w3, carry) = mac(w3, q3[1], modulus[2], carry);
115    let (w4, _) = mac(w4, q3[1], modulus[3], carry);
116
117    let (w2, carry) = mac(w2, q3[2], modulus[0], 0);
118    let (w3, carry) = mac(w3, q3[2], modulus[1], carry);
119    let (w4, _) = mac(w4, q3[2], modulus[2], carry);
120
121    let (w3, carry) = mac(w3, q3[3], modulus[0], 0);
122    let (w4, _) = mac(w4, q3[3], modulus[1], carry);
123
124    let (w4, _) = mac(w4, q3[4], modulus[0], 0);
125
126    [w0, w1, w2, w3, w4]
127}
128
129#[inline]
130#[allow(clippy::too_many_arguments)]
131const fn sub_inner_five(l: [u64; 5], r: [u64; 5]) -> [u64; 5] {
132    let (w0, borrow) = sbb(l[0], r[0], 0);
133    let (w1, borrow) = sbb(l[1], r[1], borrow);
134    let (w2, borrow) = sbb(l[2], r[2], borrow);
135    let (w3, borrow) = sbb(l[3], r[3], borrow);
136    let (w4, _borrow) = sbb(l[4], r[4], borrow);
137
138    // If underflow occurred on the final limb - don't care (= add b^{k+1}).
139    [w0, w1, w2, w3, w4]
140}
141
142#[inline]
143#[allow(clippy::too_many_arguments)]
144const fn subtract_n_if_necessary(r0: u64, r1: u64, r2: u64, r3: u64, r4: u64) -> [u64; 5] {
145    let modulus = MODULUS.as_words();
146
147    let (w0, borrow) = sbb(r0, modulus[0], 0);
148    let (w1, borrow) = sbb(r1, modulus[1], borrow);
149    let (w2, borrow) = sbb(r2, modulus[2], borrow);
150    let (w3, borrow) = sbb(r3, modulus[3], borrow);
151    let (w4, borrow) = sbb(r4, 0, borrow);
152
153    // If underflow occurred on the final limb, borrow = 0xfff...fff, otherwise
154    // borrow = 0x000...000. Thus, we use it as a mask to conditionally add the
155    // modulus.
156    let (w0, carry) = adc(w0, modulus[0] & borrow, 0);
157    let (w1, carry) = adc(w1, modulus[1] & borrow, carry);
158    let (w2, carry) = adc(w2, modulus[2] & borrow, carry);
159    let (w3, carry) = adc(w3, modulus[3] & borrow, carry);
160    let (w4, _carry) = adc(w4, 0, carry);
161
162    [w0, w1, w2, w3, w4]
163}