bigint: Save one modular doubling in Montgomery RR setup.

Eliminate one modular doubling in Montgomery RR setup. This saves one
modular doubling (modulo the public modulus) per RSA signature
verification, at the cost of approximately one public-modulus-wide XOR.
RsaKeyPair sees a similar saving per Modulus.
This commit is contained in:
Brian Smith 2023-11-11 16:30:25 -08:00
parent 0349d2a332
commit 81e17e4b10
2 changed files with 26 additions and 4 deletions

View File

@ -289,10 +289,19 @@ impl<M> One<M, RR> {
let m_bits = m.len_bits().as_usize_bits(); let m_bits = m.len_bits().as_usize_bits();
let r = (m_bits + (LIMB_BITS - 1)) / LIMB_BITS * LIMB_BITS; let r = (m_bits + (LIMB_BITS - 1)) / LIMB_BITS * LIMB_BITS;
// base = 2**(lg m - 1). // base = 2**r - m.
let bit = m_bits - 1;
let mut base = m.zero(); let mut base = m.zero();
base.limbs[bit / LIMB_BITS] = 1 << (bit % LIMB_BITS); limb::limbs_negative_odd(&mut base.limbs, m.limbs());
// Correct base to 2**(lg m) (mod m).
let lg_m = m.len_bits().as_usize_bits();
let leading_zero_bits_in_m = r - lg_m;
if leading_zero_bits_in_m != 0 {
debug_assert!(leading_zero_bits_in_m < LIMB_BITS);
// `limbs_negative_odd` flipped all the leading zero bits to ones.
// Flip them back.
*base.limbs.last_mut().unwrap() &= (!0) >> leading_zero_bits_in_m;
}
// Double `base` so that base == R == 2**r (mod m). For normal moduli // Double `base` so that base == R == 2**r (mod m). For normal moduli
// that have the high bit of the highest limb set, this requires one // that have the high bit of the highest limb set, this requires one
@ -312,7 +321,7 @@ impl<M> One<M, RR> {
const LG_BASE: usize = 2; // Doubling vs. squaring trade-off. const LG_BASE: usize = 2; // Doubling vs. squaring trade-off.
debug_assert_eq!(LG_BASE.count_ones(), 1); // Must be 2**n for n >= 0. debug_assert_eq!(LG_BASE.count_ones(), 1); // Must be 2**n for n >= 0.
let doublings = r - bit + LG_BASE; let doublings = leading_zero_bits_in_m + LG_BASE;
// `m_bits >= LG_BASE` (for the currently chosen value of `LG_BASE`) // `m_bits >= LG_BASE` (for the currently chosen value of `LG_BASE`)
// since we require the modulus to have at least `MODULUS_MIN_LIMBS` // since we require the modulus to have at least `MODULUS_MIN_LIMBS`
// limbs. `r >= m_bits` as seen above. So `r >= LG_BASE` and thus // limbs. `r >= m_bits` as seen above. So `r >= LG_BASE` and thus

View File

@ -350,6 +350,19 @@ pub(crate) fn limbs_add_assign_mod(a: &mut [Limb], b: &[Limb], m: &[Limb]) {
unsafe { LIMBS_add_mod(a.as_mut_ptr(), a.as_ptr(), b.as_ptr(), m.as_ptr(), m.len()) } unsafe { LIMBS_add_mod(a.as_mut_ptr(), a.as_ptr(), b.as_ptr(), m.as_ptr(), m.len()) }
} }
/// Stores the two's complement negation of `a` in `r` (`*r = -a`).
///
/// `a` must be odd. Limb 0 is treated as the least significant limb, and the
/// two slices are expected to have the same length (checked in debug builds).
pub(crate) fn limbs_negative_odd(r: &mut [Limb], a: &[Limb]) {
    debug_assert_eq!(r.len(), a.len());

    // Negation, part 1: invert every bit of every limb. This is a plain
    // element-wise NOT, which the compiler is expected to vectorize.
    for (dst, src) in r.iter_mut().zip(a.iter()) {
        *dst = !*src;
    }

    // Negation, part 2: add one. `a` is odd, so the inverted value has a
    // clear low bit; setting that bit is the same as adding one and can
    // never carry into the higher limbs.
    r[0] |= 1;
}
prefixed_extern! { prefixed_extern! {
fn LIMBS_are_zero(a: *const Limb, num_limbs: c::size_t) -> LimbMask; fn LIMBS_are_zero(a: *const Limb, num_limbs: c::size_t) -> LimbMask;
fn LIMBS_less_than(a: *const Limb, b: *const Limb, num_limbs: c::size_t) -> LimbMask; fn LIMBS_less_than(a: *const Limb, b: *const Limb, num_limbs: c::size_t) -> LimbMask;