bigint: Split Modulus
(and PartialModulus
) into a submodule.
Better encapsulate `Modulus` and `PartialModulus`. `git diff HEAD^1:src/arithmetic/bigint.rs src/arithmetic/bigint/modulus.rs`
This commit is contained in:
parent
dc47d5f3a7
commit
7ab206e423
@ -36,13 +36,16 @@
|
||||
//! [Static checking of units in Servo]:
|
||||
//! https://blog.mozilla.org/research/2014/06/23/static-checking-of-units-in-servo/
|
||||
|
||||
pub(crate) use self::private_exponent::PrivateExponent;
|
||||
pub(crate) use self::{
|
||||
modulus::{Modulus, PartialModulus, MODULUS_MAX_LIMBS},
|
||||
private_exponent::PrivateExponent,
|
||||
};
|
||||
pub(crate) use super::nonnegative::Nonnegative;
|
||||
use crate::{
|
||||
arithmetic::montgomery::*,
|
||||
bits, bssl, c, cpu, error,
|
||||
limb::{self, Limb, LimbMask, LIMB_BITS, LIMB_BYTES},
|
||||
polyfill::{u64_from_usize, LeadingZerosStripped},
|
||||
polyfill::u64_from_usize,
|
||||
};
|
||||
use alloc::{borrow::ToOwned as _, boxed::Box, vec};
|
||||
use core::{
|
||||
@ -52,6 +55,7 @@ use core::{
|
||||
};
|
||||
|
||||
mod bn_mul_mont_fallback;
|
||||
mod modulus;
|
||||
mod private_exponent;
|
||||
|
||||
/// A prime modulus.
|
||||
@ -140,7 +144,7 @@ impl<M> BoxedLimbs<M> {
|
||||
) -> Result<Self, error::Unspecified> {
|
||||
let mut r = Self::zero(m.width());
|
||||
limb::parse_big_endian_and_pad_consttime(input, &mut r)?;
|
||||
if limb::limbs_less_than_limbs_consttime(&r, &m.limbs) != LimbMask::True {
|
||||
if limb::limbs_less_than_limbs_consttime(&r, m.limbs()) != LimbMask::True {
|
||||
return Err(error::Unspecified);
|
||||
}
|
||||
Ok(r)
|
||||
@ -205,239 +209,6 @@ pub unsafe trait NotMuchSmallerModulus<L>: SmallerModulus<L> {}
|
||||
|
||||
pub trait PublicModulus {}
|
||||
|
||||
/// The x86 implementation of `bn_mul_mont`, at least, requires at least 4
|
||||
/// limbs. For a long time we have required 4 limbs for all targets, though
|
||||
/// this may be unnecessary. TODO: Replace this with
|
||||
/// `n.len() < 256 / LIMB_BITS` so that 32-bit and 64-bit platforms behave the
|
||||
/// same.
|
||||
pub const MODULUS_MIN_LIMBS: usize = 4;
|
||||
|
||||
pub const MODULUS_MAX_LIMBS: usize = 8192 / LIMB_BITS;
|
||||
|
||||
/// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed
|
||||
/// for efficient Montgomery multiplication modulo *m*. The value must be odd
|
||||
/// and larger than 2. The larger-than-1 requirement is imposed, at least, by
|
||||
/// the modular inversion code.
|
||||
pub struct Modulus<M> {
|
||||
limbs: BoxedLimbs<M>, // Also `value >= 3`.
|
||||
|
||||
// n0 * N == -1 (mod r).
|
||||
//
|
||||
// r == 2**(N0_LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This
|
||||
// ensures that we can do integer division by |r| by simply ignoring
|
||||
// `N0_LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by
|
||||
// just looking at the lowest `N0_LIMBS_USED` limbs. This is what makes
|
||||
// Montgomery multiplication efficient.
|
||||
//
|
||||
// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
|
||||
// with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
|
||||
// multi-limb Montgomery multiplication of a * b (mod n), given the
|
||||
// unreduced product t == a * b, we repeatedly calculate:
|
||||
//
|
||||
// t1 := t % r |t1| is |t|'s lowest limb (see previous paragraph).
|
||||
// t2 := t1*n0*n
|
||||
// t3 := t + t2
|
||||
// t := t3 / r copy all limbs of |t3| except the lowest to |t|.
|
||||
//
|
||||
// In the last step, it would only make sense to ignore the lowest limb of
|
||||
// |t3| if it were zero. The middle steps ensure that this is the case:
|
||||
//
|
||||
// t3 == 0 (mod r)
|
||||
// t + t2 == 0 (mod r)
|
||||
// t + t1*n0*n == 0 (mod r)
|
||||
// t1*n0*n == -t (mod r)
|
||||
// t*n0*n == -t (mod r)
|
||||
// n0*n == -1 (mod r)
|
||||
// n0 == -1/n (mod r)
|
||||
//
|
||||
// Thus, in each iteration of the loop, we multiply by the constant factor
|
||||
// n0, the negative inverse of n (mod r).
|
||||
//
|
||||
// TODO(perf): Not all 32-bit platforms actually make use of n0[1]. For the
|
||||
// ones that don't, we could use a shorter `R` value and use faster `Limb`
|
||||
// calculations instead of double-precision `u64` calculations.
|
||||
n0: N0,
|
||||
|
||||
oneRR: One<M, RR>,
|
||||
|
||||
cpu_features: cpu::Features,
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> Clone for Modulus<M> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
limbs: self.limbs.clone(),
|
||||
n0: self.n0.clone(),
|
||||
oneRR: self.oneRR.clone(),
|
||||
cpu_features: self.cpu_features,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> core::fmt::Debug for Modulus<M> {
|
||||
fn fmt(&self, fmt: &mut ::core::fmt::Formatter) -> Result<(), ::core::fmt::Error> {
|
||||
fmt.debug_struct("Modulus")
|
||||
// TODO: Print modulus value.
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<M> Modulus<M> {
|
||||
pub(crate) fn from_be_bytes_with_bit_length(
|
||||
input: untrusted::Input,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
let limbs = BoxedLimbs::positive_minimal_width_from_be_bytes(input)?;
|
||||
Self::from_boxed_limbs(limbs, cpu_features)
|
||||
}
|
||||
|
||||
pub(crate) fn from_nonnegative_with_bit_length(
|
||||
n: Nonnegative,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
let limbs = BoxedLimbs {
|
||||
limbs: n.into_limbs(),
|
||||
m: PhantomData,
|
||||
};
|
||||
Self::from_boxed_limbs(limbs, cpu_features)
|
||||
}
|
||||
|
||||
fn from_boxed_limbs(
|
||||
n: BoxedLimbs<M>,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
if n.len() > MODULUS_MAX_LIMBS {
|
||||
return Err(error::KeyRejected::too_large());
|
||||
}
|
||||
if n.len() < MODULUS_MIN_LIMBS {
|
||||
return Err(error::KeyRejected::unexpected_error());
|
||||
}
|
||||
if limb::limbs_are_even_constant_time(&n) != LimbMask::False {
|
||||
return Err(error::KeyRejected::invalid_component());
|
||||
}
|
||||
if limb::limbs_less_than_limb_constant_time(&n, 3) != LimbMask::False {
|
||||
return Err(error::KeyRejected::unexpected_error());
|
||||
}
|
||||
|
||||
// n_mod_r = n % r. As explained in the documentation for `n0`, this is
|
||||
// done by taking the lowest `N0_LIMBS_USED` limbs of `n`.
|
||||
#[allow(clippy::useless_conversion)]
|
||||
let n0 = {
|
||||
prefixed_extern! {
|
||||
fn bn_neg_inv_mod_r_u64(n: u64) -> u64;
|
||||
}
|
||||
|
||||
// XXX: u64::from isn't guaranteed to be constant time.
|
||||
let mut n_mod_r: u64 = u64::from(n[0]);
|
||||
|
||||
if N0_LIMBS_USED == 2 {
|
||||
// XXX: If we use `<< LIMB_BITS` here then 64-bit builds
|
||||
// fail to compile because of `deny(exceeding_bitshifts)`.
|
||||
debug_assert_eq!(LIMB_BITS, 32);
|
||||
n_mod_r |= u64::from(n[1]) << 32;
|
||||
}
|
||||
N0::from(unsafe { bn_neg_inv_mod_r_u64(n_mod_r) })
|
||||
};
|
||||
|
||||
let bits = limb::limbs_minimal_bits(&n.limbs);
|
||||
let oneRR = {
|
||||
let partial = PartialModulus {
|
||||
limbs: &n.limbs,
|
||||
n0: n0.clone(),
|
||||
m: PhantomData,
|
||||
cpu_features,
|
||||
};
|
||||
|
||||
One::newRR(&partial, bits)
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
limbs: n,
|
||||
n0,
|
||||
oneRR,
|
||||
cpu_features,
|
||||
},
|
||||
bits,
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn width(&self) -> Width<M> {
|
||||
self.limbs.width()
|
||||
}
|
||||
|
||||
fn zero<E>(&self) -> Elem<M, E> {
|
||||
Elem {
|
||||
limbs: BoxedLimbs::zero(self.width()),
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Get rid of this
|
||||
fn one(&self) -> Elem<M, Unencoded> {
|
||||
let mut r = self.zero();
|
||||
r.limbs[0] = 1;
|
||||
r
|
||||
}
|
||||
|
||||
pub fn oneRR(&self) -> &One<M, RR> {
|
||||
&self.oneRR
|
||||
}
|
||||
|
||||
pub fn to_elem<L>(&self, l: &Modulus<L>) -> Elem<L, Unencoded>
|
||||
where
|
||||
M: SmallerModulus<L>,
|
||||
{
|
||||
// TODO: Encode this assertion into the `where` above.
|
||||
assert_eq!(self.width().num_limbs, l.width().num_limbs);
|
||||
let limbs = self.limbs.clone();
|
||||
Elem {
|
||||
limbs: BoxedLimbs {
|
||||
limbs: limbs.limbs,
|
||||
m: PhantomData,
|
||||
},
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_partial(&self) -> PartialModulus<M> {
|
||||
PartialModulus {
|
||||
limbs: &self.limbs,
|
||||
n0: self.n0.clone(),
|
||||
m: PhantomData,
|
||||
cpu_features: self.cpu_features,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> Modulus<M> {
|
||||
pub fn be_bytes(&self) -> LeadingZerosStripped<impl ExactSizeIterator<Item = u8> + Clone + '_> {
|
||||
LeadingZerosStripped::new(limb::unstripped_be_bytes(&self.limbs))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct PartialModulus<'a, M> {
|
||||
limbs: &'a [Limb],
|
||||
n0: N0,
|
||||
m: PhantomData<M>,
|
||||
cpu_features: cpu::Features,
|
||||
}
|
||||
|
||||
impl<M> PartialModulus<'_, M> {
|
||||
// TODO: XXX Avoid duplication with `Modulus`.
|
||||
fn zero(&self) -> Elem<M, R> {
|
||||
let width = Width {
|
||||
num_limbs: self.limbs.len(),
|
||||
m: PhantomData,
|
||||
};
|
||||
Elem {
|
||||
limbs: BoxedLimbs::zero(width),
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Elements of ℤ/mℤ for some modulus *m*.
|
||||
//
|
||||
// Defaulting `E` to `Unencoded` is a convenience for callers from outside this
|
||||
@ -480,7 +251,7 @@ impl<M, E: ReductionEncoding> Elem<M, E> {
|
||||
let mut one = [0; MODULUS_MAX_LIMBS];
|
||||
one[0] = 1;
|
||||
let one = &one[..num_limbs]; // assert!(num_limbs <= MODULUS_MAX_LIMBS);
|
||||
limbs_mont_mul(&mut limbs, one, &m.limbs, &m.n0, m.cpu_features);
|
||||
limbs_mont_mul(&mut limbs, one, m.limbs(), m.n0(), m.cpu_features());
|
||||
Elem {
|
||||
limbs,
|
||||
encoding: PhantomData,
|
||||
@ -547,7 +318,7 @@ fn elem_mul_<M, AF, BF>(
|
||||
where
|
||||
(AF, BF): ProductEncoding,
|
||||
{
|
||||
limbs_mont_mul(&mut b.limbs, &a.limbs, m.limbs, &m.n0, m.cpu_features);
|
||||
limbs_mont_mul(&mut b.limbs, &a.limbs, m.limbs(), m.n0(), m.cpu_features());
|
||||
Elem {
|
||||
limbs: b.limbs,
|
||||
encoding: PhantomData,
|
||||
@ -562,8 +333,8 @@ fn elem_mul_by_2<M, AF>(a: &mut Elem<M, AF>, m: &PartialModulus<M>) {
|
||||
LIMBS_shl_mod(
|
||||
a.limbs.as_mut_ptr(),
|
||||
a.limbs.as_ptr(),
|
||||
m.limbs.as_ptr(),
|
||||
m.limbs.len(),
|
||||
m.limbs().as_ptr(),
|
||||
m.limbs().len(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -573,8 +344,8 @@ pub fn elem_reduced_once<Larger, Smaller: SlightlySmallerModulus<Larger>>(
|
||||
m: &Modulus<Smaller>,
|
||||
) -> Elem<Smaller, Unencoded> {
|
||||
let mut r = a.limbs.clone();
|
||||
assert!(r.len() <= m.limbs.len());
|
||||
limb::limbs_reduce_once_constant_time(&mut r, &m.limbs);
|
||||
assert!(r.len() <= m.limbs().len());
|
||||
limb::limbs_reduce_once_constant_time(&mut r, m.limbs());
|
||||
Elem {
|
||||
limbs: BoxedLimbs {
|
||||
limbs: r.limbs,
|
||||
@ -594,7 +365,7 @@ pub fn elem_reduced<Larger, Smaller: NotMuchSmallerModulus<Larger>>(
|
||||
tmp.copy_from_slice(&a.limbs);
|
||||
|
||||
let mut r = m.zero();
|
||||
limbs_from_mont_in_place(&mut r.limbs, tmp, &m.limbs, &m.n0);
|
||||
limbs_from_mont_in_place(&mut r.limbs, tmp, m.limbs(), m.n0());
|
||||
r
|
||||
}
|
||||
|
||||
@ -605,7 +376,7 @@ fn elem_squared<M, E>(
|
||||
where
|
||||
(E, E): ProductEncoding,
|
||||
{
|
||||
limbs_mont_square(&mut a.limbs, m.limbs, &m.n0, m.cpu_features);
|
||||
limbs_mont_square(&mut a.limbs, m.limbs(), m.n0(), m.cpu_features());
|
||||
Elem {
|
||||
limbs: a.limbs,
|
||||
encoding: PhantomData,
|
||||
@ -623,7 +394,7 @@ pub fn elem_widen<Larger, Smaller: SmallerModulus<Larger>>(
|
||||
|
||||
// TODO: Document why this works for all Montgomery factors.
|
||||
pub fn elem_add<M, E>(mut a: Elem<M, E>, b: Elem<M, E>, m: &Modulus<M>) -> Elem<M, E> {
|
||||
limb::limbs_add_assign_mod(&mut a.limbs, &b.limbs, &m.limbs);
|
||||
limb::limbs_add_assign_mod(&mut a.limbs, &b.limbs, m.limbs());
|
||||
a
|
||||
}
|
||||
|
||||
@ -644,8 +415,8 @@ pub fn elem_sub<M, E>(mut a: Elem<M, E>, b: &Elem<M, E>, m: &Modulus<M>) -> Elem
|
||||
a.limbs.as_mut_ptr(),
|
||||
a.limbs.as_ptr(),
|
||||
b.limbs.as_ptr(),
|
||||
m.limbs.as_ptr(),
|
||||
m.limbs.len(),
|
||||
m.limbs().as_ptr(),
|
||||
m.limbs().len(),
|
||||
);
|
||||
}
|
||||
a
|
||||
@ -780,7 +551,7 @@ pub fn elem_exp_consttime<M>(
|
||||
const WINDOW_BITS: usize = 5;
|
||||
const TABLE_ENTRIES: usize = 1 << WINDOW_BITS;
|
||||
|
||||
let num_limbs = m.limbs.len();
|
||||
let num_limbs = m.limbs().len();
|
||||
|
||||
let mut table = vec![0; TABLE_ENTRIES * num_limbs];
|
||||
|
||||
@ -823,7 +594,6 @@ pub fn elem_exp_consttime<M>(
|
||||
fn entry_mut(table: &mut [Limb], i: usize, num_limbs: usize) -> &mut [Limb] {
|
||||
&mut table[(i * num_limbs)..][..num_limbs]
|
||||
}
|
||||
let num_limbs = m.limbs.len();
|
||||
entry_mut(&mut table, 0, num_limbs).copy_from_slice(&tmp.limbs);
|
||||
entry_mut(&mut table, 1, num_limbs).copy_from_slice(&base.limbs);
|
||||
for i in 2..TABLE_ENTRIES {
|
||||
@ -836,7 +606,7 @@ pub fn elem_exp_consttime<M>(
|
||||
let src1 = entry(previous, src1, num_limbs);
|
||||
let src2 = entry(previous, src2, num_limbs);
|
||||
let dst = entry_mut(rest, 0, num_limbs);
|
||||
limbs_mont_product(dst, src1, src2, &m.limbs, &m.n0, m.cpu_features);
|
||||
limbs_mont_product(dst, src1, src2, m.limbs(), m.n0(), m.cpu_features());
|
||||
}
|
||||
|
||||
let (r, _) = limb::fold_5_bit_windows(
|
||||
@ -874,7 +644,7 @@ pub fn elem_exp_consttime<M>(
|
||||
// Pretty much all the math here requires CPU feature detection to have
|
||||
// been done. `cpu_features` isn't threaded through all the internal
|
||||
// functions, so just make it clear that it has been done at this point.
|
||||
let _ = m.cpu_features;
|
||||
let cpu_features = m.cpu_features();
|
||||
|
||||
// The x86_64 assembly was written under the assumption that the input data
|
||||
// is aligned to `MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH` bytes, which was/is
|
||||
@ -890,7 +660,7 @@ pub fn elem_exp_consttime<M>(
|
||||
const WINDOW_BITS: usize = 5;
|
||||
const TABLE_ENTRIES: usize = 1 << WINDOW_BITS;
|
||||
|
||||
let num_limbs = m.limbs.len();
|
||||
let num_limbs = m.limbs().len();
|
||||
|
||||
const ALIGNMENT: usize = 64;
|
||||
assert_eq!(ALIGNMENT % LIMB_BYTES, 0);
|
||||
@ -914,7 +684,7 @@ pub fn elem_exp_consttime<M>(
|
||||
const M: usize = BASE + 1; // `np` in OpenSSL
|
||||
|
||||
entry_mut(state, BASE, num_limbs).copy_from_slice(&base.limbs);
|
||||
entry_mut(state, M, num_limbs).copy_from_slice(&m.limbs);
|
||||
entry_mut(state, M, num_limbs).copy_from_slice(m.limbs());
|
||||
|
||||
fn scatter(table: &mut [Limb], state: &[Limb], i: Window, num_limbs: usize) {
|
||||
prefixed_extern! {
|
||||
@ -1013,7 +783,7 @@ pub fn elem_exp_consttime<M>(
|
||||
{
|
||||
let acc = entry_mut(state, ACC, num_limbs);
|
||||
acc[0] = 1;
|
||||
limbs_mont_mul(acc, &m.oneRR.0.limbs, &m.limbs, &m.n0, m.cpu_features);
|
||||
limbs_mont_mul(acc, &m.oneRR().0.limbs, m.limbs(), m.n0(), cpu_features);
|
||||
}
|
||||
scatter(table, state, 0, num_limbs);
|
||||
|
||||
@ -1024,9 +794,9 @@ pub fn elem_exp_consttime<M>(
|
||||
for i in 2..(TABLE_ENTRIES as Window) {
|
||||
if i % 2 == 0 {
|
||||
// TODO: Optimize this to avoid gathering
|
||||
gather_square(table, state, &m.n0, i / 2, num_limbs, m.cpu_features);
|
||||
gather_square(table, state, m.n0(), i / 2, num_limbs, cpu_features);
|
||||
} else {
|
||||
gather_mul_base(table, state, &m.n0, i - 1, num_limbs)
|
||||
gather_mul_base(table, state, m.n0(), i - 1, num_limbs)
|
||||
};
|
||||
scatter(table, state, i, num_limbs);
|
||||
}
|
||||
@ -1038,7 +808,7 @@ pub fn elem_exp_consttime<M>(
|
||||
state
|
||||
},
|
||||
|state, window| {
|
||||
power(table, state, &m.n0, window, num_limbs);
|
||||
power(table, state, m.n0(), window, num_limbs);
|
||||
state
|
||||
},
|
||||
);
|
||||
@ -1059,7 +829,7 @@ pub fn elem_exp_consttime<M>(
|
||||
entry(state, ACC, num_limbs).as_ptr(),
|
||||
core::ptr::null(),
|
||||
entry(state, M, num_limbs).as_ptr(),
|
||||
&m.n0,
|
||||
m.n0(),
|
||||
num_limbs,
|
||||
)
|
||||
})?;
|
||||
@ -1106,11 +876,11 @@ impl Nonnegative {
|
||||
}
|
||||
|
||||
pub fn verify_less_than_modulus<M>(&self, m: &Modulus<M>) -> Result<(), error::Unspecified> {
|
||||
if self.limbs().len() > m.limbs.len() {
|
||||
if self.limbs().len() > m.limbs().len() {
|
||||
return Err(error::Unspecified);
|
||||
}
|
||||
if self.limbs().len() == m.limbs.len() {
|
||||
if limb::limbs_less_than_limbs_consttime(self.limbs(), &m.limbs) != LimbMask::True {
|
||||
if self.limbs().len() == m.limbs().len() {
|
||||
if limb::limbs_less_than_limbs_consttime(self.limbs(), m.limbs()) != LimbMask::True {
|
||||
return Err(error::Unspecified);
|
||||
}
|
||||
}
|
||||
@ -1275,7 +1045,7 @@ prefixed_extern! {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use super::{modulus::MODULUS_MIN_LIMBS, *};
|
||||
use crate::test;
|
||||
use alloc::format;
|
||||
|
||||
|
287
src/arithmetic/bigint/modulus.rs
Normal file
287
src/arithmetic/bigint/modulus.rs
Normal file
@ -0,0 +1,287 @@
|
||||
// Copyright 2015-2023 Brian Smith.
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice appear in all copies.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
|
||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
|
||||
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
use super::{
|
||||
super::montgomery::{Unencoded, R, RR},
|
||||
BoxedLimbs, Elem, Nonnegative, One, PublicModulus, SmallerModulus, Width, N0, N0_LIMBS_USED,
|
||||
};
|
||||
use crate::{
|
||||
bits, cpu, error,
|
||||
limb::{self, Limb, LimbMask, LIMB_BITS},
|
||||
polyfill::LeadingZerosStripped,
|
||||
};
|
||||
use core::marker::PhantomData;
|
||||
|
||||
/// The x86 implementation of `bn_mul_mont`, at least, requires at least 4
|
||||
/// limbs. For a long time we have required 4 limbs for all targets, though
|
||||
/// this may be unnecessary. TODO: Replace this with
|
||||
/// `n.len() < 256 / LIMB_BITS` so that 32-bit and 64-bit platforms behave the
|
||||
/// same.
|
||||
pub const MODULUS_MIN_LIMBS: usize = 4;
|
||||
|
||||
pub const MODULUS_MAX_LIMBS: usize = 8192 / LIMB_BITS;
|
||||
|
||||
/// The modulus *m* for a ring ℤ/mℤ, along with the precomputed values needed
|
||||
/// for efficient Montgomery multiplication modulo *m*. The value must be odd
|
||||
/// and larger than 2. The larger-than-1 requirement is imposed, at least, by
|
||||
/// the modular inversion code.
|
||||
pub struct Modulus<M> {
|
||||
limbs: BoxedLimbs<M>, // Also `value >= 3`.
|
||||
|
||||
// n0 * N == -1 (mod r).
|
||||
//
|
||||
// r == 2**(N0_LIMBS_USED * LIMB_BITS) and LG_LITTLE_R == lg(r). This
|
||||
// ensures that we can do integer division by |r| by simply ignoring
|
||||
// `N0_LIMBS_USED` limbs. Similarly, we can calculate values modulo `r` by
|
||||
// just looking at the lowest `N0_LIMBS_USED` limbs. This is what makes
|
||||
// Montgomery multiplication efficient.
|
||||
//
|
||||
// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
|
||||
// with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
|
||||
// multi-limb Montgomery multiplication of a * b (mod n), given the
|
||||
// unreduced product t == a * b, we repeatedly calculate:
|
||||
//
|
||||
// t1 := t % r |t1| is |t|'s lowest limb (see previous paragraph).
|
||||
// t2 := t1*n0*n
|
||||
// t3 := t + t2
|
||||
// t := t3 / r copy all limbs of |t3| except the lowest to |t|.
|
||||
//
|
||||
// In the last step, it would only make sense to ignore the lowest limb of
|
||||
// |t3| if it were zero. The middle steps ensure that this is the case:
|
||||
//
|
||||
// t3 == 0 (mod r)
|
||||
// t + t2 == 0 (mod r)
|
||||
// t + t1*n0*n == 0 (mod r)
|
||||
// t1*n0*n == -t (mod r)
|
||||
// t*n0*n == -t (mod r)
|
||||
// n0*n == -1 (mod r)
|
||||
// n0 == -1/n (mod r)
|
||||
//
|
||||
// Thus, in each iteration of the loop, we multiply by the constant factor
|
||||
// n0, the negative inverse of n (mod r).
|
||||
//
|
||||
// TODO(perf): Not all 32-bit platforms actually make use of n0[1]. For the
|
||||
// ones that don't, we could use a shorter `R` value and use faster `Limb`
|
||||
// calculations instead of double-precision `u64` calculations.
|
||||
n0: N0,
|
||||
|
||||
oneRR: One<M, RR>,
|
||||
|
||||
cpu_features: cpu::Features,
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> Clone for Modulus<M> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
limbs: self.limbs.clone(),
|
||||
n0: self.n0.clone(),
|
||||
oneRR: self.oneRR.clone(),
|
||||
cpu_features: self.cpu_features,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> core::fmt::Debug for Modulus<M> {
|
||||
fn fmt(&self, fmt: &mut ::core::fmt::Formatter) -> Result<(), ::core::fmt::Error> {
|
||||
fmt.debug_struct("Modulus")
|
||||
// TODO: Print modulus value.
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<M> Modulus<M> {
|
||||
pub(crate) fn from_be_bytes_with_bit_length(
|
||||
input: untrusted::Input,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
let limbs = BoxedLimbs::positive_minimal_width_from_be_bytes(input)?;
|
||||
Self::from_boxed_limbs(limbs, cpu_features)
|
||||
}
|
||||
|
||||
pub(crate) fn from_nonnegative_with_bit_length(
|
||||
n: Nonnegative,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
let limbs = BoxedLimbs {
|
||||
limbs: n.into_limbs(),
|
||||
m: PhantomData,
|
||||
};
|
||||
Self::from_boxed_limbs(limbs, cpu_features)
|
||||
}
|
||||
|
||||
pub(super) fn from_boxed_limbs(
|
||||
n: BoxedLimbs<M>,
|
||||
cpu_features: cpu::Features,
|
||||
) -> Result<(Self, bits::BitLength), error::KeyRejected> {
|
||||
if n.len() > MODULUS_MAX_LIMBS {
|
||||
return Err(error::KeyRejected::too_large());
|
||||
}
|
||||
if n.len() < MODULUS_MIN_LIMBS {
|
||||
return Err(error::KeyRejected::unexpected_error());
|
||||
}
|
||||
if limb::limbs_are_even_constant_time(&n) != LimbMask::False {
|
||||
return Err(error::KeyRejected::invalid_component());
|
||||
}
|
||||
if limb::limbs_less_than_limb_constant_time(&n, 3) != LimbMask::False {
|
||||
return Err(error::KeyRejected::unexpected_error());
|
||||
}
|
||||
|
||||
// n_mod_r = n % r. As explained in the documentation for `n0`, this is
|
||||
// done by taking the lowest `N0_LIMBS_USED` limbs of `n`.
|
||||
#[allow(clippy::useless_conversion)]
|
||||
let n0 = {
|
||||
prefixed_extern! {
|
||||
fn bn_neg_inv_mod_r_u64(n: u64) -> u64;
|
||||
}
|
||||
|
||||
// XXX: u64::from isn't guaranteed to be constant time.
|
||||
let mut n_mod_r: u64 = u64::from(n[0]);
|
||||
|
||||
if N0_LIMBS_USED == 2 {
|
||||
// XXX: If we use `<< LIMB_BITS` here then 64-bit builds
|
||||
// fail to compile because of `deny(exceeding_bitshifts)`.
|
||||
debug_assert_eq!(LIMB_BITS, 32);
|
||||
n_mod_r |= u64::from(n[1]) << 32;
|
||||
}
|
||||
N0::from(unsafe { bn_neg_inv_mod_r_u64(n_mod_r) })
|
||||
};
|
||||
|
||||
let bits = limb::limbs_minimal_bits(&n.limbs);
|
||||
let oneRR = {
|
||||
let partial = PartialModulus {
|
||||
limbs: &n.limbs,
|
||||
n0: n0.clone(),
|
||||
m: PhantomData,
|
||||
cpu_features,
|
||||
};
|
||||
|
||||
One::newRR(&partial, bits)
|
||||
};
|
||||
|
||||
Ok((
|
||||
Self {
|
||||
limbs: n,
|
||||
n0,
|
||||
oneRR,
|
||||
cpu_features,
|
||||
},
|
||||
bits,
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn cpu_features(&self) -> cpu::Features {
|
||||
self.cpu_features
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn limbs(&self) -> &[Limb] {
|
||||
&self.limbs
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn n0(&self) -> &N0 {
|
||||
&self.n0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn width(&self) -> Width<M> {
|
||||
self.limbs.width()
|
||||
}
|
||||
|
||||
pub(super) fn zero<E>(&self) -> Elem<M, E> {
|
||||
Elem {
|
||||
limbs: BoxedLimbs::zero(self.width()),
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Get rid of this
|
||||
pub(super) fn one(&self) -> Elem<M, Unencoded> {
|
||||
let mut r = self.zero();
|
||||
r.limbs[0] = 1;
|
||||
r
|
||||
}
|
||||
|
||||
pub fn oneRR(&self) -> &One<M, RR> {
|
||||
&self.oneRR
|
||||
}
|
||||
|
||||
pub fn to_elem<L>(&self, l: &Modulus<L>) -> Elem<L, Unencoded>
|
||||
where
|
||||
M: SmallerModulus<L>,
|
||||
{
|
||||
// TODO: Encode this assertion into the `where` above.
|
||||
assert_eq!(self.width().num_limbs, l.width().num_limbs);
|
||||
let limbs = self.limbs.clone();
|
||||
Elem {
|
||||
limbs: BoxedLimbs {
|
||||
limbs: limbs.limbs,
|
||||
m: PhantomData,
|
||||
},
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_partial(&self) -> PartialModulus<M> {
|
||||
PartialModulus {
|
||||
limbs: &self.limbs,
|
||||
n0: self.n0.clone(),
|
||||
m: PhantomData,
|
||||
cpu_features: self.cpu_features,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: PublicModulus> Modulus<M> {
|
||||
pub fn be_bytes(&self) -> LeadingZerosStripped<impl ExactSizeIterator<Item = u8> + Clone + '_> {
|
||||
LeadingZerosStripped::new(limb::unstripped_be_bytes(&self.limbs))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct PartialModulus<'a, M> {
|
||||
limbs: &'a [Limb],
|
||||
n0: N0,
|
||||
m: PhantomData<M>,
|
||||
cpu_features: cpu::Features,
|
||||
}
|
||||
|
||||
impl<M> PartialModulus<'_, M> {
|
||||
// TODO: XXX Avoid duplication with `Modulus`.
|
||||
pub(super) fn zero(&self) -> Elem<M, R> {
|
||||
let width = Width {
|
||||
num_limbs: self.limbs.len(),
|
||||
m: PhantomData,
|
||||
};
|
||||
Elem {
|
||||
limbs: BoxedLimbs::zero(width),
|
||||
encoding: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn limbs(&self) -> &[Limb] {
|
||||
self.limbs
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(super) fn n0(&self) -> &N0 {
|
||||
&self.n0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn cpu_features(&self) -> cpu::Features {
|
||||
self.cpu_features
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user