// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
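
// This module implements GHASH, the universal hash underlying GCM's
// authentication tag. GHASH folds 16-byte blocks into a running state `Xi`:
//
//     Xi = (Xi ^ block) * H
//
// where `H` is the hash subkey (in GCM, the block cipher applied to the
// all-zero block) and multiplication is in GF(2^128) modulo the GCM
// polynomial x^128 + x^7 + x^2 + x + 1. The implementations below differ only
// in how they carry out that multiplication: carry-less multiply instructions
// (CLMUL/PMULL), NEON, or the portable fallback in `gcm_nohw`.
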
use super::{
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::{cpu, polyfill::ChunksFixed};
use core::ops::BitXorAssign;

// The portable implementation; not needed on aarch64, which is assumed to
// always have at least NEON (see `detect_implementation` below).
#[cfg(not(target_arch = "aarch64"))]
mod gcm_nohw;

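/// A GHASH key: the hash subkey `H`, pre-expanded into a table of values
/// derived from `H` so the per-block multiplication is cheaper. The expansion
/// is done once, by whichever `gcm_init_*` routine is chosen for this CPU.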
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
    cpu_features: cpu::Features,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        // Reinterpret the big-endian `H` as the two 64-bit limbs that the
        // `gcm_init_*` functions expect.
        let h_be: &[[u8; 8]; 2] = h_be.as_ref().chunks_fixed();
        let h: [u64; 2] = h_be.map(u64::from_be_bytes);

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
            cpu_features,
        };
        let h_table = &mut key.h_table;

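        // Fill in the table using the implementation selected for this CPU.
        // The same preference order (CLMUL with AVX/MOVBE, then CLMUL, then
        // NEON, then the portable fallback) is used again for every update.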
        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

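/// The running state of one GHASH computation: the accumulator `Xi` plus a
/// copy of the key's table, kept together because some of the assembly code
/// expects the `ContextInner` layout (see below).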
pub struct Context {
    inner: ContextInner,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(key: &Key, aad: Aad<&[u8]>) -> Self {
        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            cpu_features: key.cpu_features,
        };

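        // Absorb the AAD in whole blocks, zero-padding the final chunk if it
        // is shorter than `BLOCK_LEN`, as the GCM specification requires.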
        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        ctx
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(target_arch = "x86_64")]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

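    /// Absorbs one or more whole blocks. `input.len()` must be a non-zero
    /// multiple of `BLOCK_LEN`; this is only debug-asserted, so callers must
    /// guarantee it.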
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr() as *const [u8; BLOCK_LEN];
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

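    /// Absorbs a single block: `Xi = (Xi ^ a) * H`, i.e. one step of the
    /// GHASH recurrence.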
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

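    /// Consumes the context and hands the final `Xi` to `f`, which turns it
    /// into the tag (in GCM, the tag is `Xi` masked with the encrypted
    /// initial counter block).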
    pub(super) fn pre_finish<F>(self, f: F) -> super::Tag
    where
        F: FnOnce(Block) -> super::Tag,
    {
        f(self.inner.Xi.0)
    }

    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx2(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

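// One `HTable` entry: a 128-bit value stored as two 64-bit halves. Exactly
// what the sixteen entries hold (e.g. which powers or pre-processed forms of
// `H`) is private to the `gcm_init_*`/`gcm_ghash_*` pair that fills and reads
// the table; the fallback uses only `Htable[0]`.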
#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

/// The GHASH accumulator (the "X_i" value in the GCM specification).
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 X86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    #[cfg(not(target_arch = "aarch64"))]
    Fallback,
}

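// Chooses the best available implementation: carry-less multiplication
// (PCLMULQDQ on x86/x86-64, PMULL on Arm) when the CPU supports it, otherwise
// NEON on Arm, otherwise the portable fallback. aarch64 always has NEON, so
// the fallback is unreachable (and not compiled) there.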
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    {
        if (cpu::intel::FXSR.available(cpu_features)
            && cpu::intel::PCLMULQDQ.available(cpu_features))
            || cpu::arm::PMULL.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(target_arch = "arm")]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        return Implementation::NEON;
    }

    #[cfg(not(target_arch = "aarch64"))]
    Implementation::Fallback
}

#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}
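
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal smoke-test sketch. It assumes `cpu::features()` is this
    // crate's usual way to obtain `cpu::Features`, that `Aad::from` accepts a
    // `&[u8]`, and that `Block`'s `AsRef` yields `&[u8; BLOCK_LEN]` (as
    // `Key::new` above relies on).
    #[test]
    fn ghash_with_zero_subkey_stays_zero() {
        // With H = 0, every GHASH step multiplies by zero, so the accumulator
        // must remain the all-zero block regardless of which implementation
        // `detect_implementation` selects.
        let key = Key::new(Block::zero(), cpu::features());
        let mut ctx = Context::new(&key, Aad::from(&[][..]));
        ctx.update_block(Block::zero());
        assert_eq!(ctx.inner.Xi.0.as_ref(), &[0u8; BLOCK_LEN]);
    }
}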