Remove ArrayEncoding usage from Digest and delete ArrayEncoding.

Note: I originally tried an alternative implementation using `flat_map` that
ended up being materially slower. To fix that performance regression I had to
make the following change:

```
     let mut output = Output([0; MAX_OUTPUT_LEN]);
     output
         .0
-        .iter_mut()
-        .zip(input.iter().copied().flat_map(|Wrapping(w)| f(w)))
+        .chunks_mut(N)
+        .zip(input.iter().copied().map(|Wrapping(w)| f(w)))
         .for_each(|(o, i)| {
-            *o = i;
+            o.copy_from_slice(&i);
         });
     output
 }
```

I verified that this generates the same assembly code as the original code
on x86-64 using Rust 1.74.0, except that there are two additional 128-bit
moves in `sha256_format_output` to zero out the latter half of `Output`,
which was intended.
This commit is contained in:
Brian Smith 2023-10-11 14:26:40 -07:00
parent ff0ed4ca9d
commit 57fc4860d0
2 changed files with 24 additions and 48 deletions

View File

@ -24,11 +24,7 @@
// The goal for this implementation is to drive the overhead as close to zero
// as possible.
use crate::{
c, cpu, debug,
endian::{ArrayEncoding, BigEndian},
polyfill,
};
use crate::{c, cpu, debug, polyfill};
use core::num::Wrapping;
mod sha1;
@ -248,8 +244,7 @@ impl Digest {
/// Exposes the digest value as a byte slice.
impl AsRef<[u8]> for Digest {
/// Returns the stored value truncated to `self.algorithm.output_len` bytes.
#[inline(always)]
fn as_ref(&self) -> &[u8] {
let as64 = unsafe { &self.value.as64 };
&as64.as_byte_array()[..self.algorithm.output_len]
&self.value.0[..self.algorithm.output_len]
}
}
@ -456,10 +451,7 @@ union State {
}
/// Storage for a formatted hash output; it spans 512 bits (`MAX_OUTPUT_LEN`
/// bytes), of which `Digest::as_ref` exposes a prefix.
#[derive(Clone, Copy)]
union Output {
as64: [BigEndian<u64>; 512 / 8 / core::mem::size_of::<BigEndian<u64>>()],
as32: [BigEndian<u32>; 256 / 8 / core::mem::size_of::<BigEndian<u32>>()],
}
struct Output([u8; MAX_OUTPUT_LEN]);
/// The maximum block length ([`Algorithm::block_len()`]) of all the algorithms
/// in this module.
@ -474,17 +466,30 @@ pub const MAX_OUTPUT_LEN: usize = 512 / 8;
pub const MAX_CHAINING_LEN: usize = MAX_OUTPUT_LEN;
/// Converts the 32-bit words of the hash state (`State::as32`) into their
/// big-endian byte representation, returned as an `Output`.
fn sha256_format_output(input: State) -> Output {
let input = unsafe { &input.as32 };
Output {
as32: input.map(BigEndian::from),
}
let input = unsafe { input.as32 };
format_output::<_, _, { core::mem::size_of::<u32>() }>(input, u32::to_be_bytes)
}
/// Converts the 64-bit words of the hash state (`State::as64`) into their
/// big-endian byte representation, returned as an `Output`.
fn sha512_format_output(input: State) -> Output {
let input = unsafe { &input.as64 };
Output {
as64: input.map(BigEndian::from),
}
let input = unsafe { input.as64 };
format_output::<_, _, { core::mem::size_of::<u64>() }>(input, u64::to_be_bytes)
}
/// Serializes the chaining words in `input` into a zero-initialized `Output`.
///
/// Each word is unwrapped from `Wrapping`, converted to `N` bytes by `f`, and
/// copied, in order, into successive `N`-byte chunks of the output buffer.
/// Bytes of the buffer that no word is written to stay zero.
#[inline]
fn format_output<T, F, const N: usize>(input: [Wrapping<T>; sha2::CHAINING_WORDS], f: F) -> Output
where
F: Fn(T) -> [u8; N],
T: Copy,
{
// Start from all zeros so the part of the buffer not covered by `input`
// has a defined value.
let mut output = Output([0; MAX_OUTPUT_LEN]);
output
.0
// Pair each `N`-byte destination chunk with the encoded bytes of one word;
// iteration stops when either the chunks or the words run out.
.chunks_mut(N)
.zip(input.iter().copied().map(|Wrapping(w)| f(w)))
.for_each(|(o, i)| {
// `copy_from_slice` requires equal lengths, so a chunk shorter than `N`
// would panic here.
o.copy_from_slice(&i);
});
output
}
/// The length of the output of SHA-1, in bytes.

View File

@ -11,12 +11,6 @@ where
const ZERO: Self;
}
/// Work around the inability to implement `AsRef` for arrays of `Encoding`s
/// due to the coherence rules.
pub trait ArrayEncoding<T> {
fn as_byte_array(&self) -> &T;
}
macro_rules! define_endian {
($endian:ident) => {
#[derive(Clone, Copy)]
@ -25,22 +19,6 @@ macro_rules! define_endian {
};
}
macro_rules! impl_array_encoding {
($endian:ident, $base:ident, $elems:expr) => {
impl ArrayEncoding<[u8; $elems * core::mem::size_of::<$base>()]>
for [$endian<$base>; $elems]
{
#[inline]
fn as_byte_array(&self) -> &[u8; $elems * core::mem::size_of::<$base>()] {
let as_bytes_ptr = self
.as_ptr()
.cast::<[u8; $elems * core::mem::size_of::<$base>()]>();
unsafe { &*as_bytes_ptr }
}
}
};
}
macro_rules! impl_endian {
($endian:ident, $base:ident, $to_endian:ident, $from_endian:ident, $size:expr) => {
impl Encoding<$base> for $endian<$base> {
@ -81,18 +59,11 @@ macro_rules! impl_endian {
$base::$from_endian(value)
}
}
impl_array_encoding!($endian, $base, 1);
impl_array_encoding!($endian, $base, 2);
impl_array_encoding!($endian, $base, 3);
impl_array_encoding!($endian, $base, 4);
impl_array_encoding!($endian, $base, 8);
};
}
define_endian!(BigEndian);
impl_endian!(BigEndian, u32, to_be, from_be, 4);
impl_endian!(BigEndian, u64, to_be, from_be, 8);
#[cfg(test)]
mod tests {