Rework benchmarks to make it easier to get assembly. (#297)
* Rename benches/mod.rs to benches/buffer.rs This naming makes more sense, especially if we add more benchmark files. Signed-off-by: Joe Richey <joerichey@google.com> * Rework benchmarks to make it easier to get assembly. This change: - Move the benchmarks from mod.rs to buffer.rs - Move the inner loop we benchmark into an `#[inline(never)]` function - Includes instructions for getting the ASM for a specific benchmark This should hopefully reduce the variance of these benchmarks and make it easier to figure out if we are emitting the assembly or IR we expect for a particular implementation. Signed-off-by: Joe Richey <joerichey@google.com> Signed-off-by: Joe Richey <joerichey@google.com>
This commit is contained in:
parent
55ad4c41ba
commit
bd0654fe70
71
benches/buffer.rs
Normal file
71
benches/buffer.rs
Normal file
@ -0,0 +1,71 @@
|
||||
#![feature(test, maybe_uninit_uninit_array_transpose)]
|
||||
extern crate test;
|
||||
|
||||
use std::mem::MaybeUninit;
|
||||
|
||||
// Call getrandom on a zero-initialized stack buffer
|
||||
#[inline(always)]
|
||||
fn bench_getrandom<const N: usize>() {
|
||||
let mut buf = [0u8; N];
|
||||
getrandom::getrandom(&mut buf).unwrap();
|
||||
test::black_box(&buf as &[u8]);
|
||||
}
|
||||
|
||||
// Call getrandom_uninit on an uninitialized stack buffer
|
||||
#[inline(always)]
|
||||
fn bench_getrandom_uninit<const N: usize>() {
|
||||
let mut uninit = [MaybeUninit::uninit(); N];
|
||||
let buf: &[u8] = getrandom::getrandom_uninit(&mut uninit).unwrap();
|
||||
test::black_box(buf);
|
||||
}
|
||||
|
||||
// We benchmark using #[inline(never)] "inner" functions for two reasons:
|
||||
// - Avoiding inlining reduces a source of variance when running benchmarks.
|
||||
// - It is _much_ easier to get the assembly or IR for the inner loop.
|
||||
//
|
||||
// For example, using cargo-show-asm (https://github.com/pacak/cargo-show-asm),
|
||||
// we can get the assembly for a particular benchmark's inner loop by running:
|
||||
// cargo asm --bench buffer --release buffer::p384::bench_getrandom::inner
|
||||
macro_rules! bench {
    ( $name:ident, $size:expr ) => {
        // One module per buffer size; the module name is part of each
        // benchmark's path (e.g. `buffer::p384::bench_getrandom::inner`).
        pub mod $name {
            // Buffer length in bytes, shared by both benchmarks in this module.
            const SIZE: usize = $size;

            #[bench]
            pub fn bench_getrandom(bencher: &mut test::Bencher) {
                // Never inlined, so the benchmarked body is a separate function.
                #[inline(never)]
                fn inner() {
                    super::bench_getrandom::<SIZE>()
                }

                bencher.bytes = SIZE as u64;
                bencher.iter(inner);
            }

            #[bench]
            pub fn bench_getrandom_uninit(bencher: &mut test::Bencher) {
                // Never inlined, so the benchmarked body is a separate function.
                #[inline(never)]
                fn inner() {
                    super::bench_getrandom_uninit::<SIZE>()
                }

                bencher.bytes = SIZE as u64;
                bencher.iter(inner);
            }
        }
    };
}
|
||||
|
||||
// 16 bytes (128 bits) is the size of a 128-bit AES key/nonce.
|
||||
bench!(aes128, 128 / 8);
|
||||
|
||||
// 32 bytes (256 bits) is the seed size used for rand::thread_rng
|
||||
// and the `random` value in a ClientHello/ServerHello for TLS.
|
||||
// This is also the size of a 256-bit AES/HMAC/P-256/Curve25519 key
|
||||
// and/or nonce.
|
||||
bench!(p256, 256 / 8);
|
||||
|
||||
// A P-384/HMAC-384 key and/or nonce.
|
||||
bench!(p384, 384 / 8);
|
||||
|
||||
// Initializing larger buffers is not the primary use case of this library, as
|
||||
// this should normally be done by a userspace CSPRNG. However, we have a test
|
||||
// here to see the effects of a lower (amortized) syscall overhead.
|
||||
bench!(page, 4096);
|
@ -1,64 +0,0 @@
|
||||
#![feature(test)]
|
||||
#![feature(maybe_uninit_as_bytes)]
|
||||
|
||||
extern crate test;
|
||||
|
||||
use std::mem::MaybeUninit;
|
||||
|
||||
// Used to benchmark the throughput of getrandom in an optimal scenario.
|
||||
// The buffer is hot, and does not require initialization.
|
||||
#[inline(always)]
|
||||
fn bench_getrandom<const N: usize>(b: &mut test::Bencher) {
|
||||
b.bytes = N as u64;
|
||||
b.iter(|| {
|
||||
let mut buf = [0u8; N];
|
||||
getrandom::getrandom(&mut buf[..]).unwrap();
|
||||
test::black_box(buf);
|
||||
});
|
||||
}
|
||||
|
||||
// Used to benchmark the throughput of getrandom is a slightly less optimal
|
||||
// scenario. The buffer is still hot, but requires initialization.
|
||||
#[inline(always)]
|
||||
fn bench_getrandom_uninit<const N: usize>(b: &mut test::Bencher) {
|
||||
b.bytes = N as u64;
|
||||
b.iter(|| {
|
||||
let mut buf: MaybeUninit<[u8; N]> = MaybeUninit::uninit();
|
||||
let _ = getrandom::getrandom_uninit(buf.as_bytes_mut()).unwrap();
|
||||
let buf: [u8; N] = unsafe { buf.assume_init() };
|
||||
test::black_box(buf)
|
||||
});
|
||||
}
|
||||
|
||||
macro_rules! bench {
    ( $name:ident, $size:expr ) => {
        // One module per buffer size, holding both benchmark entry points.
        pub mod $name {
            // Buffer length in bytes, shared by both benchmarks in this module.
            const SIZE: usize = $size;

            #[bench]
            pub fn bench_getrandom(bencher: &mut test::Bencher) {
                super::bench_getrandom::<SIZE>(bencher)
            }

            #[bench]
            pub fn bench_getrandom_uninit(bencher: &mut test::Bencher) {
                super::bench_getrandom_uninit::<SIZE>(bencher)
            }
        }
    };
}
|
||||
|
||||
// 16 bytes (128 bits) is the size of a 128-bit AES key/nonce.
|
||||
bench!(aes128, 128 / 8);
|
||||
|
||||
// 32 bytes (256 bits) is the seed size used for rand::thread_rng
|
||||
// and the `random` value in a ClientHello/ServerHello for TLS.
|
||||
// This is also the size of a 256-bit AES/HMAC/P-256/Curve25519 key
|
||||
// and/or nonce.
|
||||
bench!(p256, 256 / 8);
|
||||
|
||||
// A P-384/HMAC-384 key and/or nonce.
|
||||
bench!(p384, 384 / 8);
|
||||
|
||||
// Initializing larger buffers is not the primary use case of this library, as
|
||||
// this should normally be done by a userspace CSPRNG. However, we have a test
|
||||
// here to see the effects of a lower (amortized) syscall overhead.
|
||||
bench!(page, 4096);
|
Loading…
x
Reference in New Issue
Block a user