Improve SmallRng initialization performance (#1482)
This commit is contained in:
parent
d2eb51bc29
commit
d1f961c4be
@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
|
||||
- Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
|
||||
- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
|
||||
- Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
|
||||
- Improve SmallRng initialization performance (#1482)
|
||||
- Implement `Distribution<u64>` for `Poisson<f64>` (#1498)
|
||||
- Limit the maximal acceptable lambda for `Poisson` to solve (#1312) (#1498)
|
||||
- Rename `Rng::gen_iter` to `random_iter` (#1500)
|
||||
|
@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg};
|
||||
// Registers every benchmark function of this file with Criterion.
// `init_from_u64` and `init_from_seed` must be listed here, otherwise the
// functions are compiled but never executed by the harness.
criterion_group!(
    name = benches;
    config = Criterion::default();
    targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes
);
|
||||
criterion_main!(benches);
|
||||
|
||||
@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) {
|
||||
bench::<ChaCha12Rng>(&mut g, "chacha12");
|
||||
bench::<ChaCha20Rng>(&mut g, "chacha20");
|
||||
bench::<StdRng>(&mut g, "std");
|
||||
bench::<SmallRng>(&mut g, "small");
|
||||
|
||||
g.finish()
|
||||
}
|
||||
|
||||
pub fn init_from_u64(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("init_from_u64");
|
||||
g.warm_up_time(Duration::from_millis(500));
|
||||
g.measurement_time(Duration::from_millis(1000));
|
||||
|
||||
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str) {
|
||||
g.bench_function(name, |b| {
|
||||
let mut rng = Pcg32::from_os_rng();
|
||||
let seed = rng.random();
|
||||
b.iter(|| R::seed_from_u64(black_box(seed)));
|
||||
});
|
||||
}
|
||||
|
||||
bench::<Pcg32>(&mut g, "pcg32");
|
||||
bench::<Pcg64>(&mut g, "pcg64");
|
||||
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
|
||||
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
|
||||
bench::<ChaCha8Rng>(&mut g, "chacha8");
|
||||
bench::<ChaCha12Rng>(&mut g, "chacha12");
|
||||
bench::<ChaCha20Rng>(&mut g, "chacha20");
|
||||
bench::<StdRng>(&mut g, "std");
|
||||
bench::<SmallRng>(&mut g, "small");
|
||||
|
||||
g.finish()
|
||||
}
|
||||
|
||||
pub fn init_from_seed(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("init_from_seed");
|
||||
g.warm_up_time(Duration::from_millis(500));
|
||||
g.measurement_time(Duration::from_millis(1000));
|
||||
|
||||
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str)
|
||||
where
|
||||
rand::distr::Standard: Distribution<<R as SeedableRng>::Seed>,
|
||||
{
|
||||
g.bench_function(name, |b| {
|
||||
let mut rng = Pcg32::from_os_rng();
|
||||
let seed = rng.random();
|
||||
b.iter(|| R::from_seed(black_box(seed.clone())));
|
||||
});
|
||||
}
|
||||
|
||||
bench::<Pcg32>(&mut g, "pcg32");
|
||||
bench::<Pcg64>(&mut g, "pcg64");
|
||||
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
|
||||
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
|
||||
bench::<ChaCha8Rng>(&mut g, "chacha8");
|
||||
bench::<ChaCha12Rng>(&mut g, "chacha12");
|
||||
bench::<ChaCha20Rng>(&mut g, "chacha20");
|
||||
bench::<StdRng>(&mut g, "std");
|
||||
bench::<SmallRng>(&mut g, "small");
|
||||
|
||||
g.finish()
|
||||
}
|
||||
|
@ -83,7 +83,8 @@ impl SeedableRng for SmallRng {
|
||||
|
||||
#[inline(always)]
|
||||
fn from_seed(seed: Self::Seed) -> Self {
|
||||
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap();
|
||||
// This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size
|
||||
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap()
|
||||
const LEN: usize = core::mem::size_of::<<Rng as SeedableRng>::Seed>();
|
||||
let seed = (&seed[..LEN]).try_into().unwrap();
|
||||
SmallRng(Rng::from_seed(seed))
|
||||
|
@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus {
|
||||
/// mapped to a different seed.
|
||||
#[inline]
|
||||
fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus {
|
||||
if seed.iter().all(|&x| x == 0) {
|
||||
return Self::seed_from_u64(0);
|
||||
}
|
||||
let mut state = [0; 4];
|
||||
read_u32_into(&seed, &mut state);
|
||||
// Check for zero on aligned integers for better code generation.
|
||||
// Furtermore, seed_from_u64(0) will expand to a constant when optimized.
|
||||
if state.iter().all(|&x| x == 0) {
|
||||
return Self::seed_from_u64(0);
|
||||
}
|
||||
Xoshiro128PlusPlus { s: state }
|
||||
}
|
||||
|
||||
/// Create a new `Xoshiro128PlusPlus` from a `u64` seed.
|
||||
///
|
||||
/// This uses the SplitMix64 generator internally.
|
||||
#[inline]
|
||||
fn seed_from_u64(mut state: u64) -> Self {
|
||||
const PHI: u64 = 0x9e3779b97f4a7c15;
|
||||
let mut seed = Self::Seed::default();
|
||||
for chunk in seed.as_mut().chunks_mut(8) {
|
||||
let mut s = [0; 4];
|
||||
for i in s.chunks_exact_mut(2) {
|
||||
state = state.wrapping_add(PHI);
|
||||
let mut z = state;
|
||||
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
|
||||
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
|
||||
z = z ^ (z >> 31);
|
||||
chunk.copy_from_slice(&z.to_le_bytes());
|
||||
i[0] = z as u32;
|
||||
i[1] = (z >> 32) as u32;
|
||||
}
|
||||
Self::from_seed(seed)
|
||||
// By using a non-zero PHI we are guaranteed to generate a non-zero state
|
||||
// Thus preventing a recursion between from_seed and seed_from_u64.
|
||||
debug_assert_ne!(s, [0; 4]);
|
||||
Xoshiro128PlusPlus { s }
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,4 +120,18 @@ mod tests {
|
||||
assert_eq!(rng.next_u32(), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn stable_seed_from_u64() {
    // Value-stability is not guaranteed for SmallRng, but pinning the
    // output here may help keep it stable whenever possible (e.g. after
    // optimizations).
    const EXPECTED: [u32; 10] = [
        1179900579, 1938959192, 3089844957, 3657088315, 1015453891, 479942911, 3433842246,
        669252886, 3985671746, 2737205563,
    ];

    let mut rng = Xoshiro128PlusPlus::seed_from_u64(0);
    for want in EXPECTED.iter().copied() {
        assert_eq!(rng.next_u32(), want);
    }
}
|
||||
}
|
||||
|
@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus {
|
||||
/// mapped to a different seed.
|
||||
#[inline]
|
||||
fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus {
|
||||
if seed.iter().all(|&x| x == 0) {
|
||||
return Self::seed_from_u64(0);
|
||||
}
|
||||
let mut state = [0; 4];
|
||||
read_u64_into(&seed, &mut state);
|
||||
// Check for zero on aligned integers for better code generation.
|
||||
// Furtermore, seed_from_u64(0) will expand to a constant when optimized.
|
||||
if state.iter().all(|&x| x == 0) {
|
||||
return Self::seed_from_u64(0);
|
||||
}
|
||||
Xoshiro256PlusPlus { s: state }
|
||||
}
|
||||
|
||||
/// Create a new `Xoshiro256PlusPlus` from a `u64` seed.
|
||||
///
|
||||
/// This uses the SplitMix64 generator internally.
|
||||
#[inline]
|
||||
fn seed_from_u64(mut state: u64) -> Self {
|
||||
const PHI: u64 = 0x9e3779b97f4a7c15;
|
||||
let mut seed = Self::Seed::default();
|
||||
for chunk in seed.as_mut().chunks_mut(8) {
|
||||
let mut s = [0; 4];
|
||||
for i in s.iter_mut() {
|
||||
state = state.wrapping_add(PHI);
|
||||
let mut z = state;
|
||||
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
|
||||
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
|
||||
z = z ^ (z >> 31);
|
||||
chunk.copy_from_slice(&z.to_le_bytes());
|
||||
*i = z;
|
||||
}
|
||||
Self::from_seed(seed)
|
||||
// By using a non-zero PHI we are guaranteed to generate a non-zero state
|
||||
// Thus preventing a recursion between from_seed and seed_from_u64.
|
||||
debug_assert_ne!(s, [0; 4]);
|
||||
Xoshiro256PlusPlus { s }
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,4 +132,26 @@ mod tests {
|
||||
assert_eq!(rng.next_u64(), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn stable_seed_from_u64() {
    // Value-stability is not guaranteed for SmallRng, but pinning the
    // output here may help keep it stable whenever possible (e.g. after
    // optimizations).
    const EXPECTED: [u64; 10] = [
        5987356902031041503,
        7051070477665621255,
        6633766593972829180,
        211316841551650330,
        9136120204379184874,
        379361710973160858,
        15813423377499357806,
        15596884590815070553,
        5439680534584881407,
        1369371744833522710,
    ];

    let mut rng = Xoshiro256PlusPlus::seed_from_u64(0);
    for want in EXPECTED.iter().copied() {
        assert_eq!(rng.next_u64(), want);
    }
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user