From 86493ca4bee20d2b9945c8cfd7345f8e0a9125f8 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Fri, 13 Apr 2018 13:57:22 +0200 Subject: [PATCH] Optimize `fill_bytes_via` --- rand_core/src/impls.rs | 57 +++++++++++++++++++++--------------------- rand_core/src/lib.rs | 5 ++-- src/jitter.rs | 2 +- src/mock.rs | 2 +- src/prng/xorshift.rs | 4 ++- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/rand_core/src/impls.rs b/rand_core/src/impls.rs index 645dc8f9..530a2ed7 100644 --- a/rand_core/src/impls.rs +++ b/rand_core/src/impls.rs @@ -37,35 +37,34 @@ pub fn next_u64_via_u32(rng: &mut R) -> u64 { (y << 32) | x } -macro_rules! fill_bytes_via { - ($rng:ident, $next_u:ident, $BYTES:expr, $dest:ident) => {{ - let mut left = $dest; - while left.len() >= $BYTES { - let (l, r) = {left}.split_at_mut($BYTES); - left = r; - let chunk: [u8; $BYTES] = unsafe { - transmute($rng.$next_u().to_le()) - }; - l.copy_from_slice(&chunk); - } - let n = left.len(); - if n > 0 { - let chunk: [u8; $BYTES] = unsafe { - transmute($rng.$next_u().to_le()) - }; - left.copy_from_slice(&chunk[..n]); - } - }} -} - -/// Implement `fill_bytes` via `next_u32`, little-endian order. -pub fn fill_bytes_via_u32(rng: &mut R, dest: &mut [u8]) { - fill_bytes_via!(rng, next_u32, 4, dest) -} - -/// Implement `fill_bytes` via `next_u64`, little-endian order. -pub fn fill_bytes_via_u64(rng: &mut R, dest: &mut [u8]) { - fill_bytes_via!(rng, next_u64, 8, dest) +/// Implement `fill_bytes` via `next_u64` and `next_u32`, little-endian order. +/// +/// The fastest way to fill a slice is usually to work as long as possible with +/// integers. That is why this method mostly uses `next_u64`, and only when +/// there are 4 or less bytes remaining at the end of the slice it uses +/// `next_u32` once. +pub fn fill_bytes_via_next(rng: &mut R, dest: &mut [u8]) { + let mut left = dest; + while left.len() >= 8 { + let (l, r) = {left}.split_at_mut(8); + left = r; + let chunk: [u8; 8] = unsafe { + transmute(rng.next_u64().to_le()) + }; + l.copy_from_slice(&chunk); + } + let n = left.len(); + if n > 4 { + let chunk: [u8; 8] = unsafe { + transmute(rng.next_u64().to_le()) + }; + left.copy_from_slice(&chunk[..n]); + } else if n > 0 { + let chunk: [u8; 4] = unsafe { + transmute(rng.next_u32().to_le()) + }; + left.copy_from_slice(&chunk[..n]); + } } macro_rules! impl_uint_from_fill { diff --git a/rand_core/src/lib.rs b/rand_core/src/lib.rs index 924d44ef..7ac0686d 100644 --- a/rand_core/src/lib.rs +++ b/rand_core/src/lib.rs @@ -121,7 +121,7 @@ pub mod le; /// } /// /// fn fill_bytes(&mut self, dest: &mut [u8]) { -/// impls::fill_bytes_via_u64(self, dest) +/// impls::fill_bytes_via_next(self, dest) /// } /// /// fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> { @@ -160,8 +160,7 @@ pub trait RngCore { /// /// RNGs must implement at least one method from this trait directly. In /// the case this method is not implemented directly, it can be implemented - /// [via `next_u32`](../rand_core/impls/fn.fill_bytes_via_u32.html) or - /// [via `next_u64`](../rand_core/impls/fn.fill_bytes_via_u64.html) or + /// [via `next_u*`](../rand_core/impls/fn.fill_bytes_via_next.html) or /// via `try_fill_bytes`; if this generator can fail the implementation /// must choose how best to handle errors here (e.g. panic with a /// descriptive message or log a warning and retry a few times). diff --git a/src/jitter.rs b/src/jitter.rs index 719afa3a..5811479e 100644 --- a/src/jitter.rs +++ b/src/jitter.rs @@ -804,7 +804,7 @@ impl RngCore for JitterRng { // // This is done especially for wrappers that implement `next_u32` // themselves via `fill_bytes`. - impls::fill_bytes_via_u32(self, dest) + impls::fill_bytes_via_next(self, dest) } fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> { diff --git a/src/mock.rs b/src/mock.rs index 5c73594f..090258ef 100644 --- a/src/mock.rs +++ b/src/mock.rs @@ -52,7 +52,7 @@ impl RngCore for StepRng { } fn fill_bytes(&mut self, dest: &mut [u8]) { - impls::fill_bytes_via_u64(self, dest); + impls::fill_bytes_via_next(self, dest); } fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> { diff --git a/src/prng/xorshift.rs b/src/prng/xorshift.rs index 9f7a3c88..9fac6e33 100644 --- a/src/prng/xorshift.rs +++ b/src/prng/xorshift.rs @@ -71,12 +71,14 @@ impl RngCore for XorShiftRng { self.w.0 } + #[inline] fn next_u64(&mut self) -> u64 { impls::next_u64_via_u32(self) } + #[inline] fn fill_bytes(&mut self, dest: &mut [u8]) { - impls::fill_bytes_via_u32(self, dest) + impls::fill_bytes_via_next(self, dest) } fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {