Optimize fill_bytes_via

Paul Dicker 2018-04-13 13:57:22 +02:00
parent b55f47e838
commit 86493ca4be
5 changed files with 35 additions and 35 deletions


@@ -37,35 +37,34 @@ pub fn next_u64_via_u32<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
     (y << 32) | x
 }
 
-macro_rules! fill_bytes_via {
-    ($rng:ident, $next_u:ident, $BYTES:expr, $dest:ident) => {{
-        let mut left = $dest;
-        while left.len() >= $BYTES {
-            let (l, r) = {left}.split_at_mut($BYTES);
-            left = r;
-            let chunk: [u8; $BYTES] = unsafe {
-                transmute($rng.$next_u().to_le())
-            };
-            l.copy_from_slice(&chunk);
-        }
-        let n = left.len();
-        if n > 0 {
-            let chunk: [u8; $BYTES] = unsafe {
-                transmute($rng.$next_u().to_le())
-            };
-            left.copy_from_slice(&chunk[..n]);
-        }
-    }}
-}
-
-/// Implement `fill_bytes` via `next_u32`, little-endian order.
-pub fn fill_bytes_via_u32<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
-    fill_bytes_via!(rng, next_u32, 4, dest)
-}
-
-/// Implement `fill_bytes` via `next_u64`, little-endian order.
-pub fn fill_bytes_via_u64<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
-    fill_bytes_via!(rng, next_u64, 8, dest)
-}
+/// Implement `fill_bytes` via `next_u64` and `next_u32`, little-endian order.
+///
+/// The fastest way to fill a slice is usually to work as long as possible with
+/// integers. That is why this method mostly uses `next_u64`, and only when
+/// there are 4 or fewer bytes remaining at the end of the slice does it use
+/// `next_u32` once.
+pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
+    let mut left = dest;
+    while left.len() >= 8 {
+        let (l, r) = {left}.split_at_mut(8);
+        left = r;
+        let chunk: [u8; 8] = unsafe {
+            transmute(rng.next_u64().to_le())
+        };
+        l.copy_from_slice(&chunk);
+    }
+    let n = left.len();
+    if n > 4 {
+        let chunk: [u8; 8] = unsafe {
+            transmute(rng.next_u64().to_le())
+        };
+        left.copy_from_slice(&chunk[..n]);
+    } else if n > 0 {
+        let chunk: [u8; 4] = unsafe {
+            transmute(rng.next_u32().to_le())
+        };
+        left.copy_from_slice(&chunk[..n]);
+    }
+}
 
 macro_rules! impl_uint_from_fill {

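Both the removed macro and its replacement build each chunk with `transmute(...to_le())`: `to_le()` is a no-op on little-endian targets and a byte swap on big-endian ones, so the transmuted array is always the little-endian encoding and the output does not depend on the platform. A minimal standalone sketch of that step (`chunk_le` is a hypothetical name; `to_le_bytes`, stabilized in later Rust, is used only to check the result):

    use std::mem::transmute;

    /// Hypothetical standalone version of the chunk-building step: always
    /// yields the little-endian bytes of `x`, on any platform.
    fn chunk_le(x: u64) -> [u8; 8] {
        // `to_le()` is a no-op on little-endian targets and a byte swap on
        // big-endian ones, so the stored bytes are portable.
        unsafe { transmute::<u64, [u8; 8]>(x.to_le()) }
    }

    fn main() {
        let x: u64 = 0x0102_0304_0506_0708;
        assert_eq!(chunk_le(x)[0], 0x08); // least-significant byte first
        assert_eq!(chunk_le(x), x.to_le_bytes()); // safe equivalent in later Rust
    }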

@@ -121,7 +121,7 @@ pub mod le;
///     }
///
///     fn fill_bytes(&mut self, dest: &mut [u8]) {
-///         impls::fill_bytes_via_u64(self, dest)
+///         impls::fill_bytes_via_next(self, dest)
///     }
///
///     fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
@@ -160,8 +160,7 @@ pub trait RngCore {
     ///
     /// RNGs must implement at least one method from this trait directly. In
     /// the case this method is not implemented directly, it can be implemented
-    /// [via `next_u32`](../rand_core/impls/fn.fill_bytes_via_u32.html) or
-    /// [via `next_u64`](../rand_core/impls/fn.fill_bytes_via_u64.html) or
+    /// [via `next_u*`](../rand_core/impls/fn.fill_bytes_via_next.html) or
     /// via `try_fill_bytes`; if this generator can fail the implementation
     /// must choose how best to handle errors here (e.g. panic with a
     /// descriptive message or log a warning and retry a few times).

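As the trait documentation above says, `fill_bytes` can simply delegate to the `next_u*` helper. A sketch of that route against the `rand_core` API of this era; `CountingRng` and its fields are invented for illustration, only the `rand_core` items are real. The counters make the chunking visible: a 20-byte slice costs two `next_u64` calls for the full 8-byte chunks plus one `next_u32` for the 4-byte tail:

    use rand_core::{impls, Error, RngCore};

    /// Hypothetical generator that counts how often each `next_*` method is
    /// hit, to make the chunking of `fill_bytes_via_next` visible.
    struct CountingRng {
        state: u64,
        calls_u32: u32,
        calls_u64: u32,
    }

    impl RngCore for CountingRng {
        fn next_u32(&mut self) -> u32 {
            self.calls_u32 += 1;
            self.state = self.state.wrapping_add(1);
            self.state as u32
        }

        fn next_u64(&mut self) -> u64 {
            self.calls_u64 += 1;
            self.state = self.state.wrapping_add(1);
            self.state
        }

        fn fill_bytes(&mut self, dest: &mut [u8]) {
            // Delegate to the helper introduced in this commit.
            impls::fill_bytes_via_next(self, dest)
        }

        fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
            self.fill_bytes(dest);
            Ok(())
        }
    }

    fn main() {
        let mut rng = CountingRng { state: 0, calls_u32: 0, calls_u64: 0 };
        let mut buf = [0u8; 20]; // two full 8-byte chunks + a 4-byte tail
        rng.fill_bytes(&mut buf);
        assert_eq!((rng.calls_u64, rng.calls_u32), (2, 1));
    }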

@@ -804,7 +804,7 @@ impl RngCore for JitterRng {
         //
         // This is done especially for wrappers that implement `next_u32`
         // themselves via `fill_bytes`.
-        impls::fill_bytes_via_u32(self, dest)
+        impls::fill_bytes_via_next(self, dest)
     }
 
     fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {

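The wrappers this comment has in mind are generators whose `next_u32` is itself served from bytes produced by `fill_bytes`. A hypothetical sketch of such a wrapper (`BytesWrapper` is invented; `from_le_bytes` is the later-stabilized safe conversion):

    use rand_core::{Error, RngCore};

    /// Hypothetical wrapper: `next_u32`/`next_u64` are served from bytes
    /// produced by the inner generator's `fill_bytes`.
    struct BytesWrapper<R: RngCore>(R);

    impl<R: RngCore> RngCore for BytesWrapper<R> {
        fn next_u32(&mut self) -> u32 {
            let mut buf = [0u8; 4];
            self.0.fill_bytes(&mut buf); // next_u32 built on fill_bytes
            u32::from_le_bytes(buf)
        }

        fn next_u64(&mut self) -> u64 {
            let mut buf = [0u8; 8];
            self.0.fill_bytes(&mut buf);
            u64::from_le_bytes(buf)
        }

        fn fill_bytes(&mut self, dest: &mut [u8]) {
            self.0.fill_bytes(dest)
        }

        fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
            self.0.try_fill_bytes(dest)
        }
    }

Because the helpers fix little-endian order, such a wrapper reads back exactly the integers the inner generator produced.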

@@ -52,7 +52,7 @@ impl RngCore for StepRng {
     }
 
     fn fill_bytes(&mut self, dest: &mut [u8]) {
-        impls::fill_bytes_via_u64(self, dest);
+        impls::fill_bytes_via_next(self, dest);
     }
 
     fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {


@@ -71,12 +71,14 @@ impl RngCore for XorShiftRng {
         self.w.0
     }
 
+    #[inline]
     fn next_u64(&mut self) -> u64 {
         impls::next_u64_via_u32(self)
     }
 
+    #[inline]
     fn fill_bytes(&mut self, dest: &mut [u8]) {
-        impls::fill_bytes_via_u32(self, dest)
+        impls::fill_bytes_via_next(self, dest)
     }
 
     fn try_fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), Error> {
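
For a 32-bit generator like XorShiftRng, `fill_bytes_via_next` reaches `next_u64`, which here is `impls::next_u64_via_u32`; its body appears as context at the top of this diff. Reassembled as a standalone sketch, it glues two 32-bit outputs together low word first, so filling 8 bytes still costs exactly two `next_u32` calls:

    use rand_core::RngCore;

    // Shape of `impls::next_u64_via_u32`, per the context line at the top of
    // this diff: two 32-bit outputs combined, low word first.
    fn next_u64_via_u32<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
        let x = u64::from(rng.next_u32()); // low word is generated first
        let y = u64::from(rng.next_u32()); // high word second
        (y << 32) | x
    }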