From 45b70d6ace3b47b0428f53f60a651f8c6d35883a Mon Sep 17 00:00:00 2001
From: Diggory Hardy <git@dhardy.name>
Date: Fri, 15 Dec 2017 11:19:40 +0000
Subject: [PATCH] Add impls module; replace custom impls; remove default impls
 for next_u64 and fill_bytes

This is based on dd1241a256f2 but heavily modified
---
 src/distributions/mod.rs |   6 +-
 src/impls.rs             | 170 +++++++++++++++++++++++++++++++++++++++
 src/jitter.rs            |  19 +----
 src/lib.rs               |  67 +++++----------
 src/prng/chacha.rs       |   9 +++
 src/prng/isaac.rs        |   9 +++
 src/prng/isaac64.rs      |   5 ++
 src/prng/xorshift.rs     |   9 +++
 src/rand_impls.rs        |   5 ++
 src/reseeding.rs         |   8 ++
 10 files changed, 241 insertions(+), 66 deletions(-)
 create mode 100644 src/impls.rs
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index 5de8efb9..7c652363 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -279,8 +279,8 @@ fn ziggurat<R: Rng, P, Z>(
 
 #[cfg(test)]
 mod tests {
-
     use {Rng, Rand};
+    use impls;
     use super::{RandSample, WeightedChoice, Weighted, Sample, IndependentSample};
 
     #[derive(PartialEq, Debug)]
@@ -301,6 +301,10 @@ mod tests {
         fn next_u64(&mut self) -> u64 {
             self.next_u32() as u64
         }
+        
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u32(self, dest)
+        }
     }
 
     #[test]
diff --git a/src/impls.rs b/src/impls.rs
new file mode 100644
index 00000000..12fb2ff4
--- /dev/null
+++ b/src/impls.rs
@@ -0,0 +1,170 @@
+// Copyright 2013-2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Helper functions for implementing `Rng` functions.
+//!
+//! For cross-platform reproducibility, these functions all use little-endian
+//! order: least-significant part first. For example, `next_u64_via_u32` takes
+//! `u32` values `x, y`, then outputs `(y << 32) | x`. To implement `next_u32`
+//! from `next_u64` in little-endian order, one should use `next_u64() as u32`.
+//!
+//! Byte-swapping (like the std `to_le` functions) is only needed to convert
+//! to/from byte sequences, and since its purpose is reproducibility,
+//! non-reproducible sources (e.g. `OsRng`) need not bother with it.
+
+// TODO: eventually these should be exported somehow
+#![allow(unused)]
+
+use core::intrinsics::transmute;
+use core::slice;
+use core::cmp::min;
+use core::mem::size_of;
+use Rng;
+
+/// Build a `u64` from two `next_u32` draws; the first draw is the low half.
+pub fn next_u64_via_u32<R: Rng+?Sized>(rng: &mut R) -> u64 {
+    // Draw order matters for reproducibility: low word first, then high.
+    let low = u64::from(rng.next_u32());
+    let high = u64::from(rng.next_u32());
+    low | (high << 32)
+}
+
+// Fill `$dest` by repeatedly calling `$rng.$next_u()` and writing each
+// result as `$BYTES` little-endian bytes.
+macro_rules! fill_bytes_via {
+    ($rng:ident, $next_u:ident, $BYTES:expr, $dest:ident) => {{
+        // Whole words first: each draw fills exactly `$BYTES` bytes of output.
+        let mut rest = $dest;
+        while rest.len() >= $BYTES {
+            let (head, tail) = {rest}.split_at_mut($BYTES);
+            rest = tail;
+            let word: [u8; $BYTES] = unsafe { transmute($rng.$next_u().to_le()) };
+            head.copy_from_slice(&word);
+        }
+        // A short tail takes the leading bytes of one more draw.
+        let tail_len = rest.len();
+        if tail_len > 0 {
+            let word: [u8; $BYTES] = unsafe { transmute($rng.$next_u().to_le()) };
+            rest.copy_from_slice(&word[..tail_len]);
+        }
+    }}
+}
+
+/// Implement `fill_bytes` from `next_u32` words, each written as 4 little-endian bytes.
+pub fn fill_bytes_via_u32<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
+    fill_bytes_via!(rng, next_u32, 4, dest)
+}
+
+/// Implement `fill_bytes` from `next_u64` words, each written as 8 little-endian bytes.
+pub fn fill_bytes_via_u64<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
+    fill_bytes_via!(rng, next_u64, 8, dest)
+}
+
+macro_rules! impl_uint_from_fill { // build a `$ty` from `$N` bytes of `fill_bytes` output
+    ($self:expr, $ty:ty, $N:expr) => ({
+        debug_assert!($N == size_of::<$ty>()); // `$N` must be the byte width of `$ty`
+        // View the integer's own storage as a `$N`-byte slice for `fill_bytes`.
+        let mut int: $ty = 0;
+        unsafe { // SAFETY: `ptr` addresses the live local `int`, exclusively borrowed here
+            let ptr = &mut int as *mut $ty as *mut u8;
+            let slice = slice::from_raw_parts_mut(ptr, $N);
+            $self.fill_bytes(slice);
+        }
+        <$ty>::from_le(int) // bytes are little-endian; swaps only on big-endian hosts
+    });
+}
+
+macro_rules! fill_via_chunks {
+    ($src:expr, $dest:expr, $N:expr) => ({
+        let byte_len = min($src.len() * $N, $dest.len());
+        let word_len = (byte_len + $N - 1) / $N;
+
+        // Every word that will be read (even partially) becomes little-endian.
+        for word in $src[..word_len].iter_mut() {
+            *word = word.to_le();
+        }
+
+        let raw = unsafe { slice::from_raw_parts($src.as_ptr() as *const u8,
+                                                 $src.len() * $N) };
+
+        // Copy only as many bytes as both buffers can accommodate.
+        $dest[..byte_len].copy_from_slice(&raw[..byte_len]);
+
+        (word_len, byte_len)
+    });
+}
+
+/// Implement `fill_bytes` by reading chunks from the output buffer of a
+/// block-based RNG.
+///
+/// The return values are `(consumed_u32, filled_u8)`.
+///
+/// `filled_u8` is the number of bytes written to `dest`, which may be less
+/// than the length of `dest` when `src` runs out first.
+/// `consumed_u32` is the number of words consumed from `src`, which is the same
+/// as `filled_u8 / 4` rounded up.
+///
+/// Note that on big-endian systems values in the output buffer `src` are
+/// mutated: `src[0..consumed_u32]` is converted to little-endian before
+/// copying.
+///
+/// # Example
+/// (from `IsaacRng`)
+///
+/// ```rust,ignore
+/// fn fill_bytes(&mut self, dest: &mut [u8]) {
+///     let mut read_len = 0;
+///     while read_len < dest.len() {
+///         if self.index >= self.rsl.len() {
+///             self.isaac();
+///         }
+///
+///         let (consumed_u32, filled_u8) =
+///             impls::fill_via_u32_chunks(&mut self.rsl[self.index..],
+///                                        &mut dest[read_len..]);
+///
+///         self.index += consumed_u32;
+///         read_len += filled_u8;
+///     }
+/// }
+/// ```
+pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
+    fill_via_chunks!(src, dest, 4)
+}
+
+/// Implement `fill_bytes` by reading chunks from the output buffer of a
+/// block-based RNG.
+///
+/// The return values are `(consumed_u64, filled_u8)`.
+/// `filled_u8` is the number of bytes written to `dest`, which may be less
+/// than the length of `dest` when `src` runs out first.
+/// `consumed_u64` is the number of words consumed from `src`, which is the same
+/// as `filled_u8 / 8` rounded up.
+///
+/// Note that on big-endian systems values in the output buffer `src` are
+/// mutated: `src[0..consumed_u64]` is converted to little-endian before
+/// copying.
+///
+/// See `fill_via_u32_chunks` for an example.
+pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
+    fill_via_chunks!(src, dest, 8)
+}
+
+/// Implement `next_u32` by having `fill_bytes` fill 4 bytes; intended order is little-endian.
+pub fn next_u32_via_fill<R: Rng+?Sized>(rng: &mut R) -> u32 {
+    impl_uint_from_fill!(rng, u32, 4)
+}
+
+/// Implement `next_u64` by having `fill_bytes` fill 8 bytes; intended order is little-endian.
+pub fn next_u64_via_fill<R: Rng+?Sized>(rng: &mut R) -> u64 {
+    impl_uint_from_fill!(rng, u64, 8)
+}
+
+// TODO: implement tests for the above
diff --git a/src/jitter.rs b/src/jitter.rs
index 942b0d0c..162782db 100644
--- a/src/jitter.rs
+++ b/src/jitter.rs
@@ -16,7 +16,7 @@
 
 //! Non-physical true random number generator based on timing jitter.
 
-use Rng;
+use {Rng, impls};
 
 use core::{fmt, mem, ptr};
 #[cfg(feature="std")]
@@ -731,22 +731,7 @@ impl Rng for JitterRng {
     }
 
     fn fill_bytes(&mut self, dest: &mut [u8]) {
-        let mut left = dest;
-        while left.len() >= 8 {
-            let (l, r) = {left}.split_at_mut(8);
-            left = r;
-            let chunk: [u8; 8] = unsafe {
-                mem::transmute(self.next_u64().to_le())
-            };
-            l.copy_from_slice(&chunk);
-        }
-        let n = left.len();
-        if n > 0 {
-            let chunk: [u8; 8] = unsafe {
-                mem::transmute(self.next_u64().to_le())
-            };
-            left.copy_from_slice(&chunk[..n]);
-        }
+        impls::fill_bytes_via_u64(self, dest)
     }
 }
 
diff --git a/src/lib.rs b/src/lib.rs
index 996c12b7..a63d210b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -277,6 +277,7 @@ use distributions::range::SampleRange;
 
 // public modules
 pub mod distributions;
+mod impls;
 pub mod jitter;
 #[cfg(feature="std")] pub mod os;
 #[cfg(feature="std")] pub mod read;
@@ -339,21 +340,10 @@ pub trait Rand : Sized {
 /// A random number generator.
 pub trait Rng {
     /// Return the next random u32.
-    ///
-    /// This rarely needs to be called directly, prefer `r.gen()` to
-    /// `r.next_u32()`.
-    // FIXME #rust-lang/rfcs#628: Should be implemented in terms of next_u64
     fn next_u32(&mut self) -> u32;
 
     /// Return the next random u64.
-    ///
-    /// By default this is implemented in terms of `next_u32`. An
-    /// implementation of this trait must provide at least one of
-    /// these two methods. Similarly to `next_u32`, this rarely needs
-    /// to be called directly, prefer `r.gen()` to `r.next_u64()`.
-    fn next_u64(&mut self) -> u64 {
-        ((self.next_u32() as u64) << 32) | (self.next_u32() as u64)
-    }
+    fn next_u64(&mut self) -> u64;
 
     /// Return the next random f32 selected from the half-open
     /// interval `[0, 1)`.
@@ -409,11 +399,6 @@ pub trait Rng {
 
     /// Fill `dest` with random data.
     ///
-    /// This has a default implementation in terms of `next_u64` and
-    /// `next_u32`, but should be overridden by implementations that
-    /// offer a more efficient solution than just calling those
-    /// methods repeatedly.
-    ///
     /// This method does *not* have a requirement to bear any fixed
     /// relationship to the other methods, for example, it does *not*
     /// have to result in the same output as progressively filling
@@ -434,29 +419,7 @@ pub trait Rng {
     /// thread_rng().fill_bytes(&mut v);
     /// println!("{:?}", &v[..]);
     /// ```
-    fn fill_bytes(&mut self, dest: &mut [u8]) {
-        // this could, in theory, be done by transmuting dest to a
-        // [u64], but this is (1) likely to be undefined behaviour for
-        // LLVM, (2) has to be very careful about alignment concerns,
-        // (3) adds more `unsafe` that needs to be checked, (4)
-        // probably doesn't give much performance gain if
-        // optimisations are on.
-        let mut count = 0;
-        let mut num = 0;
-        for byte in dest.iter_mut() {
-            if count == 0 {
-                // we could micro-optimise here by generating a u32 if
-                // we only need a few more bytes to fill the vector
-                // (i.e. at most 4).
-                num = self.next_u64();
-                count = 8;
-            }
-
-            *byte = (num & 0xff) as u8;
-            num >>= 8;
-            count -= 1;
-        }
-    }
+    fn fill_bytes(&mut self, dest: &mut [u8]);
 
     /// Return a random value of a `Rand` type.
     ///
@@ -802,15 +765,17 @@ impl StdRng {
 }
 
 impl Rng for StdRng {
-    #[inline]
     fn next_u32(&mut self) -> u32 {
         self.rng.next_u32()
     }
 
-    #[inline]
     fn next_u64(&mut self) -> u64 {
         self.rng.next_u64()
     }
+    
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        self.rng.fill_bytes(dest)
+    }
 }
 
 impl<'a> SeedableRng<&'a [usize]> for StdRng {
@@ -985,6 +950,7 @@ pub fn sample<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec<T>
 
 #[cfg(test)]
 mod test {
+    use impls;
     use super::{Rng, thread_rng, random, SeedableRng, StdRng, weak_rng};
     use std::iter::repeat;
 
@@ -992,10 +958,13 @@ mod test {
 
     impl<R: Rng> Rng for MyRng<R> {
         fn next_u32(&mut self) -> u32 {
-            fn next<T: Rng>(t: &mut T) -> u32 {
-                t.next_u32()
-            }
-            next(&mut self.inner)
+            self.inner.next_u32()
+        }
+        fn next_u64(&mut self) -> u64 {
+            self.inner.next_u64()
+        }
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            self.inner.fill_bytes(dest)
         }
     }
 
@@ -1007,8 +976,10 @@ mod test {
     impl Rng for ConstRng {
         fn next_u32(&mut self) -> u32 { self.i as u32 }
         fn next_u64(&mut self) -> u64 { self.i }
-
-        // no fill_bytes on purpose
+        
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
 
     pub fn iter_eq<I, J>(i: I, j: J) -> bool
diff --git a/src/prng/chacha.rs b/src/prng/chacha.rs
index a73e8e78..9cc0217e 100644
--- a/src/prng/chacha.rs
+++ b/src/prng/chacha.rs
@@ -12,6 +12,7 @@
 
 use core::num::Wrapping as w;
 use {Rng, SeedableRng, Rand};
+use impls;
 
 #[allow(bad_style)]
 type w32 = w<u32>;
@@ -196,6 +197,14 @@ impl Rng for ChaChaRng {
         self.index += 1;
         value.0
     }
+    
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+    
+    fn fill_bytes(&mut self, bytes: &mut [u8]) {
+        impls::fill_bytes_via_u32(self, bytes)
+    }
 }
 
 impl<'a> SeedableRng<&'a [u32]> for ChaChaRng {
diff --git a/src/prng/isaac.rs b/src/prng/isaac.rs
index ba3e2106..85e9d4e3 100644
--- a/src/prng/isaac.rs
+++ b/src/prng/isaac.rs
@@ -16,6 +16,7 @@ use core::num::Wrapping as w;
 use core::fmt;
 
 use {Rng, SeedableRng, Rand};
+use impls;
 
 #[allow(non_camel_case_types)]
 type w32 = w<u32>;
@@ -204,6 +205,14 @@ impl Rng for IsaacRng {
         // it optimises to a bitwise mask).
         self.rsl[self.cnt as usize % RAND_SIZE].0
     }
+    
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+    
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        impls::fill_bytes_via_u32(self, dest)
+    }
 }
 
 impl<'a> SeedableRng<&'a [u32]> for IsaacRng {
diff --git a/src/prng/isaac64.rs b/src/prng/isaac64.rs
index 521c9d32..540fa238 100644
--- a/src/prng/isaac64.rs
+++ b/src/prng/isaac64.rs
@@ -16,6 +16,7 @@ use core::num::Wrapping as w;
 use core::fmt;
 
 use {Rng, SeedableRng, Rand};
+use impls;
 
 #[allow(non_camel_case_types)]
 type w64 = w<u64>;
@@ -209,6 +210,10 @@ impl Rng for Isaac64Rng {
         // it optimises to a bitwise mask).
         self.rsl[self.cnt as usize % RAND_SIZE].0
     }
+    
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        impls::fill_bytes_via_u64(self, dest)
+    }
 }
 
 impl<'a> SeedableRng<&'a [u64]> for Isaac64Rng {
diff --git a/src/prng/xorshift.rs b/src/prng/xorshift.rs
index dd367e9b..0f5ba076 100644
--- a/src/prng/xorshift.rs
+++ b/src/prng/xorshift.rs
@@ -12,6 +12,7 @@
 
 use core::num::Wrapping as w;
 use {Rng, SeedableRng, Rand};
+use impls;
 
 /// An Xorshift[1] random number
 /// generator.
@@ -61,6 +62,14 @@ impl Rng for XorShiftRng {
         self.w = w_ ^ (w_ >> 19) ^ (t ^ (t >> 8));
         self.w.0
     }
+    
+    fn next_u64(&mut self) -> u64 {
+        impls::next_u64_via_u32(self)
+    }
+    
+    fn fill_bytes(&mut self, dest: &mut [u8]) {
+        impls::fill_bytes_via_u32(self, dest)
+    }
 }
 
 impl SeedableRng<[u32; 4]> for XorShiftRng {
diff --git a/src/rand_impls.rs b/src/rand_impls.rs
index a865bb69..13ff1d6f 100644
--- a/src/rand_impls.rs
+++ b/src/rand_impls.rs
@@ -248,6 +248,7 @@ impl<T:Rand> Rand for Option<T> {
 
 #[cfg(test)]
 mod tests {
+    use impls;
     use {Rng, thread_rng, Open01, Closed01};
 
     struct ConstantRng(u64);
@@ -260,6 +261,10 @@ mod tests {
             let ConstantRng(v) = *self;
             v
         }
+        
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
 
     #[test]
diff --git a/src/reseeding.rs b/src/reseeding.rs
index 1f24e200..455f90e3 100644
--- a/src/reseeding.rs
+++ b/src/reseeding.rs
@@ -147,6 +147,7 @@ impl Default for ReseedWithDefault {
 
 #[cfg(test)]
 mod test {
+    use impls;
     use std::default::Default;
     use std::iter::repeat;
     use super::{ReseedingRng, ReseedWithDefault};
@@ -162,6 +163,13 @@ mod test {
             // very random
             self.i - 1
         }
+        fn next_u64(&mut self) -> u64 {
+            impls::next_u64_via_u32(self)
+        }
+        
+        fn fill_bytes(&mut self, dest: &mut [u8]) {
+            impls::fill_bytes_via_u64(self, dest)
+        }
     }
     impl Default for Counter {
         fn default() -> Counter {