2018-08-16 15:38:40 +01:00
|
|
|
// Copyright 2018 Developers of the Rand project.
|
2017-12-15 11:19:40 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
2018-01-09 08:39:55 +02:00
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
|
2017-12-15 11:19:40 +00:00
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2018-01-25 17:34:28 +00:00
|
|
|
//! Helper functions for implementing `RngCore` functions.
|
2018-01-09 08:35:22 +02:00
|
|
|
//!
|
2017-12-15 11:19:40 +00:00
|
|
|
//! For cross-platform reproducibility, these functions all use Little Endian:
|
|
|
|
//! least-significant part first. For example, `next_u64_via_u32` takes `u32`
|
|
|
|
//! values `x, y`, then outputs `(y << 32) | x`. To implement `next_u32`
|
|
|
|
//! from `next_u64` in little-endian order, one should use `next_u64() as u32`.
|
2018-01-09 08:35:22 +02:00
|
|
|
//!
|
2017-12-15 11:19:40 +00:00
|
|
|
//! Byte-swapping (like the std `to_le` functions) is only needed to convert
|
|
|
|
//! to/from byte sequences, and since its purpose is reproducibility,
|
|
|
|
//! non-reproducible sources (e.g. `OsRng`) need not bother with it.
|
|
|
|
|
2020-01-01 18:01:48 +00:00
|
|
|
use crate::RngCore;
|
2017-12-15 11:19:40 +00:00
|
|
|
use core::cmp::min;
|
|
|
|
|
|
|
|
/// Implement `next_u64` via `next_u32`, little-endian order.
|
2018-01-25 17:34:28 +00:00
|
|
|
pub fn next_u64_via_u32<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
|
2017-12-15 11:19:40 +00:00
|
|
|
// Use LE; we explicitly generate one value before the next.
|
2018-03-27 14:54:05 +02:00
|
|
|
let x = u64::from(rng.next_u32());
|
|
|
|
let y = u64::from(rng.next_u32());
|
2017-12-15 11:19:40 +00:00
|
|
|
(y << 32) | x
|
|
|
|
}
|
|
|
|
|
2018-04-13 13:57:22 +02:00
|
|
|
/// Implement `fill_bytes` via `next_u64` and `next_u32`, little-endian order.
|
|
|
|
///
|
|
|
|
/// The fastest way to fill a slice is usually to work as long as possible with
|
|
|
|
/// integers. That is why this method mostly uses `next_u64`, and only when
|
|
|
|
/// there are 4 or less bytes remaining at the end of the slice it uses
|
|
|
|
/// `next_u32` once.
|
|
|
|
pub fn fill_bytes_via_next<R: RngCore + ?Sized>(rng: &mut R, dest: &mut [u8]) {
|
|
|
|
let mut left = dest;
|
|
|
|
while left.len() >= 8 {
|
2020-01-02 11:04:23 +00:00
|
|
|
let (l, r) = { left }.split_at_mut(8);
|
2018-04-13 13:57:22 +02:00
|
|
|
left = r;
|
2019-07-09 14:12:35 +02:00
|
|
|
let chunk: [u8; 8] = rng.next_u64().to_le_bytes();
|
2018-04-13 13:57:22 +02:00
|
|
|
l.copy_from_slice(&chunk);
|
|
|
|
}
|
|
|
|
let n = left.len();
|
|
|
|
if n > 4 {
|
2019-07-09 14:12:35 +02:00
|
|
|
let chunk: [u8; 8] = rng.next_u64().to_le_bytes();
|
2018-04-13 13:57:22 +02:00
|
|
|
left.copy_from_slice(&chunk[..n]);
|
|
|
|
} else if n > 0 {
|
2019-07-09 14:12:35 +02:00
|
|
|
let chunk: [u8; 4] = rng.next_u32().to_le_bytes();
|
2018-04-13 13:57:22 +02:00
|
|
|
left.copy_from_slice(&chunk[..n]);
|
|
|
|
}
|
2017-12-15 11:19:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Copies words of type `$ty` from `$src` into the byte slice `$dst` using
// little-endian byte order, stopping when either side is exhausted.
//
// Evaluates to `(words_consumed, bytes_filled)`. A word that is only partly
// copied into the final bytes of `$dst` still counts as consumed.
macro_rules! fill_via_chunks {
    ($src:expr, $dst:expr, $ty:ty) => {{
        const WORD_BYTES: usize = core::mem::size_of::<$ty>();

        // Bytes that will be written: bounded by whichever side is shorter.
        let filled_u8 = min($src.len() * WORD_BYTES, $dst.len());
        // Words needed to produce those bytes, rounding a partial word up.
        let consumed = (filled_u8 + WORD_BYTES - 1) / WORD_BYTES;

        // Whole words first: each one fills exactly one full-size chunk.
        let mut words = $src.iter();
        let mut full_chunks = $dst.chunks_exact_mut(WORD_BYTES);
        for (out, word) in full_chunks.by_ref().zip(words.by_ref()) {
            out.copy_from_slice(&word.to_le_bytes());
        }

        // The remainder is shorter than one word; if a source word is still
        // available, its leading little-endian bytes fill it.
        let tail = full_chunks.into_remainder();
        if let Some(word) = words.next() {
            let bytes = word.to_le_bytes();
            tail.copy_from_slice(&bytes[..tail.len()]);
        }

        (consumed, filled_u8)
    }};
}
|
|
|
|
|
|
|
|
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
|
|
|
|
/// based RNG.
|
|
|
|
///
|
|
|
|
/// The return values are `(consumed_u32, filled_u8)`.
|
|
|
|
///
|
|
|
|
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
|
|
|
|
/// the length of `dest`.
|
|
|
|
/// `consumed_u32` is the number of words consumed from `src`, which is the same
|
|
|
|
/// as `filled_u8 / 4` rounded up.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// (from `IsaacRng`)
|
|
|
|
///
|
2018-05-11 08:54:14 +02:00
|
|
|
/// ```ignore
|
2017-12-15 11:19:40 +00:00
|
|
|
/// fn fill_bytes(&mut self, dest: &mut [u8]) {
|
|
|
|
/// let mut read_len = 0;
|
|
|
|
/// while read_len < dest.len() {
|
|
|
|
/// if self.index >= self.rsl.len() {
|
|
|
|
/// self.isaac();
|
|
|
|
/// }
|
|
|
|
///
|
|
|
|
/// let (consumed_u32, filled_u8) =
|
|
|
|
/// impls::fill_via_u32_chunks(&mut self.rsl[self.index..],
|
|
|
|
/// &mut dest[read_len..]);
|
|
|
|
///
|
|
|
|
/// self.index += consumed_u32;
|
|
|
|
/// read_len += filled_u8;
|
|
|
|
/// }
|
|
|
|
/// }
|
|
|
|
/// ```
|
2018-01-20 19:55:18 +01:00
|
|
|
pub fn fill_via_u32_chunks(src: &[u32], dest: &mut [u8]) -> (usize, usize) {
|
2020-08-02 01:15:42 +02:00
|
|
|
fill_via_chunks!(src, dest, u32)
|
2017-12-15 11:19:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
|
|
|
|
/// based RNG.
|
|
|
|
///
|
|
|
|
/// The return values are `(consumed_u64, filled_u8)`.
|
|
|
|
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
|
|
|
|
/// the length of `dest`.
|
|
|
|
/// `consumed_u64` is the number of words consumed from `src`, which is the same
|
|
|
|
/// as `filled_u8 / 8` rounded up.
|
|
|
|
///
|
|
|
|
/// See `fill_via_u32_chunks` for an example.
|
2018-01-20 19:55:18 +01:00
|
|
|
pub fn fill_via_u64_chunks(src: &[u64], dest: &mut [u8]) -> (usize, usize) {
|
2020-08-02 01:15:42 +02:00
|
|
|
fill_via_chunks!(src, dest, u64)
|
2017-12-15 11:19:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement `next_u32` via `fill_bytes`, little-endian order.
|
2018-01-25 17:34:28 +00:00
|
|
|
pub fn next_u32_via_fill<R: RngCore + ?Sized>(rng: &mut R) -> u32 {
|
2020-08-01 20:32:49 +02:00
|
|
|
let mut buf = [0; 4];
|
|
|
|
rng.fill_bytes(&mut buf);
|
2020-08-01 22:48:10 +02:00
|
|
|
u32::from_ne_bytes(buf)
|
2017-12-15 11:19:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement `next_u64` via `fill_bytes`, little-endian order.
|
2018-01-25 17:34:28 +00:00
|
|
|
pub fn next_u64_via_fill<R: RngCore + ?Sized>(rng: &mut R) -> u64 {
|
2020-08-01 20:32:49 +02:00
|
|
|
let mut buf = [0; 8];
|
|
|
|
rng.fill_bytes(&mut buf);
|
2020-08-01 22:48:10 +02:00
|
|
|
u64::from_ne_bytes(buf)
|
2017-12-15 11:19:40 +00:00
|
|
|
}
|
|
|
|
|
2020-08-02 01:13:22 +02:00
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Checks the reported `(consumed, filled)` counts and the written bytes
    /// for destinations shorter than, longer than, and cutting into the
    /// available `u32` words.
    #[test]
    fn test_fill_via_u32_chunks() {
        let words = [1u32, 2, 3];

        // (destination length, expected return value, expected bytes)
        let cases: [(usize, (usize, usize), &[u8]); 3] = [
            (11, (3, 11), &[1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0]),
            (13, (3, 12), &[1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0]),
            (5, (2, 5), &[1, 0, 0, 0, 2]),
        ];

        for &(len, expected_counts, expected_bytes) in cases.iter() {
            // Fresh zeroed storage per case; only the first `len` bytes are used.
            let mut storage = [0u8; 13];
            assert_eq!(
                fill_via_u32_chunks(&words, &mut storage[..len]),
                expected_counts
            );
            assert_eq!(&storage[..len], expected_bytes);
        }
    }

    /// Same checks as above for `u64` source words.
    #[test]
    fn test_fill_via_u64_chunks() {
        let words = [1u64, 2];

        // (destination length, expected return value, expected bytes)
        let cases: [(usize, (usize, usize), &[u8]); 3] = [
            (11, (2, 11), &[1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0]),
            (
                17,
                (2, 16),
                &[1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0],
            ),
            (5, (1, 5), &[1, 0, 0, 0, 0]),
        ];

        for &(len, expected_counts, expected_bytes) in cases.iter() {
            // Fresh zeroed storage per case; only the first `len` bytes are used.
            let mut storage = [0u8; 17];
            assert_eq!(
                fill_via_u64_chunks(&words, &mut storage[..len]),
                expected_counts
            );
            assert_eq!(&storage[..len], expected_bytes);
        }
    }
}
|