// Copyright 2018-2020 Developers of the Rand project.
// Copyright 2017 The Rust Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! `UniformInt` implementation

use super::{Error, SampleBorrow, SampleUniform, UniformSampler};
use crate::distr::utils::WideningMultiply;
#[cfg(feature = "simd_support")]
use crate::distr::{Distribution, Standard};
use crate::Rng;
#[cfg(feature = "simd_support")]
use core::simd::prelude::*;
#[cfg(feature = "simd_support")]
use core::simd::{LaneCount, SupportedLaneCount};
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};

/// The back-end implementing [`UniformSampler`] for integer types.
///
/// Unless you are implementing [`UniformSampler`] for your own type, this type
/// should not be used directly; use [`Uniform`] instead.
///
/// # Implementation notes
///
/// For simplicity, we use the same generic struct `UniformInt<X>` for all
/// integer types `X`. This gives us only one field type, `X`; to store unsigned
/// values of this size, we use the fact that these conversions are no-ops.
///
/// For a closed range, the number of possible numbers we should generate is
/// `range = (high - low + 1)`. To avoid bias, we must ensure that the size of
/// our sample space, `zone`, is a multiple of `range`; other values must be
/// rejected (by replacing with a new random sample).
///
/// As a special case, we use `range = 0` to represent the full range of the
/// result type (i.e. for `new_inclusive($ty::MIN, $ty::MAX)`).
///
/// The optimum `zone` is the largest product of `range` which fits in our
/// (unsigned) target type. We compute this by calculating how many numbers we
/// must reject: `reject = (MAX + 1) % range = (MAX - range + 1) % range`. Any
/// (large) product of `range` will suffice, thus in `sample_single` we multiply
/// by a power of 2 via bit-shifting (faster but may cause more rejections).
///
/// The smallest integer type PRNGs generate is `u32`. For 8- and 16-bit outputs
/// we use `u32` for our `zone` and samples (because it's not slower and because
/// it reduces the chance of having to reject a sample). In this case we cannot
/// store `zone` in the target type since it is too large, however we know
/// `ints_to_reject < range <= $uty::MAX`.
///
/// An alternative to using a modulus is widening multiply: After a widening
/// multiply by `range`, the result is in the high word. Then comparing the low
/// word against `zone` makes sure our distribution is uniform.
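///
/// ## Worked example (illustrative)
///
/// The numbers below are illustrative only. For `u8` with the inclusive range
/// `0..=9` we have `range = 10` and sample in `u32`:
///
/// ```text
/// thresh = (2^32 - 10) % 10 = 2^32 % 10 = 6
/// (hi, lo) = wmul(x, 10)         // i.e. hi = (x * 10) >> 32, so hi < 10
/// accept hi iff lo >= thresh     // exactly 6 of the 2^32 inputs x are
///                                // rejected, leaving a multiple of 10
/// ```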
///
/// [`Uniform`]: super::Uniform
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct UniformInt<X> {
    pub(super) low: X,
    pub(super) range: X,
    thresh: X, // effectively 2.pow(max(32, $uty bits)) % range
}

macro_rules! uniform_int_impl {
    ($ty:ty, $uty:ty, $sample_ty:ident) => {
        impl SampleUniform for $ty {
            type Sampler = UniformInt<$ty>;
        }

        impl UniformSampler for UniformInt<$ty> {
            // We play fast and loose with unsigned vs signed here
            // (when $ty is signed), but that's fine, since the
            // contract of this macro is for $ty and $uty to be
            // "bit-equal", so casting between them is a no-op.

            type X = $ty;

            #[inline] // if the range is constant, this helps LLVM to do the
                      // calculations at compile-time.
            fn new<B1, B2>(low_b: B1, high_b: B2) -> Result<Self, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low < high) {
                    return Err(Error::EmptyRange);
                }
                UniformSampler::new_inclusive(low, high - 1)
            }

            #[inline] // if the range is constant, this helps LLVM to do the
                      // calculations at compile-time.
            fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Result<Self, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low <= high) {
                    return Err(Error::EmptyRange);
                }

                let range = high.wrapping_sub(low).wrapping_add(1) as $uty;
                let thresh = if range > 0 {
                    let range = $sample_ty::from(range);
                    (range.wrapping_neg() % range)
                } else {
                    0
                };

                Ok(UniformInt {
                    low,
                    range: range as $ty,           // type: $uty
                    thresh: thresh as $uty as $ty, // type: $sample_ty
                })
            }

            /// Sample from distribution, Lemire's method, unbiased
            #[inline]
            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                let range = self.range as $uty as $sample_ty;
                if range == 0 {
                    return rng.random();
                }

                let thresh = self.thresh as $uty as $sample_ty;
                let hi = loop {
                    let (hi, lo) = rng.random::<$sample_ty>().wmul(range);
                    if lo >= thresh {
                        break hi;
                    }
                };
                self.low.wrapping_add(hi as $ty)
            }

            #[inline]
            fn sample_single<R: Rng + ?Sized, B1, B2>(
                low_b: B1,
                high_b: B2,
                rng: &mut R,
            ) -> Result<Self::X, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low < high) {
                    return Err(Error::EmptyRange);
                }
                Self::sample_single_inclusive(low, high - 1, rng)
            }

            /// Sample single value, Canon's method, biased
            ///
            /// In the worst case, bias affects 1 in `2^n` samples where n is
            /// 56 (`i8`), 48 (`i16`), 96 (`i32`), 64 (`i64`), 128 (`i128`).
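            ///
            /// Sketch of the sampling step (illustrative pseudocode, mirroring
            /// the body below): a single widening multiply maps the uniform
            /// sample `x` onto `0..range`; at most one extra sample decides
            /// the borderline cases.
            ///
            /// ```text
            /// (result, lo) = wmul(x, range)
            /// if lo > 2^bits - range:           // possibly biased
            ///     (hi2, _) = wmul(x2, range)    // one extra sample x2
            ///     if lo + hi2 overflows:
            ///         result += 1
            /// ```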
            #[cfg(not(feature = "unbiased"))]
            #[inline]
            fn sample_single_inclusive<R: Rng + ?Sized, B1, B2>(
                low_b: B1,
                high_b: B2,
                rng: &mut R,
            ) -> Result<Self::X, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low <= high) {
                    return Err(Error::EmptyRange);
                }
                let range = high.wrapping_sub(low).wrapping_add(1) as $uty as $sample_ty;
                if range == 0 {
                    // Range is MAX+1 (unrepresentable), so we need a special case
                    return Ok(rng.random());
                }

                // generate a sample using a sensible integer type
                let (mut result, lo_order) = rng.random::<$sample_ty>().wmul(range);

                // if the sample is biased...
                if lo_order > range.wrapping_neg() {
                    // ...generate a new sample to reduce bias...
                    let (new_hi_order, _) = (rng.random::<$sample_ty>()).wmul(range as $sample_ty);
                    // ...incrementing result on overflow
                    let is_overflow = lo_order.checked_add(new_hi_order as $sample_ty).is_none();
                    result += is_overflow as $sample_ty;
                }

                Ok(low.wrapping_add(result as $ty))
            }

            /// Sample single value, Canon's method, unbiased
            #[cfg(feature = "unbiased")]
            #[inline]
            fn sample_single_inclusive<R: Rng + ?Sized, B1, B2>(
                low_b: B1,
                high_b: B2,
                rng: &mut R,
            ) -> Result<Self::X, Error>
            where
                B1: SampleBorrow<$ty> + Sized,
                B2: SampleBorrow<$ty> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low <= high) {
                    return Err(Error::EmptyRange);
                }
                let range = high.wrapping_sub(low).wrapping_add(1) as $uty as $sample_ty;
                if range == 0 {
                    // Range is MAX+1 (unrepresentable), so we need a special case
                    return Ok(rng.random());
                }

                let (mut result, mut lo) = rng.random::<$sample_ty>().wmul(range);

                // In contrast to the biased sampler, we use a loop:
                while lo > range.wrapping_neg() {
                    let (new_hi, new_lo) = (rng.random::<$sample_ty>()).wmul(range);
                    match lo.checked_add(new_hi) {
                        Some(x) if x < $sample_ty::MAX => {
                            // Anything less than MAX: last term is 0
                            break;
                        }
                        None => {
                            // Overflow: last term is 1
                            result += 1;
                            break;
                        }
                        _ => {
                            // Unlikely case: must check next sample
                            lo = new_lo;
                            continue;
                        }
                    }
                }

                Ok(low.wrapping_add(result as $ty))
            }
        }
    };
}

uniform_int_impl! { i8, u8, u32 }
uniform_int_impl! { i16, u16, u32 }
uniform_int_impl! { i32, u32, u32 }
uniform_int_impl! { i64, u64, u64 }
uniform_int_impl! { i128, u128, u128 }
uniform_int_impl! { isize, usize, usize }
uniform_int_impl! { u8, u8, u32 }
uniform_int_impl! { u16, u16, u32 }
uniform_int_impl! { u32, u32, u32 }
uniform_int_impl! { u64, u64, u64 }
uniform_int_impl! { usize, usize, usize }
uniform_int_impl! { u128, u128, u128 }
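// The module below is an illustrative sketch (module and test names are ours,
// not upstream): it re-derives the `thresh` value from the worked example in
// the `UniformInt` docs and checks the widening-multiply mapping by hand.
#[cfg(test)]
mod lemire_notes {
    #[test]
    fn widening_multiply_rejection_zone() {
        let range: u32 = 10;
        // As computed in `new_inclusive`: (2^32 - 10) % 10 == 2^32 % 10 == 6.
        let thresh = range.wrapping_neg() % range;
        assert_eq!(thresh, 6);

        // `wmul` on `u32` splits `x as u64 * range as u64` into (hi, lo).
        let wmul = |x: u32| -> (u32, u32) {
            let wide = u64::from(x) * u64::from(range);
            ((wide >> 32) as u32, wide as u32)
        };

        // The high word always lands in 0..range; `sample` rejects a value
        // whenever the low word falls below `thresh`.
        assert_eq!(wmul(0), (0, 0)); // lo < thresh: rejected, resample
        assert_eq!(wmul(1 << 31), (5, 0)); // also rejected
        let (hi, lo) = wmul(u32::MAX);
        assert_eq!(hi, 9); // top of the input space maps to the top bucket
        assert!(lo >= thresh); // accepted
    }
}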
#[cfg(feature = "simd_support")]
macro_rules! uniform_simd_int_impl {
    ($ty:ident, $unsigned:ident) => {
        // The "pick the largest zone that can fit in an `u32`" optimization
        // is less useful here. Multiple lanes complicate things, we don't
        // know the PRNG's minimal output size, and casting to a larger vector
        // is generally a bad idea for SIMD performance. The user can still
        // implement it manually.
        #[cfg(feature = "simd_support")]
        impl<const LANES: usize> SampleUniform for Simd<$ty, LANES>
        where
            LaneCount<LANES>: SupportedLaneCount,
            Simd<$unsigned, LANES>:
                WideningMultiply<Output = (Simd<$unsigned, LANES>, Simd<$unsigned, LANES>)>,
            Standard: Distribution<Simd<$unsigned, LANES>>,
        {
            type Sampler = UniformInt<Simd<$ty, LANES>>;
        }

        #[cfg(feature = "simd_support")]
        impl<const LANES: usize> UniformSampler for UniformInt<Simd<$ty, LANES>>
        where
            LaneCount<LANES>: SupportedLaneCount,
            Simd<$unsigned, LANES>:
                WideningMultiply<Output = (Simd<$unsigned, LANES>, Simd<$unsigned, LANES>)>,
            Standard: Distribution<Simd<$unsigned, LANES>>,
        {
            type X = Simd<$ty, LANES>;

            #[inline] // if the range is constant, this helps LLVM to do the
                      // calculations at compile-time.
            fn new<B1, B2>(low_b: B1, high_b: B2) -> Result<Self, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low.simd_lt(high).all()) {
                    return Err(Error::EmptyRange);
                }
                UniformSampler::new_inclusive(low, high - Simd::splat(1))
            }

            #[inline] // if the range is constant, this helps LLVM to do the
                      // calculations at compile-time.
            fn new_inclusive<B1, B2>(low_b: B1, high_b: B2) -> Result<Self, Error>
            where
                B1: SampleBorrow<Self::X> + Sized,
                B2: SampleBorrow<Self::X> + Sized,
            {
                let low = *low_b.borrow();
                let high = *high_b.borrow();
                if !(low.simd_le(high).all()) {
                    return Err(Error::EmptyRange);
                }

                // NOTE: all `Simd` operations are inherently wrapping,
                // see https://doc.rust-lang.org/std/simd/struct.Simd.html
                let range: Simd<$unsigned, LANES> = ((high - low) + Simd::splat(1)).cast();
                // We must avoid divide-by-zero by using 0 % 1 == 0.
                let not_full_range = range.simd_gt(Simd::splat(0));
                let modulo = not_full_range.select(range, Simd::splat(1));
                let ints_to_reject = range.wrapping_neg() % modulo;

                Ok(UniformInt {
                    low,
                    // These are really $unsigned values, but store as $ty:
                    range: range.cast(),
                    thresh: ints_to_reject.cast(),
                })
            }

            fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                let range: Simd<$unsigned, LANES> = self.range.cast();
                let thresh: Simd<$unsigned, LANES> = self.thresh.cast();

                // This might seem very slow, generating a whole new
                // SIMD vector for every sample rejection. For most uses,
                // though, the chance of rejection is small, giving good
                // general performance. With multiple lanes, that chance is
                // multiplied. To mitigate this, we replace only the lanes of
                // the vector which fail, iteratively reducing the chance of
                // rejection. The replacement method does however add a little
                // overhead. Benchmarking or calculating probabilities might
                // reveal contexts where this replacement method is slower.
                let mut v: Simd<$unsigned, LANES> = rng.random();
                loop {
                    let (hi, lo) = v.wmul(range);
                    let mask = lo.simd_ge(thresh);
                    if mask.all() {
                        let hi: Simd<$ty, LANES> = hi.cast();
                        // wrapping addition
                        let result = self.low + hi;
                        // `select` here compiles to a blend operation.
                        // When `range.eq(0).none()` the compare and blend
                        // operations are avoided.
                        let v: Simd<$ty, LANES> = v.cast();
                        return range.simd_gt(Simd::splat(0)).select(result, v);
                    }
                    // Replace only the failing lanes
                    v = mask.select(v, rng.random());
                }
            }
        }
    };

    // bulk implementation
    ($(($unsigned:ident, $signed:ident)),+) => {
        $(
            uniform_simd_int_impl!($unsigned, $unsigned);
            uniform_simd_int_impl!($signed, $unsigned);
        )+
    };
}

#[cfg(feature = "simd_support")]
uniform_simd_int_impl! { (u8, i8), (u16, i16), (u32, i32), (u64, i64) }
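// The module below is an illustrative sketch (module and test names are ours,
// not upstream): it demonstrates the partial-lane replacement trick used by
// `sample` above, where only the lanes that failed the rejection test are
// refilled with fresh random values.
#[cfg(all(test, feature = "simd_support"))]
mod simd_select_notes {
    use core::simd::prelude::*;

    #[test]
    fn select_replaces_only_failing_lanes() {
        let v = Simd::from_array([1u32, 2, 3, 4]);
        let fresh = Simd::from_array([9u32, 9, 9, 9]);
        // In `sample`, `mask` is `lo.simd_ge(thresh)`: true lanes passed.
        let mask = Mask::<i32, 4>::from_array([true, false, true, false]);
        // Passing lanes keep their value; failing lanes take the new sample.
        assert_eq!(mask.select(v, fresh).to_array(), [1, 9, 3, 9]);
    }
}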
#[cfg(test)]
mod tests {
    use super::*;
    use crate::distr::Uniform;

    #[test]
    fn test_uniform_bad_limits_equal_int() {
        assert_eq!(Uniform::new(10, 10), Err(Error::EmptyRange));
    }

    #[test]
    fn test_uniform_good_limits_equal_int() {
        let mut rng = crate::test::rng(804);
        let dist = Uniform::new_inclusive(10, 10).unwrap();
        for _ in 0..20 {
            assert_eq!(rng.sample(dist), 10);
        }
    }

    #[test]
    fn test_uniform_bad_limits_flipped_int() {
        assert_eq!(Uniform::new(10, 5), Err(Error::EmptyRange));
    }

    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_integers() {
        let mut rng = crate::test::rng(251);
        macro_rules! t {
            ($ty:ident, $v:expr, $le:expr, $lt:expr) => {{
                for &(low, high) in $v.iter() {
                    let my_uniform = Uniform::new(low, high).unwrap();
                    for _ in 0..1000 {
                        let v: $ty = rng.sample(my_uniform);
                        assert!($le(low, v) && $lt(v, high));
                    }

                    let my_uniform = Uniform::new_inclusive(low, high).unwrap();
                    for _ in 0..1000 {
                        let v: $ty = rng.sample(my_uniform);
                        assert!($le(low, v) && $le(v, high));
                    }

                    let my_uniform = Uniform::new(&low, high).unwrap();
                    for _ in 0..1000 {
                        let v: $ty = rng.sample(my_uniform);
                        assert!($le(low, v) && $lt(v, high));
                    }

                    let my_uniform = Uniform::new_inclusive(&low, &high).unwrap();
                    for _ in 0..1000 {
                        let v: $ty = rng.sample(my_uniform);
                        assert!($le(low, v) && $le(v, high));
                    }

                    for _ in 0..1000 {
                        let v = <$ty as SampleUniform>::Sampler::sample_single(low, high, &mut rng)
                            .unwrap();
                        assert!($le(low, v) && $lt(v, high));
                    }

                    for _ in 0..1000 {
                        let v = <$ty as SampleUniform>::Sampler::sample_single_inclusive(
                            low, high, &mut rng,
                        )
                        .unwrap();
                        assert!($le(low, v) && $le(v, high));
                    }
                }
            }};

            // scalar bulk
            ($($ty:ident),*) => {{
                $(t!(
                    $ty,
                    [(0, 10), (10, 127), ($ty::MIN, $ty::MAX)],
                    |x, y| x <= y,
                    |x, y| x < y
                );)*
            }};

            // simd bulk
            ($($ty:ident),* => $scalar:ident) => {{
                $(t!(
                    $ty,
                    [
                        ($ty::splat(0), $ty::splat(10)),
                        ($ty::splat(10), $ty::splat(127)),
                        ($ty::splat($scalar::MIN), $ty::splat($scalar::MAX)),
                    ],
                    |x: $ty, y| x.simd_le(y).all(),
                    |x: $ty, y| x.simd_lt(y).all()
                );)*
            }};
        }

        t!(i8, i16, i32, i64, isize, u8, u16, u32, u64, usize, i128, u128);

        #[cfg(feature = "simd_support")]
        {
            t!(u8x4, u8x8, u8x16, u8x32, u8x64 => u8);
            t!(i8x4, i8x8, i8x16, i8x32, i8x64 => i8);
            t!(u16x2, u16x4, u16x8, u16x16, u16x32 => u16);
            t!(i16x2, i16x4, i16x8, i16x16, i16x32 => i16);
            t!(u32x2, u32x4, u32x8, u32x16 => u32);
            t!(i32x2, i32x4, i32x8, i32x16 => i32);
            t!(u64x2, u64x4, u64x8 => u64);
            t!(i64x2, i64x4, i64x8 => i64);
        }
    }

    #[test]
    fn test_uniform_from_std_range() {
        let r = Uniform::try_from(2u32..7).unwrap();
        assert_eq!(r.0.low, 2);
        assert_eq!(r.0.range, 5);
    }

    #[test]
    fn test_uniform_from_std_range_bad_limits() {
        #![allow(clippy::reversed_empty_ranges)]
        assert!(Uniform::try_from(100..10).is_err());
        assert!(Uniform::try_from(100..100).is_err());
    }

    #[test]
    fn test_uniform_from_std_range_inclusive() {
        let r = Uniform::try_from(2u32..=6).unwrap();
        assert_eq!(r.0.low, 2);
        assert_eq!(r.0.range, 5);
    }

    #[test]
    fn test_uniform_from_std_range_inclusive_bad_limits() {
        #![allow(clippy::reversed_empty_ranges)]
        assert!(Uniform::try_from(100..=10).is_err());
        assert!(Uniform::try_from(100..=99).is_err());
    }
}
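// The module below is an illustrative extra check (module and test names are
// ours, not upstream): the `range == 0` encoding of the full result-type
// range must be special-cased by both `sample` and `sample_single_inclusive`,
// accepting every generated value without rejection.
#[cfg(test)]
mod full_range_notes {
    use super::*;
    use crate::distr::{Distribution, Uniform};

    #[test]
    fn full_range_is_special_cased() {
        let mut rng = crate::test::rng(805);
        let dist = Uniform::new_inclusive(u8::MIN, u8::MAX).unwrap();
        for _ in 0..100 {
            let _: u8 = dist.sample(&mut rng);
        }
        <u8 as SampleUniform>::Sampler::sample_single_inclusive(u8::MIN, u8::MAX, &mut rng)
            .unwrap();
    }
}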