Merge branch 'master' into no-std-fix

Vinzent Steinberg 2021-09-10 21:49:28 +02:00
commit 2bddede4b7
8 changed files with 401 additions and 23 deletions

View File

@@ -83,9 +83,16 @@ jobs:
         cargo test --target ${{ matrix.target }} --lib --tests --no-default-features
         cargo build --target ${{ matrix.target }} --no-default-features --features alloc,getrandom,small_rng
         cargo test --target ${{ matrix.target }} --lib --tests --no-default-features --features=alloc,getrandom,small_rng
-        # all stable features:
-        cargo test --target ${{ matrix.target }} --features=serde1,log,small_rng,min_const_gen
         cargo test --target ${{ matrix.target }} --examples
+    - name: Test rand (all stable features, non-MSRV)
+      if: ${{ matrix.toolchain != '1.36.0' }}
+      run: |
+        cargo test --target ${{ matrix.target }} --features=serde1,log,small_rng,min_const_gen
+    - name: Test rand (all stable features, MSRV)
+      if: ${{ matrix.toolchain == '1.36.0' }}
+      run: |
+        # const generics are not stable on 1.36.0
+        cargo test --target ${{ matrix.target }} --features=serde1,log,small_rng
    - name: Test rand_core
      run: |
        cargo test --target ${{ matrix.target }} --manifest-path rand_core/Cargo.toml
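Note: the split into MSRV and non-MSRV steps is needed because rand's `min_const_gen` feature builds on const generics, which were only stabilized in Rust 1.51 and are therefore unavailable on the 1.36.0 MSRV toolchain. A minimal sketch of what the feature enables (assuming rand 0.8 with `min_const_gen`; illustrative, not part of this commit):

```
use rand::Rng;

fn main() {
    let mut rng = rand::thread_rng();
    // With `min_const_gen`, `Standard` is implemented for arrays of
    // arbitrary const-generic length N; without it, rand only covers
    // lengths up to 32, so length 33 here exercises the feature.
    let samples: [f64; 33] = rng.gen();
    println!("{:?}", samples);
}
```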

View File

@@ -71,7 +71,7 @@ rand_chacha = { path = "rand_chacha", version = "0.3.0", default-features = fals

 [dependencies.packed_simd]
 # NOTE: so far no version works reliably due to dependence on unstable features
 package = "packed_simd_2"
-version = "0.3.5"
+version = "0.3.6"
 optional = true
 features = ["into_bits"]

View File

@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## Unreleased
 - New `Zeta` and `Zipf` distributions (#1136)
 - New `SkewNormal` distribution (#1149)
+- New `Gumbel` and `Frechet` distributions (#1168, #1171)

 ## [0.4.1] - 2021-06-15

View File

@@ -33,3 +33,5 @@ rand_pcg = { version = "0.3.0", path = "../rand_pcg" }
 rand = { path = "..", version = "0.8.0", default-features = false, features = ["std_rng", "std", "small_rng"] }
 # Histogram implementation for testing uniformity
 average = { version = "0.13", features = [ "std" ] }
+# Special functions for testing distributions
+special = "0.8.1"

View File

@@ -141,6 +141,13 @@ fn bench(c: &mut Criterion<CyclesPerByte>) {
         });
     }

+    {
+        let mut g = c.benchmark_group("skew_normal");
+        distr_float!(g, "shape_zero", f64, SkewNormal::new(0.0, 1.0, 0.0).unwrap());
+        distr_float!(g, "shape_positive", f64, SkewNormal::new(0.0, 1.0, 100.0).unwrap());
+        distr_float!(g, "shape_negative", f64, SkewNormal::new(0.0, 1.0, -100.0).unwrap());
+    }
+
     {
         let mut g = c.benchmark_group("gamma");
         distr_float!(g, "gamma_large_shape", f64, Gamma::new(10., 1.0).unwrap());
View File

@@ -43,6 +43,7 @@
 //! - Related to real-valued quantities that grow linearly
 //!   (e.g. errors, offsets):
 //!   - [`Normal`] distribution, and [`StandardNormal`] as a primitive
+//!   - [`SkewNormal`] distribution
 //!   - [`Cauchy`] distribution
 //! - Related to Bernoulli trials (yes/no events, with a given probability):
 //!   - [`Binomial`] distribution
@@ -109,19 +110,22 @@ pub use self::gamma::{
 pub use self::geometric::{Error as GeoError, Geometric, StandardGeometric};
 pub use self::gumbel::{Error as GumbelError, Gumbel};
 pub use self::hypergeometric::{Error as HyperGeoError, Hypergeometric};
-pub use self::inverse_gaussian::{InverseGaussian, Error as InverseGaussianError};
+pub use self::inverse_gaussian::{Error as InverseGaussianError, InverseGaussian};
 pub use self::normal::{Error as NormalError, LogNormal, Normal, StandardNormal};
-pub use self::normal_inverse_gaussian::{NormalInverseGaussian, Error as NormalInverseGaussianError};
+pub use self::normal_inverse_gaussian::{
+    Error as NormalInverseGaussianError, NormalInverseGaussian,
+};
 pub use self::pareto::{Error as ParetoError, Pareto};
 pub use self::pert::{Pert, PertError};
 pub use self::poisson::{Error as PoissonError, Poisson};
+pub use self::skew_normal::{Error as SkewNormalError, SkewNormal};
 pub use self::triangular::{Triangular, TriangularError};
 pub use self::unit_ball::UnitBall;
 pub use self::unit_circle::UnitCircle;
 pub use self::unit_disc::UnitDisc;
 pub use self::unit_sphere::UnitSphere;
 pub use self::weibull::{Error as WeibullError, Weibull};
-pub use self::zipf::{ZetaError, Zeta, ZipfError, Zipf};
+pub use self::zipf::{Zeta, ZetaError, Zipf, ZipfError};
 #[cfg(feature = "alloc")]
 #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))]
 pub use rand::distributions::{WeightedError, WeightedIndex};
@@ -199,6 +203,7 @@ mod normal_inverse_gaussian;
 mod pareto;
 mod pert;
 mod poisson;
+mod skew_normal;
 mod triangular;
 mod unit_ball;
 mod unit_circle;
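Note: with the `mod` registration and re-export above, downstream code can name the new distribution and its error type directly, e.g.:

```
use rand_distr::{SkewNormal, SkewNormalError};

fn build(location: f64, scale: f64, shape: f64) -> Result<SkewNormal<f64>, SkewNormalError> {
    SkewNormal::new(location, scale, shape)
}
```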

View File

@@ -0,0 +1,259 @@
// Copyright 2021 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! The Skew Normal distribution.

use crate::{Distribution, StandardNormal};
use core::fmt;
use num_traits::Float;
use rand::Rng;
/// The [skew normal distribution] `SN(location, scale, shape)`.
///
/// The skew normal distribution is a generalization of the
/// [`Normal`] distribution to allow for non-zero skewness.
///
/// For `scale > 0`, it has the density function
/// `f(x) = 2 / scale * phi((x - location) / scale) * Phi(shape * (x - location) / scale)`,
/// where `phi` and `Phi` are the density and the cumulative distribution
/// function of a standard normal variable.
///
/// # Example
///
/// ```
/// use rand_distr::{SkewNormal, Distribution};
///
/// // location 2, scale 3, shape 1
/// let skew_normal = SkewNormal::new(2.0, 3.0, 1.0).unwrap();
/// let v = skew_normal.sample(&mut rand::thread_rng());
/// println!("{} is from a SN(2, 3, 1) distribution", v)
/// ```
///
/// # Implementation details
///
/// We use the algorithm from [A Method to Simulate the Skew Normal Distribution].
///
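/// In short: given two independent standard normal samples `u_1` and `u_2`,
/// let `u = max(u_1, u_2)` and `v = min(u_1, u_2)`; then
/// `((1 + shape) * u + (1 - shape) * v) / sqrt(2 * (1 + shape^2))`
/// follows a standard skew normal distribution with the given shape, and is
/// then scaled and shifted by `scale` and `location`. (This restates the
/// `sample` implementation below.)
///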
/// [skew normal distribution]: https://en.wikipedia.org/wiki/Skew_normal_distribution
/// [`Normal`]: struct.Normal.html
/// [A Method to Simulate the Skew Normal Distribution]:
///     Ghorbanzadeh, D., Jaupi, L. and Durand, P. (2014)
///     [A Method to Simulate the Skew Normal Distribution](https://dx.doi.org/10.4236/am.2014.513201).
///     Applied Mathematics, 5, 2073-2076.
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct SkewNormal<F>
where
    F: Float,
    StandardNormal: Distribution<F>,
{
    location: F,
    scale: F,
    shape: F,
}

/// Error type returned from `SkewNormal::new`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Error {
    /// The scale parameter is not finite, or it is less than or equal to zero.
    ScaleTooSmall,
    /// The shape parameter is not finite.
    BadShape,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Error::ScaleTooSmall => {
                "scale parameter is non-finite or less than or equal to zero in skew normal distribution"
            }
            Error::BadShape => "shape parameter is non-finite in skew normal distribution",
        })
    }
}

#[cfg(feature = "std")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))]
impl std::error::Error for Error {}

impl<F> SkewNormal<F>
where
    F: Float,
    StandardNormal: Distribution<F>,
{
    /// Construct a skew normal distribution from location, scale and shape.
    ///
    /// Parameters:
    ///
    /// - location (unrestricted)
    /// - scale (must be finite and larger than zero)
    /// - shape (must be finite)
    #[inline]
    pub fn new(location: F, scale: F, shape: F) -> Result<SkewNormal<F>, Error> {
        if !scale.is_finite() || !(scale > F::zero()) {
            return Err(Error::ScaleTooSmall);
        }
        if !shape.is_finite() {
            return Err(Error::BadShape);
        }
        Ok(SkewNormal {
            location,
            scale,
            shape,
        })
    }

    /// Returns the location of the distribution.
    pub fn location(&self) -> F {
        self.location
    }

    /// Returns the scale of the distribution.
    pub fn scale(&self) -> F {
        self.scale
    }

    /// Returns the shape of the distribution.
    pub fn shape(&self) -> F {
        self.shape
    }
}

impl<F> Distribution<F> for SkewNormal<F>
where
    F: Float,
    StandardNormal: Distribution<F>,
{
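    // Sampling: the appropriately weighted combination of the maximum and
    // minimum of two independent standard normal samples is itself standard
    // skew normal (Ghorbanzadeh, Jaupi & Durand, 2014); shape values of
    // 0 and ±1 admit cheaper special cases.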
    fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> F {
        let linear_map = |x: F| -> F { x * self.scale + self.location };
        let u_1: F = rng.sample(StandardNormal);
        if self.shape == F::zero() {
            linear_map(u_1)
        } else {
            let u_2 = rng.sample(StandardNormal);
            let (u, v) = (u_1.max(u_2), u_1.min(u_2));
            if self.shape == -F::one() {
                linear_map(v)
            } else if self.shape == F::one() {
                linear_map(u)
            } else {
                let normalized = ((F::one() + self.shape) * u + (F::one() - self.shape) * v)
                    / ((F::one() + self.shape * self.shape).sqrt()
                        * F::from(core::f64::consts::SQRT_2).unwrap());
                linear_map(normalized)
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

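    // Helper for the value-stability tests below: sampling with a fixed RNG
    // seed must keep reproducing exactly `expected`, since rand_distr treats
    // changes to sampled values as breaking.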
    fn test_samples<F: Float + core::fmt::Debug, D: Distribution<F>>(
        distr: D, zero: F, expected: &[F],
    ) {
        let mut rng = crate::test::rng(213);
        let mut buf = [zero; 4];
        for x in &mut buf {
            *x = rng.sample(&distr);
        }
        assert_eq!(buf, expected);
    }

    #[test]
    #[should_panic]
    fn invalid_scale_nan() {
        SkewNormal::new(0.0, core::f64::NAN, 0.0).unwrap();
    }

    #[test]
    #[should_panic]
    fn invalid_scale_zero() {
        SkewNormal::new(0.0, 0.0, 0.0).unwrap();
    }

    #[test]
    #[should_panic]
    fn invalid_scale_negative() {
        SkewNormal::new(0.0, -1.0, 0.0).unwrap();
    }

    #[test]
    #[should_panic]
    fn invalid_scale_infinite() {
        SkewNormal::new(0.0, core::f64::INFINITY, 0.0).unwrap();
    }

    #[test]
    #[should_panic]
    fn invalid_shape_nan() {
        SkewNormal::new(0.0, 1.0, core::f64::NAN).unwrap();
    }

    #[test]
    #[should_panic]
    fn invalid_shape_infinite() {
        SkewNormal::new(0.0, 1.0, core::f64::INFINITY).unwrap();
    }

    #[test]
    fn valid_location_nan() {
        SkewNormal::new(core::f64::NAN, 1.0, 0.0).unwrap();
    }

    #[test]
    fn skew_normal_value_stability() {
        test_samples(
            SkewNormal::new(0.0, 1.0, 0.0).unwrap(),
            0f32,
            &[-0.11844189, 0.781378, 0.06563994, -1.1932899],
        );
        test_samples(
            SkewNormal::new(0.0, 1.0, 0.0).unwrap(),
            0f64,
            &[
                -0.11844188827977231,
                0.7813779637772346,
                0.06563993969580051,
                -1.1932899004186373,
            ],
        );
        test_samples(
            SkewNormal::new(core::f64::INFINITY, 1.0, 0.0).unwrap(),
            0f64,
            &[
                core::f64::INFINITY,
                core::f64::INFINITY,
                core::f64::INFINITY,
                core::f64::INFINITY,
            ],
        );
        test_samples(
            SkewNormal::new(core::f64::NEG_INFINITY, 1.0, 0.0).unwrap(),
            0f64,
            &[
                core::f64::NEG_INFINITY,
                core::f64::NEG_INFINITY,
                core::f64::NEG_INFINITY,
                core::f64::NEG_INFINITY,
            ],
        );
    }

    #[test]
    fn skew_normal_value_location_nan() {
        let skew_normal = SkewNormal::new(core::f64::NAN, 1.0, 0.0).unwrap();
        let mut rng = crate::test::rng(213);
        let mut buf = [0.0; 4];
        for x in &mut buf {
            *x = rng.sample(&skew_normal);
        }
        for value in buf.iter() {
            assert!(value.is_nan());
        }
    }
}
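Note: beyond the doc example above, the distribution's closed-form mean gives a quick sanity check. A minimal sketch, assuming rand 0.8 with the `small_rng` feature and rand_distr with this commit applied; the seed and constants are illustrative:

```
use rand::SeedableRng;
use rand_distr::{Distribution, SkewNormal};

fn main() {
    let (location, scale, shape) = (2.0_f64, 3.0, 1.0);
    let skew_normal = SkewNormal::new(location, scale, shape).unwrap();

    let mut rng = rand::rngs::SmallRng::seed_from_u64(42);
    let n = 100_000;
    let mean = skew_normal.sample_iter(&mut rng).take(n).sum::<f64>() / n as f64;

    // Closed-form mean of SN(location, scale, shape):
    // location + scale * delta * sqrt(2 / pi), delta = shape / sqrt(1 + shape^2).
    let delta = shape / (1.0 + shape * shape).sqrt();
    let expected = location + scale * delta * (2.0 / core::f64::consts::PI).sqrt();
    println!("sampled mean ≈ {:.3}, closed-form mean ≈ {:.3}", mean, expected);
}
```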

View File

@@ -10,7 +10,7 @@
 use average::Histogram;
 use rand::{Rng, SeedableRng};
-use rand_distr::Normal;
+use rand_distr::{Normal, SkewNormal};

 const HIST_LEN: usize = 100;
 average::define_histogram!(hist, crate::HIST_LEN);
@@ -27,19 +27,21 @@ fn normal() {
     const MAX_X: f64 = 5.;

     let dist = Normal::new(MEAN, STD_DEV).unwrap();
-    let mut hist = Histogram100::with_const_width(MIN_X,MAX_X);
+    let mut hist = Histogram100::with_const_width(MIN_X, MAX_X);
     let mut rng = rand::rngs::SmallRng::seed_from_u64(1);

     for _ in 0..N_SAMPLES {
         let _ = hist.add(rng.sample(dist)); // Ignore out-of-range values
     }

-    println!("Sampled normal distribution:\n{}",
-        sparkline::render_u64_as_string(hist.bins()));
+    println!(
+        "Sampled normal distribution:\n{}",
+        sparkline::render_u64_as_string(hist.bins())
+    );

     fn pdf(x: f64) -> f64 {
-        (-0.5 * ((x - MEAN) / STD_DEV).powi(2)).exp() /
-            (STD_DEV * (2. * core::f64::consts::PI).sqrt())
+        (-0.5 * ((x - MEAN) / STD_DEV).powi(2)).exp()
+            / (STD_DEV * (2. * core::f64::consts::PI).sqrt())
     }

     let mut bin_centers = hist.centers();
@@ -48,19 +50,25 @@ fn normal() {
         *e = pdf(bin_centers.next().unwrap());
     }

-    println!("Expected normal distribution:\n{}",
-        sparkline::render_u64_as_string(hist.bins()));
+    println!(
+        "Expected normal distribution:\n{}",
+        sparkline::render_u64_as_string(hist.bins())
+    );

     let mut diff = [0.; HIST_LEN];
     for (i, n) in hist.normalized_bins().enumerate() {
-        let bin = (n as f64) / (N_SAMPLES as f64) ;
+        let bin = (n as f64) / (N_SAMPLES as f64);
         diff[i] = (bin - expected[i]).abs();
     }
-    println!("Difference:\n{}",
-        sparkline::render_f64_as_string(&diff[..]));
-    println!("max diff: {:?}", diff.iter().fold(
-        core::f64::NEG_INFINITY, |a, &b| a.max(b)));
+    println!(
+        "Difference:\n{}",
+        sparkline::render_f64_as_string(&diff[..])
+    );
+    println!(
+        "max diff: {:?}",
+        diff.iter().fold(core::f64::NEG_INFINITY, |a, &b| a.max(b))
+    );

     // Check that the differences are significantly smaller than the expected error.
     let mut expected_error = [0.; HIST_LEN];
@@ -74,8 +82,12 @@ fn normal() {
     }

     // TODO: Calculate error from distribution cutoff / normalization
-    println!("max expected_error: {:?}", expected_error.iter().fold(
-        core::f64::NEG_INFINITY, |a, &b| a.max(b)));
+    println!(
+        "max expected_error: {:?}",
+        expected_error
+            .iter()
+            .fold(core::f64::NEG_INFINITY, |a, &b| a.max(b))
+    );
     for (&d, &e) in diff.iter().zip(expected_error.iter()) {
         // Difference larger than 3 standard deviations or cutoff
         let tol = (3. * e).max(1e-4);
@@ -83,4 +95,89 @@ fn normal() {
             panic!("Difference = {} * tol", d / tol);
         }
     }
 }
+
+#[test]
+fn skew_normal() {
+    const N_SAMPLES: u64 = 1_000_000;
+    const LOCATION: f64 = 2.;
+    const SCALE: f64 = 0.5;
+    const SHAPE: f64 = -3.0;
+    const MIN_X: f64 = -1.;
+    const MAX_X: f64 = 4.;
+
+    let dist = SkewNormal::new(LOCATION, SCALE, SHAPE).unwrap();
+    let mut hist = Histogram100::with_const_width(MIN_X, MAX_X);
+    let mut rng = rand::rngs::SmallRng::seed_from_u64(1);
+
+    for _ in 0..N_SAMPLES {
+        let _ = hist.add(rng.sample(dist)); // Ignore out-of-range values
+    }
+
+    println!(
+        "Sampled skew normal distribution:\n{}",
+        sparkline::render_u64_as_string(hist.bins())
+    );
+
+    use special::Error;
+    fn pdf(x: f64) -> f64 {
+        let x_normalized = (x - LOCATION) / SCALE;
+        let normal_density_x =
+            (-0.5 * (x_normalized).powi(2)).exp() / (2. * core::f64::consts::PI).sqrt();
+        let normal_distribution_x =
+            0.5 * (1.0 + (SHAPE * x_normalized / core::f64::consts::SQRT_2).error());
+        2.0 / SCALE * normal_density_x * normal_distribution_x
+    }
+
+    let mut bin_centers = hist.centers();
+    let mut expected = [0.; HIST_LEN];
+    for e in &mut expected[..] {
+        *e = pdf(bin_centers.next().unwrap());
+    }
+
+    println!(
+        "Expected skew normal distribution:\n{}",
+        sparkline::render_u64_as_string(hist.bins())
+    );
+
+    let mut diff = [0.; HIST_LEN];
+    for (i, n) in hist.normalized_bins().enumerate() {
+        let bin = (n as f64) / (N_SAMPLES as f64);
+        diff[i] = (bin - expected[i]).abs();
+    }
+
+    println!(
+        "Difference:\n{}",
+        sparkline::render_f64_as_string(&diff[..])
+    );
+    println!(
+        "max diff: {:?}",
+        diff.iter().fold(core::f64::NEG_INFINITY, |a, &b| a.max(b))
+    );
+
+    // Check that the differences are significantly smaller than the expected error.
+    let mut expected_error = [0.; HIST_LEN];
+    // Calculate error from histogram
+    for (err, var) in expected_error.iter_mut().zip(hist.variances()) {
+        *err = var.sqrt() / (N_SAMPLES as f64);
+    }
+    // Normalize error by bin width
+    for (err, width) in expected_error.iter_mut().zip(hist.widths()) {
+        *err /= width;
+    }
+    // TODO: Calculate error from distribution cutoff / normalization
+    println!(
+        "max expected_error: {:?}",
+        expected_error
+            .iter()
+            .fold(core::f64::NEG_INFINITY, |a, &b| a.max(b))
+    );
+
+    for (&d, &e) in diff.iter().zip(expected_error.iter()) {
+        // Difference larger than 3 standard deviations or cutoff
+        let tol = (3. * e).max(1e-4);
+        if d > tol {
+            panic!("Difference = {} * tol", d / tol);
+        }
+    }
+}
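Note on the `pdf` helper in the hunk above: it evaluates the skew normal density `f(x) = 2 / SCALE * phi(z) * Phi(SHAPE * z)` with `z = (x - LOCATION) / SCALE`, using the identity `Phi(t) = (1 + erf(t / sqrt(2))) / 2`. Here `normal_density_x` is the standard normal density `phi(z)`, `normal_distribution_x` is the skewing factor `Phi(SHAPE * z)`, and `erf` is supplied by the `special` crate's `.error()` method added to the dev-dependencies earlier in this commit.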