Merge #15

15: Implement Stein's algorithm for gcd r=cuviper a=Emerentius This implements Stein's algorithm for bigints. Asymptotically this has the same runtime complexity as the euclidean algorithm but it's faster because it avoids division in favor of bitshifts and subtractions. There are faster algorithms for large bigints. For small ones, [gmp uses the binary gcd too](https://gmplib.org/manual/Binary-GCD.html). I've run some benchmarks with the code in [this repo](https://github.com/Emerentius/bigint_gcd_bench) This iterates through the sizes of 1-10 `BigDigit`s and generates 300 uniformly distributed random bigints at each size and computes the gcd for each combination with both Euclid's and Stein's algorithm. I'm only looking at combinations of numbers with the same number of `BigDigit`s The speed gains are sizeable. See the benchmark results below. I'm running this on an ultrabook with a 15W CPU (i5 4210u). Performance may differ on different architectures, in particular if there is no intrinsic for counting trailing zeroes. Please run the benchmark on your machine. It's just a simple ``` git clone https://github.com/Emerentius/bigint_gcd_bench cargo run --release ``` ``` 2^32n bits euclidean gcd binary gcd speedup n: 1 => 0.3050s 0.0728s 4.19 n: 2 => 0.6228s 0.1453s 4.29 n: 3 => 0.9618s 0.2214s 4.34 n: 4 => 1.3021s 0.3028s 4.30 n: 5 => 1.6469s 0.3875s 4.25 n: 6 => 2.0017s 0.4759s 4.21 n: 7 => 2.3636s 0.5667s 4.17 n: 8 => 2.7284s 0.6418s 4.25 n: 9 => 3.0712s 0.7302s 4.21 n: 10 => 3.4822s 0.8223s 4.23 ``` The guys at gmp say these algorithms are quadratic in N, I'm not sure why they seem almost linear here.
2018-02-08 06:27:38 +00:00 · 2018-02-08 06:27:38 +00:00 · 07a2d6adbe
commit 07a2d6adbe
parent c0ab6c3c6a e45b2b7d0c
6 changed files with 136 additions and 13 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -14,6 +14,9 @@ readme = "README.md"
 [[bench]]
 name = "bigint"

+[[bench]]
+name = "gcd"
+
 [[bench]]
 harness = false
 name = "shootout-pidigits"
--- a/benches/gcd.rs
+++ b/benches/gcd.rs
@ -0,0 +1,84 @@
+#![feature(test)]
+
+extern crate test;
+extern crate num_bigint;
+extern crate num_integer;
+extern crate num_traits;
+extern crate rand;
+
+use test::Bencher;
+use num_bigint::{BigUint, RandBigInt};
+use num_integer::Integer;
+use num_traits::Zero;
+use rand::{SeedableRng, StdRng};
+
+fn get_rng() -> StdRng {
+    let seed: &[_] = &[1, 2, 3, 4];
+    SeedableRng::from_seed(seed)
+}
+
+fn bench(b: &mut Bencher, bits: usize, gcd: fn(&BigUint, &BigUint) -> BigUint) {
+    let mut rng = get_rng();
+    let x = rng.gen_biguint(bits);
+    let y = rng.gen_biguint(bits);
+
+    assert_eq!(euclid(&x, &y), x.gcd(&y));
+
+    b.iter(|| gcd(&x, &y));
+}
+
+
+fn euclid(x: &BigUint, y: &BigUint) -> BigUint {
+    // Use Euclid's algorithm
+    let mut m = x.clone();
+    let mut n = y.clone();
+    while !m.is_zero() {
+        let temp = m;
+        m = n % &temp;
+        n = temp;
+    }
+    return n;
+}
+
+#[bench]
+fn gcd_euclid_0064(b: &mut Bencher) {
+    bench(b, 64, euclid);
+}
+
+#[bench]
+fn gcd_euclid_0256(b: &mut Bencher) {
+    bench(b, 256, euclid);
+}
+
+#[bench]
+fn gcd_euclid_1024(b: &mut Bencher) {
+    bench(b, 1024, euclid);
+}
+
+#[bench]
+fn gcd_euclid_4096(b: &mut Bencher) {
+    bench(b, 4096, euclid);
+}
+
+
+// Integer for BigUint now uses Stein for gcd
+
+#[bench]
+fn gcd_stein_0064(b: &mut Bencher) {
+    bench(b, 64, BigUint::gcd);
+}
+
+#[bench]
+fn gcd_stein_0256(b: &mut Bencher) {
+    bench(b, 256, BigUint::gcd);
+}
+
+#[bench]
+fn gcd_stein_1024(b: &mut Bencher) {
+    bench(b, 1024, BigUint::gcd);
+}
+
+#[bench]
+fn gcd_stein_4096(b: &mut Bencher) {
+    bench(b, 4096, BigUint::gcd);
+}
--- a/src/algorithms.rs
+++ b/src/algorithms.rs
@ -591,9 +591,12 @@ pub fn biguint_shr(n: Cow<BigUint>, bits: usize) -> BigUint {
    if n_unit >= n.data.len() {
        return Zero::zero();
    }
-    let mut data = match n_unit {
-        0 => n.into_owned().data,
-        _ => n.data[n_unit..].to_vec(),
+    let mut data = match n {
+        Cow::Borrowed(n) => n.data[n_unit..].to_vec(),
+        Cow::Owned(mut n) => {
+            n.data.drain(..n_unit);
+            n.data
+        }
    };

    let n_bits = bits % big_digit::BITS;
--- a/src/bigint.rs
+++ b/src/bigint.rs
@ -4,6 +4,7 @@ use std::str::{self, FromStr};
 use std::fmt;
 use std::cmp::Ordering::{self, Less, Greater, Equal};
 use std::{i64, u64};
+#[allow(unused)]
 use std::ascii::AsciiExt;

 #[cfg(feature = "serde")]
--- a/src/biguint.rs
+++ b/src/biguint.rs
@ -7,9 +7,11 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Rem, Shl, Shr, Sub,
 use std::str::{self, FromStr};
 use std::fmt;
 use std::cmp;
+use std::mem;
 use std::cmp::Ordering::{self, Less, Greater, Equal};
 use std::{f32, f64};
 use std::{u8, u64};
+#[allow(unused)]
 use std::ascii::AsciiExt;

 #[cfg(feature = "serde")]
@ -396,7 +398,8 @@ impl<'a> Shr<usize> for &'a BigUint {
 impl ShrAssign<usize> for BigUint {
    #[inline]
    fn shr_assign(&mut self, rhs: usize) {
-        *self = biguint_shr(Cow::Borrowed(&*self), rhs);
+        let n = mem::replace(self, BigUint::zero());
+        *self = n >> rhs;
    }
 }

@ -940,16 +943,34 @@ impl Integer for BigUint {
    ///
    /// The result is always positive.
    #[inline]
-    fn gcd(&self, other: &BigUint) -> BigUint {
-        // Use Euclid's algorithm
-        let mut m = (*self).clone();
-        let mut n = (*other).clone();
-        while !m.is_zero() {
-            let temp = m;
-            m = n % &temp;
-            n = temp;
+    fn gcd(&self, other: &Self) -> Self {
+        // Stein's algorithm
+        if self.is_zero() {
+            return other.clone();
        }
-        return n;
+        if other.is_zero() {
+            return self.clone();
+        }
+        let mut m = self.clone();
+        let mut n = other.clone();
+
+        // find common factors of 2
+        let shift = cmp::min(
+            n.trailing_zeros(),
+            m.trailing_zeros()
+        );
+
+        // divide m and n by 2 until odd
+        // m inside loop
+        n >>= n.trailing_zeros();
+
+        while !m.is_zero() {
+            m >>= m.trailing_zeros();
+            if n > m { mem::swap(&mut n, &mut m) }
+            m -= &n;
+        }
+
+        n << shift
    }

    /// Calculates the Lowest Common Multiple (LCM) of the number and `other`.
@ -1607,6 +1628,16 @@ impl BigUint {
        return self.data.len() * big_digit::BITS - zeros as usize;
    }

+    // self is assumed to be normalized
+    fn trailing_zeros(&self) -> usize {
+        self.data
+            .iter()
+            .enumerate()
+            .find(|&(_, &digit)| digit != 0)
+            .map(|(i, digit)| i * big_digit::BITS + digit.trailing_zeros() as usize)
+            .unwrap_or(0)
+    }
+
    /// Strips off trailing zero bigdigits - comparisons require the last element in the vector to
    /// be nonzero.
    #[inline]
--- a/src/tests/biguint.rs
+++ b/src/tests/biguint.rs
@ -1569,6 +1569,7 @@ fn test_from_str_radix() {

 #[test]
 fn test_all_str_radix() {
+    #[allow(unused)]
    use std::ascii::AsciiExt;

    let n = BigUint::new((0..10).collect());