15: Implement Stein's algorithm for gcd r=cuviper a=Emerentius

This implements Stein's algorithm for bigints.
Asymptotically this has the same runtime complexity as the euclidean algorithm but it's faster because it avoids division in favor of bitshifts and subtractions.
There are faster algorithms for large bigints. For small ones, [gmp uses the binary gcd too](https://gmplib.org/manual/Binary-GCD.html).

I've run some benchmarks with the code in [this repo](https://github.com/Emerentius/bigint_gcd_bench)
This iterates through the sizes of 1-10 `BigDigit`s and generates 300 uniformly distributed random bigints at each size and computes the gcd for each combination with both Euclid's and Stein's algorithm. I'm only looking at combinations of numbers with the same number of `BigDigit`s

The speed gains are sizeable. See the benchmark results below. I'm running this on an ultrabook with a 15W CPU (i5 4210u). Performance may differ on different architectures, in particular if there is no intrinsic for counting trailing zeroes.

Please run the benchmark on your machine. It's just a simple
```
git clone https://github.com/Emerentius/bigint_gcd_bench
cargo run --release
```

```
2^32n bits	euclidean gcd	binary gcd	speedup
n:  1 =>	0.3050s		0.0728s		4.19
n:  2 =>	0.6228s		0.1453s		4.29
n:  3 =>	0.9618s		0.2214s		4.34
n:  4 =>	1.3021s		0.3028s		4.30
n:  5 =>	1.6469s		0.3875s		4.25
n:  6 =>	2.0017s		0.4759s		4.21
n:  7 =>	2.3636s		0.5667s		4.17
n:  8 =>	2.7284s		0.6418s		4.25
n:  9 =>	3.0712s		0.7302s		4.21
n: 10 =>	3.4822s		0.8223s		4.23
```

The guys at gmp say these algorithms are quadratic in N, I'm not sure why they seem almost linear here.
This commit is contained in:
bors[bot] 2018-02-08 06:27:38 +00:00
commit 07a2d6adbe
6 changed files with 136 additions and 13 deletions

View File

@ -14,6 +14,9 @@ readme = "README.md"
[[bench]]
name = "bigint"
[[bench]]
name = "gcd"
[[bench]]
harness = false
name = "shootout-pidigits"

84
benches/gcd.rs Executable file
View File

@ -0,0 +1,84 @@
#![feature(test)]
extern crate test;
extern crate num_bigint;
extern crate num_integer;
extern crate num_traits;
extern crate rand;
use test::Bencher;
use num_bigint::{BigUint, RandBigInt};
use num_integer::Integer;
use num_traits::Zero;
use rand::{SeedableRng, StdRng};
fn get_rng() -> StdRng {
let seed: &[_] = &[1, 2, 3, 4];
SeedableRng::from_seed(seed)
}
fn bench(b: &mut Bencher, bits: usize, gcd: fn(&BigUint, &BigUint) -> BigUint) {
let mut rng = get_rng();
let x = rng.gen_biguint(bits);
let y = rng.gen_biguint(bits);
assert_eq!(euclid(&x, &y), x.gcd(&y));
b.iter(|| gcd(&x, &y));
}
fn euclid(x: &BigUint, y: &BigUint) -> BigUint {
// Use Euclid's algorithm
let mut m = x.clone();
let mut n = y.clone();
while !m.is_zero() {
let temp = m;
m = n % &temp;
n = temp;
}
return n;
}
#[bench]
fn gcd_euclid_0064(b: &mut Bencher) {
bench(b, 64, euclid);
}
#[bench]
fn gcd_euclid_0256(b: &mut Bencher) {
bench(b, 256, euclid);
}
#[bench]
fn gcd_euclid_1024(b: &mut Bencher) {
bench(b, 1024, euclid);
}
#[bench]
fn gcd_euclid_4096(b: &mut Bencher) {
bench(b, 4096, euclid);
}
// Integer for BigUint now uses Stein for gcd
#[bench]
fn gcd_stein_0064(b: &mut Bencher) {
bench(b, 64, BigUint::gcd);
}
#[bench]
fn gcd_stein_0256(b: &mut Bencher) {
bench(b, 256, BigUint::gcd);
}
#[bench]
fn gcd_stein_1024(b: &mut Bencher) {
bench(b, 1024, BigUint::gcd);
}
#[bench]
fn gcd_stein_4096(b: &mut Bencher) {
bench(b, 4096, BigUint::gcd);
}

View File

@ -591,9 +591,12 @@ pub fn biguint_shr(n: Cow<BigUint>, bits: usize) -> BigUint {
if n_unit >= n.data.len() {
return Zero::zero();
}
let mut data = match n_unit {
0 => n.into_owned().data,
_ => n.data[n_unit..].to_vec(),
let mut data = match n {
Cow::Borrowed(n) => n.data[n_unit..].to_vec(),
Cow::Owned(mut n) => {
n.data.drain(..n_unit);
n.data
}
};
let n_bits = bits % big_digit::BITS;

View File

@ -4,6 +4,7 @@ use std::str::{self, FromStr};
use std::fmt;
use std::cmp::Ordering::{self, Less, Greater, Equal};
use std::{i64, u64};
#[allow(unused)]
use std::ascii::AsciiExt;
#[cfg(feature = "serde")]

View File

@ -7,9 +7,11 @@ use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Rem, Shl, Shr, Sub,
use std::str::{self, FromStr};
use std::fmt;
use std::cmp;
use std::mem;
use std::cmp::Ordering::{self, Less, Greater, Equal};
use std::{f32, f64};
use std::{u8, u64};
#[allow(unused)]
use std::ascii::AsciiExt;
#[cfg(feature = "serde")]
@ -396,7 +398,8 @@ impl<'a> Shr<usize> for &'a BigUint {
impl ShrAssign<usize> for BigUint {
#[inline]
fn shr_assign(&mut self, rhs: usize) {
*self = biguint_shr(Cow::Borrowed(&*self), rhs);
let n = mem::replace(self, BigUint::zero());
*self = n >> rhs;
}
}
@ -940,16 +943,34 @@ impl Integer for BigUint {
///
/// The result is always positive.
#[inline]
fn gcd(&self, other: &BigUint) -> BigUint {
// Use Euclid's algorithm
let mut m = (*self).clone();
let mut n = (*other).clone();
while !m.is_zero() {
let temp = m;
m = n % &temp;
n = temp;
fn gcd(&self, other: &Self) -> Self {
// Stein's algorithm
if self.is_zero() {
return other.clone();
}
return n;
if other.is_zero() {
return self.clone();
}
let mut m = self.clone();
let mut n = other.clone();
// find common factors of 2
let shift = cmp::min(
n.trailing_zeros(),
m.trailing_zeros()
);
// divide m and n by 2 until odd
// m inside loop
n >>= n.trailing_zeros();
while !m.is_zero() {
m >>= m.trailing_zeros();
if n > m { mem::swap(&mut n, &mut m) }
m -= &n;
}
n << shift
}
/// Calculates the Lowest Common Multiple (LCM) of the number and `other`.
@ -1607,6 +1628,16 @@ impl BigUint {
return self.data.len() * big_digit::BITS - zeros as usize;
}
// self is assumed to be normalized
fn trailing_zeros(&self) -> usize {
self.data
.iter()
.enumerate()
.find(|&(_, &digit)| digit != 0)
.map(|(i, digit)| i * big_digit::BITS + digit.trailing_zeros() as usize)
.unwrap_or(0)
}
/// Strips off trailing zero bigdigits - comparisons require the last element in the vector to
/// be nonzero.
#[inline]

View File

@ -1569,6 +1569,7 @@ fn test_from_str_radix() {
#[test]
fn test_all_str_radix() {
#[allow(unused)]
use std::ascii::AsciiExt;
let n = BigUint::new((0..10).collect());