From 22ff3f918de1b98d7bf2578dd6f38f9e4fc860b6 Mon Sep 17 00:00:00 2001
From: Josh Stone
Date: Tue, 15 Dec 2015 21:59:51 -0800
Subject: [PATCH] bigint: improve from_str_radix performance

Before:
  test from_str_radix_02 ... bench: 8,432 ns/iter (+/- 280)
  test from_str_radix_08 ... bench: 7,397 ns/iter (+/- 95)
  test from_str_radix_10 ... bench: 7,344 ns/iter (+/- 142)
  test from_str_radix_16 ... bench: 6,753 ns/iter (+/- 157)
  test from_str_radix_36 ... bench: 7,093 ns/iter (+/- 60)

After:
  test from_str_radix_02 ... bench: 3,295 ns/iter (+/- 81)
  test from_str_radix_08 ... bench: 1,377 ns/iter (+/- 56)
  test from_str_radix_10 ... bench: 1,583 ns/iter (+/- 16)
  test from_str_radix_16 ... bench: 1,483 ns/iter (+/- 53)
  test from_str_radix_36 ... bench: 1,628 ns/iter (+/- 27)
---
 src/bigint.rs | 154 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 119 insertions(+), 35 deletions(-)

diff --git a/src/bigint.rs b/src/bigint.rs
index 81ab886..fce68e0 100644
--- a/src/bigint.rs
+++ b/src/bigint.rs
@@ -66,12 +66,11 @@ use std::iter::repeat;
 use std::num::ParseIntError;
 use std::ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Rem, Shl, Shr, Sub};
 use std::str::{self, FromStr};
-use std::{cmp, fmt, hash};
+use std::{fmt, hash};
 use std::cmp::Ordering::{self, Less, Greater, Equal};
 use std::{i64, u64};
 
 use rand::Rng;
-use rustc_serialize::hex::ToHex;
 
 use traits::{ToPrimitive, FromPrimitive};
 
@@ -254,41 +253,122 @@ impl FromStr for BigUint {
     }
 }
 
+// Read bitwise digits that evenly divide BigDigit
+fn from_bitwise_digits_le(v: &[u8], bits: usize) -> BigUint {
+    debug_assert!(!v.is_empty() && bits <= 8 && big_digit::BITS % bits == 0);
+    debug_assert!(v.iter().all(|&c| (c as BigDigit) < (1 << bits)));
+
+    let digits_per_big_digit = big_digit::BITS / bits;
+
+    let data = v.chunks(digits_per_big_digit).map(|chunk| {
+        chunk.iter().rev().fold(0u32, |acc, &c| (acc << bits) | c as BigDigit)
+    }).collect();
+
+    BigUint::new(data)
+}
+
+// Read bitwise digits that don't evenly divide BigDigit
+fn from_inexact_bitwise_digits_le(v: &[u8], bits: usize) -> BigUint {
+    debug_assert!(!v.is_empty() && bits <= 8 && big_digit::BITS % bits != 0);
+    debug_assert!(v.iter().all(|&c| (c as BigDigit) < (1 << bits)));
+
+    let big_digits = (v.len() * bits + big_digit::BITS - 1) / big_digit::BITS;
+    let mut data = Vec::with_capacity(big_digits);
+
+    let mut d = 0;
+    let mut dbits = 0;
+    for &c in v {
+        d |= (c as DoubleBigDigit) << dbits;
+        dbits += bits;
+        if dbits >= big_digit::BITS {
+            let (hi, lo) = big_digit::from_doublebigdigit(d);
+            data.push(lo);
+            d = hi as DoubleBigDigit;
+            dbits -= big_digit::BITS;
+        }
+    }
+
+    if dbits > 0 {
+        debug_assert!(dbits < big_digit::BITS);
+        data.push(d as BigDigit);
+    }
+
+    BigUint::new(data)
+}
+
+// Read big-endian radix digits
+fn from_radix_digits_be(v: &[u8], radix: u32) -> BigUint {
+    debug_assert!(!v.is_empty() && !radix.is_power_of_two());
+    debug_assert!(v.iter().all(|&c| (c as u32) < radix));
+
+    let (base, power) = get_radix_base(radix);
+    debug_assert!(base < (1 << 32));
+    let base = base as BigDigit;
+
+    let r = v.len() % power;
+    let i = if r == 0 { power } else { r };
+    let (head, tail) = v.split_at(i);
+
+    let first = head.iter().fold(0, |acc, &d| acc * radix + d as BigDigit);
+    let mut data = vec![first];
+
+    debug_assert!(tail.len() % power == 0);
+    for chunk in tail.chunks(power) {
+        let mut carry = 0;
+        data.push(0);
+        for d in data.iter_mut() {
+            *d = mac_with_carry(0, *d, base, &mut carry);
+        }
+        debug_assert!(carry == 0);
+
+        let n = chunk.iter().fold(0, |acc, &d| acc * radix + d as BigDigit);
+        add2(&mut data, &[n]);
+
+        if let Some(&0) = data.last() {
+            data.pop();
+        }
+    }
+
+    BigUint::new(data)
+}
+
 impl Num for BigUint {
     type FromStrRadixErr = ParseBigIntError;
 
     /// Creates and initializes a `BigUint`.
-    #[inline]
     fn from_str_radix(s: &str, radix: u32) -> Result<BigUint, ParseBigIntError> {
-        let (base, unit_len) = get_radix_base(radix);
-        let base_num = match base.to_biguint() {
-            Some(base_num) => base_num,
-            None => { return Err(ParseBigIntError::Other); }
-        };
-
-        let mut end = s.len();
-        let mut n: BigUint = Zero::zero();
-        let mut power: BigUint = One::one();
-        loop {
-            let start = cmp::max(end, unit_len) - unit_len;
-            let d = try!(usize::from_str_radix(&s[start .. end], radix));
-            let d: Option<BigUint> = FromPrimitive::from_usize(d);
-            match d {
-                Some(d) => {
-                    // FIXME(#5992): assignment operator overloads
-                    // n += d * &power;
-                    n = n + d * &power;
-                }
-                None => { return Err(ParseBigIntError::Other); }
-            }
-            if end <= unit_len {
-                return Ok(n);
-            }
-            end -= unit_len;
-            // FIXME(#5992): assignment operator overloads
-            // power *= &base_num;
-            power = power * &base_num;
+        assert!(2 <= radix && radix <= 36, "The radix must be within 2...36");
+        if s.is_empty() {
+            // create ParseIntError
+            try!(u64::from_str_radix(s, radix));
+            unreachable!();
         }
+
+        // First normalize all characters to plain digit values
+        let mut v = Vec::with_capacity(s.len());
+        for (i, c) in s.chars().enumerate() {
+            if let Some(d) = c.to_digit(radix) {
+                v.push(d as u8);
+            } else {
+                // create ParseIntError; a tail such as "+2" still parses, so never assume Err
+                let tail = u64::from_str_radix(&s[i..], radix);
+                return Err(tail.err().map_or(ParseBigIntError::Other, |e| From::from(e)));
+            }
+        }
+
+        let res = if radix.is_power_of_two() {
+            // Powers of two can use bitwise masks and shifting instead of multiplication
+            let bits = radix.trailing_zeros() as usize;
+            v.reverse();
+            if big_digit::BITS % bits == 0 {
+                from_bitwise_digits_le(&v, bits)
+            } else {
+                from_inexact_bitwise_digits_le(&v, bits)
+            }
+        } else {
+            from_radix_digits_be(&v, radix)
+        };
+        Ok(res)
     }
 }
 
@@ -1351,7 +1431,9 @@ impl BigUint {
         if bytes.is_empty() {
             Zero::zero()
         } else {
-            BigUint::parse_bytes(bytes.to_hex().as_bytes(), 16).unwrap()
+            let mut v = bytes.to_vec();
+            v.reverse();
+            BigUint::from_bytes_le(&*v)
         }
     }
 
@@ -1360,9 +1442,11 @@ impl BigUint {
     /// The bytes are in little-endian byte order.
     #[inline]
     pub fn from_bytes_le(bytes: &[u8]) -> BigUint {
-        let mut v = bytes.to_vec();
-        v.reverse();
-        BigUint::from_bytes_be(&*v)
+        if bytes.is_empty() {
+            Zero::zero()
+        } else {
+            from_bitwise_digits_le(bytes, 8)
+        }
     }
 
     /// Returns the byte representation of the `BigUint` in little-endian byte order.