227 lines
6.2 KiB
Rust
227 lines
6.2 KiB
Rust
use core::{f32, f64};
|
|
|
|
use super::scalbn;
|
|
|
|
const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
|
|
|
|
struct Num {
|
|
m: u64,
|
|
e: i32,
|
|
sign: i32,
|
|
}
|
|
|
|
#[inline]
|
|
fn normalize(x: f64) -> Num {
|
|
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
|
|
|
|
let mut ix: u64 = x.to_bits();
|
|
let mut e: i32 = (ix >> 52) as i32;
|
|
let sign: i32 = e & 0x800;
|
|
e &= 0x7ff;
|
|
if e == 0 {
|
|
ix = (x * x1p63).to_bits();
|
|
e = (ix >> 52) as i32 & 0x7ff;
|
|
e = if e != 0 { e - 63 } else { 0x800 };
|
|
}
|
|
ix &= (1 << 52) - 1;
|
|
ix |= 1 << 52;
|
|
ix <<= 1;
|
|
e -= 0x3ff + 52 + 1;
|
|
Num { m: ix, e, sign }
|
|
}
|
|
|
|
#[inline]
|
|
fn mul(x: u64, y: u64) -> (u64, u64) {
|
|
let t1: u64;
|
|
let t2: u64;
|
|
let t3: u64;
|
|
let xlo: u64 = x as u32 as u64;
|
|
let xhi: u64 = x >> 32;
|
|
let ylo: u64 = y as u32 as u64;
|
|
let yhi: u64 = y >> 32;
|
|
|
|
t1 = xlo * ylo;
|
|
t2 = xlo * yhi + xhi * ylo;
|
|
t3 = xhi * yhi;
|
|
let lo = t1.wrapping_add(t2 << 32);
|
|
let hi = t3 + (t2 >> 32) + (t1 > lo) as u64;
|
|
(hi, lo)
|
|
}
|
|
|
|
/// Floating multiply add (f64)
|
|
///
|
|
/// Computes `(x*y)+z`, rounded as one ternary operation:
|
|
/// Computes the value (as if) to infinite precision and rounds once to the result format,
|
|
/// according to the rounding mode characterized by the value of FLT_ROUNDS.
|
|
#[inline]
|
|
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
|
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
|
|
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
|
|
let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
|
|
|
|
/* normalize so top 10bits and last bit are 0 */
|
|
let nx = normalize(x);
|
|
let ny = normalize(y);
|
|
let nz = normalize(z);
|
|
|
|
if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
|
|
return x * y + z;
|
|
}
|
|
if nz.e >= ZEROINFNAN {
|
|
if nz.e > ZEROINFNAN {
|
|
/* z==0 */
|
|
return x * y + z;
|
|
}
|
|
return z;
|
|
}
|
|
|
|
/* mul: r = x*y */
|
|
let zhi: u64;
|
|
let zlo: u64;
|
|
let (mut rhi, mut rlo) = mul(nx.m, ny.m);
|
|
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
|
|
|
|
/* align exponents */
|
|
let mut e: i32 = nx.e + ny.e;
|
|
let mut d: i32 = nz.e - e;
|
|
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
|
|
if d > 0 {
|
|
if d < 64 {
|
|
zlo = nz.m << d;
|
|
zhi = nz.m >> (64 - d);
|
|
} else {
|
|
zlo = 0;
|
|
zhi = nz.m;
|
|
e = nz.e - 64;
|
|
d -= 64;
|
|
if d == 0 {
|
|
} else if d < 64 {
|
|
rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64;
|
|
rhi = rhi >> d;
|
|
} else {
|
|
rlo = 1;
|
|
rhi = 0;
|
|
}
|
|
}
|
|
} else {
|
|
zhi = 0;
|
|
d = -d;
|
|
if d == 0 {
|
|
zlo = nz.m;
|
|
} else if d < 64 {
|
|
zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64;
|
|
} else {
|
|
zlo = 1;
|
|
}
|
|
}
|
|
|
|
/* add */
|
|
let mut sign: i32 = nx.sign ^ ny.sign;
|
|
let samesign: bool = (sign ^ nz.sign) == 0;
|
|
let mut nonzero: i32 = 1;
|
|
if samesign {
|
|
/* r += z */
|
|
rlo = rlo.wrapping_add(zlo);
|
|
rhi += zhi + (rlo < zlo) as u64;
|
|
} else {
|
|
/* r -= z */
|
|
let t = rlo;
|
|
rlo -= zlo;
|
|
rhi = rhi - zhi - (t < rlo) as u64;
|
|
if (rhi >> 63) != 0 {
|
|
rlo = (-(rlo as i64)) as u64;
|
|
rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64;
|
|
sign = (sign == 0) as i32;
|
|
}
|
|
nonzero = (rhi != 0) as i32;
|
|
}
|
|
|
|
/* set rhi to top 63bit of the result (last bit is sticky) */
|
|
if nonzero != 0 {
|
|
e += 64;
|
|
d = rhi.leading_zeros() as i32 - 1;
|
|
/* note: d > 0 */
|
|
rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64;
|
|
} else if rlo != 0 {
|
|
d = rlo.leading_zeros() as i32 - 1;
|
|
if d < 0 {
|
|
rhi = rlo >> 1 | (rlo & 1);
|
|
} else {
|
|
rhi = rlo << d;
|
|
}
|
|
} else {
|
|
/* exact +-0 */
|
|
return x * y + z;
|
|
}
|
|
e -= d;
|
|
|
|
/* convert to double */
|
|
let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
|
|
if sign != 0 {
|
|
i = -i;
|
|
}
|
|
let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
|
|
|
|
if e < -1022 - 62 {
|
|
/* result is subnormal before rounding */
|
|
if e == -1022 - 63 {
|
|
let mut c: f64 = x1p63;
|
|
if sign != 0 {
|
|
c = -c;
|
|
}
|
|
if r == c {
|
|
/* min normal after rounding, underflow depends
|
|
on arch behaviour which can be imitated by
|
|
a double to float conversion */
|
|
let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
|
|
return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
|
|
}
|
|
/* one bit is lost when scaled, add another top bit to
|
|
only round once at conversion if it is inexact */
|
|
if (rhi << 53) != 0 {
|
|
i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
|
|
if sign != 0 {
|
|
i = -i;
|
|
}
|
|
r = i as f64;
|
|
r = 2. * r - c; /* remove top bit */
|
|
|
|
/* raise underflow portably, such that it
|
|
cannot be optimized away */
|
|
{
|
|
let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
|
|
r += (tiny * tiny) * (r - r);
|
|
}
|
|
}
|
|
} else {
|
|
/* only round once when scaled */
|
|
d = 10;
|
|
i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64;
|
|
if sign != 0 {
|
|
i = -i;
|
|
}
|
|
r = i as f64;
|
|
}
|
|
}
|
|
scalbn(r, e)
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
#[test]
|
|
fn fma_segfault() {
|
|
// These two inputs cause fma to segfault on release due to overflow:
|
|
assert_eq!(
|
|
fma(
|
|
-0.0000000000000002220446049250313,
|
|
-0.0000000000000002220446049250313,
|
|
-0.0000000000000002220446049250313
|
|
),
|
|
-0.00000000000000022204460492503126,
|
|
);
|
|
|
|
assert_eq!(fma(-0.992, -0.992, -0.992), -0.00793599999988632,);
|
|
}
|
|
}
|