Use macros for more division/array checks (#244)

* Use macros for more division/array checks This commit moves over more array accesses to the `i!` macro to avoid bounds checks when debug assertions are disabled. This is surfaced from rust-lang/compiler-builtins#360 where recent changes in codegen units have caused some bounds checks to not get elided in release mode. This also adds a `div!` macro to work around rust-lang/rust#72751. * Don't test/bench our shim crate It's not intended to run all our tests.
2020-05-29 14:16:51 -05:00 · 2020-05-29 14:16:51 -05:00 · fe396e00b7
commit fe396e00b7
parent 3d729b7a85
12 changed files with 53 additions and 35 deletions
--- a/crates/compiler-builtins-smoke-test/Cargo.toml
+++ b/crates/compiler-builtins-smoke-test/Cargo.toml
@ -3,4 +3,7 @@ name = "cb"
 version = "0.1.0"
 authors = ["Jorge Aparicio <jorge@japaric.io>"]

-[dependencies]
+[lib]
+test = false
+bench = false
+
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,10 +1,7 @@
 //! libm in pure Rust
 #![deny(warnings)]
 #![no_std]
-#![cfg_attr(
-    all(target_arch = "wasm32", feature = "unstable"),
-    feature(core_intrinsics)
-)]
+#![cfg_attr(all(feature = "unstable"), feature(core_intrinsics))]
 #![allow(clippy::unreadable_literal)]
 #![allow(clippy::many_single_char_names)]
 #![allow(clippy::needless_return)]
--- a/src/math/atanf.rs
+++ b/src/math/atanf.rs
@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 {
        if x.is_nan() {
            return x;
        }
-        z = ATAN_HI[3] + x1p_120;
+        z = i!(ATAN_HI, 3) + x1p_120;
        return if sign { -z } else { z };
    }
    let id = if ix < 0x3ee00000 {
@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 {
    z = x * x;
    let w = z * z;
    /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
-    let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4]));
-    let s2 = w * (A_T[1] + w * A_T[3]);
+    let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4)));
+    let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3));
    if id < 0 {
        return x - x * (s1 + s2);
    }
    let id = id as usize;
-    let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x);
+    let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x);
    if sign {
        -z
    } else {
--- a/src/math/exp.rs
+++ b/src/math/exp.rs
@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 {
        /* if |x| > 0.5 ln2 */
        if hx >= 0x3ff0a2b2 {
            /* if |x| >= 1.5 ln2 */
-            k = (INVLN2 * x + HALF[sign as usize]) as i32;
+            k = (INVLN2 * x + i!(HALF, sign as usize)) as i32;
        } else {
            k = 1 - sign - sign;
        }
--- a/src/math/exp2.rs
+++ b/src/math/exp2.rs
@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 {
    let mut i0 = ui as u32;
    i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
    let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32;
-    let ki = ku as i32 / TBLSIZE as i32;
+    let ki = div!(ku as i32, TBLSIZE as i32);
    i0 %= TBLSIZE as u32;
    let uf = f64::from_bits(ui) - redux;
    let mut z = x - uf;

    /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
-    let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */
-    z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0]   */
+    let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */
+    z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0]   */
    let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5))));

    scalbn(r, ki)
--- a/src/math/exp2f.rs
+++ b/src/math/exp2f.rs
@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 {
    uf -= redux;
    let z: f64 = (x - uf) as f64;
    /* Compute r = exp2(y) = exp2ft[i0] * p(z). */
-    let r: f64 = f64::from_bits(EXP2FT[i0 as usize]);
+    let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
    let t: f64 = r as f64 * z;
    let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);

--- a/src/math/expf.rs
+++ b/src/math/expf.rs
@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 {
        /* if |x| > 0.5 ln2 */
        if hx > 0x3f851592 {
            /* if |x| > 1.5 ln2 */
-            k = (INV_LN2 * x + HALF[sign as usize]) as i32;
+            k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32;
        } else {
            k = 1 - sign - sign;
        }
--- a/src/math/mod.rs
+++ b/src/math/mod.rs
@ -58,6 +58,24 @@ macro_rules! i {
    };
 }

+// Temporary macro to avoid panic codegen for division (in debug mode too). At
+// the time of this writing this is only used in a few places, and once
+// rust-lang/rust#72751 is fixed then this macro will no longer be necessary and
+// the native `/` operator can be used and panics won't be codegen'd.
+#[cfg(any(debug_assertions, not(feature = "unstable")))]
+macro_rules! div {
+    ($a:expr, $b:expr) => {
+        $a / $b
+    };
+}
+
+#[cfg(all(not(debug_assertions), feature = "unstable"))]
+macro_rules! div {
+    ($a:expr, $b:expr) => {
+        unsafe { core::intrinsics::unchecked_div($a, $b) }
+    };
+}
+
 macro_rules! llvm_intrinsically_optimized {
    (#[cfg($($clause:tt)*)] $e:expr) => {
        #[cfg(all(feature = "unstable", $($clause)*))]
--- a/src/math/pow.rs
+++ b/src/math/pow.rs
@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
        ax = with_set_high_word(ax, ix as u32);

        /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-        let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
-        let v: f64 = 1.0 / (ax + BP[k as usize]);
+        let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
+        let v: f64 = 1.0 / (ax + i!(BP, k as usize));
        let ss: f64 = u * v;
        let s_h = with_set_low_word(ss, 0);

@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
            0.0,
            ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
        );
-        let t_l: f64 = ax - (t_h - BP[k as usize]);
+        let t_l: f64 = ax - (t_h - i!(BP, k as usize));
        let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);

        /* compute log(ax) */
@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 {
        let p_h: f64 = with_set_low_word(u + v, 0);
        let p_l = v - (p_h - u);
        let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
-        let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize];
+        let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);

        /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
        let t: f64 = n as f64;
-        t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0);
-        t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
+        t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
+        t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
    }

    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
--- a/src/math/powf.rs
+++ b/src/math/powf.rs
@ -238,8 +238,8 @@ pub fn powf(x: f32, y: f32) -> f32 {
        ax = f32::from_bits(ix as u32);

        /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
-        u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
-        v = 1.0 / (ax + BP[k as usize]);
+        u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
+        v = 1.0 / (ax + i!(BP, k as usize));
        s = u * v;
        s_h = s;
        is = s_h.to_bits() as i32;
@ -247,7 +247,7 @@ pub fn powf(x: f32, y: f32) -> f32 {
        /* t_h=ax+bp[k] High */
        is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32;
        t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21));
-        t_l = ax - (t_h - BP[k as usize]);
+        t_l = ax - (t_h - i!(BP, k as usize));
        s_l = v * ((u - s_h * t_h) - s_h * t_l);
        /* compute log(ax) */
        s2 = s * s;
@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
        p_h = f32::from_bits(is as u32 & 0xfffff000);
        p_l = v - (p_h - u);
        z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
-        z_l = CP_L * p_h + p_l * CP + DP_L[k as usize];
+        z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
        /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
        t = n as f32;
-        t1 = ((z_h + z_l) + DP_H[k as usize]) + t;
+        t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t;
        is = t1.to_bits() as i32;
        t1 = f32::from_bits(is as u32 & 0xfffff000);
-        t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
+        t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
    };

    /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
--- a/src/math/rem_pio2.rs
+++ b/src/math/rem_pio2.rs
@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
    let mut z = f64::from_bits(ui);
    let mut tx = [0.0; 3];
    for i in 0..2 {
-        tx[i] = z as i32 as f64;
-        z = (z - tx[i]) * x1p24;
+        i!(tx,i, =, z as i32 as f64);
+        z = (z - i!(tx, i)) * x1p24;
    }
-    tx[2] = z;
+    i!(tx,2, =, z);
    /* skip zero terms, first term is non-zero */
    let mut i = 2;
-    while i != 0 && tx[i] == 0.0 {
+    while i != 0 && i!(tx, i) == 0.0 {
        i -= 1;
    }
    let mut ty = [0.0; 3];
    let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1);
    if sign != 0 {
-        return (-n, -ty[0], -ty[1]);
+        return (-n, -i!(ty, 0), -i!(ty, 1));
    }
-    (n, ty[0], ty[1])
+    (n, i!(ty, 0), i!(ty, 1))
 }

 #[cfg(test)]
--- a/src/math/rem_pio2_large.rs
+++ b/src/math/rem_pio2_large.rs
@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
    let mut iq: [i32; 20] = [0; 20];

    /* initialize jk*/
-    let jk = INIT_JK[prec];
+    let jk = i!(INIT_JK, prec);
    let jp = jk;

    /* determine jx,jv,q0, note that 3>q0 */
    let jx = nx - 1;
-    let mut jv = (e0 - 3) / 24;
+    let mut jv = div!(e0 - 3, 24);
    if jv < 0 {
        jv = 0;
    }