Use macros for more division/array checks (#244)
* Use macros for more division/array checks: This commit moves over more array accesses to the `i!` macro to avoid bounds checks when debug assertions are disabled. This was surfaced by rust-lang/compiler-builtins#360, where recent changes in codegen units have caused some bounds checks to not get elided in release mode. This also adds a `div!` macro to work around rust-lang/rust#72751. * Don't test/bench our shim crate: It's not intended to run all our tests.
This commit is contained in:
parent
3d729b7a85
commit
fe396e00b7
crates/compiler-builtins-smoke-test
src
@ -3,4 +3,7 @@ name = "cb"
|
||||
version = "0.1.0"
|
||||
authors = ["Jorge Aparicio <jorge@japaric.io>"]
|
||||
|
||||
[dependencies]
|
||||
[lib]
|
||||
test = false
|
||||
bench = false
|
||||
|
||||
|
@ -1,10 +1,7 @@
|
||||
//! libm in pure Rust
|
||||
#![deny(warnings)]
|
||||
#![no_std]
|
||||
#![cfg_attr(
|
||||
all(target_arch = "wasm32", feature = "unstable"),
|
||||
feature(core_intrinsics)
|
||||
)]
|
||||
#![cfg_attr(all(feature = "unstable"), feature(core_intrinsics))]
|
||||
#![allow(clippy::unreadable_literal)]
|
||||
#![allow(clippy::many_single_char_names)]
|
||||
#![allow(clippy::needless_return)]
|
||||
|
@ -56,7 +56,7 @@ pub fn atanf(mut x: f32) -> f32 {
|
||||
if x.is_nan() {
|
||||
return x;
|
||||
}
|
||||
z = ATAN_HI[3] + x1p_120;
|
||||
z = i!(ATAN_HI, 3) + x1p_120;
|
||||
return if sign { -z } else { z };
|
||||
}
|
||||
let id = if ix < 0x3ee00000 {
|
||||
@ -97,13 +97,13 @@ pub fn atanf(mut x: f32) -> f32 {
|
||||
z = x * x;
|
||||
let w = z * z;
|
||||
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
|
||||
let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4]));
|
||||
let s2 = w * (A_T[1] + w * A_T[3]);
|
||||
let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4)));
|
||||
let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3));
|
||||
if id < 0 {
|
||||
return x - x * (s1 + s2);
|
||||
}
|
||||
let id = id as usize;
|
||||
let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x);
|
||||
let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x);
|
||||
if sign {
|
||||
-z
|
||||
} else {
|
||||
|
@ -124,7 +124,7 @@ pub fn exp(mut x: f64) -> f64 {
|
||||
/* if |x| > 0.5 ln2 */
|
||||
if hx >= 0x3ff0a2b2 {
|
||||
/* if |x| >= 1.5 ln2 */
|
||||
k = (INVLN2 * x + HALF[sign as usize]) as i32;
|
||||
k = (INVLN2 * x + i!(HALF, sign as usize)) as i32;
|
||||
} else {
|
||||
k = 1 - sign - sign;
|
||||
}
|
||||
|
@ -374,14 +374,14 @@ pub fn exp2(mut x: f64) -> f64 {
|
||||
let mut i0 = ui as u32;
|
||||
i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
|
||||
let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32;
|
||||
let ki = ku as i32 / TBLSIZE as i32;
|
||||
let ki = div!(ku as i32, TBLSIZE as i32);
|
||||
i0 %= TBLSIZE as u32;
|
||||
let uf = f64::from_bits(ui) - redux;
|
||||
let mut z = x - uf;
|
||||
|
||||
/* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */
|
||||
let t = f64::from_bits(TBL[2 * i0 as usize]); /* exp2t[i0] */
|
||||
z -= f64::from_bits(TBL[2 * i0 as usize + 1]); /* eps[i0] */
|
||||
let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */
|
||||
z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */
|
||||
let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5))));
|
||||
|
||||
scalbn(r, ki)
|
||||
|
@ -126,7 +126,7 @@ pub fn exp2f(mut x: f32) -> f32 {
|
||||
uf -= redux;
|
||||
let z: f64 = (x - uf) as f64;
|
||||
/* Compute r = exp2(y) = exp2ft[i0] * p(z). */
|
||||
let r: f64 = f64::from_bits(EXP2FT[i0 as usize]);
|
||||
let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize));
|
||||
let t: f64 = r as f64 * z;
|
||||
let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64);
|
||||
|
||||
|
@ -70,7 +70,7 @@ pub fn expf(mut x: f32) -> f32 {
|
||||
/* if |x| > 0.5 ln2 */
|
||||
if hx > 0x3f851592 {
|
||||
/* if |x| > 1.5 ln2 */
|
||||
k = (INV_LN2 * x + HALF[sign as usize]) as i32;
|
||||
k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32;
|
||||
} else {
|
||||
k = 1 - sign - sign;
|
||||
}
|
||||
|
@ -58,6 +58,24 @@ macro_rules! i {
|
||||
};
|
||||
}
|
||||
|
||||
// Temporary macro to avoid panic codegen for division (in debug mode too). At
|
||||
// the time of this writing this is only used in a few places, and once
|
||||
// rust-lang/rust#72751 is fixed then this macro will no longer be necessary and
|
||||
// the native `/` operator can be used and panics won't be codegen'd.
|
||||
#[cfg(any(debug_assertions, not(feature = "unstable")))]
|
||||
macro_rules! div {
|
||||
($a:expr, $b:expr) => {
|
||||
$a / $b
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "unstable"))]
|
||||
macro_rules! div {
|
||||
($a:expr, $b:expr) => {
|
||||
unsafe { core::intrinsics::unchecked_div($a, $b) }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! llvm_intrinsically_optimized {
|
||||
(#[cfg($($clause:tt)*)] $e:expr) => {
|
||||
#[cfg(all(feature = "unstable", $($clause)*))]
|
||||
|
@ -299,8 +299,8 @@ pub fn pow(x: f64, y: f64) -> f64 {
|
||||
ax = with_set_high_word(ax, ix as u32);
|
||||
|
||||
/* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
|
||||
let u: f64 = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
|
||||
let v: f64 = 1.0 / (ax + BP[k as usize]);
|
||||
let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
|
||||
let v: f64 = 1.0 / (ax + i!(BP, k as usize));
|
||||
let ss: f64 = u * v;
|
||||
let s_h = with_set_low_word(ss, 0);
|
||||
|
||||
@ -309,7 +309,7 @@ pub fn pow(x: f64, y: f64) -> f64 {
|
||||
0.0,
|
||||
((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18),
|
||||
);
|
||||
let t_l: f64 = ax - (t_h - BP[k as usize]);
|
||||
let t_l: f64 = ax - (t_h - i!(BP, k as usize));
|
||||
let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l);
|
||||
|
||||
/* compute log(ax) */
|
||||
@ -328,12 +328,12 @@ pub fn pow(x: f64, y: f64) -> f64 {
|
||||
let p_h: f64 = with_set_low_word(u + v, 0);
|
||||
let p_l = v - (p_h - u);
|
||||
let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
|
||||
let z_l: f64 = CP_L * p_h + p_l * CP + DP_L[k as usize];
|
||||
let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
|
||||
|
||||
/* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
|
||||
let t: f64 = n as f64;
|
||||
t1 = with_set_low_word(((z_h + z_l) + DP_H[k as usize]) + t, 0);
|
||||
t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
|
||||
t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0);
|
||||
t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
|
||||
}
|
||||
|
||||
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
|
||||
|
@ -238,8 +238,8 @@ pub fn powf(x: f32, y: f32) -> f32 {
|
||||
ax = f32::from_bits(ix as u32);
|
||||
|
||||
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
|
||||
u = ax - BP[k as usize]; /* bp[0]=1.0, bp[1]=1.5 */
|
||||
v = 1.0 / (ax + BP[k as usize]);
|
||||
u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */
|
||||
v = 1.0 / (ax + i!(BP, k as usize));
|
||||
s = u * v;
|
||||
s_h = s;
|
||||
is = s_h.to_bits() as i32;
|
||||
@ -247,7 +247,7 @@ pub fn powf(x: f32, y: f32) -> f32 {
|
||||
/* t_h=ax+bp[k] High */
|
||||
is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32;
|
||||
t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21));
|
||||
t_l = ax - (t_h - BP[k as usize]);
|
||||
t_l = ax - (t_h - i!(BP, k as usize));
|
||||
s_l = v * ((u - s_h * t_h) - s_h * t_l);
|
||||
/* compute log(ax) */
|
||||
s2 = s * s;
|
||||
@ -267,13 +267,13 @@ pub fn powf(x: f32, y: f32) -> f32 {
|
||||
p_h = f32::from_bits(is as u32 & 0xfffff000);
|
||||
p_l = v - (p_h - u);
|
||||
z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */
|
||||
z_l = CP_L * p_h + p_l * CP + DP_L[k as usize];
|
||||
z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize);
|
||||
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
|
||||
t = n as f32;
|
||||
t1 = ((z_h + z_l) + DP_H[k as usize]) + t;
|
||||
t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t;
|
||||
is = t1.to_bits() as i32;
|
||||
t1 = f32::from_bits(is as u32 & 0xfffff000);
|
||||
t2 = z_l - (((t1 - t) - DP_H[k as usize]) - z_h);
|
||||
t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h);
|
||||
};
|
||||
|
||||
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
|
||||
|
@ -167,21 +167,21 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
|
||||
let mut z = f64::from_bits(ui);
|
||||
let mut tx = [0.0; 3];
|
||||
for i in 0..2 {
|
||||
tx[i] = z as i32 as f64;
|
||||
z = (z - tx[i]) * x1p24;
|
||||
i!(tx,i, =, z as i32 as f64);
|
||||
z = (z - i!(tx, i)) * x1p24;
|
||||
}
|
||||
tx[2] = z;
|
||||
i!(tx,2, =, z);
|
||||
/* skip zero terms, first term is non-zero */
|
||||
let mut i = 2;
|
||||
while i != 0 && tx[i] == 0.0 {
|
||||
while i != 0 && i!(tx, i) == 0.0 {
|
||||
i -= 1;
|
||||
}
|
||||
let mut ty = [0.0; 3];
|
||||
let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1);
|
||||
if sign != 0 {
|
||||
return (-n, -ty[0], -ty[1]);
|
||||
return (-n, -i!(ty, 0), -i!(ty, 1));
|
||||
}
|
||||
(n, ty[0], ty[1])
|
||||
(n, i!(ty, 0), i!(ty, 1))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -242,12 +242,12 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
|
||||
let mut iq: [i32; 20] = [0; 20];
|
||||
|
||||
/* initialize jk*/
|
||||
let jk = INIT_JK[prec];
|
||||
let jk = i!(INIT_JK, prec);
|
||||
let jp = jk;
|
||||
|
||||
/* determine jx,jv,q0, note that 3>q0 */
|
||||
let jx = nx - 1;
|
||||
let mut jv = (e0 - 3) / 24;
|
||||
let mut jv = div!(e0 - 3, 24);
|
||||
if jv < 0 {
|
||||
jv = 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user