From 5672d6722919a6bdd9fbb386eec667fb091f4cbc Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 00:56:18 +0000 Subject: [PATCH 1/8] force test_near_pi in rem_pio2.rs to be evaluated at runtime not compiletime. --- src/math/rem_pio2.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs index 46f7c38..f58fa35 100644 --- a/src/math/rem_pio2.rs +++ b/src/math/rem_pio2.rs @@ -190,20 +190,28 @@ mod tests { #[test] fn test_near_pi() { + let arg = 3.141592025756836; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592025756836), + rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23) ); + let arg = 3.141592033207416; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592033207416), + rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23) ); + let arg = 3.141592144966125; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592144966125), + rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23) ); + let arg = 3.141592979431152; + force_eval!(arg); assert_eq!( - rem_pio2(3.141592979431152), + rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23) ); } From 8b0db9f5baacf87494547f23bb656c1903825baf Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 5 Jan 2021 17:32:30 +0000 Subject: [PATCH 2/8] Fix testcases on x87 --- src/math/ceil.rs | 19 +++++++++++++++++++ src/math/floor.rs | 19 +++++++++++++++++++ src/math/j1f.rs | 6 +++++- src/math/rem_pio2f.rs | 4 +++- src/math/sincosf.rs | 36 ++++++++++++++++++++++++++++++++---- 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/src/math/ceil.rs b/src/math/ceil.rs index eda28b9..22d8929 100644 --- a/src/math/ceil.rs +++ b/src/math/ceil.rs @@ -1,3 +1,4 @@ +#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. / f64::EPSILON; @@ -15,6 +16,24 @@ pub fn ceil(x: f64) -> f64 { return unsafe { ::core::intrinsics::ceilf64(x) } } } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. + //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } + } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/src/math/floor.rs b/src/math/floor.rs index b2b7605..d09f9a1 100644 --- a/src/math/floor.rs +++ b/src/math/floor.rs @@ -1,3 +1,4 @@ +#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. / f64::EPSILON; @@ -15,6 +16,24 @@ pub fn floor(x: f64) -> f64 { return unsafe { ::core::intrinsics::floorf64(x) } } } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + { + //use an alternative implementation on x86, because the + //main implementation fails with the x87 FPU used by + //debian i386, probablly due to excess precision issues. + //basic implementation taken from https://github.com/rust-lang/libm/issues/219 + use super::fabs; + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } + } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/src/math/j1f.rs b/src/math/j1f.rs index 3369490..225b719 100644 --- a/src/math/j1f.rs +++ b/src/math/j1f.rs @@ -369,6 +369,10 @@ mod tests { } #[test] fn test_y1f_2002() { - assert_eq!(y1f(2.0000002_f32), -0.10703229_f32); + //allow slightly different result on x87 + let res = y1f(2.0000002_f32); + if res != -0.10703231_f32 { + assert_eq!(res, -0.10703229_f32); + } } } diff --git a/src/math/rem_pio2f.rs b/src/math/rem_pio2f.rs index 5d392ba..4d4499b 100644 --- a/src/math/rem_pio2f.rs +++ b/src/math/rem_pio2f.rs @@ -43,7 +43,9 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - let f_n = x64 * INV_PIO2 + TOINT - TOINT; + // use to_bits and from_bits to force rounding to storage format on + // x87. + let f_n = f64::from_bits((x64 * INV_PIO2 + TOINT).to_bits()) - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } if ix >= 0x7f800000 { diff --git a/src/math/sincosf.rs b/src/math/sincosf.rs index 83c9f40..5304e8c 100644 --- a/src/math/sincosf.rs +++ b/src/math/sincosf.rs @@ -147,10 +147,38 @@ mod tests { let (s_minus, c_minus) = sincosf(theta - 2. * PI); const TOLERANCE: f32 = 1e-6; - assert!((s - s_plus).abs() < TOLERANCE); - assert!((s - s_minus).abs() < TOLERANCE); - assert!((c - c_plus).abs() < TOLERANCE); - assert!((c - c_minus).abs() < TOLERANCE); + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); } } } From 16ce35bb191ed075dae3668257025feed2cf169f Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 21 Dec 2021 22:41:29 +0000 Subject: [PATCH 3/8] Use force_eval instead of to_bits/from_bits combination, Using to_bits/from_bits to force conversion to storage format apparently doesn't work in release mode. Also add an architecture conditional to avoid pessimising other architectures. --- src/math/rem_pio2f.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/math/rem_pio2f.rs b/src/math/rem_pio2f.rs index 4d4499b..3ce8f9a 100644 --- a/src/math/rem_pio2f.rs +++ b/src/math/rem_pio2f.rs @@ -43,9 +43,11 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ - // use to_bits and from_bits to force rounding to storage format on - // x87. - let f_n = f64::from_bits((x64 * INV_PIO2 + TOINT).to_bits()) - TOINT; + let tmp = x64 * INV_PIO2 + TOINT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + let f_n = tmp - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } if ix >= 0x7f800000 { From fa70d9bda797b5a78b1fb9a6ed7b372935f14e6e Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 21 Dec 2021 23:53:06 +0000 Subject: [PATCH 4/8] Add forced rounding to storage format for x87 to rem_pio2.rs as well. --- src/math/rem_pio2.rs | 6 +++- src/math/sincos.rs | 74 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs index f58fa35..4ac9415 100644 --- a/src/math/rem_pio2.rs +++ b/src/math/rem_pio2.rs @@ -50,7 +50,11 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ - let f_n = x as f64 * INV_PIO2 + TO_INT - TO_INT; + let tmp = x as f64 * INV_PIO2 + TO_INT; + // force rounding of tmp to it's storage format on x87 to avoid + // excess precision issues. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + let f_n = tmp - TO_INT; let n = f_n as i32; let mut r = x - f_n * PIO2_1; let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ diff --git a/src/math/sincos.rs b/src/math/sincos.rs index d49f65c..bfc4561 100644 --- a/src/math/sincos.rs +++ b/src/math/sincos.rs @@ -57,3 +57,77 @@ pub fn sincos(x: f64) -> (f64, f64) { _ => (0.0, 1.0), } } + +// These tests are based on those from sincosf.rs +#[cfg(test)] +mod tests { + use super::sincos; + + const TOLERANCE: f64 = 1e-6; + + #[test] + fn with_pi() { + let (s, c) = sincos(core::f64::consts::PI); + assert!( + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); + assert!( + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); + } + + #[test] + fn rotational_symmetry() { + use core::f64::consts::PI; + const N: usize = 24; + for n in 0..N { + let theta = 2. * PI * (n as f64) / (N as f64); + let (s, c) = sincos(theta); + let (s_plus, c_plus) = sincos(theta + 2. * PI); + let (s_minus, c_minus) = sincos(theta - 2. * PI); + + assert!( + (s - s_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_plus, + (s - s_plus).abs(), + TOLERANCE + ); + assert!( + (s - s_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + s_minus, + (s - s_minus).abs(), + TOLERANCE + ); + assert!( + (c - c_plus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_plus, + (c - c_plus).abs(), + TOLERANCE + ); + assert!( + (c - c_minus).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + c, + c_minus, + (c - c_minus).abs(), + TOLERANCE + ); + } + } +} From db80cfb90663284dd351f85284cb2a1b295d4043 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 00:21:25 +0000 Subject: [PATCH 5/8] round to storage format in some tests before comparison to prevent spurious errors on x87. --- src/math/fma.rs | 5 ++++- src/math/pow.rs | 2 ++ src/math/sin.rs | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/math/fma.rs b/src/math/fma.rs index 85d8421..c20de94 100644 --- a/src/math/fma.rs +++ b/src/math/fma.rs @@ -218,7 +218,10 @@ mod tests { -0.00000000000000022204460492503126, ); - assert_eq!(fma(-0.992, -0.992, -0.992), -0.007936000000000007,); + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + assert_eq!(result, -0.007936000000000007,); } #[test] diff --git a/src/math/pow.rs b/src/math/pow.rs index f79680a..3249e7e 100644 --- a/src/math/pow.rs +++ b/src/math/pow.rs @@ -484,6 +484,8 @@ mod tests { let exp = expected(*val); let res = computed(*val); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(res); assert!( if exp.is_nan() { res.is_nan() diff --git a/src/math/sin.rs b/src/math/sin.rs index 1329b41..a562aa6 100644 --- a/src/math/sin.rs +++ b/src/math/sin.rs @@ -81,5 +81,7 @@ pub fn sin(x: f64) -> f64 { fn test_near_pi() { let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 - assert_eq!(sin(x), sx); + let result = sin(x); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + assert_eq!(result, sx); } From 1606eeae5c1a916adb926d2972ab63a781b1b670 Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 4 Jan 2022 20:38:09 +0000 Subject: [PATCH 6/8] only allow x87-specific result in j1f.rs test on x87 --- src/math/j1f.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/math/j1f.rs b/src/math/j1f.rs index 225b719..775ff2b 100644 --- a/src/math/j1f.rs +++ b/src/math/j1f.rs @@ -371,8 +371,7 @@ mod tests { fn test_y1f_2002() { //allow slightly different result on x87 let res = y1f(2.0000002_f32); - if res != -0.10703231_f32 { - assert_eq!(res, -0.10703229_f32); - } + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) { return }; + assert_eq!(res, -0.10703229_f32); } } From 9b6f469d5b3e35f793800da1c58070da2478f76e Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Tue, 4 Jan 2022 20:51:40 +0000 Subject: [PATCH 7/8] allow force_eval! to produce a result and use that result to more explicitly force rounding on x87. --- src/math/fma.rs | 3 ++- src/math/mod.rs | 2 +- src/math/pow.rs | 6 ++++-- src/math/rem_pio2.rs | 11 ++++++----- src/math/rem_pio2f.rs | 3 ++- src/math/sin.rs | 3 ++- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/math/fma.rs b/src/math/fma.rs index c20de94..516f9ad 100644 --- a/src/math/fma.rs +++ b/src/math/fma.rs @@ -220,7 +220,8 @@ mod tests { let result = fma(-0.992, -0.992, -0.992); //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); assert_eq!(result, -0.007936000000000007,); } diff --git a/src/math/mod.rs b/src/math/mod.rs index ceeee0b..7f4c8bc 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -1,7 +1,7 @@ macro_rules! force_eval { ($e:expr) => { unsafe { - ::core::ptr::read_volatile(&$e); + ::core::ptr::read_volatile(&$e) } }; } diff --git a/src/math/pow.rs b/src/math/pow.rs index 3249e7e..6a19ae6 100644 --- a/src/math/pow.rs +++ b/src/math/pow.rs @@ -484,8 +484,10 @@ mod tests { let exp = expected(*val); let res = computed(*val); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(exp); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(res); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let exp = force_eval!(exp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let res = force_eval!(res); assert!( if exp.is_nan() { res.is_nan() diff --git a/src/math/rem_pio2.rs b/src/math/rem_pio2.rs index 4ac9415..644616f 100644 --- a/src/math/rem_pio2.rs +++ b/src/math/rem_pio2.rs @@ -53,7 +53,8 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let tmp = x as f64 * INV_PIO2 + TO_INT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); let f_n = tmp - TO_INT; let n = f_n as i32; let mut r = x - f_n * PIO2_1; @@ -195,25 +196,25 @@ mod tests { #[test] fn test_near_pi() { let arg = 3.141592025756836; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23) ); let arg = 3.141592033207416; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23) ); let arg = 3.141592144966125; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23) ); let arg = 3.141592979431152; - force_eval!(arg); + let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23) diff --git a/src/math/rem_pio2f.rs b/src/math/rem_pio2f.rs index 3ce8f9a..775f5d7 100644 --- a/src/math/rem_pio2f.rs +++ b/src/math/rem_pio2f.rs @@ -46,7 +46,8 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let tmp = x64 * INV_PIO2 + TOINT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(tmp); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let tmp = force_eval!(tmp); let f_n = tmp - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } diff --git a/src/math/sin.rs b/src/math/sin.rs index a562aa6..a53843d 100644 --- a/src/math/sin.rs +++ b/src/math/sin.rs @@ -82,6 +82,7 @@ fn test_near_pi() { let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 let result = sin(x); - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]force_eval!(result); + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); assert_eq!(result, sx); } From 5e68d37130dd0a484b28b27f766c306111e2f5af Mon Sep 17 00:00:00 2001 From: Peter Michael Green Date: Wed, 22 Dec 2021 01:50:25 +0000 Subject: [PATCH 8/8] Apply formatting fixes from CI --- src/math/j1f.rs | 5 ++++- src/math/mod.rs | 4 +--- src/math/sincos.rs | 28 ++++++++++++++-------------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/math/j1f.rs b/src/math/j1f.rs index 775ff2b..c39f8ff 100644 --- a/src/math/j1f.rs +++ b/src/math/j1f.rs @@ -371,7 +371,10 @@ mod tests { fn test_y1f_2002() { //allow slightly different result on x87 let res = y1f(2.0000002_f32); - if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) { return }; + if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) + { + return; + } assert_eq!(res, -0.10703229_f32); } } diff --git a/src/math/mod.rs b/src/math/mod.rs index 7f4c8bc..81bfc53 100644 --- a/src/math/mod.rs +++ b/src/math/mod.rs @@ -1,8 +1,6 @@ macro_rules! force_eval { ($e:expr) => { - unsafe { - ::core::ptr::read_volatile(&$e) - } + unsafe { ::core::ptr::read_volatile(&$e) } }; } diff --git a/src/math/sincos.rs b/src/math/sincos.rs index bfc4561..4ab5884 100644 --- a/src/math/sincos.rs +++ b/src/math/sincos.rs @@ -69,21 +69,21 @@ mod tests { fn with_pi() { let (s, c) = sincos(core::f64::consts::PI); assert!( - (s - 0.0).abs() < TOLERANCE, - "|{} - {}| = {} >= {}", - s, - 0.0, - (s - 0.0).abs(), - TOLERANCE - ); + (s - 0.0).abs() < TOLERANCE, + "|{} - {}| = {} >= {}", + s, + 0.0, + (s - 0.0).abs(), + TOLERANCE + ); assert!( - (c + 1.0).abs() < TOLERANCE, - "|{} + {}| = {} >= {}", - c, - 1.0, - (s + 1.0).abs(), - TOLERANCE - ); + (c + 1.0).abs() < TOLERANCE, + "|{} + {}| = {} >= {}", + c, + 1.0, + (s + 1.0).abs(), + TOLERANCE + ); } #[test]