From 1f058a02c07492691ec1a69dd741cf5a0da18d8a Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 29 Jul 2022 15:09:37 +0100 Subject: [PATCH] cranelift: Add MinGW `fma` regression tests (#4517) * cranelift: Add MinGW `fma` regression tests * cranelift: Fix FMA in interpreter * cranelift: Add separate `fma` test suite for the interpreter The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we use libm which has issues with some inputs. We should delete `fma-interpreter.clif` and enable the interpreter on the main `fma.clif` file once those are fixed. --- Cargo.lock | 1 + cranelift/codegen/src/ir/immediates.rs | 12 --------- .../filetests/runtests/fma-interpreter.clif | 25 ++++++++++++++++++ .../filetests/filetests/runtests/fma.clif | 18 ++++++++++++- cranelift/interpreter/Cargo.toml | 3 +++ cranelift/interpreter/src/value.rs | 26 +++++++++++++++++-- 6 files changed, 70 insertions(+), 15 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/fma-interpreter.clif diff --git a/Cargo.lock b/Cargo.lock index f3ef1c75e5..b08c039d89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -595,6 +595,7 @@ dependencies = [ "cranelift-entity", "cranelift-frontend", "cranelift-reader", + "libm", "log", "smallvec", "thiserror", diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index 6355cbff9c..ae7c5f20fa 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -770,12 +770,6 @@ impl Ieee32 { f32::from_bits(self.0) } - /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a - /// more accurate result than an unfused multiply-add. - pub fn mul_add(&self, a: Self, b: Self) -> Self { - Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32())) - } - /// Returns the square root of self. pub fn sqrt(self) -> Self { Self::with_float(self.as_f32().sqrt()) @@ -962,12 +956,6 @@ impl Ieee64 { f64::from_bits(self.0) } - /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a - /// more accurate result than an unfused multiply-add. - pub fn mul_add(&self, a: Self, b: Self) -> Self { - Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64())) - } - /// Returns the square root of self. pub fn sqrt(self) -> Self { Self::with_float(self.as_f64().sqrt()) diff --git a/cranelift/filetests/filetests/runtests/fma-interpreter.clif b/cranelift/filetests/filetests/runtests/fma-interpreter.clif new file mode 100644 index 0000000000..90e3d566e8 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fma-interpreter.clif @@ -0,0 +1,25 @@ +test interpret + +; The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we +; use libm which has issues with some inputs. We should delete this file and enable the interpreter +; on the main `fma.clif` file once those are fixed. + +; See: https://github.com/bytecodealliance/wasmtime/pull/4517 +; See: https://github.com/rust-lang/libm/issues/263 + +function %fma_f32(f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + return v3 +} +; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 +; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14 + + +function %fma_f64(f64, f64, f64) -> f64 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma v0, v1, v2 + return v3 +} +; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 +; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21 diff --git a/cranelift/filetests/filetests/runtests/fma.clif b/cranelift/filetests/filetests/runtests/fma.clif index e9429f4b51..dfe7a95038 100644 --- a/cranelift/filetests/filetests/runtests/fma.clif +++ b/cranelift/filetests/filetests/runtests/fma.clif @@ -1,4 +1,3 @@ -test interpret test run target aarch64 target s390x @@ -38,6 +37,15 @@ block0(v0: f32, v1: f32, v2: f32): ; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0 ; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126 +; Regression tests for x86_64-pc-windows-gnu +; See: https://github.com/bytecodealliance/wasmtime/issues/4512 +; run: %fma_f32(0x1.0p100, 0x1.0p100, -Inf) == -Inf +; run: %fma_f32(0x1.fffffep23, 0x1.000004p28, 0x1.fcp5) == 0x1.000002p52 +; run: %fma_f32(0x1.84ae3p125, 0x1.6p-141, 0x1.0p-149) == 0x1.0b37c2p-15 +; run: %fma_f32(0x1.00001p50, 0x1.1p50, 0x1.0p-149) == 0x1.100012p100 +; run: %fma_f32(0x1.000002p50, 0x1.8p50, -0x1.0p-149) == 0x1.800002p100 +; run: %fma_f32(0x1.83bd78p4, -0x1.cp118, -0x1.344108p-2) == -0x1.5345cap123 + ;; The IEEE754 Standard does not make a lot of guarantees about what @@ -98,6 +106,14 @@ block0(v0: f64, v1: f64, v2: f64): ; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 +; Regression tests for x86_64-pc-windows-gnu +; See: https://github.com/bytecodealliance/wasmtime/issues/4512 +; run: %fma_f64(0x1.0p1000, 0x1.0p1000, -Inf) == -Inf +; run: %fma_f64(-0x1.4f8ac19291ffap1023, 0x1.39c33c8d39b7p-1025, 0x1.ee11f685e2e12p-1) == 0x1.2071b0283f156p-1 +; run: %fma_f64(0x1.0000000000008p500, 0x1.1p500, 0x1.0p-1074) == 0x1.1000000000009p1000 +; run: %fma_f64(0x1.0000000000001p500, 0x1.8p500, -0x1.0p-1074) == 0x1.8000000000001p1000 +; run: %fma_f64(0x0.ffffffep513, 0x1.0000002p511, -0x1.0p-1074) == 0x1.fffffffffffffp1023 + ;; The IEEE754 Standard does not make a lot of guarantees about what ;; comes out of NaN producing operations, we just check if its a NaN function %fma_is_nan_f64(f64, f64, f64) -> i32 { diff --git a/cranelift/interpreter/Cargo.toml b/cranelift/interpreter/Cargo.toml index 4003495cb0..26b7733be4 100644 --- a/cranelift/interpreter/Cargo.toml +++ b/cranelift/interpreter/Cargo.toml @@ -17,6 +17,9 @@ log = { version = "0.4.8", default-features = false } smallvec = "1.6.1" thiserror = "1.0.15" +[target.x86_64-pc-windows-gnu.dependencies] +libm = "0.2" + [dev-dependencies] cranelift-frontend = { path = "../frontend", version = "0.87.0" } cranelift-reader = { path = "../reader", version = "0.87.0" } diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 388309fda3..94d4a11bc9 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -555,10 +555,32 @@ impl Value for DataValue { fn fma(self, b: Self, c: Self) -> ValueResult { match (self, b, c) { (DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => { - Ok(DataValue::F32(a.mul_add(b, c))) + // The `fma` function for `x86_64-pc-windows-gnu` is incorrect. Use `libm`'s instead. + // See: https://github.com/bytecodealliance/wasmtime/issues/4512 + #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))] + let res = libm::fmaf(a.as_f32(), b.as_f32(), c.as_f32()); + + #[cfg(not(all( + target_arch = "x86_64", + target_os = "windows", + target_env = "gnu" + )))] + let res = a.as_f32().mul_add(b.as_f32(), c.as_f32()); + + Ok(DataValue::F32(res.into())) } (DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => { - Ok(DataValue::F64(a.mul_add(b, c))) + #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))] + let res = libm::fma(a.as_f64(), b.as_f64(), c.as_f64()); + + #[cfg(not(all( + target_arch = "x86_64", + target_os = "windows", + target_env = "gnu" + )))] + let res = a.as_f64().mul_add(b.as_f64(), c.as_f64()); + + Ok(DataValue::F64(res.into())) } (a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())), }