cranelift: Add MinGW `fma` regression tests (#4517)

* cranelift: Add MinGW `fma` regression tests * cranelift: Fix FMA in interpreter * cranelift: Add separate `fma` test suite for the interpreter The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we use libm which has issues with some inputs. We should delete `fma-interpreter.clif` and enable the interpreter on the main `fma.clif` file once those are fixed.
2 years ago · 1f058a02c0
6 changed files with 70 additions and 15 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -595,6 +595,7 @@ dependencies = [
 "cranelift-entity",
 "cranelift-frontend",
 "cranelift-reader",
+ "libm",
 "log",
 "smallvec",
 "thiserror",
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@ -770,12 +770,6 @@ impl Ieee32 {
        f32::from_bits(self.0)
    }

-    /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
-    /// more accurate result than an unfused multiply-add.
-    pub fn mul_add(&self, a: Self, b: Self) -> Self {
-        Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32()))
-    }
-
    /// Returns the square root of self.
    pub fn sqrt(self) -> Self {
        Self::with_float(self.as_f32().sqrt())
@ -962,12 +956,6 @@ impl Ieee64 {
        f64::from_bits(self.0)
    }

-    /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
-    /// more accurate result than an unfused multiply-add.
-    pub fn mul_add(&self, a: Self, b: Self) -> Self {
-        Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64()))
-    }
-
    /// Returns the square root of self.
    pub fn sqrt(self) -> Self {
        Self::with_float(self.as_f64().sqrt())
--- a/cranelift/filetests/filetests/runtests/fma-interpreter.clif
+++ b/cranelift/filetests/filetests/runtests/fma-interpreter.clif
@ -0,0 +1,25 @@
+test interpret
+
+; The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we
+; use libm which has issues with some inputs. We should delete this file and enable the interpreter
+; on the main `fma.clif` file once those are fixed.
+
+; See: https://github.com/bytecodealliance/wasmtime/pull/4517
+; See: https://github.com/rust-lang/libm/issues/263
+
+function %fma_f32(f32, f32, f32) -> f32 {
+block0(v0: f32, v1: f32, v2: f32):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
+
+
+function %fma_f64(f64, f64, f64) -> f64 {
+block0(v0: f64, v1: f64, v2: f64):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
--- a/cranelift/filetests/filetests/runtests/fma.clif
+++ b/cranelift/filetests/filetests/runtests/fma.clif
@ -1,4 +1,3 @@
-test interpret
 test run
 target aarch64
 target s390x
@ -38,6 +37,15 @@ block0(v0: f32, v1: f32, v2: f32):
 ; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0
 ; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126

+; Regression tests for x86_64-pc-windows-gnu
+; See: https://github.com/bytecodealliance/wasmtime/issues/4512
+; run: %fma_f32(0x1.0p100, 0x1.0p100, -Inf) == -Inf
+; run: %fma_f32(0x1.fffffep23, 0x1.000004p28, 0x1.fcp5) == 0x1.000002p52
+; run: %fma_f32(0x1.84ae3p125, 0x1.6p-141, 0x1.0p-149) == 0x1.0b37c2p-15
+; run: %fma_f32(0x1.00001p50, 0x1.1p50, 0x1.0p-149) == 0x1.100012p100
+; run: %fma_f32(0x1.000002p50, 0x1.8p50, -0x1.0p-149) == 0x1.800002p100
+; run: %fma_f32(0x1.83bd78p4, -0x1.cp118, -0x1.344108p-2) == -0x1.5345cap123
+


 ;; The IEEE754 Standard does not make a lot of guarantees about what
@ -98,6 +106,14 @@ block0(v0: f64, v1: f64, v2: f64):
 ; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022


+; Regression tests for x86_64-pc-windows-gnu
+; See: https://github.com/bytecodealliance/wasmtime/issues/4512
+; run: %fma_f64(0x1.0p1000, 0x1.0p1000, -Inf) == -Inf
+; run: %fma_f64(-0x1.4f8ac19291ffap1023, 0x1.39c33c8d39b7p-1025, 0x1.ee11f685e2e12p-1) == 0x1.2071b0283f156p-1
+; run: %fma_f64(0x1.0000000000008p500, 0x1.1p500, 0x1.0p-1074) == 0x1.1000000000009p1000
+; run: %fma_f64(0x1.0000000000001p500, 0x1.8p500, -0x1.0p-1074) == 0x1.8000000000001p1000
+; run: %fma_f64(0x0.ffffffep513, 0x1.0000002p511, -0x1.0p-1074) == 0x1.fffffffffffffp1023
+
 ;; The IEEE754 Standard does not make a lot of guarantees about what
 ;; comes out of NaN producing operations, we just check if its a NaN
 function %fma_is_nan_f64(f64, f64, f64) -> i32 {
--- a/cranelift/interpreter/Cargo.toml
+++ b/cranelift/interpreter/Cargo.toml
@ -17,6 +17,9 @@ log = { version = "0.4.8", default-features = false }
 smallvec = "1.6.1"
 thiserror = "1.0.15"

+[target.x86_64-pc-windows-gnu.dependencies]
+libm = "0.2"
+
 [dev-dependencies]
 cranelift-frontend = { path = "../frontend", version = "0.87.0" }
 cranelift-reader = { path = "../reader", version = "0.87.0" }
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@ -555,10 +555,32 @@ impl Value for DataValue {
    fn fma(self, b: Self, c: Self) -> ValueResult<Self> {
        match (self, b, c) {
            (DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => {
-                Ok(DataValue::F32(a.mul_add(b, c)))
+                // The `fma` function for `x86_64-pc-windows-gnu` is incorrect. Use `libm`'s instead.
+                // See: https://github.com/bytecodealliance/wasmtime/issues/4512
+                #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
+                let res = libm::fmaf(a.as_f32(), b.as_f32(), c.as_f32());
+
+                #[cfg(not(all(
+                    target_arch = "x86_64",
+                    target_os = "windows",
+                    target_env = "gnu"
+                )))]
+                let res = a.as_f32().mul_add(b.as_f32(), c.as_f32());
+
+                Ok(DataValue::F32(res.into()))
            }
            (DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => {
-                Ok(DataValue::F64(a.mul_add(b, c)))
+                #[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
+                let res = libm::fma(a.as_f64(), b.as_f64(), c.as_f64());
+
+                #[cfg(not(all(
+                    target_arch = "x86_64",
+                    target_os = "windows",
+                    target_env = "gnu"
+                )))]
+                let res = a.as_f64().mul_add(b.as_f64(), c.as_f64());
+
+                Ok(DataValue::F64(res.into()))
            }
            (a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())),
        }