Browse Source

cranelift: Add MinGW `fma` regression tests (#4517)

* cranelift: Add MinGW `fma` regression tests

* cranelift: Fix FMA in interpreter

* cranelift: Add separate `fma` test suite for the interpreter

The interpreter can run `fma.clif` on most platforms, however on
`x86_64-pc-windows-gnu` we use libm which has issues with some inputs.
We should delete `fma-interpreter.clif` and enable the interpreter on
the main `fma.clif` file once those are fixed.
pull/4553/head
Afonso Bordado 2 years ago
committed by GitHub
parent
commit
1f058a02c0
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      Cargo.lock
  2. 12
      cranelift/codegen/src/ir/immediates.rs
  3. 25
      cranelift/filetests/filetests/runtests/fma-interpreter.clif
  4. 18
      cranelift/filetests/filetests/runtests/fma.clif
  5. 3
      cranelift/interpreter/Cargo.toml
  6. 26
      cranelift/interpreter/src/value.rs

1
Cargo.lock

@ -595,6 +595,7 @@ dependencies = [
"cranelift-entity",
"cranelift-frontend",
"cranelift-reader",
"libm",
"log",
"smallvec",
"thiserror",

12
cranelift/codegen/src/ir/immediates.rs

@ -770,12 +770,6 @@ impl Ieee32 {
f32::from_bits(self.0)
}
/// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
/// more accurate result than an unfused multiply-add.
pub fn mul_add(&self, a: Self, b: Self) -> Self {
Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32()))
}
/// Returns the square root of self.
pub fn sqrt(self) -> Self {
Self::with_float(self.as_f32().sqrt())
@ -962,12 +956,6 @@ impl Ieee64 {
f64::from_bits(self.0)
}
/// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a
/// more accurate result than an unfused multiply-add.
pub fn mul_add(&self, a: Self, b: Self) -> Self {
Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64()))
}
/// Returns the square root of self.
pub fn sqrt(self) -> Self {
Self::with_float(self.as_f64().sqrt())

25
cranelift/filetests/filetests/runtests/fma-interpreter.clif

@ -0,0 +1,25 @@
test interpret
; The interpreter can run `fma.clif` on most platforms, however on `x86_64-pc-windows-gnu` we
; use libm which has issues with some inputs. We should delete this file and enable the interpreter
; on the main `fma.clif` file once those are fixed.
; See: https://github.com/bytecodealliance/wasmtime/pull/4517
; See: https://github.com/rust-lang/libm/issues/263
function %fma_f32(f32, f32, f32) -> f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
return v3
}
; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
function %fma_f64(f64, f64, f64) -> f64 {
block0(v0: f64, v1: f64, v2: f64):
v3 = fma v0, v1, v2
return v3
}
; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21

18
cranelift/filetests/filetests/runtests/fma.clif

@ -1,4 +1,3 @@
test interpret
test run
target aarch64
target s390x
@ -38,6 +37,15 @@ block0(v0: f32, v1: f32, v2: f32):
; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0
; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126
; Regression tests for x86_64-pc-windows-gnu
; See: https://github.com/bytecodealliance/wasmtime/issues/4512
; run: %fma_f32(0x1.0p100, 0x1.0p100, -Inf) == -Inf
; run: %fma_f32(0x1.fffffep23, 0x1.000004p28, 0x1.fcp5) == 0x1.000002p52
; run: %fma_f32(0x1.84ae3p125, 0x1.6p-141, 0x1.0p-149) == 0x1.0b37c2p-15
; run: %fma_f32(0x1.00001p50, 0x1.1p50, 0x1.0p-149) == 0x1.100012p100
; run: %fma_f32(0x1.000002p50, 0x1.8p50, -0x1.0p-149) == 0x1.800002p100
; run: %fma_f32(0x1.83bd78p4, -0x1.cp118, -0x1.344108p-2) == -0x1.5345cap123
;; The IEEE754 Standard does not make a lot of guarantees about what
@ -98,6 +106,14 @@ block0(v0: f64, v1: f64, v2: f64):
; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
; Regression tests for x86_64-pc-windows-gnu
; See: https://github.com/bytecodealliance/wasmtime/issues/4512
; run: %fma_f64(0x1.0p1000, 0x1.0p1000, -Inf) == -Inf
; run: %fma_f64(-0x1.4f8ac19291ffap1023, 0x1.39c33c8d39b7p-1025, 0x1.ee11f685e2e12p-1) == 0x1.2071b0283f156p-1
; run: %fma_f64(0x1.0000000000008p500, 0x1.1p500, 0x1.0p-1074) == 0x1.1000000000009p1000
; run: %fma_f64(0x1.0000000000001p500, 0x1.8p500, -0x1.0p-1074) == 0x1.8000000000001p1000
; run: %fma_f64(0x0.ffffffep513, 0x1.0000002p511, -0x1.0p-1074) == 0x1.fffffffffffffp1023
;; The IEEE754 Standard does not make a lot of guarantees about what
;; comes out of NaN producing operations, we just check if its a NaN
function %fma_is_nan_f64(f64, f64, f64) -> i32 {

3
cranelift/interpreter/Cargo.toml

@ -17,6 +17,9 @@ log = { version = "0.4.8", default-features = false }
smallvec = "1.6.1"
thiserror = "1.0.15"
[target.x86_64-pc-windows-gnu.dependencies]
libm = "0.2"
[dev-dependencies]
cranelift-frontend = { path = "../frontend", version = "0.87.0" }
cranelift-reader = { path = "../reader", version = "0.87.0" }

26
cranelift/interpreter/src/value.rs

@ -555,10 +555,32 @@ impl Value for DataValue {
fn fma(self, b: Self, c: Self) -> ValueResult<Self> {
match (self, b, c) {
(DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => {
Ok(DataValue::F32(a.mul_add(b, c)))
// The `fma` function for `x86_64-pc-windows-gnu` is incorrect. Use `libm`'s instead.
// See: https://github.com/bytecodealliance/wasmtime/issues/4512
#[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
let res = libm::fmaf(a.as_f32(), b.as_f32(), c.as_f32());
#[cfg(not(all(
target_arch = "x86_64",
target_os = "windows",
target_env = "gnu"
)))]
let res = a.as_f32().mul_add(b.as_f32(), c.as_f32());
Ok(DataValue::F32(res.into()))
}
(DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => {
Ok(DataValue::F64(a.mul_add(b, c)))
#[cfg(all(target_arch = "x86_64", target_os = "windows", target_env = "gnu"))]
let res = libm::fma(a.as_f64(), b.as_f64(), c.as_f64());
#[cfg(not(all(
target_arch = "x86_64",
target_os = "windows",
target_env = "gnu"
)))]
let res = a.as_f64().mul_add(b.as_f64(), c.as_f64());
Ok(DataValue::F64(res.into()))
}
(a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())),
}

Loading…
Cancel
Save