From 2003ae99a04ea8b1a486dc79445da427d19aa345 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Tue, 5 Jul 2022 17:03:04 +0100 Subject: [PATCH] Implement `fma`/`fabs`/`fneg`/`fcopysign` on the interpreter (#4367) * cranelift: Implement `fma` on interpreter * cranelift: Implement `fabs` on interpreter * cranelift: Fix `fneg` implementation on interpreter `fneg` was implemented as `0 - x` which is not correct according to the standard since that operation makes no guarantees on what the output is when the input is `NaN`. However for `fneg` the output for `NaN` inputs is fully defined. * cranelift: Implement `fcopysign` on interpreter --- cranelift/codegen/src/ir/immediates.rs | 71 ++++++++++- .../filetests/filetests/runtests/fabs.clif | 93 ++++++++++++++ .../filetests/runtests/fcopysign.clif | 107 ++++++++++++++++ .../filetests/filetests/runtests/fma.clif | 119 ++++++++++++++++++ .../filetests/filetests/runtests/fneg.clif | 93 ++++++++++++++ cranelift/interpreter/src/step.rs | 8 +- cranelift/interpreter/src/value.rs | 30 +++++ 7 files changed, 511 insertions(+), 10 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/fabs.clif create mode 100644 cranelift/filetests/filetests/runtests/fcopysign.clif create mode 100644 cranelift/filetests/filetests/runtests/fma.clif create mode 100644 cranelift/filetests/filetests/runtests/fneg.clif diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index 704a6a78e9..59e32be9d5 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -12,6 +12,7 @@ use core::str::FromStr; use core::{i32, u32}; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; +use std::ops::Neg; /// Convert a type into a vector of bytes; all implementors in this file must use little-endian /// orderings of bytes to match WebAssembly's little-endianness. @@ -761,18 +762,39 @@ impl Ieee32 { /// Check if the value is a NaN. pub fn is_nan(&self) -> bool { - f32::from_bits(self.0).is_nan() + self.as_f32().is_nan() + } + + /// Converts Self to a rust f32 + pub fn as_f32(self) -> f32 { + f32::from_bits(self.0) + } + + /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a + /// more accurate result than an unfused multiply-add. + pub fn mul_add(&self, a: Self, b: Self) -> Self { + Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32())) } /// Returns the square root of self. pub fn sqrt(self) -> Self { - Self::with_float(f32::from_bits(self.0).sqrt()) + Self::with_float(self.as_f32().sqrt()) + } + + /// Computes the absolute value of self. + pub fn abs(self) -> Self { + Self::with_float(self.as_f32().abs()) + } + + /// Returns a number composed of the magnitude of self and the sign of sign. + pub fn copysign(self, sign: Self) -> Self { + Self::with_float(self.as_f32().copysign(sign.as_f32())) } } impl PartialOrd for Ieee32 { fn partial_cmp(&self, other: &Self) -> Option { - f32::from_bits(self.0).partial_cmp(&f32::from_bits(other.0)) + self.as_f32().partial_cmp(&other.as_f32()) } } @@ -806,6 +828,14 @@ impl IntoBytes for Ieee32 { } } +impl Neg for Ieee32 { + type Output = Ieee32; + + fn neg(self) -> Self::Output { + Self::with_float(self.as_f32().neg()) + } +} + impl Ieee64 { /// Create a new `Ieee64` containing the bits of `x`. pub fn with_bits(x: u64) -> Self { @@ -851,18 +881,39 @@ impl Ieee64 { /// Check if the value is a NaN. For [Ieee64], this means checking that the 11 exponent bits are /// all set. pub fn is_nan(&self) -> bool { - f64::from_bits(self.0).is_nan() + self.as_f64().is_nan() + } + + /// Converts Self to a rust f64 + pub fn as_f64(self) -> f64 { + f64::from_bits(self.0) + } + + /// Fused multiply-add. Computes (self * a) + b with only one rounding error, yielding a + /// more accurate result than an unfused multiply-add. + pub fn mul_add(&self, a: Self, b: Self) -> Self { + Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64())) } /// Returns the square root of self. pub fn sqrt(self) -> Self { - Self::with_float(f64::from_bits(self.0).sqrt()) + Self::with_float(self.as_f64().sqrt()) + } + + /// Computes the absolute value of self. + pub fn abs(self) -> Self { + Self::with_float(self.as_f64().abs()) + } + + /// Returns a number composed of the magnitude of self and the sign of sign. + pub fn copysign(self, sign: Self) -> Self { + Self::with_float(self.as_f64().copysign(sign.as_f64())) } } impl PartialOrd for Ieee64 { fn partial_cmp(&self, other: &Self) -> Option { - f64::from_bits(self.0).partial_cmp(&f64::from_bits(other.0)) + self.as_f64().partial_cmp(&other.as_f64()) } } @@ -902,6 +953,14 @@ impl IntoBytes for Ieee64 { } } +impl Neg for Ieee64 { + type Output = Ieee64; + + fn neg(self) -> Self::Output { + Self::with_float(self.as_f64().neg()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cranelift/filetests/filetests/runtests/fabs.clif b/cranelift/filetests/filetests/runtests/fabs.clif new file mode 100644 index 0000000000..4d63273efb --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fabs.clif @@ -0,0 +1,93 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + +function %fabs_f32(f32) -> f32 { +block0(v0: f32): + v1 = fabs v0 + return v1 +} +; run: %fabs_f32(0x9.0) == 0x9.0 +; run: %fabs_f32(-0x9.0) == 0x9.0 +; run: %fabs_f32(0x0.0) == 0x0.0 +; run: %fabs_f32(-0x0.0) == 0x0.0 + +; F32 Inf +; run: %fabs_f32(Inf) == Inf +; run: %fabs_f32(-Inf) == +Inf + +; F32 Epsilon / Max / Min Positive +; run: %fabs_f32(0x1.000000p-23) == 0x1.000000p-23 +; run: %fabs_f32(-0x1.000000p-23) == 0x1.000000p-23 +; run: %fabs_f32(0x1.fffffep127) == 0x1.fffffep127 +; run: %fabs_f32(-0x1.fffffep127) == 0x1.fffffep127 +; run: %fabs_f32(0x1.000000p-126) == 0x1.000000p-126 +; run: %fabs_f32(-0x1.000000p-126) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fabs_f32(0x0.800000p-126) == 0x0.800000p-126 +; run: %fabs_f32(-0x0.800000p-126) == 0x0.800000p-126 +; run: %fabs_f32(0x0.000002p-126) == 0x0.000002p-126 +; run: %fabs_f32(-0x0.000002p-126) == 0x0.000002p-126 + +; F32 NaN's +; Unlike with other operations fabs is guaranteed to only affect the sign bit +; run: %fabs_f32(+NaN) == +NaN +; run: %fabs_f32(-NaN) == +NaN +; run: %fabs_f32(+NaN:0x0) == +NaN:0x0 +; run: %fabs_f32(+NaN:0x1) == +NaN:0x1 +; run: %fabs_f32(+NaN:0x300001) == +NaN:0x300001 +; run: %fabs_f32(-NaN:0x0) == +NaN:0x0 +; run: %fabs_f32(-NaN:0x1) == +NaN:0x1 +; run: %fabs_f32(-NaN:0x300001) == +NaN:0x300001 +; run: %fabs_f32(+sNaN:0x1) == +sNaN:0x1 +; run: %fabs_f32(-sNaN:0x1) == +sNaN:0x1 +; run: %fabs_f32(+sNaN:0x200001) == +sNaN:0x200001 +; run: %fabs_f32(-sNaN:0x200001) == +sNaN:0x200001 + + + +function %fabs_f64(f64) -> f64 { +block0(v0: f64): + v1 = fabs v0 + return v1 +} +; run: %fabs_f64(0x9.0) == 0x9.0 +; run: %fabs_f64(-0x9.0) == 0x9.0 +; run: %fabs_f64(0x0.0) == 0x0.0 +; run: %fabs_f64(-0x0.0) == 0x0.0 + +; F64 Inf +; run: %fabs_f64(Inf) == Inf +; run: %fabs_f64(-Inf) == +Inf + +; F64 Epsilon / Max / Min Positive +; run: %fabs_f64(0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fabs_f64(-0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fabs_f64(0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023 +; run: %fabs_f64(-0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023 +; run: %fabs_f64(0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 +; run: %fabs_f64(-0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fabs_f64(0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fabs_f64(-0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fabs_f64(0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 +; run: %fabs_f64(-0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 + +; F64 NaN's +; Unlike with other operations fabs is guaranteed to only affect the sign bit +; run: %fabs_f64(+NaN) == +NaN +; run: %fabs_f64(-NaN) == +NaN +; run: %fabs_f64(+NaN:0x0) == +NaN:0x0 +; run: %fabs_f64(+NaN:0x1) == +NaN:0x1 +; run: %fabs_f64(+NaN:0x4000000000001) == +NaN:0x4000000000001 +; run: %fabs_f64(-NaN:0x0) == +NaN:0x0 +; run: %fabs_f64(-NaN:0x1) == +NaN:0x1 +; run: %fabs_f64(-NaN:0x4000000000001) == +NaN:0x4000000000001 +; run: %fabs_f64(+sNaN:0x1) == +sNaN:0x1 +; run: %fabs_f64(-sNaN:0x1) == +sNaN:0x1 +; run: %fabs_f64(+sNaN:0x4000000000001) == +sNaN:0x4000000000001 +; run: %fabs_f64(-sNaN:0x4000000000001) == +sNaN:0x4000000000001 diff --git a/cranelift/filetests/filetests/runtests/fcopysign.clif b/cranelift/filetests/filetests/runtests/fcopysign.clif new file mode 100644 index 0000000000..2811437836 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fcopysign.clif @@ -0,0 +1,107 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + +function %fcopysign_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} +; run: %fcopysign_f32(0x9.0, 0x9.0) == 0x9.0 +; run: %fcopysign_f32(-0x9.0, 0x9.0) == 0x9.0 +; run: %fcopysign_f32(0x9.0, -0x9.0) == -0x9.0 +; run: %fcopysign_f32(-0x9.0, -0x9.0) == -0x9.0 +; run: %fcopysign_f32(0x0.0, -0x0.0) == -0x0.0 +; run: %fcopysign_f32(-0x0.0, 0x0.0) == 0x0.0 + +; F32 Inf +; run: %fcopysign_f32(Inf, Inf) == Inf +; run: %fcopysign_f32(-Inf, Inf) == Inf +; run: %fcopysign_f32(Inf, -Inf) == -Inf +; run: %fcopysign_f32(-Inf, -Inf) == -Inf + +; F32 Epsilon / Max / Min Positive +; run: %fcopysign_f32(0x1.000000p-23, -0x0.0) == -0x1.000000p-23 +; run: %fcopysign_f32(-0x1.000000p-23, 0x0.0) == 0x1.000000p-23 +; run: %fcopysign_f32(0x1.fffffep127, -0x0.0) == -0x1.fffffep127 +; run: %fcopysign_f32(-0x1.fffffep127, 0x0.0) == 0x1.fffffep127 +; run: %fcopysign_f32(0x1.000000p-126, -0x0.0) == -0x1.000000p-126 +; run: %fcopysign_f32(-0x1.000000p-126, 0x0.0) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fcopysign_f32(0x0.800000p-126, -0x0.0) == -0x0.800000p-126 +; run: %fcopysign_f32(-0x0.800000p-126, 0x0.0) == 0x0.800000p-126 +; run: %fcopysign_f32(0x0.000002p-126, -0x0.0) == -0x0.000002p-126 +; run: %fcopysign_f32(-0x0.000002p-126, 0x0.0) == 0x0.000002p-126 + +; F32 NaN's +; Unlike with other operations fcopysign is guaranteed to only affect the sign bit +; run: %fcopysign_f32(0x0.0, -NaN) == -0x0.0 +; run: %fcopysign_f32(0x3.0, +sNaN:0x1) == 0x3.0 +; run: %fcopysign_f32(Inf, -NaN) == -Inf +; run: %fcopysign_f32(+NaN, -NaN) == -NaN +; run: %fcopysign_f32(-NaN, +NaN) == +NaN +; run: %fcopysign_f32(+NaN:0x0, -NaN) == -NaN:0x0 +; run: %fcopysign_f32(+NaN:0x1, -NaN) == -NaN:0x1 +; run: %fcopysign_f32(+NaN:0x300001, -NaN) == -NaN:0x300001 +; run: %fcopysign_f32(-NaN:0x0, +NaN) == +NaN:0x0 +; run: %fcopysign_f32(-NaN:0x1, +NaN) == +NaN:0x1 +; run: %fcopysign_f32(-NaN:0x300001, +NaN) == +NaN:0x300001 +; run: %fcopysign_f32(+sNaN:0x1, -NaN) == -sNaN:0x1 +; run: %fcopysign_f32(-sNaN:0x1, +NaN) == +sNaN:0x1 +; run: %fcopysign_f32(+sNaN:0x200001, -NaN) == -sNaN:0x200001 +; run: %fcopysign_f32(-sNaN:0x200001, +NaN) == +sNaN:0x200001 + + + +function %fcopysign_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} +; run: %fcopysign_f64(0x9.0, 0x9.0) == 0x9.0 +; run: %fcopysign_f64(-0x9.0, 0x9.0) == 0x9.0 +; run: %fcopysign_f64(0x9.0, -0x9.0) == -0x9.0 +; run: %fcopysign_f64(-0x9.0, -0x9.0) == -0x9.0 +; run: %fcopysign_f64(0x0.0, -0x0.0) == -0x0.0 +; run: %fcopysign_f64(-0x0.0, 0x0.0) == 0x0.0 + +; F64 Inf +; run: %fcopysign_f64(Inf, Inf) == Inf +; run: %fcopysign_f64(-Inf, Inf) == Inf +; run: %fcopysign_f64(Inf, -Inf) == -Inf +; run: %fcopysign_f64(-Inf, -Inf) == -Inf + +; F64 Epsilon / Max / Min Positive +; run: %fcopysign_f64(0x1.0000000000000p-52, -0x0.0) == -0x1.0000000000000p-52 +; run: %fcopysign_f64(-0x1.0000000000000p-52, 0x0.0) == 0x1.0000000000000p-52 +; run: %fcopysign_f64(0x1.fffffffffffffp1023, -0x0.0) == -0x1.fffffffffffffp1023 +; run: %fcopysign_f64(-0x1.fffffffffffffp1023, 0x0.0) == 0x1.fffffffffffffp1023 +; run: %fcopysign_f64(0x1.0000000000000p-1022, -0x0.0) == -0x1.0000000000000p-1022 +; run: %fcopysign_f64(-0x1.0000000000000p-1022, 0x0.0) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fcopysign_f64(0x0.8000000000000p-1022, -0x0.0) == -0x0.8000000000000p-1022 +; run: %fcopysign_f64(-0x0.8000000000000p-1022, 0x0.0) == 0x0.8000000000000p-1022 +; run: %fcopysign_f64(0x0.0000000000001p-1022, -0x0.0) == -0x0.0000000000001p-1022 +; run: %fcopysign_f64(-0x0.0000000000001p-1022, 0x0.0) == 0x0.0000000000001p-1022 + +; F64 NaN's +; Unlike with other operations fcopysign is guaranteed to only affect the sign bit +; run: %fcopysign_f64(0x0.0, -NaN) == -0x0.0 +; run: %fcopysign_f64(0x3.0, +sNaN:0x1) == 0x3.0 +; run: %fcopysign_f64(Inf, -NaN) == -Inf +; run: %fcopysign_f64(+NaN, -NaN) == -NaN +; run: %fcopysign_f64(-NaN, +NaN) == +NaN +; run: %fcopysign_f64(+NaN:0x0, -NaN) == -NaN:0x0 +; run: %fcopysign_f64(+NaN:0x1, -NaN) == -NaN:0x1 +; run: %fcopysign_f64(+NaN:0x4000000000001, -NaN) == -NaN:0x4000000000001 +; run: %fcopysign_f64(-NaN:0x0, +NaN) == +NaN:0x0 +; run: %fcopysign_f64(-NaN:0x1, +NaN) == +NaN:0x1 +; run: %fcopysign_f64(-NaN:0x4000000000001, +NaN) == +NaN:0x4000000000001 +; run: %fcopysign_f64(+sNaN:0x1, -NaN) == -sNaN:0x1 +; run: %fcopysign_f64(-sNaN:0x1, +NaN) == +sNaN:0x1 +; run: %fcopysign_f64(+sNaN:0x4000000000001, -NaN) == -sNaN:0x4000000000001 +; run: %fcopysign_f64(-sNaN:0x4000000000001, +NaN) == +sNaN:0x4000000000001 diff --git a/cranelift/filetests/filetests/runtests/fma.clif b/cranelift/filetests/filetests/runtests/fma.clif new file mode 100644 index 0000000000..e9429f4b51 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fma.clif @@ -0,0 +1,119 @@ +test interpret +test run +target aarch64 +target s390x + +function %fma_f32(f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + return v3 +} +; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 +; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14 + +; run: %fma_f32(0x0.0, 0x0.0, 0x0.0) == 0x0.0 +; run: %fma_f32(0x0.0, 0x0.0, -0x0.0) == 0x0.0 +; run: %fma_f32(0x0.0, -0x0.0, 0x0.0) == 0x0.0 +; run: %fma_f32(-0x0.0, 0x0.0, 0x0.0) == 0x0.0 + +; run: %fma_f32(-Inf, -Inf, 0x0.0) == +Inf +; run: %fma_f32(Inf, -Inf, 0x0.0) == -Inf +; run: %fma_f32(-Inf, Inf, 0x0.0) == -Inf +; run: %fma_f32(Inf, -Inf, -Inf) == -Inf +; run: %fma_f32(-Inf, Inf, -Inf) == -Inf + +; F32 Epsilon / Max / Min Positive +; run: %fma_f32(0x1.000000p-23, 0x1.000000p-23, 0x1.000000p-23) == 0x1.000002p-23 +; run: %fma_f32(0x0.0, 0x0.0, 0x1.000000p-23) == 0x1.000000p-23 +; run: %fma_f32(0x1.fffffep127, 0x1.fffffep127, 0x1.fffffep127) == +Inf +; run: %fma_f32(0x0.0, 0x0.0, 0x1.fffffep127) == 0x1.fffffep127 +; run: %fma_f32(0x1.000000p-126, 0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126 +; run: %fma_f32(0x0.0, 0x0.0, 0x1.000000p-126) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fma_f32(0x0.800000p-126, 0x0.800000p-126, 0x0.800000p-126) == 0x0.800000p-126 +; run: %fma_f32(0x0.800000p-126, 0x0.800000p-126, 0x0.0) == 0x0.0 +; run: %fma_f32(0x0.0, 0x0.0, 0x0.800000p-126) == 0x0.800000p-126 +; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.000002p-126) == 0x0.000002p-126 +; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0 +; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126 + + + +;; The IEEE754 Standard does not make a lot of guarantees about what +;; comes out of NaN producing operations, we just check if its a NaN +function %fma_is_nan_f32(f32, f32, f32) -> i32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + v4 = fcmp ne v3, v3 + v5 = bint.i32 v4 + return v5 +} +; run: %fma_is_nan_f32(Inf, -Inf, Inf) == 1 +; run: %fma_is_nan_f32(-Inf, Inf, Inf) == 1 +; run: %fma_is_nan_f32(-Inf, -Inf, -Inf) == 1 + +; run: %fma_is_nan_f32(+NaN, 0x0.0, 0x0.0) == 1 +; run: %fma_is_nan_f32(0x0.0, +NaN, 0x0.0) == 1 +; run: %fma_is_nan_f32(0x0.0, 0x0.0, +NaN) == 1 +; run: %fma_is_nan_f32(-NaN, 0x0.0, 0x0.0) == 1 +; run: %fma_is_nan_f32(0x0.0, -NaN, 0x0.0) == 1 +; run: %fma_is_nan_f32(0x0.0, 0x0.0, -NaN) == 1 + + + +function %fma_f64(f64, f64, f64) -> f64 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma v0, v1, v2 + return v3 +} +; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6 +; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21 + +; run: %fma_f64(0x0.0, 0x0.0, 0x0.0) == 0x0.0 +; run: %fma_f64(0x0.0, 0x0.0, -0x0.0) == 0x0.0 +; run: %fma_f64(0x0.0, -0x0.0, 0x0.0) == 0x0.0 +; run: %fma_f64(-0x0.0, 0x0.0, 0x0.0) == 0x0.0 + +; run: %fma_f64(-Inf, -Inf, 0x0.0) == +Inf +; run: %fma_f64(Inf, -Inf, 0x0.0) == -Inf +; run: %fma_f64(-Inf, Inf, 0x0.0) == -Inf +; run: %fma_f64(Inf, -Inf, -Inf) == -Inf +; run: %fma_f64(-Inf, Inf, -Inf) == -Inf + +; F64 Epsilon / Max / Min Positive +; run: %fma_f64(0x1.0000000000000p-52, 0x1.0000000000000p-52, 0x1.0000000000000p-52) == 0x1.0000000000001p-52 +; run: %fma_f64(0x0.0, 0x0.0, 0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fma_f64(0x1.fffffffffffffp1023, 0x1.fffffffffffffp1023, 0x1.fffffffffffffp1023) == +Inf +; run: %fma_f64(0x0.0, 0x0.0, 0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023 +; run: %fma_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 +; run: %fma_f64(0x0.0, 0x0.0, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fma_f64(0x0.8000000000000p-1022, 0x0.8000000000000p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fma_f64(0x0.8000000000000p-1022, 0x0.8000000000000p-1022, 0x0.0) == 0x0.0 +; run: %fma_f64(0x0.0, 0x0.0, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fma_f64(0x0.0000000000001p-1022, 0x0.0000000000001p-1022, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 +; run: %fma_f64(0x0.0000000000001p-1022, 0x0.0000000000001p-1022, 0x0.0) == 0x0.0 +; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 + + +;; The IEEE754 Standard does not make a lot of guarantees about what +;; comes out of NaN producing operations, we just check if its a NaN +function %fma_is_nan_f64(f64, f64, f64) -> i32 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma v0, v1, v2 + v4 = fcmp ne v3, v3 + v5 = bint.i32 v4 + return v5 +} +; run: %fma_is_nan_f64(Inf, -Inf, Inf) == 1 +; run: %fma_is_nan_f64(-Inf, Inf, Inf) == 1 +; run: %fma_is_nan_f64(-Inf, -Inf, -Inf) == 1 + +; run: %fma_is_nan_f64(+NaN, 0x0.0, 0x0.0) == 1 +; run: %fma_is_nan_f64(0x0.0, +NaN, 0x0.0) == 1 +; run: %fma_is_nan_f64(0x0.0, 0x0.0, +NaN) == 1 +; run: %fma_is_nan_f64(-NaN, 0x0.0, 0x0.0) == 1 +; run: %fma_is_nan_f64(0x0.0, -NaN, 0x0.0) == 1 +; run: %fma_is_nan_f64(0x0.0, 0x0.0, -NaN) == 1 diff --git a/cranelift/filetests/filetests/runtests/fneg.clif b/cranelift/filetests/filetests/runtests/fneg.clif new file mode 100644 index 0000000000..bfe63f924b --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fneg.clif @@ -0,0 +1,93 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + +function %fneg_f32(f32) -> f32 { +block0(v0: f32): + v1 = fneg v0 + return v1 +} +; run: %fneg_f32(0x9.0) == -0x9.0 +; run: %fneg_f32(-0x9.0) == 0x9.0 +; run: %fneg_f32(0x0.0) == -0x0.0 +; run: %fneg_f32(-0x0.0) == 0x0.0 + +; F32 Inf +; run: %fneg_f32(Inf) == -Inf +; run: %fneg_f32(-Inf) == +Inf + +; F32 Epsilon / Max / Min Positive +; run: %fneg_f32(0x1.000000p-23) == -0x1.000000p-23 +; run: %fneg_f32(-0x1.000000p-23) == 0x1.000000p-23 +; run: %fneg_f32(0x1.fffffep127) == -0x1.fffffep127 +; run: %fneg_f32(-0x1.fffffep127) == 0x1.fffffep127 +; run: %fneg_f32(0x1.000000p-126) == -0x1.000000p-126 +; run: %fneg_f32(-0x1.000000p-126) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fneg_f32(0x0.800000p-126) == -0x0.800000p-126 +; run: %fneg_f32(-0x0.800000p-126) == 0x0.800000p-126 +; run: %fneg_f32(0x0.000002p-126) == -0x0.000002p-126 +; run: %fneg_f32(-0x0.000002p-126) == 0x0.000002p-126 + +; F32 NaN's +; Unlike with other operations fneg is guaranteed to only affect the sign bit +; run: %fneg_f32(+NaN) == -NaN +; run: %fneg_f32(-NaN) == +NaN +; run: %fneg_f32(+NaN:0x0) == -NaN:0x0 +; run: %fneg_f32(+NaN:0x1) == -NaN:0x1 +; run: %fneg_f32(+NaN:0x300001) == -NaN:0x300001 +; run: %fneg_f32(-NaN:0x0) == +NaN:0x0 +; run: %fneg_f32(-NaN:0x1) == +NaN:0x1 +; run: %fneg_f32(-NaN:0x300001) == +NaN:0x300001 +; run: %fneg_f32(+sNaN:0x1) == -sNaN:0x1 +; run: %fneg_f32(-sNaN:0x1) == +sNaN:0x1 +; run: %fneg_f32(+sNaN:0x200001) == -sNaN:0x200001 +; run: %fneg_f32(-sNaN:0x200001) == +sNaN:0x200001 + + + +function %fneg_f64(f64) -> f64 { +block0(v0: f64): + v1 = fneg v0 + return v1 +} +; run: %fneg_f64(0x9.0) == -0x9.0 +; run: %fneg_f64(-0x9.0) == 0x9.0 +; run: %fneg_f64(0x0.0) == -0x0.0 +; run: %fneg_f64(-0x0.0) == 0x0.0 + +; F64 Inf +; run: %fneg_f64(+Inf) == -Inf +; run: %fneg_f64(-Inf) == +Inf + +; F64 Epsilon / Max / Min Positive +; run: %fneg_f64(0x1.0000000000000p-52) == -0x1.0000000000000p-52 +; run: %fneg_f64(-0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fneg_f64(0x1.fffffffffffffp1023) == -0x1.fffffffffffffp1023 +; run: %fneg_f64(-0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023 +; run: %fneg_f64(0x1.0000000000000p-1022) == -0x1.0000000000000p-1022 +; run: %fneg_f64(-0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fneg_f64(0x0.8000000000000p-1022) == -0x0.8000000000000p-1022 +; run: %fneg_f64(-0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fneg_f64(0x0.0000000000001p-1022) == -0x0.0000000000001p-1022 +; run: %fneg_f64(-0x0.0000000000001p-1022) == 0x0.0000000000001p-1022 + +; F64 NaN's +; Unlike with other operations fneg is guaranteed to only affect the sign bit +; run: %fneg_f64(+NaN) == -NaN +; run: %fneg_f64(-NaN) == +NaN +; run: %fneg_f64(+NaN:0x0) == -NaN:0x0 +; run: %fneg_f64(+NaN:0x1) == -NaN:0x1 +; run: %fneg_f64(+NaN:0x4000000000001) == -NaN:0x4000000000001 +; run: %fneg_f64(-NaN:0x0) == +NaN:0x0 +; run: %fneg_f64(-NaN:0x1) == +NaN:0x1 +; run: %fneg_f64(-NaN:0x4000000000001) == +NaN:0x4000000000001 +; run: %fneg_f64(+sNaN:0x1) == -sNaN:0x1 +; run: %fneg_f64(-sNaN:0x1) == +sNaN:0x1 +; run: %fneg_f64(+sNaN:0x4000000000001) == -sNaN:0x4000000000001 +; run: %fneg_f64(-sNaN:0x4000000000001) == +sNaN:0x4000000000001 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 91bbcf46d7..5ef0f0d549 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -679,10 +679,10 @@ where Opcode::Fmul => binary(Value::mul, arg(0)?, arg(1)?)?, Opcode::Fdiv => binary(Value::div, arg(0)?, arg(1)?)?, Opcode::Sqrt => assign(Value::sqrt(arg(0)?)?), - Opcode::Fma => unimplemented!("Fma"), - Opcode::Fneg => binary(Value::sub, Value::float(0, ctrl_ty)?, arg(0)?)?, - Opcode::Fabs => unimplemented!("Fabs"), - Opcode::Fcopysign => unimplemented!("Fcopysign"), + Opcode::Fma => assign(Value::fma(arg(0)?, arg(1)?, arg(2)?)?), + Opcode::Fneg => assign(Value::neg(arg(0)?)?), + Opcode::Fabs => assign(Value::abs(arg(0)?)?), + Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?, Opcode::Fmin => choose( Value::is_nan(&arg(0)?)? || Value::lt(&arg(0)?, &arg(1)?)?, arg(0)?, diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index a38d2aca42..9fe7baf531 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -51,6 +51,12 @@ pub trait Value: Clone + From { fn div(self, other: Self) -> ValueResult; fn rem(self, other: Self) -> ValueResult; fn sqrt(self) -> ValueResult; + fn fma(self, a: Self, b: Self) -> ValueResult; + fn abs(self) -> ValueResult; + + // Float operations + fn neg(self) -> ValueResult; + fn copysign(self, sign: Self) -> ValueResult; // Saturating arithmetic. fn add_sat(self, other: Self) -> ValueResult; @@ -468,6 +474,30 @@ impl Value for DataValue { unary_match!(sqrt(&self); [F32, F64]; [Ieee32, Ieee64]) } + fn fma(self, b: Self, c: Self) -> ValueResult { + match (self, b, c) { + (DataValue::F32(a), DataValue::F32(b), DataValue::F32(c)) => { + Ok(DataValue::F32(a.mul_add(b, c))) + } + (DataValue::F64(a), DataValue::F64(b), DataValue::F64(c)) => { + Ok(DataValue::F64(a.mul_add(b, c))) + } + (a, _b, _c) => Err(ValueError::InvalidType(ValueTypeClass::Float, a.ty())), + } + } + + fn abs(self) -> ValueResult { + unary_match!(abs(&self); [F32, F64]) + } + + fn neg(self) -> ValueResult { + unary_match!(neg(&self); [F32, F64]) + } + + fn copysign(self, sign: Self) -> ValueResult { + binary_match!(copysign(&self, &sign); [F32, F64]) + } + fn add_sat(self, other: Self) -> ValueResult { binary_match!(saturating_add(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]) }