From 2003ae99a04ea8b1a486dc79445da427d19aa345 Mon Sep 17 00:00:00 2001
From: Afonso Bordado <afonso360@users.noreply.github.com>
Date: Tue, 5 Jul 2022 17:03:04 +0100
Subject: [PATCH] Implement `fma`/`fabs`/`fneg`/`fcopysign` on the interpreter
 (#4367)

* cranelift: Implement `fma` on interpreter

* cranelift: Implement `fabs` on interpreter

* cranelift: Fix `fneg` implementation on interpreter

`fneg` was implemented as `0 - x`, which is not correct according to the
standard: subtraction makes no guarantees about what the output is when
an input is `NaN`, whereas the output of `fneg` for `NaN` inputs is
fully defined.

* cranelift: Implement `fcopysign` on interpreter
---
 cranelift/codegen/src/ir/immediates.rs        |  71 ++++++++++-
 .../filetests/filetests/runtests/fabs.clif    |  93 ++++++++++++++
 .../filetests/runtests/fcopysign.clif         | 107 ++++++++++++++++
 .../filetests/filetests/runtests/fma.clif     | 119 ++++++++++++++++++
 .../filetests/filetests/runtests/fneg.clif    |  93 ++++++++++++++
 cranelift/interpreter/src/step.rs             |   8 +-
 cranelift/interpreter/src/value.rs            |  30 +++++
 7 files changed, 511 insertions(+), 10 deletions(-)
 create mode 100644 cranelift/filetests/filetests/runtests/fabs.clif
 create mode 100644 cranelift/filetests/filetests/runtests/fcopysign.clif
 create mode 100644 cranelift/filetests/filetests/runtests/fma.clif
 create mode 100644 cranelift/filetests/filetests/runtests/fneg.clif
diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs
index 704a6a78e9..59e32be9d5 100644
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@@ -12,6 +12,7 @@ use core::str::FromStr;
 use core::{i32, u32};
 #[cfg(feature = "enable-serde")]
 use serde::{Deserialize, Serialize};
+use std::ops::Neg;
 
 /// Convert a type into a vector of bytes; all implementors in this file must use little-endian
 /// orderings of bytes to match WebAssembly's little-endianness.
@@ -761,18 +762,39 @@ impl Ieee32 {
 
     /// Check if the value is a NaN.
     pub fn is_nan(&self) -> bool {
-        f32::from_bits(self.0).is_nan()
+        self.as_f32().is_nan()
+    }
+
+    /// Reinterprets the stored bits as a native Rust `f32`.
+    pub fn as_f32(self) -> f32 {
+        f32::from_bits(self.0)
+    }
+
+    /// Fused multiply-add: computes `(self * a) + b` with a single rounding step, which is
+    /// more accurate than an unfused multiply followed by an add.
+    pub fn mul_add(self, a: Self, b: Self) -> Self {
+        Self::with_float(self.as_f32().mul_add(a.as_f32(), b.as_f32()))
     }
 
     /// Returns the square root of self.
     pub fn sqrt(self) -> Self {
-        Self::with_float(f32::from_bits(self.0).sqrt())
+        Self::with_float(self.as_f32().sqrt())
+    }
+
+    /// Computes the absolute value of self by clearing the sign bit; NaN payloads are kept.
+    pub fn abs(self) -> Self {
+        Self(self.0 & !(1u32 << 31))
+    }
+
+    /// Returns the bits of self with its sign bit replaced by the sign bit of sign.
+    pub fn copysign(self, sign: Self) -> Self {
+        Self((self.0 & !(1u32 << 31)) | (sign.0 & (1u32 << 31)))
     }
 }
 
 impl PartialOrd for Ieee32 {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        f32::from_bits(self.0).partial_cmp(&f32::from_bits(other.0))
+        self.as_f32().partial_cmp(&other.as_f32())
     }
 }
 
@@ -806,6 +828,14 @@ impl IntoBytes for Ieee32 {
     }
 }
 
+impl Neg for Ieee32 {
+    type Output = Ieee32;
+    // Flip only the sign bit so every other bit, including NaN payloads, is preserved.
+    fn neg(self) -> Self::Output {
+        Self(self.0 ^ (1u32 << 31))
+    }
+}
+
 impl Ieee64 {
     /// Create a new `Ieee64` containing the bits of `x`.
     pub fn with_bits(x: u64) -> Self {
@@ -851,18 +881,39 @@ impl Ieee64 {
     /// Check if the value is a NaN. For [Ieee64], this means checking that the 11 exponent bits are
     /// all set.
     pub fn is_nan(&self) -> bool {
-        f64::from_bits(self.0).is_nan()
+        self.as_f64().is_nan()
+    }
+
+    /// Reinterprets the stored bits as a native Rust `f64`.
+    pub fn as_f64(self) -> f64 {
+        f64::from_bits(self.0)
+    }
+
+    /// Fused multiply-add: computes `(self * a) + b` with a single rounding step, which is
+    /// more accurate than an unfused multiply followed by an add.
+    pub fn mul_add(self, a: Self, b: Self) -> Self {
+        Self::with_float(self.as_f64().mul_add(a.as_f64(), b.as_f64()))
     }
 
     /// Returns the square root of self.
     pub fn sqrt(self) -> Self {
-        Self::with_float(f64::from_bits(self.0).sqrt())
+        Self::with_float(self.as_f64().sqrt())
+    }
+
+    /// Computes the absolute value of self by clearing the sign bit; NaN payloads are kept.
+    pub fn abs(self) -> Self {
+        Self(self.0 & !(1u64 << 63))
+    }
+
+    /// Returns the bits of self with its sign bit replaced by the sign bit of sign.
+    pub fn copysign(self, sign: Self) -> Self {
+        Self((self.0 & !(1u64 << 63)) | (sign.0 & (1u64 << 63)))
     }
 }
 
 impl PartialOrd for Ieee64 {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        f64::from_bits(self.0).partial_cmp(&f64::from_bits(other.0))
+        self.as_f64().partial_cmp(&other.as_f64())
     }
 }
 
@@ -902,6 +953,14 @@ impl IntoBytes for Ieee64 {
     }
 }
 
+impl Neg for Ieee64 {
+    type Output = Ieee64;
+    // Flip only the sign bit so every other bit, including NaN payloads, is preserved.
+    fn neg(self) -> Self::Output {
+        Self(self.0 ^ (1u64 << 63))
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/cranelift/filetests/filetests/runtests/fabs.clif b/cranelift/filetests/filetests/runtests/fabs.clif
new file mode 100644
index 0000000000..4d63273efb
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fabs.clif
@@ -0,0 +1,93 @@
+test interpret
+test run
+target aarch64
+target x86_64
+target s390x
+
+function %fabs_f32(f32) -> f32 {
+block0(v0: f32):
+    v1 = fabs v0
+    return v1
+}
+; run: %fabs_f32(0x9.0) == 0x9.0
+; run: %fabs_f32(-0x9.0) == 0x9.0
+; run: %fabs_f32(0x0.0) == 0x0.0
+; run: %fabs_f32(-0x0.0) == 0x0.0
+
+; F32 Inf
+; run: %fabs_f32(Inf) == Inf
+; run: %fabs_f32(-Inf) == +Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fabs_f32(0x1.000000p-23) == 0x1.000000p-23
+; run: %fabs_f32(-0x1.000000p-23) == 0x1.000000p-23
+; run: %fabs_f32(0x1.fffffep127) == 0x1.fffffep127
+; run: %fabs_f32(-0x1.fffffep127) == 0x1.fffffep127
+; run: %fabs_f32(0x1.000000p-126) == 0x1.000000p-126
+; run: %fabs_f32(-0x1.000000p-126) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fabs_f32(0x0.800000p-126) == 0x0.800000p-126
+; run: %fabs_f32(-0x0.800000p-126) == 0x0.800000p-126
+; run: %fabs_f32(0x0.000002p-126) == 0x0.000002p-126
+; run: %fabs_f32(-0x0.000002p-126) == 0x0.000002p-126
+
+; F32 NaN's
+; Unlike with other operations fabs is guaranteed to only affect the sign bit
+; run: %fabs_f32(+NaN) == +NaN
+; run: %fabs_f32(-NaN) == +NaN
+; run: %fabs_f32(+NaN:0x0) == +NaN:0x0
+; run: %fabs_f32(+NaN:0x1) == +NaN:0x1
+; run: %fabs_f32(+NaN:0x300001) == +NaN:0x300001
+; run: %fabs_f32(-NaN:0x0) == +NaN:0x0
+; run: %fabs_f32(-NaN:0x1) == +NaN:0x1
+; run: %fabs_f32(-NaN:0x300001) == +NaN:0x300001
+; run: %fabs_f32(+sNaN:0x1) == +sNaN:0x1
+; run: %fabs_f32(-sNaN:0x1) == +sNaN:0x1
+; run: %fabs_f32(+sNaN:0x200001) == +sNaN:0x200001
+; run: %fabs_f32(-sNaN:0x200001) == +sNaN:0x200001
+
+
+
+function %fabs_f64(f64) -> f64 {
+block0(v0: f64):
+    v1 = fabs v0
+    return v1
+}
+; run: %fabs_f64(0x9.0) == 0x9.0
+; run: %fabs_f64(-0x9.0) == 0x9.0
+; run: %fabs_f64(0x0.0) == 0x0.0
+; run: %fabs_f64(-0x0.0) == 0x0.0
+
+; F64 Inf
+; run: %fabs_f64(Inf) == Inf
+; run: %fabs_f64(-Inf) == +Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fabs_f64(0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fabs_f64(-0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fabs_f64(0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
+; run: %fabs_f64(-0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
+; run: %fabs_f64(0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+; run: %fabs_f64(-0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fabs_f64(0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fabs_f64(-0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fabs_f64(0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
+; run: %fabs_f64(-0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
+
+; F64 NaN's
+; Unlike with other operations fabs is guaranteed to only affect the sign bit
+; run: %fabs_f64(+NaN) == +NaN
+; run: %fabs_f64(-NaN) == +NaN
+; run: %fabs_f64(+NaN:0x0) == +NaN:0x0
+; run: %fabs_f64(+NaN:0x1) == +NaN:0x1
+; run: %fabs_f64(+NaN:0x4000000000001) == +NaN:0x4000000000001
+; run: %fabs_f64(-NaN:0x0) == +NaN:0x0
+; run: %fabs_f64(-NaN:0x1) == +NaN:0x1
+; run: %fabs_f64(-NaN:0x4000000000001) == +NaN:0x4000000000001
+; run: %fabs_f64(+sNaN:0x1) == +sNaN:0x1
+; run: %fabs_f64(-sNaN:0x1) == +sNaN:0x1
+; run: %fabs_f64(+sNaN:0x4000000000001) == +sNaN:0x4000000000001
+; run: %fabs_f64(-sNaN:0x4000000000001) == +sNaN:0x4000000000001
diff --git a/cranelift/filetests/filetests/runtests/fcopysign.clif b/cranelift/filetests/filetests/runtests/fcopysign.clif
new file mode 100644
index 0000000000..2811437836
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fcopysign.clif
@@ -0,0 +1,107 @@
+test interpret
+test run
+target aarch64
+target x86_64
+target s390x
+
+function %fcopysign_f32(f32, f32) -> f32 {
+block0(v0: f32, v1: f32):
+    v2 = fcopysign v0, v1
+    return v2
+}
+; run: %fcopysign_f32(0x9.0, 0x9.0) == 0x9.0
+; run: %fcopysign_f32(-0x9.0, 0x9.0) == 0x9.0
+; run: %fcopysign_f32(0x9.0, -0x9.0) == -0x9.0
+; run: %fcopysign_f32(-0x9.0, -0x9.0) == -0x9.0
+; run: %fcopysign_f32(0x0.0, -0x0.0) == -0x0.0
+; run: %fcopysign_f32(-0x0.0, 0x0.0) == 0x0.0
+
+; F32 Inf
+; run: %fcopysign_f32(Inf, Inf) == Inf
+; run: %fcopysign_f32(-Inf, Inf) == Inf
+; run: %fcopysign_f32(Inf, -Inf) == -Inf
+; run: %fcopysign_f32(-Inf, -Inf) == -Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fcopysign_f32(0x1.000000p-23, -0x0.0) == -0x1.000000p-23
+; run: %fcopysign_f32(-0x1.000000p-23, 0x0.0) == 0x1.000000p-23
+; run: %fcopysign_f32(0x1.fffffep127, -0x0.0) == -0x1.fffffep127
+; run: %fcopysign_f32(-0x1.fffffep127, 0x0.0) == 0x1.fffffep127
+; run: %fcopysign_f32(0x1.000000p-126, -0x0.0) == -0x1.000000p-126
+; run: %fcopysign_f32(-0x1.000000p-126, 0x0.0) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fcopysign_f32(0x0.800000p-126, -0x0.0) == -0x0.800000p-126
+; run: %fcopysign_f32(-0x0.800000p-126, 0x0.0) == 0x0.800000p-126
+; run: %fcopysign_f32(0x0.000002p-126, -0x0.0) == -0x0.000002p-126
+; run: %fcopysign_f32(-0x0.000002p-126, 0x0.0) == 0x0.000002p-126
+
+; F32 NaN's
+; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
+; run: %fcopysign_f32(0x0.0, -NaN) == -0x0.0
+; run: %fcopysign_f32(0x3.0, +sNaN:0x1) == 0x3.0
+; run: %fcopysign_f32(Inf, -NaN) == -Inf
+; run: %fcopysign_f32(+NaN, -NaN) == -NaN
+; run: %fcopysign_f32(-NaN, +NaN) == +NaN
+; run: %fcopysign_f32(+NaN:0x0, -NaN) == -NaN:0x0
+; run: %fcopysign_f32(+NaN:0x1, -NaN) == -NaN:0x1
+; run: %fcopysign_f32(+NaN:0x300001, -NaN) == -NaN:0x300001
+; run: %fcopysign_f32(-NaN:0x0, +NaN) == +NaN:0x0
+; run: %fcopysign_f32(-NaN:0x1, +NaN) == +NaN:0x1
+; run: %fcopysign_f32(-NaN:0x300001, +NaN) == +NaN:0x300001
+; run: %fcopysign_f32(+sNaN:0x1, -NaN) == -sNaN:0x1
+; run: %fcopysign_f32(-sNaN:0x1, +NaN) == +sNaN:0x1
+; run: %fcopysign_f32(+sNaN:0x200001, -NaN) == -sNaN:0x200001
+; run: %fcopysign_f32(-sNaN:0x200001, +NaN) == +sNaN:0x200001
+
+
+
+function %fcopysign_f64(f64, f64) -> f64 {
+block0(v0: f64, v1: f64):
+    v2 = fcopysign v0, v1
+    return v2
+}
+; run: %fcopysign_f64(0x9.0, 0x9.0) == 0x9.0
+; run: %fcopysign_f64(-0x9.0, 0x9.0) == 0x9.0
+; run: %fcopysign_f64(0x9.0, -0x9.0) == -0x9.0
+; run: %fcopysign_f64(-0x9.0, -0x9.0) == -0x9.0
+; run: %fcopysign_f64(0x0.0, -0x0.0) == -0x0.0
+; run: %fcopysign_f64(-0x0.0, 0x0.0) == 0x0.0
+
+; F64 Inf
+; run: %fcopysign_f64(Inf, Inf) == Inf
+; run: %fcopysign_f64(-Inf, Inf) == Inf
+; run: %fcopysign_f64(Inf, -Inf) == -Inf
+; run: %fcopysign_f64(-Inf, -Inf) == -Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fcopysign_f64(0x1.0000000000000p-52, -0x0.0) == -0x1.0000000000000p-52
+; run: %fcopysign_f64(-0x1.0000000000000p-52, 0x0.0) == 0x1.0000000000000p-52
+; run: %fcopysign_f64(0x1.fffffffffffffp1023, -0x0.0) == -0x1.fffffffffffffp1023
+; run: %fcopysign_f64(-0x1.fffffffffffffp1023, 0x0.0) == 0x1.fffffffffffffp1023
+; run: %fcopysign_f64(0x1.0000000000000p-1022, -0x0.0) == -0x1.0000000000000p-1022
+; run: %fcopysign_f64(-0x1.0000000000000p-1022, 0x0.0) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fcopysign_f64(0x0.8000000000000p-1022, -0x0.0) == -0x0.8000000000000p-1022
+; run: %fcopysign_f64(-0x0.8000000000000p-1022, 0x0.0) == 0x0.8000000000000p-1022
+; run: %fcopysign_f64(0x0.0000000000001p-1022, -0x0.0) == -0x0.0000000000001p-1022
+; run: %fcopysign_f64(-0x0.0000000000001p-1022, 0x0.0) == 0x0.0000000000001p-1022
+
+; F64 NaN's
+; Unlike with other operations fcopysign is guaranteed to only affect the sign bit
+; run: %fcopysign_f64(0x0.0, -NaN) == -0x0.0
+; run: %fcopysign_f64(0x3.0, +sNaN:0x1) == 0x3.0
+; run: %fcopysign_f64(Inf, -NaN) == -Inf
+; run: %fcopysign_f64(+NaN, -NaN) == -NaN
+; run: %fcopysign_f64(-NaN, +NaN) == +NaN
+; run: %fcopysign_f64(+NaN:0x0, -NaN) == -NaN:0x0
+; run: %fcopysign_f64(+NaN:0x1, -NaN) == -NaN:0x1
+; run: %fcopysign_f64(+NaN:0x4000000000001, -NaN) == -NaN:0x4000000000001
+; run: %fcopysign_f64(-NaN:0x0, +NaN) == +NaN:0x0
+; run: %fcopysign_f64(-NaN:0x1, +NaN) == +NaN:0x1
+; run: %fcopysign_f64(-NaN:0x4000000000001, +NaN) == +NaN:0x4000000000001
+; run: %fcopysign_f64(+sNaN:0x1, -NaN) == -sNaN:0x1
+; run: %fcopysign_f64(-sNaN:0x1, +NaN) == +sNaN:0x1
+; run: %fcopysign_f64(+sNaN:0x4000000000001, -NaN) == -sNaN:0x4000000000001
+; run: %fcopysign_f64(-sNaN:0x4000000000001, +NaN) == +sNaN:0x4000000000001
diff --git a/cranelift/filetests/filetests/runtests/fma.clif b/cranelift/filetests/filetests/runtests/fma.clif
new file mode 100644
index 0000000000..e9429f4b51
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fma.clif
@@ -0,0 +1,119 @@
+test interpret
+test run
+target aarch64
+target s390x
+
+function %fma_f32(f32, f32, f32) -> f32 {
+block0(v0: f32, v1: f32, v2: f32):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f32(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f32(0x83.0, 0x2.68091p6, 0x9.88721p1) == 0x1.3b88e6p14
+
+; run: %fma_f32(0x0.0, 0x0.0, 0x0.0) == 0x0.0
+; run: %fma_f32(0x0.0, 0x0.0, -0x0.0) == 0x0.0
+; run: %fma_f32(0x0.0, -0x0.0, 0x0.0) == 0x0.0
+; run: %fma_f32(-0x0.0, 0x0.0, 0x0.0) == 0x0.0
+
+; run: %fma_f32(-Inf, -Inf, 0x0.0) == +Inf
+; run: %fma_f32(Inf, -Inf, 0x0.0) == -Inf
+; run: %fma_f32(-Inf, Inf, 0x0.0) == -Inf
+; run: %fma_f32(Inf, -Inf, -Inf) == -Inf
+; run: %fma_f32(-Inf, Inf, -Inf) == -Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fma_f32(0x1.000000p-23, 0x1.000000p-23, 0x1.000000p-23) == 0x1.000002p-23
+; run: %fma_f32(0x0.0, 0x0.0, 0x1.000000p-23) == 0x1.000000p-23
+; run: %fma_f32(0x1.fffffep127, 0x1.fffffep127, 0x1.fffffep127) == +Inf
+; run: %fma_f32(0x0.0, 0x0.0, 0x1.fffffep127) == 0x1.fffffep127
+; run: %fma_f32(0x1.000000p-126, 0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126
+; run: %fma_f32(0x0.0, 0x0.0, 0x1.000000p-126) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fma_f32(0x0.800000p-126, 0x0.800000p-126, 0x0.800000p-126) == 0x0.800000p-126
+; run: %fma_f32(0x0.800000p-126, 0x0.800000p-126, 0x0.0) == 0x0.0
+; run: %fma_f32(0x0.0, 0x0.0, 0x0.800000p-126) == 0x0.800000p-126
+; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.000002p-126) == 0x0.000002p-126
+; run: %fma_f32(0x0.000002p-126, 0x0.000002p-126, 0x0.0) == 0x0.0
+; run: %fma_f32(0x0.0, 0x0.0, 0x0.000002p-126) == 0x0.000002p-126
+
+
+
+;; The IEEE 754 standard makes few guarantees about the exact bit pattern
+;; that comes out of NaN-producing operations, so we only check that the result is a NaN
+function %fma_is_nan_f32(f32, f32, f32) -> i32 {
+block0(v0: f32, v1: f32, v2: f32):
+    v3 = fma v0, v1, v2
+    v4 = fcmp ne v3, v3
+    v5 = bint.i32 v4
+    return v5
+}
+; run: %fma_is_nan_f32(Inf, -Inf, Inf) == 1
+; run: %fma_is_nan_f32(-Inf, Inf, Inf) == 1
+; run: %fma_is_nan_f32(-Inf, -Inf, -Inf) == 1
+
+; run: %fma_is_nan_f32(+NaN, 0x0.0, 0x0.0) == 1
+; run: %fma_is_nan_f32(0x0.0, +NaN, 0x0.0) == 1
+; run: %fma_is_nan_f32(0x0.0, 0x0.0, +NaN) == 1
+; run: %fma_is_nan_f32(-NaN, 0x0.0, 0x0.0) == 1
+; run: %fma_is_nan_f32(0x0.0, -NaN, 0x0.0) == 1
+; run: %fma_is_nan_f32(0x0.0, 0x0.0, -NaN) == 1
+
+
+
+function %fma_f64(f64, f64, f64) -> f64 {
+block0(v0: f64, v1: f64, v2: f64):
+    v3 = fma v0, v1, v2
+    return v3
+}
+; run: %fma_f64(0x9.0, 0x9.0, 0x9.0) == 0x1.680000p6
+; run: %fma_f64(0x1.3b88ea148dd4ap14, 0x2.680916809121p6, 0x9.887218721837p1) == 0x1.7ba6ebee17417p21
+
+; run: %fma_f64(0x0.0, 0x0.0, 0x0.0) == 0x0.0
+; run: %fma_f64(0x0.0, 0x0.0, -0x0.0) == 0x0.0
+; run: %fma_f64(0x0.0, -0x0.0, 0x0.0) == 0x0.0
+; run: %fma_f64(-0x0.0, 0x0.0, 0x0.0) == 0x0.0
+
+; run: %fma_f64(-Inf, -Inf, 0x0.0) == +Inf
+; run: %fma_f64(Inf, -Inf, 0x0.0) == -Inf
+; run: %fma_f64(-Inf, Inf, 0x0.0) == -Inf
+; run: %fma_f64(Inf, -Inf, -Inf) == -Inf
+; run: %fma_f64(-Inf, Inf, -Inf) == -Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fma_f64(0x1.0000000000000p-52, 0x1.0000000000000p-52, 0x1.0000000000000p-52) == 0x1.0000000000001p-52
+; run: %fma_f64(0x0.0, 0x0.0, 0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fma_f64(0x1.fffffffffffffp1023, 0x1.fffffffffffffp1023, 0x1.fffffffffffffp1023) == +Inf
+; run: %fma_f64(0x0.0, 0x0.0, 0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
+; run: %fma_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+; run: %fma_f64(0x0.0, 0x0.0, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fma_f64(0x0.8000000000000p-1022, 0x0.8000000000000p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fma_f64(0x0.8000000000000p-1022, 0x0.8000000000000p-1022, 0x0.0) == 0x0.0
+; run: %fma_f64(0x0.0, 0x0.0, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fma_f64(0x0.0000000000001p-1022, 0x0.0000000000001p-1022, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
+; run: %fma_f64(0x0.0000000000001p-1022, 0x0.0000000000001p-1022, 0x0.0) == 0x0.0
+; run: %fma_f64(0x0.0, 0x0.0, 0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
+
+
+;; The IEEE 754 standard makes few guarantees about the exact bit pattern
+;; that comes out of NaN-producing operations, so we only check that the result is a NaN
+function %fma_is_nan_f64(f64, f64, f64) -> i32 {
+block0(v0: f64, v1: f64, v2: f64):
+    v3 = fma v0, v1, v2
+    v4 = fcmp ne v3, v3
+    v5 = bint.i32 v4
+    return v5
+}
+; run: %fma_is_nan_f64(Inf, -Inf, Inf) == 1
+; run: %fma_is_nan_f64(-Inf, Inf, Inf) == 1
+; run: %fma_is_nan_f64(-Inf, -Inf, -Inf) == 1
+
+; run: %fma_is_nan_f64(+NaN, 0x0.0, 0x0.0) == 1
+; run: %fma_is_nan_f64(0x0.0, +NaN, 0x0.0) == 1
+; run: %fma_is_nan_f64(0x0.0, 0x0.0, +NaN) == 1
+; run: %fma_is_nan_f64(-NaN, 0x0.0, 0x0.0) == 1
+; run: %fma_is_nan_f64(0x0.0, -NaN, 0x0.0) == 1
+; run: %fma_is_nan_f64(0x0.0, 0x0.0, -NaN) == 1
diff --git a/cranelift/filetests/filetests/runtests/fneg.clif b/cranelift/filetests/filetests/runtests/fneg.clif
new file mode 100644
index 0000000000..bfe63f924b
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fneg.clif
@@ -0,0 +1,93 @@
+test interpret
+test run
+target aarch64
+target x86_64
+target s390x
+
+function %fneg_f32(f32) -> f32 {
+block0(v0: f32):
+    v1 = fneg v0
+    return v1
+}
+; run: %fneg_f32(0x9.0) == -0x9.0
+; run: %fneg_f32(-0x9.0) == 0x9.0
+; run: %fneg_f32(0x0.0) == -0x0.0
+; run: %fneg_f32(-0x0.0) == 0x0.0
+
+; F32 Inf
+; run: %fneg_f32(Inf) == -Inf
+; run: %fneg_f32(-Inf) == +Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fneg_f32(0x1.000000p-23) == -0x1.000000p-23
+; run: %fneg_f32(-0x1.000000p-23) == 0x1.000000p-23
+; run: %fneg_f32(0x1.fffffep127) == -0x1.fffffep127
+; run: %fneg_f32(-0x1.fffffep127) == 0x1.fffffep127
+; run: %fneg_f32(0x1.000000p-126) == -0x1.000000p-126
+; run: %fneg_f32(-0x1.000000p-126) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fneg_f32(0x0.800000p-126) == -0x0.800000p-126
+; run: %fneg_f32(-0x0.800000p-126) == 0x0.800000p-126
+; run: %fneg_f32(0x0.000002p-126) == -0x0.000002p-126
+; run: %fneg_f32(-0x0.000002p-126) == 0x0.000002p-126
+
+; F32 NaN's
+; Unlike with other operations fneg is guaranteed to only affect the sign bit
+; run: %fneg_f32(+NaN) == -NaN
+; run: %fneg_f32(-NaN) == +NaN
+; run: %fneg_f32(+NaN:0x0) == -NaN:0x0
+; run: %fneg_f32(+NaN:0x1) == -NaN:0x1
+; run: %fneg_f32(+NaN:0x300001) == -NaN:0x300001
+; run: %fneg_f32(-NaN:0x0) == +NaN:0x0
+; run: %fneg_f32(-NaN:0x1) == +NaN:0x1
+; run: %fneg_f32(-NaN:0x300001) == +NaN:0x300001
+; run: %fneg_f32(+sNaN:0x1) == -sNaN:0x1
+; run: %fneg_f32(-sNaN:0x1) == +sNaN:0x1
+; run: %fneg_f32(+sNaN:0x200001) == -sNaN:0x200001
+; run: %fneg_f32(-sNaN:0x200001) == +sNaN:0x200001
+
+
+
+function %fneg_f64(f64) -> f64 {
+block0(v0: f64):
+    v1 = fneg v0
+    return v1
+}
+; run: %fneg_f64(0x9.0) == -0x9.0
+; run: %fneg_f64(-0x9.0) == 0x9.0
+; run: %fneg_f64(0x0.0) == -0x0.0
+; run: %fneg_f64(-0x0.0) == 0x0.0
+
+; F64 Inf
+; run: %fneg_f64(+Inf) == -Inf
+; run: %fneg_f64(-Inf) == +Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fneg_f64(0x1.0000000000000p-52) == -0x1.0000000000000p-52
+; run: %fneg_f64(-0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fneg_f64(0x1.fffffffffffffp1023) == -0x1.fffffffffffffp1023
+; run: %fneg_f64(-0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
+; run: %fneg_f64(0x1.0000000000000p-1022) == -0x1.0000000000000p-1022
+; run: %fneg_f64(-0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fneg_f64(0x0.8000000000000p-1022) == -0x0.8000000000000p-1022
+; run: %fneg_f64(-0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fneg_f64(0x0.0000000000001p-1022) == -0x0.0000000000001p-1022
+; run: %fneg_f64(-0x0.0000000000001p-1022) == 0x0.0000000000001p-1022
+
+; F64 NaN's
+; Unlike with other operations fneg is guaranteed to only affect the sign bit
+; run: %fneg_f64(+NaN) == -NaN
+; run: %fneg_f64(-NaN) == +NaN
+; run: %fneg_f64(+NaN:0x0) == -NaN:0x0
+; run: %fneg_f64(+NaN:0x1) == -NaN:0x1
+; run: %fneg_f64(+NaN:0x4000000000001) == -NaN:0x4000000000001
+; run: %fneg_f64(-NaN:0x0) == +NaN:0x0
+; run: %fneg_f64(-NaN:0x1) == +NaN:0x1
+; run: %fneg_f64(-NaN:0x4000000000001) == +NaN:0x4000000000001
+; run: %fneg_f64(+sNaN:0x1) == -sNaN:0x1
+; run: %fneg_f64(-sNaN:0x1) == +sNaN:0x1
+; run: %fneg_f64(+sNaN:0x4000000000001) == -sNaN:0x4000000000001
+; run: %fneg_f64(-sNaN:0x4000000000001) == +sNaN:0x4000000000001
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 91bbcf46d7..5ef0f0d549 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -679,10 +679,10 @@ where
         Opcode::Fmul => binary(Value::mul, arg(0)?, arg(1)?)?,
         Opcode::Fdiv => binary(Value::div, arg(0)?, arg(1)?)?,
         Opcode::Sqrt => assign(Value::sqrt(arg(0)?)?),
-        Opcode::Fma => unimplemented!("Fma"),
-        Opcode::Fneg => binary(Value::sub, Value::float(0, ctrl_ty)?, arg(0)?)?,
-        Opcode::Fabs => unimplemented!("Fabs"),
-        Opcode::Fcopysign => unimplemented!("Fcopysign"),
+        Opcode::Fma => assign(Value::fma(arg(0)?, arg(1)?, arg(2)?)?),
+        Opcode::Fneg => assign(Value::neg(arg(0)?)?),
+        Opcode::Fabs => assign(Value::abs(arg(0)?)?),
+        Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
         Opcode::Fmin => choose(
             Value::is_nan(&arg(0)?)? || Value::lt(&arg(0)?, &arg(1)?)?,
             arg(0)?,
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index a38d2aca42..9fe7baf531 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -51,6 +51,12 @@ pub trait Value: Clone + From<DataValue> {
     fn div(self, other: Self) -> ValueResult<Self>;
     fn rem(self, other: Self) -> ValueResult<Self>;
     fn sqrt(self) -> ValueResult<Self>;
+    fn fma(self, a: Self, b: Self) -> ValueResult<Self>;
+    fn abs(self) -> ValueResult<Self>;
+
+    // Float operations
+    fn neg(self) -> ValueResult<Self>;
+    fn copysign(self, sign: Self) -> ValueResult<Self>;
 
     // Saturating arithmetic.
     fn add_sat(self, other: Self) -> ValueResult<Self>;
@@ -468,6 +474,30 @@ impl Value for DataValue {
         unary_match!(sqrt(&self); [F32, F64]; [Ieee32, Ieee64])
     }
 
+    fn fma(self, b: Self, c: Self) -> ValueResult<Self> {
+        // Operands must share one float width; any other mix reports the first operand's type.
+        use DataValue::{F32, F64};
+        Ok(match (self, b, c) {
+            (F32(x), F32(y), F32(z)) => F32(x.mul_add(y, z)),
+            (F64(x), F64(y), F64(z)) => F64(x.mul_add(y, z)),
+            (first, ..) => {
+                return Err(ValueError::InvalidType(ValueTypeClass::Float, first.ty()));
+            }
+        })
+    }
+
+    fn abs(self) -> ValueResult<Self> {
+        unary_match!(abs(&self); [F32, F64])
+    }
+
+    fn neg(self) -> ValueResult<Self> {
+        unary_match!(neg(&self); [F32, F64])
+    }
+
+    fn copysign(self, sign: Self) -> ValueResult<Self> {
+        binary_match!(copysign(&self, &sign); [F32, F64])
+    }
+
     fn add_sat(self, other: Self) -> ValueResult<Self> {
         binary_match!(saturating_add(self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
     }