From 925891245decf15ea27d545b8d15500e82f83300 Mon Sep 17 00:00:00 2001
From: Afonso Bordado <afonso360@users.noreply.github.com>
Date: Tue, 5 Jul 2022 20:59:23 +0100
Subject: [PATCH] cranelift: Fix `fmin`/`fmax` when dealing with zeroes (#4373)

`fmin`/`fmax` are defined as returning -0.0 as smaller than 0.0.
This is not how the IEEE754 views these values and the interpreter was
returning the wrong value in these operations since it was just using the
standard IEEE754 comparisons.

This also tries to preserve NaN information by avoiding passing NaN's
through any operation that could canonicalize it.
---
 cranelift/codegen/src/ir/immediates.rs        |  20 ++++
 .../filetests/filetests/runtests/fmax.clif    | 105 ++++++++++++++++++
 .../filetests/filetests/runtests/fmin.clif    | 105 ++++++++++++++++++
 cranelift/interpreter/src/step.rs             |  24 ++--
 cranelift/interpreter/src/value.rs            |  19 ++++
 5 files changed, 263 insertions(+), 10 deletions(-)
 create mode 100644 cranelift/filetests/filetests/runtests/fmax.clif
 create mode 100644 cranelift/filetests/filetests/runtests/fmin.clif

diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs
index 59e32be9d5..2ed2bb884c 100644
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@@ -790,6 +790,16 @@ impl Ieee32 {
     pub fn copysign(self, sign: Self) -> Self {
         Self::with_float(self.as_f32().copysign(sign.as_f32()))
     }
+
+    /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
+    pub fn is_negative(&self) -> bool {
+        self.as_f32().is_sign_negative()
+    }
+
+    /// Returns true if self is positive or negative zero
+    pub fn is_zero(&self) -> bool {
+        self.as_f32() == 0.0
+    }
 }
 
 impl PartialOrd for Ieee32 {
@@ -909,6 +919,16 @@ impl Ieee64 {
     pub fn copysign(self, sign: Self) -> Self {
         Self::with_float(self.as_f64().copysign(sign.as_f64()))
     }
+
+    /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
+    pub fn is_negative(&self) -> bool {
+        self.as_f64().is_sign_negative()
+    }
+
+    /// Returns true if self is positive or negative zero
+    pub fn is_zero(&self) -> bool {
+        self.as_f64() == 0.0
+    }
 }
 
 impl PartialOrd for Ieee64 {
diff --git a/cranelift/filetests/filetests/runtests/fmax.clif b/cranelift/filetests/filetests/runtests/fmax.clif
new file mode 100644
index 0000000000..10d72e8f47
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fmax.clif
@@ -0,0 +1,105 @@
+test interpret
+test run
+target x86_64
+target aarch64
+target s390x
+
+function %fmax_f32(f32, f32) -> f32 {
+block0(v0: f32, v1: f32):
+    v2 = fmax v0, v1
+    return v2
+}
+; run: %fmax_f32(0x1.0, 0x2.0) == 0x2.0
+; run: %fmax_f32(0x1.0p10, 0x1.0p11) == 0x1.0p11
+
+; run: %fmax_f32(0x0.0, -0x0.0) == 0x0.0
+; run: %fmax_f32(-0x0.0, 0x0.0) == 0x0.0
+; run: %fmax_f32(+Inf, 0x0.0) == +Inf
+; run: %fmax_f32(0x0.0, +Inf) == +Inf
+; run: %fmax_f32(-Inf, 0x0.0) == 0x0.0
+; run: %fmax_f32(0x0.0, -Inf) == 0x0.0
+; run: %fmax_f32(+Inf, -Inf) == +Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fmax_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000002p-23
+; run: %fmax_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffep127
+; run: %fmax_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fmax_f32(0x0.800002p-126, 0x0.800000p-126) ==  0x0.800002p-126
+; run: %fmax_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800000p-126
+
+; F32 NaN's
+; For NaN's this operation is specified as producing a value that is a NaN
+; This behaviour differs from IEEE754's behaviour
+function %fmax_is_nan_f32(f32, f32) -> i32 {
+block0(v0: f32, v1: f32):
+    v2 = fmax v0, v1
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %fmax_is_nan_f32(0x0.0, +NaN) == 1
+; run: %fmax_is_nan_f32(-NaN, 0x0.0) == 1
+; run: %fmax_is_nan_f32(0x0.0, +NaN:0x0) == 1
+; run: %fmax_is_nan_f32(0x0.0, +NaN:0x1) == 1
+; run: %fmax_is_nan_f32(0x0.0, +NaN:0x300001) == 1
+; run: %fmax_is_nan_f32(-NaN:0x0, 0x0.0) == 1
+; run: %fmax_is_nan_f32(-NaN:0x1, 0x0.0) == 1
+; run: %fmax_is_nan_f32(-NaN:0x300001, 0x0.0) == 1
+; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x1) == 1
+; run: %fmax_is_nan_f32(-sNaN:0x1, 0x0.0) == 1
+; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x200001) == 1
+; run: %fmax_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1
+
+
+
+function %fmax_f64(f64, f64) -> f64 {
+block0(v0: f64, v1: f64):
+    v2 = fmax v0, v1
+    return v2
+}
+; run: %fmax_f64(0x1.0, 0x2.0) == 0x2.0
+; run: %fmax_f64(0x1.0p10, 0x1.0p11) == 0x1.0p11
+
+; run: %fmax_f64(0x0.0, -0x0.0) == 0x0.0
+; run: %fmax_f64(-0x0.0, 0x0.0) == 0x0.0
+; run: %fmax_f64(+Inf, 0x0.0) == +Inf
+; run: %fmax_f64(0x0.0, +Inf) == +Inf
+; run: %fmax_f64(-Inf, 0x0.0) == 0x0.0
+; run: %fmax_f64(0x0.0, -Inf) == 0x0.0
+; run: %fmax_f64(+Inf, -Inf) == +Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fmax_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000002p-52
+; run: %fmax_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
+; run: %fmax_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fmax_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000002p-1022
+; run: %fmax_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000000p-1022
+
+
+
+; F64 NaN's
+; For NaN's this operation is specified as producing a value that is a NaN
+; This behaviour differs from IEEE754's behaviour
+function %fmax_is_nan_f64(f64, f64) -> i32 {
+block0(v0: f64, v1: f64):
+    v2 = fmax v0, v1
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %fmax_is_nan_f64(0x0.0, +NaN) == 1
+; run: %fmax_is_nan_f64(-NaN, 0x0.0) == 1
+; run: %fmax_is_nan_f64(0x0.0, +NaN:0x0) == 1
+; run: %fmax_is_nan_f64(0x0.0, +NaN:0x1) == 1
+; run: %fmax_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1
+; run: %fmax_is_nan_f64(-NaN:0x0, 0x0.0) == 1
+; run: %fmax_is_nan_f64(-NaN:0x1, 0x0.0) == 1
+; run: %fmax_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1
+; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x1) == 1
+; run: %fmax_is_nan_f64(-sNaN:0x1, 0x0.0) == 1
+; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1
+; run: %fmax_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1
diff --git a/cranelift/filetests/filetests/runtests/fmin.clif b/cranelift/filetests/filetests/runtests/fmin.clif
new file mode 100644
index 0000000000..9f436f5458
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/fmin.clif
@@ -0,0 +1,105 @@
+test interpret
+test run
+target x86_64
+target aarch64
+target s390x
+
+function %fmin_f32(f32, f32) -> f32 {
+block0(v0: f32, v1: f32):
+    v2 = fmin v0, v1
+    return v2
+}
+; run: %fmin_f32(0x1.0, 0x2.0) == 0x1.0
+; run: %fmin_f32(0x1.0p10, 0x1.0p11) == 0x1.0p10
+
+; run: %fmin_f32(0x0.0, -0x0.0) == -0x0.0
+; run: %fmin_f32(-0x0.0, 0x0.0) == -0x0.0
+; run: %fmin_f32(+Inf, 0x0.0) == 0x0.0
+; run: %fmin_f32(0x0.0, +Inf) == 0x0.0
+; run: %fmin_f32(-Inf, 0x0.0) == -Inf
+; run: %fmin_f32(0x0.0, -Inf) == -Inf
+; run: %fmin_f32(+Inf, -Inf) == -Inf
+
+; F32 Epsilon / Max / Min Positive
+; run: %fmin_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000000p-23
+; run: %fmin_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffcp127
+; run: %fmin_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126
+
+; F32 Subnormals
+; run: %fmin_f32(0x0.800002p-126, 0x0.800000p-126) ==  0x0.800000p-126
+; run: %fmin_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800002p-126
+
+; F32 NaN's
+; For NaN's this operation is specified as producing a value that is a NaN
+; This behaviour differs from IEEE754's behaviour
+function %fmin_is_nan_f32(f32, f32) -> i32 {
+block0(v0: f32, v1: f32):
+    v2 = fmin v0, v1
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %fmin_is_nan_f32(0x0.0, +NaN) == 1
+; run: %fmin_is_nan_f32(-NaN, 0x0.0) == 1
+; run: %fmin_is_nan_f32(0x0.0, +NaN:0x0) == 1
+; run: %fmin_is_nan_f32(0x0.0, +NaN:0x1) == 1
+; run: %fmin_is_nan_f32(0x0.0, +NaN:0x300001) == 1
+; run: %fmin_is_nan_f32(-NaN:0x0, 0x0.0) == 1
+; run: %fmin_is_nan_f32(-NaN:0x1, 0x0.0) == 1
+; run: %fmin_is_nan_f32(-NaN:0x300001, 0x0.0) == 1
+; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x1) == 1
+; run: %fmin_is_nan_f32(-sNaN:0x1, 0x0.0) == 1
+; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x200001) == 1
+; run: %fmin_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1
+
+
+
+function %fmin_f64(f64, f64) -> f64 {
+block0(v0: f64, v1: f64):
+    v2 = fmin v0, v1
+    return v2
+}
+; run: %fmin_f64(0x1.0, 0x2.0) == 0x1.0
+; run: %fmin_f64(0x1.0p10, 0x1.0p11) == 0x1.0p10
+
+; run: %fmin_f64(0x0.0, -0x0.0) == -0x0.0
+; run: %fmin_f64(-0x0.0, 0x0.0) == -0x0.0
+; run: %fmin_f64(+Inf, 0x0.0) == 0x0.0
+; run: %fmin_f64(0x0.0, +Inf) == 0x0.0
+; run: %fmin_f64(-Inf, 0x0.0) == -Inf
+; run: %fmin_f64(0x0.0, -Inf) == -Inf
+; run: %fmin_f64(+Inf, -Inf) == -Inf
+
+; F64 Epsilon / Max / Min Positive
+; run: %fmin_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000000p-52
+; run: %fmin_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.ffffffffffffcp1023
+; run: %fmin_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
+
+; F64 Subnormals
+; run: %fmin_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
+; run: %fmin_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000002p-1022
+
+
+
+; F64 NaN's
+; For NaN's this operation is specified as producing a value that is a NaN
+; This behaviour differs from IEEE754's behaviour
+function %fmin_is_nan_f64(f64, f64) -> i32 {
+block0(v0: f64, v1: f64):
+    v2 = fmin v0, v1
+    v3 = fcmp ne v2, v2
+    v4 = bint.i32 v3
+    return v4
+}
+; run: %fmin_is_nan_f64(0x0.0, +NaN) == 1
+; run: %fmin_is_nan_f64(-NaN, 0x0.0) == 1
+; run: %fmin_is_nan_f64(0x0.0, +NaN:0x0) == 1
+; run: %fmin_is_nan_f64(0x0.0, +NaN:0x1) == 1
+; run: %fmin_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1
+; run: %fmin_is_nan_f64(-NaN:0x0, 0x0.0) == 1
+; run: %fmin_is_nan_f64(-NaN:0x1, 0x0.0) == 1
+; run: %fmin_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1
+; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x1) == 1
+; run: %fmin_is_nan_f64(-sNaN:0x1, 0x0.0) == 1
+; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1
+; run: %fmin_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index b43b86a925..cf045673f8 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -698,17 +698,21 @@ where
         Opcode::Fneg => assign(Value::neg(arg(0)?)?),
         Opcode::Fabs => assign(Value::abs(arg(0)?)?),
         Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
-        Opcode::Fmin => choose(
-            Value::is_nan(&arg(0)?)? || Value::lt(&arg(0)?, &arg(1)?)?,
-            arg(0)?,
-            arg(1)?,
-        ),
+        Opcode::Fmin => assign(match (arg(0)?, arg(1)?) {
+            (a, _) if a.is_nan()? => a,
+            (_, b) if b.is_nan()? => b,
+            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a,
+            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b,
+            (a, b) => a.min(b)?,
+        }),
         Opcode::FminPseudo => unimplemented!("FminPseudo"),
-        Opcode::Fmax => choose(
-            Value::is_nan(&arg(0)?)? || Value::gt(&arg(0)?, &arg(1)?)?,
-            arg(0)?,
-            arg(1)?,
-        ),
+        Opcode::Fmax => assign(match (arg(0)?, arg(1)?) {
+            (a, _) if a.is_nan()? => a,
+            (_, b) if b.is_nan()? => b,
+            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b,
+            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a,
+            (a, b) => a.max(b)?,
+        }),
         Opcode::FmaxPseudo => unimplemented!("FmaxPseudo"),
         Opcode::Ceil => unimplemented!("Ceil"),
         Opcode::Floor => unimplemented!("Floor"),
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index 9fe7baf531..fde4659bda 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -26,6 +26,9 @@ pub trait Value: Clone + From<DataValue> {
     fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
     fn concat(self, other: Self) -> ValueResult<Self>;
 
+    fn is_negative(&self) -> ValueResult<bool>;
+    fn is_zero(&self) -> ValueResult<bool>;
+
     fn max(self, other: Self) -> ValueResult<Self>;
     fn min(self, other: Self) -> ValueResult<Self>;
 
@@ -394,6 +397,22 @@ impl Value for DataValue {
         }
     }
 
+    fn is_negative(&self) -> ValueResult<bool> {
+        match self {
+            DataValue::F32(f) => Ok(f.is_negative()),
+            DataValue::F64(f) => Ok(f.is_negative()),
+            _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())),
+        }
+    }
+
+    fn is_zero(&self) -> ValueResult<bool> {
+        match self {
+            DataValue::F32(f) => Ok(f.is_zero()),
+            DataValue::F64(f) => Ok(f.is_zero()),
+            _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())),
+        }
+    }
+
     fn max(self, other: Self) -> ValueResult<Self> {
         if Value::gt(&self, &other)? {
             Ok(self)