cranelift: Fix `fmin`/`fmax` when dealing with zeroes (#4373)

`fmin`/`fmax` are defined as returning -0.0 as smaller than 0.0. This is not how the IEEE754 views these values and the interpreter was returning the wrong value in these operations since it was just using the standard IEEE754 comparisons. This also tries to preserve NaN information by avoiding passing NaN's through any operation that could canonicalize it.
2 years ago · 925891245d
5 changed files with 263 additions and 10 deletions
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@ -790,6 +790,16 @@ impl Ieee32 {
    pub fn copysign(self, sign: Self) -> Self {
        Self::with_float(self.as_f32().copysign(sign.as_f32()))
    }
    /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
    pub fn is_negative(&self) -> bool {
        self.as_f32().is_sign_negative()
    }
    /// Returns true if self is positive or negative zero
    pub fn is_zero(&self) -> bool {
        self.as_f32() == 0.0
    }
 }
 impl PartialOrd for Ieee32 {
@ -909,6 +919,16 @@ impl Ieee64 {
    pub fn copysign(self, sign: Self) -> Self {
        Self::with_float(self.as_f64().copysign(sign.as_f64()))
    }
    /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
    pub fn is_negative(&self) -> bool {
        self.as_f64().is_sign_negative()
    }
    /// Returns true if self is positive or negative zero
    pub fn is_zero(&self) -> bool {
        self.as_f64() == 0.0
    }
 }
 impl PartialOrd for Ieee64 {
--- a/cranelift/filetests/filetests/runtests/fmax.clif
+++ b/cranelift/filetests/filetests/runtests/fmax.clif
@ -0,0 +1,105 @@
 test interpret
 test run
 target x86_64
 target aarch64
 target s390x
 function %fmax_f32(f32, f32) -> f32 {
 block0(v0: f32, v1: f32):
    v2 = fmax v0, v1
    return v2
 }
 ; run: %fmax_f32(0x1.0, 0x2.0) == 0x2.0
 ; run: %fmax_f32(0x1.0p10, 0x1.0p11) == 0x1.0p11
 ; run: %fmax_f32(0x0.0, -0x0.0) == 0x0.0
 ; run: %fmax_f32(-0x0.0, 0x0.0) == 0x0.0
 ; run: %fmax_f32(+Inf, 0x0.0) == +Inf
 ; run: %fmax_f32(0x0.0, +Inf) == +Inf
 ; run: %fmax_f32(-Inf, 0x0.0) == 0x0.0
 ; run: %fmax_f32(0x0.0, -Inf) == 0x0.0
 ; run: %fmax_f32(+Inf, -Inf) == +Inf
 ; F32 Epsilon / Max / Min Positive
 ; run: %fmax_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000002p-23
 ; run: %fmax_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffep127
 ; run: %fmax_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126
 ; F32 Subnormals
 ; run: %fmax_f32(0x0.800002p-126, 0x0.800000p-126) ==  0x0.800002p-126
 ; run: %fmax_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800000p-126
 ; F32 NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ; This behaviour differs from IEEE754's behaviour
 function %fmax_is_nan_f32(f32, f32) -> i32 {
 block0(v0: f32, v1: f32):
    v2 = fmax v0, v1
    v3 = fcmp ne v2, v2
    v4 = bint.i32 v3
    return v4
 }
 ; run: %fmax_is_nan_f32(0x0.0, +NaN) == 1
 ; run: %fmax_is_nan_f32(-NaN, 0x0.0) == 1
 ; run: %fmax_is_nan_f32(0x0.0, +NaN:0x0) == 1
 ; run: %fmax_is_nan_f32(0x0.0, +NaN:0x1) == 1
 ; run: %fmax_is_nan_f32(0x0.0, +NaN:0x300001) == 1
 ; run: %fmax_is_nan_f32(-NaN:0x0, 0x0.0) == 1
 ; run: %fmax_is_nan_f32(-NaN:0x1, 0x0.0) == 1
 ; run: %fmax_is_nan_f32(-NaN:0x300001, 0x0.0) == 1
 ; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x1) == 1
 ; run: %fmax_is_nan_f32(-sNaN:0x1, 0x0.0) == 1
 ; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x200001) == 1
 ; run: %fmax_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1
 function %fmax_f64(f64, f64) -> f64 {
 block0(v0: f64, v1: f64):
    v2 = fmax v0, v1
    return v2
 }
 ; run: %fmax_f64(0x1.0, 0x2.0) == 0x2.0
 ; run: %fmax_f64(0x1.0p10, 0x1.0p11) == 0x1.0p11
 ; run: %fmax_f64(0x0.0, -0x0.0) == 0x0.0
 ; run: %fmax_f64(-0x0.0, 0x0.0) == 0x0.0
 ; run: %fmax_f64(+Inf, 0x0.0) == +Inf
 ; run: %fmax_f64(0x0.0, +Inf) == +Inf
 ; run: %fmax_f64(-Inf, 0x0.0) == 0x0.0
 ; run: %fmax_f64(0x0.0, -Inf) == 0x0.0
 ; run: %fmax_f64(+Inf, -Inf) == +Inf
 ; F64 Epsilon / Max / Min Positive
 ; run: %fmax_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000002p-52
 ; run: %fmax_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023
 ; run: %fmax_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
 ; F64 Subnormals
 ; run: %fmax_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000002p-1022
 ; run: %fmax_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000000p-1022
 ; F64 NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ; This behaviour differs from IEEE754's behaviour
 function %fmax_is_nan_f64(f64, f64) -> i32 {
 block0(v0: f64, v1: f64):
    v2 = fmax v0, v1
    v3 = fcmp ne v2, v2
    v4 = bint.i32 v3
    return v4
 }
 ; run: %fmax_is_nan_f64(0x0.0, +NaN) == 1
 ; run: %fmax_is_nan_f64(-NaN, 0x0.0) == 1
 ; run: %fmax_is_nan_f64(0x0.0, +NaN:0x0) == 1
 ; run: %fmax_is_nan_f64(0x0.0, +NaN:0x1) == 1
 ; run: %fmax_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1
 ; run: %fmax_is_nan_f64(-NaN:0x0, 0x0.0) == 1
 ; run: %fmax_is_nan_f64(-NaN:0x1, 0x0.0) == 1
 ; run: %fmax_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1
 ; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x1) == 1
 ; run: %fmax_is_nan_f64(-sNaN:0x1, 0x0.0) == 1
 ; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1
 ; run: %fmax_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1
--- a/cranelift/filetests/filetests/runtests/fmin.clif
+++ b/cranelift/filetests/filetests/runtests/fmin.clif
@ -0,0 +1,105 @@
 test interpret
 test run
 target x86_64
 target aarch64
 target s390x
 function %fmin_f32(f32, f32) -> f32 {
 block0(v0: f32, v1: f32):
    v2 = fmin v0, v1
    return v2
 }
 ; run: %fmin_f32(0x1.0, 0x2.0) == 0x1.0
 ; run: %fmin_f32(0x1.0p10, 0x1.0p11) == 0x1.0p10
 ; run: %fmin_f32(0x0.0, -0x0.0) == -0x0.0
 ; run: %fmin_f32(-0x0.0, 0x0.0) == -0x0.0
 ; run: %fmin_f32(+Inf, 0x0.0) == 0x0.0
 ; run: %fmin_f32(0x0.0, +Inf) == 0x0.0
 ; run: %fmin_f32(-Inf, 0x0.0) == -Inf
 ; run: %fmin_f32(0x0.0, -Inf) == -Inf
 ; run: %fmin_f32(+Inf, -Inf) == -Inf
 ; F32 Epsilon / Max / Min Positive
 ; run: %fmin_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000000p-23
 ; run: %fmin_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffcp127
 ; run: %fmin_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126
 ; F32 Subnormals
 ; run: %fmin_f32(0x0.800002p-126, 0x0.800000p-126) ==  0x0.800000p-126
 ; run: %fmin_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800002p-126
 ; F32 NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ; This behaviour differs from IEEE754's behaviour
 function %fmin_is_nan_f32(f32, f32) -> i32 {
 block0(v0: f32, v1: f32):
    v2 = fmin v0, v1
    v3 = fcmp ne v2, v2
    v4 = bint.i32 v3
    return v4
 }
 ; run: %fmin_is_nan_f32(0x0.0, +NaN) == 1
 ; run: %fmin_is_nan_f32(-NaN, 0x0.0) == 1
 ; run: %fmin_is_nan_f32(0x0.0, +NaN:0x0) == 1
 ; run: %fmin_is_nan_f32(0x0.0, +NaN:0x1) == 1
 ; run: %fmin_is_nan_f32(0x0.0, +NaN:0x300001) == 1
 ; run: %fmin_is_nan_f32(-NaN:0x0, 0x0.0) == 1
 ; run: %fmin_is_nan_f32(-NaN:0x1, 0x0.0) == 1
 ; run: %fmin_is_nan_f32(-NaN:0x300001, 0x0.0) == 1
 ; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x1) == 1
 ; run: %fmin_is_nan_f32(-sNaN:0x1, 0x0.0) == 1
 ; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x200001) == 1
 ; run: %fmin_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1
 function %fmin_f64(f64, f64) -> f64 {
 block0(v0: f64, v1: f64):
    v2 = fmin v0, v1
    return v2
 }
 ; run: %fmin_f64(0x1.0, 0x2.0) == 0x1.0
 ; run: %fmin_f64(0x1.0p10, 0x1.0p11) == 0x1.0p10
 ; run: %fmin_f64(0x0.0, -0x0.0) == -0x0.0
 ; run: %fmin_f64(-0x0.0, 0x0.0) == -0x0.0
 ; run: %fmin_f64(+Inf, 0x0.0) == 0x0.0
 ; run: %fmin_f64(0x0.0, +Inf) == 0x0.0
 ; run: %fmin_f64(-Inf, 0x0.0) == -Inf
 ; run: %fmin_f64(0x0.0, -Inf) == -Inf
 ; run: %fmin_f64(+Inf, -Inf) == -Inf
 ; F64 Epsilon / Max / Min Positive
 ; run: %fmin_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000000p-52
 ; run: %fmin_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.ffffffffffffcp1023
 ; run: %fmin_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022
 ; F64 Subnormals
 ; run: %fmin_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022
 ; run: %fmin_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000002p-1022
 ; F64 NaN's
 ; For NaN's this operation is specified as producing a value that is a NaN
 ; This behaviour differs from IEEE754's behaviour
 function %fmin_is_nan_f64(f64, f64) -> i32 {
 block0(v0: f64, v1: f64):
    v2 = fmin v0, v1
    v3 = fcmp ne v2, v2
    v4 = bint.i32 v3
    return v4
 }
 ; run: %fmin_is_nan_f64(0x0.0, +NaN) == 1
 ; run: %fmin_is_nan_f64(-NaN, 0x0.0) == 1
 ; run: %fmin_is_nan_f64(0x0.0, +NaN:0x0) == 1
 ; run: %fmin_is_nan_f64(0x0.0, +NaN:0x1) == 1
 ; run: %fmin_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1
 ; run: %fmin_is_nan_f64(-NaN:0x0, 0x0.0) == 1
 ; run: %fmin_is_nan_f64(-NaN:0x1, 0x0.0) == 1
 ; run: %fmin_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1
 ; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x1) == 1
 ; run: %fmin_is_nan_f64(-sNaN:0x1, 0x0.0) == 1
 ; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1
 ; run: %fmin_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@ -698,17 +698,21 @@ where
        Opcode::Fneg => assign(Value::neg(arg(0)?)?),
        Opcode::Fabs => assign(Value::abs(arg(0)?)?),
        Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?,
-        Opcode::Fmin => choose(
+        Opcode::Fmin => assign(match (arg(0)?, arg(1)?) {
-            Value::is_nan(&arg(0)?)? || Value::lt(&arg(0)?, &arg(1)?)?,
+            (a, _) if a.is_nan()? => a,
-            arg(0)?,
+            (_, b) if b.is_nan()? => b,
-            arg(1)?,
+            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a,
-        ),
+            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b,
            (a, b) => a.min(b)?,
        }),
        Opcode::FminPseudo => unimplemented!("FminPseudo"),
-        Opcode::Fmax => choose(
+        Opcode::Fmax => assign(match (arg(0)?, arg(1)?) {
-            Value::is_nan(&arg(0)?)? || Value::gt(&arg(0)?, &arg(1)?)?,
+            (a, _) if a.is_nan()? => a,
-            arg(0)?,
+            (_, b) if b.is_nan()? => b,
-            arg(1)?,
+            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b,
-        ),
+            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a,
            (a, b) => a.max(b)?,
        }),
        Opcode::FmaxPseudo => unimplemented!("FmaxPseudo"),
        Opcode::Ceil => unimplemented!("Ceil"),
        Opcode::Floor => unimplemented!("Floor"),
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@ -26,6 +26,9 @@ pub trait Value: Clone + From<DataValue> {
    fn convert(self, kind: ValueConversionKind) -> ValueResult<Self>;
    fn concat(self, other: Self) -> ValueResult<Self>;
    fn is_negative(&self) -> ValueResult<bool>;
    fn is_zero(&self) -> ValueResult<bool>;
    fn max(self, other: Self) -> ValueResult<Self>;
    fn min(self, other: Self) -> ValueResult<Self>;
@ -394,6 +397,22 @@ impl Value for DataValue {
        }
    }
    fn is_negative(&self) -> ValueResult<bool> {
        match self {
            DataValue::F32(f) => Ok(f.is_negative()),
            DataValue::F64(f) => Ok(f.is_negative()),
            _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())),
        }
    }
    fn is_zero(&self) -> ValueResult<bool> {
        match self {
            DataValue::F32(f) => Ok(f.is_zero()),
            DataValue::F64(f) => Ok(f.is_zero()),
            _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())),
        }
    }
    fn max(self, other: Self) -> ValueResult<Self> {
        if Value::gt(&self, &other)? {
            Ok(self)