Fix `fmin`/`fmax` cprop miscompilation and add `f16`/`f128` `fmin`/`fmax` cprop support (#9030)

3 months ago · 0efe50ebce
6 changed files with 316 additions and 39 deletions
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@ -512,11 +512,13 @@ macro_rules! ieee_float {
            const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS);
            const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1);
            const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK;
+            /// The positive WebAssembly canonical NaN.
+            pub const NAN: Self = Self::with_bits(Self::EXPONENT_MASK | (1 << (Self::SIGNIFICAND_BITS - 1)));

            /// Create a new
            #[doc = concat!("`", stringify!($name), "`")]
            /// containing the bits of `bits`.
-            pub fn with_bits(bits: $bits_ty) -> Self {
+            pub const fn with_bits(bits: $bits_ty) -> Self {
                Self { bits }
            }

@ -550,6 +552,42 @@ macro_rules! ieee_float {
                Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK))
            }

+            /// Returns the minimum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition: if either operand is NaN the result is `Self::NAN`, and if both are zero a negative zero wins over a positive one.
+            pub fn minimum(self, other: Self) -> Self {
+                // FIXME: Replace with Rust float method once it is stabilised.
+                if self.is_nan() || other.is_nan() {
+                    Self::NAN
+                } else if self.is_zero() && other.is_zero() {
+                    if self.is_negative() {
+                        self
+                    } else {
+                        other
+                    }
+                } else if self <= other {
+                    self
+                } else {
+                    other
+                }
+            }
+
+            /// Returns the maximum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition: if either operand is NaN the result is `Self::NAN`, and if both are zero a positive zero wins over a negative one.
+            pub fn maximum(self, other: Self) -> Self {
+                // FIXME: Replace with Rust float method once it is stabilised.
+                if self.is_nan() || other.is_nan() {
+                    Self::NAN
+                } else if self.is_zero() && other.is_zero() {
+                    if self.is_positive() {
+                        self
+                    } else {
+                        other
+                    }
+                } else if self >= other {
+                    self
+                } else {
+                    other
+                }
+            }
+
            /// Create an
            #[doc = concat!("`", stringify!($name), "`")]
            /// number representing `2.0^n`.
@ -583,6 +621,11 @@ macro_rules! ieee_float {
                self.abs().bits() > Self::EXPONENT_MASK
            }

+            /// Returns true if `self` has a positive sign, including 0.0, NaNs with positive sign bit and positive infinity.
+            pub fn is_positive(self) -> bool {
+                !self.is_negative()
+            }
+
            /// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
            pub fn is_negative(self) -> bool {
                self.bits() & Self::SIGN_MASK == Self::SIGN_MASK
@ -641,8 +684,8 @@ macro_rules! ieee_float {
                        // Zeros are always equal regardless of sign.
                        return Some(Ordering::Equal);
                    }
-                    let lhs_positive = !self.is_negative();
-                    let rhs_positive = !rhs.is_negative();
+                    let lhs_positive = self.is_positive();
+                    let rhs_positive = rhs.is_positive();
                    if lhs_positive != rhs_positive {
                        // Different signs: negative < positive
                        return lhs_positive.partial_cmp(&rhs_positive);
--- a/cranelift/codegen/src/isle_prelude.rs
+++ b/cranelift/codegen/src/isle_prelude.rs
@ -938,6 +938,14 @@ macro_rules! isle_common_prelude_methods {
            }
        }

+        fn f16_min(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
+            a.minimum(b).non_nan()
+        }
+
+        fn f16_max(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
+            a.maximum(b).non_nan()
+        }
+
        fn f16_neg(&mut self, n: Ieee16) -> Ieee16 {
            -n
        }
@ -987,23 +995,11 @@ macro_rules! isle_common_prelude_methods {
        }

        fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a <= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.minimum(b).non_nan()
        }

        fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a >= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.maximum(b).non_nan()
        }

        fn f32_neg(&mut self, n: Ieee32) -> Ieee32 {
@ -1055,23 +1051,11 @@ macro_rules! isle_common_prelude_methods {
        }

        fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a <= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.minimum(b).non_nan()
        }

        fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a >= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.maximum(b).non_nan()
        }

        fn f64_neg(&mut self, n: Ieee64) -> Ieee64 {
@ -1086,6 +1070,14 @@ macro_rules! isle_common_prelude_methods {
            a.copysign(b)
        }

+        fn f128_min(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
+            a.minimum(b).non_nan()
+        }
+
+        fn f128_max(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
+            a.maximum(b).non_nan()
+        }
+
        fn f128_neg(&mut self, n: Ieee128) -> Ieee128 {
            -n
        }
--- a/cranelift/codegen/src/nan_canonicalization.rs
+++ b/cranelift/codegen/src/nan_canonicalization.rs
@ -10,10 +10,6 @@ use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
 use crate::opts::MemFlags;
 use crate::timing;

-// Canonical 32-bit and 64-bit NaN values.
-static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
-static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000;
-
 /// Perform the NaN canonicalization pass.
 pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) {
    let _tt = timing::canonicalize_nans();
@ -95,7 +91,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)

    match val_type {
        types::F32 => {
-            let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN));
+            let canon_nan = pos.ins().f32const(Ieee32::NAN);
            if has_vector_support {
                vectorized_scalar_select(pos, canon_nan, types::F32X4);
            } else {
@ -103,7 +99,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
            }
        }
        types::F64 => {
-            let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN));
+            let canon_nan = pos.ins().f64const(Ieee64::NAN);
            if has_vector_support {
                vectorized_scalar_select(pos, canon_nan, types::F64X2);
            } else {
@ -111,12 +107,12 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
            }
        }
        types::F32X4 => {
-            let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN));
+            let canon_nan = pos.ins().f32const(Ieee32::NAN);
            let canon_nan = pos.ins().splat(types::F32X4, canon_nan);
            vector_select(pos, canon_nan);
        }
        types::F64X2 => {
-            let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN));
+            let canon_nan = pos.ins().f64const(Ieee64::NAN);
            let canon_nan = pos.ins().splat(types::F64X2, canon_nan);
            vector_select(pos, canon_nan);
        }
--- a/cranelift/codegen/src/opts/cprop.isle
+++ b/cranelift/codegen/src/opts/cprop.isle
@ -350,19 +350,31 @@
      (if-let r (f64_nearest n))
      (subsume (f64const $F64 r)))

+(rule (simplify (fmin $F16 (f16const $F16 n) (f16const $F16 m)))
+      (if-let r (f16_min n m))
+      (subsume (f16const $F16 r)))
 (rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m)))
      (if-let r (f32_min n m))
      (subsume (f32const $F32 r)))
 (rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m)))
      (if-let r (f64_min n m))
      (subsume (f64const $F64 r)))
+(rule (simplify (fmin $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
+      (if-let r (f128_min n m))
+      (subsume (f128const $F128 (ieee128_constant r))))

+(rule (simplify (fmax $F16 (f16const $F16 n) (f16const $F16 m)))
+      (if-let r (f16_max n m))
+      (subsume (f16const $F16 r)))
 (rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m)))
      (if-let r (f32_max n m))
      (subsume (f32const $F32 r)))
 (rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m)))
      (if-let r (f64_max n m))
      (subsume (f64const $F64 r)))
+(rule (simplify (fmax $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
+      (if-let r (f128_max n m))
+      (subsume (f128const $F128 (ieee128_constant r))))

 (rule (simplify (fneg $F16 (f16const $F16 n)))
      (subsume (f16const $F16 (f16_neg n))))
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@ -239,6 +239,10 @@

 ;; Floating point operations

+(decl pure partial f16_min (Ieee16 Ieee16) Ieee16)
+(extern constructor f16_min f16_min)
+(decl pure partial f16_max (Ieee16 Ieee16) Ieee16)
+(extern constructor f16_max f16_max)
 (decl pure f16_neg (Ieee16) Ieee16)
 (extern constructor f16_neg f16_neg)
 (decl pure f16_abs (Ieee16) Ieee16)
@ -301,6 +305,10 @@
 (extern constructor f64_abs f64_abs)
 (decl pure f64_copysign (Ieee64 Ieee64) Ieee64)
 (extern constructor f64_copysign f64_copysign)
+(decl pure partial f128_min (Ieee128 Ieee128) Ieee128)
+(extern constructor f128_min f128_min)
+(decl pure partial f128_max (Ieee128 Ieee128) Ieee128)
+(extern constructor f128_max f128_max)
 (decl pure f128_neg (Ieee128) Ieee128)
 (extern constructor f128_neg f128_neg)
 (decl pure f128_abs (Ieee128) Ieee128)
--- a/cranelift/filetests/filetests/egraph/cprop.clif
+++ b/cranelift/filetests/filetests/egraph/cprop.clif
@ -313,6 +313,72 @@ block0:
 ; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412
 ; nextln: return v2

+function %f16_fmin() -> f16 {
+block0:
+    v1 = f16const -0x1.5p6
+    v2 = f16const -0x1.5p7
+    v3 = fmin v2, v1
+    return v3
+}
+
+; check: v4 = f16const -0x1.500p7
+; check: return v4  ; v4 = -0x1.500p7
+
+function %f16_fmin_zero_1() -> f16 {
+block0:
+    v1 = f16const 0.0
+    v2 = f16const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f16const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f16_fmin_zero_2() -> f16 {
+block0:
+    v1 = f16const -0.0
+    v2 = f16const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f16const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f16_fmax() -> f16 {
+block0:
+    v1 = f16const -0x1.5p6
+    v2 = f16const -0x1.5p7
+    v3 = fmax v2, v1
+    return v3
+}
+
+; check: v4 = f16const -0x1.500p6
+; check: return v4  ; v4 = -0x1.500p6
+
+function %f16_fmax_zero_1() -> f16 {
+block0:
+    v1 = f16const 0.0
+    v2 = f16const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f16const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f16_fmax_zero_2() -> f16 {
+block0:
+    v1 = f16const -0.0
+    v2 = f16const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f16const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f16_fneg() -> f16 {
 block0:
    v1 = f16const 0.0
@ -449,6 +515,28 @@ block0:
 ; check: v4 = f32const 0x1.500000p6
 ; check: return v4  ; v4 = 0x1.500000p6

+function %f32_fmin_zero_1() -> f32 {
+block0:
+    v1 = f32const 0.0
+    v2 = f32const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f32const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f32_fmin_zero_2() -> f32 {
+block0:
+    v1 = f32const -0.0
+    v2 = f32const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f32const -0.0
+; check: return v4  ; v4 = -0.0
+
 function %f32_fmax() -> f32 {
 block0:
    v1 = f32const 0x1.5p6
@ -460,6 +548,28 @@ block0:
 ; check: v4 = f32const 0x1.500000p7
 ; check: return v4  ; v4 = 0x1.500000p7

+function %f32_fmax_zero_1() -> f32 {
+block0:
+    v1 = f32const 0.0
+    v2 = f32const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f32const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f32_fmax_zero_2() -> f32 {
+block0:
+    v1 = f32const -0.0
+    v2 = f32const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f32const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f32_fneg() -> f32 {
 block0:
    v1 = f32const 0.0
@ -596,6 +706,28 @@ block0:
 ; check: v4 = f64const -0x1.5000000000000p7
 ; check: return v4  ; v4 = -0x1.5000000000000p7

+function %f64_fmin_zero_1() -> f64 {
+block0:
+    v1 = f64const 0.0
+    v2 = f64const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f64const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f64_fmin_zero_2() -> f64 {
+block0:
+    v1 = f64const -0.0
+    v2 = f64const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f64const -0.0
+; check: return v4  ; v4 = -0.0
+
 function %f64_fmax() -> f64 {
 block0:
    v1 = f64const -0x1.5p6
@ -607,6 +739,28 @@ block0:
 ; check: v4 = f64const -0x1.5000000000000p6
 ; check: return v4  ; v4 = -0x1.5000000000000p6

+function %f64_fmax_zero_1() -> f64 {
+block0:
+    v1 = f64const 0.0
+    v2 = f64const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f64const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f64_fmax_zero_2() -> f64 {
+block0:
+    v1 = f64const -0.0
+    v2 = f64const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f64const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f64_fneg() -> f64 {
 block0:
    v1 = f64const 0.0
@ -638,6 +792,78 @@ block0:
 ; check: v4 = f64const -NaN
 ; check: return v4  ; v4 = -NaN

+function %f128_fmin() -> f128 {
+block0:
+    v1 = f128const 0x1.5p6
+    v2 = f128const 0x1.5p7
+    v3 = fmin v2, v1
+    return v3
+}
+
+; check: const0 = 0x40055000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = 0x1.5000000000000000000000000000p6
+
+function %f128_fmin_zero_1() -> f128 {
+block0:
+    v1 = f128const 0.0
+    v2 = f128const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: const1 = 0x80000000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = -0.0
+
+function %f128_fmin_zero_2() -> f128 {
+block0:
+    v1 = f128const -0.0
+    v2 = f128const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: const0 = 0x80000000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = -0.0
+
+function %f128_fmax() -> f128 {
+block0:
+    v1 = f128const 0x1.5p6
+    v2 = f128const 0x1.5p7
+    v3 = fmax v2, v1
+    return v3
+}
+
+; check: const1 = 0x40065000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = 0x1.5000000000000000000000000000p7
+
+function %f128_fmax_zero_1() -> f128 {
+block0:
+    v1 = f128const 0.0
+    v2 = f128const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: const0 = 0x00000000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = 0.0
+
+function %f128_fmax_zero_2() -> f128 {
+block0:
+    v1 = f128const -0.0
+    v2 = f128const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: const1 = 0x00000000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = 0.0
+
 function %f128_fneg() -> f128 {
 block0:
    v1 = f128const 0.0