From 0efe50ebce2f4f00a2a9d408c89229812216365c Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Mon, 29 Jul 2024 17:12:47 +0100
Subject: [PATCH] Fix `fmin`/`fmax` cprop miscompilation and add `f16`/`f128`
 `fmin`/`fmax` cprop support (#9030)

---
 cranelift/codegen/src/ir/immediates.rs        |  49 +++-
 cranelift/codegen/src/isle_prelude.rs         |  48 ++--
 cranelift/codegen/src/nan_canonicalization.rs |  12 +-
 cranelift/codegen/src/opts/cprop.isle         |  12 +
 cranelift/codegen/src/prelude.isle            |   8 +
 .../filetests/filetests/egraph/cprop.clif     | 226 ++++++++++++++++++
 6 files changed, 316 insertions(+), 39 deletions(-)

diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs
index f96d678b85..d9efcac277 100644
--- a/cranelift/codegen/src/ir/immediates.rs
+++ b/cranelift/codegen/src/ir/immediates.rs
@@ -512,11 +512,13 @@ macro_rules! ieee_float {
             const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS);
             const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1);
             const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK;
+            /// The positive WebAssembly canonical NaN.
+            pub const NAN: Self = Self::with_bits(Self::EXPONENT_MASK | (1 << (Self::SIGNIFICAND_BITS - 1)));
 
             /// Create a new
             #[doc = concat!("`", stringify!($name), "`")]
             /// containing the bits of `bits`.
-            pub fn with_bits(bits: $bits_ty) -> Self {
+            pub const fn with_bits(bits: $bits_ty) -> Self {
                 Self { bits }
             }
 
@@ -550,6 +552,42 @@ macro_rules! ieee_float {
                 Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK))
             }
 
+            /// Returns the minimum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition.
+            pub fn minimum(self, other: Self) -> Self {
+                // FIXME: Replace with Rust float method once it is stabilised.
+                if self.is_nan() || other.is_nan() {
+                    Self::NAN
+                } else if self.is_zero() && other.is_zero() {
+                    if self.is_negative() {
+                        self
+                    } else {
+                        other
+                    }
+                } else if self <= other {
+                    self
+                } else {
+                    other
+                }
+            }
+
+            /// Returns the maximum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition.
+            pub fn maximum(self, other: Self) -> Self {
+                // FIXME: Replace with Rust float method once it is stabilised.
+                if self.is_nan() || other.is_nan() {
+                    Self::NAN
+                } else if self.is_zero() && other.is_zero() {
+                    if self.is_positive() {
+                        self
+                    } else {
+                        other
+                    }
+                } else if self >= other {
+                    self
+                } else {
+                    other
+                }
+            }
+
             /// Create an
             #[doc = concat!("`", stringify!($name), "`")]
             /// number representing `2.0^n`.
@@ -583,6 +621,11 @@ macro_rules! ieee_float {
                 self.abs().bits() > Self::EXPONENT_MASK
             }
 
+            /// Returns true if `self` has a negative sign, including 0.0, NaNs with positive sign bit and positive infinity.
+            pub fn is_positive(self) -> bool {
+                !self.is_negative()
+            }
+
             /// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
             pub fn is_negative(self) -> bool {
                 self.bits() & Self::SIGN_MASK == Self::SIGN_MASK
@@ -641,8 +684,8 @@ macro_rules! ieee_float {
                         // Zeros are always equal regardless of sign.
                         return Some(Ordering::Equal);
                     }
-                    let lhs_positive = !self.is_negative();
-                    let rhs_positive = !rhs.is_negative();
+                    let lhs_positive = self.is_positive();
+                    let rhs_positive = rhs.is_positive();
                     if lhs_positive != rhs_positive {
                         // Different signs: negative < positive
                         return lhs_positive.partial_cmp(&rhs_positive);
diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs
index ce6879cb7a..64c343ea61 100644
--- a/cranelift/codegen/src/isle_prelude.rs
+++ b/cranelift/codegen/src/isle_prelude.rs
@@ -938,6 +938,14 @@ macro_rules! isle_common_prelude_methods {
             }
         }
 
+        fn f16_min(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
+            a.minimum(b).non_nan()
+        }
+
+        fn f16_max(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
+            a.maximum(b).non_nan()
+        }
+
         fn f16_neg(&mut self, n: Ieee16) -> Ieee16 {
             -n
         }
@@ -987,23 +995,11 @@ macro_rules! isle_common_prelude_methods {
         }
 
         fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a <= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.minimum(b).non_nan()
         }
 
         fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a >= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.maximum(b).non_nan()
         }
 
         fn f32_neg(&mut self, n: Ieee32) -> Ieee32 {
@@ -1055,23 +1051,11 @@ macro_rules! isle_common_prelude_methods {
         }
 
         fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a <= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.minimum(b).non_nan()
         }
 
         fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
-            if a.is_nan() || b.is_nan() {
-                None
-            } else if a >= b {
-                Some(a)
-            } else {
-                Some(b)
-            }
+            a.maximum(b).non_nan()
         }
 
         fn f64_neg(&mut self, n: Ieee64) -> Ieee64 {
@@ -1086,6 +1070,14 @@ macro_rules! isle_common_prelude_methods {
             a.copysign(b)
         }
 
+        fn f128_min(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
+            a.minimum(b).non_nan()
+        }
+
+        fn f128_max(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
+            a.maximum(b).non_nan()
+        }
+
         fn f128_neg(&mut self, n: Ieee128) -> Ieee128 {
             -n
         }
diff --git a/cranelift/codegen/src/nan_canonicalization.rs b/cranelift/codegen/src/nan_canonicalization.rs
index 5be41857ac..e6a73deaf8 100644
--- a/cranelift/codegen/src/nan_canonicalization.rs
+++ b/cranelift/codegen/src/nan_canonicalization.rs
@@ -10,10 +10,6 @@ use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
 use crate::opts::MemFlags;
 use crate::timing;
 
-// Canonical 32-bit and 64-bit NaN values.
-static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
-static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000;
-
 /// Perform the NaN canonicalization pass.
 pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) {
     let _tt = timing::canonicalize_nans();
@@ -95,7 +91,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
 
     match val_type {
         types::F32 => {
-            let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN));
+            let canon_nan = pos.ins().f32const(Ieee32::NAN);
             if has_vector_support {
                 vectorized_scalar_select(pos, canon_nan, types::F32X4);
             } else {
@@ -103,7 +99,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
             }
         }
         types::F64 => {
-            let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN));
+            let canon_nan = pos.ins().f64const(Ieee64::NAN);
             if has_vector_support {
                 vectorized_scalar_select(pos, canon_nan, types::F64X2);
             } else {
@@ -111,12 +107,12 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
             }
         }
         types::F32X4 => {
-            let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN));
+            let canon_nan = pos.ins().f32const(Ieee32::NAN);
             let canon_nan = pos.ins().splat(types::F32X4, canon_nan);
             vector_select(pos, canon_nan);
         }
         types::F64X2 => {
-            let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN));
+            let canon_nan = pos.ins().f64const(Ieee64::NAN);
             let canon_nan = pos.ins().splat(types::F64X2, canon_nan);
             vector_select(pos, canon_nan);
         }
diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle
index a9685be759..7eee7d219c 100644
--- a/cranelift/codegen/src/opts/cprop.isle
+++ b/cranelift/codegen/src/opts/cprop.isle
@@ -350,19 +350,31 @@
       (if-let r (f64_nearest n))
       (subsume (f64const $F64 r)))
 
+(rule (simplify (fmin $F16 (f16const $F16 n) (f16const $F16 m)))
+      (if-let r (f16_min n m))
+      (subsume (f16const $F32 r)))
 (rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m)))
       (if-let r (f32_min n m))
       (subsume (f32const $F32 r)))
 (rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m)))
       (if-let r (f64_min n m))
       (subsume (f64const $F64 r)))
+(rule (simplify (fmin $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
+      (if-let r (f128_min n m))
+      (subsume (f128const $F128 (ieee128_constant r))))
 
+(rule (simplify (fmax $F16 (f16const $F16 n) (f16const $F16 m)))
+      (if-let r (f16_max n m))
+      (subsume (f16const $F16 r)))
 (rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m)))
       (if-let r (f32_max n m))
       (subsume (f32const $F32 r)))
 (rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m)))
       (if-let r (f64_max n m))
       (subsume (f64const $F64 r)))
+(rule (simplify (fmax $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
+      (if-let r (f128_max n m))
+      (subsume (f128const $F128 (ieee128_constant r))))
 
 (rule (simplify (fneg $F16 (f16const $F16 n)))
       (subsume (f16const $F16 (f16_neg n))))
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index 233ab899e3..2e7b3ee877 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -239,6 +239,10 @@
 
 ;; Floating point operations
 
+(decl pure partial f16_min (Ieee16 Ieee16) Ieee16)
+(extern constructor f16_min f16_min)
+(decl pure partial f16_max (Ieee16 Ieee16) Ieee16)
+(extern constructor f16_max f16_max)
 (decl pure f16_neg (Ieee16) Ieee16)
 (extern constructor f16_neg f16_neg)
 (decl pure f16_abs (Ieee16) Ieee16)
@@ -301,6 +305,10 @@
 (extern constructor f64_abs f64_abs)
 (decl pure f64_copysign (Ieee64 Ieee64) Ieee64)
 (extern constructor f64_copysign f64_copysign)
+(decl pure partial f128_min (Ieee128 Ieee128) Ieee128)
+(extern constructor f128_min f128_min)
+(decl pure partial f128_max (Ieee128 Ieee128) Ieee128)
+(extern constructor f128_max f128_max)
 (decl pure f128_neg (Ieee128) Ieee128)
 (extern constructor f128_neg f128_neg)
 (decl pure f128_abs (Ieee128) Ieee128)
diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif
index f1ca491f49..f6f0e01763 100644
--- a/cranelift/filetests/filetests/egraph/cprop.clif
+++ b/cranelift/filetests/filetests/egraph/cprop.clif
@@ -313,6 +313,72 @@ block0:
 ; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412
 ; nextln: return v2
 
+function %f16_fmin() -> f16 {
+block0:
+    v1 = f16const -0x1.5p6
+    v2 = f16const -0x1.5p7
+    v3 = fmin v2, v1
+    return v3
+}
+
+; check: v4 = f16const -0x1.500p7
+; check: return v4  ; v4 = -0x1.500p7
+
+function %f16_fmin_zero_1() -> f16 {
+block0:
+    v1 = f16const 0.0
+    v2 = f16const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f16const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f16_fmin_zero_2() -> f16 {
+block0:
+    v1 = f16const -0.0
+    v2 = f16const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f16const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f16_fmax() -> f16 {
+block0:
+    v1 = f16const -0x1.5p6
+    v2 = f16const -0x1.5p7
+    v3 = fmax v2, v1
+    return v3
+}
+
+; check: v4 = f16const -0x1.500p6
+; check: return v4  ; v4 = -0x1.500p6
+
+function %f16_fmax_zero_1() -> f16 {
+block0:
+    v1 = f16const 0.0
+    v2 = f16const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f16const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f16_fmax_zero_2() -> f16 {
+block0:
+    v1 = f16const -0.0
+    v2 = f16const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f16const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f16_fneg() -> f16 {
 block0:
     v1 = f16const 0.0
@@ -449,6 +515,28 @@ block0:
 ; check: v4 = f32const 0x1.500000p6
 ; check: return v4  ; v4 = 0x1.500000p6
 
+function %f32_fmin_zero_1() -> f32 {
+block0:
+    v1 = f32const 0.0
+    v2 = f32const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f32const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f32_fmin_zero_2() -> f32 {
+block0:
+    v1 = f32const -0.0
+    v2 = f32const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f32const -0.0
+; check: return v4  ; v4 = -0.0
+
 function %f32_fmax() -> f32 {
 block0:
     v1 = f32const 0x1.5p6
@@ -460,6 +548,28 @@ block0:
 ; check: v4 = f32const 0x1.500000p7
 ; check: return v4  ; v4 = 0x1.500000p7
 
+function %f32_fmax_zero_1() -> f32 {
+block0:
+    v1 = f32const 0.0
+    v2 = f32const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f32const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f32_fmax_zero_2() -> f32 {
+block0:
+    v1 = f32const -0.0
+    v2 = f32const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f32const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f32_fneg() -> f32 {
 block0:
     v1 = f32const 0.0
@@ -596,6 +706,28 @@ block0:
 ; check: v4 = f64const -0x1.5000000000000p7
 ; check: return v4  ; v4 = -0x1.5000000000000p7
 
+function %f64_fmin_zero_1() -> f64 {
+block0:
+    v1 = f64const 0.0
+    v2 = f64const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f64const -0.0
+; check: return v4  ; v4 = -0.0
+
+function %f64_fmin_zero_2() -> f64 {
+block0:
+    v1 = f64const -0.0
+    v2 = f64const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: v4 = f64const -0.0
+; check: return v4  ; v4 = -0.0
+
 function %f64_fmax() -> f64 {
 block0:
     v1 = f64const -0x1.5p6
@@ -607,6 +739,28 @@ block0:
 ; check: v4 = f64const -0x1.5000000000000p6
 ; check: return v4  ; v4 = -0x1.5000000000000p6
 
+function %f64_fmax_zero_1() -> f64 {
+block0:
+    v1 = f64const 0.0
+    v2 = f64const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f64const 0.0
+; check: return v4  ; v4 = 0.0
+
+function %f64_fmax_zero_2() -> f64 {
+block0:
+    v1 = f64const -0.0
+    v2 = f64const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: v4 = f64const 0.0
+; check: return v4  ; v4 = 0.0
+
 function %f64_fneg() -> f64 {
 block0:
     v1 = f64const 0.0
@@ -638,6 +792,78 @@ block0:
 ; check: v4 = f64const -NaN
 ; check: return v4  ; v4 = -NaN
 
+function %f128_fmin() -> f128 {
+block0:
+    v1 = f128const 0x1.5p6
+    v2 = f128const 0x1.5p7
+    v3 = fmin v2, v1
+    return v3
+}
+
+; check: const0 = 0x40055000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = 0x1.5000000000000000000000000000p6
+
+function %f128_fmin_zero_1() -> f128 {
+block0:
+    v1 = f128const 0.0
+    v2 = f128const -0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: const1 = 0x80000000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = -0.0
+
+function %f128_fmin_zero_2() -> f128 {
+block0:
+    v1 = f128const -0.0
+    v2 = f128const 0.0
+    v3 = fmin v1, v2
+    return v3
+}
+
+; check: const0 = 0x80000000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = -0.0
+
+function %f128_fmax() -> f128 {
+block0:
+    v1 = f128const 0x1.5p6
+    v2 = f128const 0x1.5p7
+    v3 = fmax v2, v1
+    return v3
+}
+
+; check: const1 = 0x40065000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = 0x1.5000000000000000000000000000p7
+
+function %f128_fmax_zero_1() -> f128 {
+block0:
+    v1 = f128const 0.0
+    v2 = f128const -0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: const0 = 0x00000000000000000000000000000000
+; check: v4 = f128const const0
+; check: return v4  ; v4 = 0.0
+
+function %f128_fmax_zero_2() -> f128 {
+block0:
+    v1 = f128const -0.0
+    v2 = f128const 0.0
+    v3 = fmax v1, v2
+    return v3
+}
+
+; check: const1 = 0x00000000000000000000000000000000
+; check: v4 = f128const const1
+; check: return v4  ; v4 = 0.0
+
 function %f128_fneg() -> f128 {
 block0:
     v1 = f128const 0.0