From 0efe50ebce2f4f00a2a9d408c89229812216365c Mon Sep 17 00:00:00 2001 From: beetrees Date: Mon, 29 Jul 2024 17:12:47 +0100 Subject: [PATCH] Fix `fmin`/`fmax` cprop miscompilation and add `f16`/`f128` `fmin`/`fmax` cprop support (#9030) --- cranelift/codegen/src/ir/immediates.rs | 49 +++- cranelift/codegen/src/isle_prelude.rs | 48 ++-- cranelift/codegen/src/nan_canonicalization.rs | 12 +- cranelift/codegen/src/opts/cprop.isle | 12 + cranelift/codegen/src/prelude.isle | 8 + .../filetests/filetests/egraph/cprop.clif | 226 ++++++++++++++++++ 6 files changed, 316 insertions(+), 39 deletions(-) diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index f96d678b85..d9efcac277 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -512,11 +512,13 @@ macro_rules! ieee_float { const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1); const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK; + /// The positive WebAssembly canonical NaN. + pub const NAN: Self = Self::with_bits(Self::EXPONENT_MASK | (1 << (Self::SIGNIFICAND_BITS - 1))); /// Create a new #[doc = concat!("`", stringify!($name), "`")] /// containing the bits of `bits`. - pub fn with_bits(bits: $bits_ty) -> Self { + pub const fn with_bits(bits: $bits_ty) -> Self { Self { bits } } @@ -550,6 +552,42 @@ macro_rules! ieee_float { Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK)) } + /// Returns the minimum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition. + pub fn minimum(self, other: Self) -> Self { + // FIXME: Replace with Rust float method once it is stabilised. + if self.is_nan() || other.is_nan() { + Self::NAN + } else if self.is_zero() && other.is_zero() { + if self.is_negative() { + self + } else { + other + } + } else if self <= other { + self + } else { + other + } + } + + /// Returns the maximum of `self` and `other`, following the WebAssembly/IEEE 754-2019 definition. + pub fn maximum(self, other: Self) -> Self { + // FIXME: Replace with Rust float method once it is stabilised. + if self.is_nan() || other.is_nan() { + Self::NAN + } else if self.is_zero() && other.is_zero() { + if self.is_positive() { + self + } else { + other + } + } else if self >= other { + self + } else { + other + } + } + /// Create an #[doc = concat!("`", stringify!($name), "`")] /// number representing `2.0^n`. @@ -583,6 +621,11 @@ macro_rules! ieee_float { self.abs().bits() > Self::EXPONENT_MASK } + /// Returns true if `self` has a negative sign, including 0.0, NaNs with positive sign bit and positive infinity. + pub fn is_positive(self) -> bool { + !self.is_negative() + } + /// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity. pub fn is_negative(self) -> bool { self.bits() & Self::SIGN_MASK == Self::SIGN_MASK @@ -641,8 +684,8 @@ macro_rules! ieee_float { // Zeros are always equal regardless of sign. return Some(Ordering::Equal); } - let lhs_positive = !self.is_negative(); - let rhs_positive = !rhs.is_negative(); + let lhs_positive = self.is_positive(); + let rhs_positive = rhs.is_positive(); if lhs_positive != rhs_positive { // Different signs: negative < positive return lhs_positive.partial_cmp(&rhs_positive); diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index ce6879cb7a..64c343ea61 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -938,6 +938,14 @@ macro_rules! isle_common_prelude_methods { } } + fn f16_min(&mut self, a: Ieee16, b: Ieee16) -> Option { + a.minimum(b).non_nan() + } + + fn f16_max(&mut self, a: Ieee16, b: Ieee16) -> Option { + a.maximum(b).non_nan() + } + fn f16_neg(&mut self, n: Ieee16) -> Ieee16 { -n } @@ -987,23 +995,11 @@ macro_rules! isle_common_prelude_methods { } fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option { - if a.is_nan() || b.is_nan() { - None - } else if a <= b { - Some(a) - } else { - Some(b) - } + a.minimum(b).non_nan() } fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option { - if a.is_nan() || b.is_nan() { - None - } else if a >= b { - Some(a) - } else { - Some(b) - } + a.maximum(b).non_nan() } fn f32_neg(&mut self, n: Ieee32) -> Ieee32 { @@ -1055,23 +1051,11 @@ macro_rules! isle_common_prelude_methods { } fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option { - if a.is_nan() || b.is_nan() { - None - } else if a <= b { - Some(a) - } else { - Some(b) - } + a.minimum(b).non_nan() } fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option { - if a.is_nan() || b.is_nan() { - None - } else if a >= b { - Some(a) - } else { - Some(b) - } + a.maximum(b).non_nan() } fn f64_neg(&mut self, n: Ieee64) -> Ieee64 { @@ -1086,6 +1070,14 @@ macro_rules! isle_common_prelude_methods { a.copysign(b) } + fn f128_min(&mut self, a: Ieee128, b: Ieee128) -> Option { + a.minimum(b).non_nan() + } + + fn f128_max(&mut self, a: Ieee128, b: Ieee128) -> Option { + a.maximum(b).non_nan() + } + fn f128_neg(&mut self, n: Ieee128) -> Ieee128 { -n } diff --git a/cranelift/codegen/src/nan_canonicalization.rs b/cranelift/codegen/src/nan_canonicalization.rs index 5be41857ac..e6a73deaf8 100644 --- a/cranelift/codegen/src/nan_canonicalization.rs +++ b/cranelift/codegen/src/nan_canonicalization.rs @@ -10,10 +10,6 @@ use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value}; use crate::opts::MemFlags; use crate::timing; -// Canonical 32-bit and 64-bit NaN values. -static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000; -static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000; - /// Perform the NaN canonicalization pass. pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) { let _tt = timing::canonicalize_nans(); @@ -95,7 +91,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool) match val_type { types::F32 => { - let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); + let canon_nan = pos.ins().f32const(Ieee32::NAN); if has_vector_support { vectorized_scalar_select(pos, canon_nan, types::F32X4); } else { @@ -103,7 +99,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool) } } types::F64 => { - let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)); + let canon_nan = pos.ins().f64const(Ieee64::NAN); if has_vector_support { vectorized_scalar_select(pos, canon_nan, types::F64X2); } else { @@ -111,12 +107,12 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool) } } types::F32X4 => { - let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); + let canon_nan = pos.ins().f32const(Ieee32::NAN); let canon_nan = pos.ins().splat(types::F32X4, canon_nan); vector_select(pos, canon_nan); } types::F64X2 => { - let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)); + let canon_nan = pos.ins().f64const(Ieee64::NAN); let canon_nan = pos.ins().splat(types::F64X2, canon_nan); vector_select(pos, canon_nan); } diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index a9685be759..7eee7d219c 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -350,19 +350,31 @@ (if-let r (f64_nearest n)) (subsume (f64const $F64 r))) +(rule (simplify (fmin $F16 (f16const $F16 n) (f16const $F16 m))) + (if-let r (f16_min n m)) + (subsume (f16const $F32 r))) (rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m))) (if-let r (f32_min n m)) (subsume (f32const $F32 r))) (rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m))) (if-let r (f64_min n m)) (subsume (f64const $F64 r))) +(rule (simplify (fmin $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m)))) + (if-let r (f128_min n m)) + (subsume (f128const $F128 (ieee128_constant r)))) +(rule (simplify (fmax $F16 (f16const $F16 n) (f16const $F16 m))) + (if-let r (f16_max n m)) + (subsume (f16const $F16 r))) (rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m))) (if-let r (f32_max n m)) (subsume (f32const $F32 r))) (rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m))) (if-let r (f64_max n m)) (subsume (f64const $F64 r))) +(rule (simplify (fmax $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m)))) + (if-let r (f128_max n m)) + (subsume (f128const $F128 (ieee128_constant r)))) (rule (simplify (fneg $F16 (f16const $F16 n))) (subsume (f16const $F16 (f16_neg n)))) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 233ab899e3..2e7b3ee877 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -239,6 +239,10 @@ ;; Floating point operations +(decl pure partial f16_min (Ieee16 Ieee16) Ieee16) +(extern constructor f16_min f16_min) +(decl pure partial f16_max (Ieee16 Ieee16) Ieee16) +(extern constructor f16_max f16_max) (decl pure f16_neg (Ieee16) Ieee16) (extern constructor f16_neg f16_neg) (decl pure f16_abs (Ieee16) Ieee16) @@ -301,6 +305,10 @@ (extern constructor f64_abs f64_abs) (decl pure f64_copysign (Ieee64 Ieee64) Ieee64) (extern constructor f64_copysign f64_copysign) +(decl pure partial f128_min (Ieee128 Ieee128) Ieee128) +(extern constructor f128_min f128_min) +(decl pure partial f128_max (Ieee128 Ieee128) Ieee128) +(extern constructor f128_max f128_max) (decl pure f128_neg (Ieee128) Ieee128) (extern constructor f128_neg f128_neg) (decl pure f128_abs (Ieee128) Ieee128) diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif index f1ca491f49..f6f0e01763 100644 --- a/cranelift/filetests/filetests/egraph/cprop.clif +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -313,6 +313,72 @@ block0: ; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412 ; nextln: return v2 +function %f16_fmin() -> f16 { +block0: + v1 = f16const -0x1.5p6 + v2 = f16const -0x1.5p7 + v3 = fmin v2, v1 + return v3 +} + +; check: v4 = f16const -0x1.500p7 +; check: return v4 ; v4 = -0x1.500p7 + +function %f16_fmin_zero_1() -> f16 { +block0: + v1 = f16const 0.0 + v2 = f16const -0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f16const -0.0 +; check: return v4 ; v4 = -0.0 + +function %f16_fmin_zero_2() -> f16 { +block0: + v1 = f16const -0.0 + v2 = f16const 0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f16const -0.0 +; check: return v4 ; v4 = -0.0 + +function %f16_fmax() -> f16 { +block0: + v1 = f16const -0x1.5p6 + v2 = f16const -0x1.5p7 + v3 = fmax v2, v1 + return v3 +} + +; check: v4 = f16const -0x1.500p6 +; check: return v4 ; v4 = -0x1.500p6 + +function %f16_fmax_zero_1() -> f16 { +block0: + v1 = f16const 0.0 + v2 = f16const -0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f16const 0.0 +; check: return v4 ; v4 = 0.0 + +function %f16_fmax_zero_2() -> f16 { +block0: + v1 = f16const -0.0 + v2 = f16const 0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f16const 0.0 +; check: return v4 ; v4 = 0.0 + function %f16_fneg() -> f16 { block0: v1 = f16const 0.0 @@ -449,6 +515,28 @@ block0: ; check: v4 = f32const 0x1.500000p6 ; check: return v4 ; v4 = 0x1.500000p6 +function %f32_fmin_zero_1() -> f32 { +block0: + v1 = f32const 0.0 + v2 = f32const -0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f32const -0.0 +; check: return v4 ; v4 = -0.0 + +function %f32_fmin_zero_2() -> f32 { +block0: + v1 = f32const -0.0 + v2 = f32const 0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f32const -0.0 +; check: return v4 ; v4 = -0.0 + function %f32_fmax() -> f32 { block0: v1 = f32const 0x1.5p6 @@ -460,6 +548,28 @@ block0: ; check: v4 = f32const 0x1.500000p7 ; check: return v4 ; v4 = 0x1.500000p7 +function %f32_fmax_zero_1() -> f32 { +block0: + v1 = f32const 0.0 + v2 = f32const -0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f32const 0.0 +; check: return v4 ; v4 = 0.0 + +function %f32_fmax_zero_2() -> f32 { +block0: + v1 = f32const -0.0 + v2 = f32const 0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f32const 0.0 +; check: return v4 ; v4 = 0.0 + function %f32_fneg() -> f32 { block0: v1 = f32const 0.0 @@ -596,6 +706,28 @@ block0: ; check: v4 = f64const -0x1.5000000000000p7 ; check: return v4 ; v4 = -0x1.5000000000000p7 +function %f64_fmin_zero_1() -> f64 { +block0: + v1 = f64const 0.0 + v2 = f64const -0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f64const -0.0 +; check: return v4 ; v4 = -0.0 + +function %f64_fmin_zero_2() -> f64 { +block0: + v1 = f64const -0.0 + v2 = f64const 0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: v4 = f64const -0.0 +; check: return v4 ; v4 = -0.0 + function %f64_fmax() -> f64 { block0: v1 = f64const -0x1.5p6 @@ -607,6 +739,28 @@ block0: ; check: v4 = f64const -0x1.5000000000000p6 ; check: return v4 ; v4 = -0x1.5000000000000p6 +function %f64_fmax_zero_1() -> f64 { +block0: + v1 = f64const 0.0 + v2 = f64const -0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f64const 0.0 +; check: return v4 ; v4 = 0.0 + +function %f64_fmax_zero_2() -> f64 { +block0: + v1 = f64const -0.0 + v2 = f64const 0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: v4 = f64const 0.0 +; check: return v4 ; v4 = 0.0 + function %f64_fneg() -> f64 { block0: v1 = f64const 0.0 @@ -638,6 +792,78 @@ block0: ; check: v4 = f64const -NaN ; check: return v4 ; v4 = -NaN +function %f128_fmin() -> f128 { +block0: + v1 = f128const 0x1.5p6 + v2 = f128const 0x1.5p7 + v3 = fmin v2, v1 + return v3 +} + +; check: const0 = 0x40055000000000000000000000000000 +; check: v4 = f128const const0 +; check: return v4 ; v4 = 0x1.5000000000000000000000000000p6 + +function %f128_fmin_zero_1() -> f128 { +block0: + v1 = f128const 0.0 + v2 = f128const -0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: const1 = 0x80000000000000000000000000000000 +; check: v4 = f128const const1 +; check: return v4 ; v4 = -0.0 + +function %f128_fmin_zero_2() -> f128 { +block0: + v1 = f128const -0.0 + v2 = f128const 0.0 + v3 = fmin v1, v2 + return v3 +} + +; check: const0 = 0x80000000000000000000000000000000 +; check: v4 = f128const const0 +; check: return v4 ; v4 = -0.0 + +function %f128_fmax() -> f128 { +block0: + v1 = f128const 0x1.5p6 + v2 = f128const 0x1.5p7 + v3 = fmax v2, v1 + return v3 +} + +; check: const1 = 0x40065000000000000000000000000000 +; check: v4 = f128const const1 +; check: return v4 ; v4 = 0x1.5000000000000000000000000000p7 + +function %f128_fmax_zero_1() -> f128 { +block0: + v1 = f128const 0.0 + v2 = f128const -0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: const0 = 0x00000000000000000000000000000000 +; check: v4 = f128const const0 +; check: return v4 ; v4 = 0.0 + +function %f128_fmax_zero_2() -> f128 { +block0: + v1 = f128const -0.0 + v2 = f128const 0.0 + v3 = fmax v1, v2 + return v3 +} + +; check: const1 = 0x00000000000000000000000000000000 +; check: v4 = f128const const1 +; check: return v4 ; v4 = 0.0 + function %f128_fneg() -> f128 { block0: v1 = f128const 0.0