Browse Source

Fix `fmin`/`fmax` cprop miscompilation and add `f16`/`f128` `fmin`/`fmax` cprop support (#9030)

pull/9038/head
beetrees 3 months ago
committed by GitHub
parent
commit
0efe50ebce
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 49
      cranelift/codegen/src/ir/immediates.rs
  2. 48
      cranelift/codegen/src/isle_prelude.rs
  3. 12
      cranelift/codegen/src/nan_canonicalization.rs
  4. 12
      cranelift/codegen/src/opts/cprop.isle
  5. 8
      cranelift/codegen/src/prelude.isle
  6. 226
      cranelift/filetests/filetests/egraph/cprop.clif

49
cranelift/codegen/src/ir/immediates.rs

@ -512,11 +512,13 @@ macro_rules! ieee_float {
const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS); const SIGN_MASK: $bits_ty = 1 << (Self::EXPONENT_BITS + Self::SIGNIFICAND_BITS);
const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1); const SIGNIFICAND_MASK: $bits_ty = $bits_ty::MAX >> (Self::EXPONENT_BITS + 1);
const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK; const EXPONENT_MASK: $bits_ty = !Self::SIGN_MASK & !Self::SIGNIFICAND_MASK;
/// The positive WebAssembly canonical NaN.
///
/// The bit pattern is every exponent bit set plus only the most significant significand bit;
/// the sign bit is left clear.
pub const NAN: Self = Self::with_bits(Self::EXPONENT_MASK | (1 << (Self::SIGNIFICAND_BITS - 1)));
/// Create a new /// Create a new
#[doc = concat!("`", stringify!($name), "`")] #[doc = concat!("`", stringify!($name), "`")]
/// containing the bits of `bits`. /// containing the bits of `bits`.
pub fn with_bits(bits: $bits_ty) -> Self { pub const fn with_bits(bits: $bits_ty) -> Self {
Self { bits } Self { bits }
} }
@ -550,6 +552,42 @@ macro_rules! ieee_float {
Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK)) Self::with_bits((self.bits() & !Self::SIGN_MASK) | (sign.bits() & Self::SIGN_MASK))
} }
/// Computes the lesser of `self` and `other` under the WebAssembly/IEEE 754-2019 `minimum` rules.
///
/// If either operand is NaN the result is the canonical `Self::NAN`. When both operands are
/// zero, `self` is returned if it carries a negative sign and `other` is returned otherwise.
pub fn minimum(self, other: Self) -> Self {
    // FIXME: Replace with Rust float method once it is stabilised.
    if self.is_nan() || other.is_nan() {
        return Self::NAN;
    }
    if self.is_zero() && other.is_zero() {
        // Two zeros are distinguished by sign bit rather than by the ordering comparison.
        return if self.is_negative() { self } else { other };
    }
    if self <= other { self } else { other }
}
/// Computes the greater of `self` and `other` under the WebAssembly/IEEE 754-2019 `maximum` rules.
///
/// If either operand is NaN the result is the canonical `Self::NAN`. When both operands are
/// zero, `self` is returned if it carries a positive sign and `other` is returned otherwise.
pub fn maximum(self, other: Self) -> Self {
    // FIXME: Replace with Rust float method once it is stabilised.
    if self.is_nan() || other.is_nan() {
        return Self::NAN;
    }
    if self.is_zero() && other.is_zero() {
        // Two zeros are distinguished by sign bit rather than by the ordering comparison.
        return if self.is_positive() { self } else { other };
    }
    if self >= other { self } else { other }
}
/// Create an /// Create an
#[doc = concat!("`", stringify!($name), "`")] #[doc = concat!("`", stringify!($name), "`")]
/// number representing `2.0^n`. /// number representing `2.0^n`.
@ -583,6 +621,11 @@ macro_rules! ieee_float {
self.abs().bits() > Self::EXPONENT_MASK self.abs().bits() > Self::EXPONENT_MASK
} }
/// Returns true if `self` has a positive sign, including 0.0, NaNs with positive sign bit and positive infinity.
pub fn is_positive(self) -> bool {
!self.is_negative()
}
/// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity. /// Returns true if `self` has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity.
pub fn is_negative(self) -> bool { pub fn is_negative(self) -> bool {
self.bits() & Self::SIGN_MASK == Self::SIGN_MASK self.bits() & Self::SIGN_MASK == Self::SIGN_MASK
@ -641,8 +684,8 @@ macro_rules! ieee_float {
// Zeros are always equal regardless of sign. // Zeros are always equal regardless of sign.
return Some(Ordering::Equal); return Some(Ordering::Equal);
} }
let lhs_positive = !self.is_negative(); let lhs_positive = self.is_positive();
let rhs_positive = !rhs.is_negative(); let rhs_positive = rhs.is_positive();
if lhs_positive != rhs_positive { if lhs_positive != rhs_positive {
// Different signs: negative < positive // Different signs: negative < positive
return lhs_positive.partial_cmp(&rhs_positive); return lhs_positive.partial_cmp(&rhs_positive);

48
cranelift/codegen/src/isle_prelude.rs

@ -938,6 +938,14 @@ macro_rules! isle_common_prelude_methods {
} }
} }
// Helper behind the ISLE `f16_min` constructor: takes the `minimum` of `a` and `b` and passes
// it through `non_nan()` to produce the `Option` result.
// NOTE(review): presumably `non_nan()` yields `None` for a NaN result so NaNs are never folded — confirm.
fn f16_min(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
a.minimum(b).non_nan()
}
// Helper behind the ISLE `f16_max` constructor: takes the `maximum` of `a` and `b` and passes
// it through `non_nan()` to produce the `Option` result.
// NOTE(review): presumably `non_nan()` yields `None` for a NaN result so NaNs are never folded — confirm.
fn f16_max(&mut self, a: Ieee16, b: Ieee16) -> Option<Ieee16> {
a.maximum(b).non_nan()
}
fn f16_neg(&mut self, n: Ieee16) -> Ieee16 { fn f16_neg(&mut self, n: Ieee16) -> Ieee16 {
-n -n
} }
@ -987,23 +995,11 @@ macro_rules! isle_common_prelude_methods {
} }
fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> { fn f32_min(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
if a.is_nan() || b.is_nan() { a.minimum(b).non_nan()
None
} else if a <= b {
Some(a)
} else {
Some(b)
}
} }
fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> { fn f32_max(&mut self, a: Ieee32, b: Ieee32) -> Option<Ieee32> {
if a.is_nan() || b.is_nan() { a.maximum(b).non_nan()
None
} else if a >= b {
Some(a)
} else {
Some(b)
}
} }
fn f32_neg(&mut self, n: Ieee32) -> Ieee32 { fn f32_neg(&mut self, n: Ieee32) -> Ieee32 {
@ -1055,23 +1051,11 @@ macro_rules! isle_common_prelude_methods {
} }
fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> { fn f64_min(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
if a.is_nan() || b.is_nan() { a.minimum(b).non_nan()
None
} else if a <= b {
Some(a)
} else {
Some(b)
}
} }
fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> { fn f64_max(&mut self, a: Ieee64, b: Ieee64) -> Option<Ieee64> {
if a.is_nan() || b.is_nan() { a.maximum(b).non_nan()
None
} else if a >= b {
Some(a)
} else {
Some(b)
}
} }
fn f64_neg(&mut self, n: Ieee64) -> Ieee64 { fn f64_neg(&mut self, n: Ieee64) -> Ieee64 {
@ -1086,6 +1070,14 @@ macro_rules! isle_common_prelude_methods {
a.copysign(b) a.copysign(b)
} }
// Helper behind the ISLE `f128_min` constructor: takes the `minimum` of `a` and `b` and passes
// it through `non_nan()` to produce the `Option` result.
// NOTE(review): presumably `non_nan()` yields `None` for a NaN result so NaNs are never folded — confirm.
fn f128_min(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
a.minimum(b).non_nan()
}
// Helper behind the ISLE `f128_max` constructor: takes the `maximum` of `a` and `b` and passes
// it through `non_nan()` to produce the `Option` result.
// NOTE(review): presumably `non_nan()` yields `None` for a NaN result so NaNs are never folded — confirm.
fn f128_max(&mut self, a: Ieee128, b: Ieee128) -> Option<Ieee128> {
a.maximum(b).non_nan()
}
fn f128_neg(&mut self, n: Ieee128) -> Ieee128 { fn f128_neg(&mut self, n: Ieee128) -> Ieee128 {
-n -n
} }

12
cranelift/codegen/src/nan_canonicalization.rs

@ -10,10 +10,6 @@ use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
use crate::opts::MemFlags; use crate::opts::MemFlags;
use crate::timing; use crate::timing;
// Canonical 32-bit and 64-bit NaN values.
static CANON_32BIT_NAN: u32 = 0b01111111110000000000000000000000;
static CANON_64BIT_NAN: u64 = 0b0111111111111000000000000000000000000000000000000000000000000000;
/// Perform the NaN canonicalization pass. /// Perform the NaN canonicalization pass.
pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) { pub fn do_nan_canonicalization(func: &mut Function, has_vector_support: bool) {
let _tt = timing::canonicalize_nans(); let _tt = timing::canonicalize_nans();
@ -95,7 +91,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
match val_type { match val_type {
types::F32 => { types::F32 => {
let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); let canon_nan = pos.ins().f32const(Ieee32::NAN);
if has_vector_support { if has_vector_support {
vectorized_scalar_select(pos, canon_nan, types::F32X4); vectorized_scalar_select(pos, canon_nan, types::F32X4);
} else { } else {
@ -103,7 +99,7 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
} }
} }
types::F64 => { types::F64 => {
let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)); let canon_nan = pos.ins().f64const(Ieee64::NAN);
if has_vector_support { if has_vector_support {
vectorized_scalar_select(pos, canon_nan, types::F64X2); vectorized_scalar_select(pos, canon_nan, types::F64X2);
} else { } else {
@ -111,12 +107,12 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst, has_vector_support: bool)
} }
} }
types::F32X4 => { types::F32X4 => {
let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)); let canon_nan = pos.ins().f32const(Ieee32::NAN);
let canon_nan = pos.ins().splat(types::F32X4, canon_nan); let canon_nan = pos.ins().splat(types::F32X4, canon_nan);
vector_select(pos, canon_nan); vector_select(pos, canon_nan);
} }
types::F64X2 => { types::F64X2 => {
let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)); let canon_nan = pos.ins().f64const(Ieee64::NAN);
let canon_nan = pos.ins().splat(types::F64X2, canon_nan); let canon_nan = pos.ins().splat(types::F64X2, canon_nan);
vector_select(pos, canon_nan); vector_select(pos, canon_nan);
} }

12
cranelift/codegen/src/opts/cprop.isle

@ -350,19 +350,31 @@
(if-let r (f64_nearest n)) (if-let r (f64_nearest n))
(subsume (f64const $F64 r))) (subsume (f64const $F64 r)))
;; Constant-fold `fmin` of two `f16` constants. `f16_min` is a partial
;; constructor, so this rule only applies when it produces a value.
;; The folded constant is built with type `$F16`, matching the type of the
;; `fmin` being replaced.
(rule (simplify (fmin $F16 (f16const $F16 n) (f16const $F16 m)))
      (if-let r (f16_min n m))
      (subsume (f16const $F16 r)))
(rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m))) (rule (simplify (fmin $F32 (f32const $F32 n) (f32const $F32 m)))
(if-let r (f32_min n m)) (if-let r (f32_min n m))
(subsume (f32const $F32 r))) (subsume (f32const $F32 r)))
(rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m))) (rule (simplify (fmin $F64 (f64const $F64 n) (f64const $F64 m)))
(if-let r (f64_min n m)) (if-let r (f64_min n m))
(subsume (f64const $F64 r))) (subsume (f64const $F64 r)))
;; Constant-fold `fmin` of two `f128` constants. Both operands are matched
;; through `ieee128_constant` and the result is wrapped with it again.
;; `f128_min` is a partial constructor, so this rule only applies when it
;; produces a value.
(rule (simplify (fmin $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
(if-let r (f128_min n m))
(subsume (f128const $F128 (ieee128_constant r))))
;; Constant-fold `fmax` of two `f16` constants. `f16_max` is a partial
;; constructor, so this rule only applies when it produces a value.
(rule (simplify (fmax $F16 (f16const $F16 n) (f16const $F16 m)))
(if-let r (f16_max n m))
(subsume (f16const $F16 r)))
(rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m))) (rule (simplify (fmax $F32 (f32const $F32 n) (f32const $F32 m)))
(if-let r (f32_max n m)) (if-let r (f32_max n m))
(subsume (f32const $F32 r))) (subsume (f32const $F32 r)))
(rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m))) (rule (simplify (fmax $F64 (f64const $F64 n) (f64const $F64 m)))
(if-let r (f64_max n m)) (if-let r (f64_max n m))
(subsume (f64const $F64 r))) (subsume (f64const $F64 r)))
;; Constant-fold `fmax` of two `f128` constants. Both operands are matched
;; through `ieee128_constant` and the result is wrapped with it again.
;; `f128_max` is a partial constructor, so this rule only applies when it
;; produces a value.
(rule (simplify (fmax $F128 (f128const $F128 (ieee128_constant n)) (f128const $F128 (ieee128_constant m))))
(if-let r (f128_max n m))
(subsume (f128const $F128 (ieee128_constant r))))
(rule (simplify (fneg $F16 (f16const $F16 n))) (rule (simplify (fneg $F16 (f16const $F16 n)))
(subsume (f16const $F16 (f16_neg n)))) (subsume (f16const $F16 (f16_neg n))))

8
cranelift/codegen/src/prelude.isle

@ -239,6 +239,10 @@
;; Floating point operations ;; Floating point operations
;; `f16` minimum/maximum of two constants. Declared `partial` because the
;; external implementations may not produce a value.
(decl pure partial f16_min (Ieee16 Ieee16) Ieee16)
(extern constructor f16_min f16_min)
(decl pure partial f16_max (Ieee16 Ieee16) Ieee16)
(extern constructor f16_max f16_max)
(decl pure f16_neg (Ieee16) Ieee16) (decl pure f16_neg (Ieee16) Ieee16)
(extern constructor f16_neg f16_neg) (extern constructor f16_neg f16_neg)
(decl pure f16_abs (Ieee16) Ieee16) (decl pure f16_abs (Ieee16) Ieee16)
@ -301,6 +305,10 @@
(extern constructor f64_abs f64_abs) (extern constructor f64_abs f64_abs)
(decl pure f64_copysign (Ieee64 Ieee64) Ieee64) (decl pure f64_copysign (Ieee64 Ieee64) Ieee64)
(extern constructor f64_copysign f64_copysign) (extern constructor f64_copysign f64_copysign)
;; `f128` minimum/maximum of two constants. Declared `partial` because the
;; external implementations may not produce a value.
(decl pure partial f128_min (Ieee128 Ieee128) Ieee128)
(extern constructor f128_min f128_min)
(decl pure partial f128_max (Ieee128 Ieee128) Ieee128)
(extern constructor f128_max f128_max)
(decl pure f128_neg (Ieee128) Ieee128) (decl pure f128_neg (Ieee128) Ieee128)
(extern constructor f128_neg f128_neg) (extern constructor f128_neg f128_neg)
(decl pure f128_abs (Ieee128) Ieee128) (decl pure f128_abs (Ieee128) Ieee128)

226
cranelift/filetests/filetests/egraph/cprop.clif

@ -313,6 +313,72 @@ block0:
; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412 ; check: v2 = iconst.i64 0xf0de_bc9a_7856_3412
; nextln: return v2 ; nextln: return v2
function %f16_fmin() -> f16 {
block0:
v1 = f16const -0x1.5p6
v2 = f16const -0x1.5p7
v3 = fmin v2, v1
return v3
}
; check: v4 = f16const -0x1.500p7
; check: return v4 ; v4 = -0x1.500p7
function %f16_fmin_zero_1() -> f16 {
block0:
v1 = f16const 0.0
v2 = f16const -0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f16const -0.0
; check: return v4 ; v4 = -0.0
function %f16_fmin_zero_2() -> f16 {
block0:
v1 = f16const -0.0
v2 = f16const 0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f16const -0.0
; check: return v4 ; v4 = -0.0
function %f16_fmax() -> f16 {
block0:
v1 = f16const -0x1.5p6
v2 = f16const -0x1.5p7
v3 = fmax v2, v1
return v3
}
; check: v4 = f16const -0x1.500p6
; check: return v4 ; v4 = -0x1.500p6
function %f16_fmax_zero_1() -> f16 {
block0:
v1 = f16const 0.0
v2 = f16const -0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f16const 0.0
; check: return v4 ; v4 = 0.0
function %f16_fmax_zero_2() -> f16 {
block0:
v1 = f16const -0.0
v2 = f16const 0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f16const 0.0
; check: return v4 ; v4 = 0.0
function %f16_fneg() -> f16 { function %f16_fneg() -> f16 {
block0: block0:
v1 = f16const 0.0 v1 = f16const 0.0
@ -449,6 +515,28 @@ block0:
; check: v4 = f32const 0x1.500000p6 ; check: v4 = f32const 0x1.500000p6
; check: return v4 ; v4 = 0x1.500000p6 ; check: return v4 ; v4 = 0x1.500000p6
function %f32_fmin_zero_1() -> f32 {
block0:
v1 = f32const 0.0
v2 = f32const -0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f32const -0.0
; check: return v4 ; v4 = -0.0
function %f32_fmin_zero_2() -> f32 {
block0:
v1 = f32const -0.0
v2 = f32const 0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f32const -0.0
; check: return v4 ; v4 = -0.0
function %f32_fmax() -> f32 { function %f32_fmax() -> f32 {
block0: block0:
v1 = f32const 0x1.5p6 v1 = f32const 0x1.5p6
@ -460,6 +548,28 @@ block0:
; check: v4 = f32const 0x1.500000p7 ; check: v4 = f32const 0x1.500000p7
; check: return v4 ; v4 = 0x1.500000p7 ; check: return v4 ; v4 = 0x1.500000p7
function %f32_fmax_zero_1() -> f32 {
block0:
v1 = f32const 0.0
v2 = f32const -0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f32const 0.0
; check: return v4 ; v4 = 0.0
function %f32_fmax_zero_2() -> f32 {
block0:
v1 = f32const -0.0
v2 = f32const 0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f32const 0.0
; check: return v4 ; v4 = 0.0
function %f32_fneg() -> f32 { function %f32_fneg() -> f32 {
block0: block0:
v1 = f32const 0.0 v1 = f32const 0.0
@ -596,6 +706,28 @@ block0:
; check: v4 = f64const -0x1.5000000000000p7 ; check: v4 = f64const -0x1.5000000000000p7
; check: return v4 ; v4 = -0x1.5000000000000p7 ; check: return v4 ; v4 = -0x1.5000000000000p7
function %f64_fmin_zero_1() -> f64 {
block0:
v1 = f64const 0.0
v2 = f64const -0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f64const -0.0
; check: return v4 ; v4 = -0.0
function %f64_fmin_zero_2() -> f64 {
block0:
v1 = f64const -0.0
v2 = f64const 0.0
v3 = fmin v1, v2
return v3
}
; check: v4 = f64const -0.0
; check: return v4 ; v4 = -0.0
function %f64_fmax() -> f64 { function %f64_fmax() -> f64 {
block0: block0:
v1 = f64const -0x1.5p6 v1 = f64const -0x1.5p6
@ -607,6 +739,28 @@ block0:
; check: v4 = f64const -0x1.5000000000000p6 ; check: v4 = f64const -0x1.5000000000000p6
; check: return v4 ; v4 = -0x1.5000000000000p6 ; check: return v4 ; v4 = -0x1.5000000000000p6
function %f64_fmax_zero_1() -> f64 {
block0:
v1 = f64const 0.0
v2 = f64const -0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f64const 0.0
; check: return v4 ; v4 = 0.0
function %f64_fmax_zero_2() -> f64 {
block0:
v1 = f64const -0.0
v2 = f64const 0.0
v3 = fmax v1, v2
return v3
}
; check: v4 = f64const 0.0
; check: return v4 ; v4 = 0.0
function %f64_fneg() -> f64 { function %f64_fneg() -> f64 {
block0: block0:
v1 = f64const 0.0 v1 = f64const 0.0
@ -638,6 +792,78 @@ block0:
; check: v4 = f64const -NaN ; check: v4 = f64const -NaN
; check: return v4 ; v4 = -NaN ; check: return v4 ; v4 = -NaN
function %f128_fmin() -> f128 {
block0:
v1 = f128const 0x1.5p6
v2 = f128const 0x1.5p7
v3 = fmin v2, v1
return v3
}
; check: const0 = 0x40055000000000000000000000000000
; check: v4 = f128const const0
; check: return v4 ; v4 = 0x1.5000000000000000000000000000p6
function %f128_fmin_zero_1() -> f128 {
block0:
v1 = f128const 0.0
v2 = f128const -0.0
v3 = fmin v1, v2
return v3
}
; check: const1 = 0x80000000000000000000000000000000
; check: v4 = f128const const1
; check: return v4 ; v4 = -0.0
function %f128_fmin_zero_2() -> f128 {
block0:
v1 = f128const -0.0
v2 = f128const 0.0
v3 = fmin v1, v2
return v3
}
; check: const0 = 0x80000000000000000000000000000000
; check: v4 = f128const const0
; check: return v4 ; v4 = -0.0
function %f128_fmax() -> f128 {
block0:
v1 = f128const 0x1.5p6
v2 = f128const 0x1.5p7
v3 = fmax v2, v1
return v3
}
; check: const1 = 0x40065000000000000000000000000000
; check: v4 = f128const const1
; check: return v4 ; v4 = 0x1.5000000000000000000000000000p7
function %f128_fmax_zero_1() -> f128 {
block0:
v1 = f128const 0.0
v2 = f128const -0.0
v3 = fmax v1, v2
return v3
}
; check: const0 = 0x00000000000000000000000000000000
; check: v4 = f128const const0
; check: return v4 ; v4 = 0.0
function %f128_fmax_zero_2() -> f128 {
block0:
v1 = f128const -0.0
v2 = f128const 0.0
v3 = fmax v1, v2
return v3
}
; check: const1 = 0x00000000000000000000000000000000
; check: v4 = f128const const1
; check: return v4 ; v4 = 0.0
function %f128_fneg() -> f128 { function %f128_fneg() -> f128 {
block0: block0:
v1 = f128const 0.0 v1 = f128const 0.0

Loading…
Cancel
Save