diff --git a/cranelift/codegen/src/ir/immediates.rs b/cranelift/codegen/src/ir/immediates.rs index 59e32be9d5..2ed2bb884c 100644 --- a/cranelift/codegen/src/ir/immediates.rs +++ b/cranelift/codegen/src/ir/immediates.rs @@ -790,6 +790,16 @@ impl Ieee32 { pub fn copysign(self, sign: Self) -> Self { Self::with_float(self.as_f32().copysign(sign.as_f32())) } + + /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity. + pub fn is_negative(&self) -> bool { + self.as_f32().is_sign_negative() + } + + /// Returns true if self is positive or negative zero + pub fn is_zero(&self) -> bool { + self.as_f32() == 0.0 + } } impl PartialOrd for Ieee32 { @@ -909,6 +919,16 @@ impl Ieee64 { pub fn copysign(self, sign: Self) -> Self { Self::with_float(self.as_f64().copysign(sign.as_f64())) } + + /// Returns true if self has a negative sign, including -0.0, NaNs with negative sign bit and negative infinity. + pub fn is_negative(&self) -> bool { + self.as_f64().is_sign_negative() + } + + /// Returns true if self is positive or negative zero + pub fn is_zero(&self) -> bool { + self.as_f64() == 0.0 + } } impl PartialOrd for Ieee64 { diff --git a/cranelift/filetests/filetests/runtests/fmax.clif b/cranelift/filetests/filetests/runtests/fmax.clif new file mode 100644 index 0000000000..10d72e8f47 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fmax.clif @@ -0,0 +1,105 @@ +test interpret +test run +target x86_64 +target aarch64 +target s390x + +function %fmax_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} +; run: %fmax_f32(0x1.0, 0x2.0) == 0x2.0 +; run: %fmax_f32(0x1.0p10, 0x1.0p11) == 0x1.0p11 + +; run: %fmax_f32(0x0.0, -0x0.0) == 0x0.0 +; run: %fmax_f32(-0x0.0, 0x0.0) == 0x0.0 +; run: %fmax_f32(+Inf, 0x0.0) == +Inf +; run: %fmax_f32(0x0.0, +Inf) == +Inf +; run: %fmax_f32(-Inf, 0x0.0) == 0x0.0 +; run: %fmax_f32(0x0.0, -Inf) == 0x0.0 +; run: %fmax_f32(+Inf, -Inf) == +Inf + +; F32 Epsilon / Max / Min Positive +; run: %fmax_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000002p-23 +; run: %fmax_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffep127 +; run: %fmax_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fmax_f32(0x0.800002p-126, 0x0.800000p-126) == 0x0.800002p-126 +; run: %fmax_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800000p-126 + +; F32 NaN's +; For NaN's this operation is specified as producing a value that is a NaN +; This behaviour differs from IEEE754's behaviour +function %fmax_is_nan_f32(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + v3 = fcmp ne v2, v2 + v4 = bint.i32 v3 + return v4 +} +; run: %fmax_is_nan_f32(0x0.0, +NaN) == 1 +; run: %fmax_is_nan_f32(-NaN, 0x0.0) == 1 +; run: %fmax_is_nan_f32(0x0.0, +NaN:0x0) == 1 +; run: %fmax_is_nan_f32(0x0.0, +NaN:0x1) == 1 +; run: %fmax_is_nan_f32(0x0.0, +NaN:0x300001) == 1 +; run: %fmax_is_nan_f32(-NaN:0x0, 0x0.0) == 1 +; run: %fmax_is_nan_f32(-NaN:0x1, 0x0.0) == 1 +; run: %fmax_is_nan_f32(-NaN:0x300001, 0x0.0) == 1 +; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x1) == 1 +; run: %fmax_is_nan_f32(-sNaN:0x1, 0x0.0) == 1 +; run: %fmax_is_nan_f32(0x0.0, +sNaN:0x200001) == 1 +; run: %fmax_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1 + + + +function %fmax_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} +; run: %fmax_f64(0x1.0, 0x2.0) == 0x2.0 +; run: %fmax_f64(0x1.0p10, 0x1.0p11) == 0x1.0p11 + +; run: %fmax_f64(0x0.0, -0x0.0) == 0x0.0 +; run: %fmax_f64(-0x0.0, 0x0.0) == 0x0.0 +; run: %fmax_f64(+Inf, 0x0.0) == +Inf +; run: %fmax_f64(0x0.0, +Inf) == +Inf +; run: %fmax_f64(-Inf, 0x0.0) == 0x0.0 +; run: %fmax_f64(0x0.0, -Inf) == 0x0.0 +; run: %fmax_f64(+Inf, -Inf) == +Inf + +; F64 Epsilon / Max / Min Positive +; run: %fmax_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000002p-52 +; run: %fmax_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.fffffffffffffp1023 +; run: %fmax_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fmax_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000002p-1022 +; run: %fmax_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000000p-1022 + + + +; F64 NaN's +; For NaN's this operation is specified as producing a value that is a NaN +; This behaviour differs from IEEE754's behaviour +function %fmax_is_nan_f64(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + v3 = fcmp ne v2, v2 + v4 = bint.i32 v3 + return v4 +} +; run: %fmax_is_nan_f64(0x0.0, +NaN) == 1 +; run: %fmax_is_nan_f64(-NaN, 0x0.0) == 1 +; run: %fmax_is_nan_f64(0x0.0, +NaN:0x0) == 1 +; run: %fmax_is_nan_f64(0x0.0, +NaN:0x1) == 1 +; run: %fmax_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1 +; run: %fmax_is_nan_f64(-NaN:0x0, 0x0.0) == 1 +; run: %fmax_is_nan_f64(-NaN:0x1, 0x0.0) == 1 +; run: %fmax_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1 +; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x1) == 1 +; run: %fmax_is_nan_f64(-sNaN:0x1, 0x0.0) == 1 +; run: %fmax_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1 +; run: %fmax_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1 diff --git a/cranelift/filetests/filetests/runtests/fmin.clif b/cranelift/filetests/filetests/runtests/fmin.clif new file mode 100644 index 0000000000..9f436f5458 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fmin.clif @@ -0,0 +1,105 @@ +test interpret +test run +target x86_64 +target aarch64 +target s390x + +function %fmin_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} +; run: %fmin_f32(0x1.0, 0x2.0) == 0x1.0 +; run: %fmin_f32(0x1.0p10, 0x1.0p11) == 0x1.0p10 + +; run: %fmin_f32(0x0.0, -0x0.0) == -0x0.0 +; run: %fmin_f32(-0x0.0, 0x0.0) == -0x0.0 +; run: %fmin_f32(+Inf, 0x0.0) == 0x0.0 +; run: %fmin_f32(0x0.0, +Inf) == 0x0.0 +; run: %fmin_f32(-Inf, 0x0.0) == -Inf +; run: %fmin_f32(0x0.0, -Inf) == -Inf +; run: %fmin_f32(+Inf, -Inf) == -Inf + +; F32 Epsilon / Max / Min Positive +; run: %fmin_f32(0x1.000002p-23, 0x1.000000p-23) == 0x1.000000p-23 +; run: %fmin_f32(0x1.fffffcp127, 0x1.fffffep127) == 0x1.fffffcp127 +; run: %fmin_f32(0x1.000000p-126, 0x1.000000p-126) == 0x1.000000p-126 + +; F32 Subnormals +; run: %fmin_f32(0x0.800002p-126, 0x0.800000p-126) == 0x0.800000p-126 +; run: %fmin_f32(-0x0.800002p-126, -0x0.800000p-126) == -0x0.800002p-126 + +; F32 NaN's +; For NaN's this operation is specified as producing a value that is a NaN +; This behaviour differs from IEEE754's behaviour +function %fmin_is_nan_f32(f32, f32) -> i32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + v3 = fcmp ne v2, v2 + v4 = bint.i32 v3 + return v4 +} +; run: %fmin_is_nan_f32(0x0.0, +NaN) == 1 +; run: %fmin_is_nan_f32(-NaN, 0x0.0) == 1 +; run: %fmin_is_nan_f32(0x0.0, +NaN:0x0) == 1 +; run: %fmin_is_nan_f32(0x0.0, +NaN:0x1) == 1 +; run: %fmin_is_nan_f32(0x0.0, +NaN:0x300001) == 1 +; run: %fmin_is_nan_f32(-NaN:0x0, 0x0.0) == 1 +; run: %fmin_is_nan_f32(-NaN:0x1, 0x0.0) == 1 +; run: %fmin_is_nan_f32(-NaN:0x300001, 0x0.0) == 1 +; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x1) == 1 +; run: %fmin_is_nan_f32(-sNaN:0x1, 0x0.0) == 1 +; run: %fmin_is_nan_f32(0x0.0, +sNaN:0x200001) == 1 +; run: %fmin_is_nan_f32(-sNaN:0x200001, 0x0.0) == 1 + + + +function %fmin_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} +; run: %fmin_f64(0x1.0, 0x2.0) == 0x1.0 +; run: %fmin_f64(0x1.0p10, 0x1.0p11) == 0x1.0p10 + +; run: %fmin_f64(0x0.0, -0x0.0) == -0x0.0 +; run: %fmin_f64(-0x0.0, 0x0.0) == -0x0.0 +; run: %fmin_f64(+Inf, 0x0.0) == 0x0.0 +; run: %fmin_f64(0x0.0, +Inf) == 0x0.0 +; run: %fmin_f64(-Inf, 0x0.0) == -Inf +; run: %fmin_f64(0x0.0, -Inf) == -Inf +; run: %fmin_f64(+Inf, -Inf) == -Inf + +; F64 Epsilon / Max / Min Positive +; run: %fmin_f64(0x1.0000000000002p-52, 0x1.0000000000000p-52) == 0x1.0000000000000p-52 +; run: %fmin_f64(0x1.ffffffffffffcp1023, 0x1.fffffffffffffp1023) == 0x1.ffffffffffffcp1023 +; run: %fmin_f64(0x1.0000000000000p-1022, 0x1.0000000000000p-1022) == 0x1.0000000000000p-1022 + +; F64 Subnormals +; run: %fmin_f64(0x0.8000000000002p-1022, 0x0.8000000000000p-1022) == 0x0.8000000000000p-1022 +; run: %fmin_f64(-0x0.8000000000002p-1022, -0x0.8000000000000p-1022) == -0x0.8000000000002p-1022 + + + +; F64 NaN's +; For NaN's this operation is specified as producing a value that is a NaN +; This behaviour differs from IEEE754's behaviour +function %fmin_is_nan_f64(f64, f64) -> i32 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + v3 = fcmp ne v2, v2 + v4 = bint.i32 v3 + return v4 +} +; run: %fmin_is_nan_f64(0x0.0, +NaN) == 1 +; run: %fmin_is_nan_f64(-NaN, 0x0.0) == 1 +; run: %fmin_is_nan_f64(0x0.0, +NaN:0x0) == 1 +; run: %fmin_is_nan_f64(0x0.0, +NaN:0x1) == 1 +; run: %fmin_is_nan_f64(0x0.0, +NaN:0x4000000000001) == 1 +; run: %fmin_is_nan_f64(-NaN:0x0, 0x0.0) == 1 +; run: %fmin_is_nan_f64(-NaN:0x1, 0x0.0) == 1 +; run: %fmin_is_nan_f64(-NaN:0x4000000000001, 0x0.0) == 1 +; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x1) == 1 +; run: %fmin_is_nan_f64(-sNaN:0x1, 0x0.0) == 1 +; run: %fmin_is_nan_f64(0x0.0, +sNaN:0x4000000000001) == 1 +; run: %fmin_is_nan_f64(-sNaN:0x4000000000001, 0x0.0) == 1 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index b43b86a925..cf045673f8 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -698,17 +698,21 @@ where Opcode::Fneg => assign(Value::neg(arg(0)?)?), Opcode::Fabs => assign(Value::abs(arg(0)?)?), Opcode::Fcopysign => binary(Value::copysign, arg(0)?, arg(1)?)?, - Opcode::Fmin => choose( - Value::is_nan(&arg(0)?)? || Value::lt(&arg(0)?, &arg(1)?)?, - arg(0)?, - arg(1)?, - ), + Opcode::Fmin => assign(match (arg(0)?, arg(1)?) { + (a, _) if a.is_nan()? => a, + (_, b) if b.is_nan()? => b, + (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a, + (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b, + (a, b) => a.min(b)?, + }), Opcode::FminPseudo => unimplemented!("FminPseudo"), - Opcode::Fmax => choose( - Value::is_nan(&arg(0)?)? || Value::gt(&arg(0)?, &arg(1)?)?, - arg(0)?, - arg(1)?, - ), + Opcode::Fmax => assign(match (arg(0)?, arg(1)?) { + (a, _) if a.is_nan()? => a, + (_, b) if b.is_nan()? => b, + (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b, + (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a, + (a, b) => a.max(b)?, + }), Opcode::FmaxPseudo => unimplemented!("FmaxPseudo"), Opcode::Ceil => unimplemented!("Ceil"), Opcode::Floor => unimplemented!("Floor"), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 9fe7baf531..fde4659bda 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -26,6 +26,9 @@ pub trait Value: Clone + From { fn convert(self, kind: ValueConversionKind) -> ValueResult; fn concat(self, other: Self) -> ValueResult; + fn is_negative(&self) -> ValueResult; + fn is_zero(&self) -> ValueResult; + fn max(self, other: Self) -> ValueResult; fn min(self, other: Self) -> ValueResult; @@ -394,6 +397,22 @@ impl Value for DataValue { } } + fn is_negative(&self) -> ValueResult { + match self { + DataValue::F32(f) => Ok(f.is_negative()), + DataValue::F64(f) => Ok(f.is_negative()), + _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())), + } + } + + fn is_zero(&self) -> ValueResult { + match self { + DataValue::F32(f) => Ok(f.is_zero()), + DataValue::F64(f) => Ok(f.is_zero()), + _ => Err(ValueError::InvalidType(ValueTypeClass::Float, self.ty())), + } + } + fn max(self, other: Self) -> ValueResult { if Value::gt(&self, &other)? { Ok(self)