Browse Source

Fix SIMD float comparison encoding (#1285)

The Intel manual uses `CMPNLT` and `CMPNLE` to denote not-less-than and not-less-than-or-equal. These were previously translated to `FloatCC::GreaterThanOrEqual` and `FloatCC::GreaterThan`, respectively, but because the negated predicates are also true when either operand is NaN (unordered), the correct translations are `FloatCC::UnorderedOrGreaterThanOrEqual` and `FloatCC::UnorderedOrGreaterThan`. This change adds the necessary legalizations to make use of these new encodings.
pull/1019/head
Andrew Brown 5 years ago
committed by GitHub
parent
commit
6fe86bcb61
  1. 20
      cranelift/codegen/meta/src/isa/x86/legalize.rs
  2. 18
      cranelift/codegen/meta/src/isa/x86/recipes.rs
  3. 8
      cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
  4. 14
      cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif

20
cranelift/codegen/meta/src/isa/x86/legalize.rs

@ -552,6 +552,26 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
}
// SIMD fcmp greater-/less-than
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
let le = Literal::enumerator_for(&imm.floatcc, "le");
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
for ty in &[F32, F64] {
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
}
for ty in &[F32, F64] {
let fneg = fneg.bind(vector(*ty, sse_vector_size));
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);

18
cranelift/codegen/meta/src/isa/x86/recipes.rs

@ -3169,7 +3169,7 @@ pub(crate) fn define<'shared>(
);
{
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "gt", "ge", "ord"]
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
.iter()
.map(|name| Literal::enumerator_for(floatcc, name))
.collect();
@ -3189,14 +3189,14 @@ pub(crate) fn define<'shared>(
// Add immediate byte indicating what type of comparison.
use crate::ir::condcodes::FloatCC::*;
let imm = match cond {
Equal => 0x00,
LessThan => 0x01,
LessThanOrEqual => 0x02,
Unordered => 0x03,
NotEqual => 0x04,
GreaterThanOrEqual => 0x05,
GreaterThan => 0x06,
Ordered => 0x07,
Equal => 0x00,
LessThan => 0x01,
LessThanOrEqual => 0x02,
Unordered => 0x03,
NotEqual => 0x04,
UnorderedOrGreaterThanOrEqual => 0x05,
UnorderedOrGreaterThan => 0x06,
Ordered => 0x07,
_ => panic!("{} not supported by pfcmp", cond),
};
sink.put1(imm);

8
cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif

@ -60,8 +60,8 @@ ebb0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 40 0f c2 d4 05
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 40 0f c2 d4 06
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 40 0f c2 d4 06
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07
return
}
@ -73,8 +73,8 @@ ebb0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 66 40 0f c2 d0 05
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 66 40 0f c2 d0 06
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07
return
}

14
cranelift/filetests/filetests/isa/x86/simd-comparison-run.clif

@ -231,3 +231,17 @@ ebb0:
return v8
}
; run
; Run-test for the ordered `gt` vector float comparison when NaNs are present.
; Every lane pair below has a NaN on at least one side, and the function
; returns whether the comparison result came out as all-zero bits, i.e.
; whether no lane was reported as greater-than.
function %fcmp_gt_nans_f32x4() -> b1 {
ebb0:
; lane pairs (v0, v1): (NaN, NaN), (0x42.0, NaN), (-NaN, 0x42.0), (NaN, Inf)
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
; view the comparison result as i32x4 so it can be compared against the zero vector v3
v4 = raw_bitcast.i32x4 v2
v5 = icmp eq v3, v4
; collapse the per-lane equality result into the single b1 that is returned
; (presumably true only when every lane of v5 is true -- confirm against vall_true's definition)
v8 = vall_true v5
return v8
}
; run

Loading…
Cancel
Save