Browse Source

[x64] Add the new i64x2 comparisons

pull/2698/head
Andrew Brown 4 years ago
parent
commit
d730f18a78
  1. 7
      build.rs
  2. 41
      cranelift/codegen/src/isa/x64/lower.rs
  3. 25
      cranelift/wasm/src/code_translator.rs

7
build.rs

@ -182,7 +182,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
}
match (testsuite, testname) {
("simd", "simd_i64x2_cmp") => return true,
("simd", "simd_i8x16_arith2") => return true, // Unsupported feature: proposed simd operator I8x16Popcnt
("simd", "simd_i64x2_arith2") => return true, // Unsupported feature: proposed simd operator I64x2Abs
("simd", "simd_conversions") => return true, // unknown operator or unexpected token: tests/spec_testsuite/proposals/simd/simd_conversions.wast:724:6
@ -231,8 +230,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", "simd_boolean") | ("simd", "simd_lane") => return true,
// These are new instructions that are not really implemented in any backend.
("simd", "simd_i64x2_cmp")
| ("simd", "simd_i8x16_arith2")
("simd", "simd_i8x16_arith2")
| ("simd", "simd_i64x2_arith2")
| ("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
@ -252,6 +250,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
| ("simd", "simd_store64_lane")
| ("simd", "simd_store8_lane") => return true,
// These are only implemented on x64.
("simd", "simd_i64x2_cmp") => return !cfg!(feature = "experimental_x64"),
// These are only implemented on aarch64 and x64.
("simd", "simd_f32x4_pmin_pmax")
| ("simd", "simd_f64x2_pmin_pmax")

41
cranelift/codegen/src/isa/x64/lower.rs

@ -3185,11 +3185,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
),
};
// Here we decide which operand to use as the read/write `dst` (ModRM reg field)
// and which to use as the read `input` (ModRM r/m field). In the normal case we
// use Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for
// the less-than cases so that we can reuse the greater-than implementation.
// Here we decide which operand to use as the read/write `dst` (ModRM reg field) and
// which to use as the read `input` (ModRM r/m field). In the normal case we use
// Cranelift's first operand, the `lhs`, as `dst` but we flip the operands for the
// less-than cases so that we can reuse the greater-than implementation.
//
// In a surprising twist, the operands for i64x2 `sge`/`sle` must also be flipped
// from the normal order because of the special-case lowering for these instructions
// (i.e. we use PCMPGTQ with flipped operands and negate the result).
let input = match condcode {
IntCC::SignedLessThanOrEqual if ty == types::I64X2 => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, lhs, ty));
rhs
}
IntCC::SignedGreaterThanOrEqual if ty == types::I64X2 => {
let lhs = input_to_reg_mem(ctx, inputs[0]);
let rhs = put_input_in_reg(ctx, inputs[1]);
ctx.emit(Inst::gen_move(dst, rhs, ty));
lhs
}
IntCC::SignedLessThan
| IntCC::SignedLessThanOrEqual
| IntCC::UnsignedLessThan
@ -3220,10 +3236,25 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty != types::I64X2 =>
{
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst))
}
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual
if ty == types::I64X2 =>
{
// PMINSQ (the 64-bit form of PMINS*) is only available with AVX512VL/F so we must
// instead compare with flipped operands and negate the result (emitting one more
// instruction).
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst));
// Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp));
// Invert the result of the `PCMPGT*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst));
}
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst));

25
cranelift/wasm/src/code_translator.rs

@ -1642,16 +1642,16 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().vhigh_bits(I32, a));
}
Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq => {
Operator::I8x16Eq | Operator::I16x8Eq | Operator::I32x4Eq | Operator::I64x2Eq => {
translate_vector_icmp(IntCC::Equal, type_of(op), builder, state)
}
Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne => {
Operator::I8x16Ne | Operator::I16x8Ne | Operator::I32x4Ne | Operator::I64x2Ne => {
translate_vector_icmp(IntCC::NotEqual, type_of(op), builder, state)
}
Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS => {
Operator::I8x16GtS | Operator::I16x8GtS | Operator::I32x4GtS | Operator::I64x2GtS => {
translate_vector_icmp(IntCC::SignedGreaterThan, type_of(op), builder, state)
}
Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS => {
Operator::I8x16LtS | Operator::I16x8LtS | Operator::I32x4LtS | Operator::I64x2LtS => {
translate_vector_icmp(IntCC::SignedLessThan, type_of(op), builder, state)
}
Operator::I8x16GtU | Operator::I16x8GtU | Operator::I32x4GtU => {
@ -1660,10 +1660,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
Operator::I8x16LtU | Operator::I16x8LtU | Operator::I32x4LtU => {
translate_vector_icmp(IntCC::UnsignedLessThan, type_of(op), builder, state)
}
Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS => {
Operator::I8x16GeS | Operator::I16x8GeS | Operator::I32x4GeS | Operator::I64x2GeS => {
translate_vector_icmp(IntCC::SignedGreaterThanOrEqual, type_of(op), builder, state)
}
Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS => {
Operator::I8x16LeS | Operator::I16x8LeS | Operator::I32x4LeS | Operator::I64x2LeS => {
translate_vector_icmp(IntCC::SignedLessThanOrEqual, type_of(op), builder, state)
}
Operator::I8x16GeU | Operator::I16x8GeU | Operator::I32x4GeU => translate_vector_icmp(
@ -1852,12 +1852,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I64x2ExtMulHighI32x4S
| Operator::I64x2ExtMulLowI32x4U
| Operator::I64x2ExtMulHighI32x4U
| Operator::I64x2Eq
| Operator::I64x2Ne
| Operator::I64x2LtS
| Operator::I64x2GtS
| Operator::I64x2LeS
| Operator::I64x2GeS
| Operator::I64x2Abs
| Operator::I64x2AllTrue
| Operator::I16x8ExtAddPairwiseI8x16S
@ -2646,7 +2640,14 @@ fn type_of(operator: &Operator) -> Type {
| Operator::V128Load64Splat { .. }
| Operator::I64x2ExtractLane { .. }
| Operator::I64x2ReplaceLane { .. }
| Operator::I64x2Eq
| Operator::I64x2Ne
| Operator::I64x2LtS
| Operator::I64x2GtS
| Operator::I64x2LeS
| Operator::I64x2GeS
| Operator::I64x2Neg
| Operator::I64x2Abs
| Operator::I64x2Shl
| Operator::I64x2ShrS
| Operator::I64x2ShrU

Loading…
Cancel
Save