
Replace `x86_packss` with `snarrow`

Since the Wasm SIMD specification defines integer-to-integer narrowing instructions (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#integer-to-integer-narrowing) that lower to the x86 PACKSS* instructions, the x86-specific `x86_packss` is not necessary in CLIF IR and is replaced by the target-independent `snarrow`.
Branch: pull/1970/head
Author: Andrew Brown, 4 years ago
Commit: 65e6de2344
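
At the IR level the change is a rename plus a promotion from an x86-only instruction to a target-independent one. A minimal before/after sketch (values and types mirror the run test updated at the end of this commit, not output from any particular build):

    ;; before: legal only on x86, where it maps directly to PACKSSDW
    v2 = x86_packss v0, v1   ; v0, v1: i32x4 -> v2: i16x8

    ;; after: target-independent signed narrowing; x86 still encodes it as PACKSSDW
    v2 = snarrow v0, v1      ; v0, v1: i32x4 -> v2: i16x8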
Changed files (lines changed):
  1. cranelift/codegen/meta/src/isa/x86/encodings.rs (6)
  2. cranelift/codegen/meta/src/isa/x86/instructions.rs (29)
  3. cranelift/codegen/meta/src/isa/x86/legalize.rs (4)
  4. cranelift/codegen/meta/src/shared/instructions.rs (31)
  5. cranelift/codegen/src/isa/aarch64/lower_inst.rs (4)
  6. cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif (2)
  7. cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif (4)
  8. cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif (16)

cranelift/codegen/meta/src/isa/x86/encodings.rs (6 lines changed)

@@ -1676,6 +1676,7 @@ fn define_simd(
     let uload16x4_complex = shared.by_name("uload16x4_complex");
     let uload32x2 = shared.by_name("uload32x2");
     let uload32x2_complex = shared.by_name("uload32x2_complex");
+    let snarrow = shared.by_name("snarrow");
     let ushr_imm = shared.by_name("ushr_imm");
     let usub_sat = shared.by_name("usub_sat");
     let vconst = shared.by_name("vconst");
@@ -1686,7 +1687,6 @@ fn define_simd(
     let x86_fmin = x86.by_name("x86_fmin");
     let x86_movlhps = x86.by_name("x86_movlhps");
     let x86_movsd = x86.by_name("x86_movsd");
-    let x86_packss = x86.by_name("x86_packss");
     let x86_pblendw = x86.by_name("x86_pblendw");
     let x86_pextr = x86.by_name("x86_pextr");
     let x86_pinsr = x86.by_name("x86_pinsr");
@@ -1901,8 +1901,8 @@ fn define_simd(
         );
     }
     for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
-        let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes));
+        let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
+        e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
     }
     // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
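
The loop above binds `snarrow` for the two legal input widths: `i16x8` operands get the PACKSSWB encoding (narrowing to `i8x16`) and `i32x4` operands get PACKSSDW (narrowing to `i16x8`). A rough CLIF illustration of those two bindings (the function name is made up for this sketch; exact machine bytes are exercised by the binemit filetest further down):

    function %narrow_pairs(i16x8, i16x8, i32x4, i32x4) {
    block0(v0: i16x8, v1: i16x8, v2: i32x4, v3: i32x4):
        v4 = snarrow v0, v1   ; i16x8 -> i8x16, encoded as PACKSSWB
        v5 = snarrow v2, v3   ; i32x4 -> i16x8, encoded as PACKSSDW
        return
    }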

cranelift/codegen/meta/src/isa/x86/instructions.rs (29 lines changed)

@@ -454,35 +454,6 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
-    let I16xN = &TypeVar::new(
-        "I16xN",
-        "A SIMD vector type containing integers 16-bits wide and up",
-        TypeSetBuilder::new()
-            .ints(16..32)
-            .simd_lanes(4..8)
-            .includes_scalars(false)
-            .build(),
-    );
-    let x = &Operand::new("x", I16xN);
-    let y = &Operand::new("y", I16xN);
-    let a = &Operand::new("a", &I16xN.split_lanes());
-    ig.push(
-        Inst::new(
-            "x86_packss",
-            r#"
-        Convert packed signed integers the lanes of ``x`` and ``y`` into half-width integers, using
-        signed saturation to handle overflows. For example, with notional i16x2 vectors, where
-        ``x = [x1, x0]`` and ``y = [y1, y0]``, this operation would result in
-        ``a = [y1', y0', x1', x0']`` (using the Intel manual's right-to-left lane ordering).
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
     let x = &Operand::new("x", FxN);
     let y = &Operand::new("y", FxN);
     let a = &Operand::new("a", FxN);

cranelift/codegen/meta/src/isa/x86/legalize.rs (4 lines changed)

@@ -405,6 +405,7 @@ fn define_simd(
     let uadd_sat = insts.by_name("uadd_sat");
     let umax = insts.by_name("umax");
     let umin = insts.by_name("umin");
+    let snarrow = insts.by_name("snarrow");
     let ushr_imm = insts.by_name("ushr_imm");
     let ushr = insts.by_name("ushr");
     let vconst = insts.by_name("vconst");
@@ -412,7 +413,6 @@ fn define_simd(
     let vany_true = insts.by_name("vany_true");
     let vselect = insts.by_name("vselect");
-    let x86_packss = x86_instructions.by_name("x86_packss");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
     let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
     let x86_pmins = x86_instructions.by_name("x86_pmins");
@@ -575,7 +575,7 @@ fn define_simd(
             def!(g = raw_bitcast_i16x8_again(f)),
             def!(h = x86_psra(g, b)),
             // Re-pack the vector.
-            def!(z = x86_packss(e, h)),
+            def!(z = snarrow(e, h)),
         ],
     );
 }

cranelift/codegen/meta/src/shared/instructions.rs (31 lines changed)

@@ -3883,6 +3883,37 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
     );
+    let I16xN = &TypeVar::new(
+        "I16xN",
+        "A SIMD vector type containing integers 16-bits wide and up",
+        TypeSetBuilder::new()
+            .ints(16..32)
+            .simd_lanes(4..8)
+            .includes_scalars(false)
+            .build(),
+    );
+    let x = &Operand::new("x", I16xN);
+    let y = &Operand::new("y", I16xN);
+    let a = &Operand::new("a", &I16xN.split_lanes());
+    ig.push(
+        Inst::new(
+            "snarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the signed maximum and minimum.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
     let IntTo = &TypeVar::new(
         "IntTo",
         "A larger integer type with the same number of lanes",

cranelift/codegen/src/isa/aarch64/lower_inst.rs (4 lines changed)

@@ -2060,7 +2060,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::X86Pminu
         | Opcode::X86Pmullq
         | Opcode::X86Pmuludq
-        | Opcode::X86Packss
         | Opcode::X86Punpckh
         | Opcode::X86Punpckl
         | Opcode::X86Vcvtudq2ps
@@ -2069,8 +2068,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             panic!("x86-specific opcode in supposedly arch-neutral IR!");
         }

-        Opcode::Iabs => unimplemented!(),
         Opcode::AvgRound => unimplemented!(),
+        Opcode::Iabs => unimplemented!(),
+        Opcode::Snarrow => unimplemented!(),
         Opcode::TlsValue => unimplemented!(),
     }

cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif (2 lines changed)

@@ -34,7 +34,7 @@ block0:
 ; nextln: v9 = raw_bitcast.i16x8 v8
 ; nextln: v10 = x86_psra v9, v4
-; nextln: v2 = x86_packss v7, v10
+; nextln: v2 = snarrow v7, v10
     return v2
 }

cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif (4 lines changed)

@@ -118,8 +118,8 @@ block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
     return
 }

-function %packss_i16x8(i16x8, i16x8) {
+function %snarrow_i16x8(i16x8, i16x8) {
 block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
-[-, %xmm7]    v2 = x86_packss v0, v1    ; bin: 66 41 0f 63 f8
+[-, %xmm7]    v2 = snarrow v0, v1       ; bin: 66 41 0f 63 f8
     return
 }

cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif (16 lines changed)

@@ -206,15 +206,9 @@ block0:
 }
 ; run

-function %pack() -> b1 {
-block0:
-    v0 = vconst.i32x4 [0 1 -1 0x0001ffff]
-    v1 = vconst.i32x4 [4 5 -6 0xffffffff]
-    v2 = x86_packss v0, v1
-    v3 = vconst.i16x8 [0 1 -1 0x7fff 4 5 -6 0xffff]
-    v4 = icmp eq v2, v3
-    v5 = vall_true v4
-    return v5
+function %snarrow(i32x4, i32x4) -> i16x8 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = snarrow v0, v1
+    return v2
 }
-; run
+; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]
