From 65e6de234437b54c1f13697aa2c817acc2c6ef7a Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Wed, 1 Jul 2020 10:30:43 -0700 Subject: [PATCH] Replace `x86_packss` with `snarrow` Since the Wasm specification contains narrowing instructions (see https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#integer-to-integer-narrowing) that lower to PACKSS*, the x86-specific instruction is not necessary in the CLIF IR. --- .../codegen/meta/src/isa/x86/encodings.rs | 6 ++-- .../codegen/meta/src/isa/x86/instructions.rs | 29 ----------------- .../codegen/meta/src/isa/x86/legalize.rs | 4 +-- .../codegen/meta/src/shared/instructions.rs | 31 +++++++++++++++++++ .../codegen/src/isa/aarch64/lower_inst.rs | 4 +-- .../isa/x86/simd-bitwise-legalize.clif | 2 +- .../isa/x86/simd-lane-access-binemit.clif | 4 +-- .../isa/x86/simd-lane-access-run.clif | 16 +++------- 8 files changed, 46 insertions(+), 50 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 303b1bfaeb..a1d4de8ca5 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1676,6 +1676,7 @@ fn define_simd( let uload16x4_complex = shared.by_name("uload16x4_complex"); let uload32x2 = shared.by_name("uload32x2"); let uload32x2_complex = shared.by_name("uload32x2_complex"); + let snarrow = shared.by_name("snarrow"); let ushr_imm = shared.by_name("ushr_imm"); let usub_sat = shared.by_name("usub_sat"); let vconst = shared.by_name("vconst"); @@ -1686,7 +1687,6 @@ fn define_simd( let x86_fmin = x86.by_name("x86_fmin"); let x86_movlhps = x86.by_name("x86_movlhps"); let x86_movsd = x86.by_name("x86_movsd"); - let x86_packss = x86.by_name("x86_packss"); let x86_pblendw = x86.by_name("x86_pblendw"); let x86_pextr = x86.by_name("x86_pextr"); let x86_pinsr = x86.by_name("x86_pinsr"); @@ -1901,8 +1901,8 @@ fn define_simd( ); } for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { - let x86_packss = x86_packss.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_packss, rec_fa.opcodes(*opcodes)); + let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); } // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 4afbc88747..0e48784f23 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -454,35 +454,6 @@ pub(crate) fn define( .operands_out(vec![a]), ); - let I16xN = &TypeVar::new( - "I16xN", - "A SIMD vector type containing integers 16-bits wide and up", - TypeSetBuilder::new() - .ints(16..32) - .simd_lanes(4..8) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", I16xN); - let y = &Operand::new("y", I16xN); - let a = &Operand::new("a", &I16xN.split_lanes()); - - ig.push( - Inst::new( - "x86_packss", - r#" - Convert packed signed integers the lanes of ``x`` and ``y`` into half-width integers, using - signed saturation to handle overflows. For example, with notional i16x2 vectors, where - ``x = [x1, x0]`` and ``y = [y1, y0]``, this operation would result in - ``a = [y1', y0', x1', x0']`` (using the Intel manual's right-to-left lane ordering). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - let x = &Operand::new("x", FxN); let y = &Operand::new("y", FxN); let a = &Operand::new("a", FxN); diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index 51453322e9..30c6789dff 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -405,6 +405,7 @@ fn define_simd( let uadd_sat = insts.by_name("uadd_sat"); let umax = insts.by_name("umax"); let umin = insts.by_name("umin"); + let snarrow = insts.by_name("snarrow"); let ushr_imm = insts.by_name("ushr_imm"); let ushr = insts.by_name("ushr"); let vconst = insts.by_name("vconst"); @@ -412,7 +413,6 @@ fn define_simd( let vany_true = insts.by_name("vany_true"); let vselect = insts.by_name("vselect"); - let x86_packss = x86_instructions.by_name("x86_packss"); let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); let x86_pmins = x86_instructions.by_name("x86_pmins"); @@ -575,7 +575,7 @@ fn define_simd( def!(g = raw_bitcast_i16x8_again(f)), def!(h = x86_psra(g, b)), // Re-pack the vector. - def!(z = x86_packss(e, h)), + def!(z = snarrow(e, h)), ], ); } diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index fb91ae0ae9..bad56b5f27 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -3883,6 +3883,37 @@ pub(crate) fn define( .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), ); + let I16xN = &TypeVar::new( + "I16xN", + "A SIMD vector type containing integers 16-bits wide and up", + TypeSetBuilder::new() + .ints(16..32) + .simd_lanes(4..8) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I16xN); + let y = &Operand::new("y", I16xN); + let a = &Operand::new("a", &I16xN.split_lanes()); + + ig.push( + Inst::new( + "snarrow", + r#" + Combine `x` and `y` into a vector with twice the lanes but half the integer width while + saturating overflowing values to the signed maximum and minimum. + + The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` + and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value + returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + let IntTo = &TypeVar::new( "IntTo", "A larger integer type with the same number of lanes", diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 82eb35f13f..2c67c1cd46 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -2060,7 +2060,6 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::X86Pminu | Opcode::X86Pmullq | Opcode::X86Pmuludq - | Opcode::X86Packss | Opcode::X86Punpckh | Opcode::X86Punpckl | Opcode::X86Vcvtudq2ps @@ -2069,8 +2068,9 @@ pub(crate) fn lower_insn_to_regs>( panic!("x86-specific opcode in supposedly arch-neutral IR!"); } - Opcode::Iabs => unimplemented!(), Opcode::AvgRound => unimplemented!(), + Opcode::Iabs => unimplemented!(), + Opcode::Snarrow => unimplemented!(), Opcode::TlsValue => unimplemented!(), } diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif index 7193aa2b54..ad459563ef 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif @@ -34,7 +34,7 @@ block0: ; nextln: v9 = raw_bitcast.i16x8 v8 ; nextln: v10 = x86_psra v9, v4 - ; nextln: v2 = x86_packss v7, v10 + ; nextln: v2 = snarrow v7, v10 return v2 } diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif index 24bc8cfa24..e15d059eef 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif @@ -118,8 +118,8 @@ block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]): return } -function %packss_i16x8(i16x8, i16x8) { +function %snarrow_i16x8(i16x8, i16x8) { block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]): -[-, %xmm7] v2 = x86_packss v0, v1 ; bin: 66 41 0f 63 f8 +[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8 return } diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif index 00ebae26f6..013ea78679 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif @@ -206,15 +206,9 @@ block0: } ; run -function %pack() -> b1 { -block0: - v0 = vconst.i32x4 [0 1 -1 0x0001ffff] - v1 = vconst.i32x4 [4 5 -6 0xffffffff] - v2 = x86_packss v0, v1 - - v3 = vconst.i16x8 [0 1 -1 0x7fff 4 5 -6 0xffff] - v4 = icmp eq v2, v3 - v5 = vall_true v4 - return v5 +function %snarrow(i32x4, i32x4) -> i16x8 { +block0(v0: i32x4, v1: i32x4): + v2 = snarrow v0, v1 + return v2 } -; run +; run: %snarrow([0 1 -1 0x0001ffff], [4 5 -6 0xffffffff]) == [0 1 -1 0x7fff 4 5 -6 0xffff]