Browse Source

Move `insertlane` to ISLE (#3544)

This also fixes a bug where `movsd` was incorrectly used with a memory
operand for `insertlane`, causing it to actually zero the upper bits
instead of preserving them.

Note that the insertlane logic still exists in `lower.rs` because it's
used as a helper for a few other instruction lowerings which aren't
migrated to ISLE yet. This commit also adds a helper in ISLE itself for
those other lowerings to use when they get implemented.

Closes #3216
pull/3549/head
Alex Crichton 3 years ago
committed by GitHub
parent
commit
352ee2b186
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
  2. 1
      cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
  3. 30
      cranelift/codegen/src/isa/x64/inst.isle
  4. 57
      cranelift/codegen/src/isa/x64/lower.isle
  5. 21
      cranelift/codegen/src/isa/x64/lower.rs
  6. 7
      cranelift/codegen/src/isa/x64/lower/isle.rs
  7. 6
      cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
  8. 198
      cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
  9. 4
      cranelift/codegen/src/machinst/isle.rs
  10. 4
      cranelift/codegen/src/prelude.isle
  11. 12
      tests/misc_testsuite/simd/replace-lane-preserve.wast

2
cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest

@ -1,4 +1,4 @@
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
src/prelude.isle a069d14321601afc63959af23086709d67d189dafcdc7d1fc8534b32d89d49008acb8368b7b5a7bc51a353736a378197ac352ccce2bb3be89d93afb6979e480a
src/prelude.isle c1391bcd436c23caf46b909ba7b5a352405014f0c393e3886cf1b9ad37f610b0563e8a64daad215f107395e6bb55744d955dd9c6344bb19b96587c2deb703462
src/isa/aarch64/inst.isle 841748c9c5900821b7086a09a41c6dcdb2172eb47a45293b6ef10f2e1f1389620bf6a2c75152af807d8bc8929029a357af5191f5d87bac2c9ec54bf63a9a2a8f
src/isa/aarch64/lower.isle b3cd0834484e543f39d477d47ee66042276e99955c21fb8c9340a5f27ac317936acb2907a30f758bf596066e36db801a179fda6dbcecaee758a0187a5a5f1412

1
cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs

@ -36,6 +36,7 @@ pub trait Context {
fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;

30
cranelift/codegen/src/isa/x64/inst.isle

@ -1115,3 +1115,33 @@
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.GprToXmm op src dst size))))
(writable_reg_to_reg dst)))
;; Helper for creating `pinsrb` instructions.
(decl pinsrb (Reg RegMem u8) Reg)
(rule (pinsrb src1 src2 lane)
(xmm_rm_r_imm (SseOpcode.Pinsrb) src1 src2 lane (OperandSize.Size32)))
;; Helper for creating `pinsrw` instructions.
(decl pinsrw (Reg RegMem u8) Reg)
(rule (pinsrw src1 src2 lane)
(xmm_rm_r_imm (SseOpcode.Pinsrw) src1 src2 lane (OperandSize.Size32)))
;; Helper for creating `pinsrd` instructions.
(decl pinsrd (Reg RegMem u8 OperandSize) Reg)
(rule (pinsrd src1 src2 lane size)
(xmm_rm_r_imm (SseOpcode.Pinsrd) src1 src2 lane size))
;; Helper for creating `insertps` instructions.
(decl insertps (Reg RegMem u8) Reg)
(rule (insertps src1 src2 lane)
(xmm_rm_r_imm (SseOpcode.Insertps) src1 src2 lane (OperandSize.Size32)))
;; Helper for creating `movsd` instructions.
(decl movsd (Reg RegMem) Reg)
(rule (movsd src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Movsd) src1 src2))
;; Helper for creating `movlhps` instructions.
(decl movlhps (Reg RegMem) Reg)
(rule (movlhps src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Movlhps) src1 src2))

57
cranelift/codegen/src/isa/x64/lower.isle

@ -964,3 +964,60 @@
;; all-one value
(rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x)))
(value_reg (sse_xor ty (put_in_reg x) (RegMem.Reg (vector_all_ones ty)))))
;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
(value_reg (vec_insert_lane ty (put_in_reg vec) (put_in_reg_mem val) idx)))
;; Helper function used below for `insertlane` but also here for other
;; lowerings.
;;
;; Note that the `Type` used here is the type of vector the insertion is
;; happening into, or the type of the first `Reg` argument.
(decl vec_insert_lane (Type Reg RegMem u8) Reg)
;; i8x16.replace_lane
(rule (vec_insert_lane $I8X16 vec val idx) (pinsrb vec val idx))
;; i16x8.replace_lane
(rule (vec_insert_lane $I16X8 vec val idx) (pinsrw vec val idx))
;; i32x4.replace_lane
(rule (vec_insert_lane $I32X4 vec val idx) (pinsrd vec val idx (OperandSize.Size32)))
;; i64x2.replace_lane
(rule (vec_insert_lane $I64X2 vec val idx) (pinsrd vec val idx (OperandSize.Size64)))
;; f32x4.replace_lane
(rule (vec_insert_lane $F32X4 vec val idx) (insertps vec val (sse_insertps_lane_imm idx)))
;; external rust code used to calculate the immediate value to `insertps`
(decl sse_insertps_lane_imm (u8) u8)
(extern constructor sse_insertps_lane_imm sse_insertps_lane_imm)
;; f64x2.replace_lane 0
;;
;; Here the `movsd` instruction is used specifically to specialize moving
;; into the first lane where unlike above cases we're not using the lane
;; immediate as an immediate to the instruction itself.
;;
;; Note, though, the `movsd` has different behavior with respect to the second
;; lane of the f64x2 depending on whether the RegMem operand is a register or
;; memory. When loading from a register `movsd` preserves the upper bits, but
;; when loading from memory it zeros the upper bits. We specifically want to
;; preserve the upper bits so if a `RegMem.Mem` is passed in we need to emit
;; two `movsd` instructions. The first `movsd` (used as `xmm_unary_rm_r`) will
;; load from memory into a temp register and then the second `movsd` (modeled
;; internally as `xmm_rm_r`) will merge the temp register into our `vec`
;; register.
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0) (movsd vec (RegMem.Reg val)))
(rule (vec_insert_lane $F64X2 vec mem 0)
(movsd vec (RegMem.Reg (xmm_unary_rm_r (SseOpcode.Movsd) mem))))
;; f64x2.replace_lane 1
;;
;; Here the `movlhps` instruction is used specifically to specialize moving
;; into the second lane where unlike above cases we're not using the lane
;; immediate as an immediate to the instruction itself.
(rule (vec_insert_lane $F64X2 vec val 1) (movlhps vec val))

21
cranelift/codegen/src/isa/x64/lower.rs

@ -5641,22 +5641,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Insertlane => {
// The instruction format maps to variables like: %dst = insertlane %in_vec, %src, %lane
let ty = ty.unwrap();
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let in_vec = put_input_in_reg(ctx, inputs[0]);
let src_ty = ctx.input_ty(insn, 1);
debug_assert!(!src_ty.is_vector());
let src = input_to_reg_mem(ctx, inputs[1]);
let lane = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
*imm
} else {
unreachable!();
};
debug_assert!(lane < ty.lane_count() as u8);
ctx.emit(Inst::gen_move(dst, in_vec, ty));
emit_insert_lane(ctx, src, dst, lane, ty.lane_type());
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
Opcode::Extractlane => {

7
cranelift/codegen/src/isa/x64/lower/isle.rs

@ -207,6 +207,13 @@ where
None
}
}
#[inline]
fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 {
    // Layout of the `insertps` immediate byte:
    //   bits 7:6 - index of the 32-bit element to read from the source
    //              operand; left as 0 because the replacement value lives
    //              in element 0 of the source.
    //   bits 5:4 - index of the destination lane to overwrite.
    //   bits 3:0 - zero mask; left as 0 so no other lane is cleared.
    //
    // A lane index of 4 or more would spill into the source-select bits, so
    // catch that in debug builds (the pre-ISLE lowering asserted the same
    // bound against the vector's lane count).
    debug_assert!(lane < 4, "invalid f32x4 lane index: {}", lane);
    lane << 4
}
}
#[inline]

6
cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest

@ -1,4 +1,4 @@
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
src/prelude.isle a069d14321601afc63959af23086709d67d189dafcdc7d1fc8534b32d89d49008acb8368b7b5a7bc51a353736a378197ac352ccce2bb3be89d93afb6979e480a
src/isa/x64/inst.isle fdfbfc6dfad1fc5ed252e0a14ccc69baba51d0538e05cfb9916f6213e5a6fcfc9d22605a29bd684d6a66f6d5e1c8ec36a963660d52c2e8b3fb6e0758f7adb7b5
src/isa/x64/lower.isle 8555abdae385431c96aaabc392b7b3a8b1bbe733be08b007ef776850860cb77e85a140db02f427586c155c0b0129f9ffd531abd2e4a772c72667535cc015e609
src/prelude.isle c1391bcd436c23caf46b909ba7b5a352405014f0c393e3886cf1b9ad37f610b0563e8a64daad215f107395e6bb55744d955dd9c6344bb19b96587c2deb703462
src/isa/x64/inst.isle 6065d3b9e0fa3361d179d9b87d09568ff474f8bac7eeabd29b328ace723041f96045bc82cfa2d7feda4490ce7e5d4be1a1c7ebe25c99916564d43a51550cd093
src/isa/x64/lower.isle e51b7a67343dba342a43b3c9e4b9ed7df9b2c66a677018acf7054ba48c27e4e93a4421fd892b9bf7c0e5b790bcfafab7cb3e93ce2b8206c04d456918d2ad0b5a

198
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs

@ -36,6 +36,7 @@ pub trait Context {
fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice);
fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice);
fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg;
fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8;
fn u64_from_imm64(&mut self, arg0: Imm64) -> u64;
fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64;
fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64;
@ -58,6 +59,7 @@ pub trait Context {
fn ext_mode(&mut self, arg0: u16, arg1: u16) -> ExtMode;
fn emit(&mut self, arg0: &MInst) -> Unit;
fn nonzero_u64_fits_in_u32(&mut self, arg0: u64) -> Option<u64>;
fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8;
}
/// Internal type ProducesFlags: defined at src/isa/x64/inst.isle line 392.
@ -1724,6 +1726,102 @@ pub fn constructor_gpr_to_xmm<C: Context>(
return Some(expr3_0);
}
// Generated as internal constructor for term pinsrb.
//
// Forwards to `xmm_rm_r_imm` with the `Pinsrb` opcode and a 32-bit operand
// size; yields `None` when that constructor does.
pub fn constructor_pinsrb<C: Context>(
    ctx: &mut C,
    arg0: Reg,
    arg1: &RegMem,
    arg2: u8,
) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1121.
    let opcode = SseOpcode::Pinsrb;
    let size = OperandSize::Size32;
    constructor_xmm_rm_r_imm(ctx, &opcode, arg0, arg1, arg2, &size)
}
// Generated as internal constructor for term pinsrw.
//
// Forwards to `xmm_rm_r_imm` with the `Pinsrw` opcode and a 32-bit operand
// size; yields `None` when that constructor does.
pub fn constructor_pinsrw<C: Context>(
    ctx: &mut C,
    arg0: Reg,
    arg1: &RegMem,
    arg2: u8,
) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1126.
    let opcode = SseOpcode::Pinsrw;
    let size = OperandSize::Size32;
    constructor_xmm_rm_r_imm(ctx, &opcode, arg0, arg1, arg2, &size)
}
// Generated as internal constructor for term pinsrd.
//
// Forwards to `xmm_rm_r_imm` with the `Pinsrd` opcode; unlike the other
// insert helpers the operand size is chosen by the caller (`arg3`).
pub fn constructor_pinsrd<C: Context>(
    ctx: &mut C,
    arg0: Reg,
    arg1: &RegMem,
    arg2: u8,
    arg3: &OperandSize,
) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1131.
    let opcode = SseOpcode::Pinsrd;
    constructor_xmm_rm_r_imm(ctx, &opcode, arg0, arg1, arg2, arg3)
}
// Generated as internal constructor for term insertps.
//
// Forwards to `xmm_rm_r_imm` with the `Insertps` opcode and a 32-bit operand
// size; `arg2` is passed through as the instruction immediate.
pub fn constructor_insertps<C: Context>(
    ctx: &mut C,
    arg0: Reg,
    arg1: &RegMem,
    arg2: u8,
) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1136.
    let opcode = SseOpcode::Insertps;
    let size = OperandSize::Size32;
    constructor_xmm_rm_r_imm(ctx, &opcode, arg0, arg1, arg2, &size)
}
// Generated as internal constructor for term movsd.
//
// Forwards to `xmm_rm_r` with the `Movsd` opcode, using `I8X16` as the type
// argument; yields `None` when that constructor does.
pub fn constructor_movsd<C: Context>(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1141.
    let ty: Type = I8X16;
    let opcode = SseOpcode::Movsd;
    constructor_xmm_rm_r(ctx, ty, &opcode, arg0, arg1)
}
// Generated as internal constructor for term movlhps.
//
// Forwards to `xmm_rm_r` with the `Movlhps` opcode, using `I8X16` as the type
// argument; yields `None` when that constructor does.
pub fn constructor_movlhps<C: Context>(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option<Reg> {
    // Rule at src/isa/x64/inst.isle line 1146.
    let ty: Type = I8X16;
    let opcode = SseOpcode::Movlhps;
    constructor_xmm_rm_r(ctx, ty, &opcode, arg0, arg1)
}
// Generated as internal constructor for term lower.
pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueRegs> {
let pattern0_0 = arg0;
@ -1755,6 +1853,24 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueReg
return Some(expr2_0);
}
}
&InstructionData::TernaryImm8 {
opcode: ref pattern2_0,
args: ref pattern2_1,
imm: pattern2_2,
} => {
if let &Opcode::Insertlane = &pattern2_0 {
let (pattern4_0, pattern4_1) = C::unpack_value_array_2(ctx, &pattern2_1);
let pattern5_0 = C::value_type(ctx, pattern4_0);
let pattern6_0 = C::u8_from_uimm8(ctx, pattern2_2);
// Rule at src/isa/x64/lower.isle line 970.
let expr0_0 = C::put_in_reg(ctx, pattern4_0);
let expr1_0 = C::put_in_reg_mem(ctx, pattern4_1);
let expr2_0 =
constructor_vec_insert_lane(ctx, pattern5_0, expr0_0, &expr1_0, pattern6_0)?;
let expr3_0 = C::value_reg(ctx, expr2_0);
return Some(expr3_0);
}
}
_ => {}
}
if let Some(pattern1_0) = C::first_result(ctx, pattern0_0) {
@ -3967,3 +4083,85 @@ pub fn constructor_shr_i128<C: Context>(
let expr49_0 = constructor_with_flags_2(ctx, &expr37_0, &expr41_0, &expr48_0)?;
return Some(expr49_0);
}
// Generated as internal constructor for term vec_insert_lane.
//
// Dispatches on the vector type `arg0` to the matching lane-insert helper.
// `arg1` is the vector being inserted into, `arg2` the replacement value and
// `arg3` the lane index. Returns `None` when no rule matches (an unhandled
// vector type, or an `F64X2` lane other than 0 or 1) or when a delegated
// constructor returns `None`.
pub fn constructor_vec_insert_lane<C: Context>(
    ctx: &mut C,
    arg0: Type,
    arg1: Reg,
    arg2: &RegMem,
    arg3: u8,
) -> Option<Reg> {
    let (ty, vec, val, lane) = (arg0, arg1, arg2, arg3);

    if ty == I8X16 {
        // Rule at src/isa/x64/lower.isle line 981.
        return constructor_pinsrb(ctx, vec, val, lane);
    }
    if ty == I16X8 {
        // Rule at src/isa/x64/lower.isle line 984.
        return constructor_pinsrw(ctx, vec, val, lane);
    }
    if ty == I32X4 {
        // Rule at src/isa/x64/lower.isle line 987.
        let size = OperandSize::Size32;
        return constructor_pinsrd(ctx, vec, val, lane, &size);
    }
    if ty == I64X2 {
        // Rule at src/isa/x64/lower.isle line 990.
        let size = OperandSize::Size64;
        return constructor_pinsrd(ctx, vec, val, lane, &size);
    }
    if ty == F32X4 {
        // Rule at src/isa/x64/lower.isle line 993.
        let imm = C::sse_insertps_lane_imm(ctx, lane);
        return constructor_insertps(ctx, vec, val, imm);
    }
    if ty != F64X2 {
        return None;
    }

    match lane {
        0 => {
            let src = match val {
                // Rule at src/isa/x64/lower.isle line 1014: the value is
                // already in a register, so it is merged directly.
                &RegMem::Reg { reg } => RegMem::Reg { reg },
                // Rule at src/isa/x64/lower.isle line 1015: any other operand
                // is first loaded into a temporary register with a unary
                // `movsd`, and that register is merged instead.
                mem => {
                    let opcode = SseOpcode::Movsd;
                    let tmp = constructor_xmm_unary_rm_r(ctx, &opcode, mem)?;
                    RegMem::Reg { reg: tmp }
                }
            };
            constructor_movsd(ctx, vec, &src)
        }
        // Rule at src/isa/x64/lower.isle line 1023.
        1 => constructor_movlhps(ctx, vec, val),
        _ => None,
    }
}

4
cranelift/codegen/src/machinst/isle.rs

@ -175,6 +175,10 @@ macro_rules! isle_prelude_methods {
fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 {
val.bits()
}
// Backs the `u8_from_uimm8` extractor declared in `prelude.isle`: hands the
// immediate back unchanged.
// NOTE(review): this only type-checks if `Uimm8` is an alias for `u8` — confirm.
fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 {
val
}
};
}

4
cranelift/codegen/src/prelude.isle

@ -166,6 +166,10 @@
(decl writable_reg_to_reg (WritableReg) Reg)
(extern constructor writable_reg_to_reg writable_reg_to_reg)
;; Extract a `u8` from an `Uimm8`.
(decl u8_from_uimm8 (u8) Uimm8)
(extern extractor infallible u8_from_uimm8 u8_from_uimm8)
;; Extract a `u64` from an `Imm64`.
(decl u64_from_imm64 (u64) Imm64)
(extern extractor infallible u64_from_imm64 u64_from_imm64)

12
tests/misc_testsuite/simd/replace-lane-preserve.wast

@ -0,0 +1,12 @@
;; originally from #3216
(module
(func (result i64)
v128.const i64x2 -1 1
global.get 0
f64x2.replace_lane 0
i64x2.extract_lane 1
)
(global f64 (f64.const 1))
(export "" (func 0)))
(assert_return (invoke "") (i64.const 1))
Loading…
Cancel
Save