Browse Source

Merge pull request #3554 from fitzgen/isle-iabs

ISLE: port `iabs` to ISLE for x64
pull/3557/head
Nick Fitzgerald 3 years ago
committed by GitHub
parent
commit
21bce8071e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 54
      cranelift/codegen/src/isa/x64/inst.isle
  2. 27
      cranelift/codegen/src/isa/x64/lower.isle
  3. 58
      cranelift/codegen/src/isa/x64/lower.rs
  4. 15
      cranelift/codegen/src/isa/x64/lower/isle.rs
  5. 4
      cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
  6. 421
      cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs

54
cranelift/codegen/src/isa/x64/inst.isle

@ -22,6 +22,9 @@
(XmmUnaryRmR (op SseOpcode)
(src RegMem)
(dst WritableReg))
(XmmUnaryRmREvex (op Avx512Opcode)
(src RegMem)
(dst WritableReg))
(XmmRmiReg (opcode SseOpcode)
(src1 Reg)
(src2 RegMemImm)
@ -347,6 +350,15 @@
(decl encode_fcmp_imm (FcmpImm) u8)
(extern constructor encode_fcmp_imm encode_fcmp_imm)
;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These should only be used for legalization purposes, when we can't otherwise
;; rely on something like `Inst::mov_mitosis` to put an operand into the
;; appropriate physical register for whatever reason.
;; Produce the `xmm0` register as a `WritableReg`. The implementation is the
;; external (Rust) constructor of the same name.
(decl xmm0 () WritableReg)
(extern constructor xmm0 xmm0)
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avx512vl_enabled () Type)
@ -355,6 +367,9 @@
(decl avx512dq_enabled () Type)
(extern extractor avx512dq_enabled avx512dq_enabled)
(decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
@ -926,6 +941,18 @@
(rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))
;; Helper for creating `blendvpd` instructions.
;;
;; `blendvpd` reads its mask implicitly from `xmm0`, and that implicit operand
;; is not a field of the `Inst` itself. The copy of the mask into `xmm0` is
;; therefore emitted right here, while the mask register is still at hand,
;; instead of being left to `Inst::mov_mitosis` as would normally be the case.
(decl blendvpd (Reg RegMem Reg) Reg)
(rule (blendvpd lhs rhs selector)
      (let ((implicit_mask WritableReg (xmm0))
            (_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Movapd)
                                             (RegMem.Reg selector)
                                             implicit_mask))))
        (xmm_rm_r $F64X2 (SseOpcode.Blendvpd) lhs rhs)))
;; Helper for creating `MInst.XmmRmRImm` instructions.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op src1 src2 imm size)
@ -977,6 +1004,33 @@
(rule (pmovzxbw src)
(xmm_unary_rm_r (SseOpcode.Pmovzxbw) src))
;; Helper for creating `pabsb` instructions.
;;
;; Forwards the register-or-memory operand to `xmm_unary_rm_r` with the
;; `Pabsb` opcode and yields the register that helper returns.
(decl pabsb (RegMem) Reg)
(rule (pabsb operand)
      (xmm_unary_rm_r (SseOpcode.Pabsb) operand))
;; Helper for creating `pabsw` instructions.
;;
;; Forwards the register-or-memory operand to `xmm_unary_rm_r` with the
;; `Pabsw` opcode and yields the register that helper returns.
(decl pabsw (RegMem) Reg)
(rule (pabsw operand)
      (xmm_unary_rm_r (SseOpcode.Pabsw) operand))
;; Helper for creating `pabsd` instructions.
;;
;; Forwards the register-or-memory operand to `xmm_unary_rm_r` with the
;; `Pabsd` opcode and yields the register that helper returns.
(decl pabsd (RegMem) Reg)
(rule (pabsd operand)
      (xmm_unary_rm_r (SseOpcode.Pabsd) operand))
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
;;
;; Obtains a destination from `temp_writable_reg` (typed `$I8X16`), emits the
;; instruction with that destination, and returns the destination converted to
;; a plain `Reg`.
(decl xmm_unary_rm_r_evex (Avx512Opcode RegMem) Reg)
(rule (xmm_unary_rm_r_evex opcode operand)
      (let ((result WritableReg (temp_writable_reg $I8X16))
            (_ Unit (emit (MInst.XmmUnaryRmREvex opcode operand result))))
        (writable_reg_to_reg result)))
;; Helper for creating `vpabsq` instructions.
;;
;; Forwards the register-or-memory operand to `xmm_unary_rm_r_evex` with the
;; `Vpabsq` AVX512 opcode and yields the register that helper returns.
(decl vpabsq (RegMem) Reg)
(rule (vpabsq operand)
      (xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) operand))
;; Helper for creating `MInst.XmmRmREvex` instructions.
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
(rule (xmm_rm_r_evex op src1 src2)

27
cranelift/codegen/src/isa/x64/lower.isle

@ -946,6 +946,33 @@
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
(value_reg (pandn (put_in_reg y) (put_in_reg_mem x))))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 8-, 16- and 32-bit lanes each map onto a dedicated helper (`pabsb`, `pabsw`,
;; `pabsd`), which is handed the operand as a register-or-memory value.
(rule (lower (has_type $I8X16 (iabs val)))
      (value_reg (pabsb (put_in_reg_mem val))))

(rule (lower (has_type $I16X8 (iabs val)))
      (value_reg (pabsw (put_in_reg_mem val))))

(rule (lower (has_type $I32X4 (iabs val)))
      (value_reg (pabsd (put_in_reg_mem val))))
;; For 64-bit lanes, a single `vpabsq` instruction suffices when both the
;; AVX512VL and AVX512F extensions are enabled.
(rule (lower (has_type (and (avx512vl_enabled)
                            (avx512f_enabled)
                            $I64X2)
                       (iabs val)))
      (value_reg (vpabsq (put_in_reg_mem val))))
;; Without `vpabsq`, compute `negated = 0 - x` in a separate register and hand
;; it to `blendvpd` as both the first operand and the mask, with `x` as the
;; second operand. `blendvpd` takes a lane from its second operand where the
;; mask's MSB is 1 and keeps the first operand's lane otherwise. So where
;; `negated` is negative (i.e. `x` was originally positive) the result lane is
;; `x`, and everywhere else it is `negated`.
(rule (lower (has_type $I64X2 (iabs val)))
      (let ((val_reg Reg (put_in_reg val))
            (negated Reg (psubq (imm $I64X2 0) (RegMem.Reg val_reg))))
        (value_reg (blendvpd negated (RegMem.Reg val_reg) negated))))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Special case for `f32x4.abs`.

58
cranelift/codegen/src/isa/x64/lower.rs

@ -1504,6 +1504,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
return Ok(());
}
// Shared panic for opcodes whose lowering lives in ISLE. The message reports
// the offending instruction and its type.
// NOTE(review): presumably ISLE lowering is attempted before this point, so
// reaching this closure indicates a bug rather than an unsupported input —
// confirm against the code preceding this `match`.
let implemented_in_isle = |ctx: &mut C| {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
)
};
match op {
Opcode::Iconst
| Opcode::Bconst
@ -1520,54 +1528,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Bor
| Opcode::Bxor
| Opcode::Imul
| Opcode::BandNot => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
if ty == types::I64X2 {
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
} else {
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
// contain the results of `0 - src` and then blend in those results with
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
// conversely, if `src` was originally positive).
// Emit all 0s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
// Subtract the lanes from 0 and set up `dst`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
// Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
// require the "choice" mask to be in XMM0.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::xmm0()),
tmp.to_reg(),
ty,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
}
} else if ty.is_vector() {
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
types::I16X8 => SseOpcode::Pabsw,
types::I32X4 => SseOpcode::Pabsd,
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
};
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
} else {
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
}
}
| Opcode::BandNot
| Opcode::Iabs => implemented_in_isle(ctx),
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
let lhs = put_input_in_reg(ctx, inputs[0]);

15
cranelift/codegen/src/isa/x64/lower/isle.rs

@ -8,6 +8,7 @@ use super::{
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
};
use crate::isa::x64::inst::args::SyntheticAmode;
use crate::isa::x64::inst::regs;
use crate::isa::x64::settings as x64_settings;
use crate::machinst::isle::*;
use crate::{
@ -145,6 +146,15 @@ where
}
}
/// Extractor that matches (`Some(())`) only when the `use_avx512f_simd` ISA
/// flag is set; the `Type` argument is ignored.
#[inline]
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_avx512f_simd().then(|| ())
}
#[inline]
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
@ -214,6 +224,11 @@ where
// shifted into bits 5:6).
0b00_00_00_00 | lane << 4
}
/// Returns the `xmm0` register from `regs::xmm0()` wrapped as a `WritableReg`.
#[inline]
fn xmm0(&mut self) -> WritableReg {
    let physical = regs::xmm0();
    WritableReg::from_reg(physical)
}
}
#[inline]

4
cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest

@ -1,4 +1,4 @@
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
src/isa/x64/inst.isle e4a0657406056a4cf116fe125e91d16377b602e0b41edd6628cbb7259b0fc2aa6b0482ffd33f00d63d68cf3546f188705877309d43eba5e75abd0f38a52a79b2
src/isa/x64/lower.isle e51b7a67343dba342a43b3c9e4b9ed7df9b2c66a677018acf7054ba48c27e4e93a4421fd892b9bf7c0e5b790bcfafab7cb3e93ce2b8206c04d456918d2ad0b5a
src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3
src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70

421
cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs

File diff suppressed because it is too large
Loading…
Cancel
Save