Browse Source

Add initial scalar FP operations (addss, subss, etc) to x64 backend.

Adds support for addss and subss. This is the first lowering for
SSE floating-point ALU and some move operations. The changes here do
some renaming of data structures and add a couple of new ones
to support SSE-specific operations. The work done here will likely
evolve as needed to support an efficient, intuitive, and consistent
framework.
pull/1851/head
Johnnie Birch 5 years ago
committed by Benjamin Bouvier
parent
commit
48f0b10c7a
  1. 69
      cranelift/codegen/src/isa/x64/abi.rs
  2. 73
      cranelift/codegen/src/isa/x64/inst/args.rs
  3. 185
      cranelift/codegen/src/isa/x64/inst/emit.rs
  4. 96
      cranelift/codegen/src/isa/x64/inst/emit_tests.rs
  5. 63
      cranelift/codegen/src/isa/x64/inst/mod.rs
  6. 33
      cranelift/codegen/src/isa/x64/inst/regs.rs
  7. 39
      cranelift/codegen/src/isa/x64/lower.rs
  8. 1
      cranelift/codegen/src/lib.rs
  9. 9
      cranelift/codegen/src/machinst/vcode.rs
  10. 1
      cranelift/src/wasm.rs

69
cranelift/codegen/src/isa/x64/abi.rs

@ -48,7 +48,7 @@ pub(crate) struct X64ABIBody {
flags: settings::Flags,
}
fn in_int_reg(ty: types::Type) -> bool {
fn use_int_reg(ty: types::Type) -> bool {
match ty {
types::I8
| types::I16
@ -63,6 +63,13 @@ fn in_int_reg(ty: types::Type) -> bool {
}
}
fn use_flt_reg(ty: types::Type) -> bool {
match ty {
types::F32 | types::F64 => true,
_ => false,
}
}
fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rdi()),
@ -75,6 +82,20 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option<Reg> {
}
}
fn get_fltreg_for_arg_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
2 => Some(regs::xmm2()),
3 => Some(regs::xmm3()),
4 => Some(regs::xmm4()),
5 => Some(regs::xmm5()),
6 => Some(regs::xmm6()),
7 => Some(regs::xmm7()),
_ => None,
}
}
fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::rax()),
@ -83,6 +104,14 @@ fn get_intreg_for_retval_systemv(idx: usize) -> Option<Reg> {
}
}
fn get_fltreg_for_retval_systemv(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::xmm0()),
1 => Some(regs::xmm1()),
_ => None,
}
}
fn is_callee_save_systemv(r: RealReg) -> bool {
use regs::*;
match r.get_class() {
@ -90,6 +119,7 @@ fn is_callee_save_systemv(r: RealReg) -> bool {
ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true,
_ => false,
},
RegClass::V128 => false,
_ => unimplemented!(),
}
}
@ -106,6 +136,7 @@ impl X64ABIBody {
// Compute args and retvals from signature.
let mut args = vec![];
let mut next_int_arg = 0;
let mut next_flt_arg = 0;
for param in &f.signature.params {
match param.purpose {
ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => {
@ -114,15 +145,22 @@ impl X64ABIBody {
}
ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => {
if in_int_reg(param.value_type) {
if use_int_reg(param.value_type) {
if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) {
args.push(ABIArg::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing arg on the stack");
}
next_int_arg += 1;
} else if use_flt_reg(param.value_type) {
if let Some(reg) = get_fltreg_for_arg_systemv(next_flt_arg) {
args.push(ABIArg::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing arg on the stack");
}
next_flt_arg += 1;
} else {
unimplemented!("non int normal register")
unimplemented!("non int normal register {:?}", param.value_type)
}
}
@ -132,16 +170,24 @@ impl X64ABIBody {
let mut rets = vec![];
let mut next_int_retval = 0;
let mut next_flt_retval = 0;
for ret in &f.signature.returns {
match ret.purpose {
ir::ArgumentPurpose::Normal => {
if in_int_reg(ret.value_type) {
if use_int_reg(ret.value_type) {
if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) {
rets.push(ABIRet::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing return on the stack");
}
next_int_retval += 1;
} else if use_flt_reg(ret.value_type) {
if let Some(reg) = get_fltreg_for_retval_systemv(next_flt_retval) {
rets.push(ABIRet::Reg(reg.to_real_reg()));
} else {
unimplemented!("passing return on the stack");
}
next_flt_retval += 1;
} else {
unimplemented!("returning non integer normal value");
}
@ -232,8 +278,11 @@ impl ABIBody for X64ABIBody {
if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 {
// TODO do we need a sign extension if it's I32?
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg);
} else if from_reg.get_class() == RegClass::V128 {
// TODO: How to support Movss. Should is64 always be true?
return Inst::xmm_r_r(SSE_Op::SSE2_Movsd, from_reg.to_reg(), to_reg);
}
unimplemented!("moving from non-int arg to vreg");
unimplemented!("moving from non-int arg to vreg {:?}", from_reg.get_class());
}
ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"),
}
@ -266,8 +315,16 @@ impl ABIBody for X64ABIBody {
from_reg.to_reg(),
Writable::<Reg>::from_reg(to_reg.to_reg()),
))
} else if to_reg.get_class() == RegClass::V128
|| to_reg.get_class() == RegClass::V128
{
ret.push(Inst::xmm_r_r(
SSE_Op::SSE2_Movsd,
from_reg.to_reg(),
Writable::<Reg>::from_reg(to_reg.to_reg()),
))
} else {
unimplemented!("moving from vreg to non-int return value");
unimplemented!("moving from vreg to unsupported return value");
}
}

73
cranelift/codegen/src/isa/x64/inst/args.rs

@ -144,7 +144,7 @@ impl RM {
// Constructors.
pub(crate) fn reg(reg: Reg) -> Self {
debug_assert!(reg.get_class() == RegClass::I64);
debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128);
RM::R { reg }
}
@ -205,8 +205,75 @@ impl fmt::Debug for RMI_R_Op {
}
}
/// These indicate ways of extending (widening) a value, using the Intel naming:
/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
/// Some scalar SSE operations requiring 2 operands r/m and r
/// Each instruction is prefixed with the SSE version that introduced
/// the particular instructions.
/// TODO: Below only includes scalar operations. To be seen if packed will
/// be added here.
#[derive(Clone, PartialEq)]
pub enum SSE_Op {
SSE_Addss,
SSE2_Addsd,
SSE_Comiss,
SSE2_Comisd,
SSE2_Cvtsd2ss,
SSE2_Cvtsd2si,
SSE_Cvtsi2ss,
SSE2_Cvtsi2sd,
SSE_Cvtss2si,
SSE2_Cvtss2sd,
SSE_Cvttss2si,
SSE2_Cvttsd2si,
SSE_Divss,
SSE2_Divsd,
SSE_Maxss,
SSE2_Maxsd,
SSE_Minss,
SSE2_Minsd,
SSE_Movss,
SSE2_Movsd,
SSE_Mulss,
SSE2_Mulsd,
SSE_Rcpss,
SSE41_Roundss,
SSE41_Roundsd,
SSE_Rsqrtss,
SSE_Sqrtss,
SSE2_Sqrtsd,
SSE_Subss,
SSE2_Subsd,
SSE_Ucomiss,
SSE2_Ucomisd,
}
/// Some SSE operations requiring 3 operands i, r/m, and r
#[derive(Clone, PartialEq)]
pub enum SSE_RMI_Op {
SSE_Cmpss,
SSE2_Cmpsd,
SSE41_Insertps,
}
impl SSE_Op {
pub(crate) fn to_string(&self) -> String {
match self {
SSE_Op::SSE_Addss => "addss".to_string(),
SSE_Op::SSE_Subss => "subss".to_string(),
SSE_Op::SSE_Movss => "movss".to_string(),
SSE_Op::SSE2_Movsd => "movsd".to_string(),
_ => "unimplemented sse_op".to_string(),
}
}
}
impl fmt::Debug for SSE_Op {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{}", self.to_string())
}
}
/// These indicate ways of extending (widening) a value, using the Intel
/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
#[derive(Clone, PartialEq)]
pub enum ExtMode {
/// Byte -> Longword.

185
cranelift/codegen/src/isa/x64/inst/emit.rs

@ -1,6 +1,5 @@
use regalloc::{Reg, RegClass};
use crate::isa::x64::inst::*;
use regalloc::Reg;
fn low8willSXto64(x: u32) -> bool {
let xs = (x as i32) as i64;
@ -42,7 +41,6 @@ fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 {
#[inline(always)]
fn iregEnc(reg: Reg) -> u8 {
debug_assert!(reg.is_real());
debug_assert!(reg.get_class() == RegClass::I64);
reg.get_hw_encoding()
}
@ -57,10 +55,16 @@ const F_RETAIN_REDUNDANT_REX: u32 = 1;
/// indicating a 64-bit operation.
const F_CLEAR_REX_W: u32 = 2;
/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate
/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W.
const F_PREFIX_66: u32 = 4;
/// For specifying the legacy prefixes (or `PfxNone` if no prefix required) to
/// be used at the start an instruction. A select prefix may be required for
/// various operations, including instructions that operate on GPR, SSE, and Vex
/// registers.
enum LegacyPrefix {
PfxNone,
Pfx66,
PfxF2,
PfxF3,
}
/// This is the core 'emit' function for instructions that reference memory.
///
/// For an instruction that has as operands a register `encG` and a memory
@ -82,6 +86,7 @@ const F_PREFIX_66: u32 = 4;
/// indicate a 64-bit operation.
fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
@ -91,13 +96,15 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
// General comment for this function: the registers in `memE` must be
// 64-bit integer registers, because they are part of an address
// expression. But `encG` can be derived from a register of any class.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override, if requested. This indicates a 16-bit
// operation.
if prefix66 {
sink.put1(0x66);
// Lower the prefix if applicable.
match prefix {
LegacyPrefix::Pfx66 => sink.put1(0x66),
LegacyPrefix::PfxF2 => sink.put1(0xF2),
LegacyPrefix::PfxF3 => sink.put1(0xF3),
LegacyPrefix::PfxNone => (),
}
match memE {
Addr::IR { simm32, base: regE } => {
@ -201,6 +208,7 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
/// simpler.
fn emit_REX_OPCODES_MODRM_encG_encE(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
opcodes: u32,
mut numOpcodes: usize,
encG: u8,
@ -211,22 +219,28 @@ fn emit_REX_OPCODES_MODRM_encG_encE(
// don't even have to be from the same class. For example, for an
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
let prefix66 = (flags & F_PREFIX_66) != 0;
let clearRexW = (flags & F_CLEAR_REX_W) != 0;
let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0;
// The operand-size override
if prefix66 {
sink.put1(0x66);
match prefix {
LegacyPrefix::Pfx66 => sink.put1(0x66),
LegacyPrefix::PfxF2 => sink.put1(0xF2),
LegacyPrefix::PfxF3 => sink.put1(0xF3),
LegacyPrefix::PfxNone => (),
}
// The rex byte
let w = if clearRexW { 0 } else { 1 };
let r = (encG >> 3) & 1;
let x = 0;
let b = (encE >> 3) & 1;
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
if rex != 0x40 || retainRedundant {
sink.put1(rex);
}
// All other prefixes and opcodes
while numOpcodes > 0 {
numOpcodes -= 1;
@ -242,6 +256,7 @@ fn emit_REX_OPCODES_MODRM_encG_encE(
fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
@ -250,11 +265,12 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
) {
// JRS FIXME 2020Feb07: this should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags);
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, prefix, opcodes, numOpcodes, encG, memE, flags);
}
fn emit_REX_OPCODES_MODRM_regG_regE(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
opcodes: u32,
numOpcodes: usize,
regG: Reg,
@ -264,7 +280,7 @@ fn emit_REX_OPCODES_MODRM_regG_regE(
// JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc`
let encG = iregEnc(regG);
let encE = iregEnc(regE);
emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags);
emit_REX_OPCODES_MODRM_encG_encE(sink, prefix, opcodes, numOpcodes, encG, encE, flags);
}
/// Write a suitable number of bits from an imm64 to the sink.
@ -346,6 +362,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
RMI::R { reg: regE } => {
emit_REX_OPCODES_MODRM_regG_regE(
sink,
LegacyPrefix::PfxNone,
0x0FAF,
2,
regG.to_reg(),
@ -356,6 +373,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FAF,
2,
regG.to_reg(),
@ -369,6 +387,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// Yes, really, regG twice.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
LegacyPrefix::PfxNone,
opcode,
1,
regG.to_reg(),
@ -402,6 +421,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// gold standard.
emit_REX_OPCODES_MODRM_regG_regE(
sink,
LegacyPrefix::PfxNone,
opcode_R,
1,
*regE,
@ -415,6 +435,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
opcode_M,
1,
regG.to_reg(),
@ -427,7 +448,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let opcode = if useImm8 { 0x83 } else { 0x81 };
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(regG.to_reg());
emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
LegacyPrefix::PfxNone,
opcode,
1,
subopcode_I,
encG,
flags,
);
emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32);
}
}
@ -455,7 +484,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
}
Inst::Mov_R_R { is_64, src, dst } => {
let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W };
emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags);
emit_REX_OPCODES_MODRM_regG_regE(
sink,
LegacyPrefix::PfxNone,
0x89,
1,
*src,
dst.to_reg(),
flags,
);
}
Inst::MovZX_M_R { extMode, addr, dst } => {
match extMode {
@ -463,6 +500,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVZBL is (REX.W==0) 0F B6 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FB6,
2,
dst.to_reg(),
@ -478,6 +516,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// the upper half of the destination anyway.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FB6,
2,
dst.to_reg(),
@ -489,6 +528,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVZWL is (REX.W==0) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FB7,
2,
dst.to_reg(),
@ -500,6 +540,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVZWQ is (REX.W==1) 0F B7 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FB7,
2,
dst.to_reg(),
@ -513,6 +554,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOV r/m32, r32 is (REX.W==0) 8B /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x8B,
1,
dst.to_reg(),
@ -522,15 +564,22 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
}
}
}
Inst::Mov64_M_R { addr, dst } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE)
}
Inst::Mov64_M_R { addr, dst } => emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x8B,
1,
dst.to_reg(),
addr,
F_NONE,
),
Inst::MovSX_M_R { extMode, addr, dst } => {
match extMode {
ExtMode::BL => {
// MOVSBL is (REX.W==0) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FBE,
2,
dst.to_reg(),
@ -542,6 +591,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVSBQ is (REX.W==1) 0F BE /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FBE,
2,
dst.to_reg(),
@ -553,6 +603,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVSWL is (REX.W==0) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FBF,
2,
dst.to_reg(),
@ -564,6 +615,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVSWQ is (REX.W==1) 0F BF /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x0FBF,
2,
dst.to_reg(),
@ -575,6 +627,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOVSLQ is (REX.W==1) 63 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x63,
1,
dst.to_reg(),
@ -599,6 +652,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x88,
1,
*src,
@ -610,17 +664,19 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::Pfx66,
0x89,
1,
*src,
addr,
F_CLEAR_REX_W | F_PREFIX_66,
F_CLEAR_REX_W,
)
}
4 => {
// MOV r32, r/m32 is (REX.W==0) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x89,
1,
*src,
@ -630,7 +686,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
}
8 => {
// MOV r64, r/m64 is (REX.W==1) 89 /r
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE)
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxNone,
0x89,
1,
*src,
addr,
F_NONE,
)
}
_ => panic!("x64::Inst::Mov_R_M::emit: unreachable"),
}
@ -653,6 +717,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_REX_OPCODES_MODRM_encG_encE(
sink,
LegacyPrefix::PfxNone,
0xD3,
1,
subopcode,
@ -667,6 +732,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// bother with that nicety here.
emit_REX_OPCODES_MODRM_encG_encE(
sink,
LegacyPrefix::PfxNone,
0xC1,
1,
subopcode,
@ -683,6 +749,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
dst: regG,
} => {
let mut retainRedundantRex = 0;
let mut prefix = LegacyPrefix::PfxNone;
if *size == 1 {
// Here, a redundant REX prefix changes the meaning of the
// instruction.
@ -691,10 +758,12 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
retainRedundantRex = F_RETAIN_REDUNDANT_REX;
}
}
if *size == 2 {
prefix = LegacyPrefix::Pfx66;
}
let mut flags = match size {
8 => F_NONE,
4 => F_CLEAR_REX_W,
2 => F_CLEAR_REX_W | F_PREFIX_66,
4 | 2 => F_CLEAR_REX_W,
1 => F_CLEAR_REX_W | retainRedundantRex,
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
};
@ -710,12 +779,14 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
}
}
// Same comment re swapped args as for Alu_RMI_R.
emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags);
emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 1, *regE, *regG, flags);
}
RMI::M { addr } => {
let opcode = if *size == 1 { 0x3A } else { 0x3B };
// Whereas here we revert to the "normal" G-E ordering.
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags);
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink, prefix, opcode, 1, *regG, addr, flags,
);
}
RMI::I { simm32 } => {
// FIXME JRS 2020Feb11: there are shorter encodings for
@ -731,7 +802,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
// And also here we use the "normal" G-E ordering.
let encG = iregEnc(*regG);
emit_REX_OPCODES_MODRM_encG_encE(
sink, opcode, 1, 7, /*subopcode*/
sink, prefix, opcode, 1, 7, /*subopcode*/
encG, flags,
);
emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32);
@ -751,6 +822,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
RMI::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
LegacyPrefix::PfxNone,
0xFF,
1,
6, /*subopcode*/
@ -787,6 +859,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
LegacyPrefix::PfxNone,
0xFF,
1,
2, /*subopcode*/
@ -797,6 +870,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
LegacyPrefix::PfxNone,
0xFF,
1,
2, /*subopcode*/
@ -867,6 +941,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
let regEnc = iregEnc(*reg);
emit_REX_OPCODES_MODRM_encG_encE(
sink,
LegacyPrefix::PfxNone,
0xFF,
1,
4, /*subopcode*/
@ -877,6 +952,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(
sink,
LegacyPrefix::PfxNone,
0xFF,
1,
4, /*subopcode*/
@ -886,7 +962,56 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer<Inst>) {
}
}
}
Inst::XMM_R_R { op, src, dst } => {
let flags = F_CLEAR_REX_W;
let opcode = match op {
SSE_Op::SSE_Movss => 0x0F10,
SSE_Op::SSE2_Movsd => 0x0F10,
_ => unimplemented!("XMM_R_R opcode"),
};
let prefix = match op {
SSE_Op::SSE_Movss => LegacyPrefix::PfxF3,
SSE_Op::SSE2_Movsd => LegacyPrefix::PfxF2,
_ => unimplemented!("XMM_R_R opcode"),
};
emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 2, dst.to_reg(), *src, flags);
}
Inst::XMM_RM_R {
op,
src: srcE,
dst: regG,
} => {
let flags = F_CLEAR_REX_W;
let opcode = match op {
SSE_Op::SSE_Addss => 0x0F58,
SSE_Op::SSE_Subss => 0x0F5C,
_ => unimplemented!("XMM_RM_R opcode"),
};
match srcE {
RM::R { reg: regE } => {
emit_REX_OPCODES_MODRM_regG_regE(
sink,
LegacyPrefix::PfxF3,
opcode,
2,
regG.to_reg(),
*regE,
flags,
);
}
RM::M { addr } => {
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(
sink,
LegacyPrefix::PfxF3,
opcode,
2,
regG.to_reg(),
addr,
flags,
);
}
}
}
_ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)),
}
}

96
cranelift/codegen/src/isa/x64/inst/emit_tests.rs

@ -33,6 +33,23 @@ fn test_x64_emit() {
let r14 = regs::r14();
let r15 = regs::r15();
let xmm0 = regs::xmm0();
let xmm1 = regs::xmm1();
let xmm2 = regs::xmm2();
let xmm3 = regs::xmm3();
let xmm4 = regs::xmm4();
let xmm5 = regs::xmm5();
let xmm6 = regs::xmm6();
let xmm7 = regs::xmm7();
let xmm8 = regs::xmm8();
let xmm9 = regs::xmm9();
let xmm10 = regs::xmm10();
let xmm11 = regs::xmm11();
let xmm12 = regs::xmm12();
let xmm13 = regs::xmm13();
let xmm14 = regs::xmm14();
let xmm15 = regs::xmm15();
// And Writable<> versions of the same:
let w_rax = Writable::<Reg>::from_reg(rax);
let w_rbx = Writable::<Reg>::from_reg(rbx);
@ -51,6 +68,23 @@ fn test_x64_emit() {
let w_r14 = Writable::<Reg>::from_reg(r14);
let w_r15 = Writable::<Reg>::from_reg(r15);
let w_xmm0 = Writable::<Reg>::from_reg(xmm0);
let w_xmm1 = Writable::<Reg>::from_reg(xmm1);
let w_xmm2 = Writable::<Reg>::from_reg(xmm2);
let w_xmm3 = Writable::<Reg>::from_reg(xmm3);
let _w_xmm4 = Writable::<Reg>::from_reg(xmm4);
let _w_xmm5 = Writable::<Reg>::from_reg(xmm5);
let _w_xmm6 = Writable::<Reg>::from_reg(xmm6);
let _w_xmm7 = Writable::<Reg>::from_reg(xmm7);
let _w_xmm8 = Writable::<Reg>::from_reg(xmm8);
let _w_xmm9 = Writable::<Reg>::from_reg(xmm9);
let w_xmm10 = Writable::<Reg>::from_reg(xmm10);
let _w_xmm11 = Writable::<Reg>::from_reg(xmm11);
let _w_xmm12 = Writable::<Reg>::from_reg(xmm12);
let w_xmm13 = Writable::<Reg>::from_reg(xmm13);
let _w_xmm14 = Writable::<Reg>::from_reg(xmm14);
let _w_xmm15 = Writable::<Reg>::from_reg(xmm15);
let mut insns = Vec::<(Inst, &str, &str)>::new();
// ========================================================
@ -2171,6 +2205,67 @@ fn test_x64_emit() {
"jmp *321(%r10,%rdx,4)",
));
// ========================================================
// XMM_RM_R
insns.push((
Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm1), w_xmm0),
"F30F58C1",
"addss %xmm1, %xmm0",
));
insns.push((
Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm0), w_xmm1),
"F30F5CC8",
"subss %xmm0, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm11), w_xmm13),
"F3450F58EB",
"addss %xmm11, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm12), w_xmm1),
"F3410F5CCC",
"subss %xmm12, %xmm1",
));
insns.push((
Inst::xmm_rm_r(
SSE_Op::SSE_Addss,
RM::mem(Addr::imm_reg_reg_shift(123, r10, rdx, 2)),
w_xmm0,
),
"F3410F5844927B",
"addss 123(%r10,%rdx,4), %xmm0",
));
insns.push((
Inst::xmm_rm_r(
SSE_Op::SSE_Subss,
RM::mem(Addr::imm_reg_reg_shift(321, r10, rax, 3)),
w_xmm10,
),
"F3450F5C94C241010000",
"subss 321(%r10,%rax,8), %xmm10",
));
// ========================================================
// XMM_R_R
insns.push((
Inst::xmm_r_r(SSE_Op::SSE_Movss, xmm3, w_xmm2),
"F30F10D3",
"movss %xmm3, %xmm2",
));
insns.push((
Inst::xmm_r_r(SSE_Op::SSE2_Movsd, xmm4, w_xmm3),
"F20F10DC",
"movsd %xmm4, %xmm3",
));
// ========================================================
// Actually run the tests!
let flags = settings::Flags::new(settings::builder());
@ -2179,7 +2274,6 @@ fn test_x64_emit() {
// Check the printed text is as expected.
let actual_printing = insn.show_rru(Some(&rru));
assert_eq!(expected_printing, actual_printing);
let mut sink = test_utils::TestCodeSink::new();
let mut buffer = MachBuffer::new();
insn.emit(&mut buffer, &flags, &mut Default::default());

63
cranelift/codegen/src/isa/x64/inst/mod.rs

@ -150,6 +150,20 @@ pub(crate) enum Inst {
/// jmpq (reg mem)
JmpUnknown { target: RM },
/// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg
XMM_RM_R {
op: SSE_Op,
src: RM,
dst: Writable<Reg>,
},
/// mov (64 32) reg reg
XMM_R_R {
op: SSE_Op,
src: Reg,
dst: Writable<Reg>,
},
}
// Handy constructors for Insts.
@ -195,6 +209,17 @@ impl Inst {
Inst::Mov_R_R { is_64, src, dst }
}
pub(crate) fn xmm_r_r(op: SSE_Op, src: Reg, dst: Writable<Reg>) -> Inst {
debug_assert!(src.get_class() == RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XMM_R_R { op, src, dst }
}
pub(crate) fn xmm_rm_r(op: SSE_Op, src: RM, dst: Writable<Reg>) -> Self {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Self::XMM_RM_R { op, src, dst }
}
pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable<Reg>) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovZX_M_R { extMode, addr, dst }
@ -341,6 +366,12 @@ impl ShowWithRRU for Inst {
src.show_rru_sized(mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)),
),
Inst::XMM_RM_R { op, src, dst } => format!(
"{} {}, {}",
ljustify(op.to_string()),
src.show_rru_sized(mb_rru, 8),
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),
Inst::Imm_R {
dst_is_64,
simm64,
@ -368,6 +399,12 @@ impl ShowWithRRU for Inst {
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Inst::XMM_R_R { op, src, dst } => format!(
"{} {}, {}",
ljustify(op.to_string()),
show_ireg_sized(*src, mb_rru, 8),
show_ireg_sized(dst.to_reg(), mb_rru, 8)
),
Inst::MovZX_M_R { extMode, addr, dst } => {
if *extMode == ExtMode::LQ {
format!(
@ -490,6 +527,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
Inst::XMM_RM_R { op: _, src, dst } => {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
@ -501,6 +542,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_use(*src);
collector.add_def(*dst);
}
Inst::XMM_R_R { op: _, src, dst } => {
collector.add_use(*src);
collector.add_def(*dst);
}
Inst::MovZX_M_R {
extMode: _,
addr,
@ -649,6 +694,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
src.map_uses(mapper);
map_mod(mapper, dst);
}
Inst::XMM_RM_R {
op: _,
ref mut src,
ref mut dst,
} => {
src.map_uses(mapper);
map_mod(mapper, dst);
}
Inst::Imm_R {
dst_is_64: _,
simm64: _,
@ -662,6 +715,14 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_use(mapper, src);
map_def(mapper, dst);
}
Inst::XMM_R_R {
op: _,
ref mut src,
ref mut dst,
} => {
map_use(mapper, src);
map_def(mapper, dst);
}
Inst::MovZX_M_R {
extMode: _,
ref mut addr,
@ -784,6 +845,8 @@ impl MachInst for Inst {
debug_assert!(rc_dst == rc_src);
match rc_dst {
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
// TODO: How do you just move 32 bits?
RegClass::V128 => Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, dst_reg),
_ => panic!("gen_move(x64): unhandled regclass"),
}
}

33
cranelift/codegen/src/isa/x64/inst/regs.rs

@ -78,52 +78,53 @@ pub(crate) fn r11() -> Reg {
fn fpr(enc: u8, index: u8) -> Reg {
Reg::new_real(RegClass::V128, enc, index)
}
fn xmm0() -> Reg {
pub(crate) fn xmm0() -> Reg {
fpr(0, 14)
}
fn xmm1() -> Reg {
pub(crate) fn xmm1() -> Reg {
fpr(1, 15)
}
fn xmm2() -> Reg {
pub(crate) fn xmm2() -> Reg {
fpr(2, 16)
}
fn xmm3() -> Reg {
pub(crate) fn xmm3() -> Reg {
fpr(3, 17)
}
fn xmm4() -> Reg {
pub(crate) fn xmm4() -> Reg {
fpr(4, 18)
}
fn xmm5() -> Reg {
pub(crate) fn xmm5() -> Reg {
fpr(5, 19)
}
fn xmm6() -> Reg {
pub(crate) fn xmm6() -> Reg {
fpr(6, 20)
}
fn xmm7() -> Reg {
pub(crate) fn xmm7() -> Reg {
fpr(7, 21)
}
fn xmm8() -> Reg {
pub(crate) fn xmm8() -> Reg {
fpr(8, 22)
}
fn xmm9() -> Reg {
pub(crate) fn xmm9() -> Reg {
fpr(9, 23)
}
fn xmm10() -> Reg {
pub(crate) fn xmm10() -> Reg {
fpr(10, 24)
}
fn xmm11() -> Reg {
pub(crate) fn xmm11() -> Reg {
fpr(11, 25)
}
fn xmm12() -> Reg {
pub(crate) fn xmm12() -> Reg {
fpr(12, 26)
}
fn xmm13() -> Reg {
pub(crate) fn xmm13() -> Reg {
fpr(13, 27)
}
fn xmm14() -> Reg {
pub(crate) fn xmm14() -> Reg {
fpr(14, 28)
}
fn xmm15() -> Reg {
pub(crate) fn xmm15() -> Reg {
fpr(15, 29)
}

39
cranelift/codegen/src/isa/x64/lower.rs

@ -3,7 +3,7 @@
#![allow(dead_code)]
#![allow(non_snake_case)]
use regalloc::{Reg, Writable};
use regalloc::{Reg, RegClass, Writable};
use crate::ir::condcodes::IntCC;
use crate::ir::types;
@ -31,7 +31,7 @@ fn is_int_ty(ty: Type) -> bool {
}
}
fn int_ty_to_is64(ty: Type) -> bool {
fn int_ty_is_64(ty: Type) -> bool {
match ty {
types::I8 | types::I16 | types::I32 => false,
types::I64 => true,
@ -39,6 +39,14 @@ fn int_ty_to_is64(ty: Type) -> bool {
}
}
fn flt_ty_is_64(ty: Type) -> bool {
match ty {
types::F32 => false,
types::F64 => true,
_ => panic!("type {} is none of F32, F64", ty),
}
}
fn int_ty_to_sizeB(ty: Type) -> u8 {
match ty {
types::I8 => 1,
@ -118,7 +126,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
};
// This is all outstandingly feeble. TODO: much better!
match op {
Opcode::Iconst => {
if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
@ -136,7 +143,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = int_ty_to_is64(ty.unwrap());
let is64 = int_ty_is_64(ty.unwrap());
let how = if op == Opcode::Iadd {
RMI_R_Op::Add
} else {
@ -195,7 +202,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
for i in 0..ctx.num_inputs(iri) {
let src_reg = input_to_reg(ctx, iri, i);
let retval_reg = ctx.retval(i);
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
if src_reg.get_class() == RegClass::I64 {
ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
} else if src_reg.get_class() == RegClass::V128 {
ctx.emit(Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, retval_reg));
}
}
// N.B.: the Ret itself is generated by the ABI.
}
@ -229,7 +240,23 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
| Opcode::SshrImm => {
panic!("ALU+imm and ALU+carry ops should not appear here!");
}
Opcode::Fadd | Opcode::Fsub => {
let regD = output_to_reg(ctx, iri, 0);
let regL = input_to_reg(ctx, iri, 0);
let regR = input_to_reg(ctx, iri, 1);
let is64 = flt_ty_is_64(ty.unwrap());
if !is64 {
let inst = if op == Opcode::Fadd {
SSE_Op::SSE_Addss
} else {
SSE_Op::SSE_Subss
};
ctx.emit(Inst::xmm_r_r(SSE_Op::SSE_Movss, regL, regD));
ctx.emit(Inst::xmm_rm_r(inst, RM::reg(regR), regD));
} else {
unimplemented!("unimplemented lowering for opcode {:?}", op);
}
}
_ => unimplemented!("unimplemented lowering for opcode {:?}", op),
}
}

1
cranelift/codegen/src/lib.rs

@ -1,5 +1,4 @@
//! Cranelift code generation library.
#![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)]
#![warn(unused_import_braces)]
#![cfg_attr(feature = "std", deny(unstable_features))]

9
cranelift/codegen/src/machinst/vcode.rs

@ -136,9 +136,14 @@ impl<I: VCodeInst> VCodeBuilder<I> {
/// Set the type of a VReg.
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
while self.vcode.vreg_types.len() <= vreg.get_index() {
self.vcode.vreg_types.push(ir::types::I8); // Default type.
if self.vcode.vreg_types.len() <= vreg.get_index() {
self.vcode.vreg_types.resize(
self.vcode.vreg_types.len()
+ ((vreg.get_index() + 1) - self.vcode.vreg_types.len()),
ir::types::I8,
)
}
self.vcode.vreg_types[vreg.get_index()] = ty;
}

1
cranelift/src/wasm.rs

@ -51,7 +51,6 @@ pub fn run(
flag_calc_value_ranges: bool,
) -> Result<(), String> {
let parsed = parse_sets_and_triple(flag_set, flag_triple)?;
for filename in files {
let path = Path::new(&filename);
let name = String::from(path.as_os_str().to_string_lossy());

Loading…
Cancel
Save