From 48f0b10c7a0db45a4578246aab7ac8a440de482c Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Tue, 5 May 2020 22:05:36 -0700 Subject: [PATCH] Add initial scalar FP operations (addss, subss, etc) to x64 backend. Adds support for addss and subss. This is the first lowering for sse floating point alu and some move operations. The changes here do some renaming of data structures and add a couple of new ones to support sse specific operations. The work done here will likely evolve as needed to support an efficient, intuitive, and consistent framework. --- cranelift/codegen/src/isa/x64/abi.rs | 69 ++++++- cranelift/codegen/src/isa/x64/inst/args.rs | 73 ++++++- cranelift/codegen/src/isa/x64/inst/emit.rs | 185 +++++++++++++++--- .../codegen/src/isa/x64/inst/emit_tests.rs | 96 ++++++++- cranelift/codegen/src/isa/x64/inst/mod.rs | 63 ++++++ cranelift/codegen/src/isa/x64/inst/regs.rs | 33 ++-- cranelift/codegen/src/isa/x64/lower.rs | 39 +++- cranelift/codegen/src/lib.rs | 1 - cranelift/codegen/src/machinst/vcode.rs | 9 +- cranelift/src/wasm.rs | 1 - 10 files changed, 503 insertions(+), 66 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4ba75e394c..2505286e08 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -48,7 +48,7 @@ pub(crate) struct X64ABIBody { flags: settings::Flags, } -fn in_int_reg(ty: types::Type) -> bool { +fn use_int_reg(ty: types::Type) -> bool { match ty { types::I8 | types::I16 @@ -63,6 +63,13 @@ fn in_int_reg(ty: types::Type) -> bool { } } +fn use_flt_reg(ty: types::Type) -> bool { + match ty { + types::F32 | types::F64 => true, + _ => false, + } +} + fn get_intreg_for_arg_systemv(idx: usize) -> Option { match idx { 0 => Some(regs::rdi()), @@ -75,6 +82,20 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option { } } +fn get_fltreg_for_arg_systemv(idx: usize) -> Option { + match idx { + 0 =>
Some(regs::xmm0()), + 1 => Some(regs::xmm1()), + 2 => Some(regs::xmm2()), + 3 => Some(regs::xmm3()), + 4 => Some(regs::xmm4()), + 5 => Some(regs::xmm5()), + 6 => Some(regs::xmm6()), + 7 => Some(regs::xmm7()), + _ => None, + } +} + fn get_intreg_for_retval_systemv(idx: usize) -> Option { match idx { 0 => Some(regs::rax()), @@ -83,6 +104,14 @@ fn get_intreg_for_retval_systemv(idx: usize) -> Option { } } +fn get_fltreg_for_retval_systemv(idx: usize) -> Option { + match idx { + 0 => Some(regs::xmm0()), + 1 => Some(regs::xmm1()), + _ => None, + } +} + fn is_callee_save_systemv(r: RealReg) -> bool { use regs::*; match r.get_class() { @@ -90,6 +119,7 @@ fn is_callee_save_systemv(r: RealReg) -> bool { ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, + RegClass::V128 => false, _ => unimplemented!(), } } @@ -106,6 +136,7 @@ impl X64ABIBody { // Compute args and retvals from signature. let mut args = vec![]; let mut next_int_arg = 0; + let mut next_flt_arg = 0; for param in &f.signature.params { match param.purpose { ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => { @@ -114,15 +145,22 @@ impl X64ABIBody { } ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => { - if in_int_reg(param.value_type) { + if use_int_reg(param.value_type) { if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) { args.push(ABIArg::Reg(reg.to_real_reg())); } else { unimplemented!("passing arg on the stack"); } next_int_arg += 1; + } else if use_flt_reg(param.value_type) { + if let Some(reg) = get_fltreg_for_arg_systemv(next_flt_arg) { + args.push(ABIArg::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing arg on the stack"); + } + next_flt_arg += 1; } else { - unimplemented!("non int normal register") + unimplemented!("non int normal register {:?}", param.value_type) } } @@ -132,16 +170,24 @@ impl X64ABIBody { let mut rets = vec![]; let mut next_int_retval = 0; + let mut next_flt_retval = 0; for ret in 
&f.signature.returns { match ret.purpose { ir::ArgumentPurpose::Normal => { - if in_int_reg(ret.value_type) { + if use_int_reg(ret.value_type) { if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) { rets.push(ABIRet::Reg(reg.to_real_reg())); } else { unimplemented!("passing return on the stack"); } next_int_retval += 1; + } else if use_flt_reg(ret.value_type) { + if let Some(reg) = get_fltreg_for_retval_systemv(next_flt_retval) { + rets.push(ABIRet::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing return on the stack"); + } + next_flt_retval += 1; } else { unimplemented!("returning non integer normal value"); } @@ -232,8 +278,11 @@ impl ABIBody for X64ABIBody { if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 { // TODO do we need a sign extension if it's I32? return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg); + } else if from_reg.get_class() == RegClass::V128 { + // TODO: How to support Movss. Should is64 always be true? 
+ return Inst::xmm_r_r(SSE_Op::SSE2_Movsd, from_reg.to_reg(), to_reg); } - unimplemented!("moving from non-int arg to vreg"); + unimplemented!("moving from non-int arg to vreg {:?}", from_reg.get_class()); } ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"), } @@ -266,8 +315,16 @@ impl ABIBody for X64ABIBody { from_reg.to_reg(), Writable::::from_reg(to_reg.to_reg()), )) + } else if to_reg.get_class() == RegClass::V128 + || to_reg.get_class() == RegClass::V128 + { + ret.push(Inst::xmm_r_r( + SSE_Op::SSE2_Movsd, + from_reg.to_reg(), + Writable::::from_reg(to_reg.to_reg()), + )) } else { - unimplemented!("moving from vreg to non-int return value"); + unimplemented!("moving from vreg to unsupported return value"); } } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 1e77dd91fa..6f4b52156f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -144,7 +144,7 @@ impl RM { // Constructors. pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64); + debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); RM::R { reg } } @@ -205,8 +205,75 @@ impl fmt::Debug for RMI_R_Op { } } -/// These indicate ways of extending (widening) a value, using the Intel naming: -/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 +/// Some scalar SSE operations requiring 2 operands r/m and r +/// Each instruction is prefixed with the SSE version that introduced +/// the particular instructions. +/// TODO: Below only includes scalar operations. To be seen if packed will +/// be added here. 
+#[derive(Clone, PartialEq)] +pub enum SSE_Op { + SSE_Addss, + SSE2_Addsd, + SSE_Comiss, + SSE2_Comisd, + SSE2_Cvtsd2ss, + SSE2_Cvtsd2si, + SSE_Cvtsi2ss, + SSE2_Cvtsi2sd, + SSE_Cvtss2si, + SSE2_Cvtss2sd, + SSE_Cvttss2si, + SSE2_Cvttsd2si, + SSE_Divss, + SSE2_Divsd, + SSE_Maxss, + SSE2_Maxsd, + SSE_Minss, + SSE2_Minsd, + SSE_Movss, + SSE2_Movsd, + SSE_Mulss, + SSE2_Mulsd, + SSE_Rcpss, + SSE41_Roundss, + SSE41_Roundsd, + SSE_Rsqrtss, + SSE_Sqrtss, + SSE2_Sqrtsd, + SSE_Subss, + SSE2_Subsd, + SSE_Ucomiss, + SSE2_Ucomisd, +} + +/// Some SSE operations requiring 3 operands i, r/m, and r +#[derive(Clone, PartialEq)] +pub enum SSE_RMI_Op { + SSE_Cmpss, + SSE2_Cmpsd, + SSE41_Insertps, +} + +impl SSE_Op { + pub(crate) fn to_string(&self) -> String { + match self { + SSE_Op::SSE_Addss => "addss".to_string(), + SSE_Op::SSE_Subss => "subss".to_string(), + SSE_Op::SSE_Movss => "movss".to_string(), + SSE_Op::SSE2_Movsd => "movsd".to_string(), + _ => "unimplemented sse_op".to_string(), + } + } +} + +impl fmt::Debug for SSE_Op { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// These indicate ways of extending (widening) a value, using the Intel +/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 #[derive(Clone, PartialEq)] pub enum ExtMode { /// Byte -> Longword. 
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b19013dc5c..77ed7fcb4e 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,5 @@ -use regalloc::{Reg, RegClass}; - use crate::isa::x64::inst::*; +use regalloc::Reg; fn low8willSXto64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -42,7 +41,6 @@ fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 { #[inline(always)] fn iregEnc(reg: Reg) -> u8 { debug_assert!(reg.is_real()); - debug_assert!(reg.get_class() == RegClass::I64); reg.get_hw_encoding() } @@ -57,10 +55,16 @@ const F_RETAIN_REDUNDANT_REX: u32 = 1; /// indicating a 64-bit operation. const F_CLEAR_REX_W: u32 = 2; -/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate -/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W. -const F_PREFIX_66: u32 = 4; - +/// For specifying the legacy prefixes (or `PfxNone` if no prefix required) to +/// be used at the start an instruction. A select prefix may be required for +/// various operations, including instructions that operate on GPR, SSE, and Vex +/// registers. +enum LegacyPrefix { + PfxNone, + Pfx66, + PfxF2, + PfxF3, +} /// This is the core 'emit' function for instructions that reference memory. /// /// For an instruction that has as operands a register `encG` and a memory @@ -82,6 +86,7 @@ const F_PREFIX_66: u32 = 4; /// indicate a 64-bit operation. fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, mut numOpcodes: usize, encG: u8, @@ -91,13 +96,15 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( // General comment for this function: the registers in `memE` must be // 64-bit integer registers, because they are part of an address // expression. But `encG` can be derived from a register of any class. 
- let prefix66 = (flags & F_PREFIX_66) != 0; let clearRexW = (flags & F_CLEAR_REX_W) != 0; let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; - // The operand-size override, if requested. This indicates a 16-bit - // operation. - if prefix66 { - sink.put1(0x66); + + // Lower the prefix if applicable. + match prefix { + LegacyPrefix::Pfx66 => sink.put1(0x66), + LegacyPrefix::PfxF2 => sink.put1(0xF2), + LegacyPrefix::PfxF3 => sink.put1(0xF3), + LegacyPrefix::PfxNone => (), } match memE { Addr::IR { simm32, base: regE } => { @@ -201,6 +208,7 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( /// simpler. fn emit_REX_OPCODES_MODRM_encG_encE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, mut numOpcodes: usize, encG: u8, @@ -211,22 +219,28 @@ fn emit_REX_OPCODES_MODRM_encG_encE( // don't even have to be from the same class. For example, for an // integer-to-FP conversion insn, one might be RegClass::I64 and the other // RegClass::V128. - let prefix66 = (flags & F_PREFIX_66) != 0; let clearRexW = (flags & F_CLEAR_REX_W) != 0; let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; + // The operand-size override - if prefix66 { - sink.put1(0x66); + match prefix { + LegacyPrefix::Pfx66 => sink.put1(0x66), + LegacyPrefix::PfxF2 => sink.put1(0xF2), + LegacyPrefix::PfxF3 => sink.put1(0xF3), + LegacyPrefix::PfxNone => (), } + // The rex byte let w = if clearRexW { 0 } else { 1 }; let r = (encG >> 3) & 1; let x = 0; let b = (encE >> 3) & 1; let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || retainRedundant { sink.put1(rex); } + // All other prefixes and opcodes while numOpcodes > 0 { numOpcodes -= 1; @@ -242,6 +256,7 @@ fn emit_REX_OPCODES_MODRM_encG_encE( fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, numOpcodes: usize, regG: Reg, @@ -250,11 +265,12 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( ) { // JRS FIXME 2020Feb07: this should really just be `regEnc` not 
`iregEnc` let encG = iregEnc(regG); - emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags); + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, prefix, opcodes, numOpcodes, encG, memE, flags); } fn emit_REX_OPCODES_MODRM_regG_regE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, numOpcodes: usize, regG: Reg, @@ -264,7 +280,7 @@ fn emit_REX_OPCODES_MODRM_regG_regE( // JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc` let encG = iregEnc(regG); let encE = iregEnc(regE); - emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags); + emit_REX_OPCODES_MODRM_encG_encE(sink, prefix, opcodes, numOpcodes, encG, encE, flags); } /// Write a suitable number of bits from an imm64 to the sink. @@ -346,6 +362,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::R { reg: regE } => { emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, 0x0FAF, 2, regG.to_reg(), @@ -356,6 +373,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FAF, 2, regG.to_reg(), @@ -369,6 +387,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // Yes, really, regG twice. emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, opcode, 1, regG.to_reg(), @@ -402,6 +421,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // gold standard. emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, opcode_R, 1, *regE, @@ -415,6 +435,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // Whereas here we revert to the "normal" G-E ordering. emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, opcode_M, 1, regG.to_reg(), @@ -427,7 +448,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let opcode = if useImm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. 
let encG = iregEnc(regG.to_reg()); - emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags); + emit_REX_OPCODES_MODRM_encG_encE( + sink, + LegacyPrefix::PfxNone, + opcode, + 1, + subopcode_I, + encG, + flags, + ); emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); } } @@ -455,7 +484,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } Inst::Mov_R_R { is_64, src, dst } => { let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W }; - emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags); + emit_REX_OPCODES_MODRM_regG_regE( + sink, + LegacyPrefix::PfxNone, + 0x89, + 1, + *src, + dst.to_reg(), + flags, + ); } Inst::MovZX_M_R { extMode, addr, dst } => { match extMode { @@ -463,6 +500,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZBL is (REX.W==0) 0F B6 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB6, 2, dst.to_reg(), @@ -478,6 +516,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // the upper half of the destination anyway. 
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB6, 2, dst.to_reg(), @@ -489,6 +528,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZWL is (REX.W==0) 0F B7 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB7, 2, dst.to_reg(), @@ -500,6 +540,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZWQ is (REX.W==1) 0F B7 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB7, 2, dst.to_reg(), @@ -513,6 +554,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r/m32, r32 is (REX.W==0) 8B /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x8B, 1, dst.to_reg(), @@ -522,15 +564,22 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } } - Inst::Mov64_M_R { addr, dst } => { - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE) - } + Inst::Mov64_M_R { addr, dst } => emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxNone, + 0x8B, + 1, + dst.to_reg(), + addr, + F_NONE, + ), Inst::MovSX_M_R { extMode, addr, dst } => { match extMode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBE, 2, dst.to_reg(), @@ -542,6 +591,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSBQ is (REX.W==1) 0F BE /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBE, 2, dst.to_reg(), @@ -553,6 +603,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSWL is (REX.W==0) 0F BF /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBF, 2, dst.to_reg(), @@ -564,6 +615,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSWQ is (REX.W==1) 0F BF /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBF, 2, dst.to_reg(), @@ -575,6 +627,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut 
MachBuffer) { // MOVSLQ is (REX.W==1) 63 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x63, 1, dst.to_reg(), @@ -599,6 +652,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r8, r/m8 is (REX.W==0) 88 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x88, 1, *src, @@ -610,17 +664,19 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r16, r/m16 is 66 (REX.W==0) 89 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::Pfx66, 0x89, 1, *src, addr, - F_CLEAR_REX_W | F_PREFIX_66, + F_CLEAR_REX_W, ) } 4 => { // MOV r32, r/m32 is (REX.W==0) 89 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x89, 1, *src, @@ -630,7 +686,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } 8 => { // MOV r64, r/m64 is (REX.W==1) 89 /r - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE) + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxNone, + 0x89, + 1, + *src, + addr, + F_NONE, + ) } _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"), } @@ -653,6 +717,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xD3, 1, subopcode, @@ -667,6 +732,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // bother with that nicety here. emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xC1, 1, subopcode, @@ -683,6 +749,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { dst: regG, } => { let mut retainRedundantRex = 0; + let mut prefix = LegacyPrefix::PfxNone; if *size == 1 { // Here, a redundant REX prefix changes the meaning of the // instruction. 
@@ -691,10 +758,12 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { retainRedundantRex = F_RETAIN_REDUNDANT_REX; } } + if *size == 2 { + prefix = LegacyPrefix::Pfx66; + } let mut flags = match size { 8 => F_NONE, - 4 => F_CLEAR_REX_W, - 2 => F_CLEAR_REX_W | F_PREFIX_66, + 4 | 2 => F_CLEAR_REX_W, 1 => F_CLEAR_REX_W | retainRedundantRex, _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"), }; @@ -710,12 +779,14 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } // Same comment re swapped args as for Alu_RMI_R. - emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags); + emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 1, *regE, *regG, flags); } RMI::M { addr } => { let opcode = if *size == 1 { 0x3A } else { 0x3B }; // Whereas here we revert to the "normal" G-E ordering. - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags); + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, prefix, opcode, 1, *regG, addr, flags, + ); } RMI::I { simm32 } => { // FIXME JRS 2020Feb11: there are shorter encodings for @@ -731,7 +802,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // And also here we use the "normal" G-E ordering. 
let encG = iregEnc(*regG); emit_REX_OPCODES_MODRM_encG_encE( - sink, opcode, 1, 7, /*subopcode*/ + sink, prefix, opcode, 1, 7, /*subopcode*/ encG, flags, ); emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32); @@ -751,6 +822,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 6, /*subopcode*/ @@ -787,6 +859,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let regEnc = iregEnc(*reg); emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 2, /*subopcode*/ @@ -797,6 +870,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RM::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 2, /*subopcode*/ @@ -867,6 +941,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let regEnc = iregEnc(*reg); emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 4, /*subopcode*/ @@ -877,6 +952,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RM::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 4, /*subopcode*/ @@ -886,7 +962,56 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } } - + Inst::XMM_R_R { op, src, dst } => { + let flags = F_CLEAR_REX_W; + let opcode = match op { + SSE_Op::SSE_Movss => 0x0F10, + SSE_Op::SSE2_Movsd => 0x0F10, + _ => unimplemented!("XMM_R_R opcode"), + }; + let prefix = match op { + SSE_Op::SSE_Movss => LegacyPrefix::PfxF3, + SSE_Op::SSE2_Movsd => LegacyPrefix::PfxF2, + _ => unimplemented!("XMM_R_R opcode"), + }; + emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 2, dst.to_reg(), *src, flags); + } + Inst::XMM_RM_R { + op, + src: srcE, + dst: regG, + } => { + let flags = F_CLEAR_REX_W; + let opcode = match op { + SSE_Op::SSE_Addss => 0x0F58, + SSE_Op::SSE_Subss => 0x0F5C, + _ => unimplemented!("XMM_RM_R opcode"), + }; + match srcE { + RM::R { reg: regE } 
=> { + emit_REX_OPCODES_MODRM_regG_regE( + sink, + LegacyPrefix::PfxF3, + opcode, + 2, + regG.to_reg(), + *regE, + flags, + ); + } + RM::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxF3, + opcode, + 2, + regG.to_reg(), + addr, + flags, + ); + } + } + } _ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)), } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 654dcc6b33..6b54c7a1f2 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -33,6 +33,23 @@ fn test_x64_emit() { let r14 = regs::r14(); let r15 = regs::r15(); + let xmm0 = regs::xmm0(); + let xmm1 = regs::xmm1(); + let xmm2 = regs::xmm2(); + let xmm3 = regs::xmm3(); + let xmm4 = regs::xmm4(); + let xmm5 = regs::xmm5(); + let xmm6 = regs::xmm6(); + let xmm7 = regs::xmm7(); + let xmm8 = regs::xmm8(); + let xmm9 = regs::xmm9(); + let xmm10 = regs::xmm10(); + let xmm11 = regs::xmm11(); + let xmm12 = regs::xmm12(); + let xmm13 = regs::xmm13(); + let xmm14 = regs::xmm14(); + let xmm15 = regs::xmm15(); + // And Writable<> versions of the same: let w_rax = Writable::::from_reg(rax); let w_rbx = Writable::::from_reg(rbx); @@ -51,6 +68,23 @@ fn test_x64_emit() { let w_r14 = Writable::::from_reg(r14); let w_r15 = Writable::::from_reg(r15); + let w_xmm0 = Writable::::from_reg(xmm0); + let w_xmm1 = Writable::::from_reg(xmm1); + let w_xmm2 = Writable::::from_reg(xmm2); + let w_xmm3 = Writable::::from_reg(xmm3); + let _w_xmm4 = Writable::::from_reg(xmm4); + let _w_xmm5 = Writable::::from_reg(xmm5); + let _w_xmm6 = Writable::::from_reg(xmm6); + let _w_xmm7 = Writable::::from_reg(xmm7); + let _w_xmm8 = Writable::::from_reg(xmm8); + let _w_xmm9 = Writable::::from_reg(xmm9); + let w_xmm10 = Writable::::from_reg(xmm10); + let _w_xmm11 = Writable::::from_reg(xmm11); + let _w_xmm12 = Writable::::from_reg(xmm12); + let w_xmm13 = 
Writable::::from_reg(xmm13); + let _w_xmm14 = Writable::::from_reg(xmm14); + let _w_xmm15 = Writable::::from_reg(xmm15); + let mut insns = Vec::<(Inst, &str, &str)>::new(); // ======================================================== @@ -2171,6 +2205,67 @@ fn test_x64_emit() { "jmp *321(%r10,%rdx,4)", )); + // ======================================================== + // XMM_RM_R + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm1), w_xmm0), + "F30F58C1", + "addss %xmm1, %xmm0", + )); + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm0), w_xmm1), + "F30F5CC8", + "subss %xmm0, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm11), w_xmm13), + "F3450F58EB", + "addss %xmm11, %xmm13", + )); + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm12), w_xmm1), + "F3410F5CCC", + "subss %xmm12, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r( + SSE_Op::SSE_Addss, + RM::mem(Addr::imm_reg_reg_shift(123, r10, rdx, 2)), + w_xmm0, + ), + "F3410F5844927B", + "addss 123(%r10,%rdx,4), %xmm0", + )); + + insns.push(( + Inst::xmm_rm_r( + SSE_Op::SSE_Subss, + RM::mem(Addr::imm_reg_reg_shift(321, r10, rax, 3)), + w_xmm10, + ), + "F3450F5C94C241010000", + "subss 321(%r10,%rax,8), %xmm10", + )); + + // ======================================================== + // XMM_R_R + + insns.push(( + Inst::xmm_r_r(SSE_Op::SSE_Movss, xmm3, w_xmm2), + "F30F10D3", + "movss %xmm3, %xmm2", + )); + + insns.push(( + Inst::xmm_r_r(SSE_Op::SSE2_Movsd, xmm4, w_xmm3), + "F20F10DC", + "movsd %xmm4, %xmm3", + )); + // ======================================================== // Actually run the tests! let flags = settings::Flags::new(settings::builder()); @@ -2179,7 +2274,6 @@ fn test_x64_emit() { // Check the printed text is as expected. 
let actual_printing = insn.show_rru(Some(&rru)); assert_eq!(expected_printing, actual_printing); - let mut sink = test_utils::TestCodeSink::new(); let mut buffer = MachBuffer::new(); insn.emit(&mut buffer, &flags, &mut Default::default()); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 1f3eb0a1ed..132d93e166 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -150,6 +150,20 @@ pub(crate) enum Inst { /// jmpq (reg mem) JmpUnknown { target: RM }, + + /// (add sub and or xor mul adc? sbb?) (32 64) (reg addr imm) reg + XMM_RM_R { + op: SSE_Op, + src: RM, + dst: Writable, + }, + + /// mov (64 32) reg reg + XMM_R_R { + op: SSE_Op, + src: Reg, + dst: Writable, + }, } // Handy constructors for Insts. @@ -195,6 +209,17 @@ impl Inst { Inst::Mov_R_R { is_64, src, dst } } + pub(crate) fn xmm_r_r(op: SSE_Op, src: Reg, dst: Writable) -> Inst { + debug_assert!(src.get_class() == RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XMM_R_R { op, src, dst } + } + + pub(crate) fn xmm_rm_r(op: SSE_Op, src: RM, dst: Writable) -> Self { + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Self::XMM_RM_R { op, src, dst } + } + pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::MovZX_M_R { extMode, addr, dst } @@ -341,6 +366,12 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), + Inst::XMM_RM_R { op, src, dst } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, 8), + ), Inst::Imm_R { dst_is_64, simm64, @@ -368,6 +399,12 @@ impl ShowWithRRU for Inst { show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), + Inst::XMM_R_R { op, src, dst } => 
format!( + "{} {}, {}", + ljustify(op.to_string()), + show_ireg_sized(*src, mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, 8) + ), Inst::MovZX_M_R { extMode, addr, dst } => { if *extMode == ExtMode::LQ { format!( @@ -490,6 +527,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } + Inst::XMM_RM_R { op: _, src, dst } => { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } Inst::Imm_R { dst_is_64: _, simm64: _, @@ -501,6 +542,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(*src); collector.add_def(*dst); } + Inst::XMM_R_R { op: _, src, dst } => { + collector.add_use(*src); + collector.add_def(*dst); + } Inst::MovZX_M_R { extMode: _, addr, @@ -649,6 +694,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::XMM_RM_R { + op: _, + ref mut src, + ref mut dst, + } => { + src.map_uses(mapper); + map_mod(mapper, dst); + } Inst::Imm_R { dst_is_64: _, simm64: _, @@ -662,6 +715,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); map_def(mapper, dst); } + Inst::XMM_R_R { + op: _, + ref mut src, + ref mut dst, + } => { + map_use(mapper, src); + map_def(mapper, dst); + } Inst::MovZX_M_R { extMode: _, ref mut addr, @@ -784,6 +845,8 @@ impl MachInst for Inst { debug_assert!(rc_dst == rc_src); match rc_dst { RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), + // TODO: How do you just move 32 bits? 
+ RegClass::V128 => Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, dst_reg), _ => panic!("gen_move(x64): unhandled regclass"), } } diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index bb8f05fb50..087103b4ac 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -78,52 +78,53 @@ pub(crate) fn r11() -> Reg { fn fpr(enc: u8, index: u8) -> Reg { Reg::new_real(RegClass::V128, enc, index) } -fn xmm0() -> Reg { + +pub(crate) fn xmm0() -> Reg { fpr(0, 14) } -fn xmm1() -> Reg { +pub(crate) fn xmm1() -> Reg { fpr(1, 15) } -fn xmm2() -> Reg { +pub(crate) fn xmm2() -> Reg { fpr(2, 16) } -fn xmm3() -> Reg { +pub(crate) fn xmm3() -> Reg { fpr(3, 17) } -fn xmm4() -> Reg { +pub(crate) fn xmm4() -> Reg { fpr(4, 18) } -fn xmm5() -> Reg { +pub(crate) fn xmm5() -> Reg { fpr(5, 19) } -fn xmm6() -> Reg { +pub(crate) fn xmm6() -> Reg { fpr(6, 20) } -fn xmm7() -> Reg { +pub(crate) fn xmm7() -> Reg { fpr(7, 21) } -fn xmm8() -> Reg { +pub(crate) fn xmm8() -> Reg { fpr(8, 22) } -fn xmm9() -> Reg { +pub(crate) fn xmm9() -> Reg { fpr(9, 23) } -fn xmm10() -> Reg { +pub(crate) fn xmm10() -> Reg { fpr(10, 24) } -fn xmm11() -> Reg { +pub(crate) fn xmm11() -> Reg { fpr(11, 25) } -fn xmm12() -> Reg { +pub(crate) fn xmm12() -> Reg { fpr(12, 26) } -fn xmm13() -> Reg { +pub(crate) fn xmm13() -> Reg { fpr(13, 27) } -fn xmm14() -> Reg { +pub(crate) fn xmm14() -> Reg { fpr(14, 28) } -fn xmm15() -> Reg { +pub(crate) fn xmm15() -> Reg { fpr(15, 29) } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f85d6166ac..f306d867b0 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3,7 +3,7 @@ #![allow(dead_code)] #![allow(non_snake_case)] -use regalloc::{Reg, Writable}; +use regalloc::{Reg, RegClass, Writable}; use crate::ir::condcodes::IntCC; use crate::ir::types; @@ -31,7 +31,7 @@ fn is_int_ty(ty: Type) -> bool { } 
} -fn int_ty_to_is64(ty: Type) -> bool { +fn int_ty_is_64(ty: Type) -> bool { match ty { types::I8 | types::I16 | types::I32 => false, types::I64 => true, @@ -39,6 +39,14 @@ fn int_ty_to_is64(ty: Type) -> bool { } } +fn flt_ty_is_64(ty: Type) -> bool { + match ty { + types::F32 => false, + types::F64 => true, + _ => panic!("type {} is none of F32, F64", ty), + } +} + fn int_ty_to_sizeB(ty: Type) -> u8 { match ty { types::I8 => 1, @@ -118,7 +126,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { }; // This is all outstandingly feeble. TODO: much better! - match op { Opcode::Iconst => { if let Some(w64) = iri_to_u64_immediate(ctx, iri) { @@ -136,7 +143,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { let regD = output_to_reg(ctx, iri, 0); let regL = input_to_reg(ctx, iri, 0); let regR = input_to_reg(ctx, iri, 1); - let is64 = int_ty_to_is64(ty.unwrap()); + let is64 = int_ty_is_64(ty.unwrap()); let how = if op == Opcode::Iadd { RMI_R_Op::Add } else { @@ -195,7 +202,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { for i in 0..ctx.num_inputs(iri) { let src_reg = input_to_reg(ctx, iri, i); let retval_reg = ctx.retval(i); - ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); + if src_reg.get_class() == RegClass::I64 { + ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); + } else if src_reg.get_class() == RegClass::V128 { + ctx.emit(Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, retval_reg)); + } } // N.B.: the Ret itself is generated by the ABI. 
} @@ -229,7 +240,23 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { | Opcode::SshrImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - + Opcode::Fadd | Opcode::Fsub => { + let regD = output_to_reg(ctx, iri, 0); + let regL = input_to_reg(ctx, iri, 0); + let regR = input_to_reg(ctx, iri, 1); + let is64 = flt_ty_is_64(ty.unwrap()); + if !is64 { + let inst = if op == Opcode::Fadd { + SSE_Op::SSE_Addss + } else { + SSE_Op::SSE_Subss + }; + ctx.emit(Inst::xmm_r_r(SSE_Op::SSE_Movss, regL, regD)); + ctx.emit(Inst::xmm_rm_r(inst, RM::reg(regR), regD)); + } else { + unimplemented!("unimplemented lowering for opcode {:?}", op); + } + } _ => unimplemented!("unimplemented lowering for opcode {:?}", op), } } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index dd871924ab..38b173de13 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -1,5 +1,4 @@ //! Cranelift code generation library. - #![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] #![warn(unused_import_braces)] #![cfg_attr(feature = "std", deny(unstable_features))] diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 477b02075b..81c59f5f2a 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -136,9 +136,14 @@ impl VCodeBuilder { /// Set the type of a VReg. pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { - while self.vcode.vreg_types.len() <= vreg.get_index() { - self.vcode.vreg_types.push(ir::types::I8); // Default type. 
+ if self.vcode.vreg_types.len() <= vreg.get_index() { + self.vcode.vreg_types.resize( + self.vcode.vreg_types.len() + + ((vreg.get_index() + 1) - self.vcode.vreg_types.len()), + ir::types::I8, + ) } + self.vcode.vreg_types[vreg.get_index()] = ty; } diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index f93471393e..fc282b31b9 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -51,7 +51,6 @@ pub fn run( flag_calc_value_ranges: bool, ) -> Result<(), String> { let parsed = parse_sets_and_triple(flag_set, flag_triple)?; - for filename in files { let path = Path::new(&filename); let name = String::from(path.as_os_str().to_string_lossy());