diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 4ba75e394c..2505286e08 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -48,7 +48,7 @@ pub(crate) struct X64ABIBody { flags: settings::Flags, } -fn in_int_reg(ty: types::Type) -> bool { +fn use_int_reg(ty: types::Type) -> bool { match ty { types::I8 | types::I16 @@ -63,6 +63,13 @@ fn in_int_reg(ty: types::Type) -> bool { } } +fn use_flt_reg(ty: types::Type) -> bool { + match ty { + types::F32 | types::F64 => true, + _ => false, + } +} + fn get_intreg_for_arg_systemv(idx: usize) -> Option { match idx { 0 => Some(regs::rdi()), @@ -75,6 +82,20 @@ fn get_intreg_for_arg_systemv(idx: usize) -> Option { } } +fn get_fltreg_for_arg_systemv(idx: usize) -> Option { + match idx { + 0 => Some(regs::xmm0()), + 1 => Some(regs::xmm1()), + 2 => Some(regs::xmm2()), + 3 => Some(regs::xmm3()), + 4 => Some(regs::xmm4()), + 5 => Some(regs::xmm5()), + 6 => Some(regs::xmm6()), + 7 => Some(regs::xmm7()), + _ => None, + } +} + fn get_intreg_for_retval_systemv(idx: usize) -> Option { match idx { 0 => Some(regs::rax()), @@ -83,6 +104,14 @@ fn get_intreg_for_retval_systemv(idx: usize) -> Option { } } +fn get_fltreg_for_retval_systemv(idx: usize) -> Option { + match idx { + 0 => Some(regs::xmm0()), + 1 => Some(regs::xmm1()), + _ => None, + } +} + fn is_callee_save_systemv(r: RealReg) -> bool { use regs::*; match r.get_class() { @@ -90,6 +119,7 @@ fn is_callee_save_systemv(r: RealReg) -> bool { ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, + RegClass::V128 => false, _ => unimplemented!(), } } @@ -106,6 +136,7 @@ impl X64ABIBody { // Compute args and retvals from signature. 
let mut args = vec![]; let mut next_int_arg = 0; + let mut next_flt_arg = 0; for param in &f.signature.params { match param.purpose { ir::ArgumentPurpose::VMContext if f.signature.call_conv.extends_baldrdash() => { @@ -114,15 +145,22 @@ impl X64ABIBody { } ir::ArgumentPurpose::Normal | ir::ArgumentPurpose::VMContext => { - if in_int_reg(param.value_type) { + if use_int_reg(param.value_type) { if let Some(reg) = get_intreg_for_arg_systemv(next_int_arg) { args.push(ABIArg::Reg(reg.to_real_reg())); } else { unimplemented!("passing arg on the stack"); } next_int_arg += 1; + } else if use_flt_reg(param.value_type) { + if let Some(reg) = get_fltreg_for_arg_systemv(next_flt_arg) { + args.push(ABIArg::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing arg on the stack"); + } + next_flt_arg += 1; } else { - unimplemented!("non int normal register") + unimplemented!("non int normal register {:?}", param.value_type) } } @@ -132,16 +170,24 @@ impl X64ABIBody { let mut rets = vec![]; let mut next_int_retval = 0; + let mut next_flt_retval = 0; for ret in &f.signature.returns { match ret.purpose { ir::ArgumentPurpose::Normal => { - if in_int_reg(ret.value_type) { + if use_int_reg(ret.value_type) { if let Some(reg) = get_intreg_for_retval_systemv(next_int_retval) { rets.push(ABIRet::Reg(reg.to_real_reg())); } else { unimplemented!("passing return on the stack"); } next_int_retval += 1; + } else if use_flt_reg(ret.value_type) { + if let Some(reg) = get_fltreg_for_retval_systemv(next_flt_retval) { + rets.push(ABIRet::Reg(reg.to_real_reg())); + } else { + unimplemented!("passing return on the stack"); + } + next_flt_retval += 1; } else { unimplemented!("returning non integer normal value"); } @@ -232,8 +278,11 @@ impl ABIBody for X64ABIBody { if from_reg.get_class() == RegClass::I32 || from_reg.get_class() == RegClass::I64 { // TODO do we need a sign extension if it's I32? 
return Inst::mov_r_r(/*is64=*/ true, from_reg.to_reg(), to_reg); + } else if from_reg.get_class() == RegClass::V128 { + // TODO: How to support Movss. Should is64 always be true? + return Inst::xmm_r_r(SSE_Op::SSE2_Movsd, from_reg.to_reg(), to_reg); } - unimplemented!("moving from non-int arg to vreg"); + unimplemented!("moving from non-int arg to vreg {:?}", from_reg.get_class()); } ABIArg::_Stack => unimplemented!("moving from stack arg to vreg"), } @@ -266,8 +315,14 @@ impl ABIBody for X64ABIBody { from_reg.to_reg(), Writable::<Reg>::from_reg(to_reg.to_reg()), )) + } else if to_reg.get_class() == RegClass::V128 { + ret.push(Inst::xmm_r_r( + SSE_Op::SSE2_Movsd, + from_reg.to_reg(), + Writable::<Reg>::from_reg(to_reg.to_reg()), + )) } else { - unimplemented!("moving from vreg to non-int return value"); + unimplemented!("moving from vreg to unsupported return value"); } } diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 1e77dd91fa..6f4b52156f 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -144,7 +144,7 @@ impl RM { // Constructors. pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64); + debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); RM::R { reg } } @@ -205,8 +205,75 @@ impl fmt::Debug for RMI_R_Op { } } -/// These indicate ways of extending (widening) a value, using the Intel naming: -/// B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 +/// Some scalar SSE operations requiring 2 operands r/m and r +/// Each instruction is prefixed with the SSE version that introduced +/// the particular instructions. +/// TODO: Below only includes scalar operations. To be seen if packed will +/// be added here. 
+#[derive(Clone, PartialEq)] +pub enum SSE_Op { + SSE_Addss, + SSE2_Addsd, + SSE_Comiss, + SSE2_Comisd, + SSE2_Cvtsd2ss, + SSE2_Cvtsd2si, + SSE_Cvtsi2ss, + SSE2_Cvtsi2sd, + SSE_Cvtss2si, + SSE2_Cvtss2sd, + SSE_Cvttss2si, + SSE2_Cvttsd2si, + SSE_Divss, + SSE2_Divsd, + SSE_Maxss, + SSE2_Maxsd, + SSE_Minss, + SSE2_Minsd, + SSE_Movss, + SSE2_Movsd, + SSE_Mulss, + SSE2_Mulsd, + SSE_Rcpss, + SSE41_Roundss, + SSE41_Roundsd, + SSE_Rsqrtss, + SSE_Sqrtss, + SSE2_Sqrtsd, + SSE_Subss, + SSE2_Subsd, + SSE_Ucomiss, + SSE2_Ucomisd, +} + +/// Some SSE operations requiring 3 operands i, r/m, and r +#[derive(Clone, PartialEq)] +pub enum SSE_RMI_Op { + SSE_Cmpss, + SSE2_Cmpsd, + SSE41_Insertps, +} + +impl SSE_Op { + pub(crate) fn to_string(&self) -> String { + match self { + SSE_Op::SSE_Addss => "addss".to_string(), + SSE_Op::SSE_Subss => "subss".to_string(), + SSE_Op::SSE_Movss => "movss".to_string(), + SSE_Op::SSE2_Movsd => "movsd".to_string(), + _ => "unimplemented sse_op".to_string(), + } + } +} + +impl fmt::Debug for SSE_Op { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.to_string()) + } +} + +/// These indicate ways of extending (widening) a value, using the Intel +/// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64 #[derive(Clone, PartialEq)] pub enum ExtMode { /// Byte -> Longword. 
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index b19013dc5c..77ed7fcb4e 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,6 +1,5 @@ -use regalloc::{Reg, RegClass}; - use crate::isa::x64::inst::*; +use regalloc::Reg; fn low8willSXto64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -42,7 +41,6 @@ fn mkSIB(shift: u8, encIndex: u8, encBase: u8) -> u8 { #[inline(always)] fn iregEnc(reg: Reg) -> u8 { debug_assert!(reg.is_real()); - debug_assert!(reg.get_class() == RegClass::I64); reg.get_hw_encoding() } @@ -57,10 +55,16 @@ const F_RETAIN_REDUNDANT_REX: u32 = 1; /// indicating a 64-bit operation. const F_CLEAR_REX_W: u32 = 2; -/// Add an 0x66 (operand-size override) prefix. This is necessary to indicate -/// a 16-bit operation. Normally this will be used together with F_CLEAR_REX_W. -const F_PREFIX_66: u32 = 4; - +/// For specifying the legacy prefixes (or `PfxNone` if no prefix required) to +/// be used at the start of an instruction. A select prefix may be required for +/// various operations, including instructions that operate on GPR, SSE, and Vex +/// registers. +enum LegacyPrefix { + PfxNone, + Pfx66, + PfxF2, + PfxF3, +} /// This is the core 'emit' function for instructions that reference memory. /// /// For an instruction that has as operands a register `encG` and a memory @@ -82,6 +86,7 @@ const F_PREFIX_66: u32 = 4; /// indicate a 64-bit operation. fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, mut numOpcodes: usize, encG: u8, @@ -91,13 +96,15 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( // General comment for this function: the registers in `memE` must be // 64-bit integer registers, because they are part of an address // expression. But `encG` can be derived from a register of any class. 
- let prefix66 = (flags & F_PREFIX_66) != 0; let clearRexW = (flags & F_CLEAR_REX_W) != 0; let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; - // The operand-size override, if requested. This indicates a 16-bit - // operation. - if prefix66 { - sink.put1(0x66); + + // Lower the prefix if applicable. + match prefix { + LegacyPrefix::Pfx66 => sink.put1(0x66), + LegacyPrefix::PfxF2 => sink.put1(0xF2), + LegacyPrefix::PfxF3 => sink.put1(0xF3), + LegacyPrefix::PfxNone => (), } match memE { Addr::IR { simm32, base: regE } => { @@ -201,6 +208,7 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( /// simpler. fn emit_REX_OPCODES_MODRM_encG_encE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, mut numOpcodes: usize, encG: u8, @@ -211,22 +219,28 @@ fn emit_REX_OPCODES_MODRM_encG_encE( // don't even have to be from the same class. For example, for an // integer-to-FP conversion insn, one might be RegClass::I64 and the other // RegClass::V128. - let prefix66 = (flags & F_PREFIX_66) != 0; let clearRexW = (flags & F_CLEAR_REX_W) != 0; let retainRedundant = (flags & F_RETAIN_REDUNDANT_REX) != 0; + // The operand-size override - if prefix66 { - sink.put1(0x66); + match prefix { + LegacyPrefix::Pfx66 => sink.put1(0x66), + LegacyPrefix::PfxF2 => sink.put1(0xF2), + LegacyPrefix::PfxF3 => sink.put1(0xF3), + LegacyPrefix::PfxNone => (), } + // The rex byte let w = if clearRexW { 0 } else { 1 }; let r = (encG >> 3) & 1; let x = 0; let b = (encE >> 3) & 1; let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b; + if rex != 0x40 || retainRedundant { sink.put1(rex); } + // All other prefixes and opcodes while numOpcodes > 0 { numOpcodes -= 1; @@ -242,6 +256,7 @@ fn emit_REX_OPCODES_MODRM_encG_encE( fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, numOpcodes: usize, regG: Reg, @@ -250,11 +265,12 @@ fn emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( ) { // JRS FIXME 2020Feb07: this should really just be `regEnc` not 
`iregEnc` let encG = iregEnc(regG); - emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, opcodes, numOpcodes, encG, memE, flags); + emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE(sink, prefix, opcodes, numOpcodes, encG, memE, flags); } fn emit_REX_OPCODES_MODRM_regG_regE( sink: &mut MachBuffer, + prefix: LegacyPrefix, opcodes: u32, numOpcodes: usize, regG: Reg, @@ -264,7 +280,7 @@ fn emit_REX_OPCODES_MODRM_regG_regE( // JRS FIXME 2020Feb07: these should really just be `regEnc` not `iregEnc` let encG = iregEnc(regG); let encE = iregEnc(regE); - emit_REX_OPCODES_MODRM_encG_encE(sink, opcodes, numOpcodes, encG, encE, flags); + emit_REX_OPCODES_MODRM_encG_encE(sink, prefix, opcodes, numOpcodes, encG, encE, flags); } /// Write a suitable number of bits from an imm64 to the sink. @@ -346,6 +362,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::R { reg: regE } => { emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, 0x0FAF, 2, regG.to_reg(), @@ -356,6 +373,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FAF, 2, regG.to_reg(), @@ -369,6 +387,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // Yes, really, regG twice. emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, opcode, 1, regG.to_reg(), @@ -402,6 +421,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // gold standard. emit_REX_OPCODES_MODRM_regG_regE( sink, + LegacyPrefix::PfxNone, opcode_R, 1, *regE, @@ -415,6 +435,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // Whereas here we revert to the "normal" G-E ordering. emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, opcode_M, 1, regG.to_reg(), @@ -427,7 +448,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let opcode = if useImm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. 
let encG = iregEnc(regG.to_reg()); - emit_REX_OPCODES_MODRM_encG_encE(sink, opcode, 1, subopcode_I, encG, flags); + emit_REX_OPCODES_MODRM_encG_encE( + sink, + LegacyPrefix::PfxNone, + opcode, + 1, + subopcode_I, + encG, + flags, + ); emit_simm(sink, if useImm8 { 1 } else { 4 }, *simm32); } } @@ -455,7 +484,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } Inst::Mov_R_R { is_64, src, dst } => { let flags = if *is_64 { F_NONE } else { F_CLEAR_REX_W }; - emit_REX_OPCODES_MODRM_regG_regE(sink, 0x89, 1, *src, dst.to_reg(), flags); + emit_REX_OPCODES_MODRM_regG_regE( + sink, + LegacyPrefix::PfxNone, + 0x89, + 1, + *src, + dst.to_reg(), + flags, + ); } Inst::MovZX_M_R { extMode, addr, dst } => { match extMode { @@ -463,6 +500,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZBL is (REX.W==0) 0F B6 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB6, 2, dst.to_reg(), @@ -478,6 +516,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // the upper half of the destination anyway. 
emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB6, 2, dst.to_reg(), @@ -489,6 +528,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZWL is (REX.W==0) 0F B7 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB7, 2, dst.to_reg(), @@ -500,6 +540,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVZWQ is (REX.W==1) 0F B7 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FB7, 2, dst.to_reg(), @@ -513,6 +554,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r/m32, r32 is (REX.W==0) 8B /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x8B, 1, dst.to_reg(), @@ -522,15 +564,22 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } } - Inst::Mov64_M_R { addr, dst } => { - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x8B, 1, dst.to_reg(), addr, F_NONE) - } + Inst::Mov64_M_R { addr, dst } => emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxNone, + 0x8B, + 1, + dst.to_reg(), + addr, + F_NONE, + ), Inst::MovSX_M_R { extMode, addr, dst } => { match extMode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBE, 2, dst.to_reg(), @@ -542,6 +591,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSBQ is (REX.W==1) 0F BE /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBE, 2, dst.to_reg(), @@ -553,6 +603,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSWL is (REX.W==0) 0F BF /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBF, 2, dst.to_reg(), @@ -564,6 +615,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOVSWQ is (REX.W==1) 0F BF /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x0FBF, 2, dst.to_reg(), @@ -575,6 +627,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut 
MachBuffer) { // MOVSLQ is (REX.W==1) 63 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x63, 1, dst.to_reg(), @@ -599,6 +652,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r8, r/m8 is (REX.W==0) 88 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x88, 1, *src, @@ -610,17 +664,19 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // MOV r16, r/m16 is 66 (REX.W==0) 89 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::Pfx66, 0x89, 1, *src, addr, - F_CLEAR_REX_W | F_PREFIX_66, + F_CLEAR_REX_W, ) } 4 => { // MOV r32, r/m32 is (REX.W==0) 89 /r emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( sink, + LegacyPrefix::PfxNone, 0x89, 1, *src, @@ -630,7 +686,15 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } 8 => { // MOV r64, r/m64 is (REX.W==1) 89 /r - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, 0x89, 1, *src, addr, F_NONE) + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxNone, + 0x89, + 1, + *src, + addr, + F_NONE, + ) } _ => panic!("x64::Inst::Mov_R_M::emit: unreachable"), } @@ -653,6 +717,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xD3, 1, subopcode, @@ -667,6 +732,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // bother with that nicety here. emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xC1, 1, subopcode, @@ -683,6 +749,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { dst: regG, } => { let mut retainRedundantRex = 0; + let mut prefix = LegacyPrefix::PfxNone; if *size == 1 { // Here, a redundant REX prefix changes the meaning of the // instruction. 
@@ -691,10 +758,12 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { retainRedundantRex = F_RETAIN_REDUNDANT_REX; } } + if *size == 2 { + prefix = LegacyPrefix::Pfx66; + } let mut flags = match size { 8 => F_NONE, - 4 => F_CLEAR_REX_W, - 2 => F_CLEAR_REX_W | F_PREFIX_66, + 4 | 2 => F_CLEAR_REX_W, 1 => F_CLEAR_REX_W | retainRedundantRex, _ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"), }; @@ -710,12 +779,14 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } // Same comment re swapped args as for Alu_RMI_R. - emit_REX_OPCODES_MODRM_regG_regE(sink, opcode, 1, *regE, *regG, flags); + emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 1, *regE, *regG, flags); } RMI::M { addr } => { let opcode = if *size == 1 { 0x3A } else { 0x3B }; // Whereas here we revert to the "normal" G-E ordering. - emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE(sink, opcode, 1, *regG, addr, flags); + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, prefix, opcode, 1, *regG, addr, flags, + ); } RMI::I { simm32 } => { // FIXME JRS 2020Feb11: there are shorter encodings for @@ -731,7 +802,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { // And also here we use the "normal" G-E ordering. 
let encG = iregEnc(*regG); emit_REX_OPCODES_MODRM_encG_encE( - sink, opcode, 1, 7, /*subopcode*/ + sink, prefix, opcode, 1, 7, /*subopcode*/ encG, flags, ); emit_simm(sink, if useImm8 { 1 } else { *size }, *simm32); @@ -751,6 +822,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RMI::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 6, /*subopcode*/ @@ -787,6 +859,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let regEnc = iregEnc(*reg); emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 2, /*subopcode*/ @@ -797,6 +870,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RM::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 2, /*subopcode*/ @@ -867,6 +941,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { let regEnc = iregEnc(*reg); emit_REX_OPCODES_MODRM_encG_encE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 4, /*subopcode*/ @@ -877,6 +952,7 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { RM::M { addr } => { emit_REX_OPCODES_MODRM_SIB_IMM_encG_memE( sink, + LegacyPrefix::PfxNone, 0xFF, 1, 4, /*subopcode*/ @@ -886,7 +962,56 @@ pub(crate) fn emit(inst: &Inst, sink: &mut MachBuffer) { } } } - + Inst::XMM_R_R { op, src, dst } => { + let flags = F_CLEAR_REX_W; + let opcode = match op { + SSE_Op::SSE_Movss => 0x0F10, + SSE_Op::SSE2_Movsd => 0x0F10, + _ => unimplemented!("XMM_R_R opcode"), + }; + let prefix = match op { + SSE_Op::SSE_Movss => LegacyPrefix::PfxF3, + SSE_Op::SSE2_Movsd => LegacyPrefix::PfxF2, + _ => unimplemented!("XMM_R_R opcode"), + }; + emit_REX_OPCODES_MODRM_regG_regE(sink, prefix, opcode, 2, dst.to_reg(), *src, flags); + } + Inst::XMM_RM_R { + op, + src: srcE, + dst: regG, + } => { + let flags = F_CLEAR_REX_W; + let opcode = match op { + SSE_Op::SSE_Addss => 0x0F58, + SSE_Op::SSE_Subss => 0x0F5C, + _ => unimplemented!("XMM_RM_R opcode"), + }; + match srcE { + RM::R { reg: regE } 
=> { + emit_REX_OPCODES_MODRM_regG_regE( + sink, + LegacyPrefix::PfxF3, + opcode, + 2, + regG.to_reg(), + *regE, + flags, + ); + } + RM::M { addr } => { + emit_REX_OPCODES_MODRM_SIB_IMM_regG_memE( + sink, + LegacyPrefix::PfxF3, + opcode, + 2, + regG.to_reg(), + addr, + flags, + ); + } + } + } _ => panic!("x64_emit: unhandled: {} ", inst.show_rru(None)), } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 654dcc6b33..6b54c7a1f2 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -33,6 +33,23 @@ fn test_x64_emit() { let r14 = regs::r14(); let r15 = regs::r15(); + let xmm0 = regs::xmm0(); + let xmm1 = regs::xmm1(); + let xmm2 = regs::xmm2(); + let xmm3 = regs::xmm3(); + let xmm4 = regs::xmm4(); + let xmm5 = regs::xmm5(); + let xmm6 = regs::xmm6(); + let xmm7 = regs::xmm7(); + let xmm8 = regs::xmm8(); + let xmm9 = regs::xmm9(); + let xmm10 = regs::xmm10(); + let xmm11 = regs::xmm11(); + let xmm12 = regs::xmm12(); + let xmm13 = regs::xmm13(); + let xmm14 = regs::xmm14(); + let xmm15 = regs::xmm15(); + // And Writable<> versions of the same: let w_rax = Writable::::from_reg(rax); let w_rbx = Writable::::from_reg(rbx); @@ -51,6 +68,23 @@ fn test_x64_emit() { let w_r14 = Writable::::from_reg(r14); let w_r15 = Writable::::from_reg(r15); + let w_xmm0 = Writable::::from_reg(xmm0); + let w_xmm1 = Writable::::from_reg(xmm1); + let w_xmm2 = Writable::::from_reg(xmm2); + let w_xmm3 = Writable::::from_reg(xmm3); + let _w_xmm4 = Writable::::from_reg(xmm4); + let _w_xmm5 = Writable::::from_reg(xmm5); + let _w_xmm6 = Writable::::from_reg(xmm6); + let _w_xmm7 = Writable::::from_reg(xmm7); + let _w_xmm8 = Writable::::from_reg(xmm8); + let _w_xmm9 = Writable::::from_reg(xmm9); + let w_xmm10 = Writable::::from_reg(xmm10); + let _w_xmm11 = Writable::::from_reg(xmm11); + let _w_xmm12 = Writable::::from_reg(xmm12); + let w_xmm13 = 
Writable::::from_reg(xmm13); + let _w_xmm14 = Writable::::from_reg(xmm14); + let _w_xmm15 = Writable::::from_reg(xmm15); + let mut insns = Vec::<(Inst, &str, &str)>::new(); // ======================================================== @@ -2171,6 +2205,67 @@ fn test_x64_emit() { "jmp *321(%r10,%rdx,4)", )); + // ======================================================== + // XMM_RM_R + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm1), w_xmm0), + "F30F58C1", + "addss %xmm1, %xmm0", + )); + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm0), w_xmm1), + "F30F5CC8", + "subss %xmm0, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Addss, RM::reg(xmm11), w_xmm13), + "F3450F58EB", + "addss %xmm11, %xmm13", + )); + + insns.push(( + Inst::xmm_rm_r(SSE_Op::SSE_Subss, RM::reg(xmm12), w_xmm1), + "F3410F5CCC", + "subss %xmm12, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r( + SSE_Op::SSE_Addss, + RM::mem(Addr::imm_reg_reg_shift(123, r10, rdx, 2)), + w_xmm0, + ), + "F3410F5844927B", + "addss 123(%r10,%rdx,4), %xmm0", + )); + + insns.push(( + Inst::xmm_rm_r( + SSE_Op::SSE_Subss, + RM::mem(Addr::imm_reg_reg_shift(321, r10, rax, 3)), + w_xmm10, + ), + "F3450F5C94C241010000", + "subss 321(%r10,%rax,8), %xmm10", + )); + + // ======================================================== + // XMM_R_R + + insns.push(( + Inst::xmm_r_r(SSE_Op::SSE_Movss, xmm3, w_xmm2), + "F30F10D3", + "movss %xmm3, %xmm2", + )); + + insns.push(( + Inst::xmm_r_r(SSE_Op::SSE2_Movsd, xmm4, w_xmm3), + "F20F10DC", + "movsd %xmm4, %xmm3", + )); + // ======================================================== // Actually run the tests! let flags = settings::Flags::new(settings::builder()); @@ -2179,7 +2274,6 @@ fn test_x64_emit() { // Check the printed text is as expected. 
let actual_printing = insn.show_rru(Some(&rru)); assert_eq!(expected_printing, actual_printing); - let mut sink = test_utils::TestCodeSink::new(); let mut buffer = MachBuffer::new(); insn.emit(&mut buffer, &flags, &mut Default::default()); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 1f3eb0a1ed..132d93e166 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -150,6 +150,20 @@ pub(crate) enum Inst { /// jmpq (reg mem) JmpUnknown { target: RM }, + + /// (addss subss ...) (reg mem) reg -- scalar SSE op; src is an XMM register or memory + XMM_RM_R { + op: SSE_Op, + src: RM, + dst: Writable, + }, + + /// (movss movsd) reg reg -- XMM register-to-register move + XMM_R_R { + op: SSE_Op, + src: Reg, + dst: Writable, + }, } // Handy constructors for Insts. @@ -195,6 +209,17 @@ impl Inst { Inst::Mov_R_R { is_64, src, dst } } + pub(crate) fn xmm_r_r(op: SSE_Op, src: Reg, dst: Writable) -> Inst { + debug_assert!(src.get_class() == RegClass::V128); + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Inst::XMM_R_R { op, src, dst } + } + + pub(crate) fn xmm_rm_r(op: SSE_Op, src: RM, dst: Writable) -> Self { + debug_assert!(dst.to_reg().get_class() == RegClass::V128); + Self::XMM_RM_R { op, src, dst } + } + pub(crate) fn movzx_m_r(extMode: ExtMode, addr: Addr, dst: Writable) -> Inst { debug_assert!(dst.to_reg().get_class() == RegClass::I64); Inst::MovZX_M_R { extMode, addr, dst } @@ -341,6 +366,12 @@ impl ShowWithRRU for Inst { src.show_rru_sized(mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)), ), + Inst::XMM_RM_R { op, src, dst } => format!( + "{} {}, {}", + ljustify(op.to_string()), + src.show_rru_sized(mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, 8), + ), Inst::Imm_R { dst_is_64, simm64, @@ -368,6 +399,12 @@ impl ShowWithRRU for Inst { show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)), show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) ), + Inst::XMM_R_R { op, src, dst } => 
format!( + "{} {}, {}", + ljustify(op.to_string()), + show_ireg_sized(*src, mb_rru, 8), + show_ireg_sized(dst.to_reg(), mb_rru, 8) + ), Inst::MovZX_M_R { extMode, addr, dst } => { if *extMode == ExtMode::LQ { format!( @@ -490,6 +527,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { src.get_regs_as_uses(collector); collector.add_mod(*dst); } + Inst::XMM_RM_R { op: _, src, dst } => { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } Inst::Imm_R { dst_is_64: _, simm64: _, @@ -501,6 +542,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_use(*src); collector.add_def(*dst); } + Inst::XMM_R_R { op: _, src, dst } => { + collector.add_use(*src); + collector.add_def(*dst); + } Inst::MovZX_M_R { extMode: _, addr, @@ -649,6 +694,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } + Inst::XMM_RM_R { + op: _, + ref mut src, + ref mut dst, + } => { + src.map_uses(mapper); + map_mod(mapper, dst); + } Inst::Imm_R { dst_is_64: _, simm64: _, @@ -662,6 +715,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, src); map_def(mapper, dst); } + Inst::XMM_R_R { + op: _, + ref mut src, + ref mut dst, + } => { + map_use(mapper, src); + map_def(mapper, dst); + } Inst::MovZX_M_R { extMode: _, ref mut addr, @@ -784,6 +845,8 @@ impl MachInst for Inst { debug_assert!(rc_dst == rc_src); match rc_dst { RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg), + // TODO: How do you just move 32 bits? 
+ RegClass::V128 => Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, dst_reg), _ => panic!("gen_move(x64): unhandled regclass"), } } diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index bb8f05fb50..087103b4ac 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -78,52 +78,53 @@ pub(crate) fn r11() -> Reg { fn fpr(enc: u8, index: u8) -> Reg { Reg::new_real(RegClass::V128, enc, index) } -fn xmm0() -> Reg { + +pub(crate) fn xmm0() -> Reg { fpr(0, 14) } -fn xmm1() -> Reg { +pub(crate) fn xmm1() -> Reg { fpr(1, 15) } -fn xmm2() -> Reg { +pub(crate) fn xmm2() -> Reg { fpr(2, 16) } -fn xmm3() -> Reg { +pub(crate) fn xmm3() -> Reg { fpr(3, 17) } -fn xmm4() -> Reg { +pub(crate) fn xmm4() -> Reg { fpr(4, 18) } -fn xmm5() -> Reg { +pub(crate) fn xmm5() -> Reg { fpr(5, 19) } -fn xmm6() -> Reg { +pub(crate) fn xmm6() -> Reg { fpr(6, 20) } -fn xmm7() -> Reg { +pub(crate) fn xmm7() -> Reg { fpr(7, 21) } -fn xmm8() -> Reg { +pub(crate) fn xmm8() -> Reg { fpr(8, 22) } -fn xmm9() -> Reg { +pub(crate) fn xmm9() -> Reg { fpr(9, 23) } -fn xmm10() -> Reg { +pub(crate) fn xmm10() -> Reg { fpr(10, 24) } -fn xmm11() -> Reg { +pub(crate) fn xmm11() -> Reg { fpr(11, 25) } -fn xmm12() -> Reg { +pub(crate) fn xmm12() -> Reg { fpr(12, 26) } -fn xmm13() -> Reg { +pub(crate) fn xmm13() -> Reg { fpr(13, 27) } -fn xmm14() -> Reg { +pub(crate) fn xmm14() -> Reg { fpr(14, 28) } -fn xmm15() -> Reg { +pub(crate) fn xmm15() -> Reg { fpr(15, 29) } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index f85d6166ac..f306d867b0 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3,7 +3,7 @@ #![allow(dead_code)] #![allow(non_snake_case)] -use regalloc::{Reg, Writable}; +use regalloc::{Reg, RegClass, Writable}; use crate::ir::condcodes::IntCC; use crate::ir::types; @@ -31,7 +31,7 @@ fn is_int_ty(ty: Type) -> bool { } 
} -fn int_ty_to_is64(ty: Type) -> bool { +fn int_ty_is_64(ty: Type) -> bool { match ty { types::I8 | types::I16 | types::I32 => false, types::I64 => true, @@ -39,6 +39,14 @@ fn int_ty_to_is64(ty: Type) -> bool { } } +fn flt_ty_is_64(ty: Type) -> bool { + match ty { + types::F32 => false, + types::F64 => true, + _ => panic!("type {} is none of F32, F64", ty), + } +} + fn int_ty_to_sizeB(ty: Type) -> u8 { match ty { types::I8 => 1, @@ -118,7 +126,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { }; // This is all outstandingly feeble. TODO: much better! - match op { Opcode::Iconst => { if let Some(w64) = iri_to_u64_immediate(ctx, iri) { @@ -136,7 +143,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { let regD = output_to_reg(ctx, iri, 0); let regL = input_to_reg(ctx, iri, 0); let regR = input_to_reg(ctx, iri, 1); - let is64 = int_ty_to_is64(ty.unwrap()); + let is64 = int_ty_is_64(ty.unwrap()); let how = if op == Opcode::Iadd { RMI_R_Op::Add } else { @@ -195,7 +202,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { for i in 0..ctx.num_inputs(iri) { let src_reg = input_to_reg(ctx, iri, i); let retval_reg = ctx.retval(i); - ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)); + match src_reg.get_class() { + RegClass::I64 => ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg)), + RegClass::V128 => ctx.emit(Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, retval_reg)), + rc => unimplemented!("returning a value of register class {:?}", rc), + } } // N.B.: the Ret itself is generated by the ABI. 
} @@ -229,7 +240,23 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) { | Opcode::SshrImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - + Opcode::Fadd | Opcode::Fsub => { + let regD = output_to_reg(ctx, iri, 0); + let regL = input_to_reg(ctx, iri, 0); + let regR = input_to_reg(ctx, iri, 1); + let is64 = flt_ty_is_64(ty.unwrap()); + if !is64 { + let inst = if op == Opcode::Fadd { + SSE_Op::SSE_Addss + } else { + SSE_Op::SSE_Subss + }; + ctx.emit(Inst::xmm_r_r(SSE_Op::SSE_Movss, regL, regD)); + ctx.emit(Inst::xmm_rm_r(inst, RM::reg(regR), regD)); + } else { + unimplemented!("unimplemented lowering for opcode {:?}", op); + } + } _ => unimplemented!("unimplemented lowering for opcode {:?}", op), } } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index dd871924ab..38b173de13 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -1,5 +1,4 @@ //! Cranelift code generation library. - #![deny(missing_docs, trivial_numeric_casts, unused_extern_crates)] #![warn(unused_import_braces)] #![cfg_attr(feature = "std", deny(unstable_features))] diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 477b02075b..81c59f5f2a 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -136,9 +136,14 @@ impl VCodeBuilder { /// Set the type of a VReg. pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { - while self.vcode.vreg_types.len() <= vreg.get_index() { - self.vcode.vreg_types.push(ir::types::I8); // Default type. 
+ if self.vcode.vreg_types.len() <= vreg.get_index() { + self.vcode.vreg_types.resize( + // Grow just far enough that `vreg`'s index becomes valid. + vreg.get_index() + 1, + ir::types::I8, // Default type. + ) } + self.vcode.vreg_types[vreg.get_index()] = ty; } diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index f93471393e..fc282b31b9 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -51,7 +51,6 @@ pub fn run( flag_calc_value_ranges: bool, ) -> Result<(), String> { let parsed = parse_sets_and_triple(flag_set, flag_triple)?; for filename in files { let path = Path::new(&filename); let name = String::from(path.as_os_str().to_string_lossy());