From 9d1bcfb2e8d13785ca96484b9b4bccd68c4be36c Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 25 Jun 2020 19:30:11 +0200 Subject: [PATCH] machinst x64: implement cmov --- cranelift/codegen/src/isa/aarch64/lower.rs | 22 +----- cranelift/codegen/src/isa/x64/inst/emit.rs | 24 +++++++ .../codegen/src/isa/x64/inst/emit_tests.rs | 38 ++++++++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 34 +++++++++ cranelift/codegen/src/isa/x64/lower.rs | 71 ++++++++++++++++--- 5 files changed, 160 insertions(+), 29 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 1da3d41328..71f257abf3 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -988,16 +988,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags>( (false, true) => NarrowValueMode::SignExtend64, (false, false) => NarrowValueMode::ZeroExtend64, }; - let inputs = [ - InsnInput { - insn: insn, - input: 0, - }, - InsnInput { - insn: insn, - input: 1, - }, - ]; + let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; let ty = ctx.input_ty(insn, 0); let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); @@ -1010,16 +1001,7 @@ pub(crate) fn lower_icmp_or_ifcmp_to_flags>( pub(crate) fn lower_fcmp_or_ffcmp_to_flags>(ctx: &mut C, insn: IRInst) { let ty = ctx.input_ty(insn, 0); let bits = ty_bits(ty); - let inputs = [ - InsnInput { - insn: insn, - input: 0, - }, - InsnInput { - insn: insn, - input: 1, - }, - ]; + let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); match bits { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index d2666728c3..23043602ad 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -849,6 +849,30 @@ pub(crate) fn emit( ); } + Inst::Cmove { + size, + cc, + src, + dst: reg_g, + } => { + let (prefix, rex_flags) = match size { + 2 => (LegacyPrefix::_66, RexFlags::clear_w()), + 4 => (LegacyPrefix::None, RexFlags::clear_w()), + 8 => (LegacyPrefix::None, RexFlags::set_w()), + _ => unreachable!("invalid size spec for cmove"), + }; + let opcode = 0x0F40 + cc.get_enc() as u32; + match src { + RegMem::Reg { reg: reg_e } => { + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex_flags); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state); + emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex_flags); + } + } + } + Inst::Push64 { src } => { match src { RegMemImm::Reg { reg } => { diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 96f350d43d..614efaa1b7 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -2481,6 +2481,44 @@ fn test_x64_emit() { insns.push((Inst::setcc(CC::Z, w_r14), "410F94C6", "setz %r14b")); insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b")); + // ======================================================== + // Cmove + insns.push(( + Inst::cmove(2, CC::O, RegMem::reg(rdi), w_rsi), + "660F40F7", + "cmovow %di, %si", + )); + insns.push(( + Inst::cmove( + 2, + CC::NO, + RegMem::mem(Amode::imm_reg_reg_shift(37, rdi, rsi, 2)), + w_r15, + ), + "66440F417CB725", + "cmovnow 37(%rdi,%rsi,4), %r15w", + )); + insns.push(( + Inst::cmove(4, CC::LE, RegMem::reg(rdi), w_rsi), + "0F4EF7", + "cmovlel %edi, %esi", + )); + insns.push(( + Inst::cmove(4, CC::NLE, RegMem::mem(Amode::imm_reg(0, r15)), w_rsi), + "410F4F37", + "cmovnlel 0(%r15), %esi", + )); + insns.push(( + Inst::cmove(8, CC::Z, RegMem::reg(rdi), w_r14), + "4C0F44F7", + "cmovzq %rdi, %r14", + )); + insns.push(( + Inst::cmove(8, CC::NZ, RegMem::mem(Amode::imm_reg(13, rdi)), w_r14), + "4C0F45770D", + "cmovnzq 13(%rdi), %r14", + )); + // ======================================================== // Push64 insns.push((Inst::push64(RegMemImm::reg(rdi)), "57", "pushq %rdi")); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index bf778a1e40..77c3a73cc3 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -118,6 +118,16 @@ pub enum Inst { /// Materializes the requested condition code in the destination reg. Setcc { cc: CC, dst: Writable }, + /// Integer conditional move. + /// Overwrites the destination register. + Cmove { + /// Possible values are 2, 4 or 8. Checked in the related factory. + size: u8, + cc: CC, + src: RegMem, + dst: Writable, + }, + // ===================================== // Stack manipulation. /// pushq (reg addr imm) @@ -350,6 +360,12 @@ impl Inst { Inst::Setcc { cc, dst } } + pub(crate) fn cmove(size: u8, cc: CC, src: RegMem, dst: Writable) -> Inst { + debug_assert!(size == 8 || size == 4 || size == 2); + debug_assert!(dst.to_reg().get_class() == RegClass::I64); + Inst::Cmove { size, cc, src, dst } + } + pub(crate) fn push64(src: RegMemImm) -> Inst { Inst::Push64 { src } } @@ -585,6 +601,12 @@ impl ShowWithRRU for Inst { ljustify2("set".to_string(), cc.to_string()), show_ireg_sized(dst.to_reg(), mb_rru, 1) ), + Inst::Cmove { size, cc, src, dst } => format!( + "{} {}, {}", + ljustify(format!("cmov{}{}", cc.to_string(), suffixBWLQ(*size))), + src.show_rru_sized(mb_rru, *size), + show_ireg_sized(dst.to_reg(), mb_rru, *size) + ), Inst::Push64 { src } => { format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) } @@ -701,6 +723,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { Inst::Setcc { dst, .. } => { collector.add_def(*dst); } + Inst::Cmove { src, dst, .. } => { + src.get_regs_as_uses(collector); + collector.add_def(*dst); + } Inst::Push64 { src } => { src.get_regs_as_uses(collector); collector.add_mod(Writable::from_reg(regs::rsp())); @@ -899,6 +925,14 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { map_use(mapper, dst); } Inst::Setcc { ref mut dst, .. } => map_def(mapper, dst), + Inst::Cmove { + ref mut src, + ref mut dst, + .. + } => { + src.map_uses(mapper); + map_def(mapper, dst) + } Inst::Push64 { ref mut src } => src.map_uses(mapper), Inst::Pop64 { ref mut dst } => { map_def(mapper, dst); diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 0411fb904b..a9ff7b1e94 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -123,6 +123,11 @@ fn input_to_reg<'a>(ctx: Ctx<'a>, spec: InsnInput) -> Reg { inputs.reg } +fn input_to_reg_mem(ctx: Ctx, spec: InsnInput) -> RegMem { + // TODO handle memory. + RegMem::reg(input_to_reg(ctx, spec)) +} + /// Try to use an immediate for constant inputs, and a register otherwise. /// TODO: handle memory as well! fn input_to_reg_mem_imm(ctx: Ctx, spec: InsnInput) -> RegMemImm { @@ -146,6 +151,20 @@ fn output_to_reg<'a>(ctx: Ctx<'a>, spec: InsnOutput) -> Writable { ctx.get_output(spec.insn, spec.output) } +fn emit_cmp(ctx: Ctx, insn: IRInst) { + let ty = ctx.input_ty(insn, 0); + + let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; + + // TODO Try to commute the operands (and invert the condition) if one is an immediate. + let lhs = input_to_reg(ctx, inputs[0]); + let rhs = input_to_reg_mem_imm(ctx, inputs[1]); + + // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives + // us dst - src at the machine instruction level, so invert operands. + ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); +} + //============================================================================= // Top-level instruction lowering entry point, for one instruction. @@ -269,18 +288,11 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> Codeg } Opcode::Icmp => { + emit_cmp(ctx, insn); + let condcode = inst_condcode(ctx.data(insn)); let cc = CC::from_intcc(condcode); - let ty = ctx.input_ty(insn, 0); - - // TODO Try to commute the operands (and invert the condition) if one is an immediate. - let lhs = input_to_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem_imm(ctx, inputs[1]); let dst = output_to_reg(ctx, outputs[0]); - - // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives - // us dst - src at the machine instruction level, so invert operands. - ctx.emit(Inst::cmp_rmi_r(ty.bytes() as u8, rhs, lhs)); ctx.emit(Inst::setcc(cc, dst)); } @@ -603,6 +615,47 @@ fn lower_insn_to_regs>(ctx: &mut C, insn: IRInst) -> Codeg ctx.emit(inst); } + Opcode::Select | Opcode::Selectif => { + let cc = if op == Opcode::Select { + // The input is a boolean value, compare it against zero. + let size = ctx.input_ty(insn, 0).bytes() as u8; + let test = input_to_reg(ctx, inputs[0]); + ctx.emit(Inst::cmp_rmi_r(size, RegMemImm::imm(0), test)); + + CC::NZ + } else { + // Verification ensures that the input is always a single-def ifcmp. + let cmp_insn = ctx + .get_input(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + emit_cmp(ctx, cmp_insn); + + CC::from_intcc(inst_condcode(ctx.data(insn))) + }; + + let lhs = input_to_reg_mem(ctx, inputs[1]); + let rhs = input_to_reg(ctx, inputs[2]); + let dst = output_to_reg(ctx, outputs[0]); + + let ty = ctx.output_ty(insn, 0); + assert!(is_int_ty(ty), "float cmov NYI"); + + let size = ty.bytes() as u8; + if size == 1 { + // Sign-extend operands to 32, then do a cmove of size 4. + let lhs_se = ctx.alloc_tmp(RegClass::I64, I32); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se)); + ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst)); + ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst)); + } else { + ctx.emit(Inst::gen_move(dst, rhs, ty)); + ctx.emit(Inst::cmove(size, cc, lhs, dst)); + } + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm