Browse Source

machinst x64: use xor/xorpss/xorpd to generate zero constants;

pull/2086/head
Benjamin Bouvier 4 years ago
committed by Andrew Brown
parent
commit
e108f14620
  1. 15
      cranelift/codegen/src/isa/x64/inst/args.rs
  2. 20
      cranelift/codegen/src/isa/x64/inst/emit.rs
  3. 157
      cranelift/codegen/src/isa/x64/inst/mod.rs
  4. 4
      cranelift/codegen/src/isa/x64/lower.rs

15
cranelift/codegen/src/isa/x64/inst/args.rs

@ -210,6 +210,13 @@ impl RegMemImm {
Self::Imm { .. } => {}
}
}
pub(crate) fn to_reg(&self) -> Option<Reg> {
match self {
Self::Reg { reg } => Some(*reg),
_ => None,
}
}
}
impl ShowWithRRU for RegMemImm {
@ -255,6 +262,12 @@ impl RegMem {
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
}
}
pub(crate) fn to_reg(&self) -> Option<Reg> {
match self {
RegMem::Reg { reg } => Some(*reg),
_ => None,
}
}
}
impl ShowWithRRU for RegMem {
@ -271,7 +284,7 @@ impl ShowWithRRU for RegMem {
}
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
#[derive(Clone, PartialEq)]
#[derive(Copy, Clone, PartialEq)]
pub enum AluRmiROpcode {
Add,
Sub,

20
cranelift/codegen/src/isa/x64/inst/emit.rs

@ -2044,8 +2044,7 @@ pub(crate) fn emit(
//
// ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is
// ;; already in %dst.
// mov 0, %tmp_gpr
// movd/movq %tmp_gpr, %tmp_xmm
// xorpd %tmp_xmm, %tmp_xmm
// cmpss/cmpsd %src, %tmp_xmm
// jnb done
// mov/movaps $INT_MAX, %dst
@ -2069,8 +2068,7 @@ pub(crate) fn emit(
//
// ;; if positive, it was a real overflow
// check_positive:
// mov 0, %tmp_gpr
// movd/movq %tmp_gpr, %tmp_xmm
// xorpd %tmp_xmm, %tmp_xmm
// cmpss/cmpsd %src, %tmp_xmm
// jnb done
// ud2 trap IntegerOverflow
@ -2120,11 +2118,10 @@ pub(crate) fn emit(
sink.bind_label(not_nan);
// If the input was positive, saturate to INT_MAX.
// TODO use xorps/xorpd here
let inst = Inst::imm_r(false, 0, *tmp_gpr); // rely on sign-extension to get 0 on 64-bits
inst.emit(sink, flags, state);
// Zero out tmp_xmm.
let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
inst.emit(sink, flags, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
@ -2194,12 +2191,9 @@ pub(crate) fn emit(
sink.bind_label(check_positive);
// TODO use xorpd
let inst = Inst::imm_r(false, 0, *tmp_gpr);
inst.emit(sink, flags, state);
// Zero out the tmp_xmm register.
let inst =
Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm);
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
inst.emit(sink, flags, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());

157
cranelift/codegen/src/isa/x64/inst/mod.rs

@ -939,6 +939,40 @@ impl Inst {
}
}
// Inst helpers.
impl Inst {
/// In certain cases, instructions of this format can act as a definition of an XMM register,
/// producing a value that is independent of its initial value.
///
/// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
/// itself will generate all ones as a result, regardless of its value. From the register
/// allocator's point of view, we should (i) record the first register, which is normally a
/// mod, as a def instead; and (ii) not record the second register as a use, because it is the
/// same as the first register (already handled).
fn produces_const(&self) -> bool {
match self {
Self::Alu_RMI_R { op, src, dst, .. } => {
src.to_reg() == Some(dst.to_reg())
&& (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
}
Self::XMM_RM_R { op, src, dst, .. } => {
src.to_reg() == Some(dst.to_reg())
&& (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd)
}
Self::XmmRmRImm { op, src, dst, imm } => {
src.to_reg() == Some(dst.to_reg())
&& (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
&& *imm == FcmpImm::Equal.encode()
}
_ => false,
}
}
}
//=============================================================================
// Instructions: printing
@ -1433,8 +1467,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
// sets.
match inst {
Inst::Alu_RMI_R { src, dst, .. } => {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
if inst.produces_const() {
// No need to account for src, since src == dst.
collector.add_def(*dst);
} else {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
}
Inst::Div { divisor, .. } => {
collector.add_mod(Writable::from_reg(regs::rax()));
@ -1466,26 +1505,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(*dst);
}
Inst::XMM_RM_R { src, dst, .. } => {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
Inst::XmmRmRImm { src, dst, op, imm } => {
// In certain cases, instructions of this format can act as a definition of an XMM
// register, producing a value that is independent of its initial value. For example,
// a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself
// will generate all ones as a result, regardless of its value. From the register
// allocator's point of view, we should (i) record the first register, which is normally
// a mod, as a def instread; and (ii) not record the second register as a use, because
// it is the same as the first register (already handled). TODO Re-factored in #2071.
let is_def = if let RegMem::Reg { reg } = src {
(*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
&& *imm == FcmpImm::Equal.encode()
&& *reg == dst.to_reg()
if inst.produces_const() {
// No need to account for src, since src == dst.
collector.add_def(*dst);
} else {
false
};
if is_def {
src.get_regs_as_uses(collector);
collector.add_mod(*dst);
}
}
Inst::XmmRmRImm { src, dst, .. } => {
if inst.produces_const() {
// No need to account for src, since src == dst.
collector.add_def(*dst);
} else {
src.get_regs_as_uses(collector);
@ -1694,6 +1724,17 @@ impl RegMemImm {
RegMemImm::Imm { .. } => {}
}
}
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
match self {
Self::Reg { reg } => {
let mut writable_src = Writable::from_reg(*reg);
map_def(mapper, &mut writable_src);
*self = Self::reg(writable_src.to_reg());
}
_ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"),
}
}
}
impl RegMem {
@ -1703,10 +1744,23 @@ impl RegMem {
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
}
}
fn map_as_def<RUM: RegUsageMapper>(&mut self, mapper: &RUM) {
match self {
Self::Reg { reg } => {
let mut writable_src = Writable::from_reg(*reg);
map_def(mapper, &mut writable_src);
*self = Self::reg(writable_src.to_reg());
}
_ => panic!("unexpected RegMem kind in map_src_reg_as_def"),
}
}
}
fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
// Note this must be carefully synchronized with x64_get_regs.
let produces_const = inst.produces_const();
match inst {
// ** Nop
Inst::Alu_RMI_R {
@ -1714,8 +1768,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
ref mut dst,
..
} => {
src.map_uses(mapper);
map_mod(mapper, dst);
if produces_const {
src.map_as_def(mapper);
map_def(mapper, dst);
} else {
src.map_uses(mapper);
map_mod(mapper, dst);
}
}
Inst::Div { divisor, .. } => divisor.map_uses(mapper),
Inst::MulHi { rhs, .. } => rhs.map_uses(mapper),
@ -1742,28 +1801,12 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
Inst::XmmRmRImm {
ref mut src,
ref mut dst,
ref op,
ref imm,
..
} => {
// In certain cases, instructions of this format can convert an XMM register into a
// define (e.g. an equality comparison); this extra logic is necessary to inform the
// registry allocator of a different register usage. TODO Re-factored in #2071.
if let RegMem::Reg { reg } = src {
if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps)
&& *imm == FcmpImm::Equal.encode()
&& *reg == dst.to_reg()
{
let mut writable_src = Writable::from_reg(*reg);
map_def(mapper, &mut writable_src);
*reg = writable_src.to_reg();
map_def(mapper, dst);
} else {
// Otherwise, we map the instruction as usual.
src.map_uses(mapper);
map_mod(mapper, dst);
}
if produces_const {
src.map_as_def(mapper);
map_def(mapper, dst);
} else {
// TODO this is duplicated because there seems to be no way to join the `if let` and `if`?
src.map_uses(mapper);
map_mod(mapper, dst);
}
@ -1773,8 +1816,13 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
ref mut dst,
..
} => {
src.map_uses(mapper);
map_mod(mapper, dst);
if produces_const {
src.map_as_def(mapper);
map_def(mapper, dst);
} else {
src.map_uses(mapper);
map_mod(mapper, dst);
}
}
Inst::XmmRmiReg {
ref mut src,
@ -2097,8 +2145,23 @@ impl MachInst for Inst {
) -> SmallVec<[Self; 4]> {
let mut ret = SmallVec::new();
if ty.is_int() {
let is_64 = ty == I64 && value > 0x7fffffff;
ret.push(Inst::imm_r(is_64, value, to_reg));
if value == 0 {
ret.push(Inst::alu_rmi_r(
ty == I64,
AluRmiROpcode::Xor,
RegMemImm::reg(to_reg.to_reg()),
to_reg,
));
} else {
let is_64 = ty == I64 && value > 0x7fffffff;
ret.push(Inst::imm_r(is_64, value, to_reg));
}
} else if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
match ty {
F32 => {

4
cranelift/codegen/src/isa/x64/lower.rs

@ -1043,7 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::F64const => {
// TODO use xorpd for 0 and cmpeqpd for all 1s.
// TODO use cmpeqpd for all 1s.
let value = ctx.get_constant(insn).unwrap();
let dst = output_to_reg(ctx, outputs[0]);
for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| {
@ -1054,7 +1054,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::F32const => {
// TODO use xorps for 0 and cmpeqps for all 1s.
// TODO use cmpeqps for all 1s.
let value = ctx.get_constant(insn).unwrap();
let dst = output_to_reg(ctx, outputs[0]);
for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {

Loading…
Cancel
Save