Browse Source

Fix AArch64 ABI to respect half-caller-save, half-callee-save vec regs.

This PR updates the AArch64 ABI implementation so that it properly
respects that v8-v15 inclusive have callee-save lower halves and
caller-save upper halves, by conservatively approximating (to full
registers) in the appropriate directions when generating prologue
callee-saves and when informing the regalloc of clobbered regs across
callsites.

In order to prevent saving all of these vector registers in the prologue
of every non-leaf function due to the above approximation, this also
makes use of a new regalloc.rs feature to exclude call instructions'
writes from the clobber set returned by register allocation. This is
safe whenever the caller and callee have the same ABI (because anything
the callee could clobber, the caller is allowed to clobber as well
without saving it in the prologue).

Fixes #2254.
pull/2267/head
Chris Fallin 4 years ago
parent
commit
71768bb6cf
  1. 4
      Cargo.lock
  2. 2
      cranelift/codegen/Cargo.toml
  3. 50
      cranelift/codegen/src/isa/aarch64/abi.rs
  4. 16
      cranelift/codegen/src/isa/aarch64/inst/args.rs
  5. 5
      cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
  6. 25
      cranelift/codegen/src/isa/aarch64/inst/imms.rs
  7. 31
      cranelift/codegen/src/isa/aarch64/inst/mod.rs
  8. 5
      cranelift/codegen/src/isa/aarch64/inst/regs.rs
  9. 8
      cranelift/codegen/src/isa/aarch64/lower_inst.rs
  10. 6
      cranelift/codegen/src/isa/aarch64/mod.rs
  11. 8
      cranelift/codegen/src/isa/arm32/abi.rs
  12. 14
      cranelift/codegen/src/isa/arm32/inst/args.rs
  13. 4
      cranelift/codegen/src/isa/arm32/inst/mod.rs
  14. 8
      cranelift/codegen/src/isa/arm32/lower_inst.rs
  15. 6
      cranelift/codegen/src/isa/arm32/mod.rs
  16. 6
      cranelift/codegen/src/isa/x64/abi.rs
  17. 19
      cranelift/codegen/src/isa/x64/inst/args.rs
  18. 6
      cranelift/codegen/src/isa/x64/inst/mod.rs
  19. 6
      cranelift/codegen/src/isa/x64/inst/regs.rs
  20. 11
      cranelift/codegen/src/isa/x64/lower.rs
  21. 6
      cranelift/codegen/src/isa/x64/mod.rs
  22. 4
      cranelift/codegen/src/machinst/abi.rs
  23. 34
      cranelift/codegen/src/machinst/abi_impl.rs
  24. 4
      cranelift/codegen/src/machinst/compile.rs
  25. 7
      cranelift/codegen/src/machinst/mod.rs
  26. 66
      cranelift/codegen/src/machinst/pretty_print.rs
  27. 10
      cranelift/codegen/src/machinst/vcode.rs
  28. 61
      cranelift/filetests/filetests/isa/aarch64/call.clif
  29. 99
      cranelift/filetests/filetests/isa/aarch64/prologue.clif

4
Cargo.lock

@ -1692,9 +1692,9 @@ dependencies = [
[[package]]
name = "regalloc"
version = "0.0.30"
version = "0.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2041c2d34f6ff346d6f428974f03d8bf12679b0c816bb640dc5eb1d48848d8d1"
checksum = "571f7f397d61c4755285cd37853fe8e03271c243424a907415909379659381c5"
dependencies = [
"log",
"rustc-hash",

2
cranelift/codegen/Cargo.toml

@ -28,7 +28,7 @@ byteorder = { version = "1.3.2", default-features = false }
peepmatic = { path = "../peepmatic", optional = true, version = "0.67.0" }
peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.67.0" }
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.67.0" }
regalloc = "0.0.30"
regalloc = { version = "0.0.31" }
souper-ir = { version = "1", optional = true }
wast = { version = "25.0.0", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.

50
cranelift/codegen/src/isa/aarch64/abi.rs

@ -510,7 +510,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
fixed_frame_storage_size: u32,
) -> (u64, SmallVec<[Inst; 16]>) {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
let total_save_bytes = (vec_save_bytes + int_save_bytes) as i32;
@ -561,7 +561,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
clobbers: &Set<Writable<RealReg>>,
) -> SmallVec<[Inst; 16]> {
let mut insts = SmallVec::new();
let (clobbered_int, clobbered_vec) = get_callee_saves(call_conv, clobbers);
let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
let (int_save_bytes, vec_save_bytes) = saved_reg_stack_size(&clobbered_int, &clobbered_vec);
for (i, reg_pair) in clobbered_int.chunks(2).enumerate() {
@ -629,6 +629,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
loc: SourceLoc,
opcode: ir::Opcode,
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
let mut insts = SmallVec::new();
match &dest {
@ -641,6 +643,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
defs,
loc,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
},
)),
@ -663,6 +667,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
defs,
loc,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
},
));
@ -676,6 +682,8 @@ impl ABIMachineSpec for AArch64MachineDeps {
defs,
loc,
opcode,
caller_callconv: caller_conv,
callee_callconv: callee_conv,
}),
},
)),
@ -704,17 +712,17 @@ impl ABIMachineSpec for AArch64MachineDeps {
s.nominal_sp_to_fp
}
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = Vec::new();
for i in 0..29 {
let x = writable_xreg(i);
if is_caller_save_reg(call_conv, x.to_reg().to_real_reg()) {
if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
caller_saved.push(x);
}
}
for i in 0..32 {
let v = writable_vreg(i);
if is_caller_save_reg(call_conv, v.to_reg().to_real_reg()) {
if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
caller_saved.push(v);
}
}
@ -731,7 +739,9 @@ fn legal_type_for_machine(ty: Type) -> bool {
}
}
fn is_callee_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool {
/// Is the given register saved in the prologue if clobbered, i.e., is it a
/// callee-save?
fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
@ -759,14 +769,17 @@ fn is_callee_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool {
}
}
fn get_callee_saves(
/// Return the set of all integer and vector registers that must be saved in the
/// prologue and restored in the epilogue, given the set of all registers
/// written by the function's body.
fn get_regs_saved_in_prologue(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
let mut int_saves = vec![];
let mut vec_saves = vec![];
for &reg in regs.iter() {
if is_callee_save_reg(call_conv, reg.to_reg()) {
if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
match reg.to_reg().get_class() {
RegClass::I64 => int_saves.push(reg),
RegClass::V128 => vec_saves.push(reg),
@ -781,8 +794,8 @@ fn get_callee_saves(
(int_saves, vec_saves)
}
fn is_caller_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool {
if call_conv.extends_baldrdash() {
fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool {
if call_conv_of_callee.extends_baldrdash() {
match r.get_class() {
RegClass::I64 => {
let enc = r.get_hw_encoding();
@ -808,8 +821,21 @@ fn is_caller_save_reg(call_conv: isa::CallConv, r: RealReg) -> bool {
r.get_hw_encoding() <= 17
}
RegClass::V128 => {
// v0 - v7 inclusive and v16 - v31 inclusive are caller-saves.
r.get_hw_encoding() <= 7 || (r.get_hw_encoding() >= 16 && r.get_hw_encoding() <= 31)
// v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The
// upper 64 bits of v8 - v15 inclusive are also caller-saves.
// However, because we cannot currently represent partial registers
// to regalloc.rs, we indicate here that every vector register is
// caller-save. Because this function is used at *callsites*,
// approximating in this direction (save more than necessary) is
// conservative and thus safe.
//
// Note that we set the 'not included in clobber set' flag in the
// regalloc.rs API when a call instruction's callee has the same ABI
// as the caller (the current function body); this is safe (anything
// clobbered by callee can be clobbered by caller as well) and
// avoids unnecessary saves of v8-v15 in the prologue even though we
// include them as defs here.
true
}
_ => panic!("Unexpected RegClass"),
}

16
cranelift/codegen/src/isa/aarch64/inst/args.rs

@ -8,7 +8,7 @@ use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, MachLabel};
use regalloc::{RealRegUniverse, Reg, Writable};
use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable};
use core::convert::Into;
use std::string::String;
@ -348,19 +348,19 @@ impl BranchTarget {
}
}
impl ShowWithRRU for ShiftOpAndAmt {
impl PrettyPrint for ShiftOpAndAmt {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?} {}", self.op(), self.amt().value())
}
}
impl ShowWithRRU for ExtendOp {
impl PrettyPrint for ExtendOp {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("{:?}", self)
}
}
impl ShowWithRRU for MemLabel {
impl PrettyPrint for MemLabel {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&MemLabel::PCRel(off) => format!("pc+{}", off),
@ -379,7 +379,7 @@ fn shift_for_type(ty: Type) -> usize {
}
}
impl ShowWithRRU for AMode {
impl PrettyPrint for AMode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&AMode::Unscaled(reg, simm9) => {
@ -458,7 +458,7 @@ impl ShowWithRRU for AMode {
}
}
impl ShowWithRRU for PairAMode {
impl PrettyPrint for PairAMode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&PairAMode::SignedOffset(reg, simm7) => {
@ -482,7 +482,7 @@ impl ShowWithRRU for PairAMode {
}
}
impl ShowWithRRU for Cond {
impl PrettyPrint for Cond {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = format!("{:?}", self);
s.make_ascii_lowercase();
@ -490,7 +490,7 @@ impl ShowWithRRU for Cond {
}
}
impl ShowWithRRU for BranchTarget {
impl PrettyPrint for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Label(label) => format!("label{:?}", label.get()),

5
cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

@ -1,6 +1,7 @@
use crate::ir::types::*;
use crate::isa::aarch64::inst::*;
use crate::isa::test_utils;
use crate::isa::CallConv;
use crate::settings;
use alloc::boxed::Box;
@ -3789,6 +3790,8 @@ fn test_aarch64_binemit() {
defs: Vec::new(),
loc: SourceLoc::default(),
opcode: Opcode::Call,
caller_callconv: CallConv::SystemV,
callee_callconv: CallConv::SystemV,
}),
},
"00000094",
@ -3803,6 +3806,8 @@ fn test_aarch64_binemit() {
defs: Vec::new(),
loc: SourceLoc::default(),
opcode: Opcode::CallIndirect,
caller_callconv: CallConv::SystemV,
callee_callconv: CallConv::SystemV,
}),
},
"40013FD6",

25
cranelift/codegen/src/isa/aarch64/inst/imms.rs

@ -5,9 +5,8 @@
use crate::ir::types::*;
use crate::ir::Type;
use crate::isa::aarch64::inst::OperandSize;
use crate::machinst::*;
use regalloc::RealRegUniverse;
use regalloc::{PrettyPrint, RealRegUniverse};
use core::convert::TryFrom;
use std::string::String;
@ -668,7 +667,7 @@ impl MoveWideConst {
}
}
impl ShowWithRRU for NZCV {
impl PrettyPrint for NZCV {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c };
format!(
@ -681,13 +680,13 @@ impl ShowWithRRU for NZCV {
}
}
impl ShowWithRRU for UImm5 {
impl PrettyPrint for UImm5 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for Imm12 {
impl PrettyPrint for Imm12 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let shift = if self.shift12 { 12 } else { 0 };
let value = u32::from(self.bits) << shift;
@ -695,49 +694,49 @@ impl ShowWithRRU for Imm12 {
}
}
impl ShowWithRRU for SImm7Scaled {
impl PrettyPrint for SImm7Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for FPULeftShiftImm {
impl PrettyPrint for FPULeftShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}
impl ShowWithRRU for FPURightShiftImm {
impl PrettyPrint for FPURightShiftImm {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.amount)
}
}
impl ShowWithRRU for SImm9 {
impl PrettyPrint for SImm9 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for UImm12Scaled {
impl PrettyPrint for UImm12Scaled {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for ImmLogic {
impl PrettyPrint for ImmLogic {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value())
}
}
impl ShowWithRRU for ImmShift {
impl PrettyPrint for ImmShift {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.imm)
}
}
impl ShowWithRRU for MoveWideConst {
impl PrettyPrint for MoveWideConst {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
if self.shift == 0 {
format!("#{}", self.bits)

31
cranelift/codegen/src/isa/aarch64/inst/mod.rs

@ -9,10 +9,11 @@ use crate::ir::types::{
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::isa::CallConv;
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper};
use alloc::boxed::Box;
@ -392,6 +393,8 @@ pub struct CallInfo {
pub defs: Vec<Writable<Reg>>,
pub loc: SourceLoc,
pub opcode: Opcode,
pub caller_callconv: CallConv,
pub callee_callconv: CallConv,
}
/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
@ -403,6 +406,8 @@ pub struct CallIndInfo {
pub defs: Vec<Writable<Reg>>,
pub loc: SourceLoc,
pub opcode: Opcode,
pub caller_callconv: CallConv,
pub callee_callconv: CallConv,
}
/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
@ -2491,6 +2496,24 @@ impl MachInst for Inst {
}
}
/// Reports whether this instruction's register writes are counted in the
/// clobber set: `false` for a `Call` or `CallInd` whose caller and callee
/// calling conventions are equal, `true` for every other instruction.
fn is_included_in_clobbers(&self) -> bool {
// We exclude call instructions from the clobber-set when they are calls
// from caller to callee with the same ABI. Such calls cannot possibly
// force any new registers to be saved in the prologue, because anything
// that the callee clobbers, the caller is also allowed to clobber. This
// both saves work and enables us to more precisely follow the
// half-caller-save, half-callee-save SysV ABI for some vector
// registers.
//
// See the note in [crate::isa::aarch64::abi::is_reg_clobbered_by_call]
// for more information on this ABI-implementation hack.
match self {
// Calls across differing calling conventions stay in the clobber set.
&Inst::Call { ref info } => info.caller_callconv != info.callee_callconv,
&Inst::CallInd { ref info } => info.caller_callconv != info.callee_callconv,
// Every non-call instruction is always included.
_ => true,
}
}
fn is_term<'a>(&'a self) -> MachTerminator<'a> {
match self {
&Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret,
@ -2623,7 +2646,7 @@ fn mem_finalize_for_show(
(mem_str, mem)
}
impl ShowWithRRU for Inst {
impl PrettyPrint for Inst {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.pretty_print(mb_rru, &mut EmitState::default())
}
@ -2883,13 +2906,13 @@ impl Inst {
&Inst::StoreP64 { rt, rt2, ref mem } => {
let rt = rt.show_rru(mb_rru);
let rt2 = rt2.show_rru(mb_rru);
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
let mem = mem.show_rru(mb_rru);
format!("stp {}, {}, {}", rt, rt2, mem)
}
&Inst::LoadP64 { rt, rt2, ref mem } => {
let rt = rt.to_reg().show_rru(mb_rru);
let rt2 = rt2.to_reg().show_rru(mb_rru);
let mem = mem.show_rru_sized(mb_rru, /* size = */ 8);
let mem = mem.show_rru(mb_rru);
format!("ldp {}, {}, {}", rt, rt2, mem)
}
&Inst::Mov64 { rd, rm } => {

5
cranelift/codegen/src/isa/aarch64/inst/regs.rs

@ -3,10 +3,11 @@
use crate::isa::aarch64::inst::OperandSize;
use crate::isa::aarch64::inst::ScalarSize;
use crate::isa::aarch64::inst::VectorSize;
use crate::machinst::*;
use crate::settings;
use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
use regalloc::{
PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
};
use std::string::{String, ToString};

8
cranelift/codegen/src/isa/aarch64/lower_inst.rs

@ -1857,6 +1857,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Call | Opcode::CallIndirect => {
let loc = ctx.srcloc(insn);
let caller_conv = ctx.abi().call_conv();
let (mut abi, inputs) = match op {
Opcode::Call => {
let (extname, dist) = ctx.call_target(insn).unwrap();
@ -1865,7 +1866,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert!(inputs.len() == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(
AArch64ABICaller::from_func(sig, &extname, dist, loc)?,
AArch64ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?,
&inputs[..],
)
}
@ -1874,7 +1875,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap();
assert!(inputs.len() - 1 == sig.params.len());
assert!(outputs.len() == sig.returns.len());
(AArch64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
(
AArch64ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?,
&inputs[1..],
)
}
_ => unreachable!(),
};

6
cranelift/codegen/src/isa/aarch64/mod.rs

@ -3,15 +3,13 @@
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:

8
cranelift/codegen/src/isa/arm32/abi.rs

@ -361,6 +361,8 @@ impl ABIMachineSpec for Arm32MachineDeps {
loc: SourceLoc,
opcode: ir::Opcode,
tmp: Writable<Reg>,
_callee_conv: isa::CallConv,
_caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
let mut insts = SmallVec::new();
match &dest {
@ -431,11 +433,11 @@ impl ABIMachineSpec for Arm32MachineDeps {
s.nominal_sp_to_fp
}
fn get_caller_saves(_call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
fn get_regs_clobbered_by_call(_: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = Vec::new();
for i in 0..15 {
let r = writable_rreg(i);
if is_caller_save(r.to_reg().to_real_reg()) {
if is_reg_clobbered_by_call(r.to_reg().to_real_reg()) {
caller_saved.push(r);
}
}
@ -461,7 +463,7 @@ fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
ret
}
fn is_caller_save(r: RealReg) -> bool {
fn is_reg_clobbered_by_call(r: RealReg) -> bool {
let enc = r.get_hw_encoding();
enc <= 3
}

14
cranelift/codegen/src/isa/arm32/inst/args.rs

@ -2,7 +2,7 @@
use crate::isa::arm32::inst::*;
use regalloc::{RealRegUniverse, Reg};
use regalloc::{PrettyPrint, RealRegUniverse, Reg};
use std::string::String;
@ -265,7 +265,7 @@ impl BranchTarget {
}
}
impl ShowWithRRU for ShiftOpAndAmt {
impl PrettyPrint for ShiftOpAndAmt {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let op = match self.op() {
ShiftOp::LSL => "lsl",
@ -277,19 +277,19 @@ impl ShowWithRRU for ShiftOpAndAmt {
}
}
impl ShowWithRRU for UImm8 {
impl PrettyPrint for UImm8 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for UImm12 {
impl PrettyPrint for UImm12 {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
format!("#{}", self.value)
}
}
impl ShowWithRRU for AMode {
impl PrettyPrint for AMode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&AMode::RegReg(rn, rm, imm2) => {
@ -317,7 +317,7 @@ impl ShowWithRRU for AMode {
}
}
impl ShowWithRRU for Cond {
impl PrettyPrint for Cond {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
let mut s = format!("{:?}", self);
s.make_ascii_lowercase();
@ -325,7 +325,7 @@ impl ShowWithRRU for Cond {
}
}
impl ShowWithRRU for BranchTarget {
impl PrettyPrint for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
&BranchTarget::Label(label) => format!("label{:?}", label.get()),

4
cranelift/codegen/src/isa/arm32/inst/mod.rs

@ -8,7 +8,7 @@ use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
use crate::{settings, CodegenError, CodegenResult};
use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use regalloc::{RegUsageCollector, RegUsageMapper};
use alloc::boxed::Box;
@ -897,7 +897,7 @@ fn mem_finalize_for_show(
(mem_str, mem)
}
impl ShowWithRRU for Inst {
impl PrettyPrint for Inst {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.pretty_print(mb_rru, &mut EmitState::default())
}

8
cranelift/codegen/src/isa/arm32/lower_inst.rs

@ -513,6 +513,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Call | Opcode::CallIndirect => {
let loc = ctx.srcloc(insn);
let caller_conv = ctx.abi().call_conv();
let (mut abi, inputs) = match op {
Opcode::Call => {
let (extname, dist) = ctx.call_target(insn).unwrap();
@ -521,7 +522,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
Arm32ABICaller::from_func(sig, &extname, dist, loc)?,
Arm32ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?,
&inputs[..],
)
}
@ -530,7 +531,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap();
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(Arm32ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
(
Arm32ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?,
&inputs[1..],
)
}
_ => unreachable!(),
};

6
cranelift/codegen/src/isa/arm32/mod.rs

@ -3,14 +3,12 @@
use crate::ir::condcodes::IntCC;
use crate::ir::Function;
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
compile, MachBackend, MachCompileResult, ShowWithRRU, TargetIsaAdapter, VCode,
};
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings;
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::{Architecture, ArmArchitecture, Triple};
// New backend:

6
cranelift/codegen/src/isa/x64/abi.rs

@ -493,6 +493,8 @@ impl ABIMachineSpec for X64ABIMachineSpec {
loc: SourceLoc,
opcode: ir::Opcode,
tmp: Writable<Reg>,
_callee_conv: isa::CallConv,
_caller_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> {
let mut insts = SmallVec::new();
match dest {
@ -545,7 +547,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
s.nominal_sp_to_fp
}
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>> {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
let mut caller_saved = vec![
// Systemv calling convention:
// - GPR: all except RBX, RBP, R12 to R15 (which are callee-saved).
@ -577,7 +579,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Writable::from_reg(regs::xmm15()),
];
if call_conv.extends_baldrdash() {
if call_conv_of_callee.extends_baldrdash() {
caller_saved.push(Writable::from_reg(regs::r12()));
caller_saved.push(Writable::from_reg(regs::r13()));
// Not r14; implicitly preserved in the entry.

19
cranelift/codegen/src/isa/x64/inst/args.rs

@ -5,7 +5,10 @@ use super::EmitState;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::machinst::*;
use core::fmt::Debug;
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, Writable};
use regalloc::{
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
RegUsageMapper, Writable,
};
use std::fmt;
use std::string::{String, ToString};
@ -68,7 +71,7 @@ impl Amode {
}
}
impl ShowWithRRU for Amode {
impl PrettyPrint for Amode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
Amode::ImmReg { simm32, base } => {
@ -156,7 +159,7 @@ impl Into<SyntheticAmode> for Amode {
}
}
impl ShowWithRRU for SyntheticAmode {
impl PrettyPrint for SyntheticAmode {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
match self {
SyntheticAmode::Real(addr) => addr.show_rru(mb_rru),
@ -214,11 +217,13 @@ impl RegMemImm {
}
}
impl ShowWithRRU for RegMemImm {
impl PrettyPrint for RegMemImm {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
}
impl PrettyPrintSized for RegMemImm {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
@ -271,11 +276,13 @@ impl From<Writable<Reg>> for RegMem {
}
}
impl ShowWithRRU for RegMem {
impl PrettyPrint for RegMem {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.show_rru_sized(mb_rru, 8)
}
}
impl PrettyPrintSized for RegMem {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
@ -1098,7 +1105,7 @@ pub enum BranchTarget {
ResolvedOffset(isize),
}
impl ShowWithRRU for BranchTarget {
impl PrettyPrint for BranchTarget {
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
match self {
BranchTarget::Label(l) => format!("{:?}", l),

6
cranelift/codegen/src/isa/x64/inst/mod.rs

@ -8,8 +8,8 @@ use crate::{settings, settings::Flags, CodegenError, CodegenResult};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{
RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot, VirtualReg,
Writable,
PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector,
RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use smallvec::SmallVec;
use std::fmt;
@ -1165,7 +1165,7 @@ impl Inst {
//=============================================================================
// Instructions: printing
impl ShowWithRRU for Inst {
impl PrettyPrint for Inst {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
fn ljustify(s: String) -> String {
let w = 7;

6
cranelift/codegen/src/isa/x64/inst/regs.rs

@ -10,9 +10,11 @@
//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
//! for each function we compile.
use crate::{machinst::pretty_print::ShowWithRRU, settings};
use crate::settings;
use alloc::vec::Vec;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES};
use regalloc::{
PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
};
use std::string::String;
// Hardware encodings for a few registers.

11
cranelift/codegen/src/isa/x64/lower.rs

@ -384,9 +384,10 @@ fn emit_vm_call<C: LowerCtx<I = Inst>>(
// TODO avoid recreating signatures for every single Libcall function.
let call_conv = CallConv::for_libcall(flags, CallConv::triple_default(triple));
let sig = make_libcall_sig(ctx, insn, call_conv, types::I64);
let caller_conv = ctx.abi().call_conv();
let loc = ctx.srcloc(insn);
let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc)?;
let mut abi = X64ABICaller::from_func(&sig, &extname, dist, loc, caller_conv)?;
abi.emit_stack_pre_adjust(ctx);
@ -1558,6 +1559,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Call | Opcode::CallIndirect => {
let loc = ctx.srcloc(insn);
let caller_conv = ctx.abi().call_conv();
let (mut abi, inputs) = match op {
Opcode::Call => {
let (extname, dist) = ctx.call_target(insn).unwrap();
@ -1565,7 +1567,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
assert_eq!(inputs.len(), sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(
X64ABICaller::from_func(sig, &extname, dist, loc)?,
X64ABICaller::from_func(sig, &extname, dist, loc, caller_conv)?,
&inputs[..],
)
}
@ -1575,7 +1577,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let sig = ctx.call_sig(insn).unwrap();
assert_eq!(inputs.len() - 1, sig.params.len());
assert_eq!(outputs.len(), sig.returns.len());
(X64ABICaller::from_ptr(sig, ptr, loc, op)?, &inputs[1..])
(
X64ABICaller::from_ptr(sig, ptr, loc, op, caller_conv)?,
&inputs[1..],
)
}
_ => unreachable!(),

6
cranelift/codegen/src/isa/x64/mod.rs

@ -4,13 +4,11 @@ use super::TargetIsa;
use crate::ir::{condcodes::IntCC, Function};
use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings};
use crate::isa::Builder as IsaBuilder;
use crate::machinst::{
compile, pretty_print::ShowWithRRU, MachBackend, MachCompileResult, TargetIsaAdapter, VCode,
};
use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
use crate::result::CodegenResult;
use crate::settings::{self as shared_settings, Flags};
use alloc::boxed::Box;
use regalloc::RealRegUniverse;
use regalloc::{PrettyPrint, RealRegUniverse};
use target_lexicon::Triple;
mod abi;

4
cranelift/codegen/src/machinst/abi.rs

@ -2,6 +2,7 @@
use crate::binemit::StackMap;
use crate::ir::StackSlot;
use crate::isa::CallConv;
use crate::machinst::*;
use crate::settings;
@ -25,6 +26,9 @@ pub trait ABICallee {
/// Get the settings controlling this function's compilation.
fn flags(&self) -> &settings::Flags;
/// Get the calling convention implemented by this ABI object.
fn call_conv(&self) -> CallConv;
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;

34
cranelift/codegen/src/machinst/abi_impl.rs

@ -347,6 +347,8 @@ pub trait ABIMachineSpec {
loc: SourceLoc,
opcode: ir::Opcode,
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
callee_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
/// Get the number of spillslots required for the given register-class and
@ -359,8 +361,9 @@ pub trait ABIMachineSpec {
/// Get the "nominal SP to FP" offset from an instruction-emission state.
fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64;
/// Get all caller-save registers.
fn get_caller_saves(call_conv: isa::CallConv) -> Vec<Writable<Reg>>;
/// Get all caller-save registers, that is, registers that we expect
/// not to be saved across a call to a callee with the given ABI.
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>>;
}
/// ABI information shared between body (callee) and caller.
@ -682,6 +685,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
&self.flags
}
/// Returns the calling convention stored in this callee's signature.
fn call_conv(&self) -> isa::CallConv {
self.sig.call_conv
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for &arg in &self.sig.args {
@ -1040,7 +1047,7 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_caller_saves(sig.call_conv);
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
for ret in &sig.rets {
match ret {
&ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())),
@ -1063,8 +1070,10 @@ pub struct ABICallerImpl<M: ABIMachineSpec> {
dest: CallDest,
/// Location of callsite.
loc: ir::SourceLoc,
/// Actuall call opcode; used to distinguish various types of calls.
/// Actual call opcode; used to distinguish various types of calls.
opcode: ir::Opcode,
/// Caller's calling convention.
caller_conv: isa::CallConv,
_mach: PhantomData<M>,
}
@ -1085,6 +1094,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
extname: &ir::ExternalName,
dist: RelocDistance,
loc: ir::SourceLoc,
caller_conv: isa::CallConv,
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
@ -1095,6 +1105,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
dest: CallDest::ExtName(extname.clone(), dist),
loc,
opcode: ir::Opcode::Call,
caller_conv,
_mach: PhantomData,
})
}
@ -1106,6 +1117,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
ptr: Reg,
loc: ir::SourceLoc,
opcode: ir::Opcode,
caller_conv: isa::CallConv,
) -> CodegenResult<ABICallerImpl<M>> {
let sig = ABISig::from_func_sig::<M>(sig)?;
let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
@ -1116,6 +1128,7 @@ impl<M: ABIMachineSpec> ABICallerImpl<M> {
dest: CallDest::Reg(ptr),
loc,
opcode,
caller_conv,
_mach: PhantomData,
})
}
@ -1255,8 +1268,17 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
}
let tmp = ctx.alloc_tmp(word_rc, word_type);
for (is_safepoint, inst) in
M::gen_call(&self.dest, uses, defs, self.loc, self.opcode, tmp).into_iter()
for (is_safepoint, inst) in M::gen_call(
&self.dest,
uses,
defs,
self.loc,
self.opcode,
tmp,
self.sig.call_conv,
self.caller_conv,
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),

4
cranelift/codegen/src/machinst/compile.rs

@ -6,7 +6,7 @@ use crate::settings;
use crate::timing;
use log::debug;
use regalloc::{allocate_registers_with_opts, Algorithm, Options};
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
@ -16,7 +16,7 @@ pub fn compile<B: LowerBackend + MachBackend>(
abi: Box<dyn ABICallee<I = B::MInst>>,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: ShowWithRRU,
B::MInst: PrettyPrint,
{
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);

7
cranelift/codegen/src/machinst/mod.rs

@ -125,8 +125,6 @@ pub mod abi;
pub use abi::*;
pub mod abi_impl;
pub use abi_impl::*;
pub mod pretty_print;
pub use pretty_print::*;
pub mod buffer;
pub use buffer::*;
pub mod adapter;
@ -156,6 +154,11 @@ pub trait MachInst: Clone + Debug {
/// Returns true if the instruction is an epilogue placeholder.
fn is_epilogue_placeholder(&self) -> bool;
/// Should this instruction be included in the clobber-set?
///
/// The default implementation answers `true` for every instruction; an
/// implementor may override this method to answer differently.
fn is_included_in_clobbers(&self) -> bool {
true
}
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

66
cranelift/codegen/src/machinst/pretty_print.rs

@ -1,66 +0,0 @@
//! Pretty-printing for machine code (virtual-registerized or final).
use regalloc::{RealRegUniverse, Reg, Writable};
use std::fmt::Debug;
use std::hash::Hash;
use std::string::{String, ToString};
// FIXME: Should this go into regalloc.rs instead?
/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising `RealRegUniverse` that is used to give proper names
/// to registers.
pub trait ShowWithRRU {
/// Return a string that shows the implementing object in the context of the
/// given `RealRegUniverse`, if provided.
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;
/// The same as `show_rru`, but with an additional hint giving a size in
/// bytes. Its interpretation is object-dependent, and it is intended to
/// pass around enough information to facilitate printing sub-parts of
/// real registers correctly. Objects may ignore size hints that are
/// irrelevant to them.
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
// Default implementation is to ignore the hint and defer to `show_rru`.
self.show_rru(mb_rru)
}
}
/// Register printing: a real register takes its name from the universe when
/// one is supplied; every other case falls back to a generic `Debug` form.
impl ShowWithRRU for Reg {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        match mb_rru {
            Some(universe) if self.is_real() => {
                let index = self.get_index();
                if index < universe.regs.len() {
                    // Listed in the universe: use its proper name.
                    universe.regs[index].1.to_string()
                } else {
                    // A real reg that the universe does not list. The
                    // regalloc.rs interface requirements forbid this, but
                    // print it generically anyway so there is something to
                    // debug with.
                    format!("!!{:?}!!", self)
                }
            }
            // Virtual register, or no universe to consult: be generic.
            _ => format!("%{:?}", self),
        }
    }

    fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // A size hint can only be interpreted in a target-specific way, and
        // this impl is shared by all targets, so sized printing of a bare
        // `Reg` is deliberately refused.
        panic!("Reg::show_rru_sized: impossible to implement");
    }
}
/// A `Writable<R>` prints exactly like the `R` it wraps: both methods unwrap
/// the inner value and delegate to its own implementation.
impl<R> ShowWithRRU for Writable<R>
where
    R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug,
{
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        let inner = self.to_reg();
        inner.show_rru(mb_rru)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        let inner = self.to_reg();
        inner.show_rru_sized(mb_rru, size)
    }
}

10
cranelift/codegen/src/machinst/vcode.rs

@ -25,8 +25,8 @@ use crate::timing;
use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{
BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector, RegUsageMapper, SpillSlot,
StackmapRequestInfo,
BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector,
RegUsageMapper, SpillSlot, StackmapRequestInfo,
};
use alloc::boxed::Box;
@ -543,6 +543,10 @@ impl<I: VCodeInst> RegallocFunction for VCode<I> {
}
}
/// Answer the clobber-set inclusion query for `insn` by asking the
/// instruction itself.
fn is_included_in_clobbers(&self, insn: &I) -> bool {
insn.is_included_in_clobbers()
}
/// Delegate register-usage collection to the instruction, handing it the
/// caller's `collector`.
fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
insn.get_regs(collector)
}
@ -624,7 +628,7 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}
/// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst> ShowWithRRU for VCode<I> {
impl<I: VCodeInst> PrettyPrint for VCode<I> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
use std::fmt::Write;

61
cranelift/filetests/filetests/isa/aarch64/call.clif

@ -151,34 +151,27 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str d0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr d0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@ -202,33 +195,26 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr q0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr q0, [sp, #32]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
@ -255,34 +241,27 @@ block0:
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #48
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: virtual_sp_offset_adjust 48
; nextln: sub sp, sp, #32
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v8.16b, v0.16b
; nextln: str s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v9.16b, v0.16b
; nextln: str d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v10.16b, v0.16b
; nextln: str q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v8.16b
; nextln: ldr s0, [sp]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v9.16b
; nextln: ldr d0, [sp, #8]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: mov v0.16b, v10.16b
; nextln: ldr q0, [sp, #16]
; nextln: ldr x0, 8 ; b 12 ; data
; nextln: blr x0
; nextln: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

99
cranelift/filetests/filetests/isa/aarch64/prologue.clif

@ -0,0 +1,99 @@
test compile
target aarch64
; Computes 31 separate `fadd` results from `v0` before any of them is
; consumed, then sums everything in a pairwise reduction tree.
; NOTE(review): presumably this is meant to create enough register pressure
; that v8-v15 are allocated and must be saved in the prologue; confirm.
function %f(f64) -> f64 {
block0(v0: f64):
v1 = fadd.f64 v0, v0
v2 = fadd.f64 v0, v0
v3 = fadd.f64 v0, v0
v4 = fadd.f64 v0, v0
v5 = fadd.f64 v0, v0
v6 = fadd.f64 v0, v0
v7 = fadd.f64 v0, v0
v8 = fadd.f64 v0, v0
v9 = fadd.f64 v0, v0
v10 = fadd.f64 v0, v0
v11 = fadd.f64 v0, v0
v12 = fadd.f64 v0, v0
v13 = fadd.f64 v0, v0
v14 = fadd.f64 v0, v0
v15 = fadd.f64 v0, v0
v16 = fadd.f64 v0, v0
v17 = fadd.f64 v0, v0
v18 = fadd.f64 v0, v0
v19 = fadd.f64 v0, v0
v20 = fadd.f64 v0, v0
v21 = fadd.f64 v0, v0
v22 = fadd.f64 v0, v0
v23 = fadd.f64 v0, v0
v24 = fadd.f64 v0, v0
v25 = fadd.f64 v0, v0
v26 = fadd.f64 v0, v0
v27 = fadd.f64 v0, v0
v28 = fadd.f64 v0, v0
v29 = fadd.f64 v0, v0
v30 = fadd.f64 v0, v0
v31 = fadd.f64 v0, v0
v32 = fadd.f64 v0, v1
v33 = fadd.f64 v2, v3
v34 = fadd.f64 v4, v5
v35 = fadd.f64 v6, v7
v36 = fadd.f64 v8, v9
v37 = fadd.f64 v10, v11
v38 = fadd.f64 v12, v13
v39 = fadd.f64 v14, v15
v40 = fadd.f64 v16, v17
v41 = fadd.f64 v18, v19
v42 = fadd.f64 v20, v21
v43 = fadd.f64 v22, v23
v44 = fadd.f64 v24, v25
v45 = fadd.f64 v26, v27
v46 = fadd.f64 v28, v29
v47 = fadd.f64 v30, v31
v48 = fadd.f64 v32, v33
v49 = fadd.f64 v34, v35
v50 = fadd.f64 v36, v37
v51 = fadd.f64 v38, v39
v52 = fadd.f64 v40, v41
v53 = fadd.f64 v42, v43
v54 = fadd.f64 v44, v45
v55 = fadd.f64 v46, v47
v56 = fadd.f64 v48, v49
v57 = fadd.f64 v50, v51
v58 = fadd.f64 v52, v53
v59 = fadd.f64 v54, v55
v60 = fadd.f64 v56, v57
v61 = fadd.f64 v58, v59
v62 = fadd.f64 v60, v61
return v62
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: sub sp, sp, #128
; nextln: str q8, [sp]
; nextln: str q9, [sp, #16]
; nextln: str q10, [sp, #32]
; nextln: str q11, [sp, #48]
; nextln: str q12, [sp, #64]
; nextln: str q13, [sp, #80]
; nextln: str q14, [sp, #96]
; nextln: str q15, [sp, #112]
; check: ldr q8, [sp]
; nextln: ldr q9, [sp, #16]
; nextln: ldr q10, [sp, #32]
; nextln: ldr q11, [sp, #48]
; nextln: ldr q12, [sp, #64]
; nextln: ldr q13, [sp, #80]
; nextln: ldr q14, [sp, #96]
; nextln: ldr q15, [sp, #112]
; nextln: mov sp, fp
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
Loading…
Cancel
Save