Browse Source
This PR updates the AArch64 ABI implementation so that it (i) properly respects that v8-v15 inclusive have callee-save lower halves and caller-save upper halves, by conservatively approximating (to full registers) in the appropriate directions when generating prologue callee-saves and when informing the regalloc of clobbered regs across callsites. In order to prevent saving all of these vector registers in the prologue of every non-leaf function due to the above approximation, it also (ii) makes use of a new regalloc.rs feature to exclude call instructions' writes from the clobber set returned by register allocation. This is safe whenever the caller and callee have the same ABI (because anything the callee could clobber, the caller is allowed to clobber as well without saving it in the prologue). Fixes #2254. (pull/2267/head)
Chris Fallin
4 years ago
29 changed files with 325 additions and 206 deletions
@ -1,66 +0,0 @@ |
|||
//! Pretty-printing for machine code (virtual-registerized or final).
|
|||
|
|||
use regalloc::{RealRegUniverse, Reg, Writable}; |
|||
|
|||
use std::fmt::Debug; |
|||
use std::hash::Hash; |
|||
use std::string::{String, ToString}; |
|||
|
|||
// FIXME: Should this go into regalloc.rs instead?
|
|||
|
|||
/// A trait for printing instruction bits and pieces, with the the ability to
|
|||
/// take a contextualising RealRegUniverse that is used to give proper names to
|
|||
/// registers.
|
|||
pub trait ShowWithRRU { |
|||
/// Return a string that shows the implementing object in context of the
|
|||
/// given `RealRegUniverse`, if provided.
|
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String; |
|||
|
|||
/// The same as |show_rru|, but with an optional hint giving a size in
|
|||
/// bytes. Its interpretation is object-dependent, and it is intended to
|
|||
/// pass around enough information to facilitate printing sub-parts of
|
|||
/// real registers correctly. Objects may ignore size hints that are
|
|||
/// irrelevant to them.
|
|||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { |
|||
// Default implementation is to ignore the hint.
|
|||
self.show_rru(mb_rru) |
|||
} |
|||
} |
|||
|
|||
impl ShowWithRRU for Reg { |
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { |
|||
if self.is_real() { |
|||
if let Some(rru) = mb_rru { |
|||
let reg_ix = self.get_index(); |
|||
if reg_ix < rru.regs.len() { |
|||
return rru.regs[reg_ix].1.to_string(); |
|||
} else { |
|||
// We have a real reg which isn't listed in the universe.
|
|||
// Per the regalloc.rs interface requirements, this is
|
|||
// Totally Not Allowed. Print it generically anyway, so
|
|||
// we have something to debug.
|
|||
return format!("!!{:?}!!", self); |
|||
} |
|||
} |
|||
} |
|||
// The reg is virtual, or we have no universe. Be generic.
|
|||
format!("%{:?}", self) |
|||
} |
|||
|
|||
fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { |
|||
// For the specific case of Reg, we demand not to have a size hint,
|
|||
// since interpretation of the size is target specific, but this code
|
|||
// is used by all targets.
|
|||
panic!("Reg::show_rru_sized: impossible to implement"); |
|||
} |
|||
} |
|||
|
|||
impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> { |
|||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { |
|||
self.to_reg().show_rru(mb_rru) |
|||
} |
|||
|
|||
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { |
|||
self.to_reg().show_rru_sized(mb_rru, size) |
|||
} |
|||
} |
@ -0,0 +1,99 @@ |
|||
test compile |
|||
target aarch64 |
|||
|
|||
; Test function: v1..v31 are each computed as v0 + v0, then all values are
; combined pairwise in a reduction tree (v32..v62) and v62 is returned.
; NOTE(review): presumably the large number of simultaneously-live f64 values
; is meant to push allocation into v8-v15, so that the filecheck directives
; following this function can observe q8-q15 being saved and restored -- confirm.
function %f(f64) -> f64 { |
|||
block0(v0: f64): |
|||
v1 = fadd.f64 v0, v0 |
|||
v2 = fadd.f64 v0, v0 |
|||
v3 = fadd.f64 v0, v0 |
|||
v4 = fadd.f64 v0, v0 |
|||
v5 = fadd.f64 v0, v0 |
|||
v6 = fadd.f64 v0, v0 |
|||
v7 = fadd.f64 v0, v0 |
|||
v8 = fadd.f64 v0, v0 |
|||
v9 = fadd.f64 v0, v0 |
|||
v10 = fadd.f64 v0, v0 |
|||
v11 = fadd.f64 v0, v0 |
|||
v12 = fadd.f64 v0, v0 |
|||
v13 = fadd.f64 v0, v0 |
|||
v14 = fadd.f64 v0, v0 |
|||
v15 = fadd.f64 v0, v0 |
|||
v16 = fadd.f64 v0, v0 |
|||
v17 = fadd.f64 v0, v0 |
|||
v18 = fadd.f64 v0, v0 |
|||
v19 = fadd.f64 v0, v0 |
|||
v20 = fadd.f64 v0, v0 |
|||
v21 = fadd.f64 v0, v0 |
|||
v22 = fadd.f64 v0, v0 |
|||
v23 = fadd.f64 v0, v0 |
|||
v24 = fadd.f64 v0, v0 |
|||
v25 = fadd.f64 v0, v0 |
|||
v26 = fadd.f64 v0, v0 |
|||
v27 = fadd.f64 v0, v0 |
|||
v28 = fadd.f64 v0, v0 |
|||
v29 = fadd.f64 v0, v0 |
|||
v30 = fadd.f64 v0, v0 |
|||
v31 = fadd.f64 v0, v0 |
|||
|
|||
v32 = fadd.f64 v0, v1 |
|||
v33 = fadd.f64 v2, v3 |
|||
v34 = fadd.f64 v4, v5 |
|||
v35 = fadd.f64 v6, v7 |
|||
v36 = fadd.f64 v8, v9 |
|||
v37 = fadd.f64 v10, v11 |
|||
v38 = fadd.f64 v12, v13 |
|||
v39 = fadd.f64 v14, v15 |
|||
v40 = fadd.f64 v16, v17 |
|||
v41 = fadd.f64 v18, v19 |
|||
v42 = fadd.f64 v20, v21 |
|||
v43 = fadd.f64 v22, v23 |
|||
v44 = fadd.f64 v24, v25 |
|||
v45 = fadd.f64 v26, v27 |
|||
v46 = fadd.f64 v28, v29 |
|||
v47 = fadd.f64 v30, v31 |
|||
|
|||
v48 = fadd.f64 v32, v33 |
|||
v49 = fadd.f64 v34, v35 |
|||
v50 = fadd.f64 v36, v37 |
|||
v51 = fadd.f64 v38, v39 |
|||
v52 = fadd.f64 v40, v41 |
|||
v53 = fadd.f64 v42, v43 |
|||
v54 = fadd.f64 v44, v45 |
|||
v55 = fadd.f64 v46, v47 |
|||
|
|||
v56 = fadd.f64 v48, v49 |
|||
v57 = fadd.f64 v50, v51 |
|||
v58 = fadd.f64 v52, v53 |
|||
v59 = fadd.f64 v54, v55 |
|||
|
|||
v60 = fadd.f64 v56, v57 |
|||
v61 = fadd.f64 v58, v59 |
|||
|
|||
v62 = fadd.f64 v60, v61 |
|||
|
|||
return v62 |
|||
} |
|||
|
|||
; check: stp fp, lr, [sp, #-16]! |
|||
; nextln: mov fp, sp |
|||
; nextln: sub sp, sp, #128 |
|||
; nextln: str q8, [sp] |
|||
; nextln: str q9, [sp, #16] |
|||
; nextln: str q10, [sp, #32] |
|||
; nextln: str q11, [sp, #48] |
|||
; nextln: str q12, [sp, #64] |
|||
; nextln: str q13, [sp, #80] |
|||
; nextln: str q14, [sp, #96] |
|||
; nextln: str q15, [sp, #112] |
|||
|
|||
; check: ldr q8, [sp] |
|||
; nextln: ldr q9, [sp, #16] |
|||
; nextln: ldr q10, [sp, #32] |
|||
; nextln: ldr q11, [sp, #48] |
|||
; nextln: ldr q12, [sp, #64] |
|||
; nextln: ldr q13, [sp, #80] |
|||
; nextln: ldr q14, [sp, #96] |
|||
; nextln: ldr q15, [sp, #112] |
|||
; nextln: mov sp, fp |
|||
; nextln: ldp fp, lr, [sp], #16 |
|||
; nextln: ret |
Loading…
Reference in new issue