Browse Source

Avoid unnecessary work when passing arguments on

pull/397/head
Jef 6 years ago
parent
commit
b86d11e4ad
  1. 152
      src/backend.rs

152
src/backend.rs

@ -144,7 +144,7 @@ const ARGS_IN_GPRS: &[GPR] = &[RDI, RSI, RDX, RCX, R8, R9];
const SCRATCH_REGS: &[GPR] = &[RAX, R10, R11];
#[must_use]
pub struct Function {
pub struct FunctionEnd {
should_generate_epilogue: bool,
}
@ -299,6 +299,79 @@ impl StackValue {
}
}
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum ArgLoc {
Register(GPR),
Stack(i32),
Both(GPR, i32),
}
impl ArgLoc {
fn from_loc(loc: ValueLocation) -> Self {
match loc {
ValueLocation::Stack(offset) => ArgLoc::Stack(offset),
ValueLocation::Reg(gpr) => ArgLoc::Register(gpr),
_ => panic!("Unsupported local location"),
}
}
fn union(&mut self, other: ArgLoc) {
for l in other.locs() {
self.add_loc(l);
}
}
fn locs(&self) -> impl Iterator<Item = ValueLocation> {
self.reg()
.map(ValueLocation::Reg)
.into_iter()
.chain(self.stack().map(ValueLocation::Stack))
}
fn reg(&self) -> Option<GPR> {
match *self {
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => Some(gpr),
ArgLoc::Stack(_) => None,
}
}
fn stack(&self) -> Option<i32> {
match *self {
ArgLoc::Stack(o) | ArgLoc::Both(_, o) => Some(o),
ArgLoc::Register(_) => None,
}
}
fn add_loc(&mut self, loc: ValueLocation) {
match loc {
ValueLocation::Stack(offset) => self.add_stack(offset),
ValueLocation::Reg(gpr) => self.add_reg(gpr),
_ => panic!("Unsupported local location"),
}
}
fn add_stack(&mut self, offset: i32) {
*self = match *self {
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => ArgLoc::Both(gpr, offset),
ArgLoc::Stack(_) => ArgLoc::Stack(offset),
};
}
fn add_reg(&mut self, gpr: GPR) {
*self = match *self {
ArgLoc::Stack(offset) | ArgLoc::Both(_, offset) => ArgLoc::Both(gpr, offset),
ArgLoc::Register(_) => ArgLoc::Register(gpr),
};
}
fn best_loc(&self) -> ValueLocation {
match *self {
ArgLoc::Register(gpr) | ArgLoc::Both(gpr, _) => ValueLocation::Reg(gpr),
ArgLoc::Stack(offset) => ValueLocation::Stack(offset),
}
}
}
/// A store for the local values for our function, including arguments.
#[derive(Debug, Default, Clone)]
pub struct Locals {
@ -309,7 +382,7 @@ pub struct Locals {
/// registers that this can contain. If we need to move the argument
/// out of a register (for example, because we're calling a function)
/// we note that down here, so we don't have to move it back afterwards.
register_arguments: ArrayVec<[ValueLocation; ARGS_IN_GPRS.len()]>,
register_arguments: ArrayVec<[ArgLoc; ARGS_IN_GPRS.len()]>,
/// The number of arguments stored on the stack.
num_stack_args: u32,
/// The number of local stack slots, i.e. the amount of stack space reserved for locals.
@ -325,14 +398,18 @@ impl Locals {
}
}
fn add_pos(&mut self, index: u32, loc: ValueLocation) {
self.register_arguments[index as usize].add_loc(loc);
}
fn set_pos(&mut self, index: u32, loc: ValueLocation) {
self.register_arguments[index as usize] = loc;
self.register_arguments[index as usize] = ArgLoc::from_loc(loc);
}
fn get(&self, index: u32) -> ValueLocation {
self.register_arguments
.get(index as usize)
.cloned()
.map(ArgLoc::best_loc)
.unwrap_or_else(|| {
let stack_index = index - self.register_arguments.len() as u32;
if stack_index < self.num_stack_args {
@ -890,6 +967,14 @@ impl Context<'_> {
Label(self.asm.new_dynamic_label())
}
pub fn define_host_fn(&mut self, host_fn: *const u8) {
dynasm!(self.asm
; mov rax, QWORD host_fn as i64
; call rax
; ret
);
}
fn adjusted_offset(&self, offset: i32) -> i32 {
(self.block_state.depth.0 * WORD_SIZE) as i32 + offset
}
@ -1184,7 +1269,7 @@ impl Context<'_> {
.iter()
.zip(&locals.register_arguments)
{
self.copy_value(*src, *dst);
self.copy_value(src.best_loc(), dst.best_loc());
}
for (src, dst) in self
@ -1194,7 +1279,7 @@ impl Context<'_> {
.iter_mut()
.zip(&locals.register_arguments)
{
*src = *dst;
src.union(*dst);
}
}
@ -1615,7 +1700,7 @@ impl Context<'_> {
.register_arguments
.get_mut(local_idx as usize)
{
*cur = dst_loc;
*cur = ArgLoc::from_loc(dst_loc);
}
self.copy_value(val_loc, dst_loc);
@ -1636,7 +1721,7 @@ impl Context<'_> {
.register_arguments
.get_mut(local_idx as usize)
{
*cur = dst_loc;
*cur = ArgLoc::from_loc(dst_loc);
}
self.copy_value(val_loc, dst_loc);
@ -1715,23 +1800,25 @@ impl Context<'_> {
/// Unfortunately, we can't elide this store if we're just passing arguments on
/// because these registers are caller-saved and so the callee can use them as
/// scratch space.
fn free_arg_registers(&mut self, count: u32) {
if count == 0 {
return;
}
fn free_arg_registers(&mut self, arity: u32) {
// This is bound to the maximum size of the `ArrayVec` amd so can be considered to have constant
// runtime
for i in 0..self.block_state.locals.register_arguments.len() {
for i in 0..self
.block_state
.locals
.register_arguments
.len()
.min(arity as usize)
{
match self.block_state.locals.register_arguments[i] {
ValueLocation::Reg(reg) => {
ArgLoc::Register(reg) => {
if ARGS_IN_GPRS.contains(&reg) {
let dst = ValueLocation::Stack(
let offset =
((self.block_state.locals.num_local_stack_slots - 1 - i as u32)
* WORD_SIZE) as _,
);
* WORD_SIZE) as _;
let dst = ValueLocation::Stack(offset);
self.copy_value(ValueLocation::Reg(reg), dst);
self.block_state.locals.register_arguments[i] = dst;
self.block_state.locals.register_arguments[i].add_stack(offset);
}
}
_ => {}
@ -1821,10 +1908,15 @@ impl Context<'_> {
/// Write the arguments to the callee to the registers and the stack using the SystemV
/// calling convention.
fn pass_outgoing_args(&mut self, arity: u32, return_arity: u32) -> CallCleanup {
fn pass_outgoing_args(
&mut self,
arity: u32,
return_arity: u32,
has_vmctx: bool,
) -> CallCleanup {
let num_stack_args = (arity as usize).saturating_sub(ARGS_IN_GPRS.len()) as i32;
self.free_arg_registers(arity);
self.free_arg_registers(if has_vmctx { arity - 1} else { arity } );
// We pop stack arguments first - arguments are RTL
if num_stack_args > 0 {
@ -1879,6 +1971,14 @@ impl Context<'_> {
);
}
// If these values were in register they've now been invalidated, since
// the callee can use them as scratch.
for loc in self.block_state.locals.register_arguments.iter_mut() {
if let Some(offset) = loc.stack() {
*loc = ArgLoc::Stack(offset);
}
}
for reg in cleanup.restore_registers.drain(..).rev() {
dynasm!(self.asm
; pop Rq(reg)
@ -1904,7 +2004,7 @@ impl Context<'_> {
let vmctx = Value::Local(self.block_state.locals.vmctx_index());
self.push(vmctx);
let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity);
let cleanup = self.pass_outgoing_args(arg_arity + 1, return_arity, true);
let label = &self.func_starts[index as usize].1;
dynasm!(self.asm
@ -1919,7 +2019,7 @@ impl Context<'_> {
// as scratch registers
// TODO: Allow use of unused argument registers as scratch registers.
/// Writes the function prologue and stores the arguments as locals
pub fn start_function(&mut self, arguments: u32, locals: u32) -> Function {
pub fn start_function(&mut self, arguments: u32, locals: u32) -> FunctionEnd {
let reg_args = &ARGS_IN_GPRS[..(arguments as usize).min(ARGS_IN_GPRS.len())];
// We need space to store the register arguments if we need to call a function
@ -1931,7 +2031,7 @@ impl Context<'_> {
let frame_size: i32 = aligned_stack_slots as i32 * WORD_SIZE as i32;
self.block_state.locals.register_arguments =
reg_args.iter().cloned().map(ValueLocation::Reg).collect();
reg_args.iter().cloned().map(ArgLoc::Register).collect();
self.block_state.locals.num_stack_args = arguments.saturating_sub(ARGS_IN_GPRS.len() as _);
self.block_state.locals.num_local_stack_slots = stack_slots;
self.block_state.return_register = Some(RAX);
@ -1946,14 +2046,14 @@ impl Context<'_> {
);
}
Function {
FunctionEnd {
should_generate_epilogue,
}
}
/// Writes the function epilogue, restoring the stack pointer and returning to the
/// caller.
pub fn epilogue(&mut self, func: Function) {
pub fn epilogue(&mut self, func: FunctionEnd) {
// We don't need to clean up the stack - RSP is restored and
// the calling function has its own register stack and will
// stomp on the registers from our stack if necessary.

Loading…
Cancel
Save