
cranelift: Support callee-saved registers with tail calls on x64 (#8246)

* Add GrowFrame and ShrinkFrame instructions for moving the frame

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* Experimentally emit grow/shrink frame instructions for x64 tail calls

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* Reuse the epilogue generation functions for tail call emission

Instead of building and copying the new frame over the old one, make use
of the frame shrink/grow pseudo-instructions to move the frame, and then
reuse the existing epilogue generation functions to set up the tail call
(a sketch of this strategy follows the commit trailers below).

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* Enable callee saves with the tail calling convention on x64

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* Remove the requirement that indirect calls go through r15 with the tail cc

* Stop using r14 for a temporary during the stack check with the tail cc

* Apply suggestions from code review

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Remove constants in favor of reusing values computed for FrameLayout

Co-authored-by: Jamey Sharp <jsharp@fastly.com>

* Suggestions from review

* Rename the grow/shrink frame instructions, and adjust their comments

* Comments on ArgLoc

* Add more tests for return_call, and fix grow/shrink arg area printing

---------

Co-authored-by: Jamey Sharp <jsharp@fastly.com>
Co-authored-by: Jamey Sharp <jamey@minilop.net>
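
The heart of the new strategy, as a minimal sketch in plain Rust (the helper names here are stand-ins, not real functions; the actual code is `X64CallSite::emit_return_call` in cranelift/codegen/src/isa/x64/abi.rs, shown in the diff below): compare the callee's and caller's stack-argument sizes, slide the frame with a single grow/shrink pseudo-instruction only if they differ, then fall into the ordinary epilogue path and jump.

use std::cmp::Ordering;

// Stand-in for emitting Inst::ShrinkArgumentArea { amount, .. }.
fn emit_shrink_argument_area(amount: u32) {
    println!("shrink_argument_area {amount}");
}

// Stand-in for emitting Inst::GrowArgumentArea { amount, .. }.
fn emit_grow_argument_area(amount: u32) {
    println!("grow_argument_area {amount}");
}

fn emit_return_call(new_stack_arg_size: u32, old_stack_arg_size: u32) {
    // Slide the frame only when the callee's stack-argument area differs
    // in size from the caller's; otherwise the frame is reused in place.
    match new_stack_arg_size.cmp(&old_stack_arg_size) {
        Ordering::Equal => {}
        Ordering::Less => emit_shrink_argument_area(old_stack_arg_size - new_stack_arg_size),
        Ordering::Greater => emit_grow_argument_area(new_stack_arg_size - old_stack_arg_size),
    }
    // ...then place the arguments, run the normal epilogue (restoring
    // callee-saved registers and popping the frame), and jump to the callee.
}

fn main() {
    // Caller has 32 bytes of stack arguments; the callee needs only 16.
    emit_return_call(16, 32);
}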
pull/8251/head
Trevor Elliott authored 7 months ago, committed by GitHub
commit a4613829ae
16 changed files:

  1. cranelift/codegen/src/isa/aarch64/lower/isle.rs (3 lines changed)
  2. cranelift/codegen/src/isa/riscv64/lower/isle.rs (5 lines changed)
  3. cranelift/codegen/src/isa/x64/abi.rs (127 lines changed)
  4. cranelift/codegen/src/isa/x64/inst.isle (17 lines changed)
  5. cranelift/codegen/src/isa/x64/inst/emit.rs (285 lines changed)
  6. cranelift/codegen/src/isa/x64/inst/emit_state.rs (9 lines changed)
  7. cranelift/codegen/src/isa/x64/inst/mod.rs (105 lines changed)
  8. cranelift/codegen/src/isa/x64/lower/isle.rs (1 line changed)
  9. cranelift/codegen/src/isa/x64/pcc.rs (2 lines changed)
  10. cranelift/codegen/src/machinst/abi.rs (69 lines changed)
  11. cranelift/codegen/src/machinst/isle.rs (1 line changed)
  12. cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif (52 lines changed)
  13. cranelift/filetests/filetests/isa/x64/return-call-indirect.clif (64 lines changed)
  14. cranelift/filetests/filetests/isa/x64/return-call.clif (680 lines changed)
  15. cranelift/filetests/filetests/isa/x64/tail-call-conv.clif (1048 lines changed)
  16. cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif (31 lines changed)

cranelift/codegen/src/isa/aarch64/lower/isle.rs (3 lines changed)

@ -100,6 +100,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
self.lower_ctx.sigs(),
callee_sig,
&callee,
// TODO: this should be Opcode::ReturnCall, once aarch64 has been ported to the new
// tail call strategy.
Opcode::Call,
distance,
caller_conv,
self.backend.flags().clone(),

cranelift/codegen/src/isa/riscv64/lower/isle.rs (5 lines changed)

@ -18,7 +18,7 @@ use crate::machinst::{VCodeConstant, VCodeConstantData};
use crate::{
ir::{
immediates::*, types::*, AtomicRmwOp, BlockCall, ExternalName, Inst, InstructionData,
MemFlags, StackSlot, TrapCode, Value, ValueList,
MemFlags, Opcode, StackSlot, TrapCode, Value, ValueList,
},
isa::riscv64::inst::*,
machinst::{ArgPair, InstOutput},
@ -82,6 +82,9 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
self.lower_ctx.sigs(),
callee_sig,
&callee,
// TODO: this should be Opcode::ReturnCall, once riscv64 has been ported to the new
// tail call strategy.
Opcode::Call,
distance,
caller_conv,
self.backend.flags().clone(),

cranelift/codegen/src/isa/x64/abi.rs (127 lines changed)

@ -53,29 +53,20 @@ impl X64ABIMachineSpec {
fn gen_probestack_loop(
insts: &mut SmallInstVec<Inst>,
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
frame_size: u32,
guard_size: u32,
) {
// We have to use a caller-saved register since clobbering only
// happens after stack probing.
let tmp = match call_conv {
// All registers are caller-saved on the `tail` calling convention,
// and `r15` is not used to pass arguments.
isa::CallConv::Tail => regs::r15(),
// `r11` is caller saved on both Fastcall and SystemV, and not used
// for argument passing, so it's pretty much free. It is also not
// used by the stacklimit mechanism.
_ => {
let tmp = regs::r11();
debug_assert!({
let real_reg = tmp.to_real_reg().unwrap();
!is_callee_save_systemv(real_reg, false)
&& !is_callee_save_fastcall(real_reg, false)
!is_callee_save_systemv(real_reg, false) && !is_callee_save_fastcall(real_reg, false)
});
tmp
}
};
insts.push(Inst::StackProbeLoop {
tmp: Writable::from_reg(tmp),
@ -439,20 +430,15 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Inst::lea(mem, into_reg)
}
fn get_stacklimit_reg(call_conv: isa::CallConv) -> Reg {
fn get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg {
// As per comment on trait definition, we must return a caller-save
// register that is not used as an argument here.
match call_conv {
isa::CallConv::Tail => regs::r14(),
_ => {
debug_assert!(!is_callee_save_systemv(
regs::r10().to_real_reg().unwrap(),
false
));
regs::r10()
}
}
}
fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I {
// Only ever used for I64s and vectors; if that changes, see if the
@ -595,14 +581,10 @@ impl ABIMachineSpec for X64ABIMachineSpec {
}
fn gen_clobber_save(
call_conv: isa::CallConv,
_call_conv: isa::CallConv,
flags: &settings::Flags,
frame_layout: &FrameLayout,
) -> SmallVec<[Self::I; 16]> {
if call_conv == isa::CallConv::Tail {
assert!(frame_layout.clobbered_callee_saves.is_empty());
}
let mut insts = SmallVec::new();
if flags.unwind_info() && frame_layout.setup_area_size > 0 {
@ -857,7 +839,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> PRegSet {
match call_conv_of_callee {
isa::CallConv::Tail => ALL_CLOBBERS,
isa::CallConv::Winch => ALL_CLOBBERS,
_ if call_conv_of_callee.extends_windows_fastcall() => WINDOWS_CLOBBERS,
_ => SYSV_CLOBBERS,
@ -882,13 +863,10 @@ impl ABIMachineSpec for X64ABIMachineSpec {
outgoing_args_size: u32,
) -> FrameLayout {
let mut regs: Vec<Writable<RealReg>> = match call_conv {
// The `tail` calling convention doesn't have any callee-save
// registers.
CallConv::Tail => vec![],
// The `winch` calling convention doesn't have any callee-save
// registers.
CallConv::Winch => vec![],
CallConv::Fast | CallConv::Cold | CallConv::SystemV => regs
CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::Tail => regs
.iter()
.cloned()
.filter(|r| is_callee_save_systemv(r.to_reg(), flags.enable_pinned_reg()))
@ -926,46 +904,36 @@ impl ABIMachineSpec for X64ABIMachineSpec {
impl X64CallSite {
pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
let (new_stack_arg_size, old_stack_arg_size) =
self.emit_temporary_tail_call_frame(ctx, args);
// Make a copy of the frame pointer, since we use it when copying down
// the new stack frame.
let fp = ctx.temp_writable_gpr();
let rbp = PReg::from(regs::rbp().to_real_reg().unwrap());
ctx.emit(Inst::MovFromPReg { src: rbp, dst: fp });
// Load the return address, because copying our new stack frame
// over our current stack frame might overwrite it, and we'll need to
// place it in the correct location after we do that copy.
//
// But we only need to actually move the return address if the size of
// stack arguments changes.
let ret_addr = if new_stack_arg_size != old_stack_arg_size {
let ret_addr = ctx.temp_writable_gpr();
ctx.emit(Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 8,
base: *fp.to_reg(),
flags: MemFlags::trusted(),
}),
dst: ret_addr,
let new_stack_arg_size =
u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();
let old_stack_arg_size = ctx.abi().stack_args_size(ctx.sigs());
match new_stack_arg_size.cmp(&old_stack_arg_size) {
core::cmp::Ordering::Equal => {}
core::cmp::Ordering::Less => {
let tmp = ctx.temp_writable_gpr();
ctx.emit(Inst::ShrinkArgumentArea {
amount: old_stack_arg_size - new_stack_arg_size,
tmp,
});
Some(ret_addr.to_reg())
} else {
None
};
}
core::cmp::Ordering::Greater => {
let tmp = ctx.temp_writable_gpr();
ctx.emit(Inst::GrowArgumentArea {
amount: new_stack_arg_size - old_stack_arg_size,
tmp,
});
}
}
// Finally, emit the macro instruction to copy the new stack frame over
// our current one and do the actual tail call!
// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
self.emit_stack_ret_arg_for_tail_call(ctx);
// Finally, do the actual tail call!
let dest = self.dest().clone();
let info = Box::new(ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp: fp.to_reg(),
tmp: ctx.temp_writable_gpr(),
uses: self.take_uses(),
});
match dest {
@ -1029,25 +997,6 @@ impl From<StackAMode> for SyntheticAmode {
fn get_intreg_for_arg(call_conv: &CallConv, idx: usize, arg_idx: usize) -> Option<Reg> {
let is_fastcall = call_conv.extends_windows_fastcall();
if *call_conv == isa::CallConv::Tail {
return match idx {
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
// NB: `r12`, `r13`, `r14` and `r15` are reserved for indirect
// callee addresses and temporaries required for our tail call
// sequence (fp, ret_addr, tmp).
_ => None,
};
}
// Fastcall counts by absolute argument number; SysV counts by argument of
// this (integer) class.
let i = if is_fastcall { arg_idx } else { idx };
@ -1100,16 +1049,12 @@ fn get_intreg_for_retval(
0 => Some(regs::rax()),
1 => Some(regs::rcx()),
2 => Some(regs::rdx()),
3 => Some(regs::rbx()),
4 => Some(regs::rsi()),
5 => Some(regs::rdi()),
6 => Some(regs::r8()),
7 => Some(regs::r9()),
8 => Some(regs::r10()),
9 => Some(regs::r11()),
10 => Some(regs::r12()),
11 => Some(regs::r13()),
12 => Some(regs::r14()),
3 => Some(regs::rsi()),
4 => Some(regs::rdi()),
5 => Some(regs::r8()),
6 => Some(regs::r9()),
7 => Some(regs::r10()),
8 => Some(regs::r11()),
// NB: `r15` is reserved as a scratch register.
_ => None,
},

cranelift/codegen/src/isa/x64/inst.isle (17 lines changed)

@ -541,6 +541,23 @@
(ReturnCallUnknown (callee RegMem)
(info BoxReturnCallInfo))
;; GrowArgumentArea does a memmove of everything in the frame except for
;; the argument area, to make room for more arguments. That includes all
;; the stack slots, the callee-saved registers, and the saved FP and
;; return address. To keep the stack pointers in sync with that change,
;; it also subtracts the given amount from both the FP and SP registers.
(GrowArgumentArea (amount u32)
(tmp WritableGpr))
;; ShrinkArgumentArea does a memmove of everything in the frame except
;; for the argument area, to trim space for fewer arguments. That
;; includes all the stack slots, the callee-saved registers, and the
;; saved FP and return address. To keep the stack pointers in sync with
;; that change, it also adds the given amount to both the FP and SP
;; registers.
(ShrinkArgumentArea (amount u32)
(tmp WritableGpr))
;; A pseudo-instruction that captures register arguments in vregs.
(Args
(args VecArgPair))
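
As a reading aid for the two instruction comments above, here is an illustrative before/after layout for GrowArgumentArea (higher addresses at the top; this diagram is ours, not from the commit):

// Before GrowArgumentArea { amount }     After
//
// | incoming stack args |                | stack args, now     |
// | return address      |                |   `amount` larger   |
// | saved FP            | <- FP          | return address      |
// | slots + clobbers    | <- SP          | saved FP            | <- FP - amount
//                                        | slots + clobbers    | <- SP - amount
//
// Everything below the argument area is memmoved down by `amount`;
// ShrinkArgumentArea is the exact mirror image, moving it back up.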

cranelift/codegen/src/isa/x64/inst/emit.rs (285 lines changed)

@ -1,6 +1,6 @@
use crate::ir;
use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::{KnownSymbol, MemFlags};
use crate::ir::KnownSymbol;
use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength, RegisterOrAmode};
use crate::isa::x64::encoding::rex::{
emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc,
@ -1628,18 +1628,7 @@ pub(crate) fn emit(
callee,
info: call_info,
} => {
emit_return_call_common_sequence(
allocs,
sink,
info,
state,
call_info.new_stack_arg_size,
call_info.old_stack_arg_size,
call_info.ret_addr,
call_info.fp,
call_info.tmp,
&call_info.uses,
);
emit_return_call_common_sequence(allocs, sink, info, state, &call_info.uses);
// Finally, jump to the callee!
//
@ -1660,18 +1649,7 @@ pub(crate) fn emit(
} => {
let callee = callee.with_allocs(allocs);
emit_return_call_common_sequence(
allocs,
sink,
info,
state,
call_info.new_stack_arg_size,
call_info.old_stack_arg_size,
call_info.ret_addr,
call_info.fp,
call_info.tmp,
&call_info.uses,
);
emit_return_call_common_sequence(allocs, sink, info, state, &call_info.uses);
Inst::JmpUnknown { target: callee }.emit(&[], sink, info, state);
sink.add_call_site(ir::Opcode::ReturnCallIndirect);
@ -1722,6 +1700,130 @@ pub(crate) fn emit(
}
}
Inst::GrowArgumentArea { amount, tmp } => {
debug_assert!(*amount > 0);
debug_assert_eq!(*amount % 8, 0);
assert!(
info.flags.preserve_frame_pointers(),
"frame pointers must be enabled for GrowArgumentArea"
);
let tmp = allocs.next(tmp.to_reg().to_reg());
let tmp = Gpr::new(tmp).unwrap();
let tmp_w = WritableGpr::from_reg(tmp);
// As we're increasing the number of stack arguments, we need to move the frame down in
// memory, by decrementing SP by `amount` and looping from lower addresses to higher
// ones, copying down.
// Decrement SP and FP by `amount`
Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::imm(*amount),
Writable::from_reg(regs::rsp()),
)
.emit(&[], sink, info, state);
Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::imm(*amount),
Writable::from_reg(regs::rbp()),
)
.emit(&[], sink, info, state);
// The total size that we're going to copy, including the return address and frame
// pointer that are pushed on the stack already.
let size = i32::try_from(state.nominal_sp_to_fp()).unwrap()
+ i32::try_from(state.frame_layout().setup_area_size).unwrap();
debug_assert_eq!(size % 8, 0);
// Copy the `i`th word in the stack from `SP + amount + i * 8` to `SP + i * 8`. Do this
// from lower to higher addresses to avoid clobbering words we haven't copied yet.
for sp_word_offset in 0..(size / 8) {
let sp_byte_offset = sp_word_offset * 8;
Inst::Mov64MR {
src: SyntheticAmode::nominal_sp_offset(
sp_byte_offset + i32::try_from(*amount).unwrap(),
),
dst: tmp_w,
}
.emit(&[], sink, info, state);
Inst::MovRM {
size: OperandSize::Size64,
src: tmp,
dst: SyntheticAmode::nominal_sp_offset(sp_byte_offset),
}
.emit(&[], sink, info, state);
}
}
Inst::ShrinkArgumentArea { amount, tmp } => {
debug_assert!(*amount > 0);
debug_assert_eq!(*amount % 8, 0);
assert!(
info.flags.preserve_frame_pointers(),
"frame pointers must be enabled for ShrinkArgumentArea"
);
let tmp = allocs.next(tmp.to_reg().to_reg());
let tmp = Gpr::new(tmp).unwrap();
let tmp_w = WritableGpr::from_reg(tmp);
// As we're decreasing the number of stack arguments, we need to move the frame up in
// memory, looping from higher addresses to lower ones copying up, and finally
// incrementing `SP` by `amount`.
// The total size that we're going to copy, including the return address and frame
// pointer that are pushed on the stack already.
let size = i32::try_from(state.nominal_sp_to_fp()).unwrap()
+ i32::try_from(state.frame_layout().setup_area_size).unwrap();
debug_assert_eq!(size % 8, 0);
// Copy the `i`th word in the stack from `SP + i * 8` to `SP + amount + i * 8`. Do this
// from higher to lower addresses to avoid clobbering words we haven't copied yet.
for sp_word_offset in (0..(size / 8)).rev() {
let sp_byte_offset = sp_word_offset * 8;
Inst::Mov64MR {
src: SyntheticAmode::nominal_sp_offset(sp_byte_offset),
dst: tmp_w,
}
.emit(&[], sink, info, state);
Inst::MovRM {
size: OperandSize::Size64,
src: tmp,
dst: SyntheticAmode::nominal_sp_offset(
sp_byte_offset + i32::try_from(*amount).unwrap(),
),
}
.emit(&[], sink, info, state);
}
// Increment SP by `amount`
Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(*amount),
Writable::from_reg(regs::rsp()),
)
.emit(&[], sink, info, state);
// Increment FP by `amount`
Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Add,
RegMemImm::imm(*amount),
Writable::from_reg(regs::rbp()),
)
.emit(&[], sink, info, state);
}
Inst::Args { .. } => {}
Inst::Rets { .. } => {}
@ -4252,11 +4354,6 @@ fn emit_return_call_common_sequence(
sink: &mut MachBuffer<Inst>,
info: &EmitInfo,
state: &mut EmitState,
new_stack_arg_size: u32,
old_stack_arg_size: u32,
ret_addr: Option<Gpr>,
fp: Gpr,
tmp: WritableGpr,
uses: &CallArgList,
) {
assert!(
@ -4269,124 +4366,18 @@ fn emit_return_call_common_sequence(
let _ = allocs.next(u.vreg);
}
let ret_addr = ret_addr.map(|r| Gpr::new(allocs.next(*r)).unwrap());
let fp = allocs.next(*fp);
let tmp = allocs.next(tmp.to_reg().to_reg());
let tmp = Gpr::new(tmp).unwrap();
let tmp_w = WritableGpr::from_reg(tmp);
// Copy the new frame (which is `frame_size` bytes above the SP)
// onto our current frame, using only volatile, non-argument
// registers.
//
// The current stack layout is the following:
//
// | ... |
// +---------------------+
// | ... |
// | stack arguments |
// | ... |
// current | return address |
// frame | old FP | <-- FP
// | ... |
// | old stack slots |
// | ... |
// +---------------------+
// | ... |
// new | new stack arguments |
// frame | ... | <-- SP
// +---------------------+
//
// We need to restore the old FP, copy the new stack arguments over the old
// stack arguments, write the return address into the correct slot just
// after the new stack arguments, adjust SP to point to the new return
// address, and then jump to the callee (which will push the old FP again).
// Restore the old FP into `rbp`.
Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: 0,
base: fp,
flags: MemFlags::trusted(),
}),
dst: Writable::from_reg(Gpr::new(regs::rbp()).unwrap()),
}
.emit(&[], sink, info, state);
// The new lowest address (top of stack) -- relative to FP -- for
// our tail callee. We compute this now so that we can move our
// stack arguments into place.
let callee_sp_relative_to_fp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size);
// Copy over each word, using `tmp` as a temporary register.
//
// Note that we have to do this from stack slots with the highest
// address to lowest address because in the case of when the tail
// callee has more stack arguments than we do, we might otherwise
// overwrite some of our stack arguments before they've been copied
// into place.
assert_eq!(
new_stack_arg_size % 8,
0,
"stack argument space sizes should always be 8-byte aligned"
);
for i in (0..new_stack_arg_size / 8).rev() {
Inst::Mov64MR {
src: SyntheticAmode::Real(Amode::ImmReg {
simm32: (i * 8).try_into().unwrap(),
base: regs::rsp(),
flags: MemFlags::trusted(),
}),
dst: tmp_w,
}
.emit(&[], sink, info, state);
Inst::MovRM {
size: OperandSize::Size64,
src: tmp,
dst: SyntheticAmode::Real(Amode::ImmReg {
// Add 2 because we need to skip over the old FP and the
// return address.
simm32: (callee_sp_relative_to_fp + i64::from((i + 2) * 8))
.try_into()
.unwrap(),
base: fp,
flags: MemFlags::trusted(),
}),
}
.emit(&[], sink, info, state);
}
// Initialize SP for the tail callee, deallocating the temporary
// stack arguments space at the same time.
Inst::LoadEffectiveAddress {
size: OperandSize::Size64,
addr: SyntheticAmode::Real(Amode::ImmReg {
// NB: We add a word to `callee_sp_relative_to_fp` here because the
// callee will push FP, not us.
simm32: callee_sp_relative_to_fp.wrapping_add(8).try_into().unwrap(),
base: fp,
flags: MemFlags::trusted(),
}),
dst: Writable::from_reg(Gpr::new(regs::rsp()).unwrap()),
for inst in
X64ABIMachineSpec::gen_clobber_restore(CallConv::Tail, &info.flags, state.frame_layout())
{
inst.emit(&[], sink, info, state);
}
.emit(&[], sink, info, state);
state.adjust_virtual_sp_offset(-i64::from(new_stack_arg_size));
// Write the return address into the correct stack slot.
if let Some(ret_addr) = ret_addr {
Inst::MovRM {
size: OperandSize::Size64,
src: ret_addr,
dst: SyntheticAmode::Real(Amode::ImmReg {
simm32: 0,
base: regs::rsp(),
flags: MemFlags::trusted(),
}),
}
.emit(&[], sink, info, state);
for inst in X64ABIMachineSpec::gen_epilogue_frame_restore(
CallConv::Tail,
&info.flags,
&info.isa_flags,
state.frame_layout(),
) {
inst.emit(&[], sink, info, state);
}
}
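
The copy-direction comments in the two emit arms above are the crux: source and destination ranges overlap, so the word-by-word copy must run away from the overlap. A standalone model in plain Rust (indices stand in for stack words, index 0 being the lowest address; hypothetical, purely to illustrate the invariant):

// GrowArgumentArea direction: the frame slides toward lower addresses,
// so copy ascending; each write only clobbers an already-copied word.
fn grow_copy(stack: &mut [u64], frame_words: usize, amount_words: usize) {
    for i in 0..frame_words {
        stack[i] = stack[i + amount_words];
    }
}

// ShrinkArgumentArea direction: the frame slides toward higher addresses,
// so copy descending for the same reason.
fn shrink_copy(stack: &mut [u64], frame_words: usize, amount_words: usize) {
    for i in (0..frame_words).rev() {
        stack[i + amount_words] = stack[i];
    }
}

fn main() {
    let mut stack = vec![0, 10, 20, 30, 40]; // frame is [10, 20, 30, 40]
    grow_copy(&mut stack, 4, 1);
    assert_eq!(&stack[0..4], [10, 20, 30, 40]);

    let mut stack = vec![10, 20, 30, 40, 0];
    shrink_copy(&mut stack, 4, 1);
    assert_eq!(&stack[1..5], [10, 20, 30, 40]);
}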

cranelift/codegen/src/isa/x64/inst/emit_state.rs (9 lines changed)

@ -14,6 +14,10 @@ pub struct EmitState {
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compiletime. See [cranelift_control].
ctrl_plane: ControlPlane,
/// A copy of the frame layout, used during the emission of `Inst::ReturnCallKnown` and
/// `Inst::ReturnCallUnknown` instructions.
frame_layout: FrameLayout,
}
impl MachInstEmitState<Inst> for EmitState {
@ -23,6 +27,7 @@ impl MachInstEmitState<Inst> for EmitState {
nominal_sp_to_fp: abi.frame_size() as i64,
stack_map: None,
ctrl_plane,
frame_layout: abi.frame_layout().clone(),
}
}
@ -62,4 +67,8 @@ impl EmitState {
pub(crate) fn nominal_sp_to_fp(&self) -> i64 {
self.nominal_sp_to_fp
}
pub(crate) fn frame_layout(&self) -> &FrameLayout {
&self.frame_layout
}
}

cranelift/codegen/src/isa/x64/inst/mod.rs (105 lines changed)

@ -52,20 +52,6 @@ pub struct CallInfo {
/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo {
/// The size of the new stack frame's stack arguments. This is necessary
/// for copying the frame over our current frame. It must already be
/// allocated on the stack.
pub new_stack_arg_size: u32,
/// The size of the current/old stack frame's stack arguments.
pub old_stack_arg_size: u32,
/// The return address. Needs to be written into the correct stack slot
/// after the new stack frame is copied into place.
pub ret_addr: Option<Gpr>,
/// A copy of the frame pointer, because we will overwrite the current
/// `rbp`.
pub fp: Gpr,
/// A temporary register.
pub tmp: WritableGpr,
/// The in-register arguments and their constraints.
pub uses: CallArgList,
}
@ -138,6 +124,8 @@ impl Inst {
| Inst::Pop64 { .. }
| Inst::Push64 { .. }
| Inst::StackProbeLoop { .. }
| Inst::GrowArgumentArea { .. }
| Inst::ShrinkArgumentArea { .. }
| Inst::Args { .. }
| Inst::Rets { .. }
| Inst::Ret { .. }
@ -1675,26 +1663,8 @@ impl PrettyPrint for Inst {
}
Inst::ReturnCallKnown { callee, info } => {
let ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp,
tmp,
uses,
} = &**info;
let ret_addr = ret_addr.map(|r| regs::show_reg(*r));
let fp = regs::show_reg(fp.to_reg());
let tmp = regs::show_reg(tmp.to_reg().to_reg());
let mut s = format!(
"return_call_known \
{callee:?} \
new_stack_arg_size:{new_stack_arg_size} \
old_stack_arg_size:{old_stack_arg_size} \
ret_addr:{ret_addr:?} \
fp:{fp} \
tmp:{tmp}"
);
let ReturnCallInfo { uses } = &**info;
let mut s = format!("return_call_known {callee:?}");
for ret in uses {
let preg = regs::show_reg(ret.preg);
let vreg = pretty_print_reg(ret.vreg, 8, allocs);
@ -1704,27 +1674,9 @@ impl PrettyPrint for Inst {
}
Inst::ReturnCallUnknown { callee, info } => {
let ReturnCallInfo {
new_stack_arg_size,
old_stack_arg_size,
ret_addr,
fp,
tmp,
uses,
} = &**info;
let ReturnCallInfo { uses } = &**info;
let callee = callee.pretty_print(8, allocs);
let ret_addr = ret_addr.map(|r| regs::show_reg(*r));
let fp = regs::show_reg(fp.to_reg());
let tmp = regs::show_reg(tmp.to_reg().to_reg());
let mut s = format!(
"return_call_unknown \
{callee} \
new_stack_arg_size:{new_stack_arg_size} \
old_stack_arg_size:{old_stack_arg_size} \
ret_addr:{ret_addr:?} \
fp:{fp} \
tmp:{tmp}"
);
let mut s = format!("return_call_unknown {callee}");
for ret in uses {
let preg = regs::show_reg(ret.preg);
let vreg = pretty_print_reg(ret.vreg, 8, allocs);
@ -1733,6 +1685,18 @@ impl PrettyPrint for Inst {
s
}
Inst::GrowArgumentArea { amount, tmp } => {
let amount = *amount;
let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs);
format!("grow_argument_area {amount} {tmp}")
}
Inst::ShrinkArgumentArea { amount, tmp } => {
let amount = *amount;
let tmp = pretty_print_reg(tmp.to_reg().to_reg(), 8, allocs);
format!("shrink_argument_area {amount} {tmp}")
}
Inst::Args { args } => {
let mut s = "args".to_string();
for arg in args {
@ -2365,11 +2329,6 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
Inst::CallUnknown { info, dest, .. } => {
let info = info.as_ref().expect("CallInfo is expected in this path");
match dest {
RegMem::Reg { reg } if info.callee_conv == CallConv::Tail => {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
// This shouldn't be a fixed register constraint.
collector.reg_fixed_use(*reg, regs::r15())
}
RegMem::Reg { reg } if info.callee_conv == CallConv::Winch => {
// TODO(https://github.com/bytecodealliance/regalloc2/issues/145):
// This shouldn't be a fixed register constraint.
@ -2387,42 +2346,24 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
}
Inst::ReturnCallKnown { callee, info } => {
let ReturnCallInfo {
ret_addr,
fp,
tmp,
uses,
..
} = &**info;
let ReturnCallInfo { uses } = &**info;
// Same as in the `Inst::CallKnown` branch.
debug_assert_ne!(*callee, ExternalName::LibCall(LibCall::Probestack));
for u in uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
if let Some(ret_addr) = ret_addr {
collector.reg_use(**ret_addr);
}
collector.reg_use(**fp);
collector.reg_early_def(tmp.to_writable_reg());
}
Inst::ReturnCallUnknown { callee, info } => {
let ReturnCallInfo {
ret_addr,
fp,
tmp,
uses,
..
} = &**info;
let ReturnCallInfo { uses } = &**info;
callee.get_operands(collector);
for u in uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
if let Some(ret_addr) = ret_addr {
collector.reg_use(**ret_addr);
}
collector.reg_use(**fp);
collector.reg_early_def(tmp.to_writable_reg());
Inst::GrowArgumentArea { tmp, .. } | Inst::ShrinkArgumentArea { tmp, .. } => {
collector.reg_def(tmp.to_writable_reg());
}
Inst::JmpTableSeq {

cranelift/codegen/src/isa/x64/lower/isle.rs (1 line changed)

@ -118,6 +118,7 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
self.lower_ctx.sigs(),
callee_sig,
&callee,
Opcode::ReturnCall,
distance,
caller_conv,
self.backend.flags().clone(),

cranelift/codegen/src/isa/x64/pcc.rs (2 lines changed)

@ -808,6 +808,8 @@ pub(crate) fn check(
| Inst::ReturnCallKnown { .. }
| Inst::JmpKnown { .. }
| Inst::Ret { .. }
| Inst::GrowArgumentArea { .. }
| Inst::ShrinkArgumentArea { .. }
| Inst::JmpIf { .. }
| Inst::JmpCond { .. }
| Inst::TrapIf { .. }

cranelift/codegen/src/machinst/abi.rs (69 lines changed)

@ -987,6 +987,7 @@ impl std::ops::Index<Sig> for SigSet {
}
/// Structure describing the layout of a function's stack frame.
#[derive(Clone, Debug, Default)]
pub struct FrameLayout {
/// N.B. The areas whose sizes are given in this structure fully
/// cover the current function's stack frame, from high to low
@ -1886,7 +1887,7 @@ impl<M: ABIMachineSpec> Callee<M> {
/// This should include any stack frame or other setup necessary to use the
/// other methods (`load_arg`, `store_retval`, and spillslot accesses.)
pub fn gen_prologue(&self) -> SmallInstVec<M::I> {
let frame_layout = self.frame_layout.as_ref().unwrap();
let frame_layout = self.frame_layout();
let mut insts = smallvec![];
// Set up frame.
@ -1953,7 +1954,7 @@ impl<M: ABIMachineSpec> Callee<M> {
/// emitting this in the lowering logic), because the epilogue code comes
/// before the return and the two are likely closely related.
pub fn gen_epilogue(&self) -> SmallInstVec<M::I> {
let frame_layout = self.frame_layout.as_ref().unwrap();
let frame_layout = self.frame_layout();
let mut insts = smallvec![];
// Restore clobbered registers.
@ -1988,25 +1989,27 @@ impl<M: ABIMachineSpec> Callee<M> {
insts
}
/// Return a reference to the computed frame layout information. This
/// function will panic if it's called before [`Self::compute_frame_layout`].
pub fn frame_layout(&self) -> &FrameLayout {
self.frame_layout
.as_ref()
.expect("frame layout not computed before prologue generation")
}
/// Returns the full frame size for the given function, after prologue
/// emission has run. This comprises the spill slots and stack-storage
/// slots as well as storage for clobbered callee-save registers, but
/// not arguments pushed at callsites within this function,
/// or other ephemeral pushes.
pub fn frame_size(&self) -> u32 {
let frame_layout = self
.frame_layout
.as_ref()
.expect("frame size not computed before prologue generation");
let frame_layout = self.frame_layout();
frame_layout.clobber_size + frame_layout.fixed_frame_storage_size
}
/// Returns offset from the nominal SP to caller's SP.
pub fn nominal_sp_to_caller_sp_offset(&self) -> u32 {
let frame_layout = self
.frame_layout
.as_ref()
.expect("frame size not computed before prologue generation");
let frame_layout = self.frame_layout();
frame_layout.clobber_size
+ frame_layout.fixed_frame_storage_size
+ frame_layout.setup_area_size
@ -2068,8 +2071,14 @@ impl<M: ABIMachineSpec> Callee<M> {
/// The register or stack slot location of an argument.
#[derive(Clone, Debug)]
pub enum ArgLoc {
/// The physical register that the value will be passed through.
Reg(PReg),
Stack(StackAMode),
/// The offset into the argument area where this value will be passed. It's up to the consumer
/// of the `ArgLoc::Stack` variant to decide how to find the argument area that the `offset`
/// value is relative to. Depending on the ABI, this may end up being relative to SP or FP, for
/// example with a tail call where the frame is reused.
Stack { offset: i64, ty: ir::Type },
}
/// An input argument to a call instruction: the vreg that is used,
@ -2133,6 +2142,7 @@ impl<M: ABIMachineSpec> CallSite<M> {
sigs: &SigSet,
sig_ref: ir::SigRef,
extname: &ir::ExternalName,
opcode: ir::Opcode,
dist: RelocDistance,
caller_conv: isa::CallConv,
flags: settings::Flags,
@ -2145,7 +2155,7 @@ impl<M: ABIMachineSpec> CallSite<M> {
defs: smallvec![],
clobbers,
dest: CallDest::ExtName(extname.clone(), dist),
opcode: ir::Opcode::Call,
opcode,
caller_conv,
flags,
_mach: PhantomData,
@ -2213,6 +2223,17 @@ impl<M: ABIMachineSpec> CallSite<M> {
pub(crate) fn take_uses(self) -> CallArgList {
self.uses
}
pub(crate) fn sig<'a>(&self, sigs: &'a SigSet) -> &'a SigData {
&sigs[self.sig]
}
pub(crate) fn is_tail_call(&self) -> bool {
matches!(
self.opcode,
ir::Opcode::ReturnCall | ir::Opcode::ReturnCallIndirect
)
}
}
fn adjust_stack_and_nominal_sp<M: ABIMachineSpec>(ctx: &mut Lower<M::I>, amount: i32) {
@ -2329,7 +2350,22 @@ impl<M: ABIMachineSpec> CallSite<M> {
vreg,
preg: preg.into(),
}),
ArgLoc::Stack(amode) => ctx.emit(M::gen_store_stack(amode, vreg, amode.get_type())),
ArgLoc::Stack { offset, ty } => {
let amode = if self.is_tail_call() {
assert!(
self.flags.preserve_frame_pointers(),
"tail calls require frame pointers to be enabled"
);
StackAMode::FPOffset(
offset + M::fp_to_arg_offset(self.caller_conv, &self.flags),
ty,
)
} else {
StackAMode::SPOffset(offset, ty)
};
ctx.emit(M::gen_store_stack(amode, vreg, ty))
}
}
}
}
@ -2416,10 +2452,7 @@ impl<M: ABIMachineSpec> CallSite<M> {
} else {
(*from_reg, ty)
};
locs.push((
data.into(),
ArgLoc::Stack(StackAMode::SPOffset(offset, ty)),
));
locs.push((data.into(), ArgLoc::Stack { offset, ty }));
}
}
}
@ -2444,7 +2477,7 @@ impl<M: ABIMachineSpec> CallSite<M> {
ABIArgSlot::Reg { reg, .. } => ArgLoc::Reg(reg.into()),
ABIArgSlot::Stack { offset, .. } => {
let ty = M::word_type();
ArgLoc::Stack(StackAMode::SPOffset(offset, ty))
ArgLoc::Stack { offset, ty }
}
};
locs.push((tmp.into(), loc));
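
A condensed restatement of the `ArgLoc::Stack` dispatch above, in plain Rust (names invented for illustration; only `fp_to_arg_offset` corresponds to a real trait hook): for a tail call the offset resolves against FP, because the arguments overlay the caller's own incoming argument area just above the saved FP and return address, while an ordinary call writes into freshly allocated outgoing space above SP.

#[derive(Debug)]
enum ResolvedAmode {
    FpOffset(i64), // tail call: the caller's incoming argument area
    SpOffset(i64), // ordinary call: outgoing argument space
}

fn resolve_stack_arg(offset: i64, is_tail_call: bool, fp_to_arg_offset: i64) -> ResolvedAmode {
    if is_tail_call {
        // fp_to_arg_offset skips over the saved FP and return address.
        ResolvedAmode::FpOffset(offset + fp_to_arg_offset)
    } else {
        ResolvedAmode::SpOffset(offset)
    }
}

fn main() {
    // With a 16-byte setup area (saved FP + return address), the first
    // stack argument of a tail call lands at FP + 16.
    println!("{:?}", resolve_stack_arg(0, true, 16));
}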

cranelift/codegen/src/machinst/isle.rs (1 line changed)

@ -758,6 +758,7 @@ macro_rules! isle_prelude_caller_methods {
self.lower_ctx.sigs(),
sig_ref,
&extname,
Opcode::Call,
dist,
caller_conv,
self.backend.flags().clone(),

cranelift/filetests/filetests/isa/x64/fuzzbug-60035.clif (52 lines changed)

@ -15,25 +15,14 @@ block0:
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $64, %rsp
; movq %rbx, 16(%rsp)
; movq %r12, 24(%rsp)
; movq %r13, 32(%rsp)
; movq %r14, 40(%rsp)
; movq %r15, 48(%rsp)
; subq %rsp, $16, %rsp
; movq %rbx, 0(%rsp)
; block0:
; load_ext_name userextname0+0, %r15
; movq %r15, rsp(0 + virtual offset)
; movq rsp(0 + virtual offset), %r15
; call *%r15
; movq rsp(0 + virtual offset), %r15
; call *%r15
; movq 16(%rsp), %rbx
; movq 24(%rsp), %r12
; movq 32(%rsp), %r13
; movq 40(%rsp), %r14
; movq 48(%rsp), %r15
; addq %rsp, $64, %rsp
; load_ext_name userextname0+0, %rbx
; call *%rbx
; call *%rbx
; movq 0(%rsp), %rbx
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
@ -42,25 +31,14 @@ block0:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x40, %rsp
; movq %rbx, 0x10(%rsp)
; movq %r12, 0x18(%rsp)
; movq %r13, 0x20(%rsp)
; movq %r14, 0x28(%rsp)
; movq %r15, 0x30(%rsp)
; block1: ; offset 0x21
; movabsq $0, %r15 ; reloc_external Abs8 u1:7 0
; movq %r15, (%rsp)
; movq (%rsp), %r15
; callq *%r15
; movq (%rsp), %r15
; callq *%r15
; movq 0x10(%rsp), %rbx
; movq 0x18(%rsp), %r12
; movq 0x20(%rsp), %r13
; movq 0x28(%rsp), %r14
; movq 0x30(%rsp), %r15
; addq $0x40, %rsp
; subq $0x10, %rsp
; movq %rbx, (%rsp)
; block1: ; offset 0xc
; movabsq $0, %rbx ; reloc_external Abs8 u1:7 0
; callq *%rbx
; callq *%rbx
; movq (%rsp), %rbx
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/return-call-indirect.clif (64 lines changed)

@ -14,7 +14,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; lea 10(%rax), %rax
; lea 10(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
@ -24,7 +24,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; addq $0xa, %rax
; leaq 0xa(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; retq
@ -42,20 +42,18 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; load_ext_name %callee_i64+0, %rdx
; movq %rbp, %rcx
; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax
; load_ext_name %callee_i64+0, %rax
; return_call_unknown %rax %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movabsq $0, %rdx ; reloc_external Abs8 %callee_i64 0
; movq %rbp, %rcx
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%rdx
; movabsq $0, %rax ; reloc_external Abs8 %callee_i64 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -72,20 +70,18 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; load_ext_name %callee_i64+0, %rdx
; movq %rbp, %rcx
; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax
; load_ext_name %callee_i64+0, %rax
; return_call_unknown %rax %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; leaq (%rip), %rdx ; reloc_external CallPCRel4 %callee_i64 -4
; movq %rbp, %rcx
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%rdx
; leaq (%rip), %rax ; reloc_external CallPCRel4 %callee_i64 -4
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -143,20 +139,18 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; load_ext_name %callee_f64+0, %rdx
; movq %rbp, %rcx
; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %xmm0=%xmm0
; load_ext_name %callee_f64+0, %rax
; return_call_unknown %rax %xmm0=%xmm0
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movabsq $0, %rdx ; reloc_external Abs8 %callee_f64 0
; movq %rbp, %rcx
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%rdx
; movabsq $0, %rax ; reloc_external Abs8 %callee_f64 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -171,7 +165,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; testb %al, %al
; testb %dil, %dil
; setz %al
; movq %rbp, %rsp
; popq %rbp
@ -182,7 +176,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; testb %al, %al
; testb %dil, %dil
; sete %al
; movq %rbp, %rsp
; popq %rbp
@ -201,18 +195,16 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; load_ext_name %callee_i8+0, %rdx
; movq %rbp, %rcx
; return_call_unknown %rdx new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v194 tmp:%v195 %rax=%rax
; load_ext_name %callee_i8+0, %rax
; return_call_unknown %rax %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movabsq $0, %rdx ; reloc_external Abs8 %callee_i8 0
; movq %rbp, %rcx
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%rdx
; movabsq $0, %rax ; reloc_external Abs8 %callee_i8 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax

cranelift/filetests/filetests/isa/x64/return-call.clif (680 lines changed)

@ -14,7 +14,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; lea 10(%rax), %rax
; lea 10(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; ret
@ -24,7 +24,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; addq $0xa, %rax
; leaq 0xa(%rdi), %rax
; movq %rbp, %rsp
; popq %rbp
; retq
@ -40,20 +40,18 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rbp, %rcx
; load_ext_name %callee_i64+0, %r8
; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax
; load_ext_name %callee_i64+0, %rax
; return_call_unknown %rax %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rbp, %rcx
; movabsq $0, %r8 ; reloc_external Abs8 %callee_i64 0
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%r8
; movabsq $0, %rax ; reloc_external Abs8 %callee_i64 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test colocated tail calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -68,18 +66,16 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rbp, %rcx
; return_call_known TestCase(%callee_i64) new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax
; return_call_known TestCase(%callee_i64) %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rbp, %rcx
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmp 0x13 ; reloc_external CallPCRel4 %callee_i64 -4
; movq %rbp, %rsp
; popq %rbp
; jmp 0xd ; reloc_external CallPCRel4 %callee_i64 -4
;;;; Test passing `f64`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -135,20 +131,18 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rbp, %rax
; load_ext_name %callee_f64+0, %r8
; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %xmm0=%xmm0
; load_ext_name %callee_f64+0, %rax
; return_call_unknown %rax %xmm0=%xmm0
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rbp, %rax
; movabsq $0, %r8 ; reloc_external Abs8 %callee_f64 0
; movq (%rax), %rbp
; leaq 8(%rax), %rsp
; jmpq *%r8
; movabsq $0, %rax ; reloc_external Abs8 %callee_f64 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test passing `i8`s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -163,7 +157,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; testb %al, %al
; testb %dil, %dil
; setz %al
; movq %rbp, %rsp
; popq %rbp
@ -174,7 +168,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; testb %al, %al
; testb %dil, %dil
; sete %al
; movq %rbp, %rsp
; popq %rbp
@ -191,20 +185,177 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rbp, %rcx
; load_ext_name %callee_i8+0, %r8
; return_call_unknown %r8 new_stack_arg_size:0 old_stack_arg_size:0 ret_addr:None fp:%v193 tmp:%v194 %rax=%rax
; load_ext_name %callee_i8+0, %rax
; return_call_unknown %rax %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movabsq $0, %rax ; reloc_external Abs8 %callee_i8 0
; movq %rbp, %rsp
; popq %rbp
; jmpq *%rax
;;;; Test passing fewer arguments on the stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
function %one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail {
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 16(%rbp), %r10
; movq %rbp, %rsp
; popq %rbp
; ret 16
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq 0x10(%rbp), %r10
; movq %rbp, %rsp
; popq %rbp
; retq $0x10
function %call_one_stack_arg(i32, i32, i32, i32, i32, i32, i32, i32, i32) tail {
fn0 = colocated %one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
return_call fn0(v2, v3, v4, v5, v6, v7, v8)
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %r8, %r10
; movq %rdx, %rdi
; movq %rcx, %rsi
; movq %r9, %rcx
; movq 16(%rbp), %r8
; movq 24(%rbp), %r9
; movq 32(%rbp), %rax
; shrink_argument_area 16 %rdx
; movl %eax, 16(%rbp)
; movq %r10, %rdx
; return_call_known TestCase(%one_stack_arg) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %r8, %r10
; movq %rdx, %rdi
; movq %rcx, %rsi
; movq %r9, %rcx
; movq 0x10(%rbp), %r8
; movq 0x18(%rbp), %r9
; movq 0x20(%rbp), %rax
; movq 8(%rsp), %rdx
; movq %rdx, 0x18(%rsp)
; movq (%rsp), %rdx
; movq %rdx, 0x10(%rsp)
; addq $0x10, %rsp
; addq $0x10, %rbp
; movl %eax, 0x10(%rbp)
; movq %r10, %rdx
; movq %rbp, %rsp
; popq %rbp
; jmp 0x46 ; reloc_external CallPCRel4 %one_stack_arg -4
function %call_zero_stack_args(i32, i32, i32, i32, i32, i32, i32, i32, i8) -> i8 tail {
fn0 = colocated %callee_i8(i8) -> i8 tail
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i8):
return_call fn0(v8)
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 16(%rbp), %r10
; movq 24(%rbp), %rsi
; movq 32(%rbp), %rdi
; shrink_argument_area 32 %rdx
; return_call_known TestCase(%callee_i8) %rdi=%rdi
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq 0x10(%rbp), %r10
; movq 0x18(%rbp), %rsi
; movq 0x20(%rbp), %rdi
; movq 8(%rsp), %rdx
; movq %rdx, 0x28(%rsp)
; movq (%rsp), %rdx
; movq %rdx, 0x20(%rsp)
; addq $0x20, %rsp
; addq $0x20, %rbp
; movq %rbp, %rsp
; popq %rbp
; jmp 0x34 ; reloc_external CallPCRel4 %callee_i8 -4
;;;; Test growing the argument area when it's non-empty ;;;;;;;;;;;;;;;;;;;;;;;;
function %call_from_one_stack_arg(i32, i32, i32, i32, i32, i32, i32) tail {
fn0 = colocated %call_one_stack_arg(i32, i32, i32, i32, i32, i32, i32, i32, i32) tail
block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
return_call fn0(v1, v2, v3, v4, v5, v6, v0, v0, v1)
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdx, %r10
; movq %rcx, %rdx
; movq %r8, %rcx
; movq %r9, %r8
; movq 16(%rbp), %r9
; grow_argument_area 16 %rax
; movl %edi, 16(%rbp)
; movl %edi, 24(%rbp)
; movl %esi, 32(%rbp)
; movq %rsi, %rdi
; movq %r10, %rsi
; return_call_known TestCase(%call_one_stack_arg) %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rbp, %rcx
; movabsq $0, %r8 ; reloc_external Abs8 %callee_i8 0
; movq (%rcx), %rbp
; leaq 8(%rcx), %rsp
; jmpq *%r8
; movq %rdx, %r10
; movq %rcx, %rdx
; movq %r8, %rcx
; movq %r9, %r8
; movq 0x10(%rbp), %r9
; subq $0x10, %rsp
; subq $0x10, %rbp
; movq 0x10(%rsp), %rax
; movq %rax, (%rsp)
; movq 0x18(%rsp), %rax
; movq %rax, 8(%rsp)
; movl %edi, 0x10(%rbp)
; movl %edi, 0x18(%rbp)
; movl %esi, 0x20(%rbp)
; movq %rsi, %rdi
; movq %r10, %rsi
; movq %rbp, %rsp
; popq %rbp
; jmp 0x47 ; reloc_external CallPCRel4 %call_one_stack_arg -4
;;;; Test passing many arguments on stack ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -217,50 +368,58 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq 16(%rbp), %rax
; movq 24(%rbp), %rdx
; movq 32(%rbp), %r9
; movq 40(%rbp), %r11
; movq 48(%rbp), %rdi
; movq 56(%rbp), %rcx
; movq 64(%rbp), %r8
; movq 72(%rbp), %r10
; movq 80(%rbp), %rsi
; movq 88(%rbp), %rax
; movq 96(%rbp), %rdx
; movq 104(%rbp), %r9
; movq 112(%rbp), %r11
; movq 120(%rbp), %rdi
; movq 128(%rbp), %rcx
; movq 136(%rbp), %rax
; movq 16(%rbp), %r10
; movq 24(%rbp), %rsi
; movq 32(%rbp), %rax
; movq 40(%rbp), %rdx
; movq 48(%rbp), %r9
; movq 56(%rbp), %r11
; movq 64(%rbp), %rdi
; movq 72(%rbp), %rcx
; movq 80(%rbp), %r8
; movq 88(%rbp), %r10
; movq 96(%rbp), %rsi
; movq 104(%rbp), %rax
; movq 112(%rbp), %rdx
; movq 120(%rbp), %r9
; movq 128(%rbp), %r11
; movq 136(%rbp), %rdi
; movq 144(%rbp), %rcx
; movq 152(%rbp), %r8
; movq 160(%rbp), %r10
; movq 168(%rbp), %rax
; movq %rbp, %rsp
; popq %rbp
; ret 128
; ret 160
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq 0x10(%rbp), %rax
; movq 0x18(%rbp), %rdx
; movq 0x20(%rbp), %r9
; movq 0x28(%rbp), %r11
; movq 0x30(%rbp), %rdi
; movq 0x38(%rbp), %rcx
; movq 0x40(%rbp), %r8
; movq 0x48(%rbp), %r10
; movq 0x50(%rbp), %rsi
; movq 0x58(%rbp), %rax
; movq 0x60(%rbp), %rdx
; movq 0x68(%rbp), %r9
; movq 0x70(%rbp), %r11
; movq 0x78(%rbp), %rdi
; movq 0x80(%rbp), %rcx
; movq 0x88(%rbp), %rax
; movq 0x10(%rbp), %r10
; movq 0x18(%rbp), %rsi
; movq 0x20(%rbp), %rax
; movq 0x28(%rbp), %rdx
; movq 0x30(%rbp), %r9
; movq 0x38(%rbp), %r11
; movq 0x40(%rbp), %rdi
; movq 0x48(%rbp), %rcx
; movq 0x50(%rbp), %r8
; movq 0x58(%rbp), %r10
; movq 0x60(%rbp), %rsi
; movq 0x68(%rbp), %rax
; movq 0x70(%rbp), %rdx
; movq 0x78(%rbp), %r9
; movq 0x80(%rbp), %r11
; movq 0x88(%rbp), %rdi
; movq 0x90(%rbp), %rcx
; movq 0x98(%rbp), %r8
; movq 0xa0(%rbp), %r10
; movq 0xa8(%rbp), %rax
; movq %rbp, %rsp
; popq %rbp
; retq $0x80
; retq $0xa0
function %tail_caller_stack_args() -> i64 tail {
fn0 = %tail_callee_stack_args(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) -> i64 tail
@ -298,195 +457,226 @@ block0:
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $112, %rsp
; subq %rsp, $160, %rsp
; movq %rbx, 112(%rsp)
; movq %r12, 120(%rsp)
; movq %r13, 128(%rsp)
; movq %r14, 136(%rsp)
; movq %r15, 144(%rsp)
; block0:
; movl $10, %eax
; movq %rax, rsp(96 + virtual offset)
; movl $15, %ecx
; movq %rcx, rsp(88 + virtual offset)
; movl $10, %edi
; movq %rdi, rsp(96 + virtual offset)
; movl $15, %esi
; movq %rsi, rsp(88 + virtual offset)
; movl $20, %edx
; movq %rdx, rsp(80 + virtual offset)
; movl $25, %ebx
; movq %rbx, rsp(72 + virtual offset)
; movl $30, %esi
; movq %rsi, rsp(64 + virtual offset)
; movl $35, %edi
; movq %rdi, rsp(56 + virtual offset)
; movl $40, %r8d
; movq %r8, rsp(48 + virtual offset)
; movl $45, %r9d
; movq %r9, rsp(40 + virtual offset)
; movl $50, %r10d
; movq %r10, rsp(32 + virtual offset)
; movl $55, %r11d
; movq %r11, rsp(24 + virtual offset)
; movl $60, %r15d
; movl $65, %r12d
; movl $70, %r13d
; movl $75, %r14d
; movl $80, %ecx
; movq %rcx, rsp(16 + virtual offset)
; movl $85, %ecx
; movl $25, %ecx
; movq %rcx, rsp(72 + virtual offset)
; movl $30, %r8d
; movq %r8, rsp(64 + virtual offset)
; movl $35, %r9d
; movq %r9, rsp(56 + virtual offset)
; movl $40, %eax
; movl $45, %r10d
; movl $50, %r11d
; movl $55, %r13d
; movl $60, %r14d
; movl $65, %r15d
; movl $70, %ebx
; movl $75, %r12d
; movl $80, %edi
; movl $85, %esi
; movq %rsi, rsp(48 + virtual offset)
; movl $90, %edx
; movl $95, %ebx
; movl $100, %esi
; movl $105, %edi
; movl $110, %r8d
; movl $115, %r9d
; movl $120, %r10d
; movl $125, %r11d
; movl $130, %eax
; movq %rax, rsp(8 + virtual offset)
; movl $135, %eax
; movq %rax, rsp(0 + virtual offset)
; subq %rsp, $128, %rsp
; virtual_sp_offset_adjust 128
; movq %r15, 0(%rsp)
; movq %r12, 8(%rsp)
; movq %r13, 16(%rsp)
; movq %r14, 24(%rsp)
; movq rsp(16 + virtual offset), %rax
; movq %rax, 32(%rsp)
; movq %rcx, 40(%rsp)
; movq %rdx, 48(%rsp)
; movq %rbx, 56(%rsp)
; movq %rsi, 64(%rsp)
; movq %rdi, 72(%rsp)
; movq %r8, 80(%rsp)
; movq %r9, 88(%rsp)
; movq %r10, 96(%rsp)
; movq %r11, 104(%rsp)
; movq rsp(8 + virtual offset), %rax
; movq %rax, 112(%rsp)
; movq rsp(0 + virtual offset), %rax
; movq %rax, 120(%rsp)
; movq %rbp, %r15
; movq 8(%r15), %r13
; load_ext_name %tail_callee_stack_args+0, %r12
; movq rsp(96 + virtual offset), %rax
; movq rsp(88 + virtual offset), %rcx
; movl $95, %ecx
; movl $100, %r8d
; movl $105, %r9d
; movl $110, %esi
; movq %rsi, rsp(40 + virtual offset)
; movl $115, %esi
; movq %rsi, rsp(32 + virtual offset)
; movl $120, %esi
; movq %rsi, rsp(24 + virtual offset)
; movl $125, %esi
; movq %rsi, rsp(16 + virtual offset)
; movl $130, %esi
; movq %rsi, rsp(8 + virtual offset)
; movl $135, %esi
; movq %rsi, rsp(0 + virtual offset)
; grow_argument_area 160 %rsi
; movq %rax, 16(%rbp)
; movq %r10, 24(%rbp)
; movq %r11, 32(%rbp)
; movq %r13, 40(%rbp)
; movq %r14, 48(%rbp)
; movq %r15, 56(%rbp)
; movq %rbx, 64(%rbp)
; movq %r12, 72(%rbp)
; movq %rdi, 80(%rbp)
; movq rsp(48 + virtual offset), %rdi
; movq %rdi, 88(%rbp)
; movq %rdx, 96(%rbp)
; movq %rcx, 104(%rbp)
; movq %r8, 112(%rbp)
; movq %r9, 120(%rbp)
; movq rsp(40 + virtual offset), %rsi
; movq %rsi, 128(%rbp)
; movq rsp(32 + virtual offset), %rsi
; movq %rsi, 136(%rbp)
; movq rsp(24 + virtual offset), %rsi
; movq %rsi, 144(%rbp)
; movq rsp(16 + virtual offset), %rsi
; movq %rsi, 152(%rbp)
; movq rsp(8 + virtual offset), %rsi
; movq %rsi, 160(%rbp)
; movq rsp(0 + virtual offset), %rsi
; movq %rsi, 168(%rbp)
; load_ext_name %tail_callee_stack_args+0, %r10
; movq rsp(72 + virtual offset), %rcx
; movq rsp(80 + virtual offset), %rdx
; movq rsp(72 + virtual offset), %rbx
; movq rsp(64 + virtual offset), %rsi
; movq rsp(56 + virtual offset), %rdi
; movq rsp(48 + virtual offset), %r8
; movq rsp(40 + virtual offset), %r9
; movq rsp(32 + virtual offset), %r10
; movq rsp(24 + virtual offset), %r11
; return_call_unknown %r12 new_stack_arg_size:128 old_stack_arg_size:0 ret_addr:Some("%v219") fp:%v218 tmp:%v220 %rax=%rax %rcx=%rcx %rdx=%rdx %rbx=%rbx %rsi=%rsi %rdi=%rdi %r8=%r8 %r9=%r9 %r10=%r10 %r11=%r11
; movq rsp(88 + virtual offset), %rsi
; movq rsp(96 + virtual offset), %rdi
; movq rsp(64 + virtual offset), %r8
; movq rsp(56 + virtual offset), %r9
; return_call_unknown %r10 %rdi=%rdi %rsi=%rsi %rdx=%rdx %rcx=%rcx %r8=%r8 %r9=%r9
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x70, %rsp
; block1: ; offset 0x8
; movl $0xa, %eax
; movq %rax, 0x60(%rsp)
; movl $0xf, %ecx
; movq %rcx, 0x58(%rsp)
; subq $0xa0, %rsp
; movq %rbx, 0x70(%rsp)
; movq %r12, 0x78(%rsp)
; movq %r13, 0x80(%rsp)
; movq %r14, 0x88(%rsp)
; movq %r15, 0x90(%rsp)
; block1: ; offset 0x2d
; movl $0xa, %edi
; movq %rdi, 0x60(%rsp)
; movl $0xf, %esi
; movq %rsi, 0x58(%rsp)
; movl $0x14, %edx
; movq %rdx, 0x50(%rsp)
; movl $0x19, %ebx
; movq %rbx, 0x48(%rsp)
; movl $0x1e, %esi
; movq %rsi, 0x40(%rsp)
; movl $0x23, %edi
; movq %rdi, 0x38(%rsp)
; movl $0x28, %r8d
; movq %r8, 0x30(%rsp)
; movl $0x2d, %r9d
; movq %r9, 0x28(%rsp)
; movl $0x32, %r10d
; movq %r10, 0x20(%rsp)
; movl $0x37, %r11d
; movq %r11, 0x18(%rsp)
; movl $0x3c, %r15d
; movl $0x41, %r12d
; movl $0x46, %r13d
; movl $0x4b, %r14d
; movl $0x50, %ecx
; movq %rcx, 0x10(%rsp)
; movl $0x55, %ecx
; movl $0x19, %ecx
; movq %rcx, 0x48(%rsp)
; movl $0x1e, %r8d
; movq %r8, 0x40(%rsp)
; movl $0x23, %r9d
; movq %r9, 0x38(%rsp)
; movl $0x28, %eax
; movl $0x2d, %r10d
; movl $0x32, %r11d
; movl $0x37, %r13d
; movl $0x3c, %r14d
; movl $0x41, %r15d
; movl $0x46, %ebx
; movl $0x4b, %r12d
; movl $0x50, %edi
; movl $0x55, %esi
; movq %rsi, 0x30(%rsp)
; movl $0x5a, %edx
; movl $0x5f, %ebx
; movl $0x64, %esi
; movl $0x69, %edi
; movl $0x6e, %r8d
; movl $0x73, %r9d
; movl $0x78, %r10d
; movl $0x7d, %r11d
; movl $0x82, %eax
; movq %rax, 8(%rsp)
; movl $0x87, %eax
; movq %rax, (%rsp)
; subq $0x80, %rsp
; movq %r15, (%rsp)
; movq %r12, 8(%rsp)
; movq %r13, 0x10(%rsp)
; movq %r14, 0x18(%rsp)
; movq 0x90(%rsp), %rax
; movq %rax, 0x20(%rsp)
; movq %rcx, 0x28(%rsp)
; movq %rdx, 0x30(%rsp)
; movq %rbx, 0x38(%rsp)
; movq %rsi, 0x40(%rsp)
; movq %rdi, 0x48(%rsp)
; movq %r8, 0x50(%rsp)
; movq %r9, 0x58(%rsp)
; movq %r10, 0x60(%rsp)
; movq %r11, 0x68(%rsp)
; movq 0x88(%rsp), %rax
; movq %rax, 0x70(%rsp)
; movq 0x80(%rsp), %rax
; movq %rax, 0x78(%rsp)
; movq %rbp, %r15
; movq 8(%r15), %r13
; movabsq $0, %r12 ; reloc_external Abs8 %tail_callee_stack_args 0
; movq 0xe0(%rsp), %rax
; movq 0xd8(%rsp), %rcx
; movq 0xd0(%rsp), %rdx
; movq 0xc8(%rsp), %rbx
; movl $0x5f, %ecx
; movl $0x64, %r8d
; movl $0x69, %r9d
; movl $0x6e, %esi
; movq %rsi, 0x28(%rsp)
; movl $0x73, %esi
; movq %rsi, 0x20(%rsp)
; movl $0x78, %esi
; movq %rsi, 0x18(%rsp)
; movl $0x7d, %esi
; movq %rsi, 0x10(%rsp)
; movl $0x82, %esi
; movq %rsi, 8(%rsp)
; movl $0x87, %esi
; movq %rsi, (%rsp)
; subq $0xa0, %rsp
; subq $0xa0, %rbp
; movq 0xa0(%rsp), %rsi
; movq %rsi, (%rsp)
; movq 0xa8(%rsp), %rsi
; movq %rsi, 8(%rsp)
; movq 0xb0(%rsp), %rsi
; movq %rsi, 0x10(%rsp)
; movq 0xb8(%rsp), %rsi
; movq %rsi, 0x18(%rsp)
; movq 0xc0(%rsp), %rsi
; movq 0xb8(%rsp), %rdi
; movq 0xb0(%rsp), %r8
; movq 0xa8(%rsp), %r9
; movq 0xa0(%rsp), %r10
; movq 0x98(%rsp), %r11
; movq (%r15), %rbp
; movq 0x78(%rsp), %r14
; movq %r14, 8(%r15)
; movq 0x70(%rsp), %r14
; movq %r14, (%r15)
; movq 0x68(%rsp), %r14
; movq %r14, -8(%r15)
; movq 0x60(%rsp), %r14
; movq %r14, -0x10(%r15)
; movq 0x58(%rsp), %r14
; movq %r14, -0x18(%r15)
; movq 0x50(%rsp), %r14
; movq %r14, -0x20(%r15)
; movq 0x48(%rsp), %r14
; movq %r14, -0x28(%r15)
; movq 0x40(%rsp), %r14
; movq %r14, -0x30(%r15)
; movq 0x38(%rsp), %r14
; movq %r14, -0x38(%r15)
; movq 0x30(%rsp), %r14
; movq %r14, -0x40(%r15)
; movq 0x28(%rsp), %r14
; movq %r14, -0x48(%r15)
; movq 0x20(%rsp), %r14
; movq %r14, -0x50(%r15)
; movq 0x18(%rsp), %r14
; movq %r14, -0x58(%r15)
; movq 0x10(%rsp), %r14
; movq %r14, -0x60(%r15)
; movq 8(%rsp), %r14
; movq %r14, -0x68(%r15)
; movq (%rsp), %r14
; movq %r14, -0x70(%r15)
; leaq -0x78(%r15), %rsp
; movq %r13, (%rsp)
; jmpq *%r12
; movq %rsi, 0x20(%rsp)
; movq 0xc8(%rsp), %rsi
; movq %rsi, 0x28(%rsp)
; movq 0xd0(%rsp), %rsi
; movq %rsi, 0x30(%rsp)
; movq 0xd8(%rsp), %rsi
; movq %rsi, 0x38(%rsp)
; movq 0xe0(%rsp), %rsi
; movq %rsi, 0x40(%rsp)
; movq 0xe8(%rsp), %rsi
; movq %rsi, 0x48(%rsp)
; movq 0xf0(%rsp), %rsi
; movq %rsi, 0x50(%rsp)
; movq 0xf8(%rsp), %rsi
; movq %rsi, 0x58(%rsp)
; movq 0x100(%rsp), %rsi
; movq %rsi, 0x60(%rsp)
; movq 0x108(%rsp), %rsi
; movq %rsi, 0x68(%rsp)
; movq 0x110(%rsp), %rsi
; movq %rsi, 0x70(%rsp)
; movq 0x118(%rsp), %rsi
; movq %rsi, 0x78(%rsp)
; movq 0x120(%rsp), %rsi
; movq %rsi, 0x80(%rsp)
; movq 0x128(%rsp), %rsi
; movq %rsi, 0x88(%rsp)
; movq 0x130(%rsp), %rsi
; movq %rsi, 0x90(%rsp)
; movq 0x138(%rsp), %rsi
; movq %rsi, 0x98(%rsp)
; movq 0x140(%rsp), %rsi
; movq %rsi, 0xa0(%rsp)
; movq 0x148(%rsp), %rsi
; movq %rsi, 0xa8(%rsp)
; movq %rax, 0x10(%rbp)
; movq %r10, 0x18(%rbp)
; movq %r11, 0x20(%rbp)
; movq %r13, 0x28(%rbp)
; movq %r14, 0x30(%rbp)
; movq %r15, 0x38(%rbp)
; movq %rbx, 0x40(%rbp)
; movq %r12, 0x48(%rbp)
; movq %rdi, 0x50(%rbp)
; movq 0x30(%rsp), %rdi
; movq %rdi, 0x58(%rbp)
; movq %rdx, 0x60(%rbp)
; movq %rcx, 0x68(%rbp)
; movq %r8, 0x70(%rbp)
; movq %r9, 0x78(%rbp)
; movq 0x28(%rsp), %rsi
; movq %rsi, 0x80(%rbp)
; movq 0x20(%rsp), %rsi
; movq %rsi, 0x88(%rbp)
; movq 0x18(%rsp), %rsi
; movq %rsi, 0x90(%rbp)
; movq 0x10(%rsp), %rsi
; movq %rsi, 0x98(%rbp)
; movq 8(%rsp), %rsi
; movq %rsi, 0xa0(%rbp)
; movq (%rsp), %rsi
; movq %rsi, 0xa8(%rbp)
; movabsq $0, %r10 ; reloc_external Abs8 %tail_callee_stack_args 0
; movq 0x48(%rsp), %rcx
; movq 0x50(%rsp), %rdx
; movq 0x58(%rsp), %rsi
; movq 0x60(%rsp), %rdi
; movq 0x40(%rsp), %r8
; movq 0x38(%rsp), %r9
; movq 0x70(%rsp), %rbx
; movq 0x78(%rsp), %r12
; movq 0x80(%rsp), %r13
; movq 0x88(%rsp), %r14
; movq 0x90(%rsp), %r15
; addq $0xa0, %rsp
; movq %rbp, %rsp
; popq %rbp
; jmpq *%r10

cranelift/filetests/filetests/isa/x64/tail-call-conv.clif (1048 lines changed)

File diff suppressed because it is too large

cranelift/filetests/filetests/isa/x64/tail-stack-limit.clif (31 lines changed)

@ -19,33 +19,38 @@ block0(v0: i64, v1: i8, v2: i8, v3: i8, v4: i8, v5: i8, v6: i8, v7: i8, v8: i128
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; movq %rax, %r14
; addq %r14, $16, %r14
; cmpq %rsp, %r14
; movq %rdi, %r10
; addq %r10, $16, %r10
; cmpq %rsp, %r10
; jnbe #trap=stk_ovf
; subq %rsp, $16, %rsp
; block0:
; movq %r10, %rax
; movq %r11, %rcx
; movq 16(%rbp), %r10
; movq 24(%rbp), %rsi
; movq 32(%rbp), %rax
; movq 40(%rbp), %rcx
; addq %rsp, $16, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
; ret 32
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; movq %rax, %r14
; addq $0x10, %r14
; cmpq %rsp, %r14
; ja 0x27
; movq %rdi, %r10
; addq $0x10, %r10
; cmpq %rsp, %r10
; ja 0x33
; subq $0x10, %rsp
; block1: ; offset 0x18
; movq %r10, %rax
; movq %r11, %rcx
; movq 0x10(%rbp), %r10
; movq 0x18(%rbp), %rsi
; movq 0x20(%rbp), %rax
; movq 0x28(%rbp), %rcx
; addq $0x10, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
; retq $0x20
; ud2 ; trap: stk_ovf
