Browse Source

Cranelift: Take user stack maps through lowering and emission (#8876)

* Cranelift: Take user stack maps through lowering and emission

Previously, user stack maps were inserted by the frontend and preserved in the
mid-end. This commit takes them from the mid-end CLIF into the backend vcode,
and then from that vcode into the finalized mach buffer during emission.

During lowering, we compile the `UserStackMapEntry`s into packed
`UserStackMap`s. This is the appropriate moment in time to do that coalescing,
packing, and compiling because the stack map entries are immutable from this
point on.

Additionally, we include user stack maps in the `Debug` and disassembly
implementations for vcode, just after their associated safepoint
instructions. This allows us to see the stack maps we are generating when
debugging, as well as write filetests that check we are generating the expected
stack maps for the correct instructions.

Co-Authored-By: Trevor Elliott <telliott@fastly.com>

* uncomment debug assert that was commented out for debugging

* Address review feedback

* remove new method that was actually never needed

---------

Co-authored-by: Trevor Elliott <telliott@fastly.com>
pull/8886/head
Nick Fitzgerald 4 months ago
committed by GitHub
parent
commit
e20b4244b9
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 2
      cranelift/codegen/src/ir/mod.rs
  2. 55
      cranelift/codegen/src/ir/user_stack_maps.rs
  3. 37
      cranelift/codegen/src/isa/aarch64/inst/emit.rs
  4. 41
      cranelift/codegen/src/isa/riscv64/inst/emit.rs
  5. 44
      cranelift/codegen/src/isa/s390x/inst/emit.rs
  6. 17
      cranelift/codegen/src/isa/x64/inst/emit.rs
  7. 23
      cranelift/codegen/src/isa/x64/inst/emit_state.rs
  8. 48
      cranelift/codegen/src/machinst/abi.rs
  9. 46
      cranelift/codegen/src/machinst/buffer.rs
  10. 40
      cranelift/codegen/src/machinst/lower.rs
  11. 19
      cranelift/codegen/src/machinst/mod.rs
  12. 109
      cranelift/codegen/src/machinst/vcode.rs
  13. 221
      cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif
  14. 250
      cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif
  15. 221
      cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif
  16. 241
      cranelift/filetests/filetests/isa/x64/user_stack_maps.clif

2
cranelift/codegen/src/ir/mod.rs

@ -65,7 +65,7 @@ pub use crate::ir::stackslot::{
};
pub use crate::ir::trapcode::TrapCode;
pub use crate::ir::types::Type;
pub use crate::ir::user_stack_maps::UserStackMapEntry;
pub use crate::ir::user_stack_maps::{UserStackMap, UserStackMapEntry};
use crate::entity::{entity_impl, PrimaryMap, SecondaryMap};

55
cranelift/codegen/src/ir/user_stack_maps.rs

@ -29,13 +29,19 @@
//! contrast to the old system and its `r64` values).
use crate::ir;
use cranelift_bitset::CompoundBitSet;
use cranelift_entity::PrimaryMap;
use smallvec::SmallVec;
pub(crate) type UserStackMapEntryVec = SmallVec<[UserStackMapEntry; 4]>;
/// A stack map entry describes a GC-managed value and its location at a
/// particular instruction.
#[derive(Clone, PartialEq, Hash)]
/// A stack map entry describes a single GC-managed value and its location on
/// the stack.
///
/// A stack map entry is associated with a particular instruction, and that
/// instruction must be a safepoint. The GC-managed value must be stored in the
/// described location across this entry's instruction.
#[derive(Clone, Debug, PartialEq, Hash)]
#[cfg_attr(
feature = "enable-serde",
derive(serde_derive::Serialize, serde_derive::Deserialize)
@ -50,3 +56,46 @@ pub struct UserStackMapEntry {
/// The offset within the stack slot where this entry's value can be found.
pub offset: u32,
}
/// A compiled stack map, describing the location of many GC-managed values.
///
/// A stack map is associated with a particular instruction, and that
/// instruction is a safepoint.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(
    feature = "enable-serde",
    derive(serde_derive::Deserialize, serde_derive::Serialize)
)]
pub struct UserStackMap {
    // For each CLIF type present in the map, the set of stack offsets (each
    // computed as the containing stack slot's offset plus the entry's offset
    // within that slot; see `UserStackMap::new`) holding live GC-managed
    // values of that type. An inline capacity of one is used because a
    // single type is the overwhelmingly common case in practice.
    by_type: SmallVec<[(ir::Type, CompoundBitSet); 1]>,
}
impl UserStackMap {
    /// Coalesce the given entries into a new `UserStackMap`.
    ///
    /// `stack_slot_offsets` maps each stack slot to its offset, which is
    /// combined with each entry's intra-slot offset to produce the absolute
    /// offset recorded in the map.
    pub fn new(
        entries: &[UserStackMapEntry],
        stack_slot_offsets: &PrimaryMap<ir::StackSlot, u32>,
    ) -> Self {
        let mut by_type = SmallVec::<[(ir::Type, CompoundBitSet); 1]>::default();

        for entry in entries {
            let slot_base = stack_slot_offsets[entry.slot];
            let offset = usize::try_from(slot_base + entry.offset).unwrap();

            // A linear scan suffices here: `by_type` basically always has a
            // single element in practice, and even in the worst case there
            // are only a handful of distinct CLIF types.
            let index = match by_type.iter().position(|(ty, _)| *ty == entry.ty) {
                Some(index) => index,
                None => {
                    by_type.push((entry.ty, CompoundBitSet::with_capacity(offset + 1)));
                    by_type.len() - 1
                }
            };

            by_type[index].1.insert(offset);
        }

        UserStackMap { by_type }
    }
}

37
cranelift/codegen/src/isa/aarch64/inst/emit.rs

@ -651,11 +651,18 @@ fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
/// Safepoint stack map for upcoming instruction, as provided to
/// `pre_safepoint()`.
stack_map: Option<StackMap>,
/// The user stack map for the upcoming instruction, as provided to
/// `pre_safepoint()`.
user_stack_map: Option<ir::UserStackMap>,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compiletime. See [cranelift_control].
ctrl_plane: ControlPlane,
frame_layout: FrameLayout,
}
@ -663,13 +670,19 @@ impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &Callee<AArch64MachineDeps>, ctrl_plane: ControlPlane) -> Self {
EmitState {
stack_map: None,
user_stack_map: None,
ctrl_plane,
frame_layout: abi.frame_layout().clone(),
}
}
fn pre_safepoint(&mut self, stack_map: StackMap) {
self.stack_map = Some(stack_map);
fn pre_safepoint(
&mut self,
stack_map: Option<StackMap>,
user_stack_map: Option<ir::UserStackMap>,
) {
self.stack_map = stack_map;
self.user_stack_map = user_stack_map;
}
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
@ -686,8 +699,8 @@ impl MachInstEmitState<Inst> for EmitState {
}
impl EmitState {
fn take_stack_map(&mut self) -> Option<StackMap> {
self.stack_map.take()
fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
(self.stack_map.take(), self.user_stack_map.take())
}
fn clear_post_insn(&mut self) {
@ -2921,11 +2934,16 @@ impl MachInstEmit for Inst {
}
}
&Inst::Call { ref info } => {
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
}
sink.add_reloc(Reloc::Arm64Call, &info.dest, 0);
sink.put4(enc_jump26(0b100101, 0));
if let Some(s) = user_stack_map {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
if info.opcode.is_call() {
sink.add_call_site(info.opcode);
}
@ -2939,11 +2957,16 @@ impl MachInstEmit for Inst {
}
}
&Inst::CallInd { ref info } => {
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s);
}
let rn = info.rn;
sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5));
if let Some(s) = user_stack_map {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
if info.opcode.is_call() {
sink.add_call_site(info.opcode);
}

41
cranelift/codegen/src/isa/riscv64/inst/emit.rs

@ -46,20 +46,28 @@ pub enum EmitVState {
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
/// Safepoint stack map for upcoming instruction, as provided to
/// `pre_safepoint()`.
stack_map: Option<StackMap>,
/// The user stack map for the upcoming instruction, as provided to
/// `pre_safepoint()`.
user_stack_map: Option<ir::UserStackMap>,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compiletime. See [cranelift_control].
ctrl_plane: ControlPlane,
/// Vector State
/// Controls the current state of the vector unit at the emission point.
vstate: EmitVState,
frame_layout: FrameLayout,
}
impl EmitState {
fn take_stack_map(&mut self) -> Option<StackMap> {
self.stack_map.take()
fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
(self.stack_map.take(), self.user_stack_map.take())
}
}
@ -70,14 +78,20 @@ impl MachInstEmitState<Inst> for EmitState {
) -> Self {
EmitState {
stack_map: None,
user_stack_map: None,
ctrl_plane,
vstate: EmitVState::Unknown,
frame_layout: abi.frame_layout().clone(),
}
}
fn pre_safepoint(&mut self, stack_map: StackMap) {
self.stack_map = Some(stack_map);
fn pre_safepoint(
&mut self,
stack_map: Option<StackMap>,
user_stack_map: Option<ir::UserStackMap>,
) {
self.stack_map = stack_map;
self.user_stack_map = user_stack_map;
}
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
@ -1134,13 +1148,21 @@ impl Inst {
sink.add_call_site(info.opcode);
}
sink.add_reloc(Reloc::RiscvCallPlt, &info.dest, 0);
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(8), s);
}
Inst::construct_auipc_and_jalr(Some(writable_link_reg()), writable_link_reg(), 0)
.into_iter()
.for_each(|i| i.emit_uncompressed(sink, emit_info, state, start_off));
if let Some(s) = user_stack_map {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
let callee_pop_size = i32::try_from(info.callee_pop_size).unwrap();
if callee_pop_size > 0 {
for inst in Riscv64MachineDeps::gen_sp_reg_adjust(-callee_pop_size) {
@ -1158,9 +1180,14 @@ impl Inst {
}
.emit(sink, emit_info, state);
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
}
if let Some(s) = user_stack_map {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
if info.opcode.is_call() {
sink.add_call_site(info.opcode);

44
cranelift/codegen/src/isa/s390x/inst/emit.rs

@ -1,7 +1,7 @@
//! S390x ISA: binary code emission.
use crate::binemit::StackMap;
use crate::ir::{MemFlags, TrapCode};
use crate::ir::{self, MemFlags, TrapCode};
use crate::isa::s390x::inst::*;
use crate::isa::s390x::settings as s390x_settings;
use cranelift_control::ControlPlane;
@ -1306,11 +1306,19 @@ fn put_with_trap(sink: &mut MachBuffer<Inst>, enc: &[u8], trap_code: TrapCode) {
#[derive(Default, Clone, Debug)]
pub struct EmitState {
pub(crate) initial_sp_offset: i64,
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
/// Safepoint stack map for upcoming instruction, as provided to
/// `pre_safepoint()`.
stack_map: Option<StackMap>,
/// The user stack map for the upcoming instruction, as provided to
/// `pre_safepoint()`.
user_stack_map: Option<ir::UserStackMap>,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compiletime. See [cranelift_control].
ctrl_plane: ControlPlane,
frame_layout: FrameLayout,
}
@ -1319,13 +1327,19 @@ impl MachInstEmitState<Inst> for EmitState {
EmitState {
initial_sp_offset: abi.frame_size() as i64,
stack_map: None,
user_stack_map: None,
ctrl_plane,
frame_layout: abi.frame_layout().clone(),
}
}
fn pre_safepoint(&mut self, stack_map: StackMap) {
self.stack_map = Some(stack_map);
fn pre_safepoint(
&mut self,
stack_map: Option<StackMap>,
user_stack_map: Option<ir::UserStackMap>,
) {
self.stack_map = stack_map;
self.user_stack_map = user_stack_map;
}
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
@ -1342,8 +1356,8 @@ impl MachInstEmitState<Inst> for EmitState {
}
impl EmitState {
fn take_stack_map(&mut self) -> Option<StackMap> {
self.stack_map.take()
fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
(self.stack_map.take(), self.user_stack_map.take())
}
fn clear_post_insn(&mut self) {
@ -3243,9 +3257,15 @@ impl Inst {
_ => unreachable!(),
}
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(6), s);
}
if let Some(s) = user_stack_map {
let offset = sink.cur_offset() + 6;
sink.push_user_stack_map(state, offset, s);
}
put(sink, &enc_ril_b(opcode, link.to_reg(), 0));
if info.opcode.is_call() {
sink.add_call_site(info.opcode);
@ -3255,10 +3275,16 @@ impl Inst {
debug_assert_eq!(link.to_reg(), gpr(14));
let rn = info.rn;
let opcode = 0x0d; // BASR
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
}
if let Some(s) = user_stack_map {
let offset = sink.cur_offset() + 2;
sink.push_user_stack_map(state, offset, s);
}
let opcode = 0x0d; // BASR
put(sink, &enc_rr(opcode, link.to_reg(), rn));
if info.opcode.is_call() {
sink.add_call_site(info.opcode);

17
cranelift/codegen/src/isa/x64/inst/emit.rs

@ -1599,9 +1599,15 @@ pub(crate) fn emit(
opcode,
info: call_info,
} => {
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
}
if let Some(s) = user_stack_map {
let offset = sink.cur_offset() + 5;
sink.push_user_stack_map(state, offset, s);
}
sink.put1(0xE8);
// The addend adjusts for the difference between the end of the instruction and the
// beginning of the immediate field.
@ -1696,9 +1702,16 @@ pub(crate) fn emit(
);
}
}
if let Some(s) = state.take_stack_map() {
let (stack_map, user_stack_map) = state.take_stack_map();
if let Some(s) = stack_map {
sink.add_stack_map(StackMapExtent::StartedAtOffset(start_offset), s);
}
if let Some(s) = user_stack_map {
let offset = sink.cur_offset();
sink.push_user_stack_map(state, offset, s);
}
if opcode.is_call() {
sink.add_call_site(*opcode);
}

23
cranelift/codegen/src/isa/x64/inst/emit_state.rs

@ -1,11 +1,18 @@
use super::*;
use crate::ir;
use cranelift_control::ControlPlane;
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
/// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
/// Safepoint stack map for upcoming instruction, as provided to
/// `pre_safepoint()`.
stack_map: Option<StackMap>,
/// The user stack map for the upcoming instruction, as provided to
/// `pre_safepoint()`.
user_stack_map: Option<ir::UserStackMap>,
/// Only used during fuzz-testing. Otherwise, it is a zero-sized struct and
/// optimized away at compiletime. See [cranelift_control].
ctrl_plane: ControlPlane,
@ -19,13 +26,19 @@ impl MachInstEmitState<Inst> for EmitState {
fn new(abi: &Callee<X64ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self {
EmitState {
stack_map: None,
user_stack_map: None,
ctrl_plane,
frame_layout: abi.frame_layout().clone(),
}
}
fn pre_safepoint(&mut self, stack_map: StackMap) {
self.stack_map = Some(stack_map);
fn pre_safepoint(
&mut self,
stack_map: Option<StackMap>,
user_stack_map: Option<ir::UserStackMap>,
) {
self.stack_map = stack_map;
self.user_stack_map = user_stack_map;
}
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
@ -42,8 +55,8 @@ impl MachInstEmitState<Inst> for EmitState {
}
impl EmitState {
pub(crate) fn take_stack_map(&mut self) -> Option<StackMap> {
self.stack_map.take()
pub(crate) fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
(self.stack_map.take(), self.user_stack_map.take())
}
pub(crate) fn clear_post_insn(&mut self) {

48
cranelift/codegen/src/machinst/abi.rs

@ -59,8 +59,10 @@
//!
//! ```plain
//! (high address)
//!
//! +---------------------------+
//! | ... |
//! | caller frames |
//! | ... |
//! +===========================+
//! | ... |
//! | stack args |
//! Canonical Frame Address --> | (accessed via FP) |
@ -68,24 +70,24 @@
//! SP at function entry -----> | return address |
//! +---------------------------+
//! FP after prologue --------> | FP (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | clobbered callee-saves |
//! unwind-frame base --------> | (pushed by prologue) |
//! +---------------------------+
//! | ... |
//! | spill slots |
//! | (accessed via SP) |
//! | ... |
//! | stack slots |
//! | (accessed via SP) |
//! | (alloc'd by prologue) |
//! +---------------------------+
//! | [alignment as needed] |
//! | ... |
//! | args for largest call |
//! SP -----------------------> | (alloc'd by prologue) |
//! +---------------------------+
//! +---------------------------+ -----
//! | ... | |
//! | clobbered callee-saves | |
//! unwind-frame base --------> | (pushed by prologue) | |
//! +---------------------------+ |
//! | ... | |
//! | spill slots | |
//! | (accessed via SP) | active
//! | ... | size
//! | stack slots | |
//! | (accessed via SP) | |
//! | (alloc'd by prologue) | |
//! +---------------------------+ |
//! | [alignment as needed] | |
//! | ... | |
//! | args for largest call | |
//! SP -----------------------> | (alloc'd by prologue) | |
//! +===========================+ -----
//!
//! (low address)
//! ```
@ -1012,6 +1014,12 @@ impl FrameLayout {
debug_assert!(floats.iter().all(|r| r.to_reg().class() == RegClass::Float));
(ints, floats)
}
    /// The size of FP to SP while the frame is active (not during prologue
    /// setup or epilogue tear down).
    pub fn active_size(&self) -> u32 {
        // Everything allocated below the unwind-frame base in the frame
        // diagram above: clobbered callee-saves, the fixed storage for
        // spill/stack slots, and the outgoing-argument area.
        self.outgoing_args_size + self.fixed_frame_storage_size + self.clobber_size
    }
}
/// ABI object for a function body.

46
cranelift/codegen/src/machinst/buffer.rs

@ -178,6 +178,7 @@ use crate::machinst::{
BlockIndex, MachInstLabelUse, TextSectionBuilder, VCodeConstant, VCodeConstants, VCodeInst,
};
use crate::trace;
use crate::{ir, MachInstEmitState};
use crate::{timing, VCodeConstantData};
use cranelift_control::ControlPlane;
use cranelift_entity::{entity_impl, PrimaryMap};
@ -250,6 +251,11 @@ pub struct MachBuffer<I: VCodeInst> {
srclocs: SmallVec<[MachSrcLoc<Stencil>; 64]>,
/// Any stack maps referring to this code.
stack_maps: SmallVec<[MachStackMap; 8]>,
/// Any user stack maps for this code.
///
/// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted
/// by code offset, and each stack map covers `span` bytes on the stack.
user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>,
/// Any unwind info at a given location.
unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>,
/// The current source location in progress (after `start_srcloc()` and
@ -329,6 +335,7 @@ impl MachBufferFinalized<Stencil> {
.map(|srcloc| srcloc.apply_base_srcloc(base_srcloc))
.collect(),
stack_maps: self.stack_maps,
user_stack_maps: self.user_stack_maps,
unwind_info: self.unwind_info,
alignment: self.alignment,
}
@ -357,9 +364,14 @@ pub struct MachBufferFinalized<T: CompilePhase> {
pub(crate) srclocs: SmallVec<[T::MachSrcLocType; 64]>,
/// Any stack maps referring to this code.
pub(crate) stack_maps: SmallVec<[MachStackMap; 8]>,
/// Any user stack maps for this code.
///
/// Each entry is an `(offset, span, stack_map)` triple. Entries are sorted
/// by code offset, and each stack map covers `span` bytes on the stack.
pub(crate) user_stack_maps: SmallVec<[(CodeOffset, u32, ir::UserStackMap); 8]>,
/// Any unwind info at a given location.
pub unwind_info: SmallVec<[(CodeOffset, UnwindInst); 8]>,
/// The requireed alignment of this buffer
/// The required alignment of this buffer.
pub alignment: u32,
}
@ -447,6 +459,7 @@ impl<I: VCodeInst> MachBuffer<I> {
call_sites: SmallVec::new(),
srclocs: SmallVec::new(),
stack_maps: SmallVec::new(),
user_stack_maps: SmallVec::new(),
unwind_info: SmallVec::new(),
cur_srcloc: None,
label_offsets: SmallVec::new(),
@ -1532,6 +1545,7 @@ impl<I: VCodeInst> MachBuffer<I> {
call_sites: self.call_sites,
srclocs,
stack_maps: self.stack_maps,
user_stack_maps: self.user_stack_maps,
unwind_info: self.unwind_info,
alignment,
}
@ -1667,6 +1681,36 @@ impl<I: VCodeInst> MachBuffer<I> {
stack_map,
});
}
    /// Push a user stack map onto this buffer.
    ///
    /// The stack map is associated with the given `return_addr` code
    /// offset. This must be the PC for the instruction just *after* this stack
    /// map's associated instruction. For example in the sequence `call $foo;
    /// add r8, rax`, the `return_addr` must be the offset of the start of the
    /// `add` instruction.
    ///
    /// Stack maps must be pushed in sorted `return_addr` order.
    pub fn push_user_stack_map(
        &mut self,
        emit_state: &I::State,
        return_addr: CodeOffset,
        stack_map: ir::UserStackMap,
    ) {
        // Every user stack map covers the frame's full active size (the FP
        // to SP distance while the frame is live).
        let span = emit_state.frame_layout().active_size();
        trace!("Adding user stack map @ {return_addr:#x} spanning {span} bytes: {stack_map:?}");
        // Enforce the sorted-order contract documented above. The `unwrap()`
        // in the failure message cannot panic: the assertion only fails when
        // `last()` returned `Some`, and the message arguments are evaluated
        // only on failure.
        debug_assert!(
            self.user_stack_maps
                .last()
                .map_or(true, |(prev_addr, _, _)| *prev_addr < return_addr),
            "pushed stack maps out of order: {} is not less than {}",
            self.user_stack_maps.last().unwrap().0,
            return_addr,
        );
        self.user_stack_maps.push((return_addr, span, stack_map));
    }
}
impl<T: CompilePhase> MachBufferFinalized<T> {

40
cranelift/codegen/src/machinst/lower.rs

@ -14,9 +14,9 @@ use crate::ir::{
Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
};
use crate::machinst::{
writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, InsnIndex, LoweredBlock,
MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
VCodeInst, ValueRegs, Writable,
writable_value_regs, BackwardsInsnIndex, BlockIndex, BlockLoweringOrder, Callee, InsnIndex,
LoweredBlock, MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
VCodeConstants, VCodeInst, ValueRegs, Writable,
};
use crate::settings::Flags;
use crate::{trace, CodegenResult};
@ -485,8 +485,8 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
/// Pre-analysis: compute `value_ir_uses`. See comment on
/// `ValueUseState` for a description of what this analysis
/// computes.
fn compute_use_states<'a>(
f: &'a Function,
fn compute_use_states(
f: &Function,
sret_param: Option<Value>,
) -> SecondaryMap<Value, ValueUseState> {
// We perform the analysis without recursion, so we don't
@ -803,9 +803,39 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
}
let start = self.vcode.vcode.num_insts();
let loc = self.srcloc(inst);
self.finish_ir_inst(loc);
// If the instruction had a user stack map, forward it from the CLIF
// to the vcode.
if let Some(entries) = self.f.dfg.user_stack_map_entries(inst) {
let end = self.vcode.vcode.num_insts();
debug_assert!(end > start);
debug_assert_eq!(
(start..end)
.filter(|i| self.vcode.vcode[InsnIndex::new(*i)].is_safepoint())
.count(),
1
);
for i in start..end {
let iix = InsnIndex::new(i);
if self.vcode.vcode[iix].is_safepoint() {
trace!(
"Adding user stack map from clif\n\n\
{inst:?} `{}`\n\n\
to vcode\n\n\
{iix:?} `{}`",
self.f.dfg.display_inst(inst),
&self.vcode.vcode[iix].pretty_print_inst(&mut Default::default()),
);
self.vcode
.add_user_stack_map(BackwardsInsnIndex::new(iix.index()), entries);
break;
}
}
}
// maybe insert random instruction
if ctrl_plane.get_decision() {
if ctrl_plane.get_decision() {

19
cranelift/codegen/src/machinst/mod.rs

@ -45,8 +45,9 @@
//! ```
use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap};
use crate::ir::function::FunctionParameters;
use crate::ir::{DynamicStackSlot, RelSourceLoc, StackSlot, Type};
use crate::ir::{
self, function::FunctionParameters, DynamicStackSlot, RelSourceLoc, StackSlot, Type,
};
use crate::isa::FunctionAlignment;
use crate::result::CodegenResult;
use crate::settings;
@ -284,10 +285,13 @@ pub enum MachTerminator {
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
type State: MachInstEmitState<Self>;
/// Constant information used in `emit` invocations.
type Info;
/// Emit the instruction.
fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);
/// Pretty-print the instruction.
fn pretty_print_inst(&self, state: &mut Self::State) -> String;
}
@ -297,20 +301,29 @@ pub trait MachInstEmit: MachInst {
pub trait MachInstEmitState<I: VCodeInst>: Default + Clone + Debug {
/// Create a new emission state given the ABI object.
fn new(abi: &Callee<I::ABIMachineSpec>, ctrl_plane: ControlPlane) -> Self;
/// Update the emission state before emitting an instruction that is a
/// safepoint.
fn pre_safepoint(&mut self, _stack_map: StackMap) {}
fn pre_safepoint(
&mut self,
stack_map: Option<StackMap>,
user_stack_map: Option<ir::UserStackMap>,
);
/// The emission state holds ownership of a control plane, so it doesn't
/// have to be passed around explicitly too much. `ctrl_plane_mut` may
/// be used if temporary access to the control plane is needed by some
/// other function that doesn't have access to the emission state.
fn ctrl_plane_mut(&mut self) -> &mut ControlPlane;
/// Used to continue using a control plane after the emission state is
/// not needed anymore.
fn take_ctrl_plane(self) -> ControlPlane;
/// A hook that triggers when first emitting a new block.
/// It is guaranteed to be called before any instructions are emitted.
fn on_new_block(&mut self) {}
/// The [`FrameLayout`] for the function currently being compiled.
fn frame_layout(&self) -> &FrameLayout;
}

109
cranelift/codegen/src/machinst/vcode.rs

@ -40,6 +40,33 @@ use std::fmt;
/// Index referring to an instruction in VCode.
pub type InsnIndex = regalloc2::Inst;
/// Extension trait for `InsnIndex` to allow conversion to a
/// `BackwardsInsnIndex`.
trait ToBackwardsInsnIndex {
    /// Convert this forwards instruction index into the equivalent backwards
    /// index, given the total number of instructions in the vcode.
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
}

impl ToBackwardsInsnIndex for InsnIndex {
    fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
        // The last forwards instruction is the first backwards instruction,
        // so the conversion is a mirror around the instruction count.
        BackwardsInsnIndex::new(num_insts - self.index() - 1)
    }
}
/// An index referring to an instruction in the VCode when it is backwards,
/// during VCode construction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(
    feature = "enable-serde",
    derive(::serde::Serialize, ::serde::Deserialize)
)]
pub struct BackwardsInsnIndex(InsnIndex);

impl BackwardsInsnIndex {
    /// Create a new backwards instruction index from its raw `usize` value.
    pub fn new(i: usize) -> Self {
        BackwardsInsnIndex(InsnIndex::new(i))
    }
}
/// Index referring to a basic block in VCode.
pub type BlockIndex = regalloc2::Block;
@ -67,6 +94,14 @@ pub struct VCode<I: VCodeInst> {
/// Lowered machine instructions in order corresponding to the original IR.
insts: Vec<I>,
/// A map from backwards instruction index to the user stack map for that
/// instruction.
///
/// This is a sparse side table that only has entries for instructions that
/// are safepoints, and only for a subset of those that have an associated
/// user stack map.
user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
/// Operands: pre-regalloc references to virtual registers with
/// constraints, in one flattened array. This allows the regalloc
/// to efficiently access all operands without requiring expensive
@ -251,7 +286,7 @@ impl<I: VCodeInst> VCodeBuilder<I> {
block_order: BlockLoweringOrder,
constants: VCodeConstants,
direction: VCodeBuildDirection,
) -> VCodeBuilder<I> {
) -> Self {
let vcode = VCode::new(sigs, abi, emit_info, block_order, constants);
VCodeBuilder {
@ -565,6 +600,17 @@ impl<I: VCodeInst> VCodeBuilder<I> {
self.vcode
}
    /// Add a user stack map for the associated instruction.
    ///
    /// `inst` is a *backwards* instruction index because the vcode is under
    /// construction at this point. `entries` are the CLIF-level stack map
    /// entries, which are compiled here into a packed `UserStackMap`.
    pub fn add_user_stack_map(
        &mut self,
        inst: BackwardsInsnIndex,
        entries: &[ir::UserStackMapEntry],
    ) {
        let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
        // Each instruction may have at most one user stack map.
        let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
        debug_assert!(old_entry.is_none());
    }
}
/// Is this type a reference type?
@ -582,12 +628,13 @@ impl<I: VCodeInst> VCode<I> {
emit_info: I::Info,
block_order: BlockLoweringOrder,
constants: VCodeConstants,
) -> VCode<I> {
) -> Self {
let n_blocks = block_order.lowered_order().len();
VCode {
sigs,
vreg_types: vec![],
insts: Vec::with_capacity(10 * n_blocks),
user_stack_maps: FxHashMap::default(),
operands: Vec::with_capacity(30 * n_blocks),
operand_ranges: Ranges::with_capacity(10 * n_blocks),
clobbers: FxHashMap::default(),
@ -864,7 +911,7 @@ impl<I: VCodeInst> VCode<I> {
// If this is a safepoint, compute a stack map
// and pass it to the emit state.
if self.insts[iix.index()].is_safepoint() {
let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![];
// Find the contiguous range of
// (progpoint, allocation) safepoint slot
@ -888,13 +935,36 @@ impl<I: VCodeInst> VCode<I> {
let slot = alloc.as_stack().unwrap();
safepoint_slots.push(slot);
}
if !safepoint_slots.is_empty() {
let stack_map = self
.abi
.spillslots_to_stack_map(&safepoint_slots[..], &state);
state.pre_safepoint(stack_map);
}
}
let stack_map = if safepoint_slots.is_empty() {
None
} else {
Some(
self.abi
.spillslots_to_stack_map(&safepoint_slots[..], &state),
)
};
let (user_stack_map, user_stack_map_disasm) = {
// The `user_stack_maps` is keyed by reverse
// instruction index, so we must flip the
// index. We can't put this into a helper method
// due to borrowck issues because parts of
// `self` are borrowed mutably elsewhere in this
// function.
let index = iix.to_backwards_insn_index(self.num_insts());
let user_stack_map = self.user_stack_maps.remove(&index);
let user_stack_map_disasm =
user_stack_map.as_ref().map(|m| format!(" ; {m:?}"));
(user_stack_map, user_stack_map_disasm)
};
state.pre_safepoint(stack_map, user_stack_map);
user_stack_map_disasm
} else {
None
};
// If the instruction we are about to emit is
// a return, place an epilogue at this point
@ -932,6 +1002,10 @@ impl<I: VCodeInst> VCode<I> {
&mut buffer,
&mut state,
);
if let Some(stack_map_disasm) = stack_map_disasm {
disasm.push_str(&stack_map_disasm);
disasm.push('\n');
}
}
}
@ -1014,6 +1088,12 @@ impl<I: VCodeInst> VCode<I> {
}
}
debug_assert!(
self.user_stack_maps.is_empty(),
"any stack maps should have been consumed by instruction emission, still have: {:#?}",
self.user_stack_maps,
);
// Do any optimizations on branches at tail of buffer, as if we had
// bound one last label.
buffer.optimize_branches(ctrl_plane);
@ -1224,6 +1304,12 @@ impl<I: VCodeInst> VCode<I> {
.map(|o| o.vreg())
.any(|vreg| self.facts[vreg.vreg()].is_some())
}
    /// Get the user stack map associated with the given forward instruction index.
    pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
        // The side table is keyed by backwards instruction index, so flip
        // the forward index before the lookup.
        let index = inst.to_backwards_insn_index(self.num_insts());
        self.user_stack_maps.get(&index)
    }
}
impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
@ -1385,6 +1471,9 @@ impl<I: VCodeInst> fmt::Debug for VCode<I> {
}
}
}
if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
writeln!(f, " {user_stack_map:?}")?;
}
}
}

221
cranelift/filetests/filetests/isa/aarch64/user_stack_maps.clif

@ -0,0 +1,221 @@
test compile precise-output
set unwind_info=false
set enable_probestack=false
target aarch64
function %foo() system_v {
ss0 = explicit_slot 12, align = 4
sig0 = (i32) system_v
fn0 = colocated u0:0 sig0
block0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
stack_store v0, ss0
stack_store v1, ss0+4
stack_store v2, ss0+8
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8]
stack_store v1, ss0
stack_store v2, ss0+4
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4]
stack_store v2, ss0
call fn0(v1), stack_map=[i32 @ ss0+0]
call fn0(v2)
return
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; str x24, [sp, #-16]!
; stp x19, x22, [sp, #-16]!
; sub sp, sp, #16
; block0:
; movz w9, #0
; movz w8, #1
; movz w0, #2
; mov x10, sp
; str w9, [x10]
; mov x24, x9
; add x9, sp, #4
; str w8, [x9]
; mov x19, x8
; add x10, sp, #8
; str w0, [x10]
; mov x22, x0
; mov x0, x24
; bl 0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] }
; mov x12, sp
; mov x0, x19
; str w0, [x12]
; add x13, sp, #4
; mov x0, x22
; str w0, [x13]
; mov x22, x0
; mov x0, x24
; bl 0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] }
; mov x15, sp
; mov x0, x22
; str w0, [x15]
; mov x0, x19
; bl 0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] }
; mov x0, x22
; bl 0
; add sp, sp, #16
; ldp x19, x22, [sp], #16
; ldr x24, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; str x24, [sp, #-0x10]!
; stp x19, x22, [sp, #-0x10]!
; sub sp, sp, #0x10
; block1: ; offset 0x14
; mov w9, #0
; mov w8, #1
; mov w0, #2
; mov x10, sp
; str w9, [x10]
; mov x24, x9
; add x9, sp, #4
; str w8, [x9]
; mov x19, x8
; add x10, sp, #8
; str w0, [x10]
; mov x22, x0
; mov x0, x24
; bl #0x48 ; reloc_external Call u0:0 0
; mov x12, sp
; mov x0, x19
; str w0, [x12]
; add x13, sp, #4
; mov x0, x22
; str w0, [x13]
; mov x22, x0
; mov x0, x24
; bl #0x6c ; reloc_external Call u0:0 0
; mov x15, sp
; mov x0, x22
; str w0, [x15]
; mov x0, x19
; bl #0x80 ; reloc_external Call u0:0 0
; mov x0, x22
; bl #0x88 ; reloc_external Call u0:0 0
; add sp, sp, #0x10
; ldp x19, x22, [sp], #0x10
; ldr x24, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret
function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 2, align = 2
ss2 = explicit_slot 8, align = 4
ss3 = explicit_slot 16, align = 8
ss4 = explicit_slot 48, align = 16
sig0 = () system_v
fn0 = colocated u0:0 sig0
block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64):
stack_store v0, ss0
stack_store v1, ss1
stack_store v2, ss2
stack_store v4, ss2+4
stack_store v3, ss3
stack_store v5, ss3+8
call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8]
return v0, v1, v2, v3, v4, v5
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp x23, x27, [sp, #-16]!
; stp x20, x21, [sp, #-16]!
; sub sp, sp, #128
; block0:
; mov x12, sp
; strb w0, [x12]
; mov x23, x0
; add x13, sp, #8
; strh w1, [x13]
; mov x20, x1
; add x14, sp, #16
; str w2, [x14]
; mov x21, x2
; add x15, sp, #20
; str s0, [x15]
; str q0, [sp, #96]
; add x0, sp, #24
; str x3, [x0]
; mov x27, x3
; add x1, sp, #32
; str d1, [x1]
; str q1, [sp, #112]
; bl 0
; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] }
; mov x0, x23
; mov x1, x20
; mov x2, x21
; mov x3, x27
; ldr q0, [sp, #96]
; ldr q1, [sp, #112]
; add sp, sp, #128
; ldp x20, x21, [sp], #16
; ldp x23, x27, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; stp x23, x27, [sp, #-0x10]!
; stp x20, x21, [sp, #-0x10]!
; sub sp, sp, #0x80
; block1: ; offset 0x14
; mov x12, sp
; strb w0, [x12]
; mov x23, x0
; add x13, sp, #8
; strh w1, [x13]
; mov x20, x1
; add x14, sp, #0x10
; str w2, [x14]
; mov x21, x2
; add x15, sp, #0x14
; str s0, [x15]
; stur q0, [sp, #0x60]
; add x0, sp, #0x18
; str x3, [x0]
; mov x27, x3
; add x1, sp, #0x20
; str d1, [x1]
; stur q1, [sp, #0x70]
; bl #0x5c ; reloc_external Call u0:0 0
; mov x0, x23
; mov x1, x20
; mov x2, x21
; mov x3, x27
; ldur q0, [sp, #0x60]
; ldur q1, [sp, #0x70]
; add sp, sp, #0x80
; ldp x20, x21, [sp], #0x10
; ldp x23, x27, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret

250
cranelift/filetests/filetests/isa/riscv64/user_stack_maps.clif

@ -0,0 +1,250 @@
test compile precise-output
set unwind_info=false
set enable_probestack=false
target riscv64
function %foo() system_v {
ss0 = explicit_slot 12, align = 4
sig0 = (i32) system_v
fn0 = colocated u0:0 sig0
block0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
stack_store v0, ss0
stack_store v1, ss0+4
stack_store v2, ss0+8
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8]
stack_store v1, ss0
stack_store v2, ss0+4
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4]
stack_store v2, ss0
call fn0(v1), stack_map=[i32 @ ss0+0]
call fn0(v2)
return
}
; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; addi sp,sp,-48
; sd s1,40(sp)
; sd s2,32(sp)
; sd s3,24(sp)
; block0:
; li a0,0
; mv s3,a0
; li a0,1
; li a2,2
; sw zero,0(slot)
; sw a0,4(slot)
; mv s1,a0
; sw a2,8(slot)
; mv s2,a2
; mv a0,s3
; call userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] }
; mv a0,s1
; sw a0,0(slot)
; mv a0,s2
; sw a0,4(slot)
; mv s2,a0
; mv a0,s3
; call userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] }
; mv a0,s2
; sw a0,0(slot)
; mv a0,s1
; call userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] }
; mv a0,s2
; call userextname0
; ld s1,40(sp)
; ld s2,32(sp)
; ld s3,24(sp)
; addi sp,sp,48
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; addi sp, sp, -0x30
; sd s1, 0x28(sp)
; sd s2, 0x20(sp)
; sd s3, 0x18(sp)
; block1: ; offset 0x20
; mv a0, zero
; mv s3, a0
; addi a0, zero, 1
; addi a2, zero, 2
; sw zero, 0(sp)
; sw a0, 4(sp)
; mv s1, a0
; sw a2, 8(sp)
; mv s2, a2
; mv a0, s3
; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0
; jalr ra
; mv a0, s1
; sw a0, 0(sp)
; mv a0, s2
; sw a0, 4(sp)
; mv s2, a0
; mv a0, s3
; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0
; jalr ra
; mv a0, s2
; sw a0, 0(sp)
; mv a0, s1
; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0
; jalr ra
; mv a0, s2
; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0
; jalr ra
; ld s1, 0x28(sp)
; ld s2, 0x20(sp)
; ld s3, 0x18(sp)
; addi sp, sp, 0x30
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret
function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 2, align = 2
ss2 = explicit_slot 8, align = 4
ss3 = explicit_slot 16, align = 8
ss4 = explicit_slot 48, align = 16
sig0 = () system_v
fn0 = colocated u0:0 sig0
block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64):
stack_store v0, ss0
stack_store v1, ss1
stack_store v2, ss2
stack_store v4, ss2+4
stack_store v3, ss3
stack_store v5, ss3+8
call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8]
return v0, v1, v2, v3, v4, v5
}
; VCode:
; addi sp,sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; addi sp,sp,-160
; sd s1,152(sp)
; sd s2,144(sp)
; sd s6,136(sp)
; sd s8,128(sp)
; sd s10,120(sp)
; fsd fs0,112(sp)
; fsd fs2,104(sp)
; block0:
; mv s10,a4
; sb a0,0(slot)
; mv s8,a0
; sh a1,8(slot)
; mv s6,a1
; sw a2,16(slot)
; mv s2,a2
; fsw fa0,20(slot)
; fmv.d fs2,fa0
; sd a3,24(slot)
; mv s1,a3
; fsd fa1,32(slot)
; fmv.d fs0,fa1
; call userextname0
; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] }
; mv a2,s2
; mv a4,s10
; sw a2,0(a4)
; mv a3,s1
; sd a3,8(a4)
; mv a0,s8
; mv a1,s6
; fmv.d fa0,fs2
; fmv.d fa1,fs0
; ld s1,152(sp)
; ld s2,144(sp)
; ld s6,136(sp)
; ld s8,128(sp)
; ld s10,120(sp)
; fld fs0,112(sp)
; fld fs2,104(sp)
; addi sp,sp,160
; ld ra,8(sp)
; ld fp,0(sp)
; addi sp,sp,16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; addi sp, sp, -0xa0
; sd s1, 0x98(sp)
; sd s2, 0x90(sp)
; sd s6, 0x88(sp)
; sd s8, 0x80(sp)
; sd s10, 0x78(sp)
; fsd fs0, 0x70(sp)
; fsd fs2, 0x68(sp)
; block1: ; offset 0x30
; mv s10, a4
; sb a0, 0(sp)
; mv s8, a0
; sh a1, 8(sp)
; mv s6, a1
; sw a2, 0x10(sp)
; mv s2, a2
; fsw fa0, 0x14(sp)
; fmv.d fs2, fa0
; sd a3, 0x18(sp)
; mv s1, a3
; fsd fa1, 0x20(sp)
; fmv.d fs0, fa1
; auipc ra, 0 ; reloc_external RiscvCallPlt u0:0 0
; jalr ra
; mv a2, s2
; mv a4, s10
; sw a2, 0(a4)
; mv a3, s1
; sd a3, 8(a4)
; mv a0, s8
; mv a1, s6
; fmv.d fa0, fs2
; fmv.d fa1, fs0
; ld s1, 0x98(sp)
; ld s2, 0x90(sp)
; ld s6, 0x88(sp)
; ld s8, 0x80(sp)
; ld s10, 0x78(sp)
; fld fs0, 0x70(sp)
; fld fs2, 0x68(sp)
; addi sp, sp, 0xa0
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

221
cranelift/filetests/filetests/isa/s390x/user_stack_maps.clif

@ -0,0 +1,221 @@
test compile precise-output
set unwind_info=false
set enable_probestack=false
target s390x
function %foo() system_v {
ss0 = explicit_slot 12, align = 4
sig0 = (i32) system_v
fn0 = colocated u0:0 sig0
block0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
stack_store v0, ss0
stack_store v1, ss0+4
stack_store v2, ss0+8
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8]
stack_store v1, ss0
stack_store v2, ss0+4
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4]
stack_store v2, ss0
call fn0(v1), stack_map=[i32 @ ss0+0]
call fn0(v2)
return
}
; VCode:
; stmg %r7, %r15, 56(%r15)
; aghi %r15, -176
; block0:
; lhi %r2, 0
; lgr %r11, %r2
; lhi %r2, 1
; lgr %r7, %r2
; lhi %r2, 2
; lgr %r9, %r2
; la %r2, 160(%r15)
; mvhi 0(%r2), 0
; la %r3, 164(%r15)
; mvhi 0(%r3), 1
; la %r4, 168(%r15)
; mvhi 0(%r4), 2
; lgr %r2, %r11
; brasl %r14, userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] }
; la %r2, 160(%r15)
; mvhi 0(%r2), 1
; la %r3, 164(%r15)
; mvhi 0(%r3), 2
; lgr %r2, %r11
; brasl %r14, userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] }
; la %r5, 160(%r15)
; mvhi 0(%r5), 2
; lgr %r2, %r7
; brasl %r14, userextname0
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] }
; lgr %r2, %r9
; brasl %r14, userextname0
; lmg %r7, %r15, 232(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r7, %r15, 0x38(%r15)
; aghi %r15, -0xb0
; block1: ; offset 0xa
; lhi %r2, 0
; lgr %r11, %r2
; lhi %r2, 1
; lgr %r7, %r2
; lhi %r2, 2
; lgr %r9, %r2
; la %r2, 0xa0(%r15)
; mvhi 0(%r2), 0
; la %r3, 0xa4(%r15)
; mvhi 0(%r3), 1
; la %r4, 0xa8(%r15)
; mvhi 0(%r4), 2
; lgr %r2, %r11
; brasl %r14, 0x44 ; reloc_external PLTRel32Dbl u0:0 2
; la %r2, 0xa0(%r15)
; mvhi 0(%r2), 1
; la %r3, 0xa4(%r15)
; mvhi 0(%r3), 2
; lgr %r2, %r11
; brasl %r14, 0x62 ; reloc_external PLTRel32Dbl u0:0 2
; la %r5, 0xa0(%r15)
; mvhi 0(%r5), 2
; lgr %r2, %r7
; brasl %r14, 0x76 ; reloc_external PLTRel32Dbl u0:0 2
; lgr %r2, %r9
; brasl %r14, 0x80 ; reloc_external PLTRel32Dbl u0:0 2
; lmg %r7, %r15, 0xe8(%r15)
; br %r14
function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 2, align = 2
ss2 = explicit_slot 8, align = 4
ss3 = explicit_slot 16, align = 8
ss4 = explicit_slot 48, align = 16
sig0 = () system_v
fn0 = colocated u0:0 sig0
block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64):
stack_store v0, ss0
stack_store v1, ss1
stack_store v2, ss2
stack_store v4, ss2+4
stack_store v3, ss3
stack_store v5, ss3+8
call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8]
return v0, v1, v2, v3, v4, v5
}
; VCode:
; stmg %r6, %r15, 48(%r15)
; aghi %r15, -352
; std %f8, 288(%r15)
; std %f9, 296(%r15)
; std %f10, 304(%r15)
; std %f11, 312(%r15)
; std %f12, 320(%r15)
; std %f13, 328(%r15)
; std %f14, 336(%r15)
; std %f15, 344(%r15)
; block0:
; la %r9, 160(%r15)
; stc %r2, 0(%r9)
; lgr %r11, %r2
; la %r2, 168(%r15)
; sth %r3, 0(%r2)
; lgr %r9, %r3
; la %r2, 176(%r15)
; st %r4, 0(%r2)
; lgr %r7, %r4
; la %r2, 180(%r15)
; ste %f0, 0(%r2)
; vst %v0, 256(%r15)
; la %r2, 184(%r15)
; stg %r5, 0(%r2)
; lgr %r6, %r5
; la %r3, 192(%r15)
; std %f2, 0(%r3)
; vst %v2, 272(%r15)
; brasl %r14, userextname0
; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] }
; lgr %r2, %r11
; lgr %r3, %r9
; lgr %r4, %r7
; lgr %r5, %r6
; vl %v0, 256(%r15)
; vl %v2, 272(%r15)
; ld %f8, 288(%r15)
; ld %f9, 296(%r15)
; ld %f10, 304(%r15)
; ld %f11, 312(%r15)
; ld %f12, 320(%r15)
; ld %f13, 328(%r15)
; ld %f14, 336(%r15)
; ld %f15, 344(%r15)
; lmg %r6, %r15, 400(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r6, %r15, 0x30(%r15)
; aghi %r15, -0x160
; std %f8, 0x120(%r15)
; std %f9, 0x128(%r15)
; std %f10, 0x130(%r15)
; std %f11, 0x138(%r15)
; std %f12, 0x140(%r15)
; std %f13, 0x148(%r15)
; std %f14, 0x150(%r15)
; std %f15, 0x158(%r15)
; block1: ; offset 0x2a
; la %r9, 0xa0(%r15)
; stc %r2, 0(%r9)
; lgr %r11, %r2
; la %r2, 0xa8(%r15)
; sth %r3, 0(%r2)
; lgr %r9, %r3
; la %r2, 0xb0(%r15)
; st %r4, 0(%r2)
; lgr %r7, %r4
; la %r2, 0xb4(%r15)
; ste %f0, 0(%r2)
; vst %v0, 0x100(%r15)
; la %r2, 0xb8(%r15)
; stg %r5, 0(%r2)
; lgr %r6, %r5
; la %r3, 0xc0(%r15)
; std %f2, 0(%r3)
; vst %v2, 0x110(%r15)
; brasl %r14, 0x78 ; reloc_external PLTRel32Dbl u0:0 2
; lgr %r2, %r11
; lgr %r3, %r9
; lgr %r4, %r7
; lgr %r5, %r6
; vl %v0, 0x100(%r15)
; vl %v2, 0x110(%r15)
; ld %f8, 0x120(%r15)
; ld %f9, 0x128(%r15)
; ld %f10, 0x130(%r15)
; ld %f11, 0x138(%r15)
; ld %f12, 0x140(%r15)
; ld %f13, 0x148(%r15)
; ld %f14, 0x150(%r15)
; ld %f15, 0x158(%r15)
; lmg %r6, %r15, 0x190(%r15)
; br %r14

241
cranelift/filetests/filetests/isa/x64/user_stack_maps.clif

@ -0,0 +1,241 @@
test compile precise-output
set unwind_info=false
set enable_probestack=false
target x86_64
function %foo() system_v {
ss0 = explicit_slot 12, align = 4
sig0 = (i32) system_v
fn0 = colocated u0:0 sig0
block0:
v0 = iconst.i32 0
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iconst.i32 3
stack_store v0, ss0
stack_store v1, ss0+4
stack_store v2, ss0+8
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8]
stack_store v1, ss0
stack_store v2, ss0+4
call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4]
stack_store v2, ss0
call fn0(v1), stack_map=[i32 @ ss0+0]
call fn0(v2)
return
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $48, %rsp
; movq %rbx, 16(%rsp)
; movq %r13, 24(%rsp)
; movq %r15, 32(%rsp)
; block0:
; xorl %edi, %edi, %edi
; movq %rdi, %r15
; movl $1, %edi
; movq %rdi, %rbx
; movl $2, %edi
; movq %rdi, %r13
; lea rsp(0 + virtual offset), %r11
; movl $0, 0(%r11)
; lea rsp(4 + virtual offset), %rsi
; movl $1, 0(%rsi)
; lea rsp(8 + virtual offset), %rdi
; movl $2, 0(%rdi)
; movq %r15, %rdi
; call User(userextname0)
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4, 8})] }
; lea rsp(0 + virtual offset), %rcx
; movl $1, 0(%rcx)
; lea rsp(4 + virtual offset), %rdx
; movl $2, 0(%rdx)
; movq %r15, %rdi
; call User(userextname0)
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0, 4})] }
; lea rsp(0 + virtual offset), %r9
; movl $2, 0(%r9)
; movq %rbx, %rdi
; call User(userextname0)
; ; UserStackMap { by_type: [(types::I32, CompoundBitSet {0})] }
; movq %r13, %rdi
; call User(userextname0)
; movq 16(%rsp), %rbx
; movq 24(%rsp), %r13
; movq 32(%rsp), %r15
; addq %rsp, $48, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0x30, %rsp
; movq %rbx, 0x10(%rsp)
; movq %r13, 0x18(%rsp)
; movq %r15, 0x20(%rsp)
; block1: ; offset 0x17
; xorl %edi, %edi
; movq %rdi, %r15
; movl $1, %edi
; movq %rdi, %rbx
; movl $2, %edi
; movq %rdi, %r13
; leaq (%rsp), %r11
; movl $0, (%r11)
; leaq 4(%rsp), %rsi
; movl $1, (%rsi)
; leaq 8(%rsp), %rdi
; movl $2, (%rdi)
; movq %r15, %rdi
; callq 0x55 ; reloc_external CallPCRel4 u0:0 -4
; leaq (%rsp), %rcx
; movl $1, (%rcx)
; leaq 4(%rsp), %rdx
; movl $2, (%rdx)
; movq %r15, %rdi
; callq 0x72 ; reloc_external CallPCRel4 u0:0 -4
; leaq (%rsp), %r9
; movl $2, (%r9)
; movq %rbx, %rdi
; callq 0x85 ; reloc_external CallPCRel4 u0:0 -4
; movq %r13, %rdi
; callq 0x8d ; reloc_external CallPCRel4 u0:0 -4
; movq 0x10(%rsp), %rbx
; movq 0x18(%rsp), %r13
; movq 0x20(%rsp), %r15
; addq $0x30, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
function %different_types(i8, i16, i32, i64, f32, f64) -> i8, i16, i32, i64, f32, f64 {
ss0 = explicit_slot 1
ss1 = explicit_slot 2, align = 2
ss2 = explicit_slot 8, align = 4
ss3 = explicit_slot 16, align = 8
ss4 = explicit_slot 48, align = 16
sig0 = () system_v
fn0 = colocated u0:0 sig0
block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: f32, v5: f64):
stack_store v0, ss0
stack_store v1, ss1
stack_store v2, ss2
stack_store v4, ss2+4
stack_store v3, ss3
stack_store v5, ss3+8
call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8]
return v0, v1, v2, v3, v4, v5
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; subq %rsp, $176, %rsp
; movq %rbx, 128(%rsp)
; movq %r12, 136(%rsp)
; movq %r13, 144(%rsp)
; movq %r14, 152(%rsp)
; movq %r15, 160(%rsp)
; block0:
; movq %r8, %r13
; lea rsp(0 + virtual offset), %r8
; movb %dil, 0(%r8)
; movq %rdi, %rbx
; lea rsp(8 + virtual offset), %r8
; movw %si, 0(%r8)
; movq %rsi, %r14
; lea rsp(16 + virtual offset), %r9
; movl %edx, 0(%r9)
; movq %rdx, %r12
; lea rsp(20 + virtual offset), %r10
; movss %xmm0, 0(%r10)
; movdqu %xmm0, rsp(96 + virtual offset)
; lea rsp(24 + virtual offset), %r11
; movq %rcx, 0(%r11)
; movq %rcx, %r15
; lea rsp(32 + virtual offset), %rsi
; movsd %xmm1, 0(%rsi)
; movdqu %xmm1, rsp(112 + virtual offset)
; call User(userextname0)
; ; UserStackMap { by_type: [(types::I8, CompoundBitSet {0}), (types::I16, CompoundBitSet {8}), (types::I32, CompoundBitSet {16}), (types::F32, CompoundBitSet {20}), (types::I64, CompoundBitSet {24}), (types::F64, CompoundBitSet {32})] }
; movq %r12, %rdx
; movq %r13, %r8
; movl %edx, 0(%r8)
; movq %r15, %rcx
; movq %rcx, 8(%r8)
; movq %rbx, %rax
; movq %r14, %rdx
; movdqu rsp(96 + virtual offset), %xmm0
; movdqu rsp(112 + virtual offset), %xmm1
; movq 128(%rsp), %rbx
; movq 136(%rsp), %r12
; movq 144(%rsp), %r13
; movq 152(%rsp), %r14
; movq 160(%rsp), %r15
; addq %rsp, $176, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; subq $0xb0, %rsp
; movq %rbx, 0x80(%rsp)
; movq %r12, 0x88(%rsp)
; movq %r13, 0x90(%rsp)
; movq %r14, 0x98(%rsp)
; movq %r15, 0xa0(%rsp)
; block1: ; offset 0x33
; movq %r8, %r13
; leaq (%rsp), %r8
; movb %dil, (%r8)
; movq %rdi, %rbx
; leaq 8(%rsp), %r8
; movw %si, (%r8)
; movq %rsi, %r14
; leaq 0x10(%rsp), %r9
; movl %edx, (%r9)
; movq %rdx, %r12
; leaq 0x14(%rsp), %r10
; movss %xmm0, (%r10)
; movdqu %xmm0, 0x60(%rsp)
; leaq 0x18(%rsp), %r11
; movq %rcx, (%r11)
; movq %rcx, %r15
; leaq 0x20(%rsp), %rsi
; movsd %xmm1, (%rsi)
; movdqu %xmm1, 0x70(%rsp)
; callq 0x86 ; reloc_external CallPCRel4 u0:0 -4
; movq %r12, %rdx
; movq %r13, %r8
; movl %edx, (%r8)
; movq %r15, %rcx
; movq %rcx, 8(%r8)
; movq %rbx, %rax
; movq %r14, %rdx
; movdqu 0x60(%rsp), %xmm0
; movdqu 0x70(%rsp), %xmm1
; movq 0x80(%rsp), %rbx
; movq 0x88(%rsp), %r12
; movq 0x90(%rsp), %r13
; movq 0x98(%rsp), %r14
; movq 0xa0(%rsp), %r15
; addq $0xb0, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
Loading…
Cancel
Save