Browse Source

cranelift: Specialize StackAMode::FPOffset (#8292)

* cranelift: Specialize StackAMode::FPOffset

The StackAMode::FPOffset address mode was always used together with
fp_to_arg_offset, to compute addresses within the current stack frame's
argument area.

Instead, introduce a new StackAMode::ArgOffset variant specifically for
stack addresses within the current frame's argument area. The details of
how to find the argument area are folded into the conversion from the
target-independent StackAMode into target-dependent address-mode types.

Currently, fp_to_arg_offset returns a target-specific constant, so I've
preserved that constant in each backend's address-mode conversion.

However, in general the location of the argument area may depend on
calling convention, flags, or other concerns. Also, it may not always be
desirable to use a frame pointer register as the base to find the
argument area. I expect some backends will eventually need to introduce
new synthetic addressing modes to resolve argument-area offsets after
register allocation, when the full frame layout is known.

I also cleaned up a couple minor things while I was in the area:
- Determining argument extension type was written in a confusing way and
  also had a typo in the comment describing it.
- riscv64's AMode::offset was only used in one place and is clearer
  when inlined.

* Review comments

@bjorn3 correctly pointed out that I had changed the overflow behavior
of this address computation.

The existing code always added the result of `fp_to_arg_offset` using
`i64` addition. It used Rust's default overflow behavior for addition,
which panics in debug builds and wraps in release builds.

In this commit I'm preserving that behavior:

- s390x doesn't add anything, so can't overflow.

- aarch64 and riscv64 use `i64` offsets in `FPOffset` address modes, so
  the addition is still using `i64` addition.

- x64 does a checked narrowing to `i32`, so it's important to do the
  addition before that, on the wider `i64` offset.
pull/8296/head
Jamey Sharp 7 months ago
committed by GitHub
parent
commit
9f94462067
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 7
      cranelift/codegen/src/isa/aarch64/abi.rs
  2. 5
      cranelift/codegen/src/isa/riscv64/abi.rs
  3. 10
      cranelift/codegen/src/isa/riscv64/inst/args.rs
  4. 7
      cranelift/codegen/src/isa/s390x/abi.rs
  5. 13
      cranelift/codegen/src/isa/x64/abi.rs
  6. 48
      cranelift/codegen/src/machinst/abi.rs

7
cranelift/codegen/src/isa/aarch64/abi.rs

@ -35,7 +35,8 @@ static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;
impl Into<AMode> for StackAMode {
fn into(self) -> AMode {
match self {
StackAMode::FPOffset(off, ty) => AMode::FPOffset { off, ty },
// Argument area begins after saved frame pointer + return address.
StackAMode::ArgOffset(off, ty) => AMode::FPOffset { off: off + 16, ty },
StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset { off, ty },
StackAMode::SPOffset(off, ty) => AMode::SPOffset { off, ty },
}
@ -365,10 +366,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
Ok((next_stack, extra_arg))
}
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
16 // frame pointer + return address.
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
}

5
cranelift/codegen/src/isa/riscv64/abi.rs

@ -205,11 +205,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
Ok((next_stack, pos))
}
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
// lr fp.
16
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted())
}

10
cranelift/codegen/src/isa/riscv64/inst/args.rs

@ -147,16 +147,9 @@ impl AMode {
pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 {
match self {
&AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset,
_ => self.get_offset(),
}
}
fn get_offset(&self) -> i64 {
match self {
&AMode::RegOffset(_, offset, ..) => offset,
&AMode::SPOffset(offset, _) => offset,
&AMode::FPOffset(offset, _) => offset,
&AMode::NominalSPOffset(offset, _) => offset,
&AMode::Const(_) | &AMode::Label(_) => 0,
}
}
@ -206,7 +199,8 @@ impl Display for AMode {
impl Into<AMode> for StackAMode {
fn into(self) -> AMode {
match self {
StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty),
// Argument area begins after saved lr + fp.
StackAMode::ArgOffset(offset, ty) => AMode::FPOffset(offset + 16, ty),
StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty),
StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty),
}

7
cranelift/codegen/src/isa/s390x/abi.rs

@ -191,7 +191,8 @@ pub static REG_SAVE_AREA_SIZE: u32 = 160;
impl Into<MemArg> for StackAMode {
fn into(self) -> MemArg {
match self {
StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off },
// Argument area always begins at the initial SP.
StackAMode::ArgOffset(off, _ty) => MemArg::InitialSPOffset { off },
StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off },
StackAMode::SPOffset(off, _ty) => {
MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted())
@ -404,10 +405,6 @@ impl ABIMachineSpec for S390xMachineDeps {
Ok((next_stack, extra_arg))
}
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
0
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
Inst::gen_load(into_reg, mem.into(), ty)
}

13
cranelift/codegen/src/isa/x64/abi.rs

@ -405,10 +405,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
Ok((next_stack, extra_arg))
}
fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
16 // frame pointer + return address.
}
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I {
// For integer-typed values, we always load a full 64 bits (and we always spill a full 64
// bits as well -- see `Inst::store()`).
@ -1023,9 +1019,12 @@ impl From<StackAMode> for SyntheticAmode {
// We enforce a 128 MB stack-frame size limit above, so these
// `expect()`s should never fail.
match amode {
StackAMode::FPOffset(off, _ty) => {
let off = i32::try_from(off)
.expect("Offset in FPOffset is greater than 2GB; should hit impl limit first");
StackAMode::ArgOffset(off, _ty) => {
let off =
i32::try_from(off + 16) // frame pointer + return address
.expect(
"Offset in ArgOffset is greater than 2GB; should hit impl limit first",
);
SyntheticAmode::Real(Amode::ImmReg {
simm32: off,
base: regs::rbp(),

48
cranelift/codegen/src/machinst/abi.rs

@ -281,9 +281,9 @@ pub enum ArgsOrRets {
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
pub enum StackAMode {
/// Offset from the frame pointer, possibly making use of a specific type
/// for a scaled indexing operation.
FPOffset(i64, ir::Type),
/// Offset into the current frame's argument area, possibly making use of a
/// specific type for a scaled indexing operation.
ArgOffset(i64, ir::Type),
/// Offset from the nominal stack pointer, possibly making use of a specific
/// type for a scaled indexing operation.
NominalSPOffset(i64, ir::Type),
@ -296,7 +296,7 @@ impl StackAMode {
/// Offset by an addend.
pub fn offset(self, addend: i64) -> Self {
match self {
StackAMode::FPOffset(off, ty) => StackAMode::FPOffset(off + addend, ty),
StackAMode::ArgOffset(off, ty) => StackAMode::ArgOffset(off + addend, ty),
StackAMode::NominalSPOffset(off, ty) => StackAMode::NominalSPOffset(off + addend, ty),
StackAMode::SPOffset(off, ty) => StackAMode::SPOffset(off + addend, ty),
}
@ -304,7 +304,7 @@ impl StackAMode {
pub fn get_type(&self) -> ir::Type {
match self {
&StackAMode::FPOffset(_, ty) => ty,
&StackAMode::ArgOffset(_, ty) => ty,
&StackAMode::NominalSPOffset(_, ty) => ty,
&StackAMode::SPOffset(_, ty) => ty,
}
@ -417,10 +417,6 @@ pub trait ABIMachineSpec {
args: ArgsAccumulator,
) -> CodegenResult<(u32, Option<usize>)>;
/// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
/// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64;
/// Generate a load from the stack.
fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
@ -1520,22 +1516,17 @@ impl<M: ABIMachineSpec> Callee<M> {
extension,
..
} => {
// However, we have to respect the extention mode for stack
// However, we have to respect the extension mode for stack
// slots, or else we grab the wrong bytes on big-endian.
let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension);
let ty = match (ext, ty_bits(ty) as u32) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < M::word_bits() =>
{
let ty =
if ext != ArgumentExtension::None && M::word_bits() > ty_bits(ty) as u32 {
M::word_type()
}
_ => ty,
};
} else {
ty
};
insts.push(M::gen_load_stack(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
ty,
),
StackAMode::ArgOffset(offset, ty),
*into_reg,
ty,
));
@ -1560,10 +1551,7 @@ impl<M: ABIMachineSpec> Callee<M> {
} else {
// Buffer address is implicitly defined by the ABI.
insts.push(M::gen_get_stack_addr(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
I8,
),
StackAMode::ArgOffset(offset, I8),
into_reg,
I8,
));
@ -1586,10 +1574,7 @@ impl<M: ABIMachineSpec> Callee<M> {
// This was allocated in the `init` routine.
let addr_reg = self.arg_temp_reg[idx].unwrap();
insts.push(M::gen_load_stack(
StackAMode::FPOffset(
M::fp_to_arg_offset(self.call_conv, &self.flags) + offset,
ty,
),
StackAMode::ArgOffset(offset, ty),
addr_reg,
ty,
));
@ -2357,10 +2342,7 @@ impl<M: ABIMachineSpec> CallSite<M> {
"tail calls require frame pointers to be enabled"
);
StackAMode::FPOffset(
offset + M::fp_to_arg_offset(self.caller_conv, &self.flags),
ty,
)
StackAMode::ArgOffset(offset, ty)
} else {
StackAMode::SPOffset(offset, ty)
};

Loading…
Cancel
Save