Browse Source

Reduce sig data size by changing sized spaces (#5402)

* Reduce sig sizes

* Fix test

* Change compute_args_loc to return u32
pull/5352/merge
Timothy Chen 2 years ago
committed by GitHub
parent
commit
8035945502
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 16
      cranelift/codegen/src/isa/aarch64/abi.rs
  2. 14
      cranelift/codegen/src/isa/riscv64/abi.rs
  3. 16
      cranelift/codegen/src/isa/s390x/abi.rs
  4. 14
      cranelift/codegen/src/isa/x64/abi.rs
  5. 24
      cranelift/codegen/src/machinst/abi.rs

16
cranelift/codegen/src/isa/aarch64/abi.rs

@@ -29,7 +29,7 @@ pub(crate) type AArch64Caller = Caller<AArch64MachineDeps>;
 /// This is the limit for the size of argument and return-value areas on the
 /// stack. We place a reasonable limit here to avoid integer overflow issues
 /// with 32-bit arithmetic: for now, 128 MB.
-static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

 impl Into<AMode> for StackAMode {
     fn into(self) -> AMode {
@@ -94,7 +94,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
         mut args: ArgsAccumulator<'_>,
-    ) -> CodegenResult<(i64, Option<usize>)>
+    ) -> CodegenResult<(u32, Option<usize>)>
     where
         I: IntoIterator<Item = &'a ir::AbiParam>,
     {
@@ -116,7 +116,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         let mut next_xreg = 0;
         let mut next_vreg = 0;
-        let mut next_stack: u64 = 0;
+        let mut next_stack: u32 = 0;

         let (max_per_class_reg_vals, mut remaining_reg_vals) = match args_or_rets {
             ArgsOrRets::Args => (8, 16), // x0-x7 and v0-v7
@@ -152,13 +152,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                 assert_eq!(args_or_rets, ArgsOrRets::Args);
                 let offset = next_stack as i64;
-                let size = size as u64;
+                let size = size;
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                 next_stack += size;
                 args.push(ABIArg::StructArg {
                     pointer: None,
                     offset,
-                    size,
+                    size: size as u64,
                     purpose: param.purpose,
                 });
                 continue;
@@ -282,7 +282,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
             // Spill to the stack

             // Compute the stack slot's size.
-            let size = (ty_bits(param.value_type) / 8) as u64;
+            let size = (ty_bits(param.value_type) / 8) as u32;

             let size = if is_apple_cc
                 || (call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets)
@@ -308,7 +308,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 // Build the stack locations from each slot
                 .scan(next_stack, |next_stack, ty| {
                     let slot_offset = *next_stack as i64;
-                    *next_stack += (ty_bits(ty) / 8) as u64;
+                    *next_stack += (ty_bits(ty) / 8) as u32;
                     Some((ty, slot_offset))
                 })
@@ -358,7 +358,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
             return Err(CodegenError::ImplLimitExceeded);
         }

-        Ok((next_stack as i64, extra_arg))
+        Ok((next_stack, extra_arg))
     }

     fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {

14
cranelift/codegen/src/isa/riscv64/abi.rs

@@ -35,7 +35,7 @@ pub(crate) type Riscv64ABICaller = Caller<Riscv64MachineDeps>;
 /// This is the limit for the size of argument and return-value areas on the
 /// stack. We place a reasonable limit here to avoid integer overflow issues
 /// with 32-bit arithmetic: for now, 128 MB.
-static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

 /// Riscv64-specific ABI behavior. This struct just serves as an implementation
 /// point for the trait; it is never actually instantiated.
@@ -63,7 +63,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
         mut args: ArgsAccumulator<'_>,
-    ) -> CodegenResult<(i64, Option<usize>)>
+    ) -> CodegenResult<(u32, Option<usize>)>
     where
         I: IntoIterator<Item = &'a ir::AbiParam>,
     {
@@ -78,14 +78,14 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         let mut next_x_reg = x_start;
         let mut next_f_reg = f_start;
         // Stack space.
-        let mut next_stack: u64 = 0;
+        let mut next_stack: u32 = 0;

         let mut return_one_register_used = false;

         for param in params {
             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                 let offset = next_stack;
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
-                next_stack += size as u64;
+                next_stack += size;
                 args.push(ABIArg::StructArg {
                     pointer: None,
                     offset: offset as i64,
@@ -135,7 +135,7 @@ impl ABIMachineSpec for Riscv64MachineDeps {
                 //
                 // Note that in all cases 16-byte stack alignment happens
                 // separately after all args.
-                let size = (reg_ty.bits() / 8) as u64;
+                let size = reg_ty.bits() / 8;
                 let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
                     size
                 } else {
@@ -181,13 +181,13 @@ impl ABIMachineSpec for Riscv64MachineDeps {
         } else {
             None
         };
-        next_stack = align_to(next_stack, Self::stack_align(call_conv) as u64);
+        next_stack = align_to(next_stack, Self::stack_align(call_conv));

         // To avoid overflow issues, limit the arg/return size to something
         // reasonable -- here, 128 MB.
         if next_stack > STACK_ARG_RET_SIZE_LIMIT {
             return Err(CodegenError::ImplLimitExceeded);
         }

-        CodegenResult::Ok((next_stack as i64, pos))
+        CodegenResult::Ok((next_stack, pos))
     }

     fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {

16
cranelift/codegen/src/isa/s390x/abi.rs

@@ -184,7 +184,7 @@ fn get_vecreg_for_ret(idx: usize) -> Option<Reg> {
 /// This is the limit for the size of argument and return-value areas on the
 /// stack. We place a reasonable limit here to avoid integer overflow issues
 /// with 32-bit arithmetic: for now, 128 MB.
-static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

 /// The size of the register save area
 pub static REG_SAVE_AREA_SIZE: u32 = 160;
@@ -228,17 +228,17 @@ impl ABIMachineSpec for S390xMachineDeps {
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
         mut args: ArgsAccumulator<'_>,
-    ) -> CodegenResult<(i64, Option<usize>)>
+    ) -> CodegenResult<(u32, Option<usize>)>
     where
         I: IntoIterator<Item = &'a ir::AbiParam>,
     {
         let mut next_gpr = 0;
         let mut next_fpr = 0;
         let mut next_vr = 0;
-        let mut next_stack: u64 = 0;
+        let mut next_stack: u32 = 0;

         if args_or_rets == ArgsOrRets::Args {
-            next_stack = REG_SAVE_AREA_SIZE as u64;
+            next_stack = REG_SAVE_AREA_SIZE;
         }

         // In the SystemV ABI, the return area pointer is the first argument,
@@ -307,7 +307,7 @@ impl ABIMachineSpec for S390xMachineDeps {
         } else {
             // Compute size. Every argument or return value takes a slot of
             // at least 8 bytes, except for return values in the Wasmtime ABI.
-            let size = (ty_bits(param.value_type) / 8) as u64;
+            let size = (ty_bits(param.value_type) / 8) as u32;
             let slot_size = if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets
             {
                 size
@@ -401,11 +401,11 @@ impl ABIMachineSpec for S390xMachineDeps {
             match arg {
                 ABIArg::StructArg { offset, size, .. } => {
                     *offset = next_stack as i64;
-                    next_stack += *size;
+                    next_stack += *size as u32;
                 }
                 ABIArg::ImplicitPtrArg { offset, ty, .. } => {
                     *offset = next_stack as i64;
-                    next_stack += (ty_bits(*ty) / 8) as u64;
+                    next_stack += (ty_bits(*ty) / 8) as u32;
                 }
                 _ => {}
             }
@@ -417,7 +417,7 @@ impl ABIMachineSpec for S390xMachineDeps {
             return Err(CodegenError::ImplLimitExceeded);
         }

-        Ok((next_stack as i64, extra_arg))
+        Ok((next_stack, extra_arg))
     }

     fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {

14
cranelift/codegen/src/isa/x64/abi.rs

@@ -18,7 +18,7 @@ use std::convert::TryFrom;
 /// This is the limit for the size of argument and return-value areas on the
 /// stack. We place a reasonable limit here to avoid integer overflow issues
 /// with 32-bit arithmetic: for now, 128 MB.
-static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024;

 /// Support for the x64 ABI from the callee side (within a function body).
 pub(crate) type X64Callee = Callee<X64ABIMachineSpec>;
@@ -87,7 +87,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
         mut args: ArgsAccumulator<'_>,
-    ) -> CodegenResult<(i64, Option<usize>)>
+    ) -> CodegenResult<(u32, Option<usize>)>
     where
         I: IntoIterator<Item = &'a ir::AbiParam>,
     {
@@ -95,7 +95,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         let mut next_gpr = 0;
         let mut next_vreg = 0;
-        let mut next_stack: u64 = 0;
+        let mut next_stack: u32 = 0;
         let mut next_param_idx = 0; // Fastcall cares about overall param index

         if args_or_rets == ArgsOrRets::Args && is_fastcall {
@@ -110,13 +110,13 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         for param in params {
             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
                 let offset = next_stack as i64;
-                let size = size as u64;
+                let size = size;
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
                 next_stack += size;
                 args.push(ABIArg::StructArg {
                     pointer: None,
                     offset,
-                    size,
+                    size: size as u64,
                     purpose: param.purpose,
                 });
                 continue;
@@ -197,7 +197,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
                 //
                 // Note that in all cases 16-byte stack alignment happens
                 // separately after all args.
-                let size = (reg_ty.bits() / 8) as u64;
+                let size = reg_ty.bits() / 8;
                 let size = if args_or_rets == ArgsOrRets::Rets && call_conv.extends_wasmtime() {
                     size
                 } else {
@@ -251,7 +251,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
             return Err(CodegenError::ImplLimitExceeded);
         }

-        Ok((next_stack as i64, extra_arg))
+        Ok((next_stack, extra_arg))
     }

     fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {

24
cranelift/codegen/src/machinst/abi.rs

@@ -400,7 +400,7 @@ pub trait ABIMachineSpec {
         args_or_rets: ArgsOrRets,
         add_ret_area_ptr: bool,
         args: ArgsAccumulator<'_>,
-    ) -> CodegenResult<(i64, Option<usize>)>
+    ) -> CodegenResult<(u32, Option<usize>)>
     where
         I: IntoIterator<Item = &'a ir::AbiParam>;
@@ -643,11 +643,13 @@ pub struct SigData {
     /// This is a index into the `SigSet::abi_args`.
     rets_end: u32,

-    /// Space on stack used to store arguments.
-    sized_stack_arg_space: i64,
+    /// Space on stack used to store arguments. We're storing the size in u32 to
+    /// reduce the size of the struct.
+    sized_stack_arg_space: u32,

-    /// Space on stack used to store return values.
-    sized_stack_ret_space: i64,
+    /// Space on stack used to store return values. We're storing the size in u32 to
+    /// reduce the size of the struct.
+    sized_stack_ret_space: u32,

     /// Index in `args` of the stack-return-value-area argument.
     stack_ret_arg: Option<u16>,
@@ -659,12 +661,12 @@ pub struct SigData {
 impl SigData {
     /// Get total stack space required for arguments.
     pub fn sized_stack_arg_space(&self) -> i64 {
-        self.sized_stack_arg_space
+        self.sized_stack_arg_space.into()
     }

     /// Get total stack space required for return values.
     pub fn sized_stack_ret_space(&self) -> i64 {
-        self.sized_stack_ret_space
+        self.sized_stack_ret_space.into()
     }

     /// Get calling convention used.
@@ -1920,7 +1922,7 @@ impl<M: ABIMachineSpec> Callee<M> {
     /// Returns the size of arguments expected on the stack.
     pub fn stack_args_size(&self, sigs: &SigSet) -> u32 {
-        sigs[self.sig].sized_stack_arg_space as u32
+        sigs[self.sig].sized_stack_arg_space
     }

     /// Get the spill-slot size.
@@ -2324,7 +2326,7 @@ impl<M: ABIMachineSpec> Caller<M> {
                     });
                 }
                 &ABIArgSlot::Stack { offset, ty, .. } => {
-                    let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space;
+                    let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space();
                     insts.push(M::gen_load_stack(
                         StackAMode::SPOffset(offset + ret_area_base, ty),
                         *into_reg,
@@ -2361,7 +2363,7 @@ impl<M: ABIMachineSpec> Caller<M> {
         let word_type = M::word_type();
         if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg {
             let rd = ctx.alloc_tmp(word_type).only_reg().unwrap();
-            let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space;
+            let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space();
             ctx.emit(M::gen_get_stack_addr(
                 StackAMode::SPOffset(ret_area_base, I8),
                 rd,
@@ -2403,6 +2405,6 @@ mod tests {
     fn sig_data_size() {
         // The size of `SigData` is performance sensitive, so make sure
         // we don't regress it unintentionally.
-        assert_eq!(std::mem::size_of::<SigData>(), 32);
+        assert_eq!(std::mem::size_of::<SigData>(), 24);
     }
 }

Loading…
Cancel
Save