Browse Source

cranelift: Include clobbers and outgoing args in stack limit (#8301) (#8334)

When we compute the amount of space that we need in a stack frame for
the stack limit check, we were only counting spill-slots and explicit
stack-slots. However, we need to account for all uses of the stack which
occur before the next stack limit check. That includes clobbers and any
stack arguments we want to pass to callees.

The maximum amount that we could have missed by is essentially bounded
by the number of arguments which could be passed to a function. In
Wasmtime, that is limited by `MAX_WASM_FUNCTION_PARAMS` in
`wasmparser::limits`, which is set to 1,000, and the largest arguments
are 16-byte vectors, so this could undercount by about 16kB.

This is not a security issue according to Wasmtime's security policy
(https://docs.wasmtime.dev/security-what-is-considered-a-security-vulnerability.html)
because it's the embedder's responsibility to ensure that the stack
where Wasmtime is running has enough extra space on top of the
configured `max_wasm_stack` size, and getting within 16kB of the host
stack size is too small to be safe even with this fixed.

However, this was definitely not the intended behavior when stack limit
checks or stack probes are enabled, and anyone with non-default
configurations or non-Wasmtime uses of Cranelift should evaluate whether
this bug impacts your use case.

(For reference: When Wasmtime is used in async mode or on Linux, the
default stack size is 1.5MB larger than the default WebAssembly stack
limit, so such configurations are typically safe regardless. On the
other hand, on macOS the default non-async stack size for threads other
than the main thread is the same size as the default for
`max_wasm_stack`, so that is too small with or without this bug fix.)

Co-authored-by: Jamey Sharp <jsharp@fastly.com>
release-19.0.0
Alex Crichton 7 months ago
committed by GitHub
parent
commit
4e0526da43
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 1
      cranelift/codegen/src/isa/aarch64/abi.rs
  2. 1
      cranelift/codegen/src/isa/riscv64/abi.rs
  3. 1
      cranelift/codegen/src/isa/x64/abi.rs
  4. 18
      cranelift/codegen/src/machinst/abi.rs
  5. 6
      cranelift/codegen/src/machinst/isle.rs
  6. 24
      cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
  7. 12
      cranelift/filetests/filetests/isa/riscv64/stack-limit.clif
  8. 16
      cranelift/filetests/filetests/isa/s390x/stack-limit.clif

1
cranelift/codegen/src/isa/aarch64/abi.rs

@ -1164,7 +1164,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
};
// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,

1
cranelift/codegen/src/isa/riscv64/abi.rs

@ -692,7 +692,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
};
// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,

1
cranelift/codegen/src/isa/x64/abi.rs

@ -857,7 +857,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let setup_area_size = 16; // RBP, return address
// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,

18
cranelift/codegen/src/machinst/abi.rs

@ -1879,9 +1879,25 @@ impl<M: ABIMachineSpec> Callee<M> {
&frame_layout,
));
// The stack limit check needs to cover all the stack adjustments we
// might make, up to the next stack limit check in any function we
// call. Since this happens after frame setup, the current function's
// setup area needs to be accounted for in the caller's stack limit
// check, but we need to account for any setup area that our callees
// might need. Note that s390x may also use the outgoing args area for
// backtrace support even in leaf functions, so that should be accounted
// for unconditionally.
let total_stacksize = frame_layout.clobber_size
+ frame_layout.fixed_frame_storage_size
+ frame_layout.outgoing_args_size
+ if self.is_leaf {
0
} else {
frame_layout.setup_area_size
};
// Leaf functions with zero stack don't need a stack check if one's
// specified, otherwise always insert the stack check.
let total_stacksize = frame_layout.fixed_frame_storage_size;
if total_stacksize > 0 || !self.is_leaf {
if let Some((reg, stack_limit_load)) = &self.stack_limit {
insts.extend(stack_limit_load.clone());

6
cranelift/codegen/src/machinst/isle.rs

@ -838,6 +838,12 @@ macro_rules! isle_prelude_method_helpers {
mut caller: $abicaller,
args: ValueSlice,
) -> InstOutput {
let off = self.lower_ctx.sigs()[abi].sized_stack_arg_space()
+ self.lower_ctx.sigs()[abi].sized_stack_ret_space();
self.lower_ctx
.abi_mut()
.accumulate_outgoing_args_size(off as u32);
caller.emit_stack_pre_adjust(self.lower_ctx);
self.gen_call_common_args(&mut caller, args);

24
cranelift/filetests/filetests/isa/aarch64/stack-limit.clif

@ -55,7 +55,8 @@ block0(v0: i64):
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; add x16, x0, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
@ -67,11 +68,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; cmp sp, x0
; b.lo #0x2c
; block1: ; offset 0x10
; ldr x2, #0x18
; b #0x20
; add x16, x0, #0x10
; cmp sp, x16
; b.lo #0x30
; block1: ; offset 0x14
; ldr x2, #0x1c
; b #0x24
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
@ -95,6 +97,7 @@ block0(v0: i64):
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
@ -109,11 +112,12 @@ block0(v0: i64):
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; add x16, x16, #0x10
; cmp sp, x16
; b.lo #0x34
; block1: ; offset 0x18
; ldr x2, #0x20
; b #0x28
; b.lo #0x38
; block1: ; offset 0x1c
; ldr x2, #0x24
; b #0x2c
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2

12
cranelift/filetests/filetests/isa/riscv64/stack-limit.clif

@ -58,7 +58,8 @@ block0(v0: i64):
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; trap_if stk_ovf##(sp ult a0)
; addi t6,a0,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
; callind a2
@ -73,9 +74,10 @@ block0(v0: i64):
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; bgeu sp, a0, 8
; addi t6, a0, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x18
; block1: ; offset 0x1c
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc
@ -105,6 +107,7 @@ block0(v0: i64):
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; addi t6,t6,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
@ -122,9 +125,10 @@ block0(v0: i64):
; mv s0, sp
; ld t6, 0(a0)
; ld t6, 4(t6)
; addi t6, t6, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x20
; block1: ; offset 0x24
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc

16
cranelift/filetests/filetests/isa/s390x/stack-limit.clif

@ -52,7 +52,8 @@ block0(v0: i64):
}
; VCode:
; clgrtle %r15, %r2
; la %r1, 160(%r2)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
; virtual_sp_offset_adjust 160
@ -64,11 +65,12 @@ block0(v0: i64):
;
; Disassembled:
; block0: ; offset 0x0
; clgrtle %r15, %r2 ; trap: stk_ovf
; la %r1, 0xa0(%r2)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0xe
; bras %r1, 0x1a
; block1: ; offset 0x12
; bras %r1, 0x1e
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00
@ -92,6 +94,7 @@ block0(v0: i64):
; VCode:
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 160(%r1)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
@ -106,11 +109,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 0xa0(%r1)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0x1a
; bras %r1, 0x26
; block1: ; offset 0x1e
; bras %r1, 0x2a
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00

Loading…
Cancel
Save