Skip to content

Commit

Permalink
cranelift: Include clobbers and outgoing args in stack limit (#8301) (#…
Browse files Browse the repository at this point in the history
…8334)

When we compute the amount of space that we need in a stack frame for
the stack limit check, we were only counting spill-slots and explicit
stack-slots. However, we need to account for all uses of the stack which
occur before the next stack limit check. That includes clobbers and any
stack arguments we want to pass to callees.

The maximum amount that we could have missed by is essentially bounded
by the number of arguments which could be passed to a function. In
Wasmtime, that is limited by `MAX_WASM_FUNCTION_PARAMS` in
`wasmparser::limits`, which is set to 1,000, and the largest arguments
are 16-byte vectors, so this could undercount by about 16kB.

This is not a security issue according to Wasmtime's security policy
(https://docs.wasmtime.dev/security-what-is-considered-a-security-vulnerability.html)
because it's the embedder's responsibility to ensure that the stack
where Wasmtime is running has enough extra space on top of the
configured `max_wasm_stack` size, and getting within 16kB of the host
stack size is too small to be safe even with this fixed.

However, this was definitely not the intended behavior when stack limit
checks or stack probes are enabled, and anyone with non-default
configurations or non-Wasmtime uses of Cranelift should evaluate whether
this bug impacts your use case.

(For reference: When Wasmtime is used in async mode or on Linux, the
default stack size is 1.5MB larger than the default WebAssembly stack
limit, so such configurations are typically safe regardless. On the
other hand, on macOS the default non-async stack size for threads other
than the main thread is the same size as the default for
`max_wasm_stack`, so that is too small with or without this bug fix.)

Co-authored-by: Jamey Sharp <[email protected]>
  • Loading branch information
alexcrichton and jameysharp authored Apr 11, 2024
1 parent 5244937 commit 4e0526d
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 24 deletions.
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
};

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
};

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let setup_area_size = 16; // RBP, return address

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
18 changes: 17 additions & 1 deletion cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1879,9 +1879,25 @@ impl<M: ABIMachineSpec> Callee<M> {
&frame_layout,
));

// The stack limit check needs to cover all the stack adjustments we
// might make, up to the next stack limit check in any function we
// call. Since this happens after frame setup, the current function's
// setup area needs to be accounted for in the caller's stack limit
// check, but we need to account for any setup area that our callees
// might need. Note that s390x may also use the outgoing args area for
// backtrace support even in leaf functions, so that should be accounted
// for unconditionally.
let total_stacksize = frame_layout.clobber_size
+ frame_layout.fixed_frame_storage_size
+ frame_layout.outgoing_args_size
+ if self.is_leaf {
0
} else {
frame_layout.setup_area_size
};

// Leaf functions with zero stack don't need a stack check if one's
// specified, otherwise always insert the stack check.
let total_stacksize = frame_layout.fixed_frame_storage_size;
if total_stacksize > 0 || !self.is_leaf {
if let Some((reg, stack_limit_load)) = &self.stack_limit {
insts.extend(stack_limit_load.clone());
Expand Down
6 changes: 6 additions & 0 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,12 @@ macro_rules! isle_prelude_method_helpers {
mut caller: $abicaller,
args: ValueSlice,
) -> InstOutput {
let off = self.lower_ctx.sigs()[abi].sized_stack_arg_space()
+ self.lower_ctx.sigs()[abi].sized_stack_ret_space();
self.lower_ctx
.abi_mut()
.accumulate_outgoing_args_size(off as u32);

caller.emit_stack_pre_adjust(self.lower_ctx);

self.gen_call_common_args(&mut caller, args);
Expand Down
24 changes: 14 additions & 10 deletions cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ block0(v0: i64):
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; add x16, x0, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
Expand All @@ -67,11 +68,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; cmp sp, x0
; b.lo #0x2c
; block1: ; offset 0x10
; ldr x2, #0x18
; b #0x20
; add x16, x0, #0x10
; cmp sp, x16
; b.lo #0x30
; block1: ; offset 0x14
; ldr x2, #0x1c
; b #0x24
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
Expand All @@ -95,6 +97,7 @@ block0(v0: i64):
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
Expand All @@ -109,11 +112,12 @@ block0(v0: i64):
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; add x16, x16, #0x10
; cmp sp, x16
; b.lo #0x34
; block1: ; offset 0x18
; ldr x2, #0x20
; b #0x28
; b.lo #0x38
; block1: ; offset 0x1c
; ldr x2, #0x24
; b #0x2c
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
Expand Down
12 changes: 8 additions & 4 deletions cranelift/filetests/filetests/isa/riscv64/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ block0(v0: i64):
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; trap_if stk_ovf##(sp ult a0)
; addi t6,a0,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
; callind a2
Expand All @@ -73,9 +74,10 @@ block0(v0: i64):
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; bgeu sp, a0, 8
; addi t6, a0, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x18
; block1: ; offset 0x1c
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc
Expand Down Expand Up @@ -105,6 +107,7 @@ block0(v0: i64):
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; addi t6,t6,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
Expand All @@ -122,9 +125,10 @@ block0(v0: i64):
; mv s0, sp
; ld t6, 0(a0)
; ld t6, 4(t6)
; addi t6, t6, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x20
; block1: ; offset 0x24
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc
Expand Down
16 changes: 10 additions & 6 deletions cranelift/filetests/filetests/isa/s390x/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ block0(v0: i64):
}

; VCode:
; clgrtle %r15, %r2
; la %r1, 160(%r2)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
; virtual_sp_offset_adjust 160
Expand All @@ -64,11 +65,12 @@ block0(v0: i64):
;
; Disassembled:
; block0: ; offset 0x0
; clgrtle %r15, %r2 ; trap: stk_ovf
; la %r1, 0xa0(%r2)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0xe
; bras %r1, 0x1a
; block1: ; offset 0x12
; bras %r1, 0x1e
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00
Expand All @@ -92,6 +94,7 @@ block0(v0: i64):
; VCode:
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 160(%r1)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
Expand All @@ -106,11 +109,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 0xa0(%r1)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0x1a
; bras %r1, 0x26
; block1: ; offset 0x1e
; bras %r1, 0x2a
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00
Expand Down

0 comments on commit 4e0526d

Please sign in to comment.