s390x: Add support for inline probestack (#9423)
This implements the gen_inline_probestack callback in line with
the implementation on other platforms, which allows detection
of stack overflows on misconfigured hosts.

Fixes: #9396
uweigand authored Oct 9, 2024
1 parent de469e2 commit d50fea6
Showing 8 changed files with 205 additions and 19 deletions.
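As a minimal sketch of how an embedder requests this behaviour (assuming the standard cranelift-codegen settings API; this snippet is not part of the commit), the flags exercised by the new inline-probestack.clif filetest can also be set programmatically:

use cranelift_codegen::settings::{self, Configurable};

// Build shared flags that ask for inline stack probing with the default
// 4 KiB (2^12) guard-page size, mirroring the "set" lines in the filetest.
fn inline_probestack_flags() -> settings::Flags {
    let mut builder = settings::builder();
    builder.enable("enable_probestack").unwrap();
    builder.set("probestack_strategy", "inline").unwrap();
    builder.set("probestack_size_log2", "12").unwrap();
    settings::Flags::new(builder)
}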
52 changes: 48 additions & 4 deletions cranelift/codegen/src/isa/s390x/abi.rs
@@ -648,12 +648,56 @@ impl ABIMachineSpec for S390xMachineDeps {
}

fn gen_inline_probestack(
_insts: &mut SmallInstVec<Self::I>,
insts: &mut SmallInstVec<Self::I>,
_call_conv: isa::CallConv,
_frame_size: u32,
_guard_size: u32,
frame_size: u32,
guard_size: u32,
) {
unimplemented!("Inline stack probing is unimplemented on S390x");
// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;

// The stack probe loop currently takes 4 instructions and each unrolled
// probe takes 2. Set this to 2 to keep the max size to 4 instructions.
const PROBE_MAX_UNROLL: u32 = 2;

if probe_count <= PROBE_MAX_UNROLL {
// Unrolled probe loop.
for _ in 0..probe_count {
insts.extend(Self::gen_sp_reg_adjust(-(guard_size as i32)));

insts.push(Inst::StoreImm8 {
imm: 0,
mem: MemArg::reg(stack_reg(), MemFlags::trusted()),
});
}
} else {
// Explicit probe loop.

// Load the number of probes into a register used as loop counter.
// `gen_inline_probestack` is called after regalloc2, so we can
// use the nonallocatable spilltmp register for this purpose.
let probe_count_reg = writable_spilltmp_reg();
if let Ok(probe_count) = i16::try_from(probe_count) {
insts.push(Inst::Mov32SImm16 {
rd: probe_count_reg,
imm: probe_count,
});
} else {
insts.push(Inst::Mov32Imm {
rd: probe_count_reg,
imm: probe_count,
});
}

// Emit probe loop. The guard size is assumed to fit in 16 bits.
insts.push(Inst::StackProbeLoop {
probe_count: probe_count_reg,
guard_size: i16::try_from(guard_size).unwrap(),
});
}

// Restore the stack pointer to its original position.
insts.extend(Self::gen_sp_reg_adjust((probe_count * guard_size) as i32));
}

fn gen_clobber_save(
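The probe-count arithmetic in gen_inline_probestack above can be checked in isolation. The sketch below is a hypothetical standalone helper (it is not Cranelift's align_to): the frame size is rounded up to a whole number of guard pages and each page receives exactly one probe.

// Ceiling division, equivalent to align_to(frame_size, guard_size) / guard_size.
fn probe_count(frame_size: u32, guard_size: u32) -> u32 {
    (frame_size + guard_size - 1) / guard_size
}

fn main() {
    // Matches the filetests further down: an 8192-byte frame needs 2
    // unrolled probes, a 100000-byte frame needs 25 loop iterations.
    assert_eq!(probe_count(8192, 4096), 2);
    assert_eq!(probe_count(100_000, 4096), 25);
}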
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst.isle
@@ -977,6 +977,11 @@
(ridx Reg)
(targets BoxVecMachLabel))

;; Stack probe loop sequence, as one compound instruction.
(StackProbeLoop
(probe_count WritableReg)
(guard_size i16))

;; Load an inline symbol reference with relocation.
(LoadSymbolReloc
(rd WritableReg)
30 changes: 30 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -3383,6 +3383,36 @@ impl Inst {
start_off = sink.cur_offset();
}

Inst::StackProbeLoop {
probe_count,
guard_size,
} => {
// Emit the loop start label
let loop_start = sink.get_label();
sink.bind_label(loop_start, state.ctrl_plane_mut());

// aghi %r15, -GUARD_SIZE
let inst = Inst::AluRSImm16 {
alu_op: ALUOp::Add64,
rd: writable_stack_reg(),
ri: stack_reg(),
imm: -guard_size,
};
inst.emit(sink, emit_info, state);

// mvi 0(%r15), 0
let inst = Inst::StoreImm8 {
imm: 0,
mem: MemArg::reg(stack_reg(), MemFlags::trusted()),
};
inst.emit(sink, emit_info, state);

// brct PROBE_COUNT, LOOP_START
let opcode = 0xa76; // BRCT
sink.use_label_at_offset(sink.cur_offset(), loop_start, LabelUse::BranchRI);
put(sink, &enc_ri_b(opcode, probe_count.to_reg(), 0));
}

&Inst::Unwind { ref inst } => {
sink.add_unwind(inst.clone());
}
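The emitted sequence relies on BRCT (branch on count), which decrements its register and branches while the result is nonzero. A host-side simulation of the three-instruction loop (a sketch, not Cranelift code, assuming probe_count is at least 1) shows that each guard page below the entry stack pointer is touched exactly once:

fn simulate_probe_loop(mut sp: i64, probe_count: u32, guard_size: i64) -> Vec<i64> {
    let mut touched = Vec::new();
    let mut counter = probe_count;
    loop {
        sp -= guard_size;   // aghi %r15, -GUARD_SIZE
        touched.push(sp);   // mvi 0(%r15), 0 (record the probed address)
        counter -= 1;       // brct decrements the counter ...
        if counter == 0 {   // ... and falls through once it reaches zero
            break;
        }
    }
    touched
}

fn main() {
    // 25 probes of 4096 bytes, as in the %large filetest below.
    let touched = simulate_probe_loop(0, 25, 4096);
    assert_eq!(touched.len(), 25);
    assert_eq!(*touched.last().unwrap(), -25 * 4096);
}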
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -7032,6 +7032,15 @@ fn test_s390x_binemit() {
"0: cr %r2, %r3 ; jgnh 1f ; cs %r4, %r5, 0(%r6) ; jglh 0b ; 1:",
));

insns.push((
Inst::StackProbeLoop {
probe_count: writable_gpr(1),
guard_size: 4096,
},
"A7FBF0009200F000A716FFFC",
"0: aghi %r15, -4096 ; mvi 0(%r15), 0 ; brct %r1, 0b",
));

insns.push((
Inst::FpuMove32 {
rd: writable_vr(8),
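For readers cross-checking the expected bytes, a hedged decoding (assuming the standard z/Architecture RI and SI instruction formats) splits the 12-byte string into the three instructions of the loop:

// Expected machine code from the test above, one constant per instruction.
const AGHI_R15_M4096: &str = "A7FBF000"; // A7 | r1=0xF | op=B | imm16 = 0xF000 (-4096)
const MVI_AT_R15: &str = "9200F000";     // 92 | imm8=0x00 | base=0xF | disp12 = 0x000
const BRCT_R1_LOOP: &str = "A716FFFC";   // A7 | r1=0x1 | op=6 | imm16 = -4 halfwords (-8 bytes, back to offset 0)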
12 changes: 12 additions & 0 deletions cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -226,6 +226,7 @@ impl Inst {
| Inst::Debugtrap
| Inst::Trap { .. }
| Inst::JTSequence { .. }
| Inst::StackProbeLoop { .. }
| Inst::LoadSymbolReloc { .. }
| Inst::LoadAddr { .. }
| Inst::Loop { .. }
@@ -969,6 +970,9 @@ fn s390x_get_operands(inst: &mut Inst, collector: &mut DenyReuseVisitor<impl Ope
collector.reg_def(rd);
memarg_operands(mem, collector);
}
Inst::StackProbeLoop { probe_count, .. } => {
collector.reg_early_def(probe_count);
}
Inst::Loop { body, .. } => {
// `reuse_def` constraints can't be permitted in a Loop instruction because the operand
// index will always be relative to the Loop instruction, not the individual
@@ -3286,6 +3290,14 @@ impl Inst {

format!("{mem_str}{op} {rd}, {mem}")
}
&Inst::StackProbeLoop {
probe_count,
guard_size,
} => {
let probe_count = pretty_print_reg(probe_count.to_reg());
let stack_reg = pretty_print_reg(stack_reg());
format!("0: aghi {stack_reg}, -{guard_size} ; mvi 0({stack_reg}), 0 ; brct {probe_count}, 0b")
}
&Inst::Loop { ref body, cond } => {
let body = body
.into_iter()
96 changes: 96 additions & 0 deletions cranelift/filetests/filetests/isa/s390x/inline-probestack.clif
@@ -0,0 +1,96 @@
test compile precise-output
set enable_probestack=true
set probestack_strategy=inline
; This is the default and is equivalent to a page size of 4096
set probestack_size_log2=12
target s390x


; If the stack size is just one page, we can avoid the stack probe entirely
function %single_page() -> i64 tail {
ss0 = explicit_slot 2048

block0:
v1 = stack_addr.i64 ss0
return v1
}

; VCode:
; aghi %r15, -2048
; block0:
; la %r2, 0(%r15)
; aghi %r15, 2048
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; aghi %r15, -0x800
; block1: ; offset 0x4
; la %r2, 0(%r15)
; aghi %r15, 0x800
; br %r14

function %unrolled() -> i64 tail {
ss0 = explicit_slot 8192

block0:
v1 = stack_addr.i64 ss0
return v1
}

; VCode:
; aghi %r15, -4096
; mvi 0(%r15), 0
; aghi %r15, -4096
; mvi 0(%r15), 0
; aghi %r15, 8192
; aghi %r15, -8192
; block0:
; la %r2, 0(%r15)
; aghi %r15, 8192
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; aghi %r15, -0x1000
; mvi 0(%r15), 0
; aghi %r15, -0x1000
; mvi 0(%r15), 0
; aghi %r15, 0x2000
; aghi %r15, -0x2000
; block1: ; offset 0x18
; la %r2, 0(%r15)
; aghi %r15, 0x2000
; br %r14

function %large() -> i64 tail {
ss0 = explicit_slot 100000

block0:
v1 = stack_addr.i64 ss0
return v1
}

; VCode:
; lhi %r1, 25
; 0: aghi %r15, -4096 ; mvi 0(%r15), 0 ; brct %r1, 0b
; agfi %r15, 102400
; agfi %r15, -100000
; block0:
; la %r2, 0(%r15)
; agfi %r15, 100000
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; lhi %r1, 0x19
; aghi %r15, -0x1000
; mvi 0(%r15), 0
; brct %r1, 4
; agfi %r15, 0x19000
; agfi %r15, -0x186a0
; block1: ; offset 0x1c
; la %r2, 0(%r15)
; agfi %r15, 0x186a0
; br %r14

18 changes: 4 additions & 14 deletions crates/wasmtime/src/config.rs
@@ -8,7 +8,6 @@ use core::str::FromStr;
use serde_derive::{Deserialize, Serialize};
#[cfg(any(feature = "cache", feature = "cranelift", feature = "winch"))]
use std::path::Path;
use target_lexicon::Architecture;
use wasmparser::WasmFeatures;
#[cfg(feature = "cache")]
use wasmtime_cache::CacheConfig;
@@ -2068,16 +2067,14 @@ impl Config {

let target = self.compiler_target();

// On supported targets, we enable stack probing by default.
// We enable stack probing by default on all targets.
// This is required on Windows because of the way Windows
// commits its stacks, but it's also a good idea on other
// platforms to ensure guard pages are hit for large frame
// sizes.
if probestack_supported(target.architecture) {
self.compiler_config
.flags
.insert("enable_probestack".into());
}
self.compiler_config
.flags
.insert("enable_probestack".into());

if let Some(unwind_requested) = self.native_unwind_info {
if !self
@@ -3021,13 +3018,6 @@ impl PoolingAllocationConfig {
}
}

pub(crate) fn probestack_supported(arch: Architecture) -> bool {
matches!(
arch,
Architecture::X86_64 | Architecture::Aarch64(_) | Architecture::Riscv64(_)
)
}

#[cfg(feature = "std")]
fn detect_host_feature(feature: &str) -> Option<bool> {
#[cfg(target_arch = "aarch64")]
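A minimal sketch of the user-visible effect (assuming the public wasmtime API; none of this code is added by the commit): a default Config now enables stack probing on every target, including s390x, so no architecture check is needed before building an Engine.

use wasmtime::{Config, Engine, Result};

fn main() -> Result<()> {
    // No probestack-related knobs need to be touched; the default
    // configuration sets enable_probestack unconditionally.
    let config = Config::new();
    let _engine = Engine::new(&config)?;
    Ok(())
}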
2 changes: 1 addition & 1 deletion crates/wasmtime/src/engine.rs
@@ -306,7 +306,7 @@ impl Engine {
// runtime.
"libcall_call_conv" => *value == FlagValue::Enum("isa_default".into()),
"preserve_frame_pointers" => *value == FlagValue::Bool(true),
"enable_probestack" => *value == FlagValue::Bool(crate::config::probestack_supported(target.architecture)),
"enable_probestack" => *value == FlagValue::Bool(true),
"probestack_strategy" => *value == FlagValue::Enum("inline".into()),

// Features wasmtime doesn't use should all be disabled, since
