From 06377eb08a649619cc8ac9a934cb3f119017f3ef Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 6 Nov 2024 19:23:33 -0600 Subject: [PATCH] Remove use of `MemoryStyle` when compiling with Cranelift (#9576) * Remove use of `MemoryStyle` when compiling with Cranelift Instead read directly from `tunables` and `MemoryType` with new helper methods that can be shared between Cranelift, Winch, and the rest of the memory subsystem. Note that this is intended to be a pure-refactoring change. The diff here is large-ish but it's mostly accounted for via code movement and indentation changes. The high-level changes made to the structure of the code are: * Metadata for PCC is de-indented and uses similar coarse determination for what facts to attach as before. Note that there's still a disconnect between PCC facts being applied and the actual load/store itself and so fully supporting PCC will probably require more refactoring in the future. * Cases have been reordered for actually emitting a bounds check. Due to there no longer being a top-level static/dynamic branch the cases have been reordered in terms of priority -- for example unconditional trapping is first, then elision of bounds checks, then the assumption the bound limit is a constant, etc. * Cases for bounds checks have had their arms rewritten in terms of the new properties. The main new one is that the fallback case for static memories previously which had a bounds check is a little more complicated as it additionally factors in `memory_may_move` in addition to `memory_reservation`. This is captured in the expanded documentation for this case, however. * Documentation was updated to avoid talking about static/dynamic memories. * Review comments --- crates/cranelift/src/func_environ.rs | 211 ++++--- .../code_translator/bounds_checks.rs | 579 +++++++++--------- crates/environ/src/types.rs | 40 ++ tests/disas/readonly-heap-base-pointer2.wat | 6 +- 4 files changed, 429 insertions(+), 407 deletions(-) diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index f37cbe83048..9ce0d93a201 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -20,10 +20,10 @@ use std::mem; use wasmparser::{Operator, WasmFeatures}; use wasmtime_environ::{ BuiltinFunctionIndex, DataIndex, ElemIndex, EngineOrModuleTypeIndex, FuncIndex, GlobalIndex, - IndexType, Memory, MemoryIndex, MemoryStyle, Module, ModuleInternedTypeIndex, - ModuleTranslation, ModuleTypesBuilder, PtrSize, Table, TableIndex, Tunables, TypeConvert, - TypeIndex, VMOffsets, WasmCompositeInnerType, WasmFuncType, WasmHeapTopType, WasmHeapType, - WasmRefType, WasmResult, WasmValType, + IndexType, Memory, MemoryIndex, Module, ModuleInternedTypeIndex, ModuleTranslation, + ModuleTypesBuilder, PtrSize, Table, TableIndex, Tunables, TypeConvert, TypeIndex, VMOffsets, + WasmCompositeInnerType, WasmFuncType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, + WasmValType, }; use wasmtime_environ::{FUNCREF_INIT_BIT, FUNCREF_MASK}; @@ -2370,113 +2370,108 @@ impl<'module_environment> crate::translate::FuncEnvironment // If we have a declared maximum, we can make this a "static" heap, which is // allocated up front and never moved. - let style = MemoryStyle::for_memory(memory, self.tunables); - let (base_fact, memory_type) = match style { - MemoryStyle::Dynamic { .. } => { - let (base_fact, data_mt) = if let Some(ptr_memtype) = ptr_memtype { - // Create a memtype representing the untyped memory region. 
- let data_mt = func.create_memory_type(ir::MemoryTypeData::DynamicMemory { - gv: heap_bound, - size: self.tunables.memory_guard_size, - }); - // This fact applies to any pointer to the start of the memory. - let base_fact = ir::Fact::dynamic_base_ptr(data_mt); - // This fact applies to the length. - let length_fact = ir::Fact::global_value( - u16::try_from(self.isa.pointer_type().bits()).unwrap(), - heap_bound, - ); - // Create a field in the vmctx for the base pointer. - match &mut func.memory_types[ptr_memtype] { - ir::MemoryTypeData::Struct { size, fields } => { - let base_offset = u64::try_from(base_offset).unwrap(); - fields.push(ir::MemoryTypeField { - offset: base_offset, - ty: self.isa.pointer_type(), - // Read-only field from the PoV of PCC checks: - // don't allow stores to this field. (Even if - // it is a dynamic memory whose base can - // change, that update happens inside the - // runtime, not in generated code.) - readonly: true, - fact: Some(base_fact.clone()), - }); - let current_length_offset = - u64::try_from(current_length_offset).unwrap(); - fields.push(ir::MemoryTypeField { - offset: current_length_offset, - ty: self.isa.pointer_type(), - // As above, read-only; only the runtime modifies it. - readonly: true, - fact: Some(length_fact), - }); - - let pointer_size = u64::from(self.isa.pointer_type().bytes()); - let fields_end = std::cmp::max( - base_offset + pointer_size, - current_length_offset + pointer_size, - ); - *size = std::cmp::max(*size, fields_end); - } - _ => { - panic!("Bad memtype"); - } + let host_page_size_log2 = self.target_config().page_size_align_log2; + let (base_fact, memory_type) = if !memory + .can_elide_bounds_check(self.tunables, host_page_size_log2) + { + if let Some(ptr_memtype) = ptr_memtype { + // Create a memtype representing the untyped memory region. + let data_mt = func.create_memory_type(ir::MemoryTypeData::DynamicMemory { + gv: heap_bound, + size: self.tunables.memory_guard_size, + }); + // This fact applies to any pointer to the start of the memory. + let base_fact = ir::Fact::dynamic_base_ptr(data_mt); + // This fact applies to the length. + let length_fact = ir::Fact::global_value( + u16::try_from(self.isa.pointer_type().bits()).unwrap(), + heap_bound, + ); + // Create a field in the vmctx for the base pointer. + match &mut func.memory_types[ptr_memtype] { + ir::MemoryTypeData::Struct { size, fields } => { + let base_offset = u64::try_from(base_offset).unwrap(); + fields.push(ir::MemoryTypeField { + offset: base_offset, + ty: self.isa.pointer_type(), + // Read-only field from the PoV of PCC checks: + // don't allow stores to this field. (Even if + // it is a dynamic memory whose base can + // change, that update happens inside the + // runtime, not in generated code.) + readonly: true, + fact: Some(base_fact.clone()), + }); + let current_length_offset = u64::try_from(current_length_offset).unwrap(); + fields.push(ir::MemoryTypeField { + offset: current_length_offset, + ty: self.isa.pointer_type(), + // As above, read-only; only the runtime modifies it. + readonly: true, + fact: Some(length_fact), + }); + + let pointer_size = u64::from(self.isa.pointer_type().bytes()); + let fields_end = std::cmp::max( + base_offset + pointer_size, + current_length_offset + pointer_size, + ); + *size = std::cmp::max(*size, fields_end); } - // Apply a fact to the base pointer. 
- (Some(base_fact), Some(data_mt)) - } else { - (None, None) - }; - - (base_fact, data_mt) - } - MemoryStyle::Static { - byte_reservation: bound_bytes, - } => { - let (base_fact, data_mt) = if let Some(ptr_memtype) = ptr_memtype { - // Create a memtype representing the untyped memory region. - let data_mt = func.create_memory_type(ir::MemoryTypeData::Memory { - size: bound_bytes - .checked_add(self.tunables.memory_guard_size) - .expect("Memory plan has overflowing size plus guard"), - }); - // This fact applies to any pointer to the start of the memory. - let base_fact = Fact::Mem { - ty: data_mt, - min_offset: 0, - max_offset: 0, - nullable: false, - }; - // Create a field in the vmctx for the base pointer. - match &mut func.memory_types[ptr_memtype] { - ir::MemoryTypeData::Struct { size, fields } => { - let offset = u64::try_from(base_offset).unwrap(); - fields.push(ir::MemoryTypeField { - offset, - ty: self.isa.pointer_type(), - // Read-only field from the PoV of PCC checks: - // don't allow stores to this field. (Even if - // it is a dynamic memory whose base can - // change, that update happens inside the - // runtime, not in generated code.) - readonly: true, - fact: Some(base_fact.clone()), - }); - *size = std::cmp::max( - *size, - offset + u64::from(self.isa.pointer_type().bytes()), - ); - } - _ => { - panic!("Bad memtype"); - } + _ => { + panic!("Bad memtype"); } - // Apply a fact to the base pointer. - (Some(base_fact), Some(data_mt)) - } else { - (None, None) + } + // Apply a fact to the base pointer. + (Some(base_fact), Some(data_mt)) + } else { + (None, None) + } + } else { + if let Some(ptr_memtype) = ptr_memtype { + // Create a memtype representing the untyped memory region. + let data_mt = func.create_memory_type(ir::MemoryTypeData::Memory { + size: self + .tunables + .memory_reservation + .checked_add(self.tunables.memory_guard_size) + .expect("Memory plan has overflowing size plus guard"), + }); + // This fact applies to any pointer to the start of the memory. + let base_fact = Fact::Mem { + ty: data_mt, + min_offset: 0, + max_offset: 0, + nullable: false, }; - (base_fact, data_mt) + // Create a field in the vmctx for the base pointer. + match &mut func.memory_types[ptr_memtype] { + ir::MemoryTypeData::Struct { size, fields } => { + let offset = u64::try_from(base_offset).unwrap(); + fields.push(ir::MemoryTypeField { + offset, + ty: self.isa.pointer_type(), + // Read-only field from the PoV of PCC checks: + // don't allow stores to this field. (Even if + // it is a dynamic memory whose base can + // change, that update happens inside the + // runtime, not in generated code.) + readonly: true, + fact: Some(base_fact.clone()), + }); + *size = std::cmp::max( + *size, + offset + u64::from(self.isa.pointer_type().bytes()), + ); + } + _ => { + panic!("Bad memtype"); + } + } + // Apply a fact to the base pointer. 
+ (Some(base_fact), Some(data_mt)) + } else { + (None, None) } }; diff --git a/crates/cranelift/src/translate/code_translator/bounds_checks.rs b/crates/cranelift/src/translate/code_translator/bounds_checks.rs index 728257c66b8..eb90a9666fb 100644 --- a/crates/cranelift/src/translate/code_translator/bounds_checks.rs +++ b/crates/cranelift/src/translate/code_translator/bounds_checks.rs @@ -27,7 +27,7 @@ use cranelift_codegen::{ ir::{Expr, Fact}, }; use cranelift_frontend::FunctionBuilder; -use wasmtime_environ::{MemoryStyle, WasmResult}; +use wasmtime_environ::WasmResult; use Reachability::*; /// Helper used to emit bounds checks (as necessary) and compute the native @@ -64,9 +64,14 @@ where let pcc = env.proof_carrying_code(); let host_page_size_log2 = env.target_config().page_size_align_log2; - let can_use_virtual_memory = - heap.memory.page_size_log2 >= host_page_size_log2 && env.signals_based_traps(); + let can_use_virtual_memory = heap + .memory + .can_use_virtual_memory(env.tunables(), host_page_size_log2); + let can_elide_bounds_check = heap + .memory + .can_elide_bounds_check(env.tunables(), host_page_size_log2); let memory_guard_size = env.tunables().memory_guard_size; + let memory_reservation = env.tunables().memory_reservation; let make_compare = |builder: &mut FunctionBuilder, compare_kind: IntCC, @@ -133,312 +138,294 @@ where // as `u64`s without fear of overflow, and we only have to be concerned with // whether adding in `index` will overflow. // - // Finally, the following right-hand sides of the matches do have a little + // Finally, the following if/else chains do have a little // bit of duplicated code across them, but I think writing it this way is // worth it for readability and seeing very clearly each of our cases for // different bounds checks and optimizations of those bounds checks. It is // intentionally written in a straightforward case-matching style that will // hopefully make it easy to port to ISLE one day. - let style = MemoryStyle::for_memory(heap.memory, env.tunables()); - Ok(match style { - // ====== Dynamic Memories ====== - // - // 1. First special case for when `offset + access_size == 1`: - // - // index + 1 > bound - // ==> index >= bound - MemoryStyle::Dynamic { .. } if offset_and_size == 1 => { - let bound = get_dynamic_heap_bound(builder, env, heap); - let oob = make_compare( - builder, - IntCC::UnsignedGreaterThanOrEqual, - index, - Some(0), - bound, - Some(0), - ); - Reachable(explicit_check_oob_condition_and_compute_addr( - env, - builder, - heap, - index, - offset, - access_size, - spectre_mitigations_enabled, - AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), - oob, - )) - } + if offset_and_size >= heap.memory.maximum_byte_size().unwrap_or(u64::MAX) { + // Special case: trap immediately if `offset + access_size > + // max_memory_size`, since we will end up being out-of-bounds regardless + // of the given `index`. + env.before_unconditionally_trapping_memory_access(builder)?; + env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); + return Ok(Unreachable); + } - // 2. Second special case for when we know that there are enough guard - // pages to cover the offset and access size. - // - // The precise should-we-trap condition is - // - // index + offset + access_size > bound - // - // However, if we instead check only the partial condition - // - // index > bound - // - // then the most out of bounds that the access can be, while that - // partial check still succeeds, is `offset + access_size`. 
- // - // However, when we have a guard region that is at least as large as - // `offset + access_size`, we can rely on the virtual memory - // subsystem handling these out-of-bounds errors at - // runtime. Therefore, the partial `index > bound` check is - // sufficient for this heap configuration. - // - // Additionally, this has the advantage that a series of Wasm loads - // that use the same dynamic index operand but different static - // offset immediates -- which is a common code pattern when accessing - // multiple fields in the same struct that is in linear memory -- - // will all emit the same `index > bound` check, which we can GVN. - MemoryStyle::Dynamic { .. } - if can_use_virtual_memory && offset_and_size <= memory_guard_size => - { - let bound = get_dynamic_heap_bound(builder, env, heap); - let oob = make_compare( - builder, - IntCC::UnsignedGreaterThan, - index, - Some(0), - bound, - Some(0), - ); - Reachable(explicit_check_oob_condition_and_compute_addr( - env, - builder, - heap, - index, - offset, - access_size, - spectre_mitigations_enabled, - AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), - oob, - )) - } + // Special case for when we can completely omit explicit + // bounds checks for 32-bit memories. + // + // First, let's rewrite our comparison to move all of the constants + // to one side: + // + // index + offset + access_size > bound + // ==> index > bound - (offset + access_size) + // + // We know the subtraction on the right-hand side won't wrap because + // we didn't hit the unconditional trap case above. + // + // Additionally, we add our guard pages (if any) to the right-hand + // side, since we can rely on the virtual memory subsystem at runtime + // to catch out-of-bound accesses within the range `bound .. bound + + // guard_size`. So now we are dealing with + // + // index > bound + guard_size - (offset + access_size) + // + // Note that `bound + guard_size` cannot overflow for + // correctly-configured heaps, as otherwise the heap wouldn't fit in + // a 64-bit memory space. + // + // The complement of our should-this-trap comparison expression is + // the should-this-not-trap comparison expression: + // + // index <= bound + guard_size - (offset + access_size) + // + // If we know the right-hand side is greater than or equal to + // `u32::MAX`, then + // + // index <= u32::MAX <= bound + guard_size - (offset + access_size) + // + // This expression is always true when the heap is indexed with + // 32-bit integers because `index` cannot be larger than + // `u32::MAX`. This means that `index` is always either in bounds or + // within the guard page region, neither of which require emitting an + // explicit bounds check. + if can_elide_bounds_check + && u64::from(u32::MAX) <= memory_reservation + memory_guard_size - offset_and_size + { + assert!(heap.index_type() == ir::types::I32); + assert!( + can_use_virtual_memory, + "static memories require the ability to use virtual memory" + ); + return Ok(Reachable(compute_addr( + &mut builder.cursor(), + heap, + env.pointer_type(), + index, + offset, + AddrPcc::static32(heap.pcc_memory_type, memory_reservation + memory_guard_size), + ))); + } - // 3. Third special case for when `offset + access_size <= min_size`. - // - // We know that `bound >= min_size`, so we can do the following - // comparison, without fear of the right-hand side wrapping around: - // - // index + offset + access_size > bound - // ==> index > bound - (offset + access_size) - MemoryStyle::Dynamic { .. 
} - if offset_and_size <= heap.memory.minimum_byte_size().unwrap_or(u64::MAX) => - { - let bound = get_dynamic_heap_bound(builder, env, heap); - let adjustment = offset_and_size as i64; - let adjustment_value = builder.ins().iconst(env.pointer_type(), adjustment); - if pcc { - builder.func.dfg.facts[adjustment_value] = - Some(Fact::constant(pointer_bit_width, offset_and_size)); - } - let adjusted_bound = builder.ins().isub(bound, adjustment_value); - if pcc { - builder.func.dfg.facts[adjusted_bound] = Some(Fact::global_value_offset( - pointer_bit_width, - bound_gv, - -adjustment, - )); - } - let oob = make_compare( - builder, - IntCC::UnsignedGreaterThan, - index, - Some(0), - adjusted_bound, - Some(adjustment), - ); - Reachable(explicit_check_oob_condition_and_compute_addr( - env, - builder, - heap, - index, - offset, - access_size, - spectre_mitigations_enabled, - AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), - oob, - )) + // Special case for when we can rely on virtual memory, the minimum + // byte size of this memory fits within the memory reservation, and + // memory isn't allowed to move. In this situation we know that + // memory will statically not grow beyond `memory_reservation` so we + // and we know that memory from 0 to that limit is guaranteed to be + // valid or trap. Here we effectively assume that the dynamic size + // of linear memory is its maximal value, `memory_reservation`, and + // we can avoid loading the actual length of memory. + // + // We have to explicitly test whether + // + // index > bound - (offset + access_size) + // + // and trap if so. + // + // Since we have to emit explicit bounds checks, we might as well be + // precise, not rely on the virtual memory subsystem at all, and not + // factor in the guard pages here. + if can_use_virtual_memory + && heap.memory.minimum_byte_size().unwrap_or(u64::MAX) <= memory_reservation + && !heap.memory.memory_may_move(env.tunables()) + { + let adjusted_bound = memory_reservation.checked_sub(offset_and_size).unwrap(); + let adjusted_bound_value = builder + .ins() + .iconst(env.pointer_type(), adjusted_bound as i64); + if pcc { + builder.func.dfg.facts[adjusted_bound_value] = + Some(Fact::constant(pointer_bit_width, adjusted_bound)); } + let oob = make_compare( + builder, + IntCC::UnsignedGreaterThan, + index, + Some(0), + adjusted_bound_value, + Some(0), + ); + return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + env, + builder, + heap, + index, + offset, + access_size, + spectre_mitigations_enabled, + AddrPcc::static32(heap.pcc_memory_type, memory_reservation), + oob, + ))); + } - // 4. General case for dynamic memories: - // - // index + offset + access_size > bound - // - // And we have to handle the overflow case in the left-hand side. - MemoryStyle::Dynamic { .. } => { - let access_size_val = builder - .ins() - // Explicit cast from u64 to i64: we just want the raw - // bits, and iconst takes an `Imm64`. 
- .iconst(env.pointer_type(), offset_and_size as i64); - if pcc { - builder.func.dfg.facts[access_size_val] = - Some(Fact::constant(pointer_bit_width, offset_and_size)); - } - let adjusted_index = env.uadd_overflow_trap( - builder, - index, - access_size_val, - ir::TrapCode::HEAP_OUT_OF_BOUNDS, - ); - if pcc { - builder.func.dfg.facts[adjusted_index] = Some(Fact::value_offset( - pointer_bit_width, - index, - i64::try_from(offset_and_size).unwrap(), - )); - } - let bound = get_dynamic_heap_bound(builder, env, heap); - let oob = make_compare( - builder, - IntCC::UnsignedGreaterThan, - adjusted_index, - i64::try_from(offset_and_size).ok(), - bound, - Some(0), - ); - Reachable(explicit_check_oob_condition_and_compute_addr( - env, - builder, - heap, - index, - offset, - access_size, - spectre_mitigations_enabled, - AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), - oob, - )) - } + // Special case for when `offset + access_size == 1`: + // + // index + 1 > bound + // ==> index >= bound + if offset_and_size == 1 { + let bound = get_dynamic_heap_bound(builder, env, heap); + let oob = make_compare( + builder, + IntCC::UnsignedGreaterThanOrEqual, + index, + Some(0), + bound, + Some(0), + ); + return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + env, + builder, + heap, + index, + offset, + access_size, + spectre_mitigations_enabled, + AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), + oob, + ))); + } - // ====== Static Memories ====== - // - // With static memories we know the size of the heap bound at compile - // time. - // - // 1. First special case: trap immediately if `offset + access_size > - // bound`, since we will end up being out-of-bounds regardless of the - // given `index`. - MemoryStyle::Static { byte_reservation } if offset_and_size > byte_reservation => { - assert!( - can_use_virtual_memory, - "static memories require the ability to use virtual memory" - ); - env.before_unconditionally_trapping_memory_access(builder)?; - env.trap(builder, ir::TrapCode::HEAP_OUT_OF_BOUNDS); - Unreachable - } + // Special case for when we know that there are enough guard + // pages to cover the offset and access size. + // + // The precise should-we-trap condition is + // + // index + offset + access_size > bound + // + // However, if we instead check only the partial condition + // + // index > bound + // + // then the most out of bounds that the access can be, while that + // partial check still succeeds, is `offset + access_size`. + // + // However, when we have a guard region that is at least as large as + // `offset + access_size`, we can rely on the virtual memory + // subsystem handling these out-of-bounds errors at + // runtime. Therefore, the partial `index > bound` check is + // sufficient for this heap configuration. + // + // Additionally, this has the advantage that a series of Wasm loads + // that use the same dynamic index operand but different static + // offset immediates -- which is a common code pattern when accessing + // multiple fields in the same struct that is in linear memory -- + // will all emit the same `index > bound` check, which we can GVN. 
+ if can_use_virtual_memory && offset_and_size <= memory_guard_size { + let bound = get_dynamic_heap_bound(builder, env, heap); + let oob = make_compare( + builder, + IntCC::UnsignedGreaterThan, + index, + Some(0), + bound, + Some(0), + ); + return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + env, + builder, + heap, + index, + offset, + access_size, + spectre_mitigations_enabled, + AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), + oob, + ))); + } - // 2. Second special case for when we can completely omit explicit - // bounds checks for 32-bit static memories. - // - // First, let's rewrite our comparison to move all of the constants - // to one side: - // - // index + offset + access_size > bound - // ==> index > bound - (offset + access_size) - // - // We know the subtraction on the right-hand side won't wrap because - // we didn't hit the first special case. - // - // Additionally, we add our guard pages (if any) to the right-hand - // side, since we can rely on the virtual memory subsystem at runtime - // to catch out-of-bound accesses within the range `bound .. bound + - // guard_size`. So now we are dealing with - // - // index > bound + guard_size - (offset + access_size) - // - // Note that `bound + guard_size` cannot overflow for - // correctly-configured heaps, as otherwise the heap wouldn't fit in - // a 64-bit memory space. - // - // The complement of our should-this-trap comparison expression is - // the should-this-not-trap comparison expression: - // - // index <= bound + guard_size - (offset + access_size) - // - // If we know the right-hand side is greater than or equal to - // `u32::MAX`, then - // - // index <= u32::MAX <= bound + guard_size - (offset + access_size) - // - // This expression is always true when the heap is indexed with - // 32-bit integers because `index` cannot be larger than - // `u32::MAX`. This means that `index` is always either in bounds or - // within the guard page region, neither of which require emitting an - // explicit bounds check. - MemoryStyle::Static { byte_reservation } - if can_use_virtual_memory - && heap.index_type() == ir::types::I32 - && u64::from(u32::MAX) - <= byte_reservation + memory_guard_size - offset_and_size => - { - assert!( - can_use_virtual_memory, - "static memories require the ability to use virtual memory" - ); - Reachable(compute_addr( - &mut builder.cursor(), - heap, - env.pointer_type(), - index, - offset, - AddrPcc::static32(heap.pcc_memory_type, byte_reservation + memory_guard_size), - )) + // Special case for when `offset + access_size <= min_size`. + // + // We know that `bound >= min_size`, so we can do the following + // comparison, without fear of the right-hand side wrapping around: + // + // index + offset + access_size > bound + // ==> index > bound - (offset + access_size) + if offset_and_size <= heap.memory.minimum_byte_size().unwrap_or(u64::MAX) { + let bound = get_dynamic_heap_bound(builder, env, heap); + let adjustment = offset_and_size as i64; + let adjustment_value = builder.ins().iconst(env.pointer_type(), adjustment); + if pcc { + builder.func.dfg.facts[adjustment_value] = + Some(Fact::constant(pointer_bit_width, offset_and_size)); } - - // 3. General case for static memories. - // - // We have to explicitly test whether - // - // index > bound - (offset + access_size) - // - // and trap if so. - // - // Since we have to emit explicit bounds checks, we might as well be - // precise, not rely on the virtual memory subsystem at all, and not - // factor in the guard pages here. 
- MemoryStyle::Static { byte_reservation } => { - assert!( - can_use_virtual_memory, - "static memories require the ability to use virtual memory" - ); - // NB: this subtraction cannot wrap because we didn't hit the first - // special case. - let adjusted_bound = byte_reservation - offset_and_size; - let adjusted_bound_value = builder - .ins() - .iconst(env.pointer_type(), adjusted_bound as i64); - if pcc { - builder.func.dfg.facts[adjusted_bound_value] = - Some(Fact::constant(pointer_bit_width, adjusted_bound)); - } - let oob = make_compare( - builder, - IntCC::UnsignedGreaterThan, - index, - Some(0), - adjusted_bound_value, - Some(0), - ); - Reachable(explicit_check_oob_condition_and_compute_addr( - env, - builder, - heap, - index, - offset, - access_size, - spectre_mitigations_enabled, - AddrPcc::static32(heap.pcc_memory_type, byte_reservation), - oob, - )) + let adjusted_bound = builder.ins().isub(bound, adjustment_value); + if pcc { + builder.func.dfg.facts[adjusted_bound] = Some(Fact::global_value_offset( + pointer_bit_width, + bound_gv, + -adjustment, + )); } - }) + let oob = make_compare( + builder, + IntCC::UnsignedGreaterThan, + index, + Some(0), + adjusted_bound, + Some(adjustment), + ); + return Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + env, + builder, + heap, + index, + offset, + access_size, + spectre_mitigations_enabled, + AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), + oob, + ))); + } + + // General case for dynamic bounds checks: + // + // index + offset + access_size > bound + // + // And we have to handle the overflow case in the left-hand side. + let access_size_val = builder + .ins() + // Explicit cast from u64 to i64: we just want the raw + // bits, and iconst takes an `Imm64`. + .iconst(env.pointer_type(), offset_and_size as i64); + if pcc { + builder.func.dfg.facts[access_size_val] = + Some(Fact::constant(pointer_bit_width, offset_and_size)); + } + let adjusted_index = env.uadd_overflow_trap( + builder, + index, + access_size_val, + ir::TrapCode::HEAP_OUT_OF_BOUNDS, + ); + if pcc { + builder.func.dfg.facts[adjusted_index] = Some(Fact::value_offset( + pointer_bit_width, + index, + i64::try_from(offset_and_size).unwrap(), + )); + } + let bound = get_dynamic_heap_bound(builder, env, heap); + let oob = make_compare( + builder, + IntCC::UnsignedGreaterThan, + adjusted_index, + i64::try_from(offset_and_size).ok(), + bound, + Some(0), + ); + Ok(Reachable(explicit_check_oob_condition_and_compute_addr( + env, + builder, + heap, + index, + offset, + access_size, + spectre_mitigations_enabled, + AddrPcc::dynamic(heap.pcc_memory_type, bound_gv), + oob, + ))) } /// Get the bound of a dynamic heap as an `ir::Value`. diff --git a/crates/environ/src/types.rs b/crates/environ/src/types.rs index d7ca5297118..ed45898b3b0 100644 --- a/crates/environ/src/types.rs +++ b/crates/environ/src/types.rs @@ -1798,6 +1798,46 @@ impl Memory { } } + /// Returns whether this memory can be implemented with virtual memory on + /// a host with `host_page_size_log2`. + /// + /// When this function returns `true` then it means that signals such as + /// SIGSEGV on the host are compatible with wasm and can be used to + /// represent out-of-bounds memory accesses. + /// + /// When this function returns `false` then it means that this memory must, + /// for example, have explicit bounds checks. This additionally means that + /// virtual memory traps (e.g. SIGSEGV) cannot be relied on to implement + /// linear memory semantics. 
+    pub fn can_use_virtual_memory(&self, tunables: &Tunables, host_page_size_log2: u8) -> bool {
+        tunables.signals_based_traps && self.page_size_log2 >= host_page_size_log2
+    }
+
+    /// Returns whether this memory is a candidate for bounds check elision
+    /// given the configuration and host page size.
+    ///
+    /// This function determines whether the given compilation configuration and
+    /// host enable possible bounds check elision for this memory. Bounds checks
+    /// can only be elided if [`Memory::can_use_virtual_memory`] returns `true`,
+    /// for example, but there are additional requirements on the index size of
+    /// this memory and the memory reservation in `tunables`.
+    ///
+    /// Currently the only case that supports bounds check elision is when all
+    /// of these apply:
+    ///
+    /// * [`Memory::can_use_virtual_memory`] returns `true`.
+    /// * This is a 32-bit linear memory (i.e. not 64-bit).
+    /// * `tunables.memory_reservation` is in excess of 4GiB.
+    ///
+    /// In this situation all computable addresses fall within the reserved
+    /// space (modulo static offsets factoring in guard pages) so bounds checks
+    /// may be elidable.
+    pub fn can_elide_bounds_check(&self, tunables: &Tunables, host_page_size_log2: u8) -> bool {
+        self.can_use_virtual_memory(tunables, host_page_size_log2)
+            && self.idx_type == IndexType::I32
+            && tunables.memory_reservation >= (1 << 32)
+    }
+
     /// Returns the static size of this heap in bytes at runtime, if available.
     ///
     /// This is only computable when the minimum size equals the maximum size.
diff --git a/tests/disas/readonly-heap-base-pointer2.wat b/tests/disas/readonly-heap-base-pointer2.wat
index 7fd8673aad8..247b7e8e3cd 100644
--- a/tests/disas/readonly-heap-base-pointer2.wat
+++ b/tests/disas/readonly-heap-base-pointer2.wat
@@ -18,11 +18,11 @@
 ;; stack_limit = gv2
 ;;
 ;; block0(v0: i64, v1: i64, v2: i32):
-;; @0022 v12 = load.i64 notrap aligned readonly v0+88
-;; @0022 v5 = load.i64 notrap aligned v12+8
 ;; @0022 v4 = uextend.i64 v2
+;; @0022 v5 = iconst.i64 0x0001_fffc
+;; @0022 v6 = icmp ugt v4, v5 ; v5 = 0x0001_fffc
 ;; @0022 v9 = iconst.i64 0
+;; @0022 v12 = load.i64 notrap aligned readonly v0+88
 ;; @0022 v7 = load.i64 notrap aligned readonly checked v12
 ;; @0022 v8 = iadd v7, v4
 ;; @0022 v10 = select_spectre_guard v6, v9, v8 ; v9 = 0
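
As a standalone illustration of the two `Memory` helpers introduced in `crates/environ/src/types.rs` above, here is a minimal, hypothetical sketch (simplified stand-in types, not the real `wasmtime_environ` definitions) of how `can_use_virtual_memory` and `can_elide_bounds_check` combine with `Tunables` for a typical 32-bit memory configuration:

// Hypothetical, self-contained sketch: simplified stand-ins for the
// `wasmtime_environ` types, mirroring the predicates added in this patch.

#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
enum IndexType {
    I32,
    I64,
}

struct Tunables {
    signals_based_traps: bool,
    memory_reservation: u64,
    memory_guard_size: u64,
}

struct Memory {
    idx_type: IndexType,
    page_size_log2: u8,
}

impl Memory {
    // Signal-based (virtual memory) traps are usable only when the engine has
    // them enabled and the wasm page size is at least the host page size.
    fn can_use_virtual_memory(&self, tunables: &Tunables, host_page_size_log2: u8) -> bool {
        tunables.signals_based_traps && self.page_size_log2 >= host_page_size_log2
    }

    // Bounds checks are candidates for elision only for 32-bit memories whose
    // entire 4 GiB index space fits within the reserved region.
    fn can_elide_bounds_check(&self, tunables: &Tunables, host_page_size_log2: u8) -> bool {
        self.can_use_virtual_memory(tunables, host_page_size_log2)
            && self.idx_type == IndexType::I32
            && tunables.memory_reservation >= (1 << 32)
    }
}

fn main() {
    let tunables = Tunables {
        signals_based_traps: true,
        memory_reservation: 1 << 32, // 4 GiB reservation
        memory_guard_size: 1 << 31,  // 2 GiB guard region
    };
    let memory = Memory {
        idx_type: IndexType::I32,
        page_size_log2: 16, // standard 64 KiB wasm pages
    };
    let host_page_size_log2 = 12; // 4 KiB host pages

    assert!(memory.can_use_virtual_memory(&tunables, host_page_size_log2));
    assert!(memory.can_elide_bounds_check(&tunables, host_page_size_log2));

    // The compiler additionally checks that the constant part of the access
    // stays within `memory_reservation + memory_guard_size`, e.g. for an
    // 8-byte access at static offset 0:
    let offset_and_size = 8u64;
    assert!(
        u64::from(u32::MAX)
            <= tunables.memory_reservation + tunables.memory_guard_size - offset_and_size
    );
}

With a 4 GiB reservation plus a guard region, every 32-bit index either lands in bounds or in the guard pages, which is the same condition the rewritten Cranelift bounds-check code relies on before omitting the explicit comparison.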