diff --git a/Cargo.lock b/Cargo.lock index c412b2086e..40232bcae2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3620,6 +3620,7 @@ dependencies = [ "wasmtime-wast", "wast 36.0.0", "wat", + "winapi", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d244f1408c..fb7e164c3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ tracing-subscriber = "0.2.16" wast = "36.0.0" criterion = "0.3.4" num_cpus = "1.13.0" +winapi = { version = "0.3.9", features = ['memoryapi'] } [build-dependencies] anyhow = "1.0.19" diff --git a/benches/thread_eager_init.rs b/benches/thread_eager_init.rs index 02f3c65f5c..9a7971d16e 100644 --- a/benches/thread_eager_init.rs +++ b/benches/thread_eager_init.rs @@ -98,10 +98,7 @@ fn test_setup() -> (Engine, Module) { memory_pages: 1, ..Default::default() }, - instance_limits: InstanceLimits { - count: pool_count, - memory_reservation_size: 1, - }, + instance_limits: InstanceLimits { count: pool_count }, }); let engine = Engine::new(&config).unwrap(); diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 6c093a2efe..14082ce396 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1257,6 +1257,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m MemoryPlan { style: MemoryStyle::Dynamic, offset_guard_size, + pre_guard_size: _, memory: _, } => { let heap_bound = func.create_global_value(ir::GlobalValueData::Load { @@ -1276,6 +1277,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m MemoryPlan { style: MemoryStyle::Static { bound }, offset_guard_size, + pre_guard_size: _, memory: _, } => ( Uimm64::new(offset_guard_size), diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 38a4293825..9a6cf0862d 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -63,6 +63,8 @@ pub struct MemoryPlan { pub memory: Memory, /// Our chosen implementation style. 
pub style: MemoryStyle, + /// Chosen size of a guard page before the linear memory allocation. + pub pre_guard_size: u64, /// Our chosen offset-guard size. pub offset_guard_size: u64, } @@ -75,6 +77,11 @@ impl MemoryPlan { memory, style, offset_guard_size, + pre_guard_size: if tunables.guard_before_linear_memory { + offset_guard_size + } else { + 0 + }, } } } diff --git a/crates/environ/src/tunables.rs b/crates/environ/src/tunables.rs index 8a786ae88b..ef76d157de 100644 --- a/crates/environ/src/tunables.rs +++ b/crates/environ/src/tunables.rs @@ -32,35 +32,38 @@ pub struct Tunables { /// Whether or not to treat the static memory bound as the maximum for unbounded heaps. pub static_memory_bound_is_maximum: bool, + + /// Whether or not linear memory allocations will have a guard region at the + /// beginning of the allocation in addition to the end. + pub guard_before_linear_memory: bool, } impl Default for Tunables { fn default() -> Self { Self { - #[cfg(target_pointer_width = "32")] - /// Size in wasm pages of the bound for static memories. - static_memory_bound: 0x4000, + // 64-bit has tons of address space so static memories can have 4 GiB + // address space reservations liberally by default, allowing us to + // help eliminate bounds checks. + // + // Coupled with a 2 GiB address space guard it lets us translate + // wasm offsets into x86 offsets as aggressively as we can. #[cfg(target_pointer_width = "64")] - /// Size in wasm pages of the bound for static memories. - /// - /// When we allocate 4 GiB of address space, we can avoid the - /// need for explicit bounds checks. static_memory_bound: 0x1_0000, + #[cfg(target_pointer_width = "64")] + static_memory_offset_guard_size: 0x8000_0000, + // For 32-bit we scale way down to 10 MiB of reserved memory. This + // impacts performance severely but allows us to have more than a + // few instances running around. 
+ #[cfg(target_pointer_width = "32")] + static_memory_bound: (10 * (1 << 20)) / crate::WASM_PAGE_SIZE, #[cfg(target_pointer_width = "32")] - /// Size in bytes of the offset guard for static memories. static_memory_offset_guard_size: 0x1_0000, - #[cfg(target_pointer_width = "64")] - /// Size in bytes of the offset guard for static memories. - /// - /// Allocating 2 GiB of address space lets us translate wasm - /// offsets into x86 offsets as aggressively as we can. - static_memory_offset_guard_size: 0x8000_0000, - /// Size in bytes of the offset guard for dynamic memories. - /// - /// Allocate a small guard to optimize common cases but without - /// wasting too much memory. + // Size in bytes of the offset guard for dynamic memories. + // + // Allocate a small guard to optimize common cases but without + // wasting too much memory. dynamic_memory_offset_guard_size: 0x1_0000, generate_native_debuginfo: false, @@ -68,6 +71,7 @@ impl Default for Tunables { interruptable: false, consume_fuel: false, static_memory_bound_is_maximum: false, + guard_before_linear_memory: true, } } } diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 764f6a3b38..48fee8e6e9 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -72,6 +72,7 @@ pub struct Config { static_memory_maximum_size: Option, static_memory_guard_size: Option, dynamic_memory_guard_size: Option, + guard_before_linear_memory: bool, } impl Config { @@ -82,6 +83,7 @@ impl Config { .static_memory_maximum_size(self.static_memory_maximum_size.unwrap_or(0).into()) .static_memory_guard_size(self.static_memory_guard_size.unwrap_or(0).into()) .dynamic_memory_guard_size(self.dynamic_memory_guard_size.unwrap_or(0).into()) + .guard_before_linear_memory(self.guard_before_linear_memory) .cranelift_nan_canonicalization(self.canonicalize_nans) .cranelift_opt_level(self.opt_level.to_wasmtime()) .interruptable(self.interruptable) diff --git 
a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 98f87d3aea..6cb1f2190a 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -15,7 +15,6 @@ use super::{ use crate::{instance::Instance, Memory, Mmap, Table, VMContext}; use anyhow::{anyhow, bail, Context, Result}; use rand::Rng; -use std::cmp::min; use std::convert::TryFrom; use std::marker; use std::mem; @@ -234,21 +233,12 @@ impl Default for ModuleLimits { pub struct InstanceLimits { /// The maximum number of concurrent instances supported. pub count: u32, - - /// The maximum size, in bytes, of host address space to reserve for each linear memory of an instance. - pub memory_reservation_size: u64, } impl Default for InstanceLimits { fn default() -> Self { // See doc comments for `wasmtime::InstanceLimits` for these default values - Self { - count: 1000, - #[cfg(target_pointer_width = "32")] - memory_reservation_size: 10 * (1 << 20), // 10 MiB, - #[cfg(target_pointer_width = "64")] - memory_reservation_size: 6 * (1 << 30), // 6 GiB, - } + Self { count: 1000 } } } @@ -299,7 +289,11 @@ struct InstancePool { } impl InstancePool { - fn new(module_limits: &ModuleLimits, instance_limits: &InstanceLimits) -> Result { + fn new( + module_limits: &ModuleLimits, + instance_limits: &InstanceLimits, + tunables: &Tunables, + ) -> Result { let page_size = region::page::size(); // Calculate the maximum size of an Instance structure given the limits @@ -337,7 +331,7 @@ impl InstancePool { instance_size, max_instances, free_list: Mutex::new((0..max_instances).collect()), - memories: MemoryPool::new(module_limits, instance_limits)?, + memories: MemoryPool::new(module_limits, instance_limits, tunables)?, tables: TablePool::new(module_limits, instance_limits)?, empty_module: Arc::new(Module::default()), }; @@ -598,20 +592,29 @@ impl Drop for InstancePool { /// Each instance index into the pool returns an iterator 
over the base addresses /// of the instance's linear memories. /// -/// /// The userfault handler relies on how memories are stored in the mapping, /// so make sure the uffd implementation is kept up-to-date. #[derive(Debug)] struct MemoryPool { mapping: Mmap, + // The size, in bytes, of each linear memory's reservation plus the guard + // region allocated for it. memory_size: usize, + // The size, in bytes, of the offset to the first linear memory in this + // pool. This is here to help account for the first region of guard pages, + // if desired, before the first linear memory. + initial_memory_offset: usize, max_memories: usize, max_instances: usize, max_wasm_pages: u32, } impl MemoryPool { - fn new(module_limits: &ModuleLimits, instance_limits: &InstanceLimits) -> Result { + fn new( + module_limits: &ModuleLimits, + instance_limits: &InstanceLimits, + tunables: &Tunables, + ) -> Result { // The maximum module memory page count cannot exceed 65536 pages if module_limits.memory_pages > 0x10000 { bail!( @@ -621,19 +624,20 @@ impl MemoryPool { } // The maximum module memory page count cannot exceed the memory reservation size - if u64::from(module_limits.memory_pages) * u64::from(WASM_PAGE_SIZE) - > instance_limits.memory_reservation_size - { + if module_limits.memory_pages > tunables.static_memory_bound { bail!( - "module memory page limit of {} pages exceeds the memory reservation size limit of {} bytes", + "module memory page limit of {} pages exceeds maximum static memory limit of {} pages", module_limits.memory_pages, - instance_limits.memory_reservation_size + tunables.static_memory_bound, ); } let memory_size = if module_limits.memory_pages > 0 { - usize::try_from(instance_limits.memory_reservation_size) - .map_err(|_| anyhow!("memory reservation size exceeds addressable memory"))? 
+ usize::try_from( + u64::from(tunables.static_memory_bound) * u64::from(WASM_PAGE_SIZE) + + tunables.static_memory_offset_guard_size, + ) + .map_err(|_| anyhow!("memory reservation size exceeds addressable memory"))? } else { 0 }; @@ -646,10 +650,29 @@ impl MemoryPool { let max_instances = instance_limits.count as usize; let max_memories = module_limits.memories as usize; + let initial_memory_offset = if tunables.guard_before_linear_memory { + usize::try_from(tunables.static_memory_offset_guard_size).unwrap() + } else { + 0 + }; + // The entire allocation here is the size of each memory times the + // max memories per instance times the number of instances allowed in + // this pool, plus guard regions. + // + // Note, though, that guard regions are required to be after each linear + // memory. If the `guard_before_linear_memory` setting is specified, + // then due to the contiguous layout of linear memories the guard pages + // after one memory are also guard pages preceding the next linear + // memory. This means that we only need to handle pre-guard-page sizes + // specially for the first linear memory, hence the + // `initial_memory_offset` variable here. If guards aren't specified + // before linear memories this is set to `0`, otherwise it's set to + // the same size as guard regions for other memories. 
let allocation_size = memory_size .checked_mul(max_memories) .and_then(|c| c.checked_mul(max_instances)) + .and_then(|c| c.checked_add(initial_memory_offset)) .ok_or_else(|| { anyhow!("total size of memory reservation exceeds addressable memory") })?; @@ -661,6 +684,7 @@ impl MemoryPool { let pool = Self { mapping, memory_size, + initial_memory_offset, max_memories, max_instances, max_wasm_pages: module_limits.memory_pages, @@ -677,9 +701,9 @@ impl MemoryPool { debug_assert!(instance_index < self.max_instances); let base: *mut u8 = unsafe { - self.mapping - .as_mut_ptr() - .add(instance_index * self.memory_size * self.max_memories) as _ + self.mapping.as_mut_ptr().add( + self.initial_memory_offset + instance_index * self.memory_size * self.max_memories, + ) as _ }; let size = self.memory_size; @@ -903,25 +927,15 @@ impl PoolingInstanceAllocator { pub fn new( strategy: PoolingAllocationStrategy, module_limits: ModuleLimits, - mut instance_limits: InstanceLimits, + instance_limits: InstanceLimits, stack_size: usize, + tunables: &Tunables, ) -> Result { if instance_limits.count == 0 { bail!("the instance count limit cannot be zero"); } - // Round the memory reservation size to the nearest Wasm page size - instance_limits.memory_reservation_size = u64::try_from(round_up_to_pow2( - usize::try_from(instance_limits.memory_reservation_size).unwrap(), - WASM_PAGE_SIZE as usize, - )) - .unwrap(); - - // Cap the memory reservation size to 8 GiB (maximum 4 GiB accessible + 4 GiB of guard region) - instance_limits.memory_reservation_size = - min(instance_limits.memory_reservation_size, 0x200000000); - - let instances = InstancePool::new(&module_limits, &instance_limits)?; + let instances = InstancePool::new(&module_limits, &instance_limits, tunables)?; #[cfg(all(feature = "uffd", target_os = "linux"))] let _fault_handler = imp::PageFaultHandler::new(&instances)?; @@ -956,18 +970,6 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator { } fn adjust_tunables(&self, 
tunables: &mut Tunables) { - let memory_reservation_size = self.instance_limits.memory_reservation_size; - - // For reservation sizes larger than 4 GiB, use a guard region to elide bounds checks - if memory_reservation_size >= 0x100000000 { - tunables.static_memory_bound = 0x10000; // in Wasm pages - tunables.static_memory_offset_guard_size = memory_reservation_size - 0x100000000; - } else { - tunables.static_memory_bound = - u32::try_from(memory_reservation_size).unwrap() / WASM_PAGE_SIZE; - tunables.static_memory_offset_guard_size = 0; - } - // Treat the static memory bound as the maximum for unbounded Wasm memories // Because we guarantee a module cannot compile unless it fits in the limits of // the pool allocator, this ensures all memories are treated as static (i.e. immovable). @@ -1124,6 +1126,7 @@ mod test { maximum: None, shared: false, }, + pre_guard_size: 0, offset_guard_size: 0, }); @@ -1239,6 +1242,7 @@ mod test { maximum: None, shared: false, }, + pre_guard_size: 0, offset_guard_size: 0, }); assert_eq!( @@ -1312,6 +1316,7 @@ mod test { maximum: None, shared: false, }, + pre_guard_size: 0, offset_guard_size: 0, }); assert_eq!( @@ -1337,6 +1342,7 @@ mod test { shared: false, }, offset_guard_size: 0, + pre_guard_size: 0, }); assert_eq!( limits.validate(&module).map_err(|e| e.to_string()), @@ -1375,12 +1381,16 @@ mod test { table_elements: 10, memory_pages: 1, }; - let instance_limits = InstanceLimits { - count: 3, - memory_reservation_size: WASM_PAGE_SIZE as u64, - }; - - let instances = InstancePool::new(&module_limits, &instance_limits)?; + let instance_limits = InstanceLimits { count: 3 }; + + let instances = InstancePool::new( + &module_limits, + &instance_limits, + &Tunables { + static_memory_bound: 1, + ..Tunables::default() + }, + )?; // As of April 2021, the instance struct's size is largely below the size of a single page, // so it's safe to assume it's been rounded to the size of a single memory page here. 
@@ -1464,9 +1474,11 @@ mod test { table_elements: 0, memory_pages: 1, }, - &InstanceLimits { - count: 5, - memory_reservation_size: WASM_PAGE_SIZE as u64, + &InstanceLimits { count: 5 }, + &Tunables { + static_memory_bound: 1, + static_memory_offset_guard_size: 0, + ..Tunables::default() }, )?; @@ -1510,10 +1522,7 @@ mod test { table_elements: 100, memory_pages: 0, }, - &InstanceLimits { - count: 7, - memory_reservation_size: WASM_PAGE_SIZE as u64, - }, + &InstanceLimits { count: 7 }, )?; let host_page_size = region::page::size(); @@ -1545,13 +1554,7 @@ mod test { #[cfg(all(unix, target_pointer_width = "64", feature = "async"))] #[test] fn test_stack_pool() -> Result<()> { - let pool = StackPool::new( - &InstanceLimits { - count: 10, - memory_reservation_size: 0, - }, - 1, - )?; + let pool = StackPool::new(&InstanceLimits { count: 10 }, 1)?; let native_page_size = region::page::size(); assert_eq!(pool.stack_size, 2 * native_page_size); @@ -1609,7 +1612,8 @@ mod test { count: 0, ..Default::default() }, - 4096 + 4096, + &Tunables::default(), ) .map_err(|e| e.to_string()) .expect_err("expected a failure constructing instance allocator"), @@ -1626,11 +1630,12 @@ mod test { memory_pages: 0x10001, ..Default::default() }, - InstanceLimits { - count: 1, - memory_reservation_size: 1, + InstanceLimits { count: 1 }, + 4096, + &Tunables { + static_memory_bound: 1, + ..Tunables::default() }, - 4096 ) .map_err(|e| e.to_string()) .expect_err("expected a failure constructing instance allocator"), @@ -1647,15 +1652,17 @@ mod test { memory_pages: 2, ..Default::default() }, - InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + InstanceLimits { count: 1 }, 4096, + &Tunables { + static_memory_bound: 1, + static_memory_offset_guard_size: 0, + ..Tunables::default() + }, ) .map_err(|e| e.to_string()) .expect_err("expected a failure constructing instance allocator"), - "module memory page limit of 2 pages exceeds the memory reservation size limit of 65536 bytes" + "module 
memory page limit of 2 pages exceeds maximum static memory limit of 1 pages" ); } @@ -1676,11 +1683,9 @@ mod test { memory_pages: 0, ..Default::default() }, - InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + InstanceLimits { count: 1 }, 4096, + &Tunables::default(), )?; unsafe { diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs index c0aef40a74..6d8398fb15 100644 --- a/crates/runtime/src/instance/allocator/pooling/uffd.rs +++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs @@ -156,6 +156,7 @@ struct FaultLocator { instances_start: usize, instance_size: usize, max_instances: usize, + memories_mapping_start: usize, memories_start: usize, memories_end: usize, memory_size: usize, @@ -165,8 +166,10 @@ struct FaultLocator { impl FaultLocator { fn new(instances: &InstancePool) -> Self { let instances_start = instances.mapping.as_ptr() as usize; - let memories_start = instances.memories.mapping.as_ptr() as usize; - let memories_end = memories_start + instances.memories.mapping.len(); + let memories_start = + instances.memories.mapping.as_ptr() as usize + instances.memories.initial_memory_offset; + let memories_end = + instances.memories.mapping.as_ptr() as usize + instances.memories.mapping.len(); // Should always have instances debug_assert!(instances_start != 0); @@ -174,6 +177,7 @@ impl FaultLocator { Self { instances_start, instance_size: instances.instance_size, + memories_mapping_start: instances.memories.mapping.as_ptr() as usize, max_instances: instances.max_instances, memories_start, memories_end, @@ -344,7 +348,7 @@ fn fault_handler_thread(uffd: Uffd, locator: FaultLocator) -> Result<()> { let (start, end) = (start as usize, end as usize); - if start == locator.memories_start && end == locator.memories_end { + if start == locator.memories_mapping_start && end == locator.memories_end { break; } else { panic!("unexpected memory region unmapped"); @@ -437,7 +441,9 @@ 
mod test { PoolingAllocationStrategy, VMSharedSignatureIndex, }; use std::sync::Arc; - use wasmtime_environ::{entity::PrimaryMap, wasm::Memory, MemoryPlan, MemoryStyle, Module}; + use wasmtime_environ::{ + entity::PrimaryMap, wasm::Memory, MemoryPlan, MemoryStyle, Module, Tunables, + }; #[cfg(target_pointer_width = "64")] #[test] @@ -455,13 +461,16 @@ mod test { table_elements: 0, memory_pages: 2, }; - let instance_limits = InstanceLimits { - count: 3, - memory_reservation_size: (WASM_PAGE_SIZE * 10) as u64, + let instance_limits = InstanceLimits { count: 3 }; + let tunables = Tunables { + static_memory_bound: 10, + static_memory_offset_guard_size: 0, + guard_before_linear_memory: false, + ..Tunables::default() }; - let instances = - InstancePool::new(&module_limits, &instance_limits).expect("should allocate"); + let instances = InstancePool::new(&module_limits, &instance_limits, &tunables) + .expect("should allocate"); let locator = FaultLocator::new(&instances); @@ -494,6 +503,7 @@ mod test { }, style: MemoryStyle::Static { bound: 1 }, offset_guard_size: 0, + pre_guard_size: 0, }); } diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs index 59a941cb86..e69dd056ee 100644 --- a/crates/runtime/src/memory.rs +++ b/crates/runtime/src/memory.rs @@ -54,8 +54,9 @@ pub struct MmapMemory { // The optional maximum size in wasm pages of this linear memory. maximum: Option, - // Size in bytes of extra guard pages after the end to optimize loads and stores with - // constant offsets. + // Size in bytes of extra guard pages before the start and after the end to + // optimize loads and stores with constant offsets. 
+ pre_guard_size: usize, offset_guard_size: usize, } @@ -75,6 +76,7 @@ impl MmapMemory { assert!(plan.memory.maximum.is_none() || plan.memory.maximum.unwrap() <= WASM_MAX_PAGES); let offset_guard_bytes = plan.offset_guard_size as usize; + let pre_guard_bytes = plan.pre_guard_size as usize; let minimum_pages = match plan.style { MemoryStyle::Dynamic => plan.memory.minimum, @@ -84,18 +86,27 @@ impl MmapMemory { } } as usize; let minimum_bytes = minimum_pages.checked_mul(WASM_PAGE_SIZE as usize).unwrap(); - let request_bytes = minimum_bytes.checked_add(offset_guard_bytes).unwrap(); + let request_bytes = pre_guard_bytes + .checked_add(minimum_bytes) + .unwrap() + .checked_add(offset_guard_bytes) + .unwrap(); let mapped_pages = plan.memory.minimum as usize; - let mapped_bytes = mapped_pages * WASM_PAGE_SIZE as usize; + let accessible_bytes = mapped_pages * WASM_PAGE_SIZE as usize; - let mmap = WasmMmap { - alloc: Mmap::accessible_reserved(mapped_bytes, request_bytes)?, + let mut mmap = WasmMmap { + alloc: Mmap::accessible_reserved(0, request_bytes)?, size: plan.memory.minimum, }; + if accessible_bytes > 0 { + mmap.alloc + .make_accessible(pre_guard_bytes, accessible_bytes)?; + } Ok(Self { mmap: mmap.into(), maximum: plan.memory.maximum, + pre_guard_size: pre_guard_bytes, offset_guard_size: offset_guard_bytes, }) } @@ -149,24 +160,28 @@ impl RuntimeLinearMemory for MmapMemory { let prev_bytes = usize::try_from(prev_pages).unwrap() * WASM_PAGE_SIZE as usize; let new_bytes = usize::try_from(new_pages).unwrap() * WASM_PAGE_SIZE as usize; - if new_bytes > self.mmap.alloc.len() - self.offset_guard_size { + if new_bytes > self.mmap.alloc.len() - self.offset_guard_size - self.pre_guard_size { // If the new size is within the declared maximum, but needs more memory than we // have on hand, it's a dynamic heap and it can move. 
- let guard_bytes = self.offset_guard_size; - let request_bytes = new_bytes.checked_add(guard_bytes)?; - - let mut new_mmap = Mmap::accessible_reserved(new_bytes, request_bytes).ok()?; + let request_bytes = self + .pre_guard_size + .checked_add(new_bytes)? + .checked_add(self.offset_guard_size)?; + + let mut new_mmap = Mmap::accessible_reserved(0, request_bytes).ok()?; + new_mmap + .make_accessible(self.pre_guard_size, new_bytes) + .ok()?; - let copy_len = self.mmap.alloc.len() - self.offset_guard_size; - new_mmap.as_mut_slice()[..copy_len] - .copy_from_slice(&self.mmap.alloc.as_slice()[..copy_len]); + new_mmap.as_mut_slice()[self.pre_guard_size..][..prev_bytes] + .copy_from_slice(&self.mmap.alloc.as_slice()[self.pre_guard_size..][..prev_bytes]); self.mmap.alloc = new_mmap; } else if delta_bytes > 0 { // Make the newly allocated pages accessible. self.mmap .alloc - .make_accessible(prev_bytes, delta_bytes) + .make_accessible(self.pre_guard_size + prev_bytes, delta_bytes) .ok()?; } @@ -178,7 +193,7 @@ impl RuntimeLinearMemory for MmapMemory { /// Return a `VMMemoryDefinition` for exposing the memory to compiled wasm code. 
fn vmmemory(&self) -> VMMemoryDefinition { VMMemoryDefinition { - base: self.mmap.alloc.as_mut_ptr(), + base: unsafe { self.mmap.alloc.as_mut_ptr().add(self.pre_guard_size) }, current_length: self.mmap.size as usize * WASM_PAGE_SIZE as usize, } } diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs index b0612f91c9..76d15201d5 100644 --- a/crates/runtime/src/mmap.rs +++ b/crates/runtime/src/mmap.rs @@ -3,7 +3,6 @@ use anyhow::{bail, Result}; use more_asserts::assert_le; -use more_asserts::assert_lt; use std::io; use std::ptr; use std::slice; @@ -176,8 +175,8 @@ impl Mmap { let page_size = region::page::size(); assert_eq!(start & (page_size - 1), 0); assert_eq!(len & (page_size - 1), 0); - assert_lt!(len, self.len); - assert_lt!(start, self.len - len); + assert_le!(len, self.len); + assert_le!(start, self.len - len); // Commit the accessible size. let ptr = self.ptr as *const u8; @@ -199,8 +198,8 @@ impl Mmap { let page_size = region::page::size(); assert_eq!(start & (page_size - 1), 0); assert_eq!(len & (page_size - 1), 0); - assert_lt!(len, self.len); - assert_lt!(start, self.len - len); + assert_le!(len, self.len); + assert_le!(start, self.len - len); // Commit the accessible size. let ptr = self.ptr as *const u8; diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 28cc85f3c2..ed9ba1fa33 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -145,38 +145,14 @@ impl Into for ModuleLimits { pub struct InstanceLimits { /// The maximum number of concurrent instances supported (default is 1000). pub count: u32, - - /// The maximum size, in bytes, of host address space to reserve for each linear memory of an instance. - /// - /// Note: this value has important performance ramifications. - /// - /// On 64-bit platforms, the default for this value will be 6 GiB. A value of less than 4 GiB will - /// force runtime bounds checking for memory accesses and thus will negatively impact performance. 
- /// Any value above 4 GiB will start eliding bounds checks provided the `offset` of the memory access is - /// less than (`memory_reservation_size` - 4 GiB). A value of 8 GiB will completely elide *all* bounds - /// checks; consequently, 8 GiB will be the maximum supported value. The default of 6 GiB reserves - /// less host address space for each instance, but a memory access with an offset above 2 GiB will incur - /// runtime bounds checks. - /// - /// On 32-bit platforms, the default for this value will be 10 MiB. A 32-bit host has very limited address - /// space to reserve for a lot of concurrent instances. As a result, runtime bounds checking will be used - /// for all memory accesses. For better runtime performance, a 64-bit host is recommended. - /// - /// This value will be rounded up by the WebAssembly page size (64 KiB). - pub memory_reservation_size: u64, } impl Default for InstanceLimits { fn default() -> Self { - let wasmtime_runtime::InstanceLimits { - count, - memory_reservation_size, - } = wasmtime_runtime::InstanceLimits::default(); + let wasmtime_runtime::InstanceLimits { count } = + wasmtime_runtime::InstanceLimits::default(); - Self { - count, - memory_reservation_size, - } + Self { count } } } @@ -185,15 +161,9 @@ impl Default for InstanceLimits { #[doc(hidden)] impl Into for InstanceLimits { fn into(self) -> wasmtime_runtime::InstanceLimits { - let Self { - count, - memory_reservation_size, - } = self; + let Self { count } = self; - wasmtime_runtime::InstanceLimits { - count, - memory_reservation_size, - } + wasmtime_runtime::InstanceLimits { count } } } @@ -916,11 +886,11 @@ impl Config { /// Sets the instance allocation strategy to use. /// - /// When using the pooling instance allocation strategy, all linear memories will be created as "static". 
- /// - /// This means the [`Config::static_memory_maximum_size`] and [`Config::static_memory_guard_size`] options - /// will be ignored in favor of [`InstanceLimits::memory_reservation_size`] when the pooling instance - /// allocation strategy is used. + /// When using the pooling instance allocation strategy, all linear memories + /// will be created as "static" and the + /// [`Config::static_memory_maximum_size`] and + /// [`Config::static_memory_guard_size`] options will be used to configure + /// the virtual memory allocations of linear memories. pub fn allocation_strategy(&mut self, strategy: InstanceAllocationStrategy) -> &mut Self { self.allocation_strategy = strategy; self @@ -929,6 +899,9 @@ impl Config { /// Configures the maximum size, in bytes, where a linear memory is /// considered static, above which it'll be considered dynamic. /// + /// > Note: this value has important performance ramifications, be sure to + /// > understand what this value does before tweaking it and benchmarking. + /// /// This function configures the threshold for wasm memories whether they're /// implemented as a dynamically relocatable chunk of memory or a statically /// located chunk of memory. The `max_size` parameter here is the size, in @@ -1004,6 +977,13 @@ impl Config { /// For 32-bit platforms this value defaults to 1GB. This means that wasm /// memories whose maximum size is less than 1GB will be allocated /// statically, otherwise they'll be considered dynamic. + /// + /// ## Static Memory and Pooled Instance Allocation + /// + /// When using the pooling instance allocator memories are considered to + /// always be static memories, they are never dynamic. This setting + /// configures the size of linear memory to reserve for each memory in the + /// pooling allocator. 
pub fn static_memory_maximum_size(&mut self, max_size: u64) -> &mut Self { let max_pages = max_size / u64::from(wasmtime_environ::WASM_PAGE_SIZE); self.tunables.static_memory_bound = u32::try_from(max_pages).unwrap_or(u32::max_value()); @@ -1013,6 +993,9 @@ impl Config { /// Configures the size, in bytes, of the guard region used at the end of a /// static memory's address space reservation. /// + /// > Note: this value has important performance ramifications, be sure to + /// > understand what this value does before tweaking it and benchmarking. + /// /// All WebAssembly loads/stores are bounds-checked and generate a trap if /// they're out-of-bounds. Loads and stores are often very performance /// critical, so we want the bounds check to be as fast as possible! @@ -1095,6 +1078,31 @@ impl Config { self } + /// Indicates whether a guard region is present before allocations of + /// linear memory. + /// + /// Guard regions before linear memories are never used during normal + /// operation of WebAssembly modules, even if they have out-of-bounds + /// loads. The only purpose for a preceding guard region in linear memory + /// is extra protection against possible bugs in code generators like + /// Cranelift. This setting does not affect performance in any way, but will + /// result in larger virtual memory reservations for linear memories (it + /// won't actually ever use more memory, just use more of the address + /// space). + /// + /// The size of the guard region before linear memory is the same as the + /// guard size that comes after linear memory, which is configured by + /// [`Config::static_memory_guard_size`] and + /// [`Config::dynamic_memory_guard_size`]. + /// + /// ## Default + /// + /// This value defaults to `true`. + pub fn guard_before_linear_memory(&mut self, guard: bool) -> &mut Self { + self.tunables.guard_before_linear_memory = guard; + self + } + /// Configure whether deserialized modules should validate version /// information. 
This only effects [`crate::Module::deserialize()`], which is /// used to load compiled code from trusted sources. When true, @@ -1149,6 +1157,7 @@ impl Config { module_limits.into(), instance_limits.into(), stack_size, + &self.tunables, )?)), } } @@ -1180,6 +1189,23 @@ impl fmt::Debug for Config { .field("wasm_simd", &self.features.simd) .field("wasm_multi_value", &self.features.multi_value) .field("wasm_module_linking", &self.features.module_linking) + .field( + "static_memory_maximum_size", + &(u64::from(self.tunables.static_memory_bound) + * u64::from(wasmtime_environ::WASM_PAGE_SIZE)), + ) + .field( + "static_memory_guard_size", + &self.tunables.static_memory_offset_guard_size, + ) + .field( + "dynamic_memory_guard_size", + &self.tunables.dynamic_memory_offset_guard_size, + ) + .field( + "guard_before_linear_memory", + &self.tunables.guard_before_linear_memory, + ) .field( "flags", &settings::Flags::new(self.flags.clone()).to_string(), diff --git a/crates/wasmtime/src/module/serialization.rs b/crates/wasmtime/src/module/serialization.rs index e7861f419a..dc04b110da 100644 --- a/crates/wasmtime/src/module/serialization.rs +++ b/crates/wasmtime/src/module/serialization.rs @@ -495,6 +495,7 @@ impl<'a> SerializedModule<'a> { interruptable, consume_fuel, static_memory_bound_is_maximum, + guard_before_linear_memory, } = self.tunables; let other = compiler.tunables(); @@ -531,6 +532,11 @@ impl<'a> SerializedModule<'a> { other.static_memory_bound_is_maximum, "pooling allocation support", )?; + Self::check_bool( + guard_before_linear_memory, + other.guard_before_linear_memory, + "guard before linear memory", + )?; Ok(()) } diff --git a/tests/all/async_functions.rs b/tests/all/async_functions.rs index deea640d5f..f81f08812f 100644 --- a/tests/all/async_functions.rs +++ b/tests/all/async_functions.rs @@ -428,11 +428,11 @@ fn async_with_pooling_stacks() { table_elements: 0, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 
1, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config).unwrap(); let mut store = Store::new(&engine, ()); @@ -457,11 +457,11 @@ fn async_host_func_with_pooling_stacks() -> Result<()> { table_elements: 0, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let mut store = Store::new(&Engine::new(&config)?, ()); let mut linker = Linker::new(store.engine()); diff --git a/tests/all/main.rs b/tests/all/main.rs index 6cb1207923..a4b8ea9807 100644 --- a/tests/all/main.rs +++ b/tests/all/main.rs @@ -15,6 +15,7 @@ mod instance; mod invoke_func_via_table; mod limits; mod linker; +mod memory; mod memory_creator; mod module; mod module_linking; diff --git a/tests/all/memory.rs b/tests/all/memory.rs new file mode 100644 index 0000000000..027cf71c38 --- /dev/null +++ b/tests/all/memory.rs @@ -0,0 +1,281 @@ +use anyhow::Result; +use rayon::prelude::*; +use wasmtime::*; + +fn module(engine: &Engine) -> Result<Module> { + let mut wat = format!("(module\n"); + wat.push_str("(import \"\" \"\" (memory 0))\n"); + for i in 0..=33 { + let offset = if i == 0 { + 0 + } else if i == 33 { + !0 + } else { + 1u32 << (i - 1) + }; + + for (width, instr) in [ + (1, &["i32.load8_s"][..]), + (2, &["i32.load16_s"]), + (4, &["i32.load" /*, "f32.load"*/]), + (8, &["i64.load" /*, "f64.load"*/]), + (16, &["v128.load"]), + ] + .iter() + { + for (j, instr) in instr.iter().enumerate() { + wat.push_str(&format!( + "(func (export \"{} {} v{}\") (param i32)\n", + width, offset, j + )); + wat.push_str("local.get 0\n"); + wat.push_str(instr); + wat.push_str(&format!(" offset={}\n", offset)); + wat.push_str("drop\n)"); + } + } + } +
wat.push_str(")"); + Module::new(engine, &wat) +} + +struct TestFunc { + width: u32, + offset: u32, + func: TypedFunc<u32, ()>, +} + +fn find_funcs(store: &mut Store<()>, instance: &Instance) -> Vec<TestFunc> { + let list = instance + .exports(&mut *store) + .map(|export| { + let name = export.name(); + let mut parts = name.split_whitespace(); + ( + parts.next().unwrap().parse().unwrap(), + parts.next().unwrap().parse().unwrap(), + export.into_func().unwrap(), + ) + }) + .collect::<Vec<_>>(); + list.into_iter() + .map(|(width, offset, func)| TestFunc { + width, + offset, + func: func.typed(&store).unwrap(), + }) + .collect() +} + +fn test_traps(store: &mut Store<()>, funcs: &[TestFunc], addr: u32, mem: &Memory) { + let mem_size = mem.data_size(&store) as u64; + for func in funcs { + let result = func.func.call(&mut *store, addr); + let base = u64::from(func.offset) + u64::from(addr); + let range = base..base + u64::from(func.width); + if range.start >= mem_size || range.end >= mem_size { + assert!(result.is_err()); + } else { + assert!(result.is_ok()); + } + } +} + +#[test] +fn offsets_static_dynamic_oh_my() -> Result<()> { + const GB: u64 = 1 << 30; + + let mut engines = Vec::new(); + let sizes = [0, 1 * GB, 4 * GB]; + for &static_memory_maximum_size in sizes.iter() { + for &guard_size in sizes.iter() { + for &guard_before_linear_memory in [true, false].iter() { + let mut config = Config::new(); + config.wasm_simd(true); + config.static_memory_maximum_size(static_memory_maximum_size); + config.dynamic_memory_guard_size(guard_size); + config.static_memory_guard_size(guard_size); + config.guard_before_linear_memory(guard_before_linear_memory); + engines.push(Engine::new(&config)?); + } + } + } + + engines.par_iter().for_each(|engine| { + let module = module(&engine).unwrap(); + + for limits in [Limits::new(1, Some(2)), Limits::new(1, None)].iter() { + let mut store = Store::new(&engine, ()); + let mem = Memory::new(&mut store, MemoryType::new(limits.clone())).unwrap(); + let instance =
Instance::new(&mut store, &module, &[mem.into()]).unwrap(); + let funcs = find_funcs(&mut store, &instance); + + test_traps(&mut store, &funcs, 0, &mem); + test_traps(&mut store, &funcs, 65536, &mem); + test_traps(&mut store, &funcs, u32::MAX, &mem); + + mem.grow(&mut store, 1).unwrap(); + + test_traps(&mut store, &funcs, 0, &mem); + test_traps(&mut store, &funcs, 65536, &mem); + test_traps(&mut store, &funcs, u32::MAX, &mem); + } + }); + + Ok(()) +} + +#[test] +fn guards_present() -> Result<()> { + const GUARD_SIZE: u64 = 65536; + + let mut config = Config::new(); + config.static_memory_maximum_size(1 << 20); + config.dynamic_memory_guard_size(GUARD_SIZE); + config.static_memory_guard_size(GUARD_SIZE); + config.guard_before_linear_memory(true); + let engine = Engine::new(&config)?; + let mut store = Store::new(&engine, ()); + let static_mem = Memory::new(&mut store, MemoryType::new(Limits::new(1, Some(2))))?; + let dynamic_mem = Memory::new(&mut store, MemoryType::new(Limits::new(1, None)))?; + + let assert_guards = |store: &Store<()>| unsafe { + // guards before + println!("check pre-static-mem"); + assert_faults(static_mem.data_ptr(&store).offset(-(GUARD_SIZE as isize))); + println!("check pre-dynamic-mem"); + assert_faults(dynamic_mem.data_ptr(&store).offset(-(GUARD_SIZE as isize))); + + // guards after + println!("check post-static-mem"); + assert_faults( + static_mem + .data_ptr(&store) + .add(static_mem.data_size(&store)), + ); + println!("check post-dynamic-mem"); + assert_faults( + dynamic_mem + .data_ptr(&store) + .add(dynamic_mem.data_size(&store)), + ); + }; + assert_guards(&store); + // static memory should start with the second page unmapped + unsafe { + assert_faults(static_mem.data_ptr(&store).add(65536)); + } + println!("growing"); + static_mem.grow(&mut store, 1).unwrap(); + dynamic_mem.grow(&mut store, 1).unwrap(); + assert_guards(&store); + + Ok(()) +} + +#[test] +fn guards_present_pooling() -> Result<()> { + const GUARD_SIZE: u64 = 65536; + + 
let mut config = Config::new(); + config.static_memory_maximum_size(1 << 20); + config.dynamic_memory_guard_size(GUARD_SIZE); + config.static_memory_guard_size(GUARD_SIZE); + config.guard_before_linear_memory(true); + config.allocation_strategy(InstanceAllocationStrategy::Pooling { + strategy: PoolingAllocationStrategy::default(), + module_limits: ModuleLimits { + memory_pages: 10, + ..ModuleLimits::default() + }, + instance_limits: InstanceLimits { count: 2 }, + }); + let engine = Engine::new(&config)?; + + let mut store = Store::new(&engine, ()); + + let mem1 = { + let m = Module::new(&engine, "(module (memory (export \"\") 1 2))")?; + Instance::new(&mut store, &m, &[])? + .get_memory(&mut store, "") + .unwrap() + }; + let mem2 = { + let m = Module::new(&engine, "(module (memory (export \"\") 1))")?; + Instance::new(&mut store, &m, &[])? + .get_memory(&mut store, "") + .unwrap() + }; + + unsafe fn assert_guards(store: &Store<()>, mem: &Memory) { + // guards before + println!("check pre-mem"); + assert_faults(mem.data_ptr(&store).offset(-(GUARD_SIZE as isize))); + + // unmapped just after memory + println!("check mem"); + assert_faults(mem.data_ptr(&store).add(mem.data_size(&store))); + + // guards after memory + println!("check post-mem"); + assert_faults(mem.data_ptr(&store).add(1 << 20)); + } + unsafe { + assert_guards(&store, &mem1); + assert_guards(&store, &mem2); + println!("growing"); + mem1.grow(&mut store, 1).unwrap(); + mem2.grow(&mut store, 1).unwrap(); + assert_guards(&store, &mem1); + assert_guards(&store, &mem2); + } + + Ok(()) +} + +unsafe fn assert_faults(ptr: *mut u8) { + use std::io::Error; + #[cfg(unix)] + { + // I think things get real weird with uffd since there's a helper thread + // that's not cloned with `fork` below. Just skip this test for uffd + // since it's covered by tests elsewhere. + if cfg!(target_os = "linux") && cfg!(feature = "uffd") { + return; + } + // There's probably a faster way to do this here, but, uh, when in rome? 
+ match libc::fork() { + 0 => { + *ptr = 4; + std::process::exit(0); + } + -1 => panic!("failed to fork: {}", Error::last_os_error()), + n => { + let mut status = 0; + assert!( + libc::waitpid(n, &mut status, 0) == n, + "failed to wait: {}", + Error::last_os_error() + ); + assert!(libc::WIFSIGNALED(status)); + } + } + } + #[cfg(windows)] + { + use winapi::um::memoryapi::*; + use winapi::um::winnt::*; + + let mut info = std::mem::MaybeUninit::uninit(); + let r = VirtualQuery( + ptr as *const _, + info.as_mut_ptr(), + std::mem::size_of_val(&info), + ); + if r == 0 { + panic!("failed to VirtualAlloc: {}", Error::last_os_error()); + } + let info = info.assume_init(); + assert_eq!(info.AllocationProtect, PAGE_NOACCESS); + } +} diff --git a/tests/all/pooling_allocator.rs b/tests/all/pooling_allocator.rs index a46f0e2402..783485f830 100644 --- a/tests/all/pooling_allocator.rs +++ b/tests/all/pooling_allocator.rs @@ -11,11 +11,11 @@ fn successful_instantiation() -> Result<()> { table_elements: 10, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config)?; let module = Module::new(&engine, r#"(module (memory 1) (table 10 funcref))"#)?; @@ -37,11 +37,11 @@ fn memory_limit() -> Result<()> { table_elements: 10, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 196608, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(65536); + config.static_memory_maximum_size(3 * 65536); let engine = Engine::new(&config)?; @@ -197,11 +197,11 @@ fn memory_zeroed() -> Result<()> { table_elements: 0, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + 
instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config)?; @@ -240,11 +240,11 @@ fn table_limit() -> Result<()> { table_elements: TABLE_ELEMENTS, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config)?; @@ -367,11 +367,11 @@ fn table_zeroed() -> Result<()> { table_elements: 10, ..Default::default() }, - instance_limits: InstanceLimits { - count: 1, - memory_reservation_size: 1, - }, + instance_limits: InstanceLimits { count: 1 }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config)?; @@ -413,9 +413,11 @@ fn instantiation_limit() -> Result<()> { }, instance_limits: InstanceLimits { count: INSTANCE_LIMIT, - memory_reservation_size: 1, }, }); + config.dynamic_memory_guard_size(0); + config.static_memory_guard_size(0); + config.static_memory_maximum_size(65536); let engine = Engine::new(&config)?; let module = Module::new(&engine, r#"(module)"#)?;