Chris Fallin committed 3 years ago (committed by GitHub)
27 changed files with 1331 additions and 148 deletions
@@ -0,0 +1,695 @@
//! memfd support: creation of backing images for modules, and logic
//! to support mapping these backing images into memory.

use crate::InstantiationError;
use anyhow::Result;
use libc::c_void;
use memfd::{Memfd, MemfdOptions};
use rustix::fd::AsRawFd;
use rustix::fs::FileExt;
use std::sync::Arc;
use std::{convert::TryFrom, ops::Range};
use wasmtime_environ::{
    DefinedMemoryIndex, MemoryInitialization, MemoryInitializer, MemoryPlan, Module, PrimaryMap,
};

/// MemFDs containing backing images for certain memories in a module.
///
/// This is meant to be built once, when a module is first
/// loaded/constructed, and then used many times for instantiation.
pub struct ModuleMemFds {
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryMemFd>>>,
}

const MAX_MEMFD_IMAGE_SIZE: u64 = 1024 * 1024 * 1024; // limit to 1GiB.

impl ModuleMemFds {
    pub(crate) fn get_memory_image(
        &self,
        defined_index: DefinedMemoryIndex,
    ) -> Option<&Arc<MemoryMemFd>> {
        self.memories[defined_index].as_ref()
    }
}

/// One backing image for one memory.
#[derive(Debug)]
pub struct MemoryMemFd {
    /// The actual memfd image: an anonymous file in memory which we
    /// use as the backing content for a copy-on-write (CoW) mapping
    /// in the memory region.
    pub fd: Memfd,
    /// Length of image. Note that the initial memory size may be
    /// larger; leading and trailing zeroes are truncated from the
    /// image and are provided by the anonymous backing memory instead.
    ///
    /// Must be a multiple of the system page size.
    pub len: usize,
    /// Image starts this many bytes into heap space. Note that the
    /// memfd's offsets are always equal to the heap offsets, so we
    /// map at an offset into the fd as well. (This simplifies
    /// construction.)
    ///
    /// Must be a multiple of the system page size.
    pub offset: usize,
}
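
// Illustrative example (an editorial sketch, not part of the original
// change): with 4 KiB pages, a heap whose only nonzero initializer bytes
// land at heap offsets 0x3000..0x3004 produces an image with
// `offset == 0x3000` and `len == 0x1000`; the zero pages before and after
// the image come from anonymous backing memory rather than the memfd.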

fn unsupported_initializer(segment: &MemoryInitializer, plan: &MemoryPlan) -> bool {
    // If the segment has a base that is dynamically determined
    // (by a global value, which may be a function of an imported
    // module, for example), then we cannot build a single static
    // image that is used for every instantiation. So we skip this
    // memory entirely.
    let end = match segment.end() {
        None => {
            return true;
        }
        Some(end) => end,
    };

    // Cannot be out-of-bounds. If there is a *possibility* it may
    // be, then we just fall back on ordinary initialization.
    if plan.initializer_possibly_out_of_bounds(segment) {
        return true;
    }

    // Must fit in our max size.
    if end > MAX_MEMFD_IMAGE_SIZE {
        return true;
    }

    false
}
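
// Editorial note (a sketch, not from the original change): a segment whose
// base is only known at instantiation time, e.g.
//
//     (data (offset (global.get $base)) "...")
//
// causes `segment.end()` above to return `None`, so the whole memory is
// excluded from static-image initialization and falls back to the
// ordinary eager-initialization path.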

fn create_memfd() -> Result<Memfd> {
    // Create the memfd. It needs a name, but the documentation for
    // `memfd_create()` says that names can be duplicated with no
    // issues.
    MemfdOptions::new()
        .allow_sealing(true)
        .create("wasm-memory-image")
        .map_err(|e| e.into())
}
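
// Editorial note: `allow_sealing(true)` corresponds to the
// `MFD_ALLOW_SEALING` flag to `memfd_create(2)`; without it, the
// `add_seal()` calls performed once an image is finalized below would
// fail with `EPERM`.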

impl ModuleMemFds {
    /// Create a new `ModuleMemFds` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using memfd-backed memories.
    pub fn new(module: &Module, wasm_data: &[u8]) -> Result<Option<Arc<ModuleMemFds>>> {
        let page_size = region::page::size() as u64;
        let num_defined_memories = module.memory_plans.len() - module.num_imported_memories;

        // Build up a memfd image for each defined memory as we
        // process initializers. When we determine during initializer
        // processing that we cannot support a static image for a
        // memory (e.g. due to dynamically-located segments), we
        // release its memfd and mark it in `excluded_memories`.
        let mut memfds: PrimaryMap<DefinedMemoryIndex, Option<Memfd>> = PrimaryMap::default();
        let mut sizes: PrimaryMap<DefinedMemoryIndex, u64> = PrimaryMap::default();
        let mut excluded_memories: PrimaryMap<DefinedMemoryIndex, bool> = PrimaryMap::new();

        for _ in 0..num_defined_memories {
            memfds.push(None);
            sizes.push(0);
            excluded_memories.push(false);
        }

        let round_up_page = |len: u64| (len + page_size - 1) & !(page_size - 1);
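        // For example, with 4 KiB pages: round_up_page(1) == 0x1000,
        // round_up_page(0x1000) == 0x1000, and round_up_page(0x1001) == 0x2000.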

        match &module.memory_initialization {
            &MemoryInitialization::Segmented(ref segments) => {
                for segment in segments {
                    let defined_memory = match module.defined_memory_index(segment.memory_index) {
                        Some(defined_memory) => defined_memory,
                        None => continue,
                    };
                    if excluded_memories[defined_memory] {
                        continue;
                    }

                    if unsupported_initializer(segment, &module.memory_plans[segment.memory_index])
                    {
                        memfds[defined_memory] = None;
                        excluded_memories[defined_memory] = true;
                        continue;
                    }

                    if memfds[defined_memory].is_none() {
                        memfds[defined_memory] = Some(create_memfd()?);
                    }
                    let memfd = memfds[defined_memory].as_mut().unwrap();

                    let end = round_up_page(segment.end().expect("must have statically-known end"));
                    if end > sizes[defined_memory] {
                        sizes[defined_memory] = end;
                        memfd.as_file().set_len(end)?;
                    }

                    let base = segment.offset;
                    let data = &wasm_data[segment.data.start as usize..segment.data.end as usize];
                    memfd.as_file().write_at(data, base)?;
                }
            }
            &MemoryInitialization::Paged { ref map, .. } => {
                for (defined_memory, pages) in map {
                    let top = pages
                        .iter()
                        .map(|(base, range)| *base + range.len() as u64)
                        .max()
                        .unwrap_or(0);

                    let memfd = create_memfd()?;
                    memfd.as_file().set_len(top)?;

                    for (base, range) in pages {
                        let data = &wasm_data[range.start as usize..range.end as usize];
                        memfd.as_file().write_at(data, *base)?;
                    }

                    memfds[defined_memory] = Some(memfd);
                    sizes[defined_memory] = top;
                }
            }
        }

        // Now finalize each memory.
        let mut memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryMemFd>>> =
            PrimaryMap::default();
        for (defined_memory, maybe_memfd) in memfds {
            let memfd = match maybe_memfd {
                Some(memfd) => memfd,
                None => {
                    memories.push(None);
                    continue;
                }
            };
            let size = sizes[defined_memory];

            // Find leading and trailing zero data so that the mmap
            // can precisely map only the nonzero data; anon-mmap zero
            // memory is faster for anything that doesn't actually
            // have content.
            let mut page_data = vec![0; page_size as usize];
            let mut page_is_nonzero = |page| {
                let offset = page_size * page;
                memfd.as_file().read_at(&mut page_data[..], offset).unwrap();
                page_data.iter().any(|byte| *byte != 0)
            };
            let n_pages = size / page_size;

            let mut offset = 0;
            for page in 0..n_pages {
                if page_is_nonzero(page) {
                    break;
                }
                offset += page_size;
            }
            let len = if offset == size {
                0
            } else {
                let mut len = 0;
                for page in (0..n_pages).rev() {
                    if page_is_nonzero(page) {
                        len = (page + 1) * page_size - offset;
                        break;
                    }
                }
                len
            };
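            // Worked example (editorial): for an 8-page image where only
            // pages 3 and 5 are nonzero, the loops above compute
            // `offset == 3 * page_size` and
            // `len == (5 + 1) * page_size - offset == 3 * page_size`,
            // so the CoW mapping covers exactly pages 3, 4, and 5.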

            // Seal the memfd's data and length.
            //
            // This is a defense-in-depth security mitigation. The
            // memfd will serve as the starting point for the heap of
            // every instance of this module. If anything were to
            // write to this, it could affect every execution. The
            // memfd object itself is owned by the machinery here and
            // not exposed elsewhere, but it is still an ambient open
            // file descriptor at the syscall level, so some other
            // vulnerability that allowed writes to arbitrary fds
            // could modify it. Or we could have some issue with the
            // way that we map it into each instance. To be
            // extra-super-sure that it never changes, and because
            // this costs very little, we use the kernel's "seal" API
            // to make the memfd image permanently read-only.
            memfd.add_seal(memfd::FileSeal::SealGrow)?;
            memfd.add_seal(memfd::FileSeal::SealShrink)?;
            memfd.add_seal(memfd::FileSeal::SealWrite)?;
            memfd.add_seal(memfd::FileSeal::SealSeal)?;

            assert_eq!(offset % page_size, 0);
            assert_eq!(len % page_size, 0);

            memories.push(Some(Arc::new(MemoryMemFd {
                fd: memfd,
                offset: usize::try_from(offset).unwrap(),
                len: usize::try_from(len).unwrap(),
            })));
        }

        Ok(Some(Arc::new(ModuleMemFds { memories })))
    }
}
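
// Illustrative usage (an editorial sketch, not part of the original
// change); assumes a compiled `module: &Module` and its raw
// `wasm_data: &[u8]` are in scope:
//
//     let memfds = ModuleMemFds::new(module, wasm_data)?;
//     if let Some(memfds) = &memfds {
//         // At instantiation time, for each defined memory `idx`:
//         let image = memfds.get_memory_image(idx);
//         // `image` is `Some(..)` only for memories with a static image.
//     }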

/// A single slot handled by the memfd instance-heap mechanism.
///
/// The mmap scheme is:
///
/// base ==> (points here)
/// - (image.offset bytes)  anonymous zero memory, pre-image
/// - (image.len bytes)     CoW mapping of memfd heap image
/// - (up to static_size)   anonymous zero memory, post-image
///
/// The ordering of mmaps to set this up is:
///
/// - once, when the pooling allocator is created:
///   - one large mmap to create 8GiB * instances * memories slots
///
/// - per instantiation of a new image in a slot:
///   - mmap of anonymous zero memory, from 0 to the maximum heap size
///     (static_size)
///   - mmap of the CoW'd memfd image, from `image.offset` to
///     `image.offset + image.len`. This overwrites part of the
///     anonymous zero memory, potentially splitting it into a pre-
///     and post-region.
///   - mprotect(PROT_NONE) on the part of the heap beyond the initial
///     heap size; we re-mprotect it with R+W bits when the heap is
///     grown.
#[derive(Debug)]
pub struct MemFdSlot {
    /// The base of the actual heap memory. Bytes at this address are
    /// what is seen by the Wasm guest code.
    base: usize,
    /// The maximum static memory size, plus post-guard.
    static_size: usize,
    /// The memfd image that backs this memory. May be `None`, in
    /// which case the memory is all zeroes.
    pub(crate) image: Option<Arc<MemoryMemFd>>,
    /// The initial heap size.
    initial_size: usize,
    /// The current heap size. All memory above `base + cur_size`
    /// should be PROT_NONE (mapped inaccessible).
    cur_size: usize,
    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// that those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `initial_size` are accessible R+W and the
    /// rest of the slot is inaccessible.
    dirty: bool,
    /// Whether this MemFdSlot is responsible for mapping anonymous
    /// memory (to hold the reservation while overwriting mappings
    /// specific to this slot) in place when it is dropped. Defaults
    /// to on, unless the caller knows what it is doing.
    clear_on_drop: bool,
}

impl MemFdSlot {
    /// Create a new MemFdSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    pub(crate) fn create(base_addr: *mut c_void, static_size: usize) -> Self {
        let base = base_addr as usize;
        MemFdSlot {
            base,
            static_size,
            initial_size: 0,
            cur_size: 0,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }

    /// Inform the MemFdSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }

    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        assert!(
            size_bytes > self.cur_size,
            "size_bytes = {} cur_size = {}",
            size_bytes,
            self.cur_size
        );

        // mprotect the relevant region.
        let start = self.base + self.cur_size;
        let len = size_bytes - self.cur_size;
        unsafe {
            rustix::io::mprotect(
                start as *mut _,
                len,
                rustix::io::MprotectFlags::READ | rustix::io::MprotectFlags::WRITE,
            )?;
        }
        self.cur_size = size_bytes;

        Ok(())
    }
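
    // Editorial note (a sketch; the call path here is an assumption, not
    // shown in this diff): when the guest executes `memory.grow` and the
    // growth is approved, the runtime's memory object calls
    // `set_heap_limit(new_size_bytes)` so the newly valid pages become
    // readable and writable, e.g.:
    //
    //     // old heap: 64 KiB accessible; grow by one 64 KiB Wasm page:
    //     slot.set_heap_limit(128 << 10)?;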

    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryMemFd>>,
    ) -> Result<(), InstantiationError> {
        assert!(!self.dirty);

        // Fast path: previously instantiated with the same image, or
        // with no image but the same initial size, so the mappings are
        // already correct; there is no need to mmap anything. Given
        // that we asserted not-dirty above, any dirty pages will have
        // already been thrown away by madvise() during the previous
        // termination. The `clear_and_remain_ready()` path also
        // mprotects memory above the initial heap size back to
        // PROT_NONE, so we don't need to do that here.
        if (self.image.is_none()
            && maybe_image.is_none()
            && self.initial_size == initial_size_bytes)
            || (self.image.is_some()
                && maybe_image.is_some()
                && self.image.as_ref().unwrap().fd.as_file().as_raw_fd()
                    == maybe_image.as_ref().unwrap().fd.as_file().as_raw_fd())
        {
            self.dirty = true;
            self.cur_size = initial_size_bytes;
            return Ok(());
        }

        // Otherwise, we need to redo (i) the anonymous-mmap backing
        // for the whole slot, (ii) the initial-heap-image mapping if
        // present, and (iii) the mprotect(PROT_NONE) above the
        // initial heap size.

        // Security/audit note: we map all of these MAP_PRIVATE, so
        // all instance data is local to the mapping, not propagated
        // to the backing fd. We throw away this CoW overlay with
        // madvise() below, from base up to static_size (which is the
        // whole slot) when terminating the instance.

        // Anonymous mapping behind the initial heap size: this gives
        // zeroes for any "holes" in the initial heap image. Anonymous
        // mmap memory is faster to fault in than a CoW of a file,
        // even a file with zero holes, because the kernel's CoW path
        // unconditionally copies *something* (even if just a page of
        // zeroes). Anonymous zero pages are fast: the kernel
        // pre-zeroes them, and even if it runs out of those, a memset
        // is half as expensive as a memcpy (only writes, no reads).
        //
        // We map these inaccessible at first, then mprotect() the
        // whole of the initial heap size to R+W below.
        if self.image.is_some() {
            self.reset_with_anon_memory()
                .map_err(|e| InstantiationError::Resource(e.into()))?;
        } else if initial_size_bytes < self.initial_size {
            // Special case: we can skip the mmap if the last
            // instantiation had no image. This means that the whole
            // slot is filled with an anonymous mmap backing (and it
            // will have already been cleared by the madvise). We may
            // however need to mprotect(NONE) the space above
            // `initial_size_bytes` if the last use of this slot left
            // it larger. This also lets us skip an mmap the first
            // time a MemFdSlot is used, because we require the caller
            // to give us a fixed address in an
            // already-mmapped-with-anon-memory region. This is
            // important for the on-demand allocator.
            //
            // So we come in with:
            // - anon-zero memory, R+W,  [0, self.initial_size)
            // - anon-zero memory, none, [self.initial_size, self.static_size)
            // and we want:
            // - anon-zero memory, R+W,  [0, initial_size_bytes)
            // - anon-zero memory, none, [initial_size_bytes, self.static_size)
            //
            // so given initial_size_bytes < self.initial_size we
            // mprotect(NONE) the zone from the first to the second.
            self.set_protection(
                initial_size_bytes..self.initial_size,
                rustix::io::MprotectFlags::empty(),
            )
            .map_err(|e| InstantiationError::Resource(e.into()))?;
        }

        // The initial memory image, if given. If not, we just get a
        // memory filled with zeroes.
        if let Some(image) = maybe_image {
            assert!(image.offset.checked_add(image.len).unwrap() <= initial_size_bytes);
            if image.len > 0 {
                unsafe {
                    let ptr = rustix::io::mmap(
                        (self.base + image.offset) as *mut c_void,
                        image.len,
                        rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
                        rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
                        image.fd.as_file(),
                        image.offset as u64,
                    )
                    .map_err(|e| InstantiationError::Resource(e.into()))?;
                    assert_eq!(ptr as usize, self.base + image.offset);
                }
            }
        }

        self.image = maybe_image.cloned();

        // mprotect the initial `initial_size_bytes` to be accessible.
        self.initial_size = initial_size_bytes;
        self.cur_size = initial_size_bytes;
        self.set_protection(
            0..initial_size_bytes,
            rustix::io::MprotectFlags::READ | rustix::io::MprotectFlags::WRITE,
        )
        .map_err(|e| InstantiationError::Resource(e.into()))?;

        self.dirty = true;
        Ok(())
    }

    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
        assert!(self.dirty);

        // madvise the image range. This will throw away dirty pages,
        // which are CoW-private pages on top of the initial heap
        // image memfd.
        unsafe {
            rustix::io::madvise(
                self.base as *mut c_void,
                self.static_size,
                rustix::io::Advice::LinuxDontNeed,
            )?;
        }

        // mprotect the region beyond the initial heap size back to
        // PROT_NONE.
        self.set_protection(
            self.initial_size..self.static_size,
            rustix::io::MprotectFlags::empty(),
        )?;
        self.dirty = false;
        Ok(())
    }

    fn set_protection(&self, range: Range<usize>, flags: rustix::io::MprotectFlags) -> Result<()> {
        assert!(range.end <= self.static_size);
        let mprotect_start = self.base.checked_add(range.start).unwrap();
        if range.len() > 0 {
            unsafe {
                rustix::io::mprotect(mprotect_start as *mut _, range.len(), flags)?;
            }
        }

        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    fn reset_with_anon_memory(&self) -> Result<()> {
        unsafe {
            let ptr = rustix::io::mmap_anonymous(
                self.base as *mut c_void,
                self.static_size,
                rustix::io::ProtFlags::empty(),
                rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
            )?;
            assert_eq!(ptr as usize, self.base);
        }
        Ok(())
    }
}
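
// Illustrative slot lifecycle (an editorial sketch, not part of the
// original change):
//
//     let mut slot = MemFdSlot::create(base_ptr, static_size);
//     slot.instantiate(64 << 10, image.as_ref())?; // map image; heap R+W
//     // ... guest runs; growth calls slot.set_heap_limit(...) ...
//     slot.clear_and_remain_ready()?; // madvise dirty pages; re-PROT_NONE
//     slot.instantiate(64 << 10, image.as_ref())?; // fast path: same image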

impl Drop for MemFdSlot {
    fn drop(&mut self) {
        // The MemFdSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemFdSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemFdSlot will not be returned to the MemoryPool, so a new
        // MemFdSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do an mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemFdSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Let's ignore the failure if so; the next
        // MemFdSlot to be created for this slot will try to overwrite
        // the existing stale mappings, and return a failure properly
        // if we still cannot map new memory.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemFdSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            let _ = self.reset_with_anon_memory();
        }
    }
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use super::create_memfd;
    use super::MemFdSlot;
    use super::MemoryMemFd;
    use crate::mmap::Mmap;
    use anyhow::Result;
    use rustix::fs::FileExt;

    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryMemFd> {
        let page_size = region::page::size();
        let memfd = create_memfd()?;
        // The image offset must be page-aligned.
        assert_eq!(offset & (page_size - 1), 0);
        // Round the file length up to the page size. Note that the
        // image's `len` excludes the leading `offset` bytes: file
        // offsets equal heap offsets, so the file is `offset + len`
        // bytes long.
        let file_len = offset + data.len();
        let file_len = (file_len + page_size - 1) & !(page_size - 1);
        memfd.as_file().set_len(file_len as u64)?;
        memfd.as_file().write_at(data, offset as u64)?;
        Ok(MemoryMemFd {
            fd: memfd,
            len: file_len - offset,
            offset,
        })
    }

    #[test]
    fn instantiate_no_image() {
        if skip_tests_due_to_qemu_madvise_semantics() {
            return;
        }

        // 4 MiB mmap'd area, not accessible.
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        // Create a MemFdSlot on top of it.
        let mut memfd = MemFdSlot::create(mmap.as_mut_ptr() as *mut _, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // Instantiate with a 64 KiB initial size.
        memfd.instantiate(64 << 10, None).unwrap();
        assert!(memfd.is_dirty());
        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        let slice = mmap.as_mut_slice();
        assert_eq!(0, slice[0]);
        assert_eq!(0, slice[65535]);
        slice[1024] = 42;
        assert_eq!(42, slice[1024]);
        // Grow the heap.
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // Instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap optimization kicks in.
        memfd.clear_and_remain_ready().unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(0, slice[1024]);
    }

    #[test]
    fn instantiate_image() {
        if skip_tests_due_to_qemu_madvise_semantics() {
            return;
        }

        // 4 MiB mmap'd area, not accessible.
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        // Create a MemFdSlot on top of it.
        let mut memfd = MemFdSlot::create(mmap.as_mut_ptr() as *mut _, 4 << 20);
        memfd.no_clear_on_drop();
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(4096, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image.
        memfd.instantiate(64 << 10, Some(&image)).unwrap();
        assert!(memfd.has_image());
        let slice = mmap.as_mut_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        slice[4096] = 5;
        // Clear and re-instantiate with the same image.
        memfd.clear_and_remain_ready().unwrap();
        memfd.instantiate(64 << 10, Some(&image)).unwrap();
        let slice = mmap.as_slice();
        // We should not see the mutation from above.
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        // Clear and re-instantiate with no image.
        memfd.clear_and_remain_ready().unwrap();
        memfd.instantiate(64 << 10, None).unwrap();
        assert!(!memfd.has_image());
        let slice = mmap.as_slice();
        assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
        // Clear and re-instantiate with the image again.
        memfd.clear_and_remain_ready().unwrap();
        memfd.instantiate(64 << 10, Some(&image)).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(4096, &[10, 11, 12, 13]).unwrap());
        memfd.clear_and_remain_ready().unwrap();
        memfd.instantiate(128 << 10, Some(&image2)).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[10, 11, 12, 13], &slice[4096..4100]);
        // Instantiate the original image again; we should notice that
        // it's a different image and not reuse the mappings.
        memfd.clear_and_remain_ready().unwrap();
        memfd.instantiate(64 << 10, Some(&image)).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
    }

    /// qemu's madvise implementation does not implement the
    /// "flash-reset back to zero or CoW backing" semantics that Linux
    /// does. Our CI setup uses qemu (in usermode-binary mode, not
    /// whole-system mode) to run tests on aarch64 and s390x. We want
    /// to skip these tests when under qemu, but not when someone is
    /// developing natively on one of these architectures. So instead,
    /// we dynamically detect an environment variable that our CI
    /// setup sets.
    ///
    /// See `skip_pooling_allocator_tests()` in `tests/all/main.rs`
    /// for more.
    fn skip_tests_due_to_qemu_madvise_semantics() -> bool {
        std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok()
    }
}
@@ -0,0 +1,74 @@
//! Shims for MemFdSlot when the memfd allocator is not
//! included. These enable unconditional use of the type and its
//! methods throughout higher-level code.

use crate::InstantiationError;
use anyhow::Result;
use std::sync::Arc;
use wasmtime_environ::{DefinedMemoryIndex, Module};

/// A shim for the memfd image container when memfd support is not
/// included.
pub enum ModuleMemFds {}

/// A shim for an individual memory image.
#[allow(dead_code)]
pub enum MemoryMemFd {}

impl ModuleMemFds {
    /// Construct a new set of memfd images. This variant is used
    /// when memfd support is not included; it always returns no
    /// images.
    pub fn new(_: &Module, _: &[u8]) -> Result<Option<Arc<ModuleMemFds>>> {
        Ok(None)
    }

    /// Get the memfd image for a particular memory.
    pub(crate) fn get_memory_image(&self, _: DefinedMemoryIndex) -> Option<&Arc<MemoryMemFd>> {
        // Unreachable because the `Self` type is uninhabited.
        match *self {}
    }
}

/// A placeholder for MemFdSlot when we have not included the pooling
/// allocator.
///
/// To allow MemFdSlot to be unconditionally passed around in various
/// places (e.g. a `Memory`), we define a zero-sized type when memfd is
/// not included in the build.
#[derive(Debug)]
pub struct MemFdSlot;

#[allow(dead_code)]
impl MemFdSlot {
    pub(crate) fn create(_: *mut libc::c_void, _: usize) -> Self {
        panic!("create() on invalid MemFdSlot");
    }

    pub(crate) fn instantiate(
        &mut self,
        _: usize,
        _: Option<&Arc<MemoryMemFd>>,
    ) -> Result<(), InstantiationError> {
        panic!("instantiate() on invalid MemFdSlot");
    }

    pub(crate) fn no_clear_on_drop(&mut self) {}

    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        false
    }

    pub(crate) fn is_dirty(&self) -> bool {
        false
    }

    pub(crate) fn set_heap_limit(&mut self, _: usize) -> Result<()> {
        panic!("set_heap_limit on invalid MemFdSlot");
    }
}
@@ -0,0 +1,28 @@
//! Unique IDs for modules in the runtime.

use std::sync::atomic::{AtomicU64, Ordering};

/// A unique identifier (within an engine or similar) for a compiled
/// module.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CompiledModuleId(u64);

/// An allocator for compiled module IDs.
pub struct CompiledModuleIdAllocator {
    next: AtomicU64,
}

impl CompiledModuleIdAllocator {
    /// Create a compiled-module ID allocator.
    pub fn new() -> Self {
        Self {
            next: AtomicU64::new(1),
        }
    }

    /// Allocate a new ID.
    pub fn alloc(&self) -> CompiledModuleId {
        let id = self.next.fetch_add(1, Ordering::Relaxed);
        CompiledModuleId(id)
    }
}
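
// Illustrative usage (an editorial sketch, not part of the original
// change):
//
//     let ids = CompiledModuleIdAllocator::new();
//     let a = ids.alloc();
//     let b = ids.alloc();
//     assert_ne!(a, b); // IDs increase monotonically and never repeat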