From ca5a9db0d188d1f75f67477629f69f114b003b5a Mon Sep 17 00:00:00 2001 From: ssunkin-fastly <134561977+ssunkin-fastly@users.noreply.github.com> Date: Tue, 15 Aug 2023 20:47:07 -0700 Subject: [PATCH] Memcheck for Wasm guests in Wasmtime (#6820) * attempt at inserting things where i think they might belong + questions * entry hook + questions * commented out all changes, doc comment errors * fix doc comment * libcalls build now!!!! * initial check_malloc_exit setup * WIP: load/store hooks * hooks added + building * added valgrind library * made wasm-valgrind accessible in wasmtime * check_malloc filled in... * move valgrind_state to an appropriate part of instance it works!!!!! * yay it's working! (?) i think?? * stack tracing in progress * errors + num bytes displayed * initial valgrind configuration * valgrind conditional some warnings fixed * conditional compilation + CLI flag finished * panic!() changed to bail!() * started adding doc comments * added memory grow hook + fixed access size handling * removed test.wasm * removed malloc_twice.wat * doc comments in spec.rs * pr feedback addressed * ran cargo fmt * addressing more feedback * Remove fuzz crate from wmemcheck. * Review feedback and test fix. * add wasmtime-wmemcheck crate to publish allowlist. * fix build without compiler features * reorder crates in publish list * Add trampolines for libcalls on s390x. * Make wasmtime-wmemcheck dep an exact version requirement. --------- Co-authored-by: iximeow Co-authored-by: Chris Fallin Co-authored-by: iximeow --- .github/workflows/main.yml | 1 + Cargo.lock | 6 + Cargo.toml | 5 +- cranelift/wasm/src/code_translator.rs | 59 +-- cranelift/wasm/src/environ/spec.rs | 41 ++ cranelift/wasm/src/func_translator.rs | 1 + crates/cranelift/Cargo.toml | 2 + crates/cranelift/src/builder.rs | 7 + crates/cranelift/src/compiler.rs | 6 +- crates/cranelift/src/func_environ.rs | 214 +++++++++- crates/environ/src/builtin.rs | 16 + crates/environ/src/compilation.rs | 3 + crates/runtime/Cargo.toml | 3 + crates/runtime/src/instance.rs | 40 +- crates/runtime/src/instance/allocator.rs | 13 +- .../runtime/src/instance/allocator/pooling.rs | 2 + crates/runtime/src/libcalls.rs | 148 +++++++ crates/runtime/src/trampolines/s390x.S | 8 + crates/wasmtime/Cargo.toml | 2 + crates/wasmtime/src/config.rs | 19 + crates/wasmtime/src/instance.rs | 1 + crates/wasmtime/src/module.rs | 1 + crates/wasmtime/src/store.rs | 1 + crates/wasmtime/src/trampoline.rs | 1 + crates/wasmtime/src/trampoline/memory.rs | 1 + crates/wmemcheck/Cargo.toml | 11 + crates/wmemcheck/src/lib.rs | 404 ++++++++++++++++++ docs/wmemcheck.md | 8 + scripts/publish.rs | 1 + src/commands/run.rs | 8 + 30 files changed, 1002 insertions(+), 31 deletions(-) create mode 100644 crates/wmemcheck/Cargo.toml create mode 100644 crates/wmemcheck/src/lib.rs create mode 100644 docs/wmemcheck.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 702c4f25ca..9be7950741 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -266,6 +266,7 @@ jobs: - run: cargo check -p wasmtime --no-default-features --features component-model - run: cargo check -p wasmtime --no-default-features --features cranelift,wat,async,cache - run: cargo check -p wasmtime --no-default-features --features winch + - run: cargo check -p wasmtime --no-default-features --features wmemcheck - run: cargo check --features component-model - run: cargo check -p wasmtime --features incremental-cache diff --git a/Cargo.lock b/Cargo.lock index f5bcb273c8..bbe3606616 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -3479,6 +3479,7 @@ name = "wasmtime-cranelift" version = "13.0.0" dependencies = [ "anyhow", + "cfg-if", "cranelift-codegen", "cranelift-control", "cranelift-entity", @@ -3697,6 +3698,7 @@ dependencies = [ "wasmtime-fiber", "wasmtime-jit-debug", "wasmtime-versioned-export-macros", + "wasmtime-wmemcheck", "windows-sys", ] @@ -3826,6 +3828,10 @@ dependencies = [ "wit-parser", ] +[[package]] +name = "wasmtime-wmemcheck" +version = "13.0.0" + [[package]] name = "wast" version = "35.0.2" diff --git a/Cargo.toml b/Cargo.toml index d45779629f..966ab13eee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,7 @@ members = [ "crates/test-programs/wasi-http-tests", "crates/test-programs/command-tests", "crates/test-programs/reactor-tests", + "crates/wmemcheck", "crates/wasi-preview1-component-adapter", "crates/wasi-preview1-component-adapter/verify", "crates/winch", @@ -112,7 +113,7 @@ members = [ ] exclude = [ 'crates/wasi-common/WASI/tools/witx-cli', - 'docs/rust_wasi_markdown_parser' + 'docs/rust_wasi_markdown_parser', ] [workspace.package] @@ -122,6 +123,7 @@ edition = "2021" rust-version = "1.66.0" [workspace.dependencies] +wasmtime-wmemcheck = { path = "crates/wmemcheck", version = "=13.0.0" } wasmtime = { path = "crates/wasmtime", version = "13.0.0", default-features = false } wasmtime-cache = { path = "crates/cache", version = "=13.0.0" } wasmtime-cli-flags = { path = "crates/cli-flags", version = "=13.0.0" } @@ -271,6 +273,7 @@ component-model = [ "wasmtime-cli-flags/component-model" ] winch = ["wasmtime/winch"] +wmemcheck = ["wasmtime/wmemcheck"] [[test]] name = "host_segfault" diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 0ddfce7462..0d5e788897 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -208,6 +208,7 @@ pub fn translate_operator( } debug_assert_eq!(ty, builder.func.dfg.value_type(val)); builder.ins().store(flags, val, addr, offset); + environ.update_global(builder, *global_index, val); } GlobalVariable::Custom => { let val = state.pop1(); @@ -588,6 +589,7 @@ pub fn translate_operator( }; { let return_args = state.peekn_mut(return_count); + environ.handle_before_return(&return_args, builder); bitcast_wasm_returns(environ, return_args, builder); builder.ins().return_(return_args); } @@ -757,6 +759,7 @@ pub fn translate_operator( let heap_index = MemoryIndex::from_u32(*mem); let heap = state.get_heap(builder.func, *mem, environ)?; let val = state.pop1(); + environ.before_memory_grow(builder, val, heap_index); state.push1(environ.translate_memory_grow(builder.cursor(), heap_index, heap, val)?) } Operator::MemorySize { mem, mem_byte: _ } => { @@ -859,7 +862,8 @@ pub fn translate_operator( ); } Operator::V128Load8x8S { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + //TODO(#6829): add before_load() and before_store() hooks for SIMD loads and stores. + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? ); @@ -867,7 +871,7 @@ pub fn translate_operator( state.push1(loaded); } Operator::V128Load8x8U { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? 
); @@ -875,7 +879,7 @@ pub fn translate_operator( state.push1(loaded); } Operator::V128Load16x4S { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? ); @@ -883,7 +887,7 @@ pub fn translate_operator( state.push1(loaded); } Operator::V128Load16x4U { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? ); @@ -891,7 +895,7 @@ pub fn translate_operator( state.push1(loaded); } Operator::V128Load32x2S { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? ); @@ -899,7 +903,7 @@ pub fn translate_operator( state.push1(loaded); } Operator::V128Load32x2U { memarg } => { - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, _, base) = unwrap_or_return_unreachable_state!( state, prepare_addr(memarg, 8, builder, state, environ)? ); @@ -2631,13 +2635,15 @@ fn translate_unreachable_operator( /// heap address if execution reaches that point. /// /// Returns `None` when the Wasm access will unconditionally trap. +/// +/// Returns `(flags, wasm_addr, native_addr)`. fn prepare_addr( memarg: &MemArg, access_size: u8, builder: &mut FunctionBuilder, state: &mut FuncTranslationState, environ: &mut FE, -) -> WasmResult> +) -> WasmResult> where FE: FuncEnvironment + ?Sized, { @@ -2787,7 +2793,7 @@ where // vmctx, stack) accesses. flags.set_heap(); - Ok(Reachability::Reachable((flags, addr))) + Ok(Reachability::Reachable((flags, index, addr))) } fn align_atomic_addr( @@ -2834,7 +2840,7 @@ fn prepare_atomic_addr( builder: &mut FunctionBuilder, state: &mut FuncTranslationState, environ: &mut FE, -) -> WasmResult> { +) -> WasmResult> { align_atomic_addr(memarg, loaded_bytes, builder, state); prepare_addr(memarg, loaded_bytes, builder, state, environ) } @@ -2866,16 +2872,15 @@ fn translate_load( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult> { - let (flags, base) = match prepare_addr( - memarg, - mem_op_size(opcode, result_ty), - builder, - state, - environ, - )? { - Reachability::Unreachable => return Ok(Reachability::Unreachable), - Reachability::Reachable((f, b)) => (f, b), - }; + let mem_op_size = mem_op_size(opcode, result_ty); + let (flags, wasm_index, base) = + match prepare_addr(memarg, mem_op_size, builder, state, environ)? { + Reachability::Unreachable => return Ok(Reachability::Unreachable), + Reachability::Reachable((f, i, b)) => (f, i, b), + }; + + environ.before_load(builder, mem_op_size, wasm_index, memarg.offset); + let (load, dfg) = builder .ins() .Load(opcode, result_ty, flags, Offset32::new(0), base); @@ -2893,11 +2898,15 @@ fn translate_store( ) -> WasmResult<()> { let val = state.pop1(); let val_ty = builder.func.dfg.value_type(val); + let mem_op_size = mem_op_size(opcode, val_ty); - let (flags, base) = unwrap_or_return_unreachable_state!( + let (flags, wasm_index, base) = unwrap_or_return_unreachable_state!( state, - prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)? + prepare_addr(memarg, mem_op_size, builder, state, environ)? 
); + + environ.before_store(builder, mem_op_size, wasm_index, memarg.offset); + builder .ins() .Store(opcode, val_ty, flags, Offset32::new(0), val, base); @@ -2954,7 +2963,7 @@ fn translate_atomic_rmw( arg2 = builder.ins().ireduce(access_ty, arg2); } - let (flags, addr) = unwrap_or_return_unreachable_state!( + let (flags, _, addr) = unwrap_or_return_unreachable_state!( state, prepare_atomic_addr( memarg, @@ -3011,7 +3020,7 @@ fn translate_atomic_cas( replacement = builder.ins().ireduce(access_ty, replacement); } - let (flags, addr) = unwrap_or_return_unreachable_state!( + let (flags, _, addr) = unwrap_or_return_unreachable_state!( state, prepare_atomic_addr( memarg, @@ -3054,7 +3063,7 @@ fn translate_atomic_load( }; assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); - let (flags, addr) = unwrap_or_return_unreachable_state!( + let (flags, _, addr) = unwrap_or_return_unreachable_state!( state, prepare_atomic_addr( memarg, @@ -3103,7 +3112,7 @@ fn translate_atomic_store( data = builder.ins().ireduce(access_ty, data); } - let (flags, addr) = unwrap_or_return_unreachable_state!( + let (flags, _, addr) = unwrap_or_return_unreachable_state!( state, prepare_atomic_addr( memarg, diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 9dca6d761a..76191d6ca1 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -648,6 +648,47 @@ pub trait FuncEnvironment: TargetEnvironment { fn use_x86_pmaddubsw_for_dot(&self) -> bool { false } + + /// Inserts code before a function return. + fn handle_before_return(&mut self, _retvals: &[ir::Value], _builder: &mut FunctionBuilder) {} + + /// Inserts code before a load. + fn before_load( + &mut self, + _builder: &mut FunctionBuilder, + _val_size: u8, + _addr: ir::Value, + _offset: u64, + ) { + } + + /// Inserts code before a store. + fn before_store( + &mut self, + _builder: &mut FunctionBuilder, + _val_size: u8, + _addr: ir::Value, + _offset: u64, + ) { + } + + /// Inserts code before updating a global. + fn update_global( + &mut self, + _builder: &mut FunctionBuilder, + _global_index: u32, + _value: ir::Value, + ) { + } + + /// Inserts code before memory.grow. + fn before_memory_grow( + &mut self, + _builder: &mut FunctionBuilder, + _num_bytes: ir::Value, + _mem_index: MemoryIndex, + ) { + } } /// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the diff --git a/cranelift/wasm/src/func_translator.rs b/cranelift/wasm/src/func_translator.rs index e282c8a0d2..2ed2b995c1 100644 --- a/cranelift/wasm/src/func_translator.rs +++ b/cranelift/wasm/src/func_translator.rs @@ -275,6 +275,7 @@ fn parse_function_body( // generate a return instruction that doesn't match the signature. 
if state.reachable { if !builder.is_unreachable() { + environ.handle_before_return(&state.stack, builder); bitcast_wasm_returns(environ, &mut state.stack, builder); builder.ins().return_(&state.stack); } diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 1de960f2f4..e7318e4abc 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -26,9 +26,11 @@ target-lexicon = { workspace = true } gimli = { workspace = true } object = { workspace = true, features = ['write'] } thiserror = { workspace = true } +cfg-if = { workspace = true } wasmtime-versioned-export-macros = { workspace = true } [features] all-arch = ["cranelift-codegen/all-arch"] component-model = ["wasmtime-environ/component-model"] incremental-cache = ["cranelift-codegen/incremental-cache"] +wmemcheck = [] diff --git a/crates/cranelift/src/builder.rs b/crates/cranelift/src/builder.rs index cd255eccd9..b2c86107cb 100644 --- a/crates/cranelift/src/builder.rs +++ b/crates/cranelift/src/builder.rs @@ -20,6 +20,7 @@ struct Builder { linkopts: LinkOptions, cache_store: Option>, clif_dir: Option, + wmemcheck: bool, } #[derive(Clone, Default)] @@ -42,6 +43,7 @@ pub fn builder() -> Box { linkopts: LinkOptions::default(), cache_store: None, clif_dir: None, + wmemcheck: false, }) } @@ -91,6 +93,7 @@ impl CompilerBuilder for Builder { self.cache_store.clone(), self.linkopts.clone(), self.clif_dir.clone(), + self.wmemcheck, ))) } @@ -105,6 +108,10 @@ impl CompilerBuilder for Builder { self.cache_store = Some(cache_store); Ok(()) } + + fn wmemcheck(&mut self, enable: bool) { + self.wmemcheck = enable; + } } impl fmt::Debug for Builder { diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 3578cad137..3f4497efe9 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -71,6 +71,7 @@ pub(crate) struct Compiler { linkopts: LinkOptions, cache_store: Option>, clif_dir: Option, + wmemcheck: bool, } impl Drop for Compiler { @@ -108,6 +109,7 @@ impl Compiler { cache_store: Option>, linkopts: LinkOptions, clif_dir: Option, + wmemcheck: bool, ) -> Compiler { Compiler { contexts: Default::default(), @@ -116,6 +118,7 @@ impl Compiler { linkopts, cache_store, clif_dir, + wmemcheck, } } } @@ -147,7 +150,8 @@ impl wasmtime_environ::Compiler for Compiler { context.func.collect_debug_info(); } - let mut func_env = FuncEnvironment::new(isa, translation, types, &self.tunables); + let mut func_env = + FuncEnvironment::new(isa, translation, types, &self.tunables, self.wmemcheck); // The `stack_limit` global value below is the implementation of stack // overflow checks in Wasmtime. 
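Note on the hooks above: the new `FuncEnvironment` methods (`before_load`, `before_store`, `update_global`, `before_memory_grow`, `handle_before_return`) are deliberately empty by default; the wmemcheck-enabled environment in the next file lowers them to `check_load`/`check_store` (and related) libcalls against a per-instance checker. As a rough host-side analogue of what the `check_load` libcall does before each guest load — a sketch only: the helper name and the `memory` slice are illustrative, while `Wmemcheck`, `AccessError`, and `read` are the checker API added later in this patch (crates/wmemcheck/src/lib.rs):

use wasmtime_wmemcheck::{AccessError, Wmemcheck};

// Validate an N-byte guest read at `addr + offset`, then perform it. This
// mirrors the generated sequence: call check_load(vmctx, num_bytes, addr,
// offset), then execute the original load instruction. `memory` is assumed
// to cover the checked range.
fn checked_load(
    checker: &mut Wmemcheck,
    memory: &[u8],
    addr: usize,
    offset: usize,
    num_bytes: usize,
) -> Result<Vec<u8>, AccessError> {
    // Wmemcheck::read rejects reads of unallocated memory or of malloc'd
    // bytes that were never written (InvalidRead / OutOfBounds).
    checker.read(addr + offset, num_bytes)?;
    Ok(memory[addr + offset..addr + offset + num_bytes].to_vec())
}

The store-side hook is symmetric, except that `Wmemcheck::write` additionally marks the bytes as defined so that later reads of them succeed.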
diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index d7b20d98e5..39df50fb0b 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -1,9 +1,12 @@ +use cfg_if::cfg_if; use cranelift_codegen::cursor::FuncCursor; use cranelift_codegen::ir; use cranelift_codegen::ir::condcodes::*; use cranelift_codegen::ir::immediates::{Imm64, Offset32, Uimm64}; use cranelift_codegen::ir::types::*; -use cranelift_codegen::ir::{AbiParam, ArgumentPurpose, Function, InstBuilder, Signature}; +use cranelift_codegen::ir::{ + AbiParam, ArgumentPurpose, Function, InstBuilder, Signature, UserFuncName, Value, +}; use cranelift_codegen::isa::{self, CallConv, TargetFrontendConfig, TargetIsa}; use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_frontend::FunctionBuilder; @@ -110,6 +113,8 @@ pub struct FuncEnvironment<'module_environment> { module: &'module_environment Module, types: &'module_environment ModuleTypes, + translation: &'module_environment ModuleTranslation<'module_environment>, + /// Heaps implementing WebAssembly linear memories. heaps: PrimaryMap, @@ -150,6 +155,9 @@ pub struct FuncEnvironment<'module_environment> { epoch_ptr_var: cranelift_frontend::Variable, fuel_consumed: i64, + + #[cfg(feature = "wmemcheck")] + wmemcheck: bool, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -158,6 +166,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { translation: &'module_environment ModuleTranslation<'module_environment>, types: &'module_environment ModuleTypes, tunables: &'module_environment Tunables, + wmemcheck: bool, ) -> Self { let builtin_function_signatures = BuiltinFunctionSignatures::new( isa.pointer_type(), @@ -168,6 +177,11 @@ impl<'module_environment> FuncEnvironment<'module_environment> { }, CallConv::triple_default(isa.triple()), ); + + // Avoid unused warning in default build. + #[cfg(not(feature = "wmemcheck"))] + let _ = wmemcheck; + Self { isa, module: &translation.module, @@ -181,10 +195,13 @@ impl<'module_environment> FuncEnvironment<'module_environment> { epoch_deadline_var: Variable::new(0), epoch_ptr_var: Variable::new(0), vmruntime_limits_ptr: Variable::new(0), + translation: translation, // Start with at least one fuel being consumed because even empty // functions should consume at least some fuel. fuel_consumed: 1, + #[cfg(feature = "wmemcheck")] + wmemcheck, } } @@ -604,6 +621,57 @@ impl<'module_environment> FuncEnvironment<'module_environment> { self.epoch_check(builder); } + #[cfg(feature = "wmemcheck")] + fn hook_malloc_exit(&mut self, builder: &mut FunctionBuilder, retvals: &[Value]) { + let check_malloc_sig = self.builtin_function_signatures.check_malloc(builder.func); + let (vmctx, check_malloc) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::check_malloc(), + ); + let func_args = builder + .func + .dfg + .block_params(builder.func.layout.entry_block().unwrap()); + let len = if func_args.len() < 3 { + return; + } else { + // If a function named `malloc` has at least one argument, we assume the + // first argument is the requested allocation size. 
+ func_args[2] + }; + let retval = if retvals.len() < 1 { + return; + } else { + retvals[0] + }; + builder + .ins() + .call_indirect(check_malloc_sig, check_malloc, &[vmctx, retval, len]); + } + + #[cfg(feature = "wmemcheck")] + fn hook_free_exit(&mut self, builder: &mut FunctionBuilder) { + let check_free_sig = self.builtin_function_signatures.check_free(builder.func); + let (vmctx, check_free) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::check_free(), + ); + let func_args = builder + .func + .dfg + .block_params(builder.func.layout.entry_block().unwrap()); + let ptr = if func_args.len() < 3 { + return; + } else { + // If a function named `free` has at least one argument, we assume the + // first argument is a pointer to memory. + func_args[2] + }; + builder + .ins() + .call_indirect(check_free_sig, check_free, &[vmctx, ptr]); + } + fn epoch_ptr(&mut self, builder: &mut FunctionBuilder<'_>) -> ir::Value { let vmctx = self.vmctx(builder.func); let pointer_type = self.pointer_type(); @@ -812,6 +880,43 @@ impl<'module_environment> FuncEnvironment<'module_environment> { builder.switch_to_block(continuation_block); result_param } + + fn check_malloc_start(&mut self, builder: &mut FunctionBuilder) { + let malloc_start_sig = self.builtin_function_signatures.malloc_start(builder.func); + let (vmctx, malloc_start) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::malloc_start(), + ); + builder + .ins() + .call_indirect(malloc_start_sig, malloc_start, &[vmctx]); + } + + fn check_free_start(&mut self, builder: &mut FunctionBuilder) { + let free_start_sig = self.builtin_function_signatures.free_start(builder.func); + let (vmctx, free_start) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::free_start(), + ); + builder + .ins() + .call_indirect(free_start_sig, free_start, &[vmctx]); + } + + fn current_func_name(&self, builder: &mut FunctionBuilder) -> Option<&str> { + let func_index = match &builder.func.name { + UserFuncName::User(user) => FuncIndex::from_u32(user.index), + _ => { + panic!("function name not a UserFuncName::User as expected") + } + }; + self.translation + .debuginfo + .name_section + .func_names + .get(&func_index) + .map(|s| *s) + } } struct Call<'a, 'func, 'module_env> { @@ -2333,6 +2438,14 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m if self.tunables.epoch_interruption { self.epoch_function_entry(builder); } + + let func_name = self.current_func_name(builder); + if func_name == Some("malloc") { + self.check_malloc_start(builder); + } else if func_name == Some("free") { + self.check_free_start(builder); + } + Ok(()) } @@ -2374,4 +2487,103 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m fn use_x86_pmaddubsw_for_dot(&self) -> bool { self.isa.has_x86_pmaddubsw_lowering() } + + cfg_if! 
{ + if #[cfg(feature = "wmemcheck")] { + fn handle_before_return( + &mut self, + retvals: &[Value], + builder: &mut FunctionBuilder, + ) { + if self.wmemcheck { + let func_name = self.current_func_name(builder); + if func_name == Some("malloc") { + self.hook_malloc_exit(builder, retvals); + } else if func_name == Some("free") { + self.hook_free_exit(builder); + } + } + } + + fn before_load(&mut self, builder: &mut FunctionBuilder, val_size: u8, addr: ir::Value, offset: u64) { + if self.wmemcheck { + let check_load_sig = self.builtin_function_signatures.check_load(builder.func); + let (vmctx, check_load) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::check_load(), + ); + let num_bytes = builder.ins().iconst(I32, val_size as i64); + let offset_val = builder.ins().iconst(I64, offset as i64); + builder + .ins() + .call_indirect(check_load_sig, check_load, &[vmctx, num_bytes, addr, offset_val]); + } + } + + fn before_store(&mut self, builder: &mut FunctionBuilder, val_size: u8, addr: ir::Value, offset: u64) { + if self.wmemcheck { + let check_store_sig = self.builtin_function_signatures.check_store(builder.func); + let (vmctx, check_store) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::check_store(), + ); + let num_bytes = builder.ins().iconst(I32, val_size as i64); + let offset_val = builder.ins().iconst(I64, offset as i64); + builder + .ins() + .call_indirect(check_store_sig, check_store, &[vmctx, num_bytes, addr, offset_val]); + } + } + + fn update_global(&mut self, builder: &mut FunctionBuilder, global_index: u32, value: ir::Value) { + if self.wmemcheck { + if global_index == 0 { + // We are making the assumption that global 0 is the auxiliary stack pointer. 
+ let update_stack_pointer_sig = self.builtin_function_signatures.update_stack_pointer(builder.func); + let (vmctx, update_stack_pointer) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::update_stack_pointer(), + ); + builder + .ins() + .call_indirect(update_stack_pointer_sig, update_stack_pointer, &[vmctx, value]); + } + } + } + + fn before_memory_grow(&mut self, builder: &mut FunctionBuilder, num_pages: ir::Value, mem_index: MemoryIndex) { + if self.wmemcheck && mem_index.as_u32() == 0 { + let update_mem_size_sig = self.builtin_function_signatures.update_mem_size(builder.func); + let (vmctx, update_mem_size) = self.translate_load_builtin_function_address( + &mut builder.cursor(), + BuiltinFunctionIndex::update_mem_size(), + ); + builder + .ins() + .call_indirect(update_mem_size_sig, update_mem_size, &[vmctx, num_pages]); + } + } + } else { + fn handle_before_return(&mut self, _retvals: &[Value], builder: &mut FunctionBuilder) { + let _ = self.builtin_function_signatures.check_malloc(builder.func); + let _ = self.builtin_function_signatures.check_free(builder.func); + } + + fn before_load(&mut self, builder: &mut FunctionBuilder, _val_size: u8, _addr: ir::Value, _offset: u64) { + let _ = self.builtin_function_signatures.check_load(builder.func); + } + + fn before_store(&mut self, builder: &mut FunctionBuilder, _val_size: u8, _addr: ir::Value, _offset: u64) { + let _ = self.builtin_function_signatures.check_store(builder.func); + } + + fn update_global(&mut self, builder: &mut FunctionBuilder, _global_index: u32, _value: ir::Value) { + let _ = self.builtin_function_signatures.update_stack_pointer(builder.func); + } + + fn before_memory_grow(&mut self, builder: &mut FunctionBuilder, _num_pages: Value, _mem_index: MemoryIndex) { + let _ = self.builtin_function_signatures.update_mem_size(builder.func); + } + } + } } diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs index 61eb31a1f1..07a5a03ab5 100644 --- a/crates/environ/src/builtin.rs +++ b/crates/environ/src/builtin.rs @@ -51,6 +51,22 @@ macro_rules! foreach_builtin_function { out_of_gas(vmctx: vmctx); /// Invoked when we reach a new epoch. new_epoch(vmctx: vmctx) -> i64; + /// Invoked before malloc returns. + check_malloc(vmctx: vmctx, addr: i32, len: i32) -> i32; + /// Invoked before the free returns. + check_free(vmctx: vmctx, addr: i32) -> i32; + /// Invoked before a load is executed. + check_load(vmctx: vmctx, num_bytes: i32, addr: i32, offset: i32) -> i32; + /// Invoked before a store is executed. + check_store(vmctx: vmctx, num_bytes: i32, addr: i32, offset: i32) -> i32; + /// Invoked after malloc is called. + malloc_start(vmctx: vmctx); + /// Invoked after free is called. + free_start(vmctx: vmctx); + /// Invoked when wasm stack pointer is updated. + update_stack_pointer(vmctx: vmctx, value: i32); + /// Invoked before memory.grow is called. + update_mem_size(vmctx: vmctx, num_bytes: i32); } }; } diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index 9b79643278..dc5ddc32cf 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -127,6 +127,9 @@ pub trait CompilerBuilder: Send + Sync + fmt::Debug { /// Builds a new [`Compiler`] object from this configuration. fn build(&self) -> Result>; + + /// Enables or disables wmemcheck during runtime according to the wmemcheck CLI flag. 
+ fn wmemcheck(&mut self, _enable: bool) {} } /// Description of compiler settings returned by [`CompilerBuilder::settings`]. diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index bed617d4e5..50002b88de 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -11,6 +11,7 @@ repository = "https://github.com/bytecodealliance/wasmtime" edition.workspace = true [dependencies] +wasmtime-wmemcheck = { workspace = true } wasmtime-asm-macros = { workspace = true } wasmtime-environ = { workspace = true } wasmtime-fiber = { workspace = true, optional = true } @@ -64,3 +65,5 @@ component-model = [ "wasmtime-environ/component-model", "dep:encoding_rs", ] + +wmemcheck = [] diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs index 4e24aec4dd..7a4c48ffa6 100644 --- a/crates/runtime/src/instance.rs +++ b/crates/runtime/src/instance.rs @@ -29,9 +29,11 @@ use std::{mem, ptr}; use wasmtime_environ::{ packed_option::ReservedValue, DataIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, ElemIndex, EntityIndex, EntityRef, EntitySet, FuncIndex, GlobalIndex, - GlobalInit, HostPtr, MemoryIndex, Module, PrimaryMap, SignatureIndex, TableIndex, + GlobalInit, HostPtr, MemoryIndex, MemoryPlan, Module, PrimaryMap, SignatureIndex, TableIndex, TableInitialValue, Trap, VMOffsets, WasmHeapType, WasmRefType, WasmType, VMCONTEXT_MAGIC, }; +#[cfg(feature = "wmemcheck")] +use wasmtime_wmemcheck::Wmemcheck; mod allocator; @@ -140,6 +142,10 @@ pub struct Instance { /// seems not too bad. vmctx_self_reference: SendSyncPtr, + #[cfg(feature = "wmemcheck")] + pub(crate) wmemcheck_state: Option, + // TODO: add support for multiple memories, wmemcheck_state corresponds to + // memory 0. /// Additional context used by compiled wasm code. This field is last, and /// represents a dynamically-sized array that extends beyond the nominal /// end of the struct (similar to a flexible array member). @@ -157,6 +163,7 @@ impl Instance { index: usize, memories: PrimaryMap, tables: PrimaryMap, + memory_plans: &PrimaryMap, ) -> InstanceHandle { // The allocation must be *at least* the size required of `Instance`. 
let layout = Self::alloc_layout(req.runtime_info.offsets()); @@ -170,6 +177,9 @@ impl Instance { let dropped_elements = EntitySet::with_capacity(module.passive_elements.len()); let dropped_data = EntitySet::with_capacity(module.passive_data_map.len()); + #[cfg(not(feature = "wmemcheck"))] + let _ = memory_plans; + ptr::write( ptr, Instance { @@ -186,6 +196,21 @@ impl Instance { vmctx: VMContext { _marker: std::marker::PhantomPinned, }, + #[cfg(feature = "wmemcheck")] + wmemcheck_state: { + if req.wmemcheck { + let size = memory_plans + .iter() + .next() + .map(|plan| plan.1.memory.minimum) + .unwrap_or(0) + * 64 + * 1024; + Some(Wmemcheck::new(size as usize)) + } else { + None + } + }, }, ); @@ -1126,7 +1151,18 @@ impl Instance { ptr::write(to, VMGlobalDefinition::new()); match *init { - GlobalInit::I32Const(x) => *(*to).as_i32_mut() = x, + GlobalInit::I32Const(x) => { + let index = module.global_index(index); + if index.index() == 0 { + #[cfg(feature = "wmemcheck")] + { + if let Some(wmemcheck) = &mut self.wmemcheck_state { + wmemcheck.set_stack_size(x as usize); + } + } + } + *(*to).as_i32_mut() = x; + } GlobalInit::I64Const(x) => *(*to).as_i64_mut() = x, GlobalInit::F32Const(x) => *(*to).as_f32_bits_mut() = x, GlobalInit::F64Const(x) => *(*to).as_f64_bits_mut() = x, diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs index da54195746..4ed72d6964 100644 --- a/crates/runtime/src/instance/allocator.rs +++ b/crates/runtime/src/instance/allocator.rs @@ -53,6 +53,9 @@ pub struct InstanceAllocationRequest<'a> { /// We use a number of `PhantomPinned` declarations to indicate this to the /// compiler. More info on this in `wasmtime/src/store.rs` pub store: StorePtr, + + /// Indicates '--wmemcheck' flag. + pub wmemcheck: bool, } /// A pointer to a Store. This Option<*mut dyn Store> is wrapped in a struct @@ -122,7 +125,15 @@ pub unsafe trait InstanceAllocator { return Err(e); } - unsafe { Ok(Instance::new(req, index, memories, tables)) } + unsafe { + Ok(Instance::new( + req, + index, + memories, + tables, + &module.memory_plans, + )) + } } /// Deallocates the provided instance. 
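On the runtime side, each instance now carries an optional `Wmemcheck` sized from linear memory 0's minimum (in 64 KiB Wasm pages), and global 0's initial value is taken as the size of the auxiliary stack. A minimal standalone sketch of that lifecycle, using only the `Wmemcheck` API added below in crates/wmemcheck/src/lib.rs — the page count and addresses are arbitrary example values, not taken from the patch:

use wasmtime_wmemcheck::Wmemcheck;

fn main() {
    // Mirror Instance::new: memory 0's minimum, converted from pages to bytes.
    let minimum_pages: usize = 17; // example value only
    let mut checker = Wmemcheck::new(minimum_pages * 64 * 1024);

    // Mirror global-0 initialization: bytes below the initial stack pointer
    // are treated as valid stack memory (finer-grained stack tracing is
    // still a TODO in this patch).
    checker.set_stack_size(70864); // example stack size

    // The sequence that the check_malloc / check_store / check_load /
    // check_free libcalls drive at runtime.
    checker.malloc(0x10_0000, 32).unwrap();
    checker.write(0x10_0000, 4).unwrap();
    checker.read(0x10_0000, 4).unwrap();
    checker.free(0x10_0000).unwrap();
}

Reading those four bytes before the `write` would instead return `AccessError::InvalidRead`, which the `check_load` libcall turns into a trapping error for the guest.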
diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index fd99c0f3b4..c35cf6ffd2 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -1057,6 +1057,7 @@ mod test { }, host_state: Box::new(()), store: StorePtr::empty(), + wmemcheck: false, }) .expect("allocation should succeed"), ); @@ -1074,6 +1075,7 @@ mod test { }, host_state: Box::new(()), store: StorePtr::empty(), + wmemcheck: false, }) { Err(_) => {} _ => panic!("unexpected error"), diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index a4b4db5180..f4c35e1bc5 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -58,13 +58,20 @@ use crate::externref::VMExternRef; use crate::table::{Table, TableElementType}; use crate::vmcontext::VMFuncRef; use crate::{Instance, TrapReason}; +#[cfg(feature = "wmemcheck")] +use anyhow::bail; use anyhow::Result; +use cfg_if::cfg_if; use std::mem; use std::ptr::{self, NonNull}; use std::time::{Duration, Instant}; use wasmtime_environ::{ DataIndex, ElemIndex, FuncIndex, GlobalIndex, MemoryIndex, TableIndex, Trap, }; +#[cfg(feature = "wmemcheck")] +use wasmtime_wmemcheck::AccessError::{ + DoubleMalloc, InvalidFree, InvalidRead, InvalidWrite, OutOfBounds, +}; /// Actually public trampolines which are used by the runtime as the entrypoint /// for libcalls. @@ -488,6 +495,147 @@ unsafe fn new_epoch(instance: &mut Instance) -> Result { (*instance.store()).new_epoch() } +cfg_if! { + if #[cfg(feature = "wmemcheck")] { + // Hook for validating malloc using wmemcheck_state. + unsafe fn check_malloc(instance: &mut Instance, addr: u32, len: u32) -> Result { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + let result = wmemcheck_state.malloc(addr as usize, len as usize); + wmemcheck_state.memcheck_on(); + match result { + Ok(()) => { + return Ok(0); + } + Err(DoubleMalloc { addr, len }) => { + bail!("Double malloc at addr {:#x} of size {}", addr, len) + } + Err(OutOfBounds { addr, len }) => { + bail!("Malloc out of bounds at addr {:#x} of size {}", addr, len); + } + _ => { + panic!("unreachable") + } + } + } + Ok(0) + } + + // Hook for validating free using wmemcheck_state. + unsafe fn check_free(instance: &mut Instance, addr: u32) -> Result { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + let result = wmemcheck_state.free(addr as usize); + wmemcheck_state.memcheck_on(); + match result { + Ok(()) => { + return Ok(0); + } + Err(InvalidFree { addr }) => { + bail!("Invalid free at addr {:#x}", addr) + } + _ => { + panic!("unreachable") + } + } + } + Ok(0) + } + + // Hook for validating load using wmemcheck_state. + fn check_load(instance: &mut Instance, num_bytes: u32, addr: u32, offset: u32) -> Result { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + let result = wmemcheck_state.read(addr as usize + offset as usize, num_bytes as usize); + match result { + Ok(()) => { + return Ok(0); + } + Err(InvalidRead { addr, len }) => { + bail!("Invalid load at addr {:#x} of size {}", addr, len); + } + Err(OutOfBounds { addr, len }) => { + bail!("Load out of bounds at addr {:#x} of size {}", addr, len); + } + _ => { + panic!("unreachable") + } + } + } + Ok(0) + } + + // Hook for validating store using wmemcheck_state. 
+ fn check_store(instance: &mut Instance, num_bytes: u32, addr: u32, offset: u32) -> Result { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + let result = wmemcheck_state.write(addr as usize + offset as usize, num_bytes as usize); + match result { + Ok(()) => { + return Ok(0); + } + Err(InvalidWrite { addr, len }) => { + bail!("Invalid store at addr {:#x} of size {}", addr, len) + } + Err(OutOfBounds { addr, len }) => { + bail!("Store out of bounds at addr {:#x} of size {}", addr, len) + } + _ => { + panic!("unreachable") + } + } + } + Ok(0) + } + + // Hook for turning wmemcheck load/store validation off when entering a malloc function. + fn malloc_start(instance: &mut Instance) { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + wmemcheck_state.memcheck_off(); + } + } + + // Hook for turning wmemcheck load/store validation off when entering a free function. + fn free_start(instance: &mut Instance) { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + wmemcheck_state.memcheck_off(); + } + } + + // Hook for tracking wasm stack updates using wmemcheck_state. + fn update_stack_pointer(_instance: &mut Instance, _value: u32) { + // TODO: stack-tracing has yet to be finalized. All memory below + // the address of the top of the stack is marked as valid for + // loads and stores. + // if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + // instance.wmemcheck_state.update_stack_pointer(value as usize); + // } + } + + // Hook updating wmemcheck_state memory state vector every time memory.grow is called. + fn update_mem_size(instance: &mut Instance, num_pages: u32) { + if let Some(wmemcheck_state) = &mut instance.wmemcheck_state { + const KIB: usize = 1024; + let num_bytes = num_pages as usize * 64 * KIB; + wmemcheck_state.update_mem_size(num_bytes); + } + } + } else { + // No-op for all wmemcheck hooks. + unsafe fn check_malloc(_instance: &mut Instance, _addr: u32, _len: u32) -> Result { Ok(0) } + + unsafe fn check_free(_instance: &mut Instance, _addr: u32) -> Result { Ok(0) } + + fn check_load(_instance: &mut Instance, _num_bytes: u32, _addr: u32, _offset: u32) -> Result { Ok(0) } + + fn check_store(_instance: &mut Instance, _num_bytes: u32, _addr: u32, _offset: u32) -> Result { Ok(0) } + + fn malloc_start(_instance: &mut Instance) {} + + fn free_start(_instance: &mut Instance) {} + + fn update_stack_pointer(_instance: &mut Instance, _value: u32) {} + + fn update_mem_size(_instance: &mut Instance, _num_pages: u32) {} + } +} + /// This module contains functions which are used for resolving relocations at /// runtime if necessary. 
/// diff --git a/crates/runtime/src/trampolines/s390x.S b/crates/runtime/src/trampolines/s390x.S index b6b8bf01d0..3cf80d9d6e 100644 --- a/crates/runtime/src/trampolines/s390x.S +++ b/crates/runtime/src/trampolines/s390x.S @@ -60,3 +60,11 @@ LIBCALL_TRAMPOLINE(memory_atomic_wait32, impl_memory_atomic_wait32) LIBCALL_TRAMPOLINE(memory_atomic_wait64, impl_memory_atomic_wait64) LIBCALL_TRAMPOLINE(out_of_gas, impl_out_of_gas) LIBCALL_TRAMPOLINE(new_epoch, impl_new_epoch) +LIBCALL_TRAMPOLINE(check_malloc, impl_check_malloc) +LIBCALL_TRAMPOLINE(check_free, impl_check_free) +LIBCALL_TRAMPOLINE(check_load, impl_check_load) +LIBCALL_TRAMPOLINE(check_store, impl_check_store) +LIBCALL_TRAMPOLINE(malloc_start, impl_malloc_start) +LIBCALL_TRAMPOLINE(free_start, impl_free_start) +LIBCALL_TRAMPOLINE(update_stack_pointer, impl_update_stack_pointer) +LIBCALL_TRAMPOLINE(update_mem_size, impl_update_mem_size) diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index dea939a293..bbe6865ae7 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -130,3 +130,5 @@ component-model = [ "dep:wasmtime-component-util", "dep:encoding_rs", ] + +wmemcheck = ["wasmtime-runtime/wmemcheck", "wasmtime-cranelift/wmemcheck"] \ No newline at end of file diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 21adc3582b..d86c96a82b 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -110,6 +110,7 @@ pub struct Config { pub(crate) memory_init_cow: bool, pub(crate) memory_guaranteed_dense_image_size: u64, pub(crate) force_memory_init_memfd: bool, + pub(crate) wmemcheck: bool, pub(crate) coredump_on_trap: bool, pub(crate) macos_use_mach_ports: bool, } @@ -125,6 +126,7 @@ struct CompilerConfig { #[cfg(any(feature = "cranelift", feature = "winch"))] cache_store: Option>, clif_dir: Option, + wmemcheck: bool, } #[cfg(any(feature = "cranelift", feature = "winch"))] @@ -137,6 +139,7 @@ impl CompilerConfig { flags: HashSet::new(), cache_store: None, clif_dir: None, + wmemcheck: false, } } @@ -201,6 +204,7 @@ impl Config { memory_init_cow: true, memory_guaranteed_dense_image_size: 16 << 20, force_memory_init_memfd: false, + wmemcheck: false, coredump_on_trap: false, macos_use_mach_ports: true, }; @@ -1480,6 +1484,16 @@ impl Config { self } + /// Enables memory error checking for wasm programs. + /// + /// This option is disabled by default. + #[cfg(any(feature = "cranelift", feature = "winch"))] + pub fn wmemcheck(&mut self, enable: bool) -> &mut Self { + self.wmemcheck = enable; + self.compiler_config.wmemcheck = enable; + self + } + /// Configures the "guaranteed dense image size" for copy-on-write /// initialized memories. 
/// @@ -1539,6 +1553,10 @@ impl Config { { bail!("static memory guard size cannot be smaller than dynamic memory guard size"); } + #[cfg(not(feature = "wmemcheck"))] + if self.wmemcheck { + bail!("wmemcheck (memory checker) was requested but is not enabled in this build"); + } Ok(()) } @@ -1676,6 +1694,7 @@ impl Config { } compiler.set_tunables(self.tunables.clone())?; + compiler.wmemcheck(self.compiler_config.wmemcheck); Ok((self, compiler.build()?)) } diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index 6995fc0936..13cde5749d 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -274,6 +274,7 @@ impl Instance { imports, host_state: Box::new(Instance(instance_to_be)), store: StorePtr::new(store.traitobj()), + wmemcheck: store.engine().config().wmemcheck, })?; // The instance still has lots of setup, for example diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index 09b5dbc388..7d525d99c5 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -1143,6 +1143,7 @@ impl std::hash::Hash for HashedEngineCompileEnv<'_> { let config = self.0.config(); config.tunables.hash(hasher); config.features.hash(hasher); + config.wmemcheck.hash(hasher); // Catch accidental bugs of reusing across crate versions. config.module_version.hash(hasher); diff --git a/crates/wasmtime/src/store.rs b/crates/wasmtime/src/store.rs index 92e6be3921..788b69608a 100644 --- a/crates/wasmtime/src/store.rs +++ b/crates/wasmtime/src/store.rs @@ -511,6 +511,7 @@ impl Store { imports: Default::default(), store: StorePtr::empty(), runtime_info: &shim, + wmemcheck: engine.config().wmemcheck, }) .expect("failed to allocate default callee"); diff --git a/crates/wasmtime/src/trampoline.rs b/crates/wasmtime/src/trampoline.rs index cc94351313..0c7e459336 100644 --- a/crates/wasmtime/src/trampoline.rs +++ b/crates/wasmtime/src/trampoline.rs @@ -47,6 +47,7 @@ fn create_handle( host_state, store: StorePtr::new(store.traitobj()), runtime_info, + wmemcheck: false, }, )?; diff --git a/crates/wasmtime/src/trampoline/memory.rs b/crates/wasmtime/src/trampoline/memory.rs index e10e26bccf..3bd085d966 100644 --- a/crates/wasmtime/src/trampoline/memory.rs +++ b/crates/wasmtime/src/trampoline/memory.rs @@ -56,6 +56,7 @@ pub fn create_memory( host_state, store: StorePtr::new(store.traitobj()), runtime_info, + wmemcheck: false, }; unsafe { diff --git a/crates/wmemcheck/Cargo.toml b/crates/wmemcheck/Cargo.toml new file mode 100644 index 0000000000..af62cc4541 --- /dev/null +++ b/crates/wmemcheck/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "wasmtime-wmemcheck" +version.workspace = true +authors.workspace = true +description = "Memcheck implementation for Wasmtime" +license = "Apache-2.0 WITH LLVM-exception" +repository = "https://github.com/bytecodealliance/wasmtime" +documentation = "https://docs.rs/wasmtime-cranelift/" +edition.workspace = true + +[dependencies] diff --git a/crates/wmemcheck/src/lib.rs b/crates/wmemcheck/src/lib.rs new file mode 100644 index 0000000000..742f090b68 --- /dev/null +++ b/crates/wmemcheck/src/lib.rs @@ -0,0 +1,404 @@ +use std::cmp::*; +use std::collections::HashMap; + +/// Memory checker for wasm guest. +pub struct Wmemcheck { + metadata: Vec, + mallocs: HashMap, + pub stack_pointer: usize, + max_stack_size: usize, + pub flag: bool, +} + +/// Error types for memory checker. +#[derive(Debug, PartialEq)] +pub enum AccessError { + /// Malloc over already malloc'd memory. 
+ DoubleMalloc { addr: usize, len: usize }, + /// Read from uninitialized or undefined memory. + InvalidRead { addr: usize, len: usize }, + /// Write to uninitialized memory. + InvalidWrite { addr: usize, len: usize }, + /// Free of non-malloc'd pointer. + InvalidFree { addr: usize }, + /// Access out of bounds of heap or stack. + OutOfBounds { addr: usize, len: usize }, +} + +/// Memory state for memory checker. +#[derive(Debug, Clone, PartialEq)] +pub enum MemState { + /// Unallocated memory. + Unallocated, + /// Initialized but undefined memory. + ValidToWrite, + /// Initialized and defined memory. + ValidToReadWrite, +} + +impl Wmemcheck { + /// Initializes memory checker instance. + pub fn new(mem_size: usize) -> Wmemcheck { + let metadata = vec![MemState::Unallocated; mem_size]; + let mallocs = HashMap::new(); + Wmemcheck { + metadata, + mallocs, + stack_pointer: 0, + max_stack_size: 0, + flag: true, + } + } + + /// Updates memory checker memory state metadata when malloc is called. + pub fn malloc(&mut self, addr: usize, len: usize) -> Result<(), AccessError> { + if !self.is_in_bounds_heap(addr, len) { + return Err(AccessError::OutOfBounds { + addr: addr, + len: len, + }); + } + for i in addr..addr + len { + match self.metadata[i] { + MemState::ValidToWrite => { + return Err(AccessError::DoubleMalloc { + addr: addr, + len: len, + }); + } + MemState::ValidToReadWrite => { + return Err(AccessError::DoubleMalloc { + addr: addr, + len: len, + }); + } + _ => {} + } + } + for i in addr..addr + len { + self.metadata[i] = MemState::ValidToWrite; + } + self.mallocs.insert(addr, len); + Ok(()) + } + + /// Updates memory checker memory state metadata when a load occurs. + pub fn read(&mut self, addr: usize, len: usize) -> Result<(), AccessError> { + if !self.flag { + return Ok(()); + } + if !(self.is_in_bounds_stack(addr, len) || self.is_in_bounds_heap(addr, len)) { + return Err(AccessError::OutOfBounds { + addr: addr, + len: len, + }); + } + for i in addr..addr + len { + match self.metadata[i] { + MemState::Unallocated => { + return Err(AccessError::InvalidRead { + addr: addr, + len: len, + }); + } + MemState::ValidToWrite => { + return Err(AccessError::InvalidRead { + addr: addr, + len: len, + }); + } + _ => {} + } + } + Ok(()) + } + + /// Updates memory checker memory state metadata when a store occurs. + pub fn write(&mut self, addr: usize, len: usize) -> Result<(), AccessError> { + if !self.flag { + return Ok(()); + } + if !(self.is_in_bounds_stack(addr, len) || self.is_in_bounds_heap(addr, len)) { + return Err(AccessError::OutOfBounds { + addr: addr, + len: len, + }); + } + for i in addr..addr + len { + if let MemState::Unallocated = self.metadata[i] { + return Err(AccessError::InvalidWrite { + addr: addr, + len: len, + }); + } + } + for i in addr..addr + len { + self.metadata[i] = MemState::ValidToReadWrite; + } + Ok(()) + } + + /// Updates memory checker memory state metadata when free is called. 
+ pub fn free(&mut self, addr: usize) -> Result<(), AccessError> { + if !self.mallocs.contains_key(&addr) { + return Err(AccessError::InvalidFree { addr: addr }); + } + let len = self.mallocs[&addr]; + for i in addr..addr + len { + if let MemState::Unallocated = self.metadata[i] { + return Err(AccessError::InvalidFree { addr: addr }); + } + } + self.mallocs.remove(&addr); + for i in addr..addr + len { + self.metadata[i] = MemState::Unallocated; + } + Ok(()) + } + + fn is_in_bounds_heap(&self, addr: usize, len: usize) -> bool { + self.max_stack_size <= addr && addr + len <= self.metadata.len() + } + + fn is_in_bounds_stack(&self, addr: usize, len: usize) -> bool { + self.stack_pointer <= addr && addr + len < self.max_stack_size + } + + /// Updates memory checker metadata when stack pointer is updated. + pub fn update_stack_pointer(&mut self, new_sp: usize) -> Result<(), AccessError> { + if new_sp > self.max_stack_size { + return Err(AccessError::OutOfBounds { + addr: self.stack_pointer, + len: new_sp - self.stack_pointer, + }); + } else if new_sp < self.stack_pointer { + for i in new_sp..self.stack_pointer + 1 { + self.metadata[i] = MemState::ValidToReadWrite; + } + } else { + for i in self.stack_pointer..new_sp { + self.metadata[i] = MemState::Unallocated; + } + } + self.stack_pointer = new_sp; + Ok(()) + } + + /// Turns memory checking on. + pub fn memcheck_on(&mut self) { + self.flag = true; + } + + /// Turns memory checking off. + pub fn memcheck_off(&mut self) { + self.flag = false; + } + + /// Initializes stack and stack pointer in memory checker metadata. + pub fn set_stack_size(&mut self, stack_size: usize) { + self.max_stack_size = stack_size + 1; + // TODO: temporary solution to initialize the entire stack + // while keeping stack tracing plumbing in place + self.stack_pointer = stack_size; + let _ = self.update_stack_pointer(0); + } + + /// Updates memory checker metadata size when memory.grow is called. 
+ pub fn update_mem_size(&mut self, num_bytes: usize) { + let to_append = vec![MemState::Unallocated; num_bytes]; + self.metadata.extend(to_append); + } +} + +#[test] +fn basic_wmemcheck() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + wmemcheck_state.set_stack_size(1024); + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert!(wmemcheck_state.write(0x1000, 4).is_ok()); + assert!(wmemcheck_state.read(0x1000, 4).is_ok()); + assert_eq!(wmemcheck_state.mallocs, HashMap::from([(0x1000, 32)])); + assert!(wmemcheck_state.free(0x1000).is_ok()); + assert!(wmemcheck_state.mallocs.is_empty()); +} + +#[test] +fn read_before_initializing() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert_eq!( + wmemcheck_state.read(0x1000, 4), + Err(AccessError::InvalidRead { + addr: 0x1000, + len: 4 + }) + ); + assert!(wmemcheck_state.write(0x1000, 4).is_ok()); + assert!(wmemcheck_state.free(0x1000).is_ok()); +} + +#[test] +fn use_after_free() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert!(wmemcheck_state.write(0x1000, 4).is_ok()); + assert!(wmemcheck_state.write(0x1000, 4).is_ok()); + assert!(wmemcheck_state.free(0x1000).is_ok()); + assert_eq!( + wmemcheck_state.write(0x1000, 4), + Err(AccessError::InvalidWrite { + addr: 0x1000, + len: 4 + }) + ); +} + +#[test] +fn double_free() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert!(wmemcheck_state.write(0x1000, 4).is_ok()); + assert!(wmemcheck_state.free(0x1000).is_ok()); + assert_eq!( + wmemcheck_state.free(0x1000), + Err(AccessError::InvalidFree { addr: 0x1000 }) + ); +} + +#[test] +fn out_of_bounds_malloc() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert_eq!( + wmemcheck_state.malloc(640 * 1024, 1), + Err(AccessError::OutOfBounds { + addr: 640 * 1024, + len: 1 + }) + ); + assert_eq!( + wmemcheck_state.malloc(640 * 1024 - 10, 15), + Err(AccessError::OutOfBounds { + addr: 640 * 1024 - 10, + len: 15 + }) + ); + assert!(wmemcheck_state.mallocs.is_empty()); +} + +#[test] +fn out_of_bounds_read() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(640 * 1024 - 24, 24).is_ok()); + assert_eq!( + wmemcheck_state.read(640 * 1024 - 24, 25), + Err(AccessError::OutOfBounds { + addr: 640 * 1024 - 24, + len: 25 + }) + ); +} + +#[test] +fn double_malloc() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert_eq!( + wmemcheck_state.malloc(0x1000, 32), + Err(AccessError::DoubleMalloc { + addr: 0x1000, + len: 32 + }) + ); + assert_eq!( + wmemcheck_state.malloc(0x1002, 32), + Err(AccessError::DoubleMalloc { + addr: 0x1002, + len: 32 + }) + ); + assert!(wmemcheck_state.free(0x1000).is_ok()); +} + +#[test] +fn error_type() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + assert!(wmemcheck_state.malloc(0x1000, 32).is_ok()); + assert_eq!( + wmemcheck_state.malloc(0x1000, 32), + Err(AccessError::DoubleMalloc { + addr: 0x1000, + len: 32 + }) + ); + assert_eq!( + wmemcheck_state.malloc(640 * 1024, 32), + Err(AccessError::OutOfBounds { + addr: 640 * 1024, + len: 32 + }) + ); + assert!(wmemcheck_state.free(0x1000).is_ok()); +} + +#[test] +fn update_sp_no_error() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + wmemcheck_state.set_stack_size(1024); + 
assert!(wmemcheck_state.update_stack_pointer(768).is_ok()); + assert_eq!(wmemcheck_state.stack_pointer, 768); + assert!(wmemcheck_state.malloc(1024 * 2, 32).is_ok()); + assert!(wmemcheck_state.free(1024 * 2).is_ok()); + assert!(wmemcheck_state.update_stack_pointer(896).is_ok()); + assert_eq!(wmemcheck_state.stack_pointer, 896); + assert!(wmemcheck_state.update_stack_pointer(1024).is_ok()); +} + +#[test] +fn bad_stack_malloc() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + wmemcheck_state.set_stack_size(1024); + + assert!(wmemcheck_state.update_stack_pointer(0).is_ok()); + assert_eq!(wmemcheck_state.stack_pointer, 0); + assert_eq!( + wmemcheck_state.malloc(512, 32), + Err(AccessError::OutOfBounds { addr: 512, len: 32 }) + ); + assert_eq!( + wmemcheck_state.malloc(1022, 32), + Err(AccessError::OutOfBounds { + addr: 1022, + len: 32 + }) + ); +} + +#[test] +fn stack_full_empty() { + let mut wmemcheck_state = Wmemcheck::new(640 * 1024); + + wmemcheck_state.set_stack_size(1024); + + assert!(wmemcheck_state.update_stack_pointer(0).is_ok()); + assert_eq!(wmemcheck_state.stack_pointer, 0); + assert!(wmemcheck_state.update_stack_pointer(1024).is_ok()); + assert_eq!(wmemcheck_state.stack_pointer, 1024) +} + +#[test] +fn from_test_program() { + let mut wmemcheck_state = Wmemcheck::new(1024 * 1024 * 128); + wmemcheck_state.set_stack_size(70864); + assert!(wmemcheck_state.write(70832, 1).is_ok()); + assert!(wmemcheck_state.read(1138, 1).is_ok()); +} diff --git a/docs/wmemcheck.md b/docs/wmemcheck.md new file mode 100644 index 0000000000..40ba26e894 --- /dev/null +++ b/docs/wmemcheck.md @@ -0,0 +1,8 @@ + +Wmemcheck provides debug output for invalid mallocs, reads, and writes. + +How to use: +1. When building Wasmtime, add the CLI flag "--features wmemcheck" to compile with wmemcheck configured. + > cargo build --features wmemcheck +2. When running your wasm module, add the CLI flag "--wmemcheck". + > wasmtime run --wmemcheck test.wasm diff --git a/scripts/publish.rs b/scripts/publish.rs index 1f3ebdafe6..598dd40c09 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -52,6 +52,7 @@ const CRATES_TO_PUBLISH: &[&str] = &[ "wasmtime-jit-debug", "wasmtime-fiber", "wasmtime-environ", + "wasmtime-wmemcheck", "wasmtime-runtime", "wasmtime-cranelift-shared", "wasmtime-cranelift", diff --git a/src/commands/run.rs b/src/commands/run.rs index 8a124291a0..56666a19a5 100644 --- a/src/commands/run.rs +++ b/src/commands/run.rs @@ -224,6 +224,12 @@ pub struct RunCommand { #[clap(long)] trap_on_grow_failure: bool, + /// Enables memory error checking. + /// + /// See wmemcheck.md for documentation on how to use. + #[clap(long)] + wmemcheck: bool, + /// The WebAssembly module to run and arguments to pass to it. /// /// Arguments passed to the wasm module will be configured as WASI CLI @@ -260,6 +266,8 @@ impl RunCommand { None => {} } + config.wmemcheck(self.wmemcheck); + let engine = Engine::new(&config)?; let preopen_sockets = self.compute_preopen_sockets()?;
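Beyond the `wasmtime run --wmemcheck` flag wired up above, the same switch is exposed to embedders through the `Config::wmemcheck` method added in crates/wasmtime/src/config.rs. A minimal embedder sketch — it assumes Wasmtime was built with the `wmemcheck` Cargo feature, that the embedder depends on anyhow, and that "guest.wasm" is a placeholder module exporting `malloc` and `free`:

use wasmtime::{Config, Engine, Instance, Module, Store};

fn main() -> anyhow::Result<()> {
    let mut config = Config::new();
    // Config::validate (run from Engine::new) errors if this build was
    // compiled without the `wmemcheck` feature.
    config.wmemcheck(true);

    let engine = Engine::new(&config)?;
    let module = Module::from_file(&engine, "guest.wasm")?;
    let mut store = Store::new(&engine, ());
    let _instance = Instance::new(&mut store, &module, &[])?;
    // Invalid mallocs, frees, loads, and stores in the guest now surface as errors.
    Ok(())
}

The checker keys off guest functions literally named `malloc` and `free` (resolved through the module's name section), so allocation tracking only applies to guests that define functions with those names.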