//! Benchmarks (via Criterion) of the overhead of calling between the host and
//! WebAssembly in Wasmtime: host-to-wasm calls in "typed", "untyped", and
//! "unchecked" flavors, and wasm-to-host calls through a variety of host
//! function definitions, each measured in sync, async, and async-pooling
//! engine configurations.

use criterion::measurement::WallTime;
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion};
use std::fmt::Debug;
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
use std::time::Instant;
use wasmtime::*;

criterion_main!(benches);
criterion_group!(benches, measure_execution_time);

fn measure_execution_time(c: &mut Criterion) {
    host_to_wasm(c);
    wasm_to_host(c);
}

/// Which engine configuration a benchmark is running against.
#[derive(Copy, Clone)]
enum IsAsync {
    Yes,
    YesPooling,
    No,
}

impl IsAsync {
    fn desc(&self) -> &str {
        match self {
            IsAsync::Yes => "async",
            IsAsync::YesPooling => "async-pool",
            IsAsync::No => "sync",
        }
    }

    fn use_async(&self) -> bool {
        match self {
            IsAsync::Yes | IsAsync::YesPooling => true,
            IsAsync::No => false,
        }
    }
}

fn engines() -> Vec<(Engine, IsAsync)> {
    let mut config = Config::new();
    vec![
        (Engine::new(&config).unwrap(), IsAsync::No),
        (
            Engine::new(config.async_support(true)).unwrap(),
            IsAsync::Yes,
        ),
        (
            // Note that the same `Config` is mutated in place, so this engine
            // has async support enabled in addition to the pooling allocator.
            Engine::new(config.allocation_strategy(InstanceAllocationStrategy::pooling())).unwrap(),
            IsAsync::YesPooling,
        ),
    ]
}

/// Benchmarks the overhead of calling WebAssembly from the host in various
/// configurations.
fn host_to_wasm(c: &mut Criterion) {
    for (engine, is_async) in engines() {
        let mut store = Store::new(&engine, ());
        let module = Module::new(
            &engine,
            r#"(module
                (func (export "nop"))
                (func (export "nop-params-and-results") (param i32 i64) (result f32)
                    f32.const 0)
            )"#,
        )
        .unwrap();
        let instance = if is_async.use_async() {
            run_await(Instance::new_async(&mut store, &module, &[])).unwrap()
        } else {
            Instance::new(&mut store, &module, &[]).unwrap()
        };

        let bench_calls = |group: &mut BenchmarkGroup<'_, WallTime>, store: &mut Store<()>| {
            // Bench the overhead of a function that has no parameters or results
            bench_host_to_wasm::<(), ()>(group, store, &instance, is_async, "nop", (), ());

            // Bench the overhead of a function that has some parameters and just
            // one result (will use the raw System V calling convention on
            // applicable platforms).
            bench_host_to_wasm::<(i32, i64), (f32,)>(
                group,
                store,
                &instance,
                is_async,
                "nop-params-and-results",
                (0, 0),
                (0.0,),
            );
        };

        // Bench once without any call hooks configured
        let name = format!("{}/no-hook", is_async.desc());
        bench_calls(&mut c.benchmark_group(&name), &mut store);

        // Bench again with a "call hook" enabled
        store.call_hook(|_, _| Ok(()));
        let name = format!("{}/hook-sync", is_async.desc());
        bench_calls(&mut c.benchmark_group(&name), &mut store);
    }
}

/// Benchmarks the "typed", "untyped", and "unchecked" ways of calling the
/// exported function `name` from the host.
fn bench_host_to_wasm<Params, Results>(
    c: &mut BenchmarkGroup<'_, WallTime>,
    store: &mut Store<()>,
    instance: &Instance,
    is_async: IsAsync,
    name: &str,
    typed_params: Params,
    typed_results: Results,
) where
    Params: WasmParams + ToVals + Copy,
    Results: WasmResults + ToVals + Copy + PartialEq + Debug,
{
    // Benchmark the "typed" version, which should be faster than the versions
    // below.
    c.bench_function(&format!("host-to-wasm - typed - {}", name), |b| {
        let typed = instance
            .get_typed_func::<Params, Results>(&mut *store, name)
            .unwrap();
        b.iter(|| {
            let results = if is_async.use_async() {
                run_await(typed.call_async(&mut *store, typed_params)).unwrap()
            } else {
                typed.call(&mut *store, typed_params).unwrap()
            };
            assert_eq!(results, typed_results);
        })
    });

    // Benchmark the "untyped" version, which should be the slowest of the
    // three here, but not unduly slow.
c.bench_function(&format!("host-to-wasm - untyped - {}", name), |b| { let untyped = instance.get_func(&mut *store, name).unwrap(); let params = typed_params.to_vals(); let expected_results = typed_results.to_vals(); let mut results = vec![Val::I32(0); expected_results.len()]; b.iter(|| { if is_async.use_async() { run_await(untyped.call_async(&mut *store, ¶ms, &mut results)).unwrap(); } else { untyped.call(&mut *store, ¶ms, &mut results).unwrap(); } for (expected, actual) in expected_results.iter().zip(&results) { assert_vals_eq(expected, actual); } }) }); // Currently `call_async_unchecked` isn't implemented, so can't benchmark // below if is_async.use_async() { return; } // Benchmark the "unchecked" version which should be between the above two, // but is unsafe. c.bench_function(&format!("host-to-wasm - unchecked - {}", name), |b| { let untyped = instance.get_func(&mut *store, name).unwrap(); let params = typed_params.to_vals(); let results = typed_results.to_vals(); let mut space = vec![ValRaw::i32(0); params.len().max(results.len())]; b.iter(|| unsafe { for (i, param) in params.iter().enumerate() { space[i] = param.to_raw(&mut *store); } untyped .call_unchecked(&mut *store, space.as_mut_ptr(), space.len()) .unwrap(); for (i, expected) in results.iter().enumerate() { assert_vals_eq( expected, &Val::from_raw(&mut *store, space[i], expected.ty()), ); } }) }); } /// Benchmarks the overhead of calling the host from WebAssembly itself fn wasm_to_host(c: &mut Criterion) { let module = r#"(module ;; host imports with a variety of parameters/arguments (import "" "nop" (func $nop)) (import "" "nop-params-and-results" (func $nop_params_and_results (param i32 i64) (result f32)) ) ;; "runner functions" for each of the above imports. Each runner ;; function takes the number of times to call the host function as ;; the duration of this entire loop will be measured. (func (export "run-nop") (param i64) loop call $nop local.get 0 ;; decrement & break if necessary i64.const -1 i64.add local.tee 0 i64.const 0 i64.ne br_if 0 end ) (func (export "run-nop-params-and-results") (param i64) loop i32.const 0 ;; always zero parameters i64.const 0 call $nop_params_and_results f32.const 0 ;; assert the correct result f32.eq i32.eqz if unreachable end local.get 0 ;; decrement & break if necessary i64.const -1 i64.add local.tee 0 i64.const 0 i64.ne br_if 0 end ) )"#; for (engine, is_async) in engines() { let mut store = Store::new(&engine, ()); let module = Module::new(&engine, module).unwrap(); bench_calls( &mut c.benchmark_group(&format!("{}/no-hook", is_async.desc())), &mut store, &module, is_async, ); store.call_hook(|_, _| Ok(())); bench_calls( &mut c.benchmark_group(&format!("{}/hook-sync", is_async.desc())), &mut store, &module, is_async, ); } // Given a `Store` will create various instances hooked up to different ways // of defining host imports to benchmark their overhead. 
    fn bench_calls(
        group: &mut BenchmarkGroup<'_, WallTime>,
        store: &mut Store<()>,
        module: &Module,
        is_async: IsAsync,
    ) {
        let engine = store.engine().clone();

        // Host imports defined with `func_wrap`, the fastest "typed" path.
        let mut typed = Linker::new(&engine);
        typed.func_wrap("", "nop", || {}).unwrap();
        typed
            .func_wrap("", "nop-params-and-results", |x: i32, y: i64| {
                assert_eq!(x, 0);
                assert_eq!(y, 0);
                0.0f32
            })
            .unwrap();
        let instance = if is_async.use_async() {
            run_await(typed.instantiate_async(&mut *store, &module)).unwrap()
        } else {
            typed.instantiate(&mut *store, &module).unwrap()
        };
        bench_instance(group, store, &instance, "typed", is_async);

        // Host imports defined with `func_new`, receiving "untyped" `Val`s.
        let mut untyped = Linker::new(&engine);
        untyped
            .func_new("", "nop", FuncType::new([], []), |_, _, _| Ok(()))
            .unwrap();
        let ty = FuncType::new([ValType::I32, ValType::I64], [ValType::F32]);
        untyped
            .func_new(
                "",
                "nop-params-and-results",
                ty,
                |_caller, params, results| {
                    assert_eq!(params.len(), 2);
                    match params[0] {
                        Val::I32(0) => {}
                        _ => unreachable!(),
                    }
                    match params[1] {
                        Val::I64(0) => {}
                        _ => unreachable!(),
                    }
                    assert_eq!(results.len(), 1);
                    results[0] = Val::F32(0);
                    Ok(())
                },
            )
            .unwrap();
        let instance = if is_async.use_async() {
            run_await(untyped.instantiate_async(&mut *store, &module)).unwrap()
        } else {
            untyped.instantiate(&mut *store, &module).unwrap()
        };
        bench_instance(group, store, &instance, "untyped", is_async);

        // Host imports defined with `func_new_unchecked`, working on raw values.
        unsafe {
            let mut unchecked = Linker::new(&engine);
            unchecked
                .func_new_unchecked("", "nop", FuncType::new([], []), |_, _| Ok(()))
                .unwrap();
            let ty = FuncType::new([ValType::I32, ValType::I64], [ValType::F32]);
            unchecked
                .func_new_unchecked("", "nop-params-and-results", ty, |mut caller, space| {
                    match Val::from_raw(&mut caller, space[0], ValType::I32) {
                        Val::I32(0) => {}
                        _ => unreachable!(),
                    }
                    match Val::from_raw(&mut caller, space[1], ValType::I64) {
                        Val::I64(0) => {}
                        _ => unreachable!(),
                    }
                    space[0] = Val::F32(0).to_raw(&mut caller);
                    Ok(())
                })
                .unwrap();
            let instance = if is_async.use_async() {
                run_await(unchecked.instantiate_async(&mut *store, &module)).unwrap()
            } else {
                unchecked.instantiate(&mut *store, &module).unwrap()
            };
            bench_instance(group, store, &instance, "unchecked", is_async);
        }

        // Async host imports can only be defined when the engine has async
        // support enabled.
        if !is_async.use_async() {
            return;
        }

        let mut typed = Linker::new(&engine);
        typed
            .func_wrap0_async("", "nop", |caller| {
                Box::new(async {
                    drop(caller);
                })
            })
            .unwrap();
        typed
            .func_wrap2_async("", "nop-params-and-results", |_caller, x: i32, y: i64| {
                Box::new(async move {
                    assert_eq!(x, 0);
                    assert_eq!(y, 0);
                    0.0f32
                })
            })
            .unwrap();
        let instance = run_await(typed.instantiate_async(&mut *store, &module)).unwrap();
        bench_instance(group, store, &instance, "async-typed", is_async);
    }

    // Given a specific instance, executes all of the "runner functions".
    fn bench_instance(
        group: &mut BenchmarkGroup<'_, WallTime>,
        store: &mut Store<()>,
        instance: &Instance,
        desc: &str,
        is_async: IsAsync,
    ) {
        group.bench_function(&format!("wasm-to-host - {} - nop", desc), |b| {
            let run = instance
                .get_typed_func::<u64, ()>(&mut *store, "run-nop")
                .unwrap();
            b.iter_custom(|iters| {
                let start = Instant::now();
                if is_async.use_async() {
                    run_await(run.call_async(&mut *store, iters)).unwrap();
                } else {
                    run.call(&mut *store, iters).unwrap();
                }
                start.elapsed()
            })
        });
        group.bench_function(
            &format!("wasm-to-host - {} - nop-params-and-results", desc),
            |b| {
                let run = instance
                    .get_typed_func::<u64, ()>(&mut *store, "run-nop-params-and-results")
                    .unwrap();
                b.iter_custom(|iters| {
                    let start = Instant::now();
                    if is_async.use_async() {
                        run_await(run.call_async(&mut *store, iters)).unwrap();
                    } else {
                        run.call(&mut *store, iters).unwrap();
                    }
                    start.elapsed()
                })
            },
        );
    }
}

fn assert_vals_eq(a: &Val, b: &Val) {
    match (a, b) {
        (Val::I32(a), Val::I32(b)) => assert_eq!(a, b),
        (Val::I64(a), Val::I64(b)) => assert_eq!(a, b),
        (Val::F32(a), Val::F32(b)) => assert_eq!(a, b),
        (Val::F64(a), Val::F64(b)) => assert_eq!(a, b),
        _ => unimplemented!(),
    }
}

/// Helper trait to convert the typed parameter/result tuples used above into
/// vectors of untyped `Val`s.
trait ToVals {
    fn to_vals(&self) -> Vec<Val>;
}

macro_rules! tuples {
    ($($t:ident)*) => (
        #[allow(non_snake_case)]
        impl<$($t: Copy + Into<Val>,)*> ToVals for ($($t,)*) {
            fn to_vals(&self) -> Vec<Val> {
                let mut _dst = Vec::new();
                let ($($t,)*) = *self;
                $(_dst.push($t.into());)*
                _dst
            }
        }
    )
}

tuples!();
tuples!(A);
tuples!(A B);
tuples!(A B C);

/// Drives `future` to completion on the current thread by polling it in a
/// busy loop with a no-op waker.
fn run_await<F: Future>(future: F) -> F::Output {
    let mut f = Pin::from(Box::new(future));
    let waker = dummy_waker();
    let mut cx = Context::from_waker(&waker);
    loop {
        match f.as_mut().poll(&mut cx) {
            Poll::Ready(val) => break val,
            Poll::Pending => {}
        }
    }
}

/// Creates a `Waker` whose wake operations do nothing; the pointer value `5`
/// is only used as a sanity-check tag for the vtable functions.
fn dummy_waker() -> Waker {
    return unsafe { Waker::from_raw(clone(5 as *const _)) };

    unsafe fn clone(ptr: *const ()) -> RawWaker {
        assert_eq!(ptr as usize, 5);
        const VTABLE: RawWakerVTable = RawWakerVTable::new(clone, wake, wake_by_ref, drop);
        RawWaker::new(ptr, &VTABLE)
    }

    unsafe fn wake(ptr: *const ()) {
        assert_eq!(ptr as usize, 5);
    }

    unsafe fn wake_by_ref(ptr: *const ()) {
        assert_eq!(ptr as usize, 5);
    }

    unsafe fn drop(ptr: *const ()) {
        assert_eq!(ptr as usize, 5);
    }
}
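
// A minimal sanity check, not part of the original benchmark, for the `ToVals`
// helper used by `bench_host_to_wasm`. It assumes wasmtime's `From<i32>` and
// `From<i64>` impls for `Val` produce the `Val::I32`/`Val::I64` variants; it's
// only compiled when this target is built in test mode and has no effect on
// the benchmark results.
#[test]
fn to_vals_flattens_tuples() {
    let vals = (1i32, 2i64).to_vals();
    assert_eq!(vals.len(), 2);
    assert_vals_eq(&vals[0], &Val::I32(1));
    assert_vals_eq(&vals[1], &Val::I64(2));
}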
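
// Likewise a small illustrative check, not part of the original benchmark,
// showing that `run_await` drives a future to completion using the no-op
// waker above: `async { 42 }` resolves on the first poll, so the busy loop
// returns immediately.
#[test]
fn run_await_completes_ready_future() {
    assert_eq!(run_await(async { 42 }), 42);
}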