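//! Benchmarks for Wasmtime's trap handling and Wasm backtrace capture.
//!
//! These benchmarks were added alongside the fix in #5049 (see also #5037),
//! which stopped re-capturing a Wasm backtrace every time a trap was
//! propagated through a host frame back to Wasm. Capturing a backtrace is
//! `O(n)` in the number of frames, so re-capturing it at each of `n` host
//! frames made trapping accidentally `O(n^2)`; after the fix a trap captures
//! a single backtrace.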

use anyhow::Result;
use criterion::*;
use wasmtime::*;

criterion_group!(benches, bench_traps);
criterion_main!(benches);
fn bench_traps(c: &mut Criterion) {
    bench_multi_threaded_traps(c);
    bench_many_modules_registered_traps(c);
    bench_many_stack_frames_traps(c);
    bench_host_wasm_frames_traps(c);
}
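
// Benchmark trap handling on one thread while a varying number of background
// threads are also busy raising traps, to see how trap handling behaves under
// concurrency.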
fn bench_multi_threaded_traps(c: &mut Criterion) {
    let mut group = c.benchmark_group("multi-threaded-traps");
    for num_bg_threads in vec![0, 1, 2, 4, 8, 16] {
        group.throughput(Throughput::Elements(num_bg_threads));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_bg_threads),
            &num_bg_threads,
            |b, &num_bg_threads| {
                let engine = Engine::default();
                let module = module(&engine, 10).unwrap();

                b.iter_custom(|iters| {
                    let (started_sender, started_receiver) = std::sync::mpsc::channel();

                    // Spawn threads in the background doing infinite work.
                    let threads = (0..num_bg_threads)
                        .map(|_| {
                            let (done_sender, done_receiver) = std::sync::mpsc::channel();
                            let handle = std::thread::spawn({
                                let engine = engine.clone();
                                let module = module.clone();
                                let started_sender = started_sender.clone();
                                move || {
                                    let mut store = Store::new(&engine, ());
                                    let instance =
                                        Instance::new(&mut store, &module, &[]).unwrap();
                                    let f = instance
                                        .get_typed_func::<(), (), _>(&mut store, "")
                                        .unwrap();

                                    // Notify the parent thread that we are
                                    // doing background work now.
                                    started_sender.send(()).unwrap();

                                    // Keep doing background work until the
                                    // parent tells us to stop.
                                    loop {
                                        if let Ok(()) = done_receiver.try_recv() {
                                            return;
                                        }
                                        assert!(f.call(&mut store, ()).is_err());
                                    }
                                }
                            });
                            (handle, done_sender)
                        })
                        .collect::<Vec<_>>();

                    // Wait on all the threads to start up.
                    for _ in 0..num_bg_threads {
                        let _ = started_receiver.recv().unwrap();
                    }

                    let mut store = Store::new(&engine, ());
                    let instance = Instance::new(&mut store, &module, &[]).unwrap();
                    let f = instance
                        .get_typed_func::<(), (), _>(&mut store, "")
                        .unwrap();

                    // Measure how long it takes to do `iters` worth of traps
                    // while there is a bunch of background work going on.
                    let start = std::time::Instant::now();
                    for _ in 0..iters {
                        assert!(f.call(&mut store, ()).is_err());
                    }
                    let elapsed = start.elapsed();

                    // Clean up all of our background threads.
                    threads.into_iter().for_each(|(handle, done_sender)| {
                        done_sender.send(()).unwrap();
                        handle.join().unwrap();
                    });

                    elapsed
                });
            },
        );
    }
    group.finish();
}
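
// Benchmark trap handling as the number of modules registered with the engine
// grows. Symbolicating a trap's Wasm backtrace relies on module registration
// information, so more registered modules can mean more work per trap.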
fn bench_many_modules_registered_traps(c: &mut Criterion) {
    let mut group = c.benchmark_group("many-modules-registered-traps");
    for num_modules in vec![1, 8, 64, 512, 4096] {
        group.throughput(Throughput::Elements(num_modules));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_modules),
            &num_modules,
            |b, &num_modules| {
                let engine = Engine::default();
                let modules = (0..num_modules)
                    .map(|_| module(&engine, 10).unwrap())
                    .collect::<Vec<_>>();

                b.iter_custom(|iters| {
                    let mut store = Store::new(&engine, ());
                    let instance =
                        Instance::new(&mut store, modules.last().unwrap(), &[]).unwrap();
                    let f = instance
                        .get_typed_func::<(), (), _>(&mut store, "")
                        .unwrap();

                    let start = std::time::Instant::now();
                    for _ in 0..iters {
                        assert!(f.call(&mut store, ()).is_err());
                    }
                    start.elapsed()
                });
            },
        );
    }
    group.finish();
}
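
// Benchmark trap handling as the depth of an all-Wasm call stack grows;
// capturing a backtrace is `O(n)` in the number of frames on the stack.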
fn bench_many_stack_frames_traps(c: &mut Criterion) {
    let mut group = c.benchmark_group("many-stack-frames-traps");
    for num_stack_frames in vec![1, 8, 64, 512] {
        group.throughput(Throughput::Elements(num_stack_frames));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_stack_frames),
            &num_stack_frames,
            |b, &num_stack_frames| {
                let engine = Engine::default();
                let module = module(&engine, num_stack_frames).unwrap();

                b.iter_custom(|iters| {
                    let mut store = Store::new(&engine, ());
                    let instance = Instance::new(&mut store, &module, &[]).unwrap();
                    let f = instance
                        .get_typed_func::<(), (), _>(&mut store, "")
                        .unwrap();

                    let start = std::time::Instant::now();
                    for _ in 0..iters {
                        assert!(f.call(&mut store, ()).is_err());
                    }
                    start.elapsed()
                });
            },
        );
    }
    group.finish();
}
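
// Benchmark trap handling with a stack of alternating Wasm and host frames.
// This is the case addressed by #5049: the trap propagates back through each
// host frame, and the backtrace should only be captured once.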
fn bench_host_wasm_frames_traps(c: &mut Criterion) {
    let mut group = c.benchmark_group("host-wasm-frames-traps");

    // The exported Wasm function `f` traps once its argument reaches zero;
    // otherwise it calls the imported host function with the argument minus
    // one, and the host function calls back into `f`.
    let wat = r#"
        (module
            (import "" "" (func $host_func (param i32)))
            (func (export "f") (param i32)
                local.get 0
                i32.eqz
                if
                    unreachable
                end
                local.get 0
                i32.const 1
                i32.sub
                call $host_func
            )
        )
    "#;
    let engine = Engine::default();
    let module = Module::new(&engine, wat).unwrap();

    for num_stack_frames in vec![20, 40, 60, 80, 100, 120, 140, 160, 180, 200] {
        group.throughput(Throughput::Elements(num_stack_frames));
        group.bench_with_input(
            BenchmarkId::from_parameter(num_stack_frames),
            &num_stack_frames,
            |b, &num_stack_frames| {
                b.iter_custom(|iters| {
                    let mut store = Store::new(&engine, ());

                    // The host function simply calls back into the Wasm `f`
                    // export, adding another pair of Wasm and host frames to
                    // the stack.
                    let host_func = Func::new(
                        &mut store,
                        FuncType::new(vec![ValType::I32], vec![]),
                        |mut caller, args, _results| {
                            let f = caller.get_export("f").unwrap();
                            let f = f.into_func().unwrap();
                            f.call(caller, args, &mut [])?;
                            Ok(())
                        },
                    );

                    let instance =
                        Instance::new(&mut store, &module, &[host_func.into()]).unwrap();
                    let f = instance
                        .get_typed_func::<(i32,), (), _>(&mut store, "f")
                        .unwrap();

                    let start = std::time::Instant::now();
                    for _ in 0..iters {
                        assert!(f.call(&mut store, (num_stack_frames as i32,)).is_err());
                    }
                    start.elapsed()
                });
            },
        );
    }
    group.finish();
}
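
// Build a module containing a chain of functions `$f0`, `$f1`, ..., each
// calling the next, ending in a function that executes `unreachable`. The
// start of the chain is exported under the empty name, so calling the export
// traps with roughly `num_funcs` Wasm frames on the stack.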
fn module(engine: &Engine, num_funcs: u64) -> Result<Module> {
    let mut wat = String::new();
    wat.push_str("(module\n");

    for i in 0..num_funcs {
        let j = i + 1;
        wat.push_str(&format!("(func $f{i} call $f{j})\n"));
    }
    wat.push_str(&format!("(func $f{num_funcs} unreachable)\n"));

    wat.push_str(&format!("(export \"\" (func $f0))\n"));
    wat.push_str(")\n");

    Module::new(engine, &wat)
}