use criterion::{criterion_group, criterion_main, Criterion};
use std::thread;
use std::time::{Duration, Instant};
use wasmtime::*;
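
// This benchmark compares lazy initialization of Wasmtime's per-thread
// resources (paid on the first function call made on a thread) against eager
// initialization via `Engine::tls_eager_initialize`.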
fn measure_execution_time(c: &mut Criterion) {
    // Baseline performance: a single measurement covers both initializing
    // thread local resources and executing the first call.
    //
    // The other two bench functions should sum to this duration.
    c.bench_function("lazy initialization at call", move |b| {
        let (engine, module) = test_setup();
        b.iter_custom(move |iters| {
            (0..iters)
                .into_iter()
                .map(|_| lazy_thread_instantiate(engine.clone(), module.clone()))
                .sum()
        })
    });
    // Using Engine::tls_eager_initialize: measure how long eager
    // initialization takes on a new thread.
    c.bench_function("eager initialization", move |b| {
        let (engine, module) = test_setup();
        b.iter_custom(move |iters| {
            (0..iters)
                .into_iter()
                .map(|_| {
                    let (init, _call) = eager_thread_instantiate(engine.clone(), module.clone());
                    init
                })
                .sum()
        })
    });
    // Measure how long the first call takes on a thread after it has been
    // eagerly initialized.
    c.bench_function("call after eager initialization", move |b| {
        let (engine, module) = test_setup();
        b.iter_custom(move |iters| {
            (0..iters)
                .into_iter()
                .map(|_| {
                    let (_init, call) = eager_thread_instantiate(engine.clone(), module.clone());
                    call
                })
                .sum()
        })
    });
}
/// Creating a store and measuring the time to perform a call is the same behavior
/// in both setups.
fn duration_of_call(engine: &Engine, module: &Module) -> Duration {
    let mut store = Store::new(engine, ());
    let inst = Instance::new(&mut store, module, &[]).expect("instantiate");
    let f = inst.get_func(&mut store, "f").expect("get f");
    let f = f.typed::<(), (), _>(&store).expect("type f");
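
    // Only the call itself is timed; the store, instance, and typed function
    // above are created before the timer starts.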
    let call = Instant::now();
    f.call(&mut store, ()).expect("call f");
    call.elapsed()
}
/// When wasmtime first runs a function on a thread, it needs to initialize
/// some thread-local resources and install signal handlers. This benchmark
/// spawns a new thread, and returns the duration it took to execute the first
/// function call made on that thread.
fn lazy_thread_instantiate(engine: Engine, module: Module) -> Duration {
    thread::spawn(move || duration_of_call(&engine, &module))
        .join()
        .expect("thread joins")
}
/// This benchmark spawns a new thread, and records the duration it took to
/// eagerly initialize the thread local resources. It then creates a store and
/// instance, and records the duration it took to execute the first function
/// call.
fn eager_thread_instantiate(engine: Engine, module: Module) -> (Duration, Duration) {
    thread::spawn(move || {
        let init_start = Instant::now();
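        // Eagerly initialize this thread's resources; the elapsed time here is
        // the `init` duration returned to the caller.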
        Engine::tls_eager_initialize();
        let init_duration = init_start.elapsed();

        (init_duration, duration_of_call(&engine, &module))
    })
    .join()
    .expect("thread joins")
}
fn test_setup() -> (Engine, Module) {
    // We only expect to create one Instance at a time, with a single memory.
    let pool_count = 10;

    let mut config = Config::new();
    config.allocation_strategy(InstanceAllocationStrategy::Pooling {
        strategy: PoolingAllocationStrategy::NextAvailable,
        instance_limits: InstanceLimits {
            count: pool_count,
            memory_pages: 1,
            ..Default::default()
        },
    });
    let engine = Engine::new(&config).unwrap();
    // The module has a memory (shouldn't matter) and a single function which is a no-op.
    let module = Module::new(&engine, r#"(module (memory 1) (func (export "f")))"#).unwrap();
    (engine, module)
}
criterion_group!(benches, measure_execution_time);
criterion_main!(benches);