From 10b5cc50c37be393c0b8bb20dc68fb8ea6b750e1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 2 Nov 2020 20:37:18 -0600 Subject: [PATCH] Further compress the in-memory representation of address maps (#2324) This commit reduces the size of `InstructionAddressMap` from 24 bytes to 8 bytes by dropping the `code_len` field and reducing `code_offset` to `u32` instead of `usize`. The intention is to primarily make the in-memory version take up less space, and the hunch is that the `code_len` is largely not necessary since most entries in this map are always adjacent to one another. The `code_len` field is now implied by the `code_offset` field of the next entry in the map. This isn't as big of an improvement to serialized module size as #2321 or #2322, primarily because of the switch to variable-length encoding. Despite this though it shaves about 10MB off the encoded size of the module from #2318 --- crates/cranelift/src/lib.rs | 95 ++++++++++++------- .../debug/src/transform/address_transform.rs | 40 +++++--- crates/debug/src/transform/expression.rs | 13 ++- crates/environ/src/address_map.rs | 22 +++-- crates/wasmtime/src/frame_info.rs | 17 +--- 5 files changed, 115 insertions(+), 72 deletions(-) diff --git a/crates/cranelift/src/lib.rs b/crates/cranelift/src/lib.rs index 8d8df744cf..5afb14eb94 100644 --- a/crates/cranelift/src/lib.rs +++ b/crates/cranelift/src/lib.rs @@ -229,19 +229,28 @@ impl StackMapSink { fn get_function_address_map<'data>( context: &Context, data: &FunctionBodyData<'data>, - body_len: usize, + body_len: u32, isa: &dyn isa::TargetIsa, ) -> FunctionAddressMap { + // Generate artificial srcloc for function start/end to identify boundary + // within module. + let data = data.body.get_binary_reader(); + let offset = data.original_position(); + let len = data.bytes_remaining(); + assert!((offset + len) <= u32::max_value() as usize); + let start_srcloc = ir::SourceLoc::new(offset as u32); + let end_srcloc = ir::SourceLoc::new((offset + len) as u32); + let instructions = if let Some(ref mcr) = &context.mach_compile_result { // New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping // tuples. - collect_address_maps(mcr.buffer.get_srclocs_sorted().into_iter().map( - |&MachSrcLoc { start, end, loc }| InstructionAddressMap { - srcloc: loc, - code_offset: start as usize, - code_len: (end - start) as usize, - }, - )) + collect_address_maps( + body_len, + mcr.buffer + .get_srclocs_sorted() + .into_iter() + .map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))), + ) } else { // Old-style backend: we need to traverse the instruction/encoding info in the function. let func = &context.func; @@ -250,28 +259,16 @@ fn get_function_address_map<'data>( let encinfo = isa.encoding_info(); collect_address_maps( + body_len, blocks .into_iter() .flat_map(|block| func.inst_offsets(block, &encinfo)) - .map(|(offset, inst, size)| InstructionAddressMap { - srcloc: func.srclocs[inst], - code_offset: offset as usize, - code_len: size as usize, - }), + .map(|(offset, inst, size)| (func.srclocs[inst], offset, size)), ) }; - // Generate artificial srcloc for function start/end to identify boundary - // within module. Similar to FuncTranslator::cur_srcloc(): it will wrap around - // if byte code is larger than 4 GB. - let data = data.body.get_binary_reader(); - let offset = data.original_position(); - let len = data.bytes_remaining(); - let start_srcloc = ir::SourceLoc::new(offset as u32); - let end_srcloc = ir::SourceLoc::new((offset + len) as u32); - FunctionAddressMap { - instructions, + instructions: instructions.into(), start_srcloc, end_srcloc, body_offset: 0, @@ -283,23 +280,54 @@ fn get_function_address_map<'data>( // into a `FunctionAddressMap`. This will automatically coalesce adjacent // instructions which map to the same original source position. fn collect_address_maps( - iter: impl IntoIterator, + code_size: u32, + iter: impl IntoIterator, ) -> Vec { let mut iter = iter.into_iter(); - let mut cur = match iter.next() { + let (mut cur_loc, mut cur_offset, mut cur_len) = match iter.next() { Some(i) => i, None => return Vec::new(), }; let mut ret = Vec::new(); - for item in iter { - if cur.code_offset + cur.code_len == item.code_offset && item.srcloc == cur.srcloc { - cur.code_len += item.code_len; - } else { - ret.push(cur); - cur = item; + for (loc, offset, len) in iter { + // If this instruction is adjacent to the previous and has the same + // source location then we can "coalesce" it with the current + // instruction. + if cur_offset + cur_len == offset && loc == cur_loc { + cur_len += len; + continue; } + + // Push an entry for the previous source item. + ret.push(InstructionAddressMap { + srcloc: cur_loc, + code_offset: cur_offset, + }); + // And push a "dummy" entry if necessary to cover the span of ranges, + // if any, between the previous source offset and this one. + if cur_offset + cur_len != offset { + ret.push(InstructionAddressMap { + srcloc: ir::SourceLoc::default(), + code_offset: cur_offset + cur_len, + }); + } + // Update our current location to get extended later or pushed on at + // the end. + cur_loc = loc; + cur_offset = offset; + cur_len = len; } - ret.push(cur); + ret.push(InstructionAddressMap { + srcloc: cur_loc, + code_offset: cur_offset, + }); + if cur_offset + cur_len != code_size { + ret.push(InstructionAddressMap { + srcloc: ir::SourceLoc::default(), + code_offset: cur_offset + cur_len, + }); + } + return ret; } @@ -406,7 +434,8 @@ impl Compiler for Cranelift { CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) })?; - let address_transform = get_function_address_map(&context, &input, code_buf.len(), isa); + let address_transform = + get_function_address_map(&context, &input, code_buf.len() as u32, isa); let ranges = if tunables.debug_info { let ranges = context.build_value_labels_ranges(isa).map_err(|error| { diff --git a/crates/debug/src/transform/address_transform.rs b/crates/debug/src/transform/address_transform.rs index 43bfc0b636..2ef2e456bb 100644 --- a/crates/debug/src/transform/address_transform.rs +++ b/crates/debug/src/transform/address_transform.rs @@ -102,7 +102,7 @@ fn build_function_lookup( let mut ranges_index = BTreeMap::new(); let mut current_range = Vec::new(); let mut last_gen_inst_empty = false; - for t in &ft.instructions { + for (i, t) in ft.instructions.iter().enumerate() { if t.srcloc.is_default() { continue; } @@ -111,8 +111,11 @@ fn build_function_lookup( assert_le!(fn_start, offset); assert_le!(offset, fn_end); - let inst_gen_start = t.code_offset; - let inst_gen_end = t.code_offset + t.code_len; + let inst_gen_start = t.code_offset as usize; + let inst_gen_end = match ft.instructions.get(i + 1) { + Some(i) => i.code_offset as usize, + None => ft.body_len as usize, + }; if last_wasm_pos > offset { // Start new range. @@ -149,7 +152,7 @@ fn build_function_lookup( } last_wasm_pos = offset; } - let last_gen_addr = ft.body_offset + ft.body_len; + let last_gen_addr = ft.body_offset + ft.body_len as usize; ranges_index.insert(range_wasm_start, ranges.len()); ranges.push(Range { wasm_start: range_wasm_start, @@ -193,13 +196,13 @@ fn build_function_addr_map( for (_, f) in funcs { let ft = &f.address_map; let mut fn_map = Vec::new(); - for t in &ft.instructions { + for t in ft.instructions.iter() { if t.srcloc.is_default() { continue; } let offset = get_wasm_code_offset(t.srcloc, code_section_offset); fn_map.push(AddressMap { - generated: t.code_offset, + generated: t.code_offset as usize, wasm: offset, }); } @@ -213,7 +216,7 @@ fn build_function_addr_map( map.push(FunctionMap { offset: ft.body_offset, - len: ft.body_len, + len: ft.body_len as usize, wasm_start: get_wasm_code_offset(ft.start_srcloc, code_section_offset), wasm_end: get_wasm_code_offset(ft.end_srcloc, code_section_offset), addresses: fn_map.into_boxed_slice(), @@ -605,6 +608,7 @@ mod tests { use super::{build_function_lookup, get_wasm_code_offset, AddressTransform}; use gimli::write::Address; use std::iter::FromIterator; + use std::mem; use wasmtime_environ::entity::PrimaryMap; use wasmtime_environ::ir::SourceLoc; use wasmtime_environ::{CompiledFunction, WasmFileInfo}; @@ -626,14 +630,21 @@ mod tests { InstructionAddressMap { srcloc: SourceLoc::new(wasm_offset + 2), code_offset: 5, - code_len: 3, + }, + InstructionAddressMap { + srcloc: SourceLoc::default(), + code_offset: 8, }, InstructionAddressMap { srcloc: SourceLoc::new(wasm_offset + 7), code_offset: 15, - code_len: 8, }, - ], + InstructionAddressMap { + srcloc: SourceLoc::default(), + code_offset: 23, + }, + ] + .into(), start_srcloc: SourceLoc::new(wasm_offset), end_srcloc: SourceLoc::new(wasm_offset + 10), body_offset: 0, @@ -678,11 +689,16 @@ mod tests { fn test_build_function_lookup_two_ranges() { let mut input = create_simple_func(11); // append instruction with same srcloc as input.instructions[0] - input.instructions.push(InstructionAddressMap { + let mut list = Vec::from(mem::take(&mut input.instructions)); + list.push(InstructionAddressMap { srcloc: SourceLoc::new(11 + 2), code_offset: 23, - code_len: 3, }); + list.push(InstructionAddressMap { + srcloc: SourceLoc::default(), + code_offset: 26, + }); + input.instructions = list.into(); let (start, end, lookup) = build_function_lookup(&input, 1); assert_eq!(10, start); assert_eq!(20, end); diff --git a/crates/debug/src/transform/expression.rs b/crates/debug/src/transform/expression.rs index 3d9985b2fa..0a286aa616 100644 --- a/crates/debug/src/transform/expression.rs +++ b/crates/debug/src/transform/expression.rs @@ -1145,14 +1145,21 @@ mod tests { InstructionAddressMap { srcloc: SourceLoc::new(code_section_offset + 12), code_offset: 5, - code_len: 3, + }, + InstructionAddressMap { + srcloc: SourceLoc::default(), + code_offset: 8, }, InstructionAddressMap { srcloc: SourceLoc::new(code_section_offset + 17), code_offset: 15, - code_len: 8, }, - ], + InstructionAddressMap { + srcloc: SourceLoc::default(), + code_offset: 23, + }, + ] + .into(), start_srcloc: SourceLoc::new(code_section_offset + 10), end_srcloc: SourceLoc::new(code_section_offset + 20), body_offset: 0, diff --git a/crates/environ/src/address_map.rs b/crates/environ/src/address_map.rs index 4a2f80c90f..972ab0468a 100644 --- a/crates/environ/src/address_map.rs +++ b/crates/environ/src/address_map.rs @@ -7,22 +7,24 @@ use serde::{Deserialize, Serialize}; /// Single source location to generated address mapping. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub struct InstructionAddressMap { - /// Original source location. + /// Where in the source this instruction comes from. pub srcloc: ir::SourceLoc, - /// Generated instructions offset. - pub code_offset: usize, - - /// Generated instructions length. - pub code_len: usize, + /// Offset from the start of the function's compiled code to where this + /// instruction is located, or the region where it starts. + pub code_offset: u32, } /// Function and its instructions addresses mappings. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)] pub struct FunctionAddressMap { - /// Instructions maps. - /// The array is sorted by the InstructionAddressMap::code_offset field. - pub instructions: Vec, + /// An array of data for the instructions in this function, indicating where + /// each instruction maps back to in the original function. + /// + /// This array is sorted least-to-greatest by the `code_offset` field. + /// Additionally the span of each `InstructionAddressMap` is implicitly the + /// gap between it and the next item in the array. + pub instructions: Box<[InstructionAddressMap]>, /// Function start source location (normally declaration). pub start_srcloc: ir::SourceLoc, @@ -34,7 +36,7 @@ pub struct FunctionAddressMap { pub body_offset: usize, /// Generated function body length. - pub body_len: usize, + pub body_len: u32, } /// Memory definition offset in the VMContext structure. diff --git a/crates/wasmtime/src/frame_info.rs b/crates/wasmtime/src/frame_info.rs index 1cc81a0653..28e7b88d7e 100644 --- a/crates/wasmtime/src/frame_info.rs +++ b/crates/wasmtime/src/frame_info.rs @@ -64,7 +64,7 @@ impl GlobalFrameInfo { // Use our relative position from the start of the function to find the // machine instruction that corresponds to `pc`, which then allows us to // map that to a wasm original source location. - let rel_pos = pc - func.start; + let rel_pos = (pc - func.start) as u32; let pos = match func .instr_map .instructions @@ -77,19 +77,8 @@ impl GlobalFrameInfo { // instructions cover `pc`. Err(0) => None, - // This would be at the `nth` slot, so check `n-1` to see if we're - // part of that instruction. This happens due to the minus one when - // this function is called form trap symbolication, where we don't - // always get called with a `pc` that's an exact instruction - // boundary. - Err(n) => { - let instr = &func.instr_map.instructions[n - 1]; - if instr.code_offset <= rel_pos && rel_pos < instr.code_offset + instr.code_len { - Some(n - 1) - } else { - None - } - } + // This would be at the `nth` slot, so we're at the `n-1`th slot. + Err(n) => Some(n - 1), }; // In debug mode for now assert that we found a mapping for `pc` within