Browse Source

Cranelift: Use bump allocation in `remove_constant_phis` pass (#4710)

* Cranelift: Use bump allocation in `remove_constant_phis` pass

This makes compilation 2-6% faster for Sightglass's bz2 benchmark:

```
compilation :: cycles :: benchmarks/bz2/benchmark.wasm

  Δ = 7290648.36 ± 4245152.07 (confidence = 99%)

  bump.so is 1.02x to 1.06x faster than main.so!

  [166388177 183238542.98 214732518] bump.so
  [172836648 190529191.34 217514271] main.so

compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm

  No difference in performance.

  [182220055 225793551.12 277857575] bump.so
  [193212613 227784078.61 277175335] main.so

compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  No difference in performance.

  [3848442474 4295214144.37 4665127241] bump.so
  [3969505457 4262415290.10 4563869974] main.so
```

* Add audit for `bumpalo`

* Add an audit of `arrayvec` version 0.7.2

* Remove unnecessary `collect` into `Vec`

I wasn't able to measure any perf difference here, but its nice to do anyways.

* Use a `SecondaryMap` for keeping track of summaries
pull/4716/head
Nick Fitzgerald 2 years ago
committed by GitHub
parent
commit
ae7688059d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      Cargo.lock
  2. 2
      cranelift/codegen/Cargo.toml
  3. 156
      cranelift/codegen/src/remove_constant_phis.rs
  4. 15
      supply-chain/audits.toml

8
Cargo.lock

@ -102,6 +102,12 @@ dependencies = [
"derive_arbitrary",
]
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "async-trait"
version = "0.1.53"
@ -529,7 +535,9 @@ dependencies = [
name = "cranelift-codegen"
version = "0.88.0"
dependencies = [
"arrayvec",
"bincode",
"bumpalo",
"cranelift-bforest",
"cranelift-codegen-meta",
"cranelift-codegen-shared",

2
cranelift/codegen/Cargo.toml

@ -13,6 +13,8 @@ build = "build.rs"
edition = "2021"
[dependencies]
arrayvec = "0.7"
bumpalo = "3"
cranelift-codegen-shared = { path = "./shared", version = "0.88.0" }
cranelift-entity = { path = "../entity", version = "0.88.0" }
cranelift-bforest = { path = "../bforest", version = "0.88.0" }

156
cranelift/codegen/src/remove_constant_phis.rs

@ -4,13 +4,15 @@ use crate::dominator_tree::DominatorTree;
use crate::entity::EntityList;
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir;
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
use crate::ir::{Block, Inst, Value};
use crate::timing;
use smallvec::{smallvec, SmallVec};
use std::vec::Vec;
use arrayvec::ArrayVec;
use bumpalo::Bump;
use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
// A note on notation. For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and
@ -107,27 +109,73 @@ impl AbstractValue {
}
}
#[derive(Clone, Copy, Debug)]
struct OutEdge<'a> {
/// An instruction that transfers control.
inst: Inst,
/// The block that control is transferred to.
block: Block,
/// The arguments to that block.
///
/// These values can be from both groups A and B.
args: &'a [Value],
}
impl<'a> OutEdge<'a> {
/// Construct a new `OutEdge` for the given instruction.
///
/// Returns `None` if this is an edge without any block arguments, which
/// means we can ignore it for this analysis's purposes.
#[inline]
fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> {
let inst_var_args = dfg.inst_variable_args(inst);
// Skip edges without params.
if inst_var_args.is_empty() {
return None;
}
Some(OutEdge {
inst,
block,
args: bump.alloc_slice_fill_iter(
inst_var_args
.iter()
.map(|value| dfg.resolve_aliases(*value)),
),
})
}
}
/// For some block, a useful bundle of info. The `Block` itself is not stored
/// here since it will be the key in the associated `FxHashMap` -- see
/// `summaries` below. For the `SmallVec` tuning params: most blocks have
/// few parameters, hence `4`. And almost all blocks have either one or two
/// successors, hence `2`.
#[derive(Debug)]
struct BlockSummary {
/// Formal parameters for this `Block`
formals: SmallVec<[Value; 4] /*Group A*/>,
/// For each `Inst` in this block that transfers to another block: the
/// `Inst` itself, the destination `Block`, and the actual parameters
/// passed. We don't bother to include transfers that pass zero parameters
#[derive(Clone, Debug, Default)]
struct BlockSummary<'a> {
/// Formal parameters for this `Block`.
///
/// These values are from group A.
formals: &'a [Value],
/// Each outgoing edge from this block.
///
/// We don't bother to include transfers that pass zero parameters
/// since that makes more work for the solver for no purpose.
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
///
/// Note that, because blocks used with `br_table`s cannot have block
/// arguments, there are at most two outgoing edges from these blocks.
dests: ArrayVec<OutEdge<'a>, 2>,
}
impl BlockSummary {
fn new(formals: SmallVec<[Value; 4]>) -> Self {
impl<'a> BlockSummary<'a> {
/// Construct a new `BlockSummary`, using `values` as its backing storage.
#[inline]
fn new(bump: &'a Bump, formals: &[Value]) -> Self {
Self {
formals,
dests: smallvec![],
formals: bump.alloc_slice_copy(formals),
dests: Default::default(),
}
}
}
@ -171,21 +219,17 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
let _tt = timing::remove_constant_phis();
debug_assert!(domtree.is_valid());
// Get the blocks, in reverse postorder
let blocks_reverse_postorder = domtree
.cfg_postorder()
.into_iter()
.rev()
.collect::<Vec<_>>();
// Phase 1 of 3: for each block, make a summary containing all relevant
// info. The solver will iterate over the summaries, rather than having
// to inspect each instruction in each block.
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
let bump =
Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
let mut summaries =
SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
for &&b in &blocks_reverse_postorder {
for b in domtree.cfg_postorder().iter().rev().copied() {
let formals = func.dfg.block_params(b);
let mut summary = BlockSummary::new(SmallVec::from(formals));
let mut summary = BlockSummary::new(&bump, formals);
for inst in func.layout.block_insts(b) {
let idetails = &func.dfg[inst];
@ -194,15 +238,8 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// `SingleDest` here.
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
{
let inst_var_args = func.dfg.inst_variable_args(inst);
// Skip branches/jumps that carry no params.
if inst_var_args.len() > 0 {
let mut actuals = SmallVec::<[Value; 4]>::new();
for arg in inst_var_args {
let arg = func.dfg.resolve_aliases(*arg);
actuals.push(arg);
}
summary.dests.push((inst, dest, actuals));
if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) {
summary.dests.push(edge);
}
}
}
@ -211,7 +248,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// in the summary, *unless* they have neither formals nor any
// param-carrying branches/jumps.
if formals.len() > 0 || summary.dests.len() > 0 {
summaries.insert(b, summary);
summaries[b] = summary;
}
}
@ -227,7 +264,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Set up initial solver state
let mut state = SolverState::new();
for &&b in &blocks_reverse_postorder {
for b in domtree.cfg_postorder().iter().rev().copied() {
// For each block, get the formals
if b == entry_block {
continue;
@ -246,27 +283,18 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
iter_no += 1;
let mut changed = false;
for &src in &blocks_reverse_postorder {
let mb_src_summary = summaries.get(src);
// The src block might have no summary. This means it has no
// branches/jumps that carry parameters *and* it doesn't take any
// parameters itself. Phase 1 ensures this. So we can ignore it.
if mb_src_summary.is_none() {
continue;
}
let src_summary = mb_src_summary.unwrap();
for (_inst, dst, src_actuals) in &src_summary.dests {
assert!(*dst != entry_block);
for src in domtree.cfg_postorder().iter().rev().copied() {
let src_summary = &summaries[src];
for edge in &src_summary.dests {
assert!(edge.block != entry_block);
// By contrast, the dst block must have a summary. Phase 1
// will have only included an entry in `src_summary.dests` if
// that branch/jump carried at least one parameter. So the
// dst block does take parameters, so it must have a summary.
let dst_summary = summaries
.get(dst)
.expect("remove_constant_phis: dst block has no summary");
let dst_summary = &summaries[edge.block];
let dst_formals = &dst_summary.formals;
assert_eq!(src_actuals.len(), dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
assert_eq!(edge.args.len(), dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(edge.args) {
// Find the abstract value for `actual`. If it is a block
// formal parameter then the most recent abstract value is
// to be found in the solver state. If not, then it's a
@ -305,14 +333,14 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Make up a set of blocks that need editing.
let mut need_editing = FxHashSet::<Block>::default();
for (block, summary) in &summaries {
if *block == entry_block {
for (block, summary) in summaries.iter() {
if block == entry_block {
continue;
}
for formal in &summary.formals {
for formal in summary.formals {
let formal_absval = state.get(*formal);
if formal_absval.is_one() {
need_editing.insert(*block);
need_editing.insert(block);
break;
}
}
@ -344,19 +372,19 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Secondly, visit all branch insns. If the destination has had its
// formals changed, change the actuals accordingly. Don't scan all insns,
// rather just visit those as listed in the summaries we prepared earlier.
for (_src_block, summary) in &summaries {
for (inst, dst_block, _src_actuals) in &summary.dests {
if !need_editing.contains(dst_block) {
for summary in summaries.values() {
for edge in &summary.dests {
if !need_editing.contains(&edge.block) {
continue;
}
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
let old_actuals = func.dfg[edge.inst].take_value_list().unwrap();
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
let num_fixed_actuals = func.dfg[*inst]
let num_fixed_actuals = func.dfg[edge.inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let dst_summary = summaries.get(&dst_block).unwrap();
let dst_summary = &summaries[edge.block];
// Check that the numbers of arguments make sense.
assert!(num_fixed_actuals <= num_old_actuals);
@ -384,7 +412,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
new_actuals.push(actual_i, &mut func.dfg.value_lists);
}
}
func.dfg[*inst].put_value_list(new_actuals);
func.dfg[edge.inst].put_value_list(new_actuals);
}
}

15
supply-chain/audits.toml

@ -1,12 +1,27 @@
# cargo-vet audits file
[[audits.arrayvec]]
who = "Nick Fitzgerald <fitzgen@gmail.com>"
criteria = "safe-to-deploy"
version = "0.7.2"
notes = """
Well documented invariants, good assertions for those invariants in unsafe code,
and tested with MIRI to boot. LGTM.
"""
[[audits.backtrace]]
who = "Alex Crichton <alex@alexcrichton.com>"
criteria = "safe-to-deploy"
version = "0.3.66"
notes = "I am the author of this crate."
[[audits.bumpalo]]
who = "Nick Fitzgerald <fitzgen@gmail.com>"
criteria = "safe-to-deploy"
version = "3.9.1"
notes = "I am the author of this crate."
[[audits.cc]]
who = "Alex Crichton <alex@alexcrichton.com>"
criteria = "safe-to-deploy"

Loading…
Cancel
Save