Browse Source

cranelift: Enable "chaos mode" in egraph pass (#7968)

First of all, thread a "chaos mode" control-plane into Context::optimize
and from there into EgraphPass, OptimizeCtx, and Elaborator.

In this commit we use the control-plane to change the following
behaviors in ways which shouldn't cause incorrect results:

- Dominator-tree block traversal order for both the rule application and
  elaboration passes
- Order of evaluating optimization alternatives from `simplify`
- Choose worst values instead of best in each eclass

Co-authored-by: L. Pereira <lpereira@fastly.com>
pull/7778/merge
Jamey Sharp 9 months ago
committed by GitHub
parent
commit
caa555f8f5
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 17
      cranelift/codegen/src/context.rs
  2. 74
      cranelift/codegen/src/egraph.rs
  3. 37
      cranelift/codegen/src/egraph/elaborate.rs
  4. 3
      cranelift/filetests/src/test_optimize.rs
  5. 3
      cranelift/filetests/src/test_wasm.rs
  6. 3
      cranelift/src/souper_harvest.rs
  7. 8
      fuzz/fuzz_targets/cranelift-fuzzgen.rs

17
cranelift/codegen/src/context.rs

@ -141,7 +141,7 @@ impl Context {
self.verify_if(isa)?;
self.optimize(isa)?;
self.optimize(isa, ctrl_plane)?;
isa.compile_function(&self.func, &self.domtree, self.want_disasm, ctrl_plane)
}
@ -151,7 +151,11 @@ impl Context {
/// allocation.
///
/// Public only for testing purposes.
pub fn optimize(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
pub fn optimize(
&mut self,
isa: &dyn TargetIsa,
ctrl_plane: &mut ControlPlane,
) -> CodegenResult<()> {
log::debug!(
"Number of CLIF instructions to optimize: {}",
self.func.dfg.num_insts()
@ -185,7 +189,7 @@ impl Context {
self.remove_constant_phis(isa)?;
if opt_level != OptLevel::None {
self.egraph_pass(isa)?;
self.egraph_pass(isa, ctrl_plane)?;
}
Ok(())
@ -347,7 +351,11 @@ impl Context {
}
/// Run optimizations via the egraph infrastructure.
pub fn egraph_pass<'a, FOI>(&mut self, fisa: FOI) -> CodegenResult<()>
pub fn egraph_pass<'a, FOI>(
&mut self,
fisa: FOI,
ctrl_plane: &mut ControlPlane,
) -> CodegenResult<()>
where
FOI: Into<FlagsOrIsa<'a>>,
{
@ -364,6 +372,7 @@ impl Context {
&self.domtree,
&self.loop_analysis,
&mut alias_analysis,
ctrl_plane,
);
pass.run();
log::debug!("egraph stats: {:?}", pass.stats);

74
cranelift/codegen/src/egraph.rs

@ -15,6 +15,7 @@ use crate::opts::IsleContext;
use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
use crate::trace;
use crate::unionfind::UnionFind;
use cranelift_control::ControlPlane;
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
@ -53,6 +54,9 @@ pub struct EgraphPass<'a> {
/// Loop analysis results, used for built-in LICM during
/// elaboration.
loop_analysis: &'a LoopAnalysis,
/// Chaos-mode control-plane so we can test that we still get
/// correct results when our heuristics make bad decisions.
ctrl_plane: &'a mut ControlPlane,
/// Which canonical Values do we want to rematerialize in each
/// block where they're used?
///
@ -84,6 +88,7 @@ where
pub(crate) stats: &'opt mut Stats,
pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
pub(crate) alias_analysis_state: &'opt mut LastStores,
ctrl_plane: &'opt mut ControlPlane,
// Held locally during optimization of one node (recursively):
pub(crate) rewrite_depth: usize,
pub(crate) subsume_values: FxHashSet<Value>,
@ -217,7 +222,6 @@ where
let orig_value = self.func.dfg.first_result(inst);
let mut optimized_values = std::mem::take(&mut self.optimized_values);
let mut isle_ctx = IsleContext { ctx: self };
// Limit rewrite depth. When we apply optimization rules, they
// may create new nodes (values) and those are, recursively,
@ -229,23 +233,20 @@ where
// infinite or problematic recursion, we bound the rewrite
// depth to a small constant here.
const REWRITE_LIMIT: usize = 5;
if isle_ctx.ctx.rewrite_depth > REWRITE_LIMIT {
isle_ctx.ctx.stats.rewrite_depth_limit += 1;
if self.rewrite_depth > REWRITE_LIMIT {
self.stats.rewrite_depth_limit += 1;
return orig_value;
}
isle_ctx.ctx.rewrite_depth += 1;
trace!(
"Incrementing rewrite depth; now {}",
isle_ctx.ctx.rewrite_depth
);
self.rewrite_depth += 1;
trace!("Incrementing rewrite depth; now {}", self.rewrite_depth);
// Invoke the ISLE toplevel constructor, getting all new
// values produced as equivalents to this value.
trace!("Calling into ISLE with original value {}", orig_value);
isle_ctx.ctx.stats.rewrite_rule_invoked += 1;
self.stats.rewrite_rule_invoked += 1;
debug_assert!(optimized_values.is_empty());
crate::opts::generated_code::constructor_simplify(
&mut isle_ctx,
&mut IsleContext { ctx: self },
orig_value,
&mut optimized_values,
);
@ -253,6 +254,10 @@ where
" -> returned from ISLE, generated {} optimized values",
optimized_values.len()
);
// It's not supposed to matter what order `simplify` returns values in.
self.ctrl_plane.shuffle(&mut optimized_values);
if optimized_values.len() > MATCHES_LIMIT {
trace!("Reached maximum matches limit; too many optimized values, ignoring rest.");
optimized_values.truncate(MATCHES_LIMIT);
@ -272,45 +277,30 @@ where
trace!(" -> same as orig value; skipping");
continue;
}
if isle_ctx.ctx.subsume_values.contains(&optimized_value) {
if self.subsume_values.contains(&optimized_value) {
// Merge in the unionfind so canonicalization
// still works, but take *only* the subsuming
// value, and break now.
isle_ctx.ctx.eclasses.union(optimized_value, union_value);
isle_ctx
.ctx
.func
.dfg
.merge_facts(optimized_value, union_value);
self.eclasses.union(optimized_value, union_value);
self.func.dfg.merge_facts(optimized_value, union_value);
union_value = optimized_value;
break;
}
let old_union_value = union_value;
union_value = isle_ctx
.ctx
.func
.dfg
.union(old_union_value, optimized_value);
isle_ctx.ctx.stats.union += 1;
union_value = self.func.dfg.union(old_union_value, optimized_value);
self.stats.union += 1;
trace!(" -> union: now {}", union_value);
isle_ctx.ctx.eclasses.add(union_value);
isle_ctx
.ctx
.eclasses
.union(old_union_value, optimized_value);
isle_ctx
.ctx
.func
.dfg
.merge_facts(old_union_value, optimized_value);
isle_ctx.ctx.eclasses.union(old_union_value, union_value);
self.eclasses.add(union_value);
self.eclasses.union(old_union_value, optimized_value);
self.func.dfg.merge_facts(old_union_value, optimized_value);
self.eclasses.union(old_union_value, union_value);
}
isle_ctx.ctx.rewrite_depth -= 1;
self.rewrite_depth -= 1;
debug_assert!(isle_ctx.ctx.optimized_values.is_empty());
isle_ctx.ctx.optimized_values = optimized_values;
debug_assert!(self.optimized_values.is_empty());
self.optimized_values = optimized_values;
union_value
}
@ -400,6 +390,7 @@ impl<'a> EgraphPass<'a> {
raw_domtree: &'a DominatorTree,
loop_analysis: &'a LoopAnalysis,
alias_analysis: &'a mut AliasAnalysis<'a>,
ctrl_plane: &'a mut ControlPlane,
) -> Self {
let num_values = func.dfg.num_values();
let mut domtree = DominatorTreePreorder::new();
@ -409,6 +400,7 @@ impl<'a> EgraphPass<'a> {
domtree,
loop_analysis,
alias_analysis,
ctrl_plane,
stats: Stats::default(),
eclasses: UnionFind::with_capacity(num_values),
remat_values: FxHashSet::default(),
@ -505,7 +497,11 @@ impl<'a> EgraphPass<'a> {
// We popped this block; push children
// immediately, then process this block.
block_stack.push(StackEntry::Pop);
block_stack.extend(self.domtree.children(block).map(StackEntry::Visit));
block_stack.extend(
self.ctrl_plane
.shuffled(self.domtree.children(block))
.map(StackEntry::Visit),
);
effectful_gvn_map.increment_depth();
trace!("Processing block {}", block);
@ -557,6 +553,7 @@ impl<'a> EgraphPass<'a> {
stats: &mut self.stats,
alias_analysis: self.alias_analysis,
alias_analysis_state: &mut alias_analysis_state,
ctrl_plane: self.ctrl_plane,
optimized_values: Default::default(),
};
@ -614,6 +611,7 @@ impl<'a> EgraphPass<'a> {
self.loop_analysis,
&mut self.remat_values,
&mut self.stats,
self.ctrl_plane,
);
elaborator.elaborate();

37
cranelift/codegen/src/egraph/elaborate.rs

@ -12,6 +12,7 @@ use crate::loop_analysis::{Loop, LoopAnalysis};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use alloc::vec::Vec;
use cranelift_control::ControlPlane;
use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
use smallvec::{smallvec, SmallVec};
@ -64,6 +65,9 @@ pub(crate) struct Elaborator<'a> {
/// Stats for various events during egraph processing, to help
/// with optimization of this infrastructure.
stats: &'a mut Stats,
/// Chaos-mode control-plane so we can test that we still get
/// correct results when our heuristics make bad decisions.
ctrl_plane: &'a mut ControlPlane,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@ -139,6 +143,7 @@ impl<'a> Elaborator<'a> {
loop_analysis: &'a LoopAnalysis,
remat_values: &'a FxHashSet<Value>,
stats: &'a mut Stats,
ctrl_plane: &'a mut ControlPlane,
) -> Self {
let num_values = func.dfg.num_values();
let mut value_to_best_value =
@ -158,6 +163,7 @@ impl<'a> Elaborator<'a> {
block_stack: vec![],
remat_copies: FxHashMap::default(),
stats,
ctrl_plane,
}
}
@ -215,6 +221,14 @@ impl<'a> Elaborator<'a> {
fn compute_best_values(&mut self) {
let best = &mut self.value_to_best_value;
// We can't make random decisions inside the fixpoint loop below because
// that could cause values to change on every iteration of the loop,
// which would make the loop never terminate. So in chaos testing
// mode we need a form of making suboptimal decisions that is fully
// deterministic. We choose to simply make the worst decision we know
// how to do instead of the best.
let use_worst = self.ctrl_plane.get_decision();
// Do a fixpoint loop to compute the best value for each eclass.
//
// The maximum number of iterations is the length of the longest chain
@ -227,7 +241,14 @@ impl<'a> Elaborator<'a> {
// which we don't really care about, or (b) the CLIF producer did
// something weird, in which case it is their responsibility to stop
// doing that.
trace!("Entering fixpoint loop to compute the best values for each eclass");
trace!(
"Entering fixpoint loop to compute the {} values for each eclass",
if use_worst {
"worst (chaos mode)"
} else {
"best"
}
);
let mut keep_going = true;
while keep_going {
keep_going = false;
@ -248,7 +269,17 @@ impl<'a> Elaborator<'a> {
// is a `(cost, value)` tuple; `cost` comes first so
// the natural comparison works based on cost, and
// breaks ties based on value number.
best[value] = std::cmp::min(best[x], best[y]);
best[value] = if use_worst {
if best[x].1.is_reserved_value() {
best[y]
} else if best[y].1.is_reserved_value() {
best[x]
} else {
std::cmp::max(best[x], best[y])
}
} else {
std::cmp::min(best[x], best[y])
};
trace!(
" -> best of union({:?}, {:?}) = {:?}",
best[x],
@ -776,7 +807,7 @@ impl<'a> Elaborator<'a> {
// traversal so we do this after processing this
// block above.
let block_stack_end = self.block_stack.len();
for child in domtree.children(block) {
for child in self.ctrl_plane.shuffled(domtree.children(block)) {
self.block_stack.push(BlockStackEntry::Elaborate {
block: child,
idom: Some(block),

3
cranelift/filetests/src/test_optimize.rs

@ -10,6 +10,7 @@
use crate::subtest::{check_precise_output, run_filecheck, Context, SubTest};
use anyhow::Result;
use cranelift_codegen::ir;
use cranelift_control::ControlPlane;
use cranelift_reader::{TestCommand, TestOption};
use std::borrow::Cow;
@ -53,7 +54,7 @@ impl SubTest for TestOptimize {
let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
comp_ctx
.optimize(isa)
.optimize(isa, &mut ControlPlane::default())
.map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, e))?;
let clif = format!("{:?}", comp_ctx.func);

3
cranelift/filetests/src/test_wasm.rs

@ -5,6 +5,7 @@ mod env;
use anyhow::{bail, ensure, Context, Result};
use config::TestConfig;
use cranelift_control::ControlPlane;
use env::ModuleEnv;
use similar::TextDiff;
use std::{fmt::Write, path::Path};
@ -64,7 +65,7 @@ pub fn run(path: &Path, wat: &str) -> Result<()> {
writeln!(&mut actual, "{}", code.vcode.as_ref().unwrap()).unwrap();
} else if config.optimize {
let mut ctx = cranelift_codegen::Context::for_function(func.clone());
ctx.optimize(isa)
ctx.optimize(isa, &mut ControlPlane::default())
.map_err(|e| crate::pretty_anyhow_error(&ctx.func, e))?;
writeln!(&mut actual, "{}", ctx.func.display()).unwrap();
} else {

3
cranelift/src/souper_harvest.rs

@ -1,5 +1,6 @@
use anyhow::{Context as _, Result};
use clap::Parser;
use cranelift_codegen::control::ControlPlane;
use cranelift_codegen::Context;
use cranelift_reader::parse_sets_and_triple;
use cranelift_wasm::DummyEnvironment;
@ -133,7 +134,7 @@ pub fn run(options: &Options) -> Result<()> {
let mut ctx = Context::new();
ctx.func = func;
ctx.optimize(fisa.isa.unwrap())
ctx.optimize(fisa.isa.unwrap(), &mut ControlPlane::default())
.context("failed to run optimizations")?;
ctx.souper_harvest(send)

8
fuzz/fuzz_targets/cranelift-fuzzgen.rs

@ -236,12 +236,14 @@ impl TestCase {
}
fn to_optimized(&self) -> Self {
let mut ctrl_planes = self.ctrl_planes.clone();
let optimized_functions: Vec<Function> = self
.functions
.iter()
.map(|func| {
.zip(ctrl_planes.iter_mut())
.map(|(func, ctrl_plane)| {
let mut ctx = Context::for_function(func.clone());
ctx.optimize(self.isa.as_ref()).unwrap();
ctx.optimize(self.isa.as_ref(), ctrl_plane).unwrap();
ctx.func
})
.collect();
@ -249,7 +251,7 @@ impl TestCase {
TestCase {
isa: self.isa.clone(),
functions: optimized_functions,
ctrl_planes: self.ctrl_planes.clone(),
ctrl_planes,
inputs: self.inputs.clone(),
compare_against_host: false,
}

Loading…
Cancel
Save