From 3a6dd832c0fcc51d72e7ef866cda246e8bafe849 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 2 Sep 2020 15:26:42 -0700 Subject: [PATCH] Harvest left-hand side superoptimization candidates. Given a clif function, harvest all its integer subexpressions, so that they can be fed into [Souper](https://github.com/google/souper) as candidates for superoptimization. For some of these candidates, Souper will successfully synthesize a right-hand side that is equivalent but has lower cost than the left-hand side. Then, we can combine these left- and right-hand sides into a complete optimization, and add it to our peephole passes. To harvest the expression that produced a given value `x`, we do a post-order traversal of the dataflow graph starting from `x`. As we do this traversal, we maintain a map from clif values to their translated Souper values. We stop traversing when we reach anything that can't be translated into Souper IR: a memory load, a float-to-int conversion, a block parameter, etc. For values produced by these instructions, we create a Souper `var`, which is an input variable to the optimization. For instructions that have a direct mapping into Souper IR, we get the Souper version of each of its operands and then create the Souper version of the instruction itself. It should now be clear why we do a post-order traversal: we need an instruction's translated operands in order to translate the instruction itself. Once this instruction is translated, we update the clif-to-souper map with this new translation so that any other instruction that uses this result as an operand has access to the translated value. When the traversal is complete, we return the translation of `x` as the root of the left-hand side candidate.
--- Cargo.lock | 2 + cranelift/Cargo.toml | 4 +- cranelift/codegen/Cargo.toml | 4 + cranelift/codegen/src/context.rs | 17 + cranelift/codegen/src/lib.rs | 3 + cranelift/codegen/src/souper_harvest.rs | 500 ++++++++++++++++++++++++ cranelift/src/clif-util.rs | 27 +- cranelift/src/souper_harvest.rs | 87 +++++ 8 files changed, 642 insertions(+), 2 deletions(-) create mode 100644 cranelift/codegen/src/souper_harvest.rs mode change 100644 => 100755 cranelift/src/clif-util.rs create mode 100644 cranelift/src/souper_harvest.rs diff --git a/Cargo.lock b/Cargo.lock index 96d40bf923..38cecde0ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,6 +390,7 @@ dependencies = [ "regalloc", "serde", "smallvec", + "souper-ir", "target-lexicon", "thiserror", "wast", @@ -566,6 +567,7 @@ dependencies = [ "log", "peepmatic-souper", "pretty_env_logger", + "rayon", "target-lexicon", "term", "thiserror", diff --git a/cranelift/Cargo.toml b/cranelift/Cargo.toml index 7347c75d96..5f29620e33 100644 --- a/cranelift/Cargo.toml +++ b/cranelift/Cargo.toml @@ -38,14 +38,16 @@ wat = { version = "1.0.18", optional = true } target-lexicon = "0.10" peepmatic-souper = { path = "./peepmatic/crates/souper", version = "0.66.0", optional = true } pretty_env_logger = "0.4.0" +rayon = { version = "1", optional = true } file-per-thread-logger = "0.1.2" indicatif = "0.13.0" thiserror = "1.0.15" walkdir = "2.2" [features] -default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper"] +default = ["disas", "wasm", "cranelift-codegen/all-arch", "peepmatic-souper", "souper-harvest"] disas = ["capstone"] enable-peepmatic = ["cranelift-codegen/enable-peepmatic", "cranelift-filetests/enable-peepmatic"] wasm = ["wat", "cranelift-wasm"] experimental_x64 = ["cranelift-codegen/x64"] +souper-harvest = ["cranelift-codegen/souper-harvest", "rayon"] diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index cc9e4421ea..d0b7120ee1 100644 --- a/cranelift/codegen/Cargo.toml +++ 
b/cranelift/codegen/Cargo.toml @@ -29,6 +29,7 @@ peepmatic = { path = "../peepmatic", optional = true, version = "0.66.0" } peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, version = "0.66.0" } peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.66.0" } regalloc = "0.0.30" +souper-ir = { version = "1", optional = true } wast = { version = "22.0.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -87,5 +88,8 @@ rebuild-peephole-optimizers = ["peepmatic", "peepmatic-traits", "wast"] # Enable the use of `peepmatic`-generated peephole optimizers. enable-peepmatic = ["peepmatic-runtime", "peepmatic-traits", "serde"] +# Enable support for the Souper harvester. +souper-harvest = ["souper-ir", "souper-ir/stringify"] + [badges] maintenance = { status = "experimental" } diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 3d2595f270..81b9dcbd23 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -36,9 +36,14 @@ use crate::timing; use crate::unreachable_code::eliminate_unreachable_code; use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges}; use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult}; +#[cfg(feature = "souper-harvest")] +use alloc::string::String; use alloc::vec::Vec; use log::debug; +#[cfg(feature = "souper-harvest")] +use crate::souper_harvest::do_souper_harvest; + /// Persistent data structures and compilation pipeline. pub struct Context { /// The function we're compiling. @@ -447,4 +452,16 @@ impl Context { isa, )) } + + /// Harvest candidate left-hand sides for superoptimization with Souper. 
+ #[cfg(feature = "souper-harvest")] + pub fn souper_harvest( + &mut self, + isa: &dyn TargetIsa, + out: &mut std::sync::mpsc::Sender, + ) -> CodegenResult<()> { + self.preopt(isa)?; + do_souper_harvest(&self.func, out); + Ok(()) + } } diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 38b173de13..053d7b979c 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -116,6 +116,9 @@ mod value_label; #[cfg(feature = "enable-peepmatic")] mod peepmatic; +#[cfg(feature = "souper-harvest")] +mod souper_harvest; + pub use crate::result::{CodegenError, CodegenResult}; /// Version number of this crate. diff --git a/cranelift/codegen/src/souper_harvest.rs b/cranelift/codegen/src/souper_harvest.rs new file mode 100644 index 0000000000..1e0d0eb382 --- /dev/null +++ b/cranelift/codegen/src/souper_harvest.rs @@ -0,0 +1,500 @@ +//! Harvest left-hand side superoptimization candidates. +//! +//! Given a clif function, harvest all its integer subexpressions, so that they +//! can be fed into [Souper](https://github.com/google/souper) as candidates for +//! superoptimization. For some of these candidates, Souper will successfully +//! synthesize a right-hand side that is equivalent but has lower cost than the +//! left-hand side. Then, we can combine these left- and right-hand sides into a +//! complete optimization, and add it to our peephole passes. +//! +//! To harvest the expression that produced a given value `x`, we do a +//! post-order traversal of the dataflow graph starting from `x`. As we do this +//! traversal, we maintain a map from clif values to their translated Souper +//! values. We stop traversing when we reach anything that can't be translated +//! into Souper IR: a memory load, a float-to-int conversion, a block parameter, +//! etc. For values produced by these instructions, we create a Souper `var`, +//! which is an input variable to the optimization. For instructions that have a +//! 
direct mapping into Souper IR, we get the Souper version of each of its +//! operands and then create the Souper version of the instruction itself. It +//! should now be clear why we do a post-order traversal: we need an +//! instruction's translated operands in order to translate the instruction +//! itself. Once this instruction is translated, we update the clif-to-souper +//! map with this new translation so that any other instruction that uses this +//! result as an operand has access to the translated value. When the traversal +//! is complete we return the translation of `x` as the root of left-hand side +//! candidate. + +use crate::ir; +use souper_ir::ast; +use std::collections::{HashMap, HashSet}; +use std::string::String; +use std::sync::mpsc; +use std::vec::Vec; + +/// Harvest Souper left-hand side candidates from the given function. +/// +/// Candidates are reported through the given MPSC sender. +pub fn do_souper_harvest(func: &ir::Function, out: &mut mpsc::Sender) { + let mut allocs = Allocs::default(); + + // Iterate over each instruction in each block and try and harvest a + // left-hand side from its result. + for block in func.layout.blocks() { + let mut option_inst = func.layout.first_inst(block); + while let Some(inst) = option_inst { + let results = func.dfg.inst_results(inst); + if results.len() == 1 { + let val = results[0]; + let ty = func.dfg.value_type(val); + if ty.is_int() && ty.lane_count() == 1 { + harvest_candidate_lhs(&mut allocs, func, val, out); + } + } + option_inst = func.layout.next_inst(inst); + } + } +} + +/// Allocations that we reuse across many LHS candidate harvests. +#[derive(Default)] +struct Allocs { + /// A map from cranelift IR to souper IR for values that we've already + /// translated into souper IR. + ir_to_souper_val: HashMap, + + /// Stack of to-visit and to-trace values for the post-order DFS. + dfs_stack: Vec, + + /// Set of values we've already seen in our post-order DFS. 
+ dfs_seen: HashSet, +} + +impl Allocs { + /// Reset the collections to their empty state (without deallocating their + /// backing data). + fn reset(&mut self) { + self.ir_to_souper_val.clear(); + self.dfs_stack.clear(); + self.dfs_seen.clear(); + } +} + +/// Harvest a candidate LHS for `val` from the dataflow graph. +fn harvest_candidate_lhs( + allocs: &mut Allocs, + func: &ir::Function, + val: ir::Value, + out: &mut mpsc::Sender, +) { + allocs.reset(); + let mut lhs = ast::LeftHandSideBuilder::default(); + let mut non_var_count = 0; + + // Should we keep tracing through the given `val`? Only if it is defined + // by an instruction that we can translate to Souper IR. + let should_trace = |val| match func.dfg.value_def(val) { + ir::ValueDef::Result(inst, 0) => match func.dfg[inst].opcode() { + ir::Opcode::Iadd + | ir::Opcode::IaddImm + | ir::Opcode::IrsubImm + | ir::Opcode::Imul + | ir::Opcode::ImulImm + | ir::Opcode::Udiv + | ir::Opcode::UdivImm + | ir::Opcode::Sdiv + | ir::Opcode::SdivImm + | ir::Opcode::Urem + | ir::Opcode::UremImm + | ir::Opcode::Srem + | ir::Opcode::SremImm + | ir::Opcode::Band + | ir::Opcode::BandImm + | ir::Opcode::Bor + | ir::Opcode::BorImm + | ir::Opcode::Bxor + | ir::Opcode::BxorImm + | ir::Opcode::Ishl + | ir::Opcode::IshlImm + | ir::Opcode::Sshr + | ir::Opcode::SshrImm + | ir::Opcode::Ushr + | ir::Opcode::UshrImm + | ir::Opcode::Select + | ir::Opcode::Uextend + | ir::Opcode::Sextend + | ir::Opcode::Trunc + | ir::Opcode::Icmp + | ir::Opcode::Popcnt + | ir::Opcode::Bitrev + | ir::Opcode::Clz + | ir::Opcode::Ctz + // TODO: ir::Opcode::IaddCarry + | ir::Opcode::SaddSat + | ir::Opcode::SsubSat + | ir::Opcode::UsubSat => true, + _ => false, + }, + _ => false, + }; + + post_order_dfs(allocs, &func.dfg, val, should_trace, |allocs, val| { + let souper_assignment_rhs = match func.dfg.value_def(val) { + ir::ValueDef::Result(inst, 0) => { + let args = func.dfg.inst_args(inst); + let arg = |allocs: &mut Allocs, n| 
allocs.ir_to_souper_val[&args[n]].into(); + + match (func.dfg[inst].opcode(), &func.dfg[inst]) { + (ir::Opcode::Iadd, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Add { a, b }.into() + } + (ir::Opcode::IaddImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Add { a, b }.into() + } + (ir::Opcode::IrsubImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let b = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let a = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Sub { a, b }.into() + } + (ir::Opcode::Imul, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Mul { a, b }.into() + } + (ir::Opcode::ImulImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Mul { a, b }.into() + } + (ir::Opcode::Udiv, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Udiv { a, b }.into() + } + (ir::Opcode::UdivImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Udiv { a, b }.into() + } + (ir::Opcode::Sdiv, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Sdiv { a, b }.into() + } + (ir::Opcode::SdivImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Sdiv { a, b }.into() + } + (ir::Opcode::Urem, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Urem { a, b }.into() + } + (ir::Opcode::UremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Urem { a, b }.into() + } + (ir::Opcode::Srem, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Srem { a, b }.into() + } + (ir::Opcode::SremImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Srem { a, b }.into() + } + (ir::Opcode::Band, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::And { a, b }.into() + } + (ir::Opcode::BandImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::And { a, b }.into() + } + (ir::Opcode::Bor, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Or { a, b }.into() + } + (ir::Opcode::BorImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Or { a, b }.into() + } + (ir::Opcode::Bxor, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Xor { a, b }.into() + } + (ir::Opcode::BxorImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Xor { a, b }.into() + } + (ir::Opcode::Ishl, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Shl { a, b }.into() + } + (ir::Opcode::IshlImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Shl { a, b }.into() + } + (ir::Opcode::Sshr, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Ashr { a, b }.into() + } + (ir::Opcode::SshrImm, ir::InstructionData::BinaryImm64 { imm, .. }) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Ashr { a, b }.into() + } + (ir::Opcode::Ushr, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::Lshr { a, b }.into() + } + (ir::Opcode::UshrImm, ir::InstructionData::BinaryImm64 { imm, .. 
}) => { + let a = arg(allocs, 0); + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + let b = ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into(); + ast::Instruction::Lshr { a, b }.into() + } + (ir::Opcode::Select, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + let c = arg(allocs, 2); + ast::Instruction::Select { a, b, c }.into() + } + (ir::Opcode::Uextend, _) => { + let a = arg(allocs, 0); + ast::Instruction::Zext { a }.into() + } + (ir::Opcode::Sextend, _) => { + let a = arg(allocs, 0); + ast::Instruction::Sext { a }.into() + } + (ir::Opcode::Trunc, _) => { + let a = arg(allocs, 0); + ast::Instruction::Trunc { a }.into() + } + (ir::Opcode::Popcnt, _) => { + let a = arg(allocs, 0); + ast::Instruction::Ctpop { a }.into() + } + (ir::Opcode::Bitrev, _) => { + let a = arg(allocs, 0); + ast::Instruction::BitReverse { a }.into() + } + (ir::Opcode::Clz, _) => { + let a = arg(allocs, 0); + ast::Instruction::Ctlz { a }.into() + } + (ir::Opcode::Ctz, _) => { + let a = arg(allocs, 0); + ast::Instruction::Cttz { a }.into() + } + // TODO: ir::Opcode::IaddCarry + (ir::Opcode::SaddSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::SaddSat { a, b }.into() + } + (ir::Opcode::SsubSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::SsubSat { a, b }.into() + } + (ir::Opcode::UsubSat, _) => { + let a = arg(allocs, 0); + let b = arg(allocs, 1); + ast::Instruction::UsubSat { a, b }.into() + } + (ir::Opcode::Iconst, ir::InstructionData::UnaryImm { imm, .. }) => { + let value: i64 = (*imm).into(); + let value: i128 = value.into(); + ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into() + } + (ir::Opcode::Bconst, ir::InstructionData::UnaryBool { imm, .. 
}) => { + let value = *imm as i128; + ast::Constant { + value, + r#type: souper_type_of(&func.dfg, val), + } + .into() + } + _ => ast::AssignmentRhs::Var, + } + } + _ => ast::AssignmentRhs::Var, + }; + + non_var_count += !matches!(souper_assignment_rhs, ast::AssignmentRhs::Var) as u32; + let souper_ty = souper_type_of(&func.dfg, val); + let souper_val = lhs.assignment(None, souper_ty, souper_assignment_rhs, vec![]); + let old_value = allocs.ir_to_souper_val.insert(val, souper_val); + assert!(old_value.is_none()); + }); + + // We end up harvesting a lot of candidates like: + // + // %0:i32 = var + // infer %0 + // + // and + // + // %0:i32 = var + // %1:i32 = var + // %2:i32 = add %0, %1 + // + // Both of these are useless. Only actually harvest the candidate if there + // are at least two actual operations. + if non_var_count >= 2 { + let lhs = lhs.finish(allocs.ir_to_souper_val[&val], None); + out.send(format!( + ";; Harvested from `{}` in `{}`\n{}\n", + val, func.name, lhs + )) + .unwrap(); + } +} + +fn souper_type_of(dfg: &ir::DataFlowGraph, val: ir::Value) -> Option { + let ty = dfg.value_type(val); + assert!(ty.is_int() || ty.is_bool()); + assert_eq!(ty.lane_count(), 1); + Some(ast::Type { width: ty.bits() }) +} + +#[derive(Debug)] +enum StackEntry { + Visit(ir::Value), + Trace(ir::Value), +} + +fn post_order_dfs( + allocs: &mut Allocs, + dfg: &ir::DataFlowGraph, + val: ir::Value, + should_trace: impl Fn(ir::Value) -> bool, + mut visit: impl FnMut(&mut Allocs, ir::Value), +) { + allocs.dfs_stack.push(StackEntry::Trace(val)); + + while let Some(entry) = allocs.dfs_stack.pop() { + match entry { + StackEntry::Visit(val) => { + let is_new = allocs.dfs_seen.insert(val); + if is_new { + visit(allocs, val); + } + } + StackEntry::Trace(val) => { + if allocs.dfs_seen.contains(&val) { + continue; + } + + allocs.dfs_stack.push(StackEntry::Visit(val)); + if should_trace(val) { + if let ir::ValueDef::Result(inst, 0) = dfg.value_def(val) { + let args = dfg.inst_args(inst); 
+ for v in args.iter().rev().copied() { + allocs.dfs_stack.push(StackEntry::Trace(v)); + } + } + } + } + } + } +} diff --git a/cranelift/src/clif-util.rs b/cranelift/src/clif-util.rs old mode 100644 new mode 100755 index 2948c544f9..fb07fba621 --- a/cranelift/src/clif-util.rs +++ b/cranelift/src/clif-util.rs @@ -27,6 +27,8 @@ mod disasm; mod interpret; mod print_cfg; mod run; +#[cfg(feature = "souper-harvest")] +mod souper_harvest; mod utils; #[cfg(feature = "peepmatic-souper")] @@ -265,6 +267,13 @@ fn main() { .about("Convert Souper optimizations into Peepmatic DSL.") .arg(add_single_input_file_arg()) .arg(add_output_arg()), + ) + .subcommand( + SubCommand::with_name("souper-harvest") + .arg(add_single_input_file_arg()) + .arg(add_output_arg()) + .arg(add_target_flag()) + .arg(add_set_flag()), ); let res_util = match app_cmds.get_matches().subcommand() { @@ -392,12 +401,28 @@ fn main() { #[cfg(not(feature = "peepmatic-souper"))] { Err( - "Error: clif-util was compiled without suport for the `souper-to-peepmatic` \ + "Error: clif-util was compiled without support for the `souper-to-peepmatic` \ subcommand" .into(), ) } } + ("souper-harvest", Some(rest_cmd)) => { + #[cfg(feature = "souper-harvest")] + { + souper_harvest::run( + rest_cmd.value_of("target").unwrap_or_default(), + rest_cmd.value_of("single-file").unwrap(), + rest_cmd.value_of("output").unwrap(), + &get_vec(rest_cmd.values_of("set")), + ) + } + + #[cfg(not(feature = "souper-harvest"))] + { + Err("clif-util was compiled without `souper-harvest` support".into()) + } + } _ => Err("Invalid subcommand.".to_owned()), }; diff --git a/cranelift/src/souper_harvest.rs b/cranelift/src/souper_harvest.rs new file mode 100644 index 0000000000..4167611b72 --- /dev/null +++ b/cranelift/src/souper_harvest.rs @@ -0,0 +1,87 @@ +use crate::utils::parse_sets_and_triple; +use cranelift_codegen::Context; +use cranelift_wasm::{DummyEnvironment, ReturnMode}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use 
std::{fs, io};

/// The four-byte magic number (`\0asm`) used to recognize a WebAssembly binary.
static WASM_MAGIC: &[u8] = &[0x00, 0x61, 0x73, 0x6D];

/// Run the `souper-harvest` subcommand.
///
/// Reads `input` (a file path, or `-` for stdin). If the contents begin with
/// the Wasm magic number they are translated to clif via `cranelift_wasm`;
/// otherwise they are parsed as textual clif. Every resulting function is then
/// harvested in parallel, and each harvested left-hand side is written to
/// `output` (a file path, or `-` for stdout) by a dedicated writer thread.
///
/// # Errors
///
/// Returns a human-readable message when the flags/target cannot be parsed, no
/// target ISA was given, the input cannot be opened/read/parsed, the output
/// cannot be created/written, or the harvester itself fails.
pub fn run(target: &str, input: &str, output: &str, flag_set: &[String]) -> Result<(), String> {
    let parsed = parse_sets_and_triple(flag_set, target)?;
    let fisa = parsed.as_fisa();
    let isa = fisa
        .isa
        .ok_or_else(|| String::from("`souper-harvest` requires a target isa"))?;

    // `stdin` must outlive the lock that may be boxed into `input` below.
    let stdin = io::stdin();
    let mut input: Box<dyn io::BufRead> = match input {
        "-" => Box::new(stdin.lock()),
        _ => Box::new(io::BufReader::new(
            fs::File::open(input).map_err(|e| format!("failed to open input file: {}", e))?,
        )),
    };

    // The writer is moved into another thread, hence the `Send` bound.
    let mut output: Box<dyn io::Write + Send> = match output {
        "-" => Box::new(io::stdout()),
        _ => Box::new(io::BufWriter::new(
            fs::File::create(output).map_err(|e| format!("failed to create output file: {}", e))?,
        )),
    };

    let mut contents = vec![];
    input
        .read_to_end(&mut contents)
        .map_err(|e| format!("failed to read from input file: {}", e))?;

    // `starts_with` is simply false for inputs shorter than the magic number,
    // whereas slicing `contents[..4]` would panic on them.
    let funcs = if contents.starts_with(WASM_MAGIC) {
        let mut dummy_environ =
            DummyEnvironment::new(isa.frontend_config(), ReturnMode::NormalReturns, false);
        cranelift_wasm::translate_module(&contents, &mut dummy_environ)
            .map_err(|e| format!("failed to translate Wasm module to clif: {}", e))?;
        dummy_environ
            .info
            .function_bodies
            .iter()
            .map(|(_, f)| f.clone())
            .collect()
    } else {
        let contents = String::from_utf8(contents)
            .map_err(|e| format!("input is not a UTF-8 string: {}", e))?;
        cranelift_reader::parse_functions(&contents)
            .map_err(|e| format!("failed to parse clif: {}", e))?
    };

    let (send, recv) = std::sync::mpsc::channel::<String>();

    let writing_thread = std::thread::spawn(move || -> Result<(), String> {
        // Remember the first write error but keep draining the channel: the
        // harvesting side `unwrap`s its `send`, so dropping the receiver early
        // would turn an I/O error into a panic on a worker thread.
        let mut result = Ok(());
        for lhs in recv {
            if result.is_ok() {
                result = output
                    .write_all(lhs.as_bytes())
                    .map_err(|e| format!("failed to write to output file: {}", e));
            }
        }
        result?;

        // Flush explicitly; a buffered writer that is merely dropped discards
        // any error from its final write.
        output
            .flush()
            .map_err(|e| format!("failed to write to output file: {}", e))
    });

    // All sender clones are owned by the parallel iterator and are dropped
    // once `collect` returns, which lets the writer thread's loop terminate.
    let harvest_result = funcs
        .into_par_iter()
        .map_with(send, move |send, func| {
            let mut ctx = Context::new();
            ctx.func = func;
            ctx.souper_harvest(isa, send)
                .map_err(|e| format!("failed to run souper harvester: {}", e))
        })
        .collect::<Result<(), String>>();

    // Always join the writer (even if harvesting failed) so that everything
    // harvested so far reaches the output before we return.
    let write_result = match writing_thread.join() {
        Ok(result) => result,
        Err(e) => std::panic::resume_unwind(e),
    };

    harvest_result?;
    write_result
}