From c85bf27ff8cdaffec26a25d46b4343a06e00027a Mon Sep 17 00:00:00 2001 From: Karl Meakin Date: Tue, 4 Apr 2023 01:25:05 +0100 Subject: [PATCH] simple_gvn: recognize commutative operators (#6135) * simple_gvn: recognize commutative operators Normalize instructions with commutative opcodes by sorting the arguments. This means instructions like `iadd v0, v1` and `iadd v1, v0` will be considered identical by GVN and deduplicated. * Remove `UsubSat` and `SsubSat` from `is_commutative` They are not actually commutative * Remove `TODO`s * Move InstructionData normalization into helper fn * Add normalization of commutative instructions in the epgrah implementation * Handle reflexive icmp/fcmps in GVN * Change formatting of `normalize_in_place` * suggestions from code review --- cranelift/codegen/src/egraph.rs | 5 ++ cranelift/codegen/src/ir/condcodes.rs | 4 +- cranelift/codegen/src/ir/instructions.rs | 58 +++++++++++++++++++ cranelift/codegen/src/simple_gvn.rs | 14 +++-- .../filetests/filetests/egraph/algebraic.clif | 14 ++--- .../filetests/filetests/egraph/basic-gvn.clif | 2 +- .../filetests/egraph/load-hoist.clif | 2 +- .../filetests/simple_gvn/commutative.clif | 55 ++++++++++++++++++ .../duplicate-loads-dynamic-memory-egraph.wat | 4 +- .../duplicate-loads-static-memory-egraph.wat | 8 +-- ...re-access-same-index-different-offsets.wat | 12 ++-- ...re-access-same-index-different-offsets.wat | 8 +-- 12 files changed, 153 insertions(+), 33 deletions(-) create mode 100644 cranelift/filetests/filetests/simple_gvn/commutative.clif diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index e12b6fed39..f56a562e6a 100644 --- a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -134,6 +134,11 @@ where /// - Update the value-to-opt-value map, and update the eclass /// union-find, if we rewrote the value to different form(s). pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value { + match inst { + NewOrExistingInst::New(mut data, _) => data.normalize_in_place(), + NewOrExistingInst::Existing(inst) => self.func.dfg.insts[inst].normalize_in_place(), + } + // Create the external context for looking up and updating the // GVN map. This is necessary so that instructions themselves // do not have to carry all the references or data for a full diff --git a/cranelift/codegen/src/ir/condcodes.rs b/cranelift/codegen/src/ir/condcodes.rs index 7059ce6c92..bdc17c30f4 100644 --- a/cranelift/codegen/src/ir/condcodes.rs +++ b/cranelift/codegen/src/ir/condcodes.rs @@ -32,7 +32,7 @@ pub trait CondCode: Copy { /// This condition code is used by the `icmp` instruction to compare integer values. There are /// separate codes for comparing the integers as signed or unsigned numbers where it makes a /// difference. -#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum IntCC { /// `==`. @@ -194,7 +194,7 @@ impl FromStr for IntCC { /// The condition codes described here are used to produce a single boolean value from the /// comparison. The 14 condition codes here cover every possible combination of the relation above /// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`. -#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum FloatCC { /// EQ | LT | GT diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs index 085f203639..dfecde2bf3 100644 --- a/cranelift/codegen/src/ir/instructions.rs +++ b/cranelift/codegen/src/ir/instructions.rs @@ -6,6 +6,7 @@ //! A large part of this module is auto-generated from the instruction descriptions in the meta //! directory. +use crate::ir::condcodes::CondCode; use alloc::vec::Vec; use core::fmt::{self, Display, Formatter}; use core::ops::{Deref, DerefMut}; @@ -200,6 +201,32 @@ impl Opcode { _ => false, } } + + /// Returns true if the instruction is commutative: `op(x, y) == op(y, x)` for all x, y + pub fn is_commutative(&self) -> bool { + match self { + Opcode::Band | Opcode::Bor | Opcode::Bxor | Opcode::BxorNot => true, + + Opcode::Iadd + | Opcode::Imul + | Opcode::IaddPairwise + | Opcode::Umulhi + | Opcode::Smulhi + | Opcode::UaddOverflowTrap + | Opcode::Umin + | Opcode::Umax + | Opcode::Smin + | Opcode::Smax + | Opcode::AvgRound + | Opcode::UaddSat + | Opcode::SaddSat => true, + + Opcode::Fadd | Opcode::Fmul => true, + Opcode::Fmin | Opcode::Fmax => true, + + _ => false, + } + } } // This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since @@ -469,6 +496,37 @@ impl InstructionData { _ => {} } } + + /// Normalize commutative instructions by sorting arguments + #[inline] + pub(crate) fn normalize_in_place(&mut self) { + match self { + InstructionData::Binary { opcode, args } + if args[0] > args[1] && opcode.is_commutative() => + { + args.swap(0, 1) + } + InstructionData::Ternary { + opcode: Opcode::Fma, + args, + } if args[0] > args[1] => args.swap(0, 1), + InstructionData::IntCompare { args, cond, .. } if args[0] > args[1] => { + args.swap(0, 1); + *cond = cond.reverse(); + } + InstructionData::IntCompare { args, cond, .. } if args[0] == args[1] => { + *cond = std::cmp::min(*cond, cond.reverse()); + } + InstructionData::FloatCompare { args, cond, .. } if args[0] > args[1] => { + args.swap(0, 1); + *cond = cond.reverse(); + } + InstructionData::FloatCompare { args, cond, .. } if args[0] == args[1] => { + *cond = std::cmp::min(*cond, cond.reverse()); + } + _ => {} + } + } } /// Information about call instructions. diff --git a/cranelift/codegen/src/simple_gvn.rs b/cranelift/codegen/src/simple_gvn.rs index 6b09ae96b2..033f66539d 100644 --- a/cranelift/codegen/src/simple_gvn.rs +++ b/cranelift/codegen/src/simple_gvn.rs @@ -35,6 +35,14 @@ struct HashKey<'a, 'f: 'a> { ty: Type, pos: &'a RefCell>, } + +impl<'a, 'f: 'a> HashKey<'a, 'f> { + fn new(mut inst: InstructionData, ty: Type, pos: &'a RefCell>) -> Self { + inst.normalize_in_place(); + Self { inst, ty, pos } + } +} + impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> { fn hash(&self, state: &mut H) { let pool = &self.pos.borrow().func.dfg.value_lists; @@ -113,11 +121,7 @@ pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) { } let ctrl_typevar = func.dfg.ctrl_typevar(inst); - let key = HashKey { - inst: func.dfg.insts[inst], - ty: ctrl_typevar, - pos: &pos, - }; + let key = HashKey::new(func.dfg.insts[inst], ctrl_typevar, &pos); use crate::scoped_hash_map::Entry::*; match visible_values.entry(key) { Occupied(entry) => { diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif index ce91d0dff6..deeb3d1bf7 100644 --- a/cranelift/filetests/filetests/egraph/algebraic.clif +++ b/cranelift/filetests/filetests/egraph/algebraic.clif @@ -283,8 +283,6 @@ block0(v0: i32): v2 = imul v1, v0 return v2 ; check: v3 = ineg v0 - ; check: v5 -> v3 - ; check: v6 -> v3 ; check: return v3 } @@ -326,8 +324,8 @@ block0(v0: i8): v3 = iconst.i8 3 v4 = bor v3, v2 return v4 - ; check: v6 = bor v0, v3 - ; check: return v6 + ; check: v5 = bor v0, v3 ; v3 = 3 + ; check: return v5 } function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 { @@ -379,8 +377,8 @@ block0(v0: i64): v3 = iconst.i64 3 v4 = bor v3, v2 return v4 - ; check: v6 = bor v0, v3 - ; check: return v6 + ; check: v5 = bor v0, v3 ; v3 = 3 + ; check: return v5 } function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 { @@ -421,8 +419,8 @@ block0(v1: i64): return v3 } -; check: v5 = bnot v1 -; check: return v5 +; check: v4 = bnot v1 +; check: return v4 function %extend_always_above_zero(i32) -> i8 { block0(v1: i32): diff --git a/cranelift/filetests/filetests/egraph/basic-gvn.clif b/cranelift/filetests/filetests/egraph/basic-gvn.clif index 3d74a31b1e..a4a6912e47 100644 --- a/cranelift/filetests/filetests/egraph/basic-gvn.clif +++ b/cranelift/filetests/filetests/egraph/basic-gvn.clif @@ -22,7 +22,7 @@ block2(v6: i32): ; check: block0(v0: i32, v1: i32): ; check: v2 = iadd v0, v1 ; check: block1: -; check: v5 = iadd.i32 v2, v0 +; check: v5 = iadd.i32 v0, v2 ; nextln: return v5 ; check: block2: ; nextln: return v1 diff --git a/cranelift/filetests/filetests/egraph/load-hoist.clif b/cranelift/filetests/filetests/egraph/load-hoist.clif index f52a1995c4..a802a6ff96 100644 --- a/cranelift/filetests/filetests/egraph/load-hoist.clif +++ b/cranelift/filetests/filetests/egraph/load-hoist.clif @@ -39,6 +39,6 @@ function %foo(i64 vmctx, i64, i32, i32) -> i32 fast { ; check: v9 = load.i64 notrap aligned readonly v0+80 ; check: block2(v6: i32, v7: i32, v15: i32): -; check: v10 = iadd.i64 v9, v8 +; check: v10 = iadd v8, v9 ; check: v11 = load.i32 little heap v10 ; check: brif v17, block2(v12, v14, v17), block4 diff --git a/cranelift/filetests/filetests/simple_gvn/commutative.clif b/cranelift/filetests/filetests/simple_gvn/commutative.clif new file mode 100644 index 0000000000..760d395e92 --- /dev/null +++ b/cranelift/filetests/filetests/simple_gvn/commutative.clif @@ -0,0 +1,55 @@ +test simple-gvn + +function %commutative_binary(i32, i32) -> i32, i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + v3 = iadd v1, v0 + return v2, v3 + ; check: v2 = iadd v0, v1 + ; check: return v2, v2 +} + +function %commutative_ternary(f32, f32, f32) -> f32, f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + v4 = fma v1, v0, v2 + return v3, v4 + ; check: v3 = fma v0, v1, v2 + ; check: return v3, v3 +} + +function %commutative_icmp(i32, i32) -> i8, i8 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + v3 = icmp ugt v1, v0 + return v2, v3 + ; check: v2 = icmp ult v0, v1 + ; check: return v2, v2 +} + +function %commutative_icmp_reflexive(i32, i32) -> i8, i8 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v0 + v3 = icmp uge v0, v0 + return v2, v3 + ; check: v2 = icmp ule v0, v0 + ; check: return v2, v2 +} + +function %commutative_fcmp(f32, f32) -> i8, i8 { +block0(v0: f32, v1: f32): + v2 = fcmp lt v0, v1 + v3 = fcmp gt v1, v0 + return v2, v3 + ; check: v2 = fcmp lt v0, v1 + ; check: return v2, v2 +} + +function %commutative_fcmp_reflexive(f32, f32) -> i8, i8 { +block0(v0: f32, v1: f32): + v2 = fcmp ule v0, v0 + v3 = fcmp uge v0, v0 + return v2, v3 + ; check: v2 = fcmp ule v0, v0 + ; check: return v2, v2 +} diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat index 73aaccd829..685fd3cd51 100644 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat +++ b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat @@ -54,7 +54,7 @@ ;; @0057 v4 = uextend.i64 v0 ;; @0057 v6 = icmp ugt v4, v5 ;; @0057 v9 = iconst.i64 0 -;; @0057 v8 = iadd v7, v4 +;; @0057 v8 = iadd v4, v7 ;; @0057 v10 = select_spectre_guard v6, v9, v8 ; v9 = 0 ;; @0057 v11 = load.i32 little heap v10 ;; v2 -> v11 @@ -75,7 +75,7 @@ ;; @0064 v4 = uextend.i64 v0 ;; @0064 v6 = icmp ugt v4, v5 ;; @0064 v10 = iconst.i64 0 -;; @0064 v8 = iadd v7, v4 +;; @0064 v8 = iadd v4, v7 ;; v22 = iconst.i64 1234 ;; @0064 v9 = iadd v8, v22 ; v22 = 1234 ;; @0064 v11 = select_spectre_guard v6, v10, v9 ; v10 = 0 diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat index d434d5a33a..1a8571eff6 100644 --- a/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat +++ b/cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat @@ -44,9 +44,9 @@ ;; gv1 = load.i64 notrap aligned readonly gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0057 v5 = load.i64 notrap aligned readonly v1 ;; @0057 v4 = uextend.i64 v0 -;; @0057 v6 = iadd v5, v4 +;; @0057 v5 = load.i64 notrap aligned readonly v1 +;; @0057 v6 = iadd v4, v5 ;; @0057 v7 = load.i32 little heap v6 ;; v2 -> v7 ;; @005f jump block1 @@ -60,9 +60,9 @@ ;; gv1 = load.i64 notrap aligned readonly gv0 ;; ;; block0(v0: i32, v1: i64): -;; @0064 v5 = load.i64 notrap aligned readonly v1 ;; @0064 v4 = uextend.i64 v0 -;; @0064 v6 = iadd v5, v4 +;; @0064 v5 = load.i64 notrap aligned readonly v1 +;; @0064 v6 = iadd v4, v5 ;; v14 = iconst.i64 1234 ;; @0064 v7 = iadd v6, v14 ; v14 = 1234 ;; @0064 v8 = load.i32 little heap v7 diff --git a/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat b/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat index 14da1eb483..50894bda71 100644 --- a/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat +++ b/cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat @@ -72,7 +72,7 @@ ;; ;; block3: ;; @0047 v8 = load.i64 notrap aligned v1 -;; @0047 v9 = iadd v8, v5 +;; @0047 v9 = iadd.i64 v5, v8 ;; @0047 v10 = load.i32 little heap v9 ;; v2 -> v10 ;; @004c brif.i8 v7, block4, block5 @@ -87,7 +87,7 @@ ;; v3 -> v17 ;; @0051 v19 = iconst.i64 0x0010_0003 ;; @0051 v20 = uadd_overflow_trap.i64 v5, v19, heap_oob ; v19 = 0x0010_0003 -;; @0051 v22 = icmp ugt v20, v6 +;; @0051 v22 = icmp.i64 ult v6, v20 ;; @0051 brif v22, block6, block7 ;; ;; block6 cold: @@ -95,7 +95,7 @@ ;; ;; block7: ;; @0051 v23 = load.i64 notrap aligned v1 -;; @0051 v24 = iadd v23, v5 +;; @0051 v24 = iadd.i64 v5, v23 ;; v28 = iconst.i64 0x000f_ffff ;; @0051 v25 = iadd v24, v28 ; v28 = 0x000f_ffff ;; @0051 v26 = load.i32 little heap v25 @@ -122,7 +122,7 @@ ;; ;; block3: ;; @005d v8 = load.i64 notrap aligned v4 -;; @005d v9 = iadd v8, v5 +;; @005d v9 = iadd.i64 v5, v8 ;; @005d store.i32 little heap v1, v9 ;; @0064 brif.i8 v7, block4, block5 ;; @@ -135,7 +135,7 @@ ;; @0064 store.i32 little heap v2, v15 ;; @006b v17 = iconst.i64 0x0010_0003 ;; @006b v18 = uadd_overflow_trap.i64 v5, v17, heap_oob ; v17 = 0x0010_0003 -;; @006b v20 = icmp ugt v18, v6 +;; @006b v20 = icmp.i64 ult v6, v18 ;; @006b brif v20, block6, block7 ;; ;; block6 cold: @@ -143,7 +143,7 @@ ;; ;; block7: ;; @006b v21 = load.i64 notrap aligned v4 -;; @006b v22 = iadd v21, v5 +;; @006b v22 = iadd.i64 v5, v21 ;; v25 = iconst.i64 0x000f_ffff ;; @006b v23 = iadd v22, v25 ; v25 = 0x000f_ffff ;; @006b store.i32 little heap v3, v23 diff --git a/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat b/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat index 4dbe53e807..66599d66ea 100644 --- a/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat +++ b/cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat @@ -67,7 +67,7 @@ ;; @0047 v5 = uextend.i64 v0 ;; @0047 v7 = icmp ugt v5, v6 ;; @0047 v10 = iconst.i64 0 -;; @0047 v9 = iadd v8, v5 +;; @0047 v9 = iadd v5, v8 ;; @0047 v11 = select_spectre_guard v7, v10, v9 ; v10 = 0 ;; @0047 v12 = load.i32 little heap v11 ;; v2 -> v12 @@ -78,7 +78,7 @@ ;; v3 -> v21 ;; @0051 v23 = iconst.i64 0x0010_0003 ;; @0051 v24 = uadd_overflow_trap v5, v23, heap_oob ; v23 = 0x0010_0003 -;; @0051 v26 = icmp ugt v24, v6 +;; @0051 v26 = icmp ult v6, v24 ;; v34 = iconst.i64 0x000f_ffff ;; @0051 v29 = iadd v9, v34 ; v34 = 0x000f_ffff ;; @0051 v31 = select_spectre_guard v26, v10, v29 ; v10 = 0 @@ -101,7 +101,7 @@ ;; @005d v5 = uextend.i64 v0 ;; @005d v7 = icmp ugt v5, v6 ;; @005d v10 = iconst.i64 0 -;; @005d v9 = iadd v8, v5 +;; @005d v9 = iadd v5, v8 ;; @005d v11 = select_spectre_guard v7, v10, v9 ; v10 = 0 ;; @005d store little heap v1, v11 ;; v30 = iconst.i64 4 @@ -110,7 +110,7 @@ ;; @0064 store little heap v2, v19 ;; @006b v21 = iconst.i64 0x0010_0003 ;; @006b v22 = uadd_overflow_trap v5, v21, heap_oob ; v21 = 0x0010_0003 -;; @006b v24 = icmp ugt v22, v6 +;; @006b v24 = icmp ult v6, v22 ;; v31 = iconst.i64 0x000f_ffff ;; @006b v27 = iadd v9, v31 ; v31 = 0x000f_ffff ;; @006b v29 = select_spectre_guard v24, v10, v27 ; v10 = 0