Browse Source

simple_gvn: recognize commutative operators (#6135)

* simple_gvn: recognize commutative operators

Normalize instructions with commutative opcodes by sorting the arguments. This
means instructions like `iadd v0, v1` and `iadd v1, v0` will be considered
identical by GVN and deduplicated.

* Remove `UsubSat` and `SsubSat` from `is_commutative`

They are not actually commutative

* Remove `TODO`s

* Move InstructionData normalization into helper fn

* Add normalization of commutative instructions in the epgrah implementation

* Handle reflexive icmp/fcmps in GVN

* Change formatting of `normalize_in_place`

* suggestions from code review
pull/6131/merge
Karl Meakin 2 years ago
committed by GitHub
parent
commit
c85bf27ff8
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      cranelift/codegen/src/egraph.rs
  2. 4
      cranelift/codegen/src/ir/condcodes.rs
  3. 58
      cranelift/codegen/src/ir/instructions.rs
  4. 14
      cranelift/codegen/src/simple_gvn.rs
  5. 14
      cranelift/filetests/filetests/egraph/algebraic.clif
  6. 2
      cranelift/filetests/filetests/egraph/basic-gvn.clif
  7. 2
      cranelift/filetests/filetests/egraph/load-hoist.clif
  8. 55
      cranelift/filetests/filetests/simple_gvn/commutative.clif
  9. 4
      cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat
  10. 8
      cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat
  11. 12
      cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat
  12. 8
      cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat

5
cranelift/codegen/src/egraph.rs

@ -134,6 +134,11 @@ where
/// - Update the value-to-opt-value map, and update the eclass
/// union-find, if we rewrote the value to different form(s).
pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
match inst {
NewOrExistingInst::New(mut data, _) => data.normalize_in_place(),
NewOrExistingInst::Existing(inst) => self.func.dfg.insts[inst].normalize_in_place(),
}
// Create the external context for looking up and updating the
// GVN map. This is necessary so that instructions themselves
// do not have to carry all the references or data for a full

4
cranelift/codegen/src/ir/condcodes.rs

@ -32,7 +32,7 @@ pub trait CondCode: Copy {
/// This condition code is used by the `icmp` instruction to compare integer values. There are
/// separate codes for comparing the integers as signed or unsigned numbers where it makes a
/// difference.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum IntCC {
/// `==`.
@ -194,7 +194,7 @@ impl FromStr for IntCC {
/// The condition codes described here are used to produce a single boolean value from the
/// comparison. The 14 condition codes here cover every possible combination of the relation above
/// except the impossible `!UN & !EQ & !LT & !GT` and the always true `UN | EQ | LT | GT`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum FloatCC {
/// EQ | LT | GT

58
cranelift/codegen/src/ir/instructions.rs

@ -6,6 +6,7 @@
//! A large part of this module is auto-generated from the instruction descriptions in the meta
//! directory.
use crate::ir::condcodes::CondCode;
use alloc::vec::Vec;
use core::fmt::{self, Display, Formatter};
use core::ops::{Deref, DerefMut};
@ -200,6 +201,32 @@ impl Opcode {
_ => false,
}
}
/// Returns true if the instruction is commutative: `op(x, y) == op(y, x)` for all x, y
pub fn is_commutative(&self) -> bool {
match self {
Opcode::Band | Opcode::Bor | Opcode::Bxor | Opcode::BxorNot => true,
Opcode::Iadd
| Opcode::Imul
| Opcode::IaddPairwise
| Opcode::Umulhi
| Opcode::Smulhi
| Opcode::UaddOverflowTrap
| Opcode::Umin
| Opcode::Umax
| Opcode::Smin
| Opcode::Smax
| Opcode::AvgRound
| Opcode::UaddSat
| Opcode::SaddSat => true,
Opcode::Fadd | Opcode::Fmul => true,
Opcode::Fmin | Opcode::Fmax => true,
_ => false,
}
}
}
// This trait really belongs in cranelift-reader where it is used by the `.clif` file parser, but since
@ -469,6 +496,37 @@ impl InstructionData {
_ => {}
}
}
/// Normalize commutative instructions by sorting arguments
#[inline]
pub(crate) fn normalize_in_place(&mut self) {
match self {
InstructionData::Binary { opcode, args }
if args[0] > args[1] && opcode.is_commutative() =>
{
args.swap(0, 1)
}
InstructionData::Ternary {
opcode: Opcode::Fma,
args,
} if args[0] > args[1] => args.swap(0, 1),
InstructionData::IntCompare { args, cond, .. } if args[0] > args[1] => {
args.swap(0, 1);
*cond = cond.reverse();
}
InstructionData::IntCompare { args, cond, .. } if args[0] == args[1] => {
*cond = std::cmp::min(*cond, cond.reverse());
}
InstructionData::FloatCompare { args, cond, .. } if args[0] > args[1] => {
args.swap(0, 1);
*cond = cond.reverse();
}
InstructionData::FloatCompare { args, cond, .. } if args[0] == args[1] => {
*cond = std::cmp::min(*cond, cond.reverse());
}
_ => {}
}
}
}
/// Information about call instructions.

14
cranelift/codegen/src/simple_gvn.rs

@ -35,6 +35,14 @@ struct HashKey<'a, 'f: 'a> {
ty: Type,
pos: &'a RefCell<FuncCursor<'f>>,
}
impl<'a, 'f: 'a> HashKey<'a, 'f> {
fn new(mut inst: InstructionData, ty: Type, pos: &'a RefCell<FuncCursor<'f>>) -> Self {
inst.normalize_in_place();
Self { inst, ty, pos }
}
}
impl<'a, 'f: 'a> Hash for HashKey<'a, 'f> {
fn hash<H: Hasher>(&self, state: &mut H) {
let pool = &self.pos.borrow().func.dfg.value_lists;
@ -113,11 +121,7 @@ pub fn do_simple_gvn(func: &mut Function, domtree: &mut DominatorTree) {
}
let ctrl_typevar = func.dfg.ctrl_typevar(inst);
let key = HashKey {
inst: func.dfg.insts[inst],
ty: ctrl_typevar,
pos: &pos,
};
let key = HashKey::new(func.dfg.insts[inst], ctrl_typevar, &pos);
use crate::scoped_hash_map::Entry::*;
match visible_values.entry(key) {
Occupied(entry) => {

14
cranelift/filetests/filetests/egraph/algebraic.clif

@ -283,8 +283,6 @@ block0(v0: i32):
v2 = imul v1, v0
return v2
; check: v3 = ineg v0
; check: v5 -> v3
; check: v6 -> v3
; check: return v3
}
@ -326,8 +324,8 @@ block0(v0: i8):
v3 = iconst.i8 3
v4 = bor v3, v2
return v4
; check: v6 = bor v0, v3
; check: return v6
; check: v5 = bor v0, v3 ; v3 = 3
; check: return v5
}
function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 {
@ -379,8 +377,8 @@ block0(v0: i64):
v3 = iconst.i64 3
v4 = bor v3, v2
return v4
; check: v6 = bor v0, v3
; check: return v6
; check: v5 = bor v0, v3 ; v3 = 3
; check: return v5
}
function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 {
@ -421,8 +419,8 @@ block0(v1: i64):
return v3
}
; check: v5 = bnot v1
; check: return v5
; check: v4 = bnot v1
; check: return v4
function %extend_always_above_zero(i32) -> i8 {
block0(v1: i32):

2
cranelift/filetests/filetests/egraph/basic-gvn.clif

@ -22,7 +22,7 @@ block2(v6: i32):
; check: block0(v0: i32, v1: i32):
; check: v2 = iadd v0, v1
; check: block1:
; check: v5 = iadd.i32 v2, v0
; check: v5 = iadd.i32 v0, v2
; nextln: return v5
; check: block2:
; nextln: return v1

2
cranelift/filetests/filetests/egraph/load-hoist.clif

@ -39,6 +39,6 @@ function %foo(i64 vmctx, i64, i32, i32) -> i32 fast {
; check: v9 = load.i64 notrap aligned readonly v0+80
; check: block2(v6: i32, v7: i32, v15: i32):
; check: v10 = iadd.i64 v9, v8
; check: v10 = iadd v8, v9
; check: v11 = load.i32 little heap v10
; check: brif v17, block2(v12, v14, v17), block4

55
cranelift/filetests/filetests/simple_gvn/commutative.clif

@ -0,0 +1,55 @@
test simple-gvn
function %commutative_binary(i32, i32) -> i32, i32 {
block0(v0: i32, v1: i32):
v2 = iadd v0, v1
v3 = iadd v1, v0
return v2, v3
; check: v2 = iadd v0, v1
; check: return v2, v2
}
function %commutative_ternary(f32, f32, f32) -> f32, f32 {
block0(v0: f32, v1: f32, v2: f32):
v3 = fma v0, v1, v2
v4 = fma v1, v0, v2
return v3, v4
; check: v3 = fma v0, v1, v2
; check: return v3, v3
}
function %commutative_icmp(i32, i32) -> i8, i8 {
block0(v0: i32, v1: i32):
v2 = icmp ult v0, v1
v3 = icmp ugt v1, v0
return v2, v3
; check: v2 = icmp ult v0, v1
; check: return v2, v2
}
function %commutative_icmp_reflexive(i32, i32) -> i8, i8 {
block0(v0: i32, v1: i32):
v2 = icmp ule v0, v0
v3 = icmp uge v0, v0
return v2, v3
; check: v2 = icmp ule v0, v0
; check: return v2, v2
}
function %commutative_fcmp(f32, f32) -> i8, i8 {
block0(v0: f32, v1: f32):
v2 = fcmp lt v0, v1
v3 = fcmp gt v1, v0
return v2, v3
; check: v2 = fcmp lt v0, v1
; check: return v2, v2
}
function %commutative_fcmp_reflexive(f32, f32) -> i8, i8 {
block0(v0: f32, v1: f32):
v2 = fcmp ule v0, v0
v3 = fcmp uge v0, v0
return v2, v3
; check: v2 = fcmp ule v0, v0
; check: return v2, v2
}

4
cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat

@ -54,7 +54,7 @@
;; @0057 v4 = uextend.i64 v0
;; @0057 v6 = icmp ugt v4, v5
;; @0057 v9 = iconst.i64 0
;; @0057 v8 = iadd v7, v4
;; @0057 v8 = iadd v4, v7
;; @0057 v10 = select_spectre_guard v6, v9, v8 ; v9 = 0
;; @0057 v11 = load.i32 little heap v10
;; v2 -> v11
@ -75,7 +75,7 @@
;; @0064 v4 = uextend.i64 v0
;; @0064 v6 = icmp ugt v4, v5
;; @0064 v10 = iconst.i64 0
;; @0064 v8 = iadd v7, v4
;; @0064 v8 = iadd v4, v7
;; v22 = iconst.i64 1234
;; @0064 v9 = iadd v8, v22 ; v22 = 1234
;; @0064 v11 = select_spectre_guard v6, v10, v9 ; v10 = 0

8
cranelift/filetests/filetests/wasm/duplicate-loads-static-memory-egraph.wat

@ -44,9 +44,9 @@
;; gv1 = load.i64 notrap aligned readonly gv0
;;
;; block0(v0: i32, v1: i64):
;; @0057 v5 = load.i64 notrap aligned readonly v1
;; @0057 v4 = uextend.i64 v0
;; @0057 v6 = iadd v5, v4
;; @0057 v5 = load.i64 notrap aligned readonly v1
;; @0057 v6 = iadd v4, v5
;; @0057 v7 = load.i32 little heap v6
;; v2 -> v7
;; @005f jump block1
@ -60,9 +60,9 @@
;; gv1 = load.i64 notrap aligned readonly gv0
;;
;; block0(v0: i32, v1: i64):
;; @0064 v5 = load.i64 notrap aligned readonly v1
;; @0064 v4 = uextend.i64 v0
;; @0064 v6 = iadd v5, v4
;; @0064 v5 = load.i64 notrap aligned readonly v1
;; @0064 v6 = iadd v4, v5
;; v14 = iconst.i64 1234
;; @0064 v7 = iadd v6, v14 ; v14 = 1234
;; @0064 v8 = load.i32 little heap v7

12
cranelift/filetests/filetests/wasm/dynamic-memory-no-spectre-access-same-index-different-offsets.wat

@ -72,7 +72,7 @@
;;
;; block3:
;; @0047 v8 = load.i64 notrap aligned v1
;; @0047 v9 = iadd v8, v5
;; @0047 v9 = iadd.i64 v5, v8
;; @0047 v10 = load.i32 little heap v9
;; v2 -> v10
;; @004c brif.i8 v7, block4, block5
@ -87,7 +87,7 @@
;; v3 -> v17
;; @0051 v19 = iconst.i64 0x0010_0003
;; @0051 v20 = uadd_overflow_trap.i64 v5, v19, heap_oob ; v19 = 0x0010_0003
;; @0051 v22 = icmp ugt v20, v6
;; @0051 v22 = icmp.i64 ult v6, v20
;; @0051 brif v22, block6, block7
;;
;; block6 cold:
@ -95,7 +95,7 @@
;;
;; block7:
;; @0051 v23 = load.i64 notrap aligned v1
;; @0051 v24 = iadd v23, v5
;; @0051 v24 = iadd.i64 v5, v23
;; v28 = iconst.i64 0x000f_ffff
;; @0051 v25 = iadd v24, v28 ; v28 = 0x000f_ffff
;; @0051 v26 = load.i32 little heap v25
@ -122,7 +122,7 @@
;;
;; block3:
;; @005d v8 = load.i64 notrap aligned v4
;; @005d v9 = iadd v8, v5
;; @005d v9 = iadd.i64 v5, v8
;; @005d store.i32 little heap v1, v9
;; @0064 brif.i8 v7, block4, block5
;;
@ -135,7 +135,7 @@
;; @0064 store.i32 little heap v2, v15
;; @006b v17 = iconst.i64 0x0010_0003
;; @006b v18 = uadd_overflow_trap.i64 v5, v17, heap_oob ; v17 = 0x0010_0003
;; @006b v20 = icmp ugt v18, v6
;; @006b v20 = icmp.i64 ult v6, v18
;; @006b brif v20, block6, block7
;;
;; block6 cold:
@ -143,7 +143,7 @@
;;
;; block7:
;; @006b v21 = load.i64 notrap aligned v4
;; @006b v22 = iadd v21, v5
;; @006b v22 = iadd.i64 v5, v21
;; v25 = iconst.i64 0x000f_ffff
;; @006b v23 = iadd v22, v25 ; v25 = 0x000f_ffff
;; @006b store.i32 little heap v3, v23

8
cranelift/filetests/filetests/wasm/dynamic-memory-yes-spectre-access-same-index-different-offsets.wat

@ -67,7 +67,7 @@
;; @0047 v5 = uextend.i64 v0
;; @0047 v7 = icmp ugt v5, v6
;; @0047 v10 = iconst.i64 0
;; @0047 v9 = iadd v8, v5
;; @0047 v9 = iadd v5, v8
;; @0047 v11 = select_spectre_guard v7, v10, v9 ; v10 = 0
;; @0047 v12 = load.i32 little heap v11
;; v2 -> v12
@ -78,7 +78,7 @@
;; v3 -> v21
;; @0051 v23 = iconst.i64 0x0010_0003
;; @0051 v24 = uadd_overflow_trap v5, v23, heap_oob ; v23 = 0x0010_0003
;; @0051 v26 = icmp ugt v24, v6
;; @0051 v26 = icmp ult v6, v24
;; v34 = iconst.i64 0x000f_ffff
;; @0051 v29 = iadd v9, v34 ; v34 = 0x000f_ffff
;; @0051 v31 = select_spectre_guard v26, v10, v29 ; v10 = 0
@ -101,7 +101,7 @@
;; @005d v5 = uextend.i64 v0
;; @005d v7 = icmp ugt v5, v6
;; @005d v10 = iconst.i64 0
;; @005d v9 = iadd v8, v5
;; @005d v9 = iadd v5, v8
;; @005d v11 = select_spectre_guard v7, v10, v9 ; v10 = 0
;; @005d store little heap v1, v11
;; v30 = iconst.i64 4
@ -110,7 +110,7 @@
;; @0064 store little heap v2, v19
;; @006b v21 = iconst.i64 0x0010_0003
;; @006b v22 = uadd_overflow_trap v5, v21, heap_oob ; v21 = 0x0010_0003
;; @006b v24 = icmp ugt v22, v6
;; @006b v24 = icmp ult v6, v22
;; v31 = iconst.i64 0x000f_ffff
;; @006b v27 = iadd v9, v31 ; v31 = 0x000f_ffff
;; @006b v29 = select_spectre_guard v24, v10, v27 ; v10 = 0

Loading…
Cancel
Save