Browse Source

Optimize more reduction-of-an-extend cases (#7711)

* Optimize more reduction-of-an-extend cases

* Rebase atop 7719
pull/7741/head
scottmcm 10 months ago
committed by GitHub
parent
commit
2bd90027f3
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 20
      cranelift/codegen/src/opts/extends.isle
  2. 16
      cranelift/filetests/filetests/egraph/arithmetic.clif
  3. 60
      cranelift/filetests/filetests/egraph/extends.clif

20
cranelift/codegen/src/opts/extends.isle

@@ -47,8 +47,24 @@
;; A reduction-of-an-extend back to the same original type is the same as not
;; actually doing the extend in the first place.
(rule (simplify (ireduce ty (sextend _ x @ (value_type ty)))) x)
(rule (simplify (ireduce ty (uextend _ x @ (value_type ty)))) x)
(rule (simplify (ireduce ty (sextend _ x @ (value_type ty)))) (subsume x))
(rule (simplify (ireduce ty (uextend _ x @ (value_type ty)))) (subsume x))
;; A reduction-of-an-extend that's not just to the original type is either:
;; a reduction of the original if the final type is smaller, or
(rule (simplify (ireduce (ty_int ty_final) (sextend _ inner@(value_type ty_initial))))
(if-let $true (u64_lt (ty_bits_u64 ty_final) (ty_bits_u64 ty_initial)))
(ireduce ty_final inner))
(rule (simplify (ireduce (ty_int ty_final) (uextend _ inner@(value_type ty_initial))))
(if-let $true (u64_lt (ty_bits_u64 ty_final) (ty_bits_u64 ty_initial)))
(ireduce ty_final inner))
;; an extension of the original if the final type is larger.
(rule (simplify (ireduce (ty_int ty_final) (sextend _ inner@(value_type ty_initial))))
(if-let $true (u64_lt (ty_bits_u64 ty_initial) (ty_bits_u64 ty_final)))
(sextend ty_final inner))
(rule (simplify (ireduce (ty_int ty_final) (uextend _ inner@(value_type ty_initial))))
(if-let $true (u64_lt (ty_bits_u64 ty_initial) (ty_bits_u64 ty_final)))
(uextend ty_final inner))
;; `band`, `bor`, and `bxor` can't affect any bits that aren't set in the one of
;; the inputs, so they can be pushed down inside `uextend`s

16
cranelift/filetests/filetests/egraph/arithmetic.clif

@@ -321,8 +321,8 @@ block0(v0: i64, v1: i64, v2: i64):
return
}
; check: v18 = imul v1, v2
; check: store v18, v0
; check: v15 = imul v1, v2
; check: store v15, v0
; check: v10 = umulhi v1, v2
; check: store v10, v0+8
@@ -335,9 +335,9 @@ block0(v0: i8):
return v4
}
; check: v8 = iconst.i8 1
; check: v9 = iadd v0, v8 ; v8 = 1
; check: return v9
; check: v7 = iconst.i8 1
; check: v8 = iadd v0, v7 ; v7 = 1
; check: return v8
;; Adding three `short`s together and storing them in a `short`,
;; which in C involves extending them to `int`s in the middle.
@@ -352,6 +352,6 @@ block0(v0: i16, v1: i16, v2: i16):
return v8
}
; check: v14 = iadd v0, v1
; check: v18 = iadd v14, v2
; check: return v18
; check: v12 = iadd v0, v1
; check: v15 = iadd v12, v2
; check: return v15

60
cranelift/filetests/filetests/egraph/extends.clif

@@ -94,6 +94,46 @@ block0(v1: i32):
; check: return v1
function %sextend_then_reduce_smaller(i32) -> i16 {
block0(v1: i32):
v2 = sextend.i64 v1
v3 = ireduce.i16 v2
return v3
}
; check: v4 = ireduce.i16 v1
; check: return v4
function %uextend_then_reduce_smaller(i32) -> i16 {
block0(v1: i32):
v2 = uextend.i64 v1
v3 = ireduce.i16 v2
return v3
}
; check: v4 = ireduce.i16 v1
; check: return v4
function %sextend_then_reduce_partially(i16) -> i32 {
block0(v1: i16):
v2 = sextend.i64 v1
v3 = ireduce.i32 v2
return v3
}
; check: v4 = sextend.i32 v1
; check: return v4
function %uextend_then_reduce_partially(i16) -> i32 {
block0(v1: i16):
v2 = uextend.i64 v1
v3 = ireduce.i32 v2
return v3
}
; check: v4 = uextend.i32 v1
; check: return v4
function %sextend_then_slt_zero(i8) -> i8 {
block0(v0: i8):
v1 = sextend.i16 v0
@@ -127,8 +167,8 @@ block0(v0: i64, v1: i64):
return v5
}
; check: v10 = imul v0, v1
; check: return v10
; check: v8 = imul v0, v1
; check: return v8
function %extend_iadd_reduce(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
@@ -139,8 +179,8 @@ block0(v0: i16, v1: i16):
return v5
}
; check: v10 = iadd v0, v1
; check: return v10
; check: v8 = iadd v0, v1
; check: return v8
function %extend_bxor_reduce(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
@@ -163,8 +203,8 @@ block0(v0: i16, v1: i16):
return v5
}
; check: v10 = band v0, v1
; check: return v10
; check: v8 = band v0, v1
; check: return v8
function %extend_ineg_reduce(i64) -> i64 {
block0(v0: i64):
@@ -174,8 +214,8 @@ block0(v0: i64):
return v3
}
; check: v6 = ineg v0
; check: return v6
; check: v5 = ineg v0
; check: return v5
function %extend_bnot_reduce(i16) -> i16 {
block0(v0: i16):
@@ -185,5 +225,5 @@ block0(v0: i16):
return v3
}
; check: v6 = bnot v0
; check: return v6
; check: v5 = bnot v0
; check: return v5

Loading…
Cancel
Save