From f8c9f6711f6316001801480d3883888734835faf Mon Sep 17 00:00:00 2001
From: scottmcm
Date: Tue, 19 Dec 2023 13:12:54 -0800
Subject: [PATCH] More spaceship patterns (#7702)

---
 cranelift/codegen/src/opts/bitops.isle        |   5 +
 cranelift/codegen/src/opts/selects.isle       |  17 +-
 cranelift/codegen/src/opts/spaceship.isle     |  59 +++----
 .../filetests/filetests/egraph/select.clif    |  27 +++-
 .../filetests/filetests/egraph/spaceship.clif | 152 ++++++++++++------
 .../filetests/filetests/isa/x64/bmask.clif    |  63 ++++++++
 6 files changed, 236 insertions(+), 87 deletions(-)

diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle
index de1515912b..0f44ba47d2 100644
--- a/cranelift/codegen/src/opts/bitops.isle
+++ b/cranelift/codegen/src/opts/bitops.isle
@@ -90,6 +90,11 @@
     (if-let $true (u64_eq shift_amt (ty_shift_mask ty)))
     (bmask ty x))
 
+;; Since icmp is always 0 or 1, bmask is just a negation.
+;; TODO: Explore whether this makes sense for things needing extension too.
+(rule (simplify (bmask $I8 cmp@(icmp $I8 _ _ _)))
+      (ineg $I8 cmp))
+
 ;; Matches any expressions that preserve "truthiness".
 ;; i.e. If the input is zero it remains zero, and if it is nonzero it can have
 ;; a different value as long as it is still nonzero.
diff --git a/cranelift/codegen/src/opts/selects.isle b/cranelift/codegen/src/opts/selects.isle
index e966fb3ec7..ec38be7396 100644
--- a/cranelift/codegen/src/opts/selects.isle
+++ b/cranelift/codegen/src/opts/selects.isle
@@ -4,16 +4,23 @@
 (rule (simplify (select ty _ x x)) x)
 (rule (simplify (bitselect ty _ x x)) x)
 
+;; Push zeroes to the right -- this makes the select `truthy`, as used elsewhere
+;; if icmp { 0 } else { nonzero } => if !icmp { nonzero } else { 0 }
+(rule (simplify (select sty (icmp cty cc x y)
+                        zero@(iconst_u _ 0)
+                        nonzero@(iconst_u _ (u64_nonzero _))))
+      (select sty (icmp cty (intcc_complement cc) x y) nonzero zero))
+
 ;; if icmp(x, y) { 1 } else { 0 } => uextend(icmp(x, y))
 (rule (simplify (select ty cmp@(icmp _ cc x y)
                         (iconst_u _ 1)
                         (iconst_u _ 0)))
       (uextend_from_i8 ty cmp))
 
-;; if icmp(x, y) { 0 } else { 1 } => uextend(!icmp(x, y))
-(rule (simplify (select sty (icmp cty cc x y)
-                        (iconst_u _ 0)
-                        (iconst_u _ 1)))
-      (uextend_from_i8 sty (icmp cty (intcc_complement cc) x y)))
+;; if icmp(x, y) { -1 } else { 0 } => bmask(icmp(x, y))
+(rule (simplify (select ty cmp@(icmp _ cc x y)
+                        (iconst_s _ -1)
+                        (iconst_s _ 0)))
+      (bmask ty cmp))
 ;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
 (rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y))
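As a rough illustration (not part of the patch), these are the kinds of Rust sources that typically reach Cranelift as the select-of-constants shapes the rules above normalize; the function names are made up for the example:

// Illustrative only: each body is the kind of expression that tends to arrive
// in the mid-end as `select (icmp ...), <const>, <const>`.

// 1/0 constants: handled by the existing rule as `uextend(icmp)`.
pub fn as_bit(a: u32, b: u32) -> u64 {
    (a < b) as u64
}

// -1/0 constants: the new rule turns this into `bmask(icmp)`
// (and a plain `ineg(icmp)` when the result is already an i8).
pub fn as_mask(a: u32, b: u32) -> i64 {
    if a < b { -1 } else { 0 }
}

// Zero in the "then" arm: the new push-zeroes-to-the-right rule flips the
// condition so the nonzero constant ends up on the "then" side.
pub fn zero_then_nonzero(a: u32, b: u32) -> u64 {
    if a == b { 0 } else { 5 }
}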
diff --git a/cranelift/codegen/src/opts/spaceship.isle b/cranelift/codegen/src/opts/spaceship.isle
index d7d2071528..69dd8f9cb4 100644
--- a/cranelift/codegen/src/opts/spaceship.isle
+++ b/cranelift/codegen/src/opts/spaceship.isle
@@ -59,32 +59,24 @@
       (sextend_from_i8 ty (spaceship_u rty x y)))
 
 ;; x > y ? 1 : x < y ? -1 : 0
+;; x > y ? 1 : x >= y ? 0 : -1
 (rule (simplify (select ty (ugt rty x y)
                            (iconst_s ty 1)
-                           (select ty (ult rty x y)
-                                      (iconst_s ty -1)
-                                      (iconst_s ty 0))))
+                           (ineg rty (ult rty x y))))
       (sextend_from_i8 ty (spaceship_u rty x y)))
-;; x > y ? 1 : x != y ? -1 : 0
 (rule (simplify (select ty (ugt rty x y)
                            (iconst_s ty 1)
-                           (select ty (ne rty x y)
-                                      (iconst_s ty -1)
-                                      (iconst_s ty 0))))
+                           (bmask ty (ult rty x y))))
       (sextend_from_i8 ty (spaceship_u rty x y)))
+;; x > y ? 1 : x != y ? -1 : 0
 ;; x > y ? 1 : x == y ? 0 : -1
 (rule (simplify (select ty (ugt rty x y)
                            (iconst_s ty 1)
-                           (select ty (eq rty x y)
-                                      (iconst_s ty 0)
-                                      (iconst_s ty -1))))
+                           (ineg rty (ne rty x y))))
       (sextend_from_i8 ty (spaceship_u rty x y)))
-;; x > y ? 1 : x >= y ? 0 : -1
 (rule (simplify (select ty (ugt rty x y)
                            (iconst_s ty 1)
-                           (select ty (uge rty x y)
-                                      (iconst_s ty 0)
-                                      (iconst_s ty -1))))
+                           (bmask ty (ne rty x y))))
       (sextend_from_i8 ty (spaceship_u rty x y)))
 
 ;; Same, but for signed comparisons this time
@@ -140,32 +132,24 @@
       (sextend_from_i8 ty (spaceship_s rty x y)))
 
 ;; x > y ? 1 : x < y ? -1 : 0
+;; x > y ? 1 : x >= y ? 0 : -1
 (rule (simplify (select ty (sgt rty x y)
                            (iconst_s ty 1)
-                           (select ty (slt rty x y)
-                                      (iconst_s ty -1)
-                                      (iconst_s ty 0))))
+                           (ineg rty (slt rty x y))))
       (sextend_from_i8 ty (spaceship_s rty x y)))
-;; x > y ? 1 : x != y ? -1 : 0
 (rule (simplify (select ty (sgt rty x y)
                            (iconst_s ty 1)
-                           (select ty (ne rty x y)
-                                      (iconst_s ty -1)
-                                      (iconst_s ty 0))))
+                           (bmask ty (slt rty x y))))
       (sextend_from_i8 ty (spaceship_s rty x y)))
+;; x > y ? 1 : x != y ? -1 : 0
 ;; x > y ? 1 : x == y ? 0 : -1
 (rule (simplify (select ty (sgt rty x y)
                            (iconst_s ty 1)
-                           (select ty (eq rty x y)
-                                      (iconst_s ty 0)
-                                      (iconst_s ty -1))))
+                           (ineg rty (ne rty x y))))
       (sextend_from_i8 ty (spaceship_s rty x y)))
-;; x > y ? 1 : x >= y ? 0 : -1
 (rule (simplify (select ty (sgt rty x y)
                            (iconst_s ty 1)
-                           (select ty (sge rty x y)
-                                      (iconst_s ty 0)
-                                      (iconst_s ty -1))))
+                           (bmask ty (ne rty x y))))
       (sextend_from_i8 ty (spaceship_s rty x y)))
 
 ;; Then once we have it normalized, we can apply some basic simplifications.
@@ -206,6 +190,25 @@
 (rule (simplify (sge _ (spaceship_u ty x y) (iconst_s _ 0)))
       (uge ty x y))
 
+;; Rust's `sort_by` uses `compare(a, b) == Less`, which the general icmp rules
+;; can't simplify to a comparison against zero, so catch things like that too.
+(rule (simplify (eq _ (spaceship_s ty x y) (iconst_s _ -1))) + (slt ty x y)) +(rule (simplify (eq _ (spaceship_u ty x y) (iconst_s _ -1))) + (ult ty x y)) +(rule (simplify (ne _ (spaceship_s ty x y) (iconst_s _ -1))) + (sge ty x y)) +(rule (simplify (ne _ (spaceship_u ty x y) (iconst_s _ -1))) + (uge ty x y)) +(rule (simplify (eq _ (spaceship_s ty x y) (iconst_s _ 1))) + (sgt ty x y)) +(rule (simplify (eq _ (spaceship_u ty x y) (iconst_s _ 1))) + (ugt ty x y)) +(rule (simplify (ne _ (spaceship_s ty x y) (iconst_s _ 1))) + (sle ty x y)) +(rule (simplify (ne _ (spaceship_u ty x y) (iconst_s _ 1))) + (ule ty x y)) + ;; extend from i8 to i8 is invalid CLIF, so this allows fixing that in the output ;; rather than needing to duplicate rules for the different width categories (decl sextend_from_i8 (Type Value) Value) diff --git a/cranelift/filetests/filetests/egraph/select.clif b/cranelift/filetests/filetests/egraph/select.clif index 062909a81f..8b916e90d4 100644 --- a/cranelift/filetests/filetests/egraph/select.clif +++ b/cranelift/filetests/filetests/egraph/select.clif @@ -183,5 +183,28 @@ block0(v0: i32, v1: i32): return v5 } ; check: v6 = icmp sge v0, v1 -; check: v7 = uextend.i64 v6 -; check: return v7 +; check: v8 = uextend.i64 v6 +; check: return v8 + +function %then_negone_else_zero(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + v3 = iconst.i64 -1 + v4 = iconst.i64 0 + v5 = select v2, v3, v4 + return v5 +} +; check: v6 = bmask.i64 v2 +; check: return v6 + +function %then_zero_else_else_negone(i32, i32) -> i64 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + v3 = iconst.i64 0 + v4 = iconst.i64 -1 + v5 = select v2, v3, v4 + return v5 +} +; check: v6 = icmp sgt v0, v1 +; check: v8 = bmask.i64 v6 +; check: return v8 diff --git a/cranelift/filetests/filetests/egraph/spaceship.clif b/cranelift/filetests/filetests/egraph/spaceship.clif index 6484f8884c..7c1c8e2be0 100644 --- a/cranelift/filetests/filetests/egraph/spaceship.clif +++ b/cranelift/filetests/filetests/egraph/spaceship.clif @@ -15,10 +15,10 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v11 = icmp sgt v0, v1 - ; check: v12 = icmp slt v0, v1 - ; check: v13 = isub v11, v12 - ; check: return v13 + ; check: v13 = icmp sgt v0, v1 + ; check: v14 = icmp slt v0, v1 + ; check: v15 = isub v13, v14 + ; check: return v15 } function %cmp_s1b(i32, i32) -> i16 { @@ -32,10 +32,10 @@ block0(v0: i32, v1: i32): v8 = select v2, v4, v7 return v8 ; check: v9 = icmp sgt v0, v1 - ; check: v12 = icmp slt v0, v1 - ; check: v13 = isub v9, v12 - ; check: v14 = sextend.i16 v13 - ; check: return v14 + ; check: v14 = icmp slt v0, v1 + ; check: v15 = isub v9, v14 + ; check: v16 = sextend.i16 v15 + ; check: return v16 } function %cmp_s2a(i32, i32) -> i8 { @@ -147,10 +147,10 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp sgt v0, v1 - ; check: v10 = icmp slt v0, v1 - ; check: v11 = isub v9, v10 - ; check: return v11 + ; check: v13 = icmp sgt v0, v1 + ; check: v14 = icmp slt v0, v1 + ; check: v15 = isub v13, v14 + ; check: return v15 } function %cmp_s5b(i32, i32) -> i16 { @@ -163,11 +163,11 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp sgt v0, v1 - ; check: v10 = icmp slt v0, v1 - ; check: v11 = isub v9, v10 - ; check: v12 = sextend.i16 v11 - ; check: return v12 + ; check: v11 = icmp sgt v0, v1 + ; check: v12 = icmp slt v0, v1 + ; check: v13 = isub v11, v12 + ; check: v14 = 
sextend.i16 v13 + ; check: return v14 } function %cmp_s6a(i32, i32) -> i8 { @@ -180,10 +180,10 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp sgt v0, v1 - ; check: v10 = icmp slt v0, v1 - ; check: v11 = isub v9, v10 - ; check: return v11 + ; check: v16 = icmp sgt v0, v1 + ; check: v9 = icmp slt v0, v1 + ; check: v17 = isub v16, v9 + ; check: return v17 } function %cmp_s6b(i32, i32) -> i16 { @@ -196,11 +196,11 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp sgt v0, v1 - ; check: v10 = icmp slt v0, v1 - ; check: v11 = isub v9, v10 - ; check: v12 = sextend.i16 v11 - ; check: return v12 + ; check: v14 = icmp sgt v0, v1 + ; check: v15 = icmp slt v0, v1 + ; check: v16 = isub v14, v15 + ; check: v17 = sextend.i16 v16 + ; check: return v17 } ;; And again for unsigned... @@ -215,11 +215,11 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v12 = icmp ugt v0, v1 - ; check: v13 = icmp ult v0, v1 - ; check: v14 = isub v12, v13 - ; check: v15 = sextend.i16 v14 - ; check: return v15 + ; check: v14 = icmp ugt v0, v1 + ; check: v15 = icmp ult v0, v1 + ; check: v16 = isub v14, v15 + ; check: v17 = sextend.i16 v16 + ; check: return v17 } function %cmp_u1b(i32, i32) -> i8 { @@ -233,9 +233,9 @@ block0(v0: i32, v1: i32): v8 = select v2, v4, v7 return v8 ; check: v9 = icmp ugt v0, v1 - ; check: v11 = icmp ult v0, v1 - ; check: v12 = isub v9, v11 - ; check: return v12 + ; check: v13 = icmp ult v0, v1 + ; check: v14 = isub v9, v13 + ; check: return v14 } function %cmp_u2a(i32, i32) -> i16 { @@ -347,11 +347,11 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp ugt v0, v1 - ; check: v10 = icmp ult v0, v1 - ; check: v11 = isub v9, v10 - ; check: v12 = sextend.i16 v11 - ; check: return v12 + ; check: v11 = icmp ugt v0, v1 + ; check: v12 = icmp ult v0, v1 + ; check: v13 = isub v11, v12 + ; check: v14 = sextend.i16 v13 + ; check: return v14 } function %cmp_u5b(i32, i32) -> i8 { @@ -364,10 +364,10 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp ugt v0, v1 - ; check: v10 = icmp ult v0, v1 - ; check: v11 = isub v9, v10 - ; check: return v11 + ; check: v13 = icmp ugt v0, v1 + ; check: v14 = icmp ult v0, v1 + ; check: v15 = isub v13, v14 + ; check: return v15 } function %cmp_u6a(i32, i32) -> i16 { @@ -380,11 +380,11 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp ugt v0, v1 - ; check: v10 = icmp ult v0, v1 - ; check: v11 = isub v9, v10 - ; check: v12 = sextend.i16 v11 - ; check: return v12 + ; check: v14 = icmp ugt v0, v1 + ; check: v9 = icmp ult v0, v1 + ; check: v15 = isub v14, v9 + ; check: v16 = sextend.i16 v15 + ; check: return v16 } function %cmp_u6b(i32, i32) -> i8 { @@ -397,10 +397,10 @@ block0(v0: i32, v1: i32): v7 = select v3, v5, v6 v8 = select v2, v4, v7 return v8 - ; check: v9 = icmp ugt v0, v1 - ; check: v10 = icmp ult v0, v1 - ; check: v11 = isub v9, v10 - ; check: return v11 + ; check: v16 = icmp ugt v0, v1 + ; check: v17 = icmp ult v0, v1 + ; check: v18 = isub v16, v17 + ; check: return v18 } ;; Then a few of the simplifications @@ -478,3 +478,51 @@ block0(v0: i16, v1: i16): ; check: v10 = icmp ne v0, v1 ; check: return v10 } + +function %ult_via_cmp_eq_less(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ugt v0, v1 + v3 = icmp ult v0, v1 + v4 = isub v2, v3 + v5 = iconst.i8 
-1
+    v6 = icmp eq v4, v5
+    return v6
+    ; check: v3 = icmp ult v0, v1
+    ; check: return v3
+}
+
+function %uge_via_cmp_ne_less(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = icmp ugt v0, v1
+    v3 = icmp ult v0, v1
+    v4 = isub v2, v3
+    v5 = iconst.i8 -1
+    v6 = icmp ne v4, v5
+    return v6
+    ; check: v7 = icmp uge v0, v1
+    ; check: return v7
+}
+
+function %sgt_via_cmp_eq_greater(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = icmp sgt v0, v1
+    v3 = icmp slt v0, v1
+    v4 = isub v2, v3
+    v5 = iconst.i8 1
+    v6 = icmp eq v4, v5
+    return v6
+    ; check: v2 = icmp sgt v0, v1
+    ; check: return v2
+}
+
+function %sle_via_cmp_ne_greater(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = icmp sgt v0, v1
+    v3 = icmp slt v0, v1
+    v4 = isub v2, v3
+    v5 = iconst.i8 1
+    v6 = icmp ne v4, v5
+    return v6
+    ; check: v7 = icmp sle v0, v1
+    ; check: return v7
+}
diff --git a/cranelift/filetests/filetests/isa/x64/bmask.clif b/cranelift/filetests/filetests/isa/x64/bmask.clif
index 3871aaec8c..4fc9047557 100644
--- a/cranelift/filetests/filetests/isa/x64/bmask.clif
+++ b/cranelift/filetests/filetests/isa/x64/bmask.clif
@@ -798,3 +798,66 @@
 block0(v0: i8):
 ; popq %rbp
 ; retq
+
+function %bmask_icmp_i32_i8(i32, i32) -> i8 {
+block0(v0: i32, v1: i32):
+    v2 = icmp sgt v0, v1
+    v3 = bmask.i8 v2
+    return v3
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   cmpl    %esi, %edi
+;   setnle  %al
+;   movq    %rax, %r8
+;   negb    %r8b, %r8b
+;   sbbl    %eax, %eax, %eax
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   cmpl %esi, %edi
+;   setg %al
+;   movq %rax, %r8
+;   negb %r8b
+;   sbbl %eax, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %ineg_icmp_i32_i8(i32, i32) -> i8 {
+block0(v0: i32, v1: i32):
+    v2 = icmp sgt v0, v1
+    v3 = ineg v2
+    return v3
+}
+
+; VCode:
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   cmpl    %esi, %edi
+;   setnle  %al
+;   negb    %al, %al
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   cmpl %esi, %edi
+;   setg %al
+;   negb %al
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
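As a rough illustration (not part of the patch), these are the Rust-level idioms the spaceship rules are aimed at, assuming they reach Cranelift in the usual isub-of-icmps / select-of-constants shapes; the function names are illustrative only:

use std::cmp::Ordering;

// Hand-written three-way compare: typically lowers to
// `isub(icmp sgt, icmp slt)`, the canonical shape the rules normalize to.
pub fn spaceship(a: i32, b: i32) -> i8 {
    (a > b) as i8 - (a < b) as i8
}

// The `sort_by`-style use mentioned in spaceship.isle: comparing the
// three-way result against Less should collapse to a single `icmp ult`
// instead of materializing -1/0/+1 and then comparing it with -1.
pub fn is_less(a: u32, b: u32) -> bool {
    a.cmp(&b) == Ordering::Less
}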