Browse Source

More spaceship patterns (#7702)

pull/7708/head
scottmcm 11 months ago
committed by GitHub
parent
commit
f8c9f6711f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      cranelift/codegen/src/opts/bitops.isle
  2. 17
      cranelift/codegen/src/opts/selects.isle
  3. 59
      cranelift/codegen/src/opts/spaceship.isle
  4. 27
      cranelift/filetests/filetests/egraph/select.clif
  5. 152
      cranelift/filetests/filetests/egraph/spaceship.clif
  6. 63
      cranelift/filetests/filetests/isa/x64/bmask.clif

5
cranelift/codegen/src/opts/bitops.isle

@ -90,6 +90,11 @@
(if-let $true (u64_eq shift_amt (ty_shift_mask ty)))
(bmask ty x))
;; Since icmp is always 0 or 1, bmask is just a negation.
;; TODO: Explore whether this makes sense for things needing extension too.
(rule (simplify (bmask $I8 cmp@(icmp $I8 _ _ _)))
(ineg $I8 cmp))
;; Matches any expressions that preserve "truthiness".
;; i.e. If the input is zero it remains zero, and if it is nonzero it can have
;; a different value as long as it is still nonzero.

17
cranelift/codegen/src/opts/selects.isle

@ -4,16 +4,23 @@
(rule (simplify (select ty _ x x)) x)
(rule (simplify (bitselect ty _ x x)) x)
;; Push zeroes to the right -- this makes the select `truthy`, as used elsewhere
;; if icmp { 0 } else { nonzero } => if !icmp { nonzero } else { 0 }
(rule (simplify (select sty (icmp cty cc x y)
zero@(iconst_u _ 0)
nonzero@(iconst_u _ (u64_nonzero _))))
(select sty (icmp cty (intcc_complement cc) x y) nonzero zero))
;; if icmp(x, y) { 1 } else { 0 } => uextend(icmp(x, y))
(rule (simplify (select ty cmp@(icmp _ cc x y)
(iconst_u _ 1)
(iconst_u _ 0)))
(uextend_from_i8 ty cmp))
;; if icmp(x, y) { 0 } else { 1 } => uextend(!icmp(x, y))
(rule (simplify (select sty (icmp cty cc x y)
(iconst_u _ 0)
(iconst_u _ 1)))
(uextend_from_i8 sty (icmp cty (intcc_complement cc) x y)))
;; if icmp(x, y) { -1 } else { 0 } => bmask(icmp(x, y))
(rule (simplify (select ty cmp@(icmp _ cc x y)
(iconst_s _ -1)
(iconst_s _ 0)))
(bmask ty cmp))
;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
(rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y))

59
cranelift/codegen/src/opts/spaceship.isle

@ -59,32 +59,24 @@
(sextend_from_i8 ty (spaceship_u rty x y)))
;; x > y ? 1 : x < y ? -1 : 0
;; x > y ? 1 : x >= y ? 0 : -1
(rule (simplify (select ty (ugt rty x y)
(iconst_s ty 1)
(select ty (ult rty x y)
(iconst_s ty -1)
(iconst_s ty 0))))
(ineg rty (ult rty x y))))
(sextend_from_i8 ty (spaceship_u rty x y)))
;; x > y ? 1 : x != y ? -1 : 0
(rule (simplify (select ty (ugt rty x y)
(iconst_s ty 1)
(select ty (ne rty x y)
(iconst_s ty -1)
(iconst_s ty 0))))
(bmask ty (ult rty x y))))
(sextend_from_i8 ty (spaceship_u rty x y)))
;; x > y ? 1 : x != y ? -1 : 0
;; x > y ? 1 : x == y ? 0 : -1
(rule (simplify (select ty (ugt rty x y)
(iconst_s ty 1)
(select ty (eq rty x y)
(iconst_s ty 0)
(iconst_s ty -1))))
(ineg rty (ne rty x y))))
(sextend_from_i8 ty (spaceship_u rty x y)))
;; x > y ? 1 : x >= y ? 0 : -1
(rule (simplify (select ty (ugt rty x y)
(iconst_s ty 1)
(select ty (uge rty x y)
(iconst_s ty 0)
(iconst_s ty -1))))
(bmask ty (ne rty x y))))
(sextend_from_i8 ty (spaceship_u rty x y)))
;; Same, but for signed comparisons this time
@ -140,32 +132,24 @@
(sextend_from_i8 ty (spaceship_s rty x y)))
;; x > y ? 1 : x < y ? -1 : 0
;; x > y ? 1 : x >= y ? 0 : -1
(rule (simplify (select ty (sgt rty x y)
(iconst_s ty 1)
(select ty (slt rty x y)
(iconst_s ty -1)
(iconst_s ty 0))))
(ineg rty (slt rty x y))))
(sextend_from_i8 ty (spaceship_s rty x y)))
;; x > y ? 1 : x != y ? -1 : 0
(rule (simplify (select ty (sgt rty x y)
(iconst_s ty 1)
(select ty (ne rty x y)
(iconst_s ty -1)
(iconst_s ty 0))))
(bmask ty (slt rty x y))))
(sextend_from_i8 ty (spaceship_s rty x y)))
;; x > y ? 1 : x != y ? -1 : 0
;; x > y ? 1 : x == y ? 0 : -1
(rule (simplify (select ty (sgt rty x y)
(iconst_s ty 1)
(select ty (eq rty x y)
(iconst_s ty 0)
(iconst_s ty -1))))
(ineg rty (ne rty x y))))
(sextend_from_i8 ty (spaceship_s rty x y)))
;; x > y ? 1 : x >= y ? 0 : -1
(rule (simplify (select ty (sgt rty x y)
(iconst_s ty 1)
(select ty (sge rty x y)
(iconst_s ty 0)
(iconst_s ty -1))))
(bmask ty (ne rty x y))))
(sextend_from_i8 ty (spaceship_s rty x y)))
;; Then once we have it normalized, we can apply some basic simplifications.
@ -206,6 +190,25 @@
(rule (simplify (sge _ (spaceship_u ty x y) (iconst_s _ 0)))
(uge ty x y))
;; Rust's `sort_by` uses `compare(a, b) == Less`, which the general icmp rules
;; can't simplify to a comparison against zero, so catch things like that too.
(rule (simplify (eq _ (spaceship_s ty x y) (iconst_s _ -1)))
(slt ty x y))
(rule (simplify (eq _ (spaceship_u ty x y) (iconst_s _ -1)))
(ult ty x y))
(rule (simplify (ne _ (spaceship_s ty x y) (iconst_s _ -1)))
(sge ty x y))
(rule (simplify (ne _ (spaceship_u ty x y) (iconst_s _ -1)))
(uge ty x y))
(rule (simplify (eq _ (spaceship_s ty x y) (iconst_s _ 1)))
(sgt ty x y))
(rule (simplify (eq _ (spaceship_u ty x y) (iconst_s _ 1)))
(ugt ty x y))
(rule (simplify (ne _ (spaceship_s ty x y) (iconst_s _ 1)))
(sle ty x y))
(rule (simplify (ne _ (spaceship_u ty x y) (iconst_s _ 1)))
(ule ty x y))
;; extend from i8 to i8 is invalid CLIF, so this allows fixing that in the output
;; rather than needing to duplicate rules for the different width categories
(decl sextend_from_i8 (Type Value) Value)

27
cranelift/filetests/filetests/egraph/select.clif

@ -183,5 +183,28 @@ block0(v0: i32, v1: i32):
return v5
}
; check: v6 = icmp sge v0, v1
; check: v7 = uextend.i64 v6
; check: return v7
; check: v8 = uextend.i64 v6
; check: return v8
function %then_negone_else_zero(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = icmp ule v0, v1
v3 = iconst.i64 -1
v4 = iconst.i64 0
v5 = select v2, v3, v4
return v5
}
; check: v6 = bmask.i64 v2
; check: return v6
function %then_zero_else_else_negone(i32, i32) -> i64 {
block0(v0: i32, v1: i32):
v2 = icmp sle v0, v1
v3 = iconst.i64 0
v4 = iconst.i64 -1
v5 = select v2, v3, v4
return v5
}
; check: v6 = icmp sgt v0, v1
; check: v8 = bmask.i64 v6
; check: return v8

152
cranelift/filetests/filetests/egraph/spaceship.clif

@ -15,10 +15,10 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v11 = icmp sgt v0, v1
; check: v12 = icmp slt v0, v1
; check: v13 = isub v11, v12
; check: return v13
; check: v13 = icmp sgt v0, v1
; check: v14 = icmp slt v0, v1
; check: v15 = isub v13, v14
; check: return v15
}
function %cmp_s1b(i32, i32) -> i16 {
@ -32,10 +32,10 @@ block0(v0: i32, v1: i32):
v8 = select v2, v4, v7
return v8
; check: v9 = icmp sgt v0, v1
; check: v12 = icmp slt v0, v1
; check: v13 = isub v9, v12
; check: v14 = sextend.i16 v13
; check: return v14
; check: v14 = icmp slt v0, v1
; check: v15 = isub v9, v14
; check: v16 = sextend.i16 v15
; check: return v16
}
function %cmp_s2a(i32, i32) -> i8 {
@ -147,10 +147,10 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp sgt v0, v1
; check: v10 = icmp slt v0, v1
; check: v11 = isub v9, v10
; check: return v11
; check: v13 = icmp sgt v0, v1
; check: v14 = icmp slt v0, v1
; check: v15 = isub v13, v14
; check: return v15
}
function %cmp_s5b(i32, i32) -> i16 {
@ -163,11 +163,11 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp sgt v0, v1
; check: v10 = icmp slt v0, v1
; check: v11 = isub v9, v10
; check: v12 = sextend.i16 v11
; check: return v12
; check: v11 = icmp sgt v0, v1
; check: v12 = icmp slt v0, v1
; check: v13 = isub v11, v12
; check: v14 = sextend.i16 v13
; check: return v14
}
function %cmp_s6a(i32, i32) -> i8 {
@ -180,10 +180,10 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp sgt v0, v1
; check: v10 = icmp slt v0, v1
; check: v11 = isub v9, v10
; check: return v11
; check: v16 = icmp sgt v0, v1
; check: v9 = icmp slt v0, v1
; check: v17 = isub v16, v9
; check: return v17
}
function %cmp_s6b(i32, i32) -> i16 {
@ -196,11 +196,11 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp sgt v0, v1
; check: v10 = icmp slt v0, v1
; check: v11 = isub v9, v10
; check: v12 = sextend.i16 v11
; check: return v12
; check: v14 = icmp sgt v0, v1
; check: v15 = icmp slt v0, v1
; check: v16 = isub v14, v15
; check: v17 = sextend.i16 v16
; check: return v17
}
;; And again for unsigned...
@ -215,11 +215,11 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v12 = icmp ugt v0, v1
; check: v13 = icmp ult v0, v1
; check: v14 = isub v12, v13
; check: v15 = sextend.i16 v14
; check: return v15
; check: v14 = icmp ugt v0, v1
; check: v15 = icmp ult v0, v1
; check: v16 = isub v14, v15
; check: v17 = sextend.i16 v16
; check: return v17
}
function %cmp_u1b(i32, i32) -> i8 {
@ -233,9 +233,9 @@ block0(v0: i32, v1: i32):
v8 = select v2, v4, v7
return v8
; check: v9 = icmp ugt v0, v1
; check: v11 = icmp ult v0, v1
; check: v12 = isub v9, v11
; check: return v12
; check: v13 = icmp ult v0, v1
; check: v14 = isub v9, v13
; check: return v14
}
function %cmp_u2a(i32, i32) -> i16 {
@ -347,11 +347,11 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp ugt v0, v1
; check: v10 = icmp ult v0, v1
; check: v11 = isub v9, v10
; check: v12 = sextend.i16 v11
; check: return v12
; check: v11 = icmp ugt v0, v1
; check: v12 = icmp ult v0, v1
; check: v13 = isub v11, v12
; check: v14 = sextend.i16 v13
; check: return v14
}
function %cmp_u5b(i32, i32) -> i8 {
@ -364,10 +364,10 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp ugt v0, v1
; check: v10 = icmp ult v0, v1
; check: v11 = isub v9, v10
; check: return v11
; check: v13 = icmp ugt v0, v1
; check: v14 = icmp ult v0, v1
; check: v15 = isub v13, v14
; check: return v15
}
function %cmp_u6a(i32, i32) -> i16 {
@ -380,11 +380,11 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp ugt v0, v1
; check: v10 = icmp ult v0, v1
; check: v11 = isub v9, v10
; check: v12 = sextend.i16 v11
; check: return v12
; check: v14 = icmp ugt v0, v1
; check: v9 = icmp ult v0, v1
; check: v15 = isub v14, v9
; check: v16 = sextend.i16 v15
; check: return v16
}
function %cmp_u6b(i32, i32) -> i8 {
@ -397,10 +397,10 @@ block0(v0: i32, v1: i32):
v7 = select v3, v5, v6
v8 = select v2, v4, v7
return v8
; check: v9 = icmp ugt v0, v1
; check: v10 = icmp ult v0, v1
; check: v11 = isub v9, v10
; check: return v11
; check: v16 = icmp ugt v0, v1
; check: v17 = icmp ult v0, v1
; check: v18 = isub v16, v17
; check: return v18
}
;; Then a few of the simplifications
@ -478,3 +478,51 @@ block0(v0: i16, v1: i16):
; check: v10 = icmp ne v0, v1
; check: return v10
}
function %ult_via_cmp_eq_less(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = icmp ugt v0, v1
v3 = icmp ult v0, v1
v4 = isub v2, v3
v5 = iconst.i8 -1
v6 = icmp eq v4, v5
return v6
; check: v3 = icmp ult v0, v1
; check: return v3
}
function %uge_via_cmp_ne_less(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = icmp ugt v0, v1
v3 = icmp ult v0, v1
v4 = isub v2, v3
v5 = iconst.i8 -1
v6 = icmp ne v4, v5
return v6
; check: v7 = icmp uge v0, v1
; check: return v7
}
function %sgt_via_cmp_eq_greater(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = icmp sgt v0, v1
v3 = icmp slt v0, v1
v4 = isub v2, v3
v5 = iconst.i8 1
v6 = icmp eq v4, v5
return v6
; check: v2 = icmp sgt v0, v1
; check: return v2
}
; Note: despite "less" in the name, this compares the three-way comparison
; result (v4) against 1, i.e. the "greater" value, so `ne` is expected to
; simplify to `sle`.
function %sle_via_cmp_ne_less(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = icmp sgt v0, v1
v3 = icmp slt v0, v1
v4 = isub v2, v3
v5 = iconst.i8 1
v6 = icmp ne v4, v5
return v6
; check: v7 = icmp sle v0, v1
; check: return v7
}

63
cranelift/filetests/filetests/isa/x64/bmask.clif

@ -798,3 +798,66 @@ block0(v0: i8):
; popq %rbp
; retq
function %bmask_icmp_i32_i8(i32, i32) -> i8 {
block0(v0: i32, v1: i32):
v2 = icmp sgt v0, v1
v3 = bmask.i8 v2
return v3
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpl %esi, %edi
; setnle %al
; movq %rax, %r8
; negb %r8b, %r8b
; sbbl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; cmpl %esi, %edi
; setg %al
; movq %rax, %r8
; negb %r8b
; sbbl %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %ineg_icmp_i32_i8(i32, i32) -> i8 {
block0(v0: i32, v1: i32):
v2 = icmp sgt v0, v1
v3 = ineg v2
return v3
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpl %esi, %edi
; setnle %al
; negb %al, %al
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; cmpl %esi, %edi
; setg %al
; negb %al
; movq %rbp, %rsp
; popq %rbp
; retq

Loading…
Cancel
Save