Browse Source
Add (bnot (bxor x y)) lowerings for s390x/aarch64 (#5763)
* Add (bnot (bxor x y)) lowerings for s390x/aarch64
I originally thought otherwise about s390x's original lowering in #5709, but
as was rightfully pointed out, `(bnot (bxor x y))` is equivalent to
`(bxor x (bnot y))`, so the special lowering for one should also apply as a
special lowering for the other. For the s390x and aarch64 backends, which
already have a fused lowering of the bxor/bnot combination, additionally add
a lowering for the bnot/bxor combination.
* Add bnot(bxor(..)) tests for s390x 128-bit sizes
pull/5769/head
Alex Crichton
2 years ago
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with
85 additions and
0 deletions
cranelift/codegen/src/isa/aarch64/lower.isle
cranelift/codegen/src/isa/s390x/lower.isle
cranelift/filetests/filetests/isa/aarch64/bitops.clif
cranelift/filetests/filetests/isa/s390x/bitops-optimized.clif
cranelift/filetests/filetests/isa/s390x/bitwise.clif
@ -1052,6 +1052,11 @@
(rule -2 (lower (has_type (ty_vec128 ty) (bnot x)))
(not x (vector_size ty)))
;; Special-cases for fusing a bnot with bxor
;;
;; Both rules match a `bnot` whose operand is a `bxor` and lower the pair
;; through a single `ALUOp.EorNot` operation on the two `bxor` inputs instead
;; of lowering the xor and the not separately.
;;
;; Scalar case: any type accepted by `fits_in_64`, using `alu_rs_imm_logic`.
(rule 2 (lower (has_type (fits_in_64 ty) (bnot (bxor x y))))
(alu_rs_imm_logic (ALUOp.EorNot) ty x y))
;; `$I128` case: delegates to `i128_alu_bitop` with the same `ALUOp.EorNot`.
;; NOTE(review): the `$I64` argument is presumably the type used for each
;; half of the 128-bit value -- confirm against `i128_alu_bitop`.
(rule 3 (lower (has_type $I128 (bnot (bxor x y)))) (i128_alu_bitop (ALUOp.EorNot) $I64 x y))
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (band x y)))
@ -956,6 +956,15 @@
(rule (lower (has_type (vr128_ty ty) (bnot x)))
(vec_not ty x))
;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the
;; (bxor _ (bnot _)) lowering.
;; This rule only matches when `mie2_enabled` holds and the type is accepted
;; by `fits_in_64`; the two `bxor` inputs are passed straight to `not_xor_reg`.
(rule 3 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot (bxor x y))))
(not_xor_reg ty x y))
;; Combine a not/xor operation of vector types into one.
;; Matches any type accepted by `vr128_ty`; unlike the scalar rule above, this
;; pattern carries no `mie2_enabled` guard.
(rule 4 (lower (has_type (vr128_ty ty) (bnot (bxor x y))))
(vec_not_xor ty x y))
;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -924,3 +924,26 @@ block0(v0: i128, v1: i128):
; csel x1, x15, x7, ne
; ret
function %bnot_of_bxor(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; eon w0, w0, w1
; ret
function %bnot_of_bxor(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; eon x0, x0, x2
; eon x1, x1, x3
; ret
@ -64,3 +64,14 @@ block0(v0: i32, v1: i32):
; block0:
; nxrk %r2, %r3, %r2
; br %r14
function %bnot_of_bxor(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; nxrk %r2, %r2, %r3
; br %r14
@ -632,3 +632,40 @@ block0(v0: i8, v1: i8, v2: i8):
; or %r2, %r3
; br %r14
function %bnot_of_bxor(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; xr %r2, %r3
; xilf %r2, 4294967295
; br %r14
function %bnot_of_bxor(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; vl %v1, 0(%r3)
; vl %v3, 0(%r4)
; vnx %v6, %v1, %v3
; vst %v6, 0(%r2)
; br %r14
function %bnot_of_bxor(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
v2 = bxor v0, v1
v3 = bnot v2
return v3
}
; block0:
; vnx %v24, %v24, %v25
; br %r14