Browse Source

s390x: Remove uses of copy_reg (#6253)

* Remove uses of `copy_reg` in s390x lowerings

* Update tests

* Add one copy back in for smulhi to avoid an inserted move
pull/6258/head
Trevor Elliott 2 years ago
committed by GitHub
parent
commit
e6339b2725
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
  1. 24
      cranelift/codegen/src/isa/s390x/lower.isle
  2. 190
      cranelift/filetests/filetests/isa/s390x/arithmetic.clif
  3. 16
      cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif

24
cranelift/codegen/src/isa/s390x/lower.isle

@ -408,8 +408,8 @@
(y_hi Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
(y_lo Reg (vec_extract_lane $I64X2 y 1 (zero_reg)))
(lo_pair RegPair (umul_wide x_lo y_lo))
(res_lo Reg (copy_reg $I64 (regpair_lo lo_pair)))
(res_hi_1 Reg (copy_reg $I64 (regpair_hi lo_pair)))
(res_lo Reg (regpair_lo lo_pair))
(res_hi_1 Reg (regpair_hi lo_pair))
(res_hi_2 Reg (mul_reg $I64 x_lo y_hi))
(res_hi_3 Reg (mul_reg $I64 x_hi y_lo))
(res_hi Reg (add_reg $I64 res_hi_3 (add_reg $I64 res_hi_2 res_hi_1))))
@ -435,7 +435,7 @@
;; Multiply high part unsigned, 64-bit types. (Uses umul_wide.)
(rule (lower (has_type $I64 (umulhi x y)))
(let ((pair RegPair (umul_wide x y)))
(copy_reg $I64 (regpair_hi pair))))
(regpair_hi pair)))
;; Multiply high part unsigned, vector types with 8-, 16-, or 32-bit elements.
(rule (lower (has_type $I8X16 (umulhi x y))) (vec_umulhi $I8X16 x y))
@ -447,10 +447,10 @@
(rule (lower (has_type $I64X2 (umulhi x y)))
(let ((pair_0 RegPair (umul_wide (vec_extract_lane $I64X2 x 0 (zero_reg))
(vec_extract_lane $I64X2 y 0 (zero_reg))))
(res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
(res_0 Reg (regpair_hi pair_0))
(pair_1 RegPair (umul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
(vec_extract_lane $I64X2 y 1 (zero_reg))))
(res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
(res_1 Reg (regpair_hi pair_1)))
(mov_to_vec128 $I64X2 res_0 res_1)))
@ -473,7 +473,7 @@
;; Multiply high part signed, 64-bit types. (Uses smul_wide.)
(rule (lower (has_type $I64 (smulhi x y)))
(let ((pair RegPair (smul_wide x y)))
(copy_reg $I64 (regpair_hi pair))))
(regpair_hi pair)))
;; Multiply high part signed, vector types with 8-, 16-, or 32-bit elements.
(rule (lower (has_type $I8X16 (smulhi x y))) (vec_smulhi $I8X16 x y))
@ -488,7 +488,7 @@
(res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
(pair_1 RegPair (smul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
(vec_extract_lane $I64X2 y 1 (zero_reg))))
(res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
(res_1 Reg (regpair_hi pair_1)))
(mov_to_vec128 $I64X2 res_0 res_1)))
@ -547,7 +547,7 @@
;; Emit the actual divide instruction.
(pair RegPair (udivmod ext_ty ext_x ext_y)))
;; The quotient can be found in the low half of the result.
(copy_reg ty (regpair_lo pair))))
(regpair_lo pair)))
;; Implement `urem`. Same as `udiv`, but finds the remainder in
;; the high half of the result register pair instead.
@ -557,7 +557,7 @@
(ext_y Reg (put_in_reg_zext32 y))
(ext_ty Type (ty_ext32 ty))
(pair RegPair (udivmod ext_ty ext_x ext_y)))
(copy_reg ty (regpair_hi pair))))
(regpair_hi pair)))
;;;; Rules for `sdiv` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -591,7 +591,7 @@
;; Emit the actual divide instruction.
(pair RegPair (sdivmod ext_ty ext_x ext_y)))
;; The quotient can be found in the low half of the result.
(copy_reg ty (regpair_lo pair))))
(regpair_lo pair)))
;; Implement `srem`. Same as `sdiv`, but finds the remainder in
;; the high half of the result register pair instead. Also, handle
@ -603,7 +603,7 @@
(ext_ty Type (ty_ext32 ty))
(checked_x Reg (maybe_avoid_srem_overflow OFcheck ext_ty ext_x ext_y))
(pair RegPair (sdivmod ext_ty checked_x ext_y)))
(copy_reg ty (regpair_hi pair))))
(regpair_hi pair)))
;; Determine whether we need to perform an integer-overflow check.
;;
@ -1190,7 +1190,7 @@
(rule (clz_offset $I8 x) (add_simm16 $I8 x -56))
(rule (clz_offset $I16 x) (add_simm16 $I16 x -48))
(rule (clz_offset $I32 x) (add_simm16 $I32 x -32))
(rule (clz_offset $I64 x) (copy_reg $I64 x))
(rule (clz_offset $I64 x) x)
;; Count leading zeros, via FLOGR on an input zero-extended to 64 bits,
;; with the result compensated for the extra bits.

190
cranelift/filetests/filetests/isa/s390x/arithmetic.clif

@ -944,9 +944,9 @@ block0(v0: i128, v1: i128):
}
; VCode:
; stmg %r7, %r15, 56(%r15)
; stmg %r6, %r15, 48(%r15)
; block0:
; lgr %r10, %r2
; lgr %r6, %r2
; vl %v1, 0(%r3)
; vl %v3, 0(%r4)
; lgdr %r4, %f1
@ -955,22 +955,21 @@ block0(v0: i128, v1: i128):
; vlgvg %r9, %v3, 1
; lgr %r3, %r5
; mlgr %r2, %r9
; lgr %r8, %r2
; msgrkc %r2, %r5, %r7
; msgrkc %r14, %r5, %r7
; msgrkc %r5, %r4, %r9
; agrk %r4, %r2, %r8
; agrk %r4, %r14, %r2
; agr %r5, %r4
; vlvgp %v5, %r5, %r3
; lgr %r2, %r10
; vst %v5, 0(%r2)
; lmg %r7, %r15, 56(%r15)
; vlvgp %v1, %r5, %r3
; lgr %r2, %r6
; vst %v1, 0(%r2)
; lmg %r6, %r15, 48(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r7, %r15, 0x38(%r15)
; stmg %r6, %r15, 0x30(%r15)
; block1: ; offset 0x6
; lgr %r10, %r2
; lgr %r6, %r2
; vl %v1, 0(%r3)
; vl %v3, 0(%r4)
; lgdr %r4, %f1
@ -979,15 +978,14 @@ block0(v0: i128, v1: i128):
; vlgvg %r9, %v3, 1
; lgr %r3, %r5
; mlgr %r2, %r9
; lgr %r8, %r2
; msgrkc %r2, %r5, %r7
; msgrkc %r14, %r5, %r7
; msgrkc %r5, %r4, %r9
; agrk %r4, %r2, %r8
; agrk %r4, %r14, %r2
; agr %r5, %r4
; vlvgp %v5, %r5, %r3
; lgr %r2, %r10
; vst %v5, 0(%r2)
; lmg %r7, %r15, 0x38(%r15)
; vlvgp %v1, %r5, %r3
; lgr %r2, %r6
; vst %v1, 0(%r2)
; lmg %r6, %r15, 0x30(%r15)
; br %r14
function %imul_i64(i64, i64) -> i64 {
@ -1319,16 +1317,16 @@ block0(v0: i64, v1: i64):
; VCode:
; block0:
; lgr %r5, %r3
; lgr %r4, %r3
; lgr %r3, %r2
; mlgr %r2, %r5
; mlgr %r2, %r4
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; lgr %r5, %r3
; lgr %r4, %r3
; lgr %r3, %r2
; mlgr %r2, %r5
; mlgr %r2, %r4
; br %r14
function %umulhi_i32(i32, i32) -> i32 {
@ -1541,34 +1539,34 @@ block0(v0: i32, v1: i32):
}
; VCode:
; stmg %r7, %r15, 56(%r15)
; stmg %r6, %r15, 48(%r15)
; block0:
; lgr %r7, %r3
; lgr %r6, %r3
; lgfr %r3, %r2
; iilf %r4, 2147483647
; xrk %r5, %r4, %r3
; lgr %r4, %r7
; nr %r5, %r4
; cite %r5, -1
; dsgfr %r2, %r4
; lgr %r2, %r6
; nrk %r4, %r5, %r2
; cite %r4, -1
; dsgfr %r2, %r2
; lgr %r2, %r3
; lmg %r7, %r15, 56(%r15)
; lmg %r6, %r15, 48(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r7, %r15, 0x38(%r15)
; stmg %r6, %r15, 0x30(%r15)
; block1: ; offset 0x6
; lgr %r7, %r3
; lgr %r6, %r3
; lgfr %r3, %r2
; iilf %r4, 0x7fffffff
; xrk %r5, %r4, %r3
; lgr %r4, %r7
; nr %r5, %r4
; cite %r5, -1 ; trap: int_ovf
; dsgfr %r2, %r4 ; trap: int_divz
; lgr %r2, %r6
; nrk %r4, %r5, %r2
; cite %r4, -1 ; trap: int_ovf
; dsgfr %r2, %r2 ; trap: int_divz
; lgr %r2, %r3
; lmg %r7, %r15, 0x38(%r15)
; lmg %r6, %r15, 0x30(%r15)
; br %r14
function %sdiv_i32_imm(i32) -> i32 {
@ -1813,34 +1811,32 @@ block0(v0: i16, v1: i16):
}
; VCode:
; stmg %r8, %r15, 64(%r15)
; stmg %r7, %r15, 56(%r15)
; block0:
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llhr %r3, %r2
; lgr %r5, %r4
; llhr %r5, %r5
; lgr %r2, %r8
; llhr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5
; lgr %r2, %r3
; lmg %r8, %r15, 64(%r15)
; lmg %r7, %r15, 56(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r8, %r15, 0x40(%r15)
; stmg %r7, %r15, 0x38(%r15)
; block1: ; offset 0x6
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llhr %r3, %r2
; lgr %r5, %r4
; llhr %r5, %r5
; lgr %r2, %r8
; llhr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5 ; trap: int_divz
; lgr %r2, %r3
; lmg %r8, %r15, 0x40(%r15)
; lmg %r7, %r15, 0x38(%r15)
; br %r14
function %udiv_i16_imm(i16) -> i16 {
@ -1879,34 +1875,32 @@ block0(v0: i8, v1: i8):
}
; VCode:
; stmg %r8, %r15, 64(%r15)
; stmg %r7, %r15, 56(%r15)
; block0:
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llcr %r3, %r2
; lgr %r5, %r4
; llcr %r5, %r5
; lgr %r2, %r8
; llcr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5
; lgr %r2, %r3
; lmg %r8, %r15, 64(%r15)
; lmg %r7, %r15, 56(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r8, %r15, 0x40(%r15)
; stmg %r7, %r15, 0x38(%r15)
; block1: ; offset 0x6
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llcr %r3, %r2
; lgr %r5, %r4
; llcr %r5, %r5
; lgr %r2, %r8
; llcr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5 ; trap: int_divz
; lgr %r2, %r3
; lmg %r8, %r15, 0x40(%r15)
; lmg %r7, %r15, 0x38(%r15)
; br %r14
function %udiv_i8_imm(i8) -> i8 {
@ -1950,7 +1944,8 @@ block0(v0: i64, v1: i64):
; lgr %r4, %r3
; lgr %r3, %r2
; locghie %r3, 0
; dsgr %r2, %r4
; lgr %r2, %r4
; dsgr %r2, %r2
; br %r14
;
; Disassembled:
@ -1959,7 +1954,8 @@ block0(v0: i64, v1: i64):
; lgr %r4, %r3
; lgr %r3, %r2
; locghie %r3, 0
; dsgr %r2, %r4 ; trap: int_divz
; lgr %r2, %r4
; dsgr %r2, %r2 ; trap: int_divz
; br %r14
function %srem_i32(i32, i32) -> i32 {
@ -1972,16 +1968,14 @@ block0(v0: i32, v1: i32):
; block0:
; lgr %r5, %r3
; lgfr %r3, %r2
; lgr %r2, %r5
; dsgfr %r2, %r2
; dsgfr %r2, %r5
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; lgr %r5, %r3
; lgfr %r3, %r2
; lgr %r2, %r5
; dsgfr %r2, %r2 ; trap: int_divz
; dsgfr %r2, %r5 ; trap: int_divz
; br %r14
function %srem_i16(i16, i16) -> i16 {
@ -1992,17 +1986,19 @@ block0(v0: i16, v1: i16):
; VCode:
; block0:
; lgr %r4, %r3
; lgr %r5, %r3
; lghr %r3, %r2
; lhr %r4, %r4
; lgr %r2, %r5
; lhr %r4, %r2
; dsgfr %r2, %r4
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; lgr %r4, %r3
; lgr %r5, %r3
; lghr %r3, %r2
; lhr %r4, %r4
; lgr %r2, %r5
; lhr %r4, %r2
; dsgfr %r2, %r4 ; trap: int_divz
; br %r14
@ -2014,17 +2010,19 @@ block0(v0: i8, v1: i8):
; VCode:
; block0:
; lgr %r4, %r3
; lgr %r5, %r3
; lgbr %r3, %r2
; lbr %r4, %r4
; lgr %r2, %r5
; lbr %r4, %r2
; dsgfr %r2, %r4
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; lgr %r4, %r3
; lgr %r5, %r3
; lgbr %r3, %r2
; lbr %r4, %r4
; lgr %r2, %r5
; lbr %r4, %r2
; dsgfr %r2, %r4 ; trap: int_divz
; br %r14
@ -2079,32 +2077,30 @@ block0(v0: i16, v1: i16):
}
; VCode:
; stmg %r8, %r15, 64(%r15)
; stmg %r7, %r15, 56(%r15)
; block0:
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llhr %r3, %r2
; lgr %r5, %r4
; llhr %r5, %r5
; lgr %r2, %r8
; llhr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5
; lmg %r8, %r15, 64(%r15)
; lmg %r7, %r15, 56(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r8, %r15, 0x40(%r15)
; stmg %r7, %r15, 0x38(%r15)
; block1: ; offset 0x6
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llhr %r3, %r2
; lgr %r5, %r4
; llhr %r5, %r5
; lgr %r2, %r8
; llhr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5 ; trap: int_divz
; lmg %r8, %r15, 0x40(%r15)
; lmg %r7, %r15, 0x38(%r15)
; br %r14
function %urem_i8(i8, i8) -> i8 {
@ -2114,31 +2110,29 @@ block0(v0: i8, v1: i8):
}
; VCode:
; stmg %r8, %r15, 64(%r15)
; stmg %r7, %r15, 56(%r15)
; block0:
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llcr %r3, %r2
; lgr %r5, %r4
; llcr %r5, %r5
; lgr %r2, %r8
; llcr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5
; lmg %r8, %r15, 64(%r15)
; lmg %r7, %r15, 56(%r15)
; br %r14
;
; Disassembled:
; block0: ; offset 0x0
; stmg %r8, %r15, 0x40(%r15)
; stmg %r7, %r15, 0x38(%r15)
; block1: ; offset 0x6
; lgr %r4, %r3
; lhi %r5, 0
; lgr %r8, %r5
; lgr %r7, %r5
; llcr %r3, %r2
; lgr %r5, %r4
; llcr %r5, %r5
; lgr %r2, %r8
; llcr %r5, %r4
; lgr %r2, %r7
; dlr %r2, %r5 ; trap: int_divz
; lmg %r8, %r15, 0x40(%r15)
; lmg %r7, %r15, 0x38(%r15)
; br %r14

16
cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif

@ -1148,11 +1148,11 @@ block0(v0: i64x2, v1: i64x2):
; vlgvg %r3, %v24, 0
; vlgvg %r4, %v25, 0
; mlgr %r2, %r4
; lgr %r5, %r2
; lgr %r4, %r2
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v25, 1
; mlgr %r2, %r4
; vlvgp %v24, %r5, %r2
; vlgvg %r2, %v25, 1
; mlgr %r2, %r2
; vlvgp %v24, %r4, %r2
; br %r14
;
; Disassembled:
@ -1160,11 +1160,11 @@ block0(v0: i64x2, v1: i64x2):
; vlgvg %r3, %v24, 0
; vlgvg %r4, %v25, 0
; mlgr %r2, %r4
; lgr %r5, %r2
; lgr %r4, %r2
; vlgvg %r3, %v24, 1
; vlgvg %r4, %v25, 1
; mlgr %r2, %r4
; vlvgp %v24, %r5, %r2
; vlgvg %r2, %v25, 1
; mlgr %r2, %r2
; vlvgp %v24, %r4, %r2
; br %r14
function %umulhi_i32x4(i32x4, i32x4) -> i32x4 {

Loading…
Cancel
Save