diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle
index 1db1e8de32..fb6170c797 100644
--- a/cranelift/codegen/src/isa/s390x/lower.isle
+++ b/cranelift/codegen/src/isa/s390x/lower.isle
@@ -408,8 +408,8 @@
         (y_hi Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
         (y_lo Reg (vec_extract_lane $I64X2 y 1 (zero_reg)))
         (lo_pair RegPair (umul_wide x_lo y_lo))
-        (res_lo Reg (copy_reg $I64 (regpair_lo lo_pair)))
-        (res_hi_1 Reg (copy_reg $I64 (regpair_hi lo_pair)))
+        (res_lo Reg (regpair_lo lo_pair))
+        (res_hi_1 Reg (regpair_hi lo_pair))
         (res_hi_2 Reg (mul_reg $I64 x_lo y_hi))
         (res_hi_3 Reg (mul_reg $I64 x_hi y_lo))
         (res_hi Reg (add_reg $I64 res_hi_3 (add_reg $I64 res_hi_2 res_hi_1))))
@@ -435,7 +435,7 @@
 ;; Multiply high part unsigned, 64-bit types. (Uses umul_wide.)
 (rule (lower (has_type $I64 (umulhi x y)))
       (let ((pair RegPair (umul_wide x y)))
-        (copy_reg $I64 (regpair_hi pair))))
+        (regpair_hi pair)))
 
 ;; Multiply high part unsigned, vector types with 8-, 16-, or 32-bit elements.
 (rule (lower (has_type $I8X16 (umulhi x y))) (vec_umulhi $I8X16 x y))
@@ -447,10 +447,10 @@
 (rule (lower (has_type $I64X2 (umulhi x y)))
       (let ((pair_0 RegPair (umul_wide (vec_extract_lane $I64X2 x 0 (zero_reg))
                                        (vec_extract_lane $I64X2 y 0 (zero_reg))))
-            (res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
+            (res_0 Reg (regpair_hi pair_0))
             (pair_1 RegPair (umul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
                                        (vec_extract_lane $I64X2 y 1 (zero_reg))))
-            (res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
+            (res_1 Reg (regpair_hi pair_1)))
         (mov_to_vec128 $I64X2 res_0 res_1)))
 
@@ -473,7 +473,7 @@
 ;; Multiply high part signed, 64-bit types. (Uses smul_wide.)
 (rule (lower (has_type $I64 (smulhi x y)))
       (let ((pair RegPair (smul_wide x y)))
-        (copy_reg $I64 (regpair_hi pair))))
+        (regpair_hi pair)))
 
 ;; Multiply high part signed, vector types with 8-, 16-, or 32-bit elements.
 (rule (lower (has_type $I8X16 (smulhi x y))) (vec_smulhi $I8X16 x y))
@@ -488,7 +488,7 @@
             (res_0 Reg (copy_reg $I64 (regpair_hi pair_0)))
             (pair_1 RegPair (smul_wide (vec_extract_lane $I64X2 x 1 (zero_reg))
                                        (vec_extract_lane $I64X2 y 1 (zero_reg))))
-            (res_1 Reg (copy_reg $I64 (regpair_hi pair_1))))
+            (res_1 Reg (regpair_hi pair_1)))
         (mov_to_vec128 $I64X2 res_0 res_1)))
 
@@ -547,7 +547,7 @@
           ;; Emit the actual divide instruction.
           (pair RegPair (udivmod ext_ty ext_x ext_y)))
       ;; The quotient can be found in the low half of the result.
-      (copy_reg ty (regpair_lo pair))))
+      (regpair_lo pair)))
 
 ;; Implement `urem`. Same as `udiv`, but finds the remainder in
 ;; the high half of the result register pair instead.
@@ -557,7 +557,7 @@
         (ext_y Reg (put_in_reg_zext32 y))
         (ext_ty Type (ty_ext32 ty))
         (pair RegPair (udivmod ext_ty ext_x ext_y)))
-      (copy_reg ty (regpair_hi pair))))
+      (regpair_hi pair)))
 
 
 ;;;; Rules for `sdiv` and `srem` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -591,7 +591,7 @@
           ;; Emit the actual divide instruction.
           (pair RegPair (sdivmod ext_ty ext_x ext_y)))
       ;; The quotient can be found in the low half of the result.
-      (copy_reg ty (regpair_lo pair))))
+      (regpair_lo pair)))
 
 ;; Implement `srem`. Same as `sdiv`, but finds the remainder in
 ;; the high half of the result register pair instead. Also, handle
@@ -603,7 +603,7 @@
         (ext_ty Type (ty_ext32 ty))
        (checked_x Reg (maybe_avoid_srem_overflow OFcheck ext_ty ext_x ext_y))
        (pair RegPair (sdivmod ext_ty checked_x ext_y)))
-      (copy_reg ty (regpair_hi pair))))
+      (regpair_hi pair)))
 
 ;; Determine whether we need to perform an integer-overflow check.
 ;;
@@ -1190,7 +1190,7 @@
 (rule (clz_offset $I8 x) (add_simm16 $I8 x -56))
 (rule (clz_offset $I16 x) (add_simm16 $I16 x -48))
 (rule (clz_offset $I32 x) (add_simm16 $I32 x -32))
-(rule (clz_offset $I64 x) (copy_reg $I64 x))
+(rule (clz_offset $I64 x) x)
 
 ;; Count leading zeros, via FLOGR on an input zero-extended to 64 bits,
 ;; with the result compensated for the extra bits.
diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif
index 8803998743..3befae30a3 100644
--- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif
@@ -944,9 +944,9 @@ block0(v0: i128, v1: i128):
 }
 
 ; VCode:
-;   stmg %r7, %r15, 56(%r15)
+;   stmg %r6, %r15, 48(%r15)
 ; block0:
-;   lgr %r10, %r2
+;   lgr %r6, %r2
 ;   vl %v1, 0(%r3)
 ;   vl %v3, 0(%r4)
 ;   lgdr %r4, %f1
@@ -955,22 +955,21 @@
 ;   vlgvg %r9, %v3, 1
 ;   lgr %r3, %r5
 ;   mlgr %r2, %r9
-;   lgr %r8, %r2
-;   msgrkc %r2, %r5, %r7
+;   msgrkc %r14, %r5, %r7
 ;   msgrkc %r5, %r4, %r9
-;   agrk %r4, %r2, %r8
+;   agrk %r4, %r14, %r2
 ;   agr %r5, %r4
-;   vlvgp %v5, %r5, %r3
-;   lgr %r2, %r10
-;   vst %v5, 0(%r2)
-;   lmg %r7, %r15, 56(%r15)
+;   vlvgp %v1, %r5, %r3
+;   lgr %r2, %r6
+;   vst %v1, 0(%r2)
+;   lmg %r6, %r15, 48(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r7, %r15, 0x38(%r15)
+;   stmg %r6, %r15, 0x30(%r15)
 ; block1: ; offset 0x6
-;   lgr %r10, %r2
+;   lgr %r6, %r2
 ;   vl %v1, 0(%r3)
 ;   vl %v3, 0(%r4)
 ;   lgdr %r4, %f1
@@ -979,15 +978,14 @@
 ;   vlgvg %r9, %v3, 1
 ;   lgr %r3, %r5
 ;   mlgr %r2, %r9
-;   lgr %r8, %r2
-;   msgrkc %r2, %r5, %r7
+;   msgrkc %r14, %r5, %r7
 ;   msgrkc %r5, %r4, %r9
-;   agrk %r4, %r2, %r8
+;   agrk %r4, %r14, %r2
 ;   agr %r5, %r4
-;   vlvgp %v5, %r5, %r3
-;   lgr %r2, %r10
-;   vst %v5, 0(%r2)
-;   lmg %r7, %r15, 0x38(%r15)
+;   vlvgp %v1, %r5, %r3
+;   lgr %r2, %r6
+;   vst %v1, 0(%r2)
+;   lmg %r6, %r15, 0x30(%r15)
 ;   br %r14
 
 function %imul_i64(i64, i64) -> i64 {
@@ -1319,16 +1317,16 @@ block0(v0: i64, v1: i64):
 
 ; VCode:
 ; block0:
-;   lgr %r5, %r3
+;   lgr %r4, %r3
 ;   lgr %r3, %r2
-;   mlgr %r2, %r5
+;   mlgr %r2, %r4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   lgr %r5, %r3
+;   lgr %r4, %r3
 ;   lgr %r3, %r2
-;   mlgr %r2, %r5
+;   mlgr %r2, %r4
 ;   br %r14
 
 function %umulhi_i32(i32, i32) -> i32 {
@@ -1541,34 +1539,34 @@ block0(v0: i32, v1: i32):
 }
 
 ; VCode:
-;   stmg %r7, %r15, 56(%r15)
+;   stmg %r6, %r15, 48(%r15)
 ; block0:
-;   lgr %r7, %r3
+;   lgr %r6, %r3
 ;   lgfr %r3, %r2
 ;   iilf %r4, 2147483647
 ;   xrk %r5, %r4, %r3
-;   lgr %r4, %r7
-;   nr %r5, %r4
-;   cite %r5, -1
-;   dsgfr %r2, %r4
+;   lgr %r2, %r6
+;   nrk %r4, %r5, %r2
+;   cite %r4, -1
+;   dsgfr %r2, %r2
 ;   lgr %r2, %r3
-;   lmg %r7, %r15, 56(%r15)
+;   lmg %r6, %r15, 48(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r7, %r15, 0x38(%r15)
+;   stmg %r6, %r15, 0x30(%r15)
 ; block1: ; offset 0x6
-;   lgr %r7, %r3
+;   lgr %r6, %r3
 ;   lgfr %r3, %r2
 ;   iilf %r4, 0x7fffffff
 ;   xrk %r5, %r4, %r3
-;   lgr %r4, %r7
-;   nr %r5, %r4
-;   cite %r5, -1 ; trap: int_ovf
-;   dsgfr %r2, %r4 ; trap: int_divz
+;   lgr %r2, %r6
+;   nrk %r4, %r5, %r2
+;   cite %r4, -1 ; trap: int_ovf
+;   dsgfr %r2, %r2 ; trap: int_divz
 ;   lgr %r2, %r3
-;   lmg %r7, %r15, 0x38(%r15)
+;   lmg %r6, %r15, 0x30(%r15)
 ;   br %r14
 
 function %sdiv_i32_imm(i32) -> i32 {
@@ -1813,34 +1811,32 @@ block0(v0: i16, v1: i16):
 }
 
 ; VCode:
-;   stmg %r8, %r15, 64(%r15)
+;   stmg %r7, %r15, 56(%r15)
 ; block0:
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llhr %r3, %r2
-;   lgr %r5, %r4
-;   llhr %r5, %r5
-;   lgr %r2, %r8
+;   llhr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5
 ;   lgr %r2, %r3
-;   lmg %r8, %r15, 64(%r15)
+;   lmg %r7, %r15, 56(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r8, %r15, 0x40(%r15)
+;   stmg %r7, %r15, 0x38(%r15)
 ; block1: ; offset 0x6
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llhr %r3, %r2
-;   lgr %r5, %r4
-;   llhr %r5, %r5
-;   lgr %r2, %r8
+;   llhr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5 ; trap: int_divz
 ;   lgr %r2, %r3
-;   lmg %r8, %r15, 0x40(%r15)
+;   lmg %r7, %r15, 0x38(%r15)
 ;   br %r14
 
 function %udiv_i16_imm(i16) -> i16 {
@@ -1879,34 +1875,32 @@ block0(v0: i8, v1: i8):
 }
 
 ; VCode:
-;   stmg %r8, %r15, 64(%r15)
+;   stmg %r7, %r15, 56(%r15)
 ; block0:
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llcr %r3, %r2
-;   lgr %r5, %r4
-;   llcr %r5, %r5
-;   lgr %r2, %r8
+;   llcr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5
 ;   lgr %r2, %r3
-;   lmg %r8, %r15, 64(%r15)
+;   lmg %r7, %r15, 56(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r8, %r15, 0x40(%r15)
+;   stmg %r7, %r15, 0x38(%r15)
 ; block1: ; offset 0x6
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llcr %r3, %r2
-;   lgr %r5, %r4
-;   llcr %r5, %r5
-;   lgr %r2, %r8
+;   llcr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5 ; trap: int_divz
 ;   lgr %r2, %r3
-;   lmg %r8, %r15, 0x40(%r15)
+;   lmg %r7, %r15, 0x38(%r15)
 ;   br %r14
 
 function %udiv_i8_imm(i8) -> i8 {
@@ -1950,7 +1944,8 @@ block0(v0: i64, v1: i64):
 ;   lgr %r4, %r3
 ;   lgr %r3, %r2
 ;   locghie %r3, 0
-;   dsgr %r2, %r4
+;   lgr %r2, %r4
+;   dsgr %r2, %r2
 ;   br %r14
 ;
 ; Disassembled:
@@ -1959,7 +1954,8 @@
 ;   lgr %r4, %r3
 ;   lgr %r3, %r2
 ;   locghie %r3, 0
-;   dsgr %r2, %r4 ; trap: int_divz
+;   lgr %r2, %r4
+;   dsgr %r2, %r2 ; trap: int_divz
 ;   br %r14
 
 function %srem_i32(i32, i32) -> i32 {
@@ -1972,16 +1968,14 @@
 ; block0:
 ;   lgr %r5, %r3
 ;   lgfr %r3, %r2
-;   lgr %r2, %r5
-;   dsgfr %r2, %r2
+;   dsgfr %r2, %r5
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
 ;   lgr %r5, %r3
 ;   lgfr %r3, %r2
-;   lgr %r2, %r5
-;   dsgfr %r2, %r2 ; trap: int_divz
+;   dsgfr %r2, %r5 ; trap: int_divz
 ;   br %r14
 
 function %srem_i16(i16, i16) -> i16 {
@@ -1992,17 +1986,19 @@
 ; VCode:
 ; block0:
-;   lgr %r4, %r3
+;   lgr %r5, %r3
 ;   lghr %r3, %r2
-;   lhr %r4, %r4
+;   lgr %r2, %r5
+;   lhr %r4, %r2
 ;   dsgfr %r2, %r4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   lgr %r4, %r3
+;   lgr %r5, %r3
 ;   lghr %r3, %r2
-;   lhr %r4, %r4
+;   lgr %r2, %r5
+;   lhr %r4, %r2
 ;   dsgfr %r2, %r4 ; trap: int_divz
 ;   br %r14
 
@@ -2014,17 +2010,19 @@
 ; VCode:
 ; block0:
-;   lgr %r4, %r3
+;   lgr %r5, %r3
 ;   lgbr %r3, %r2
-;   lbr %r4, %r4
+;   lgr %r2, %r5
+;   lbr %r4, %r2
 ;   dsgfr %r2, %r4
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   lgr %r4, %r3
+;   lgr %r5, %r3
 ;   lgbr %r3, %r2
-;   lbr %r4, %r4
+;   lgr %r2, %r5
+;   lbr %r4, %r2
 ;   dsgfr %r2, %r4 ; trap: int_divz
 ;   br %r14
 
@@ -2079,32 +2077,30 @@ block0(v0: i16, v1: i16):
 }
 
 ; VCode:
-;   stmg %r8, %r15, 64(%r15)
+;   stmg %r7, %r15, 56(%r15)
 ; block0:
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llhr %r3, %r2
-;   lgr %r5, %r4
-;   llhr %r5, %r5
-;   lgr %r2, %r8
+;   llhr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5
-;   lmg %r8, %r15, 64(%r15)
+;   lmg %r7, %r15, 56(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r8, %r15, 0x40(%r15)
+;   stmg %r7, %r15, 0x38(%r15)
 ; block1: ; offset 0x6
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llhr %r3, %r2
-;   lgr %r5, %r4
-;   llhr %r5, %r5
-;   lgr %r2, %r8
+;   llhr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5 ; trap: int_divz
-;   lmg %r8, %r15, 0x40(%r15)
+;   lmg %r7, %r15, 0x38(%r15)
 ;   br %r14
 
 function %urem_i8(i8, i8) -> i8 {
@@ -2114,31 +2110,29 @@ block0(v0: i8, v1: i8):
 }
 
 ; VCode:
-;   stmg %r8, %r15, 64(%r15)
+;   stmg %r7, %r15, 56(%r15)
 ; block0:
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llcr %r3, %r2
-;   lgr %r5, %r4
-;   llcr %r5, %r5
-;   lgr %r2, %r8
+;   llcr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5
-;   lmg %r8, %r15, 64(%r15)
+;   lmg %r7, %r15, 56(%r15)
 ;   br %r14
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-;   stmg %r8, %r15, 0x40(%r15)
+;   stmg %r7, %r15, 0x38(%r15)
 ; block1: ; offset 0x6
 ;   lgr %r4, %r3
 ;   lhi %r5, 0
-;   lgr %r8, %r5
+;   lgr %r7, %r5
 ;   llcr %r3, %r2
-;   lgr %r5, %r4
-;   llcr %r5, %r5
-;   lgr %r2, %r8
+;   llcr %r5, %r4
+;   lgr %r2, %r7
 ;   dlr %r2, %r5 ; trap: int_divz
-;   lmg %r8, %r15, 0x40(%r15)
+;   lmg %r7, %r15, 0x38(%r15)
 ;   br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
index 960477cbbd..d1074b3d2b 100644
--- a/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/s390x/vec-arithmetic.clif
@@ -1148,11 +1148,11 @@ block0(v0: i64x2, v1: i64x2):
 ;   vlgvg %r3, %v24, 0
 ;   vlgvg %r4, %v25, 0
 ;   mlgr %r2, %r4
-;   lgr %r5, %r2
+;   lgr %r4, %r2
 ;   vlgvg %r3, %v24, 1
-;   vlgvg %r4, %v25, 1
-;   mlgr %r2, %r4
-;   vlvgp %v24, %r5, %r2
+;   vlgvg %r2, %v25, 1
+;   mlgr %r2, %r2
+;   vlvgp %v24, %r4, %r2
 ;   br %r14
 ;
 ; Disassembled:
@@ -1160,11 +1160,11 @@ block0(v0: i64x2, v1: i64x2):
 ;   vlgvg %r3, %v24, 0
 ;   vlgvg %r4, %v25, 0
 ;   mlgr %r2, %r4
-;   lgr %r5, %r2
+;   lgr %r4, %r2
 ;   vlgvg %r3, %v24, 1
-;   vlgvg %r4, %v25, 1
-;   mlgr %r2, %r4
-;   vlvgp %v24, %r5, %r2
+;   vlgvg %r2, %v25, 1
+;   mlgr %r2, %r2
+;   vlvgp %v24, %r4, %r2
 ;   br %r14
 
 function %umulhi_i32x4(i32x4, i32x4) -> i32x4 {
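Note (not part of the patch): the register-pair convention these rules lean on is the one spelled out in the `lower.isle` comments above. `mlgr` writes the full 128-bit product into an even/odd register pair, high half in the even register and low half in the odd one, and the divide instructions in the tests (`dlr`, `dsgr`, `dsgfr`) likewise leave the remainder in the high half and the quotient in the low half. A minimal Rust sketch of those pair semantics, purely illustrative — the helper names just mirror the ISLE constructors:

```rust
/// 64x64 -> 128-bit unsigned multiply, returned as (high, low) halves:
/// the two registers of the pair that `mlgr` writes. `umulhi` keeps only
/// the high half; the i128 `imul` lowering uses both.
fn umul_wide(x: u64, y: u64) -> (u64, u64) {
    let wide = (x as u128) * (y as u128);
    ((wide >> 64) as u64, wide as u64)
}

/// Unsigned divide producing (remainder, quotient): the high and low
/// halves of the pair the `udivmod` constructor models. The real
/// instructions trap on a zero divisor instead of panicking (see the
/// `; trap: int_divz` annotations in the expected disassembly).
fn udivmod(x: u64, y: u64) -> (u64, u64) {
    (x % y, x / y)
}

fn main() {
    // umulhi: only the high half of the widening multiply survives.
    let (hi, lo) = umul_wide(u64::MAX, 3);
    assert_eq!(hi, 2);
    assert_eq!(lo, u64::MAX - 2);

    // udiv takes the low half (quotient), urem the high half (remainder).
    let (rem, quot) = udivmod(17, 5);
    assert_eq!((quot, rem), (3, 2));
}
```

With the `copy_reg` calls gone, the rules hand the pair halves to the register allocator directly, which presumably resolves the fixed even/odd constraint itself — that is where the dropped `lgr` moves and the smaller save areas in the updated test expectations come from.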