Browse Source

s390x: Enable more runtests, and fix a few bugs (#4516)

This enables more runtests to be executed on s390x.  Doing so
uncovered two back-end bugs, which are fixed as well:

- The result of cls was always off by one.
- The result of popcnt.i16 had uninitialized high bits.

In addition, I found a bug in the load-op-store.clif test case:
     v3 = heap_addr.i64 heap0, v1, 4
     v4 = iconst.i64 42
     store.i32 v4, v3
This was clearly intended to perform a 32-bit store, but
actually performs a 64-bit store (it seems the type annotation
of the store opcode is ignored, and the type of the operand
is used instead).  That bug did not show any noticeable symptoms
on little-endian architectures, but broke on big-endian.
pull/4524/head
Ulrich Weigand 2 years ago
committed by GitHub
parent
commit
dd40bf075a
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 21
      cranelift/codegen/src/isa/s390x/lower.isle
  2. 14
      cranelift/filetests/filetests/isa/s390x/bitops.clif
  3. 1
      cranelift/filetests/filetests/runtests/bint.clif
  4. 1
      cranelift/filetests/filetests/runtests/cls.clif
  5. 1
      cranelift/filetests/filetests/runtests/clz.clif
  6. 1
      cranelift/filetests/filetests/runtests/conversions.clif
  7. 1
      cranelift/filetests/filetests/runtests/ctz.clif
  8. 3
      cranelift/filetests/filetests/runtests/iabs.clif
  9. 1
      cranelift/filetests/filetests/runtests/icmp-ugt.clif
  10. 1
      cranelift/filetests/filetests/runtests/ireduce.clif
  11. 3
      cranelift/filetests/filetests/runtests/load-op-store.clif
  12. 1
      cranelift/filetests/filetests/runtests/popcnt.clif
  13. 1
      cranelift/filetests/filetests/runtests/select.clif
  14. 3
      cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif
  15. 1
      cranelift/filetests/filetests/runtests/simd-scalartovector.clif
  16. 3
      cranelift/filetests/filetests/runtests/smulhi-aarch64.clif
  17. 1
      cranelift/filetests/filetests/runtests/smulhi.clif

21
cranelift/codegen/src/isa/s390x/lower.isle

@ -1149,17 +1149,26 @@
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The result of cls is not supposed to count the sign bit itself, just
;; additional copies of it. Therefore, when computing cls in terms of clz,
;; we need to subtract one. Fold this into the offset computation.
(decl cls_offset (Type Reg) Reg)
(rule (cls_offset $I8 x) (add_simm16 $I8 x -57))
(rule (cls_offset $I16 x) (add_simm16 $I16 x -49))
(rule (cls_offset $I32 x) (add_simm16 $I32 x -33))
(rule (cls_offset $I64 x) (add_simm16 $I64 x -1))
;; Count leading sign-bit copies. We don't have any instruction for that,
;; so we instead count the leading zeros after inverting the input if negative,
;; i.e. computing
;; cls(x) == clz(x ^ (x >> 63))
;; cls(x) == clz(x ^ (x >> 63)) - 1
;; where x is the sign-extended input.
(rule (lower (has_type (fits_in_64 ty) (cls x)))
(let ((ext_reg Reg (put_in_reg_sext64 x))
(signbit_copies Reg (ashr_imm $I64 ext_reg 63))
(inv_reg Reg (xor_reg $I64 ext_reg signbit_copies))
(clz RegPair (clz_reg 64 inv_reg)))
(clz_offset ty (regpair_hi clz))))
(cls_offset ty (regpair_hi clz))))
;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1214,12 +1223,14 @@
;; of each input byte separately, so we need to accumulate those partial
;; results via a series of log2(type size in bytes) - 1 additions. We
;; accumulate in the high byte, so that a final right shift will zero out
;; any unrelated bits to give a clean result.
;; any unrelated bits to give a clean result. (This does not work with
;; $I16, where we instead accumulate in the low byte and clear high bits
;; via an explicit and operation.)
(rule (lower (has_type (and (mie2_disabled) $I16) (popcnt x)))
(let ((cnt2 Reg (popcnt_byte x))
(cnt1 Reg (add_reg $I32 cnt2 (lshl_imm $I32 cnt2 8))))
(lshr_imm $I32 cnt1 8)))
(cnt1 Reg (add_reg $I32 cnt2 (lshr_imm $I32 cnt2 8))))
(and_uimm16shifted $I32 cnt1 (uimm16shifted 255 0))))
(rule (lower (has_type (and (mie2_disabled) $I32) (popcnt x)))
(let ((cnt4 Reg (popcnt_byte x))

14
cranelift/filetests/filetests/isa/s390x/bitops.clif

@ -93,7 +93,7 @@ block0(v0: i64):
; srag %r5, %r2, 63
; xgrk %r3, %r2, %r5
; flogr %r0, %r3
; lgr %r2, %r0
; aghik %r2, %r0, -1
; br %r14
function %cls_i32(i32) -> i32 {
@ -107,7 +107,7 @@ block0(v0: i32):
; srag %r3, %r5, 63
; xgr %r5, %r3
; flogr %r0, %r5
; ahik %r2, %r0, -32
; ahik %r2, %r0, -33
; br %r14
function %cls_i16(i16) -> i16 {
@ -121,7 +121,7 @@ block0(v0: i16):
; srag %r3, %r5, 63
; xgr %r5, %r3
; flogr %r0, %r5
; ahik %r2, %r0, -48
; ahik %r2, %r0, -49
; br %r14
function %cls_i8(i8) -> i8 {
@ -135,7 +135,7 @@ block0(v0: i8):
; srag %r3, %r5, 63
; xgr %r5, %r3
; flogr %r0, %r5
; ahik %r2, %r0, -56
; ahik %r2, %r0, -57
; br %r14
function %ctz_i64(i64) -> i64 {
@ -238,9 +238,9 @@ block0(v0: i16):
; block0:
; popcnt %r5, %r2
; sllk %r3, %r5, 8
; ar %r5, %r3
; srlk %r2, %r5, 8
; srlk %r3, %r5, 8
; ark %r2, %r5, %r3
; nill %r2, 255
; br %r14
function %popcnt_i8(i8) -> i8 {

1
cranelift/filetests/filetests/runtests/bint.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %bint_b1_i8_true() -> i8 {

1
cranelift/filetests/filetests/runtests/cls.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
; not implemented on `x86_64`
function %cls_i8(i8) -> i8 {

1
cranelift/filetests/filetests/runtests/clz.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 has_lzcnt

1
cranelift/filetests/filetests/runtests/conversions.clif

@ -1,6 +1,7 @@
test run
target x86_64
target s390x
target aarch64
function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 {

1
cranelift/filetests/filetests/runtests/ctz.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 has_bmi1

3
cranelift/filetests/filetests/runtests/iabs.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
; x86_64 only supports vector iabs
function %iabs_i8(i8) -> i8 {
@ -41,4 +42,4 @@ block0(v0: i64):
; run: %iabs_i64(0) == 0
; run: %iabs_i64(9223372036854775807) == 9223372036854775807
; run: %iabs_i64(-9223372036854775807) == 9223372036854775807
; run: %iabs_i64(-9223372036854775808) == -9223372036854775808
; run: %iabs_i64(-9223372036854775808) == -9223372036854775808

1
cranelift/filetests/filetests/runtests/icmp-ugt.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %icmp_ugt_i8(i8, i8) -> b1 {

1
cranelift/filetests/filetests/runtests/ireduce.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
function %ireduce_i16_i8(i16) -> i8 {

3
cranelift/filetests/filetests/runtests/load-op-store.clif

@ -1,5 +1,6 @@
test run
target x86_64
target s390x
target aarch64
function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 {
@ -28,7 +29,7 @@ function %load_op_store_iadd_i32(i64 vmctx, i64, i32) -> i32 {
block0(v0: i64, v1: i64, v2: i32):
v3 = heap_addr.i64 heap0, v1, 4
v4 = iconst.i64 42
v4 = iconst.i32 42
store.i32 v4, v3
v5 = load.i32 v3
v6 = iadd.i32 v5, v2

1
cranelift/filetests/filetests/runtests/popcnt.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
target x86_64
target x86_64 has_popcnt

1
cranelift/filetests/filetests/runtests/select.clif

@ -1,5 +1,6 @@
test interpret
test run
target s390x
target x86_64
function %select_eq_f32(f32, f32) -> i32 {

3
cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif

@ -1,5 +1,6 @@
test run
target aarch64
target s390x
; i8 and i16 are invalid source sizes for x86_64
function %scalartovector_i8(i8) -> i8x16 {
@ -16,4 +17,4 @@ block0(v0: i16):
return v1
}
; run: %scalartovector_i16(1) == [1 0 0 0 0 0 0 0]
; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0]
; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0]

1
cranelift/filetests/filetests/runtests/simd-scalartovector.clif

@ -1,5 +1,6 @@
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

3
cranelift/filetests/filetests/runtests/smulhi-aarch64.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
; x86_64 backend only supports `i16`, `i32`, and `i64` types.
function %smulhi_i8(i8, i8) -> i8 {
@ -10,4 +11,4 @@ block0(v0: i8, v1: i8):
}
; run: %smulhi_i8(-2, -4) == 0
; run: %smulhi_i8(2, -4) == -1
; run: %smulhi_i8(127, 127) == 63
; run: %smulhi_i8(127, 127) == 63

1
cranelift/filetests/filetests/runtests/smulhi.clif

@ -1,6 +1,7 @@
test interpret
test run
target aarch64
target s390x
set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41

Loading…
Cancel
Save