Browse Source

cranelift: Fix `cls` for small types on aarch64 (#4305)

The previous `cls` code was producing wrong results when fed with a -1 i8.

The fix here is to sign-extend instead of zero-extending, since we want
to keep the sign bit as one in order for it to be counted correctly
by the `cls` instruction.

This also merges the interpreter-only tests, now that aarch64
correctly supports this instruction.
pull/4341/head
Afonso Bordado 2 years ago
committed by GitHub
parent
commit
42d4f97b78
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 4
      cranelift/codegen/src/isa/aarch64/lower.isle
  2. 5
      cranelift/filetests/filetests/isa/aarch64/bitops.clif
  3. 23
      cranelift/filetests/filetests/runtests/cls-interpret.clif
  4. 20
      cranelift/filetests/filetests/runtests/cls.clif

4
cranelift/codegen/src/isa/aarch64/lower.isle

@ -1078,10 +1078,10 @@
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (cls x)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24)))
(rule (lower (has_type $I16 (cls x)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16)))
;; cls lo_cls, lo
;; cls hi_cls, hi

5
cranelift/filetests/filetests/isa/aarch64/bitops.clif

@ -121,7 +121,7 @@ block0(v0: i8):
}
; block0:
; uxtb w3, w0
; sxtb w3, w0
; cls w5, w3
; sub w0, w5, #24
; ret
@ -133,7 +133,7 @@ block0(v0: i16):
}
; block0:
; uxth w3, w0
; sxth w3, w0
; cls w5, w3
; sub w0, w5, #16
; ret
@ -928,4 +928,3 @@ block0(v0: i128, v1: i128):
; csel x0, x12, x6, ne
; csel x1, x4, x12, ne
; ret

23
cranelift/filetests/filetests/runtests/cls-interpret.clif

@ -1,23 +0,0 @@
test interpret
; aarch64 yields cls_i8(1) == 30, which is incorrect
function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; run: %cls_i8(1) == 6
; run: %cls_i8(0x40) == 0
; run: %cls_i8(-1) == 7
; run: %cls_i8(0) == 7
function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; run: %cls_i16(1) == 14
; run: %cls_i16(0x4000) == 0
; run: %cls_i16(-1) == 15
; run: %cls_i16(0) == 15

20
cranelift/filetests/filetests/runtests/cls-aarch64.clif → cranelift/filetests/filetests/runtests/cls.clif

@ -3,6 +3,26 @@ test run
target aarch64
; not implemented on `x86_64`
function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; run: %cls_i8(1) == 6
; run: %cls_i8(0x40) == 0
; run: %cls_i8(-1) == 7
; run: %cls_i8(0) == 7
function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; run: %cls_i16(1) == 14
; run: %cls_i16(0x4000) == 0
; run: %cls_i16(-1) == 15
; run: %cls_i16(0) == 15
function %cls_i32(i32) -> i32 {
block0(v0: i32):
v1 = cls v0
Loading…
Cancel
Save