From 42d4f97b78b150c3586156c6140255b80716ccad Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Mon, 27 Jun 2022 23:55:02 +0100 Subject: [PATCH] cranelift: Fix `cls` for small types on aarch64 (#4305) The previous `cls` code was producing wrong results when fed with a -1 i8. The fix here is to sign extend instead of zero extending since we want to keep the sign bit as one in order for it to be counted correctly in the cls instruction This also merges the interpreter only tests now that aarch64 correctly supports this instruction --- cranelift/codegen/src/isa/aarch64/lower.isle | 4 ++-- .../filetests/isa/aarch64/bitops.clif | 5 ++-- .../filetests/runtests/cls-interpret.clif | 23 ------------------- .../runtests/{cls-aarch64.clif => cls.clif} | 20 ++++++++++++++++ 4 files changed, 24 insertions(+), 28 deletions(-) delete mode 100644 cranelift/filetests/filetests/runtests/cls-interpret.clif rename cranelift/filetests/filetests/runtests/{cls-aarch64.clif => cls.clif} (55%) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 3fc4f8de48..006dd3ecf1 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1078,10 +1078,10 @@ ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I8 (cls x))) - (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))) + (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24))) (rule (lower (has_type $I16 (cls x))) - (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))) + (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16))) ;; cls lo_cls, lo ;; cls hi_cls, hi diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index ef24f89690..da88426aab 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -121,7 +121,7 @@ block0(v0: i8): } ; block0: -; uxtb w3, w0 +; sxtb w3, w0 ; cls w5, w3 ; sub w0, w5, #24 ; ret @@ -133,7 +133,7 @@ block0(v0: i16): } ; block0: -; uxth w3, w0 +; sxth w3, w0 ; cls w5, w3 ; sub w0, w5, #16 ; ret @@ -928,4 +928,3 @@ block0(v0: i128, v1: i128): ; csel x0, x12, x6, ne ; csel x1, x4, x12, ne ; ret - diff --git a/cranelift/filetests/filetests/runtests/cls-interpret.clif b/cranelift/filetests/filetests/runtests/cls-interpret.clif deleted file mode 100644 index 11c6826a9a..0000000000 --- a/cranelift/filetests/filetests/runtests/cls-interpret.clif +++ /dev/null @@ -1,23 +0,0 @@ -test interpret -; aarch64 yields cls_i8(1) == 30, which is incorrect - -function %cls_i8(i8) -> i8 { -block0(v0: i8): - v1 = cls v0 - return v1 -} -; run: %cls_i8(1) == 6 -; run: %cls_i8(0x40) == 0 -; run: %cls_i8(-1) == 7 -; run: %cls_i8(0) == 7 - -function %cls_i16(i16) -> i16 { -block0(v0: i16): - v1 = cls v0 - return v1 -} -; run: %cls_i16(1) == 14 -; run: %cls_i16(0x4000) == 0 -; run: %cls_i16(-1) == 15 -; run: %cls_i16(0) == 15 - diff --git a/cranelift/filetests/filetests/runtests/cls-aarch64.clif b/cranelift/filetests/filetests/runtests/cls.clif similarity index 55% rename from cranelift/filetests/filetests/runtests/cls-aarch64.clif rename to cranelift/filetests/filetests/runtests/cls.clif index 0c852028e8..00411c33d2 100644 --- a/cranelift/filetests/filetests/runtests/cls-aarch64.clif +++ b/cranelift/filetests/filetests/runtests/cls.clif @@ -3,6 +3,26 @@ test run target aarch64 ; not implemented on `x86_64` +function %cls_i8(i8) -> i8 { +block0(v0: i8): + v1 = cls v0 + return v1 +} +; run: %cls_i8(1) == 6 +; run: %cls_i8(0x40) == 0 +; run: %cls_i8(-1) == 7 +; run: %cls_i8(0) == 7 + +function %cls_i16(i16) -> i16 { +block0(v0: i16): + v1 = cls v0 + return v1 +} +; run: %cls_i16(1) == 14 +; run: %cls_i16(0x4000) == 0 +; run: %cls_i16(-1) == 15 +; run: %cls_i16(0) == 15 + function %cls_i32(i32) -> i32 { block0(v0: i32): v1 = cls v0