Browse Source

riscv64: Add support for min/max instructions (#7146)

pull/7154/head
Afonso Bordado 1 year ago
committed by GitHub
parent
commit
7ef5094a0e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 55
      cranelift/codegen/src/isa/riscv64/inst.isle
  2. 2
      cranelift/codegen/src/isa/riscv64/lower.isle
  3. 5
      cranelift/codegen/src/isa/riscv64/lower/isle.rs
  4. 9
      cranelift/codegen/src/isa/x64/inst.isle
  5. 5
      cranelift/codegen/src/isa/x64/lower/isle.rs
  6. 5
      cranelift/codegen/src/isle_prelude.rs
  7. 6
      cranelift/codegen/src/prelude.isle
  8. 135
      cranelift/filetests/filetests/isa/riscv64/smax-zbb.clif
  9. 135
      cranelift/filetests/filetests/isa/riscv64/smin-zbb.clif
  10. 139
      cranelift/filetests/filetests/isa/riscv64/umax-zbb.clif
  11. 139
      cranelift/filetests/filetests/isa/riscv64/umin-zbb.clif

55
cranelift/codegen/src/isa/riscv64/inst.isle

@ -1599,6 +1599,21 @@
(rule (rv_max rs1 rs2)
(alu_rrr (AluOPRRR.Max) rs1 rs2))
;; Emits a `maxu` instruction taking two X registers and producing the
;; result in a fresh X register.
(decl rv_maxu (XReg XReg) XReg)
(rule (rv_maxu lhs rhs)
  (alu_rrr (AluOPRRR.Maxu) lhs rhs))
;; Helper for emitting the `min` instruction.
;;
;; This is the signed-minimum counterpart of `rv_max`, so it must select the
;; `Min` ALU opcode; emitting `Max` here would make every `smin` lowered
;; through this helper compute a maximum instead.
(decl rv_min (XReg XReg) XReg)
(rule (rv_min rs1 rs2)
  (alu_rrr (AluOPRRR.Min) rs1 rs2))
;; Emits a `minu` instruction taking two X registers and producing the
;; result in a fresh X register.
(decl rv_minu (XReg XReg) XReg)
(rule (rv_minu lhs rhs)
  (alu_rrr (AluOPRRR.Minu) lhs rhs))
;; Helper for emitting the `sext.b` instruction.
(decl rv_sextb (XReg) XReg)
(rule (rv_sextb rs1)
@ -2546,11 +2561,34 @@
(extern constructor gen_stack_addr gen_stack_addr)
(decl gen_select_xreg (IntegerCompare XReg XReg) XReg)
(rule (gen_select_xreg c x y)
(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y)
(if-let (IntCC.UnsignedLessThan) (intcc_without_eq cc))
(if-let $true (has_zbb))
(rv_minu x y))
(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y)
(if-let (IntCC.SignedLessThan) (intcc_without_eq cc))
(if-let $true (has_zbb))
(rv_min x y))
(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y)
(if-let (IntCC.UnsignedGreaterThan) (intcc_without_eq cc))
(if-let $true (has_zbb))
(rv_maxu x y))
(rule 1 (gen_select_xreg (int_compare_decompose cc x y) x y)
(if-let (IntCC.SignedGreaterThan) (intcc_without_eq cc))
(if-let $true (has_zbb))
(rv_max x y))
(rule 0 (gen_select_xreg c x y)
(let
((dst WritableReg (temp_writable_xreg))
(_ Unit (emit (MInst.Select dst c x y))))
(writable_reg_to_reg dst)))
(decl gen_select_vreg (IntegerCompare VReg VReg) VReg)
(rule (gen_select_vreg c x y)
(let
@ -2649,6 +2687,10 @@
(decl int_compare (IntCC XReg XReg) IntegerCompare)
(extern constructor int_compare int_compare)
;; Extract the components of an `IntegerCompare`
(decl int_compare_decompose (IntCC XReg XReg) IntegerCompare)
(extern extractor infallible int_compare_decompose int_compare_decompose)
(decl label_to_br_target (MachLabel) CondBrTarget)
(extern constructor label_to_br_target label_to_br_target)
(convert MachLabel CondBrTarget label_to_br_target)
@ -2826,17 +2868,6 @@
(i128_sub (value_regs_zero) val))
;; Selects the greatest of two registers as signed values.
(decl max (Type XReg XReg) XReg)
(rule (max (fits_in_64 (ty_int ty)) x y)
(if-let $true (has_zbb))
(rv_max x y))
(rule (max (fits_in_64 (ty_int ty)) x y)
(if-let $false (has_zbb))
(gen_select_xreg (cmp_gt x y) x y))
;; Builds an instruction sequence that traps if the comparison succeeds.
(decl gen_trapif (IntCC XReg XReg TrapCode) InstOutput)
(rule (gen_trapif cc a b trap_code)

2
cranelift/codegen/src/isa/riscv64/lower.isle

@ -2044,7 +2044,7 @@
(rule 0 (lower (has_type (ty_int_ref_scalar_64 ty) (iabs x)))
(let ((extended XReg (sext x))
(negated XReg (rv_neg extended)))
(max $I64 extended negated)))
(gen_select_xreg (cmp_gt extended negated) extended negated)))
;; For vectors we generate the same code, but with vector instructions
;; we can skip the sign extension, since the vector unit will only process

5
cranelift/codegen/src/isa/riscv64/lower/isle.rs

@ -482,6 +482,11 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend>
}
}
/// Splits an `IntegerCompare` into its condition code and its two operands,
/// each operand wrapped as an `XReg`.
#[inline]
fn int_compare_decompose(&mut self, cmp: IntegerCompare) -> (IntCC, XReg, XReg) {
    // Convert the operands in the same order as they are returned.
    let lhs = self.xreg_new(cmp.rs1);
    let rhs = self.xreg_new(cmp.rs2);
    (cmp.kind, lhs, rhs)
}
#[inline]
fn vstate_from_type(&mut self, ty: Type) -> VState {
VState::from_type(ty)

9
cranelift/codegen/src/isa/x64/inst.isle

@ -1669,15 +1669,6 @@
(decl xmi_imm (u32) XmmMemImm)
(extern constructor xmi_imm xmi_imm)
;;;; Helpers for Working With Integer Comparison Codes ;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; This is a direct import of `IntCC::without_equal`.
;; Get the corresponding IntCC with the equal component removed.
;; For conditions without a zero component, this is a no-op.
(decl intcc_without_eq (IntCC) IntCC)
(extern constructor intcc_without_eq intcc_without_eq)
;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;
(type RegisterClass

5
cranelift/codegen/src/isa/x64/lower/isle.rs

@ -623,11 +623,6 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
}
}
#[inline]
fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC {
x.without_equal()
}
#[inline]
fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
CC::from_intcc(*intcc)

5
cranelift/codegen/src/isle_prelude.rs

@ -846,6 +846,11 @@ macro_rules! isle_common_prelude_methods {
cc.complement()
}
/// Returns the result of calling `without_equal` on the given condition
/// code; this is the Rust side of the ISLE `intcc_without_eq` constructor.
#[inline]
fn intcc_without_eq(&mut self, x: &IntCC) -> IntCC {
x.without_equal()
}
#[inline]
fn floatcc_swap_args(&mut self, cc: &FloatCC) -> FloatCC {
cc.swap_args()

6
cranelift/codegen/src/prelude.isle

@ -337,6 +337,12 @@
(decl intcc_complement (IntCC) IntCC)
(extern constructor intcc_complement intcc_complement)
;; This is a direct import of `IntCC::without_equal`.
;; Get the corresponding IntCC with the equal component removed.
;; For conditions without an equal component, this is a no-op.
(decl pure intcc_without_eq (IntCC) IntCC)
(extern constructor intcc_without_eq intcc_without_eq)
;; Swap args of a FloatCC flag.
(decl floatcc_swap_args (FloatCC) FloatCC)
(extern constructor floatcc_swap_args floatcc_swap_args)

135
cranelift/filetests/filetests/isa/riscv64/smax-zbb.clif

@ -0,0 +1,135 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb
function %smax_i8(i8, i8) -> i8{
block0(v0: i8, v1: i8):
v2 = smax v0, v1
return v2
}
; VCode:
; block0:
; sext.b a3,a0
; sext.b a5,a1
; max a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x16, 0x45, 0x60
; .byte 0x93, 0x97, 0x45, 0x60
; .byte 0x33, 0xe5, 0xf6, 0x0a
; ret
function %smax_i16(i16, i16) -> i16{
block0(v0: i16, v1: i16):
v2 = smax v0, v1
return v2
}
; VCode:
; block0:
; sext.h a3,a0
; sext.h a5,a1
; max a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x16, 0x55, 0x60
; .byte 0x93, 0x97, 0x55, 0x60
; .byte 0x33, 0xe5, 0xf6, 0x0a
; ret
function %smax_i32(i32, i32) -> i32{
block0(v0: i32, v1: i32):
v2 = smax v0, v1
return v2
}
; VCode:
; block0:
; sext.w a3,a0
; sext.w a5,a1
; max a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sext.w a3, a0
; sext.w a5, a1
; .byte 0x33, 0xe5, 0xf6, 0x0a
; ret
function %smax_i64(i64, i64) -> i64{
block0(v0: i64, v1: i64):
v2 = smax v0, v1
return v2
}
; VCode:
; block0:
; max a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0x65, 0xb5, 0x0a
; ret
function %smax_i128(i128, i128) -> i128{
block0(v0: i128, v1: i128):
v2 = smax v0, v1
return v2
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s3,-8(sp)
; add sp,-16
; block0:
; sgt a5,[a0,a1],[a2,a3]##ty=i128
; mv a4,a0
; mv s3,a1
; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero)
; add sp,+16
; ld s3,-8(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; sd s3, -8(sp)
; addi sp, sp, -0x10
; block1: ; offset 0x18
; blt a3, a1, 0xc
; bne a1, a3, 0x10
; bgeu a2, a0, 0xc
; addi a5, zero, 1
; j 8
; mv a5, zero
; mv a4, a0
; mv s3, a1
; beqz a5, 0x10
; mv a0, a4
; mv a1, s3
; j 0xc
; mv a0, a2
; mv a1, a3
; addi sp, sp, 0x10
; ld s3, -8(sp)
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

135
cranelift/filetests/filetests/isa/riscv64/smin-zbb.clif

@ -0,0 +1,135 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb
function %smin_i8(i8, i8) -> i8{
block0(v0: i8, v1: i8):
v2 = smin v0, v1
return v2
}
; VCode:
; block0:
; sext.b a3,a0
; sext.b a5,a1
; min a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x16, 0x45, 0x60
; .byte 0x93, 0x97, 0x45, 0x60
; .byte 0x33, 0xc5, 0xf6, 0x0a
; ret
function %smin_i16(i16, i16) -> i16{
block0(v0: i16, v1: i16):
v2 = smin v0, v1
return v2
}
; VCode:
; block0:
; sext.h a3,a0
; sext.h a5,a1
; min a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x16, 0x55, 0x60
; .byte 0x93, 0x97, 0x55, 0x60
; .byte 0x33, 0xc5, 0xf6, 0x0a
; ret
function %smin_i32(i32, i32) -> i32{
block0(v0: i32, v1: i32):
v2 = smin v0, v1
return v2
}
; VCode:
; block0:
; sext.w a3,a0
; sext.w a5,a1
; min a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sext.w a3, a0
; sext.w a5, a1
; .byte 0x33, 0xc5, 0xf6, 0x0a
; ret
function %smin_i64(i64, i64) -> i64{
block0(v0: i64, v1: i64):
v2 = smin v0, v1
return v2
}
; VCode:
; block0:
; min a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0x45, 0xb5, 0x0a
; ret
function %smin_i128(i128, i128) -> i128{
block0(v0: i128, v1: i128):
v2 = smin v0, v1
return v2
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s3,-8(sp)
; add sp,-16
; block0:
; slt a5,[a0,a1],[a2,a3]##ty=i128
; mv a4,a0
; mv s3,a1
; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero)
; add sp,+16
; ld s3,-8(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; sd s3, -8(sp)
; addi sp, sp, -0x10
; block1: ; offset 0x18
; blt a1, a3, 0xc
; bne a1, a3, 0x10
; bgeu a0, a2, 0xc
; addi a5, zero, 1
; j 8
; mv a5, zero
; mv a4, a0
; mv s3, a1
; beqz a5, 0x10
; mv a0, a4
; mv a1, s3
; j 0xc
; mv a0, a2
; mv a1, a3
; addi sp, sp, 0x10
; ld s3, -8(sp)
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

139
cranelift/filetests/filetests/isa/riscv64/umax-zbb.clif

@ -0,0 +1,139 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb
function %umax_i8(i8, i8) -> i8{
block0(v0: i8, v1: i8):
v2 = umax v0, v1
return v2
}
; VCode:
; block0:
; andi a3,a0,255
; andi a5,a1,255
; maxu a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; andi a3, a0, 0xff
; andi a5, a1, 0xff
; .byte 0x33, 0xf5, 0xf6, 0x0a
; ret
function %umax_i16(i16, i16) -> i16{
block0(v0: i16, v1: i16):
v2 = umax v0, v1
return v2
}
; VCode:
; block0:
; zext.h a3,a0
; zext.h a5,a1
; maxu a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0xbb, 0x46, 0x05, 0x08
; .byte 0xbb, 0xc7, 0x05, 0x08
; .byte 0x33, 0xf5, 0xf6, 0x0a
; ret
function %umax_i32(i32, i32) -> i32{
block0(v0: i32, v1: i32):
v2 = umax v0, v1
return v2
}
; VCode:
; block0:
; slli a3,a0,32
; srli a5,a3,32
; slli a1,a1,32
; srli a3,a1,32
; maxu a0,a5,a3
; ret
;
; Disassembled:
; block0: ; offset 0x0
; slli a3, a0, 0x20
; srli a5, a3, 0x20
; slli a1, a1, 0x20
; srli a3, a1, 0x20
; .byte 0x33, 0xf5, 0xd7, 0x0a
; ret
function %umax_i64(i64, i64) -> i64{
block0(v0: i64, v1: i64):
v2 = umax v0, v1
return v2
}
; VCode:
; block0:
; maxu a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0x75, 0xb5, 0x0a
; ret
function %umax_i128(i128, i128) -> i128{
block0(v0: i128, v1: i128):
v2 = umax v0, v1
return v2
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s3,-8(sp)
; add sp,-16
; block0:
; ugt a5,[a0,a1],[a2,a3]##ty=i128
; mv a4,a0
; mv s3,a1
; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero)
; add sp,+16
; ld s3,-8(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; sd s3, -8(sp)
; addi sp, sp, -0x10
; block1: ; offset 0x18
; bltu a3, a1, 0xc
; bne a1, a3, 0x10
; bgeu a2, a0, 0xc
; addi a5, zero, 1
; j 8
; mv a5, zero
; mv a4, a0
; mv s3, a1
; beqz a5, 0x10
; mv a0, a4
; mv a1, s3
; j 0xc
; mv a0, a2
; mv a1, a3
; addi sp, sp, 0x10
; ld s3, -8(sp)
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

139
cranelift/filetests/filetests/isa/riscv64/umin-zbb.clif

@ -0,0 +1,139 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb
function %umin_i8(i8, i8) -> i8{
block0(v0: i8, v1: i8):
v2 = umin v0, v1
return v2
}
; VCode:
; block0:
; andi a3,a0,255
; andi a5,a1,255
; minu a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; andi a3, a0, 0xff
; andi a5, a1, 0xff
; .byte 0x33, 0xd5, 0xf6, 0x0a
; ret
function %umin_i16(i16, i16) -> i16{
block0(v0: i16, v1: i16):
v2 = umin v0, v1
return v2
}
; VCode:
; block0:
; zext.h a3,a0
; zext.h a5,a1
; minu a0,a3,a5
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0xbb, 0x46, 0x05, 0x08
; .byte 0xbb, 0xc7, 0x05, 0x08
; .byte 0x33, 0xd5, 0xf6, 0x0a
; ret
function %umin_i32(i32, i32) -> i32{
block0(v0: i32, v1: i32):
v2 = umin v0, v1
return v2
}
; VCode:
; block0:
; slli a3,a0,32
; srli a5,a3,32
; slli a1,a1,32
; srli a3,a1,32
; minu a0,a5,a3
; ret
;
; Disassembled:
; block0: ; offset 0x0
; slli a3, a0, 0x20
; srli a5, a3, 0x20
; slli a1, a1, 0x20
; srli a3, a1, 0x20
; .byte 0x33, 0xd5, 0xd7, 0x0a
; ret
function %umin_i64(i64, i64) -> i64{
block0(v0: i64, v1: i64):
v2 = umin v0, v1
return v2
}
; VCode:
; block0:
; minu a0,a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x33, 0x55, 0xb5, 0x0a
; ret
function %umin_i128(i128, i128) -> i128{
block0(v0: i128, v1: i128):
v2 = umin v0, v1
return v2
}
; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; sd s3,-8(sp)
; add sp,-16
; block0:
; ult a5,[a0,a1],[a2,a3]##ty=i128
; mv a4,a0
; mv s3,a1
; select [a0,a1],[a4,s3],[a2,a3]##condition=(a5 ne zero)
; add sp,+16
; ld s3,-8(sp)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; sd s3, -8(sp)
; addi sp, sp, -0x10
; block1: ; offset 0x18
; bltu a1, a3, 0xc
; bne a1, a3, 0x10
; bgeu a0, a2, 0xc
; addi a5, zero, 1
; j 8
; mv a5, zero
; mv a4, a0
; mv s3, a1
; beqz a5, 0x10
; mv a0, a4
; mv a1, s3
; j 0xc
; mv a0, a2
; mv a1, a3
; addi sp, sp, 0x10
; ld s3, -8(sp)
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret
Loading…
Cancel
Save