Browse Source

riscv64: improve unordered comparison generated code (#5636)

Improve the generated code for unordered floating point comparisons by negating the comparison and inverting the branches. This allows us to emit the inverse (ordered) versions of the comparisons, which generate significantly better code.
pull/5638/head
Trevor Elliott 2 years ago
committed by GitHub
parent
commit
7926808e8e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 36
      cranelift/codegen/src/isa/riscv64/inst.isle
  2. 12
      cranelift/codegen/src/isle_prelude.rs
  3. 4
      cranelift/codegen/src/prelude.isle
  4. 25
      cranelift/filetests/filetests/isa/riscv64/fcmp.clif

36
cranelift/codegen/src/isa/riscv64/inst.isle

@ -1912,12 +1912,20 @@
;; Branching on the result of an icmp
(rule 1
(lower_branch (brif (icmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brif (maybe_uextend (icmp cc a @ (value_type ty) b)) _ _) targets)
(lower_br_icmp cc a b targets ty))
;; Branching on the result of an fcmp
(rule 1
(lower_branch (brif (fcmp cc a @ (value_type ty) b) _ _) targets)
(lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets)
(if-let $true (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) else then))))
;; brif on an fcmp whose condition code is NOT one of the unordered variants
;; (`floatcc_unordered` yields $false): the comparison is emitted with `cc`
;; unchanged, and the branch targets are handed to `cond_br` in their original
;; order (`then` = targets[0], `else` = targets[1]).
(rule 1
(lower_branch (brif (maybe_uextend (fcmp cc a @ (value_type ty) b)) _ _) targets)
(if-let $false (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))
@ -1935,15 +1943,23 @@
(lower_brz_or_nz (IntCC.NotEqual) cmp targets $I64)))
(rule 1
(lower_branch (brz (icmp cc a @ (value_type ty) b) _) targets)
(lower_branch (brz (maybe_uextend (icmp cc a @ (value_type ty) b)) _) targets)
(lower_br_icmp (intcc_inverse cc) a b targets ty))
(rule 1
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _) targets)
(lower_branch (brz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _) targets)
(if-let $true (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else))))
;; brz on an fcmp whose condition code is NOT one of the unordered variants
;; (`floatcc_unordered` yields $false): the comparison is emitted with `cc`
;; unchanged. Because brz branches when the value is zero, the targets are
;; handed to `cond_br` in swapped order (`else` = targets[1] first, then
;; `then` = targets[0]).
(rule 1
(lower_branch (brz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _) targets)
(if-let $false (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp cc ty a b) else then))))
;;;;
(rule
(lower_branch (brnz v @ (value_type ty) _) targets)
@ -1957,11 +1973,19 @@
(lower_brz_or_nz (IntCC.NotEqual) cmp targets $I64)))
(rule 1
(lower_branch (brnz (icmp cc a @ (value_type ty) b) _) targets)
(lower_branch (brnz (maybe_uextend (icmp cc a @ (value_type ty) b)) _) targets)
(lower_br_icmp cc a b targets ty))
(rule 1
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _) targets)
(lower_branch (brnz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _) targets)
(if-let $true (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) else then))))
;; brnz on an fcmp whose condition code is NOT one of the unordered variants
;; (`floatcc_unordered` yields $false): the comparison is emitted with `cc`
;; unchanged, and the branch targets are handed to `cond_br` in their original
;; order (`then` = targets[0], `else` = targets[1]).
(rule 1
(lower_branch (brnz (maybe_uextend (fcmp cc a @ (value_type ty) b)) _) targets)
(if-let $false (floatcc_unordered cc))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))

12
cranelift/codegen/src/isle_prelude.rs

@ -616,6 +616,18 @@ macro_rules! isle_common_prelude_methods {
cc.inverse()
}
/// Reports whether `cc` is one of the unordered floating-point condition
/// codes: `Unordered` itself or any of the `UnorderedOr*` variants. Every
/// other condition code yields `false`.
fn floatcc_unordered(&mut self, cc: &FloatCC) -> bool {
    matches!(
        *cc,
        FloatCC::Unordered
            | FloatCC::UnorderedOrEqual
            | FloatCC::UnorderedOrLessThan
            | FloatCC::UnorderedOrLessThanOrEqual
            | FloatCC::UnorderedOrGreaterThan
            | FloatCC::UnorderedOrGreaterThanOrEqual
    )
}
#[inline]
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
let [a, b] = *arr;

4
cranelift/codegen/src/prelude.isle

@ -225,6 +225,10 @@
(decl floatcc_inverse (FloatCC) FloatCC)
(extern constructor floatcc_inverse floatcc_inverse)
;; True when this FloatCC involves an unordered comparison, i.e. it is
;; `Unordered` or one of the `UnorderedOr*` condition codes; false for every
;; other condition code.
(decl pure floatcc_unordered (FloatCC) bool)
(extern constructor floatcc_unordered floatcc_unordered)
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; An extractor that only matches types that can fit in 16 bits.

25
cranelift/filetests/filetests/isa/riscv64/fcmp.clif

@ -27,3 +27,28 @@ block1:
; block3:
; ret
function %f1() {
block0:
v0 = f64const 0.0
v1 = fcmp ult v0, v0
brnz v1, block1
jump block1
block1:
return
}
; block0:
; li t1,0
; fmv.d.x ft1,t1
; li a2,0
; fmv.d.x ft5,a2
; fle.d a5,ft5,ft1
; bne a5,zero,taken(label2),not_taken(label1)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret

Loading…
Cancel
Save