
x64: Refactor float comparisons and their representations (#8362)

* x64: Refactor float comparisons and their representations

Currently the `XmmCmpRmR` instruction variant has a `dst` field and a
`src` field. The instruction doesn't actually write to `dst`, however,
and the `xmm_cmp_rm_r` constructor takes the `src` first followed by the
`dst`. This is inconsistent with most other xmm-related instructions,
where "src1" comes first and "src2", the operand which may be a memory
location, comes second. This memory-operand-second pattern also matches
the Intel manuals more closely.
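
As a concrete illustration of the new convention, here is a small
standalone sketch. The `XmmCmp`/`RegMem` types below are invented for
this example and are not Cranelift's real definitions; they only model
the operand roles: `src1` is the register operand, `src2` the reg-or-mem
operand, and the AT&T-style rendering prints `src2` first (the
instruction itself writes only flags).

    // Toy model of the operand convention (not Cranelift's API).
    enum RegMem {
        Reg(u8),      // xmm register number
        Mem(String),  // symbolic address standing in for a real amode
    }

    struct XmmCmp {
        src1: u8,     // register operand; printed last in AT&T syntax
        src2: RegMem, // reg-or-mem operand; printed first in AT&T syntax
    }

    impl XmmCmp {
        /// `src1` first, `src2` second, mirroring the Intel-manual form
        /// `ucomiss xmm1, xmm2/m32` where the first operand is a register.
        fn new(src1: u8, src2: RegMem) -> Self {
            XmmCmp { src1, src2 }
        }

        /// AT&T-style rendering: the reg-or-mem operand comes first.
        fn print(&self) -> String {
            let src2 = match &self.src2 {
                RegMem::Reg(r) => format!("%xmm{r}"),
                RegMem::Mem(m) => m.clone(),
            };
            format!("ucomiss {src2}, %xmm{}", self.src1)
        }
    }

    fn main() {
        // Testing `a < b`: `a` goes in `src1`, `b` in `src2`.
        let cmp = XmmCmp::new(2, RegMem::Reg(1));
        assert_eq!(cmp.print(), "ucomiss %xmm1, %xmm2");

        // A memory `src2` renders the same way.
        let mem = XmmCmp::new(4, RegMem::Mem("16(%rsp)".to_string()));
        assert_eq!(mem.print(), "ucomiss 16(%rsp), %xmm4");
    }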

This commit refactors the `XmmCmpRmR` instruction variant with the
following changes:

* `dst` is renamed to `src1`.
* `src` is renamed to `src2`.
* The `xmm_cmp_rm_r` helpers, and their callers, swapped their arguments
  to take `Xmm` first and `XmmMem` second.
* The `x64_ptest` helper followed suit, as it was modelled after the
  same helper.
* Callers of `x64_ucomis` swapped their arguments to preserve the
  operand order.
* The `Inst::xmm_cmp_rm_r` helper swapped its operand order, and its
  callers were updated accordingly.
* The VCode rendering of `XmmCmpRmR` swapped the order of its operands,
  which explains the changes in the rendering of filetests (the machine
  code emitted here does not change).

The changes were then propagated to Winch as well. Winch inherited the
`src`/`dst` naming, so those fields were renamed to `src1` and `src2`,
which swapped operands as well. Winch additionally had an accident in
`float_cmp_op` where values were popped in reverse order. These two
swaps cancelled each other out before this change, but to get all the
names to align correctly I've made the pop order consistent too. Sorry
there are a lot of swaps-of-swaps here; the basic idea is that the
low-level instruction constructor swapped its arguments, so to preserve
the same (correct) output today something else needed to be swapped. In
Winch's case it wasn't the immediate caller of the instruction
constructor, since that method already looked correct, but a
higher-level `float_cmp_op`, which called a helper, which called the
low-level constructor with swapped operands.
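
To make the pop-order point concrete, here is a tiny standalone sketch
(the function and the plain `Vec` stack are invented for illustration
and are not Winch's actual code): for a binary comparison the right-hand
operand sits on top of the value stack, so it must be popped first as
`src2` and the left-hand operand second as `src1`; popping in the other
order silently swaps the comparison unless something downstream swaps it
back.

    // Toy value stack for a binary float comparison `a < b`.
    fn pop_float_cmp_operands(stack: &mut Vec<f64>) -> (f64, f64) {
        let src2 = stack.pop().expect("rhs is on top of the stack");
        let src1 = stack.pop().expect("lhs sits below the rhs");
        (src1, src2)
    }

    fn main() {
        // `a` is pushed first, then `b`, so the stack is [a, b].
        let mut stack = vec![1.0, 2.0];
        let (src1, src2) = pop_float_cmp_operands(&mut stack);
        assert!(src1 < src2); // src1 == a == 1.0, src2 == b == 2.0
    }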

* Review comments
Branch: pull/8410/head
Author: Alex Crichton (committed by GitHub)
Commit: e170037750
7 changed files:
  1. cranelift/codegen/src/isa/x64/inst.isle (41 changed lines)
  2. cranelift/codegen/src/isa/x64/inst/emit.rs (24 changed lines)
  3. cranelift/codegen/src/isa/x64/inst/emit_tests.rs (8 changed lines)
  4. cranelift/codegen/src/isa/x64/inst/mod.rs (26 changed lines)
  5. cranelift/codegen/src/isa/x64/pcc.rs (16 changed lines)
  6. winch/codegen/src/codegen/context.rs (2 changed lines)
  7. winch/codegen/src/isa/x64/asm.rs (8 changed lines)

--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -502,10 +502,13 @@
     (rhs Xmm)
     (dst WritableXmm))
-    ;; Float comparisons/tests: cmp (b w l q) (reg addr imm) reg.
+    ;; Float comparisons/tests
+    ;;
+    ;; Compares `src1` against `src2` with `op`. Note that if you're testing
+    ;; `a < b` then `src1 == a` and `src2 == b`.
     (XmmCmpRmR (op SseOpcode)
-               (src XmmMemAligned)
-               (dst Xmm))
+               (src1 Xmm)
+               (src2 XmmMemAligned))
     ;; A binary XMM instruction with an 8-bit immediate: e.g. cmp (ps pd) imm
     ;; (reg addr) reg
@@ -2809,7 +2812,7 @@
   (cmp_rmi_r size (CmpOpcode.Cmp) (RegMemImm.Imm src1) src2))
 ;; Helper for creating `MInst.XmmCmpRmR` instructions.
-(decl xmm_cmp_rm_r (SseOpcode XmmMemAligned Xmm) ProducesFlags)
+(decl xmm_cmp_rm_r (SseOpcode Xmm XmmMem) ProducesFlags)
 (rule (xmm_cmp_rm_r opcode src1 src2)
   (ProducesFlags.ProducesFlagsSideEffect
     (MInst.XmmCmpRmR opcode src1 src2)))
@@ -2829,7 +2832,7 @@
   (cmp_rmi_r size (CmpOpcode.Test) src1 src2))
 ;; Helper for creating `ptest` instructions.
-(decl x64_ptest (XmmMem Xmm) ProducesFlags)
+(decl x64_ptest (Xmm XmmMem) ProducesFlags)
 (rule (x64_ptest src1 src2)
   (xmm_cmp_rm_r (SseOpcode.Ptest) src1 src2))
@@ -4879,45 +4882,45 @@
 (decl emit_fcmp (FloatCC Value Value) FcmpCondResult)
 (rule (emit_fcmp (FloatCC.Equal) a @ (value_type (ty_scalar_float _)) b)
-  (FcmpCondResult.AndCondition (x64_ucomis b a) (CC.NP) (CC.Z)))
+  (FcmpCondResult.AndCondition (x64_ucomis a b) (CC.NP) (CC.Z)))
 (rule (emit_fcmp (FloatCC.NotEqual) a @ (value_type (ty_scalar_float _)) b)
-  (FcmpCondResult.OrCondition (x64_ucomis b a) (CC.P) (CC.NZ)))
+  (FcmpCondResult.OrCondition (x64_ucomis a b) (CC.P) (CC.NZ)))
 ;; Some scalar lowerings correspond to one condition code.
 (rule (emit_fcmp (FloatCC.Ordered) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NP)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NP)))
 (rule (emit_fcmp (FloatCC.Unordered) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.P)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.P)))
 (rule (emit_fcmp (FloatCC.OrderedNotEqual) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NZ)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NZ)))
 (rule (emit_fcmp (FloatCC.UnorderedOrEqual) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.Z)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.Z)))
 (rule (emit_fcmp (FloatCC.GreaterThan) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NBE)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NBE)))
 (rule (emit_fcmp (FloatCC.GreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NB)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NB)))
 (rule (emit_fcmp (FloatCC.UnorderedOrLessThan) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.B)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.B)))
 (rule (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)
-  (FcmpCondResult.Condition (x64_ucomis b a) (CC.BE)))
+  (FcmpCondResult.Condition (x64_ucomis a b) (CC.BE)))
 ;; Other scalar lowerings are made possible by flipping the operands and
 ;; reversing the condition code.
 (rule (emit_fcmp (FloatCC.LessThan) a @ (value_type (ty_scalar_float ty)) b)
   ;; Same flags as `GreaterThan`.
-  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NBE)))
+  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NBE)))
 (rule (emit_fcmp (FloatCC.LessThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)
   ;; Same flags as `GreaterThanOrEqual`.
-  (FcmpCondResult.Condition (x64_ucomis a b) (CC.NB)))
+  (FcmpCondResult.Condition (x64_ucomis b a) (CC.NB)))
 (rule (emit_fcmp (FloatCC.UnorderedOrGreaterThan) a @ (value_type (ty_scalar_float ty)) b)
   ;; Same flags as `UnorderedOrLessThan`.
-  (FcmpCondResult.Condition (x64_ucomis a b) (CC.B)))
+  (FcmpCondResult.Condition (x64_ucomis b a) (CC.B)))
 (rule (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a @ (value_type (ty_scalar_float ty)) b)
   ;; Same flags as `UnorderedOrLessThanOrEqual`.
-  (FcmpCondResult.Condition (x64_ucomis a b) (CC.BE)))
+  (FcmpCondResult.Condition (x64_ucomis b a) (CC.BE)))
 ;;;; Type Guards ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
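
A note on the flipped rules above: the "below"-style conditions after
`ucomis` are also true for unordered (NaN) inputs, so `FloatCC.LessThan`
cannot simply test `CC.B` on `x64_ucomis a b`; instead the operands are
flipped and the ordered "strictly above" condition (`CC.NBE`) is used,
while the plain `CC.B` form is reserved for `UnorderedOrLessThan`. A
small standalone model of the flag behaviour (invented for this sketch,
not Cranelift code) shows the difference:

    // Model of UCOMIS flag results as (zf, pf, cf): unordered sets all
    // three, greater clears all three, less sets only CF, equal only ZF.
    fn ucomis(src1: f64, src2: f64) -> (bool, bool, bool) {
        if src1.is_nan() || src2.is_nan() {
            (true, true, true)
        } else if src1 > src2 {
            (false, false, false)
        } else if src1 < src2 {
            (false, false, true)
        } else {
            (true, false, false)
        }
    }

    fn main() {
        let (a, b) = (f64::NAN, 1.0);

        // `FloatCC.LessThan` must be false for unordered inputs; the
        // flipped lowering `ucomis b, a` + NBE (CF=0 && ZF=0) is correct.
        let (zf, _pf, cf) = ucomis(b, a);
        assert!(!(!cf && !zf));

        // The unflipped `ucomis a, b` + B (CF=1) reports true here, which
        // is exactly the `UnorderedOrLessThan` semantics.
        let (_zf2, _pf2, cf2) = ucomis(a, b);
        assert!(cf2);
    }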

--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -2964,7 +2964,7 @@ pub(crate) fn emit(
                 _ => unreachable!(),
             };
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(lhs), dst);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, dst, RegMem::reg(lhs));
             inst.emit(&[], sink, info, state);
             one_way_jmp(sink, CC::NZ, do_min_max);
@@ -3171,9 +3171,9 @@
             }
         }
-        Inst::XmmCmpRmR { op, src, dst } => {
-            let dst = allocs.next(dst.to_reg());
-            let src = src.clone().to_reg_mem().with_allocs(allocs);
+        Inst::XmmCmpRmR { op, src1, src2 } => {
+            let src1 = allocs.next(src1.to_reg());
+            let src2 = src2.clone().to_reg_mem().with_allocs(allocs);
             let rex = RexFlags::clear_w();
             let (prefix, opcode, len) = match op {
@@ -3183,13 +3183,13 @@
                 _ => unimplemented!("Emit xmm cmp rm r"),
             };
-            match src {
+            match src2 {
                 RegMem::Reg { reg } => {
-                    emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex);
+                    emit_std_reg_reg(sink, prefix, opcode, len, src1, reg, rex);
                 }
                 RegMem::Mem { addr } => {
                     let addr = &addr.finalize(state, sink);
-                    emit_std_reg_mem(sink, prefix, opcode, len, dst, addr, rex, 0);
+                    emit_std_reg_mem(sink, prefix, opcode, len, src1, addr, rex, 0);
                 }
             }
         }
@@ -3461,7 +3461,7 @@
             // Check for NaN.
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, src, RegMem::reg(src));
             inst.emit(&[], sink, info, state);
             if *is_saturating {
@@ -3492,7 +3492,7 @@
             );
             inst.emit(&[], sink, info, state);
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, tmp_xmm, RegMem::reg(src));
             inst.emit(&[], sink, info, state);
             // Jump if >= to done.
@@ -3553,7 +3553,7 @@
             );
             inst.emit(&[], sink, info, state);
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, src, RegMem::reg(tmp_xmm));
             inst.emit(&[], sink, info, state);
             // no trap if src >= or > threshold
@@ -3570,7 +3570,7 @@
             );
             inst.emit(&[], sink, info, state);
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, tmp_xmm, RegMem::reg(src));
             inst.emit(&[], sink, info, state);
             // no trap if 0 >= src
@@ -3668,7 +3668,7 @@
             );
             inst.emit(&[], sink, info, state);
-            let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src);
+            let inst = Inst::xmm_cmp_rm_r(cmp_op, src, RegMem::reg(tmp_xmm));
             inst.emit(&[], sink, info, state);
             let handle_large = sink.get_label();
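
The emitter changes above only rename which binding is passed as the
ModR/M "reg" operand, so the encodings in the tests in the next file are
unchanged. As a minimal standalone sketch of that encoding detail (not
the real emitter): for `ucomiss` (opcode `0F 2E /r`), `src1` lands in
the ModR/M `reg` field and a register `src2` in the `r/m` field.

    // Build a ModR/M byte for two register operands (mod = 0b11).
    fn modrm(reg: u8, rm: u8) -> u8 {
        0b11 << 6 | (reg & 7) << 3 | (rm & 7)
    }

    fn main() {
        // ucomiss %xmm1, %xmm2: src1 = xmm2 (reg field), src2 = xmm1 (r/m).
        let bytes = [0x0F, 0x2E, modrm(2, 1)];
        assert_eq!(bytes, [0x0F, 0x2E, 0xD1]); // matches "0F2ED1" below
    }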

--- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3896,25 +3896,25 @@ fn test_x64_emit() {
     // XMM_CMP_RM_R
     insns.push((
-        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm1), xmm2),
+        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, xmm2, RegMem::reg(xmm1)),
         "0F2ED1",
         "ucomiss %xmm1, %xmm2",
     ));
     insns.push((
-        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, RegMem::reg(xmm0), xmm9),
+        Inst::xmm_cmp_rm_r(SseOpcode::Ucomiss, xmm9, RegMem::reg(xmm0)),
         "440F2EC8",
         "ucomiss %xmm0, %xmm9",
     ));
     insns.push((
-        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm13), xmm4),
+        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, xmm4, RegMem::reg(xmm13)),
         "66410F2EE5",
         "ucomisd %xmm13, %xmm4",
     ));
     insns.push((
-        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, RegMem::reg(xmm11), xmm12),
+        Inst::xmm_cmp_rm_r(SseOpcode::Ucomisd, xmm12, RegMem::reg(xmm11)),
         "66450F2EE3",
         "ucomisd %xmm11, %xmm12",
     ));

--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -396,12 +396,12 @@ impl Inst {
         }
     }
-    pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst {
-        src.assert_regclass_is(RegClass::Float);
-        debug_assert!(dst.class() == RegClass::Float);
-        let src = XmmMemAligned::new(src).unwrap();
-        let dst = Xmm::new(dst).unwrap();
-        Inst::XmmCmpRmR { op, src, dst }
+    pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src1: Reg, src2: RegMem) -> Inst {
+        src2.assert_regclass_is(RegClass::Float);
+        debug_assert!(src1.class() == RegClass::Float);
+        let src2 = XmmMemAligned::new(src2).unwrap();
+        let src1 = Xmm::new(src1).unwrap();
+        Inst::XmmCmpRmR { op, src1, src2 }
     }
     #[allow(dead_code)]
@@ -1316,11 +1316,11 @@ impl PrettyPrint for Inst {
                 format!("{op} {src}, {dst}")
             }
-            Inst::XmmCmpRmR { op, src, dst } => {
-                let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
-                let src = src.pretty_print(8, allocs);
+            Inst::XmmCmpRmR { op, src1, src2 } => {
+                let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
+                let src2 = src2.pretty_print(8, allocs);
                 let op = ljustify(op.to_string());
-                format!("{op} {src}, {dst}")
+                format!("{op} {src2}, {src1}")
             }
             Inst::CvtIntToFloat {
@@ -2155,9 +2155,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             collector.reg_use(src.to_reg());
             dst.get_operands(collector);
         }
-        Inst::XmmCmpRmR { src, dst, .. } => {
-            collector.reg_use(dst.to_reg());
-            src.get_operands(collector);
+        Inst::XmmCmpRmR { src1, src2, .. } => {
+            collector.reg_use(src1.to_reg());
+            src2.get_operands(collector);
         }
         Inst::Imm { dst, .. } => {
             collector.reg_def(dst.to_writable_reg());

--- a/cranelift/codegen/src/isa/x64/pcc.rs
+++ b/cranelift/codegen/src/isa/x64/pcc.rs
@@ -768,13 +768,17 @@ pub(crate) fn check(
         Inst::XmmMinMaxSeq { dst, .. } => ensure_no_fact(vcode, dst.to_writable_reg().to_reg()),
-        Inst::XmmCmpRmR { ref src, .. } => match <&RegMem>::from(src) {
-            RegMem::Mem { ref addr } => {
-                check_load(ctx, None, addr, vcode, I8X16, 128)?;
-                Ok(())
-            }
-            RegMem::Reg { .. } => Ok(()),
-        },
+        Inst::XmmCmpRmR {
+            ref src1, ref src2, ..
+        } => {
+            match <&RegMem>::from(src2) {
+                RegMem::Mem { ref addr } => {
+                    check_load(ctx, None, addr, vcode, I8X16, 128)?;
+                }
+                RegMem::Reg { .. } => {}
+            }
+            ensure_no_fact(vcode, src1.to_reg())
+        }
         Inst::XmmRmRImm {
             dst,

--- a/winch/codegen/src/codegen/context.rs
+++ b/winch/codegen/src/codegen/context.rs
@@ -253,8 +253,8 @@ impl<'a> CodeGenContext<'a> {
         F: FnMut(&mut M, Reg, Reg, Reg, OperandSize),
         M: MacroAssembler,
     {
-        let src1 = self.pop_to_reg(masm, None);
         let src2 = self.pop_to_reg(masm, None);
+        let src1 = self.pop_to_reg(masm, None);
         let dst = self.any_gpr(masm);
         emit(masm, dst, src1.reg, src2.reg, size);
         self.free_reg(src1);

--- a/winch/codegen/src/isa/x64/asm.rs
+++ b/winch/codegen/src/isa/x64/asm.rs
@@ -995,9 +995,9 @@ impl Assembler {
         });
     }
-    /// Compares values in src and dst and sets ZF, PF, and CF flags in EFLAGS
+    /// Compares values in src1 and src2 and sets ZF, PF, and CF flags in EFLAGS
     /// register.
-    pub fn ucomis(&mut self, src: Reg, dst: Reg, size: OperandSize) {
+    pub fn ucomis(&mut self, src1: Reg, src2: Reg, size: OperandSize) {
         let op = match size {
             OperandSize::S32 => SseOpcode::Ucomiss,
             OperandSize::S64 => SseOpcode::Ucomisd,
@@ -1006,8 +1006,8 @@
         self.emit(Inst::XmmCmpRmR {
             op,
-            src: Xmm::from(src).into(),
-            dst: dst.into(),
+            src1: src1.into(),
+            src2: Xmm::from(src2).into(),
         });
     }
