Browse Source

x64: clean up regalloc-related semantics on several instructions. (#4811)

* x64: clean up regalloc-related semantics on several instructions.

This PR removes all uses of "modify" operands on instructions in the x64
backend, and also removes all uses of "pinned vregs", or vregs that are
explicitly tied to particular physical registers. In place of both of
these mechanisms, which are legacies of the old regalloc design and
supported via compatibility code, the backend now uses operand
constraints. This is more flexible as it allows the regalloc to see the
liveranges and constraints without "reverse-engineering" move instructions.

Eventually, after removing all such uses (including in other backends
and by the ABI code), we can remove the compatibility code in regalloc2,
significantly simplifying its liverange-construction frontend and
thus allowing for higher confidence in correctness as well as possibly a
bit more compilation speed.

Curiously, there are a few extra move instructions now; they are likely
poor splitting decisions and I can try to chase these down later.

* Fix cranelift-codegen tests.

* Review feedback.
pull/4833/head
Chris Fallin 2 years ago
committed by GitHub
parent
commit
186c7c3b89
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
  1. 76
      cranelift/codegen/src/isa/x64/inst.isle
  2. 77
      cranelift/codegen/src/isa/x64/inst/emit.rs
  3. 216
      cranelift/codegen/src/isa/x64/inst/emit_tests.rs
  4. 149
      cranelift/codegen/src/isa/x64/inst/mod.rs
  5. 112
      cranelift/codegen/src/isa/x64/lower/isle.rs
  6. 55
      cranelift/codegen/src/isa/x64/mod.rs
  7. 3
      cranelift/filetests/filetests/isa/x64/branches.clif
  8. 20
      cranelift/filetests/filetests/isa/x64/div-checks.clif
  9. 50
      cranelift/filetests/filetests/isa/x64/fcvt.clif
  10. 8
      cranelift/filetests/filetests/isa/x64/sdiv.clif
  11. 19
      cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif
  12. 12
      cranelift/filetests/filetests/isa/x64/srem.clif
  13. 15
      cranelift/filetests/filetests/isa/x64/udiv.clif
  14. 15
      cranelift/filetests/filetests/isa/x64/urem.clif

76
cranelift/codegen/src/isa/x64/inst.isle

@ -64,24 +64,13 @@
;; A synthetic sequence to implement the right inline checks for
;; remainder and division, assuming the dividend is in %rax.
;;
;; Puts the result back into %rax if is_div, %rdx if !is_div, to mimic
;; what the div instruction does.
;;
;; The generated code sequence is described in the emit's function match
;; arm for this instruction.
;;
;; Note: %rdx is marked as modified by this instruction, to avoid an
;; early clobber problem with the temporary and divisor registers. Make
;; sure to zero %rdx right before this instruction, or you might run into
;; regalloc failures where %rdx is live before its first def!
(CheckedDivOrRemSeq (kind DivOrRemKind)
(size OperandSize)
(dividend_lo Gpr)
(dividend_hi Gpr)
;; The divisor operand. Note it's marked as modified
;; so that it gets assigned a register different from
;; the temporary.
(divisor WritableGpr)
(divisor Gpr)
(dst_quotient WritableGpr)
(dst_remainder WritableGpr)
(tmp OptionWritableGpr))
@ -205,12 +194,21 @@
(src3 XmmMem)
(dst WritableXmm))
;; XMM (scalar or vector) binary op that relies on the EVEX prefix.
;; XMM (scalar or vector) binary op that relies on the EVEX
;; prefix. Takes two inputs.
(XmmRmREvex (op Avx512Opcode)
(src1 XmmMem)
(src2 Xmm)
(dst WritableXmm))
;; XMM (scalar or vector) binary op that relies on the EVEX
;; prefix. Takes three inputs.
(XmmRmREvex3 (op Avx512Opcode)
(src1 XmmMem)
(src2 Xmm)
(src3 Xmm)
(dst WritableXmm))
;; XMM (scalar or vector) unary op: mov between XMM registers (32|64)
;; (reg|addr) reg, sqrt, etc.
;;
@ -255,13 +253,7 @@
;; Converts an unsigned int64 to a float32/float64.
(CvtUint64ToFloatSeq (dst_size OperandSize) ;; 4 or 8
;; A copy of the source register, fed by
;; lowering. It is marked as modified during
;; register allocation to make sure that the
;; temporary registers differ from the src register,
;; since both registers are live at the same time in
;; the generated code sequence.
(src WritableGpr)
(src Gpr)
(dst WritableXmm)
(tmp_gpr1 WritableGpr)
(tmp_gpr2 WritableGpr))
@ -270,13 +262,7 @@
(CvtFloatToSintSeq (dst_size OperandSize)
(src_size OperandSize)
(is_saturating bool)
;; A copy of the source register, fed by
;; lowering. It is marked as modified during
;; register allocation to make sure that the
;; temporary registers differ from the src register,
;; since both registers are live at the same time in
;; the generated code sequence.
(src WritableXmm)
(src Xmm)
(dst WritableGpr)
(tmp_gpr WritableGpr)
(tmp_xmm WritableXmm))
@ -285,13 +271,7 @@
(CvtFloatToUintSeq (dst_size OperandSize)
(src_size OperandSize)
(is_saturating bool)
;; A copy of the source register, fed by
;; lowering. It is marked as modified during
;; register allocation to make sure that the
;; temporary registers differ from the src register,
;; since both registers are live at the same time in
;; the generated code sequence.
(src WritableXmm)
(src Xmm)
(dst WritableGpr)
(tmp_gpr WritableGpr)
(tmp_xmm WritableXmm))
@ -2769,11 +2749,11 @@
(decl x64_vpermi2b (Xmm Xmm Xmm) Xmm)
(rule (x64_vpermi2b src1 src2 src3)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (gen_move $I8X16 dst src3)))
(_ Unit (emit (MInst.XmmRmREvex (Avx512Opcode.Vpermi2b)
src1
src2
dst))))
(_ Unit (emit (MInst.XmmRmREvex3 (Avx512Opcode.Vpermi2b)
src1
src2
src3
dst))))
dst))
;; Helper for creating `MInst.MulHi` instructions.
@ -3214,12 +3194,10 @@
(decl cvt_u64_to_float_seq (Type Gpr) Xmm)
(rule (cvt_u64_to_float_seq ty src)
(let ((size OperandSize (raw_operand_size_of_type ty))
(src_copy WritableGpr (temp_writable_gpr))
(dst WritableXmm (temp_writable_xmm))
(tmp_gpr1 WritableGpr (temp_writable_gpr))
(tmp_gpr2 WritableGpr (temp_writable_gpr))
(_ Unit (emit (gen_move $I64 src_copy src)))
(_ Unit (emit (MInst.CvtUint64ToFloatSeq size src_copy dst tmp_gpr1 tmp_gpr2))))
(_ Unit (emit (MInst.CvtUint64ToFloatSeq size src dst tmp_gpr1 tmp_gpr2))))
dst))
(decl cvt_float_to_uint_seq (Type Value bool) Gpr)
@ -3227,13 +3205,10 @@
(let ((out_size OperandSize (raw_operand_size_of_type out_ty))
(src_size OperandSize (raw_operand_size_of_type src_ty))
(tmp WritableXmm (temp_writable_xmm))
(_ Unit (emit (gen_move src_ty tmp src)))
(dst WritableGpr (temp_writable_gpr))
(tmp_xmm WritableXmm (temp_writable_xmm))
(tmp_gpr WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm))))
(_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm))))
dst))
(decl cvt_float_to_sint_seq (Type Value bool) Gpr)
@ -3241,13 +3216,10 @@
(let ((out_size OperandSize (raw_operand_size_of_type out_ty))
(src_size OperandSize (raw_operand_size_of_type src_ty))
(tmp WritableXmm (temp_writable_xmm))
(_ Unit (emit (gen_move src_ty tmp src)))
(dst WritableGpr (temp_writable_gpr))
(tmp_xmm WritableXmm (temp_writable_xmm))
(tmp_gpr WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating tmp dst tmp_gpr tmp_xmm))))
(_ Unit (emit (MInst.CvtFloatToSintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm))))
dst))
(decl fcvt_uint_mask_const () VCodeConstant)
@ -3396,10 +3368,6 @@
;; addresses).
(tmp1 WritableGpr (temp_writable_gpr))
;; Put a zero in tmp1. This is needed for Spectre mitigations (a
;; CMOV that zeroes the index on misspeculation).
(_ Unit (emit (MInst.Imm (OperandSize.Size32) 0 tmp1)))
;; This temporary is used as a signed integer of 32-bits (for the
;; wasm-table index) and then 64-bits (address addend). The small
;; lie about the I64 type is benign, since the temporary is dead

77
cranelift/codegen/src/isa/x64/inst/emit.rs

@ -377,11 +377,11 @@ pub(crate) fn emit(
} => {
let dividend_lo = allocs.next(dividend_lo.to_reg());
let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
debug_assert_eq!(dividend_lo, regs::rax());
debug_assert_eq!(dst_quotient, regs::rax());
debug_assert_eq!(dst_remainder, regs::rdx());
if size.to_bits() > 8 {
let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
debug_assert_eq!(dst_remainder, regs::rdx());
let dividend_hi = allocs.next(dividend_hi.to_reg());
debug_assert_eq!(dividend_hi, regs::rdx());
}
@ -468,7 +468,11 @@ pub(crate) fn emit(
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
debug_assert_eq!(src, regs::rax());
debug_assert_eq!(dst, regs::rdx());
if *size == OperandSize::Size8 {
debug_assert_eq!(dst, regs::rax());
} else {
debug_assert_eq!(dst, regs::rdx());
}
match size {
OperandSize::Size8 => {
sink.put1(0x66);
@ -498,7 +502,7 @@ pub(crate) fn emit(
} => {
let dividend_lo = allocs.next(dividend_lo.to_reg());
let dividend_hi = allocs.next(dividend_hi.to_reg());
let divisor = allocs.next(divisor.to_reg().to_reg());
let divisor = allocs.next(divisor.to_reg());
let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg());
let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg());
let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg()));
@ -597,18 +601,45 @@ pub(crate) fn emit(
sink.bind_label(do_op);
}
let dividend_lo = Gpr::new(regs::rax()).unwrap();
let dst_quotient = WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap());
let (dividend_hi, dst_remainder) = if *size == OperandSize::Size8 {
(
Gpr::new(regs::rax()).unwrap(),
Writable::from_reg(Gpr::new(regs::rax()).unwrap()),
)
} else {
(
Gpr::new(regs::rdx()).unwrap(),
Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
)
};
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of rax into rdx, for signed opcodes.
let inst = Inst::sign_extend_data(*size);
let inst =
Inst::sign_extend_data(*size, dividend_lo, WritableGpr::from_reg(dividend_hi));
inst.emit(&[], sink, info, state);
} else {
} else if *size != OperandSize::Size8 {
// zero for unsigned opcodes.
let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx()));
let inst = Inst::imm(
OperandSize::Size64,
0,
Writable::from_reg(dividend_hi.to_reg()),
);
inst.emit(&[], sink, info, state);
}
let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor));
let inst = Inst::div(
*size,
kind.is_signed(),
RegMem::reg(divisor),
dividend_lo,
dividend_hi,
dst_quotient,
dst_remainder,
);
inst.emit(&[], sink, info, state);
// Lowering takes care of moving the result back into the right register, see comment
@ -1393,7 +1424,8 @@ pub(crate) fn emit(
// ;; generated by lowering: cmp #jmp_table_size, %idx
// jnb $default_target
// movl %idx, %tmp2
// cmovnb %tmp1, %tmp2 ;; Spectre mitigation; we require tmp1 to be zero on entry.
// mov $0, %tmp1
// cmovnb %tmp1, %tmp2 ;; Spectre mitigation.
// lea start_of_jump_table_offset(%rip), %tmp1
// movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4
// addq %tmp2, %tmp1
@ -1406,6 +1438,13 @@ pub(crate) fn emit(
let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2);
inst.emit(&[], sink, info, state);
// Zero `tmp1` to overwrite `tmp2` with zeroes on the
// out-of-bounds case (Spectre mitigation using CMOV).
// Note that we need to do this with a move-immediate
// form, because we cannot clobber the flags.
let inst = Inst::imm(OperandSize::Size32, 0, tmp1);
inst.emit(&[], sink, info, state);
// Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated.
let inst = Inst::cmove(
OperandSize::Size64,
@ -1768,9 +1807,21 @@ pub(crate) fn emit(
src1,
src2,
dst,
}
| Inst::XmmRmREvex3 {
op,
src1,
src2,
dst,
// `dst` reuses `src3`.
..
} => {
let dst = allocs.next(dst.to_reg().to_reg());
let src2 = allocs.next(src2.to_reg());
if let Inst::XmmRmREvex3 { src3, .. } = inst {
let src3 = allocs.next(src3.to_reg());
debug_assert_eq!(src3, dst);
}
let src1 = src1.clone().to_reg_mem().with_allocs(allocs);
let (w, opcode) = match op {
@ -2086,7 +2137,7 @@ pub(crate) fn emit(
tmp_gpr1,
tmp_gpr2,
} => {
let src = allocs.next(src.to_reg().to_reg());
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
let tmp_gpr1 = allocs.next(tmp_gpr1.to_reg().to_reg());
let tmp_gpr2 = allocs.next(tmp_gpr2.to_reg().to_reg());
@ -2155,7 +2206,7 @@ pub(crate) fn emit(
let inst = Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(1),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 1 }).unwrap(),
Writable::from_reg(tmp_gpr1),
);
inst.emit(&[], sink, info, state);
@ -2208,7 +2259,7 @@ pub(crate) fn emit(
tmp_gpr,
tmp_xmm,
} => {
let src = allocs.next(src.to_reg().to_reg());
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg());
let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg());
@ -2417,7 +2468,7 @@ pub(crate) fn emit(
tmp_gpr,
tmp_xmm,
} => {
let src = allocs.next(src.to_reg().to_reg());
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg());
let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg());

216
cranelift/codegen/src/isa/x64/inst/emit_tests.rs

@ -1723,6 +1723,10 @@ fn test_x64_emit() {
OperandSize::Size32,
true, /*signed*/
RegMem::reg(regs::rsi()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"F7FE",
"idiv %eax, %edx, %esi, %eax, %edx",
@ -1732,6 +1736,10 @@ fn test_x64_emit() {
OperandSize::Size64,
true, /*signed*/
RegMem::reg(regs::r15()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"49F7FF",
"idiv %rax, %rdx, %r15, %rax, %rdx",
@ -1741,6 +1749,10 @@ fn test_x64_emit() {
OperandSize::Size32,
false, /*signed*/
RegMem::reg(regs::r14()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"41F7F6",
"div %eax, %edx, %r14d, %eax, %edx",
@ -1750,19 +1762,39 @@ fn test_x64_emit() {
OperandSize::Size64,
false, /*signed*/
RegMem::reg(regs::rdi()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"48F7F7",
"div %rax, %rdx, %rdi, %rax, %rdx",
));
insns.push((
Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rax())),
Inst::div(
OperandSize::Size8,
false,
RegMem::reg(regs::rax()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"F6F0",
"div %al, (none), %al, %al, %dl",
"div %al, (none), %al, %al, (none)",
));
insns.push((
Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rsi())),
Inst::div(
OperandSize::Size8,
false,
RegMem::reg(regs::rsi()),
Gpr::new(regs::rax()).unwrap(),
Gpr::new(regs::rdx()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"40F6F6",
"div %al, (none), %sil, %al, %dl",
"div %al, (none), %sil, %al, (none)",
));
// ========================================================
@ -1807,25 +1839,41 @@ fn test_x64_emit() {
// ========================================================
// cbw
insns.push((
Inst::sign_extend_data(OperandSize::Size8),
Inst::sign_extend_data(
OperandSize::Size8,
Gpr::new(regs::rax()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
),
"6698",
"cbw %al, %dl",
"cbw %al, %al",
));
// ========================================================
// cdq family: SignExtendRaxRdx
insns.push((
Inst::sign_extend_data(OperandSize::Size16),
Inst::sign_extend_data(
OperandSize::Size16,
Gpr::new(regs::rax()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"6699",
"cwd %ax, %dx",
));
insns.push((
Inst::sign_extend_data(OperandSize::Size32),
Inst::sign_extend_data(
OperandSize::Size32,
Gpr::new(regs::rax()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"99",
"cdq %eax, %edx",
));
insns.push((
Inst::sign_extend_data(OperandSize::Size64),
Inst::sign_extend_data(
OperandSize::Size64,
Gpr::new(regs::rax()).unwrap(),
WritableGpr::from_reg(Gpr::new(regs::rdx()).unwrap()),
),
"4899",
"cqo %rax, %rdx",
));
@ -2813,47 +2861,92 @@ fn test_x64_emit() {
// ========================================================
// Shift_R
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_rdi),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"D3E7",
"shll %cl, %edi, %edi",
));
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_r12),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_r12,
),
"41D3E4",
"shll %cl, %r12d, %r12d",
));
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(2), w_r8),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"41C1E002",
"shll $2, %r8d, %r8d",
));
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(31), w_r13),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(),
w_r13,
),
"41C1E51F",
"shll $31, %r13d, %r13d",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_r13),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_r13,
),
"49D3E5",
"shlq %cl, %r13, %r13",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_rdi),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"48D3E7",
"shlq %cl, %rdi, %rdi",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(2), w_r8),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"49C1E002",
"shlq $2, %r8, %r8",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(3), w_rbx),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(),
w_rbx,
),
"48C1E303",
"shlq $3, %rbx, %rbx",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(63), w_r13),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(),
w_r13,
),
"49C1E53F",
"shlq $63, %r13, %r13",
));
@ -2861,7 +2954,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightLogical,
None,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"D3EF",
@ -2871,7 +2964,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightLogical,
Some(2),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"41C1E802",
@ -2881,7 +2974,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightLogical,
Some(31),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(),
w_r13,
),
"41C1ED1F",
@ -2891,7 +2984,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
None,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"48D3EF",
@ -2901,7 +2994,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(2),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"49C1E802",
@ -2911,7 +3004,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(63),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(),
w_r13,
),
"49C1ED3F",
@ -2921,7 +3014,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightArithmetic,
None,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"D3FF",
@ -2931,7 +3024,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightArithmetic,
Some(2),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"41C1F802",
@ -2941,7 +3034,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size32,
ShiftKind::ShiftRightArithmetic,
Some(31),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 31 }).unwrap(),
w_r13,
),
"41C1FD1F",
@ -2951,7 +3044,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightArithmetic,
None,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rdi,
),
"48D3FF",
@ -2961,7 +3054,7 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightArithmetic,
Some(2),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 2 }).unwrap(),
w_r8,
),
"49C1F802",
@ -2971,54 +3064,99 @@ fn test_x64_emit() {
Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightArithmetic,
Some(63),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 63 }).unwrap(),
w_r13,
),
"49C1FD3F",
"sarq $63, %r13, %r13",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::RotateLeft, None, w_r8),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::RotateLeft,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_r8,
),
"49D3C0",
"rolq %cl, %r8, %r8",
));
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::RotateLeft, Some(3), w_r9),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::RotateLeft,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 3 }).unwrap(),
w_r9,
),
"41C1C103",
"roll $3, %r9d, %r9d",
));
insns.push((
Inst::shift_r(OperandSize::Size32, ShiftKind::RotateRight, None, w_rsi),
Inst::shift_r(
OperandSize::Size32,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rsi,
),
"D3CE",
"rorl %cl, %esi, %esi",
));
insns.push((
Inst::shift_r(OperandSize::Size64, ShiftKind::RotateRight, Some(5), w_r15),
Inst::shift_r(
OperandSize::Size64,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(),
w_r15,
),
"49C1CF05",
"rorq $5, %r15, %r15",
));
insns.push((
Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rsi),
Inst::shift_r(
OperandSize::Size8,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rsi,
),
"40D2CE",
"rorb %cl, %sil, %sil",
));
insns.push((
Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rax),
Inst::shift_r(
OperandSize::Size8,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rax,
),
"D2C8",
"rorb %cl, %al, %al",
));
insns.push((
Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, Some(5), w_r15),
Inst::shift_r(
OperandSize::Size8,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(),
w_r15,
),
"41C0CF05",
"rorb $5, %r15b, %r15b",
));
insns.push((
Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, None, w_rsi),
Inst::shift_r(
OperandSize::Size16,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(),
w_rsi,
),
"66D3CE",
"rorw %cl, %si, %si",
));
insns.push((
Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, Some(5), w_r15),
Inst::shift_r(
OperandSize::Size16,
ShiftKind::RotateRight,
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 5 }).unwrap(),
w_r15,
),
"6641C1CF05",
"rorw $5, %r15w, %r15w",
));

149
cranelift/codegen/src/isa/x64/inst/mod.rs

@ -131,7 +131,9 @@ impl Inst {
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
Inst::XmmUnaryRmREvex { op, .. } | Inst::XmmRmREvex { op, .. } => op.available_from(),
Inst::XmmUnaryRmREvex { op, .. }
| Inst::XmmRmREvex { op, .. }
| Inst::XmmRmREvex3 { op, .. } => op.available_from(),
Inst::XmmRmRVex { op, .. } => op.available_from(),
}
@ -195,47 +197,55 @@ impl Inst {
}
}
pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst {
pub(crate) fn div(
size: OperandSize,
signed: bool,
divisor: RegMem,
dividend_lo: Gpr,
dividend_hi: Gpr,
dst_quotient: WritableGpr,
dst_remainder: WritableGpr,
) -> Inst {
divisor.assert_regclass_is(RegClass::Int);
Inst::Div {
size,
signed,
divisor: GprMem::new(divisor).unwrap(),
dividend_lo: Gpr::new(regs::rax()).unwrap(),
dividend_hi: Gpr::new(regs::rdx()).unwrap(),
dst_quotient: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
dividend_lo,
dividend_hi,
dst_quotient,
dst_remainder,
}
}
pub(crate) fn checked_div_or_rem_seq(
kind: DivOrRemKind,
size: OperandSize,
divisor: Writable<Reg>,
divisor: Reg,
dividend_lo: Gpr,
dividend_hi: Gpr,
dst_quotient: WritableGpr,
dst_remainder: WritableGpr,
tmp: Option<Writable<Reg>>,
) -> Inst {
debug_assert!(divisor.to_reg().class() == RegClass::Int);
debug_assert!(divisor.class() == RegClass::Int);
debug_assert!(tmp
.map(|tmp| tmp.to_reg().class() == RegClass::Int)
.unwrap_or(true));
Inst::CheckedDivOrRemSeq {
kind,
size,
divisor: WritableGpr::from_writable_reg(divisor).unwrap(),
dividend_lo: Gpr::new(regs::rax()).unwrap(),
dividend_hi: Gpr::new(regs::rdx()).unwrap(),
dst_quotient: Writable::from_reg(Gpr::new(regs::rax()).unwrap()),
dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
divisor: Gpr::new(divisor).unwrap(),
dividend_lo,
dividend_hi,
dst_quotient,
dst_remainder,
tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()),
}
}
pub(crate) fn sign_extend_data(size: OperandSize) -> Inst {
Inst::SignExtendData {
size,
src: Gpr::new(regs::rax()).unwrap(),
dst: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()),
}
pub(crate) fn sign_extend_data(size: OperandSize, src: Gpr, dst: WritableGpr) -> Inst {
Inst::SignExtendData { size, src, dst }
}
pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable<Reg>) -> Inst {
@ -415,24 +425,18 @@ impl Inst {
pub(crate) fn shift_r(
size: OperandSize,
kind: ShiftKind,
num_bits: Option<u8>,
num_bits: Imm8Gpr,
dst: Writable<Reg>,
) -> Inst {
debug_assert!(if let Some(num_bits) = num_bits {
num_bits < size.to_bits()
} else {
true
});
if let Imm8Reg::Imm8 { imm: num_bits } = num_bits.clone().to_imm8_reg() {
debug_assert!(num_bits < size.to_bits());
}
debug_assert!(dst.to_reg().class() == RegClass::Int);
Inst::ShiftR {
size,
kind,
src: Gpr::new(dst.to_reg()).unwrap(),
num_bits: Imm8Gpr::new(match num_bits {
Some(imm) => Imm8Reg::Imm8 { imm },
None => Imm8Reg::Reg { reg: regs::rcx() },
})
.unwrap(),
num_bits,
dst: WritableGpr::from_writable_reg(dst).unwrap(),
}
}
@ -781,8 +785,11 @@ impl PrettyPrint for Inst {
let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs);
let dst_quotient =
pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs);
let dst_remainder =
pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs);
let dst_remainder = if size.to_bits() > 8 {
pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs)
} else {
"(none)".to_string()
};
let dividend_hi = if size.to_bits() > 8 {
pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs)
} else {
@ -842,7 +849,7 @@ impl PrettyPrint for Inst {
} => {
let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs);
let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs);
let divisor = pretty_print_reg(divisor.to_reg().to_reg(), size.to_bytes(), allocs);
let divisor = pretty_print_reg(divisor.to_reg(), size.to_bytes(), allocs);
let dst_quotient =
pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs);
let dst_remainder =
@ -949,12 +956,34 @@ impl PrettyPrint for Inst {
dst,
..
} => {
let src2 = pretty_print_reg(src2.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src2 = pretty_print_reg(src2.to_reg(), 8, allocs);
let src1 = src1.pretty_print(8, allocs);
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
}
Inst::XmmRmREvex3 {
op,
src1,
src2,
src3,
dst,
..
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src2 = pretty_print_reg(src2.to_reg(), 8, allocs);
let src3 = pretty_print_reg(src3.to_reg(), 8, allocs);
let src1 = src1.pretty_print(8, allocs);
format!(
"{} {}, {}, {}, {}",
ljustify(op.to_string()),
src1,
src2,
src3,
dst
)
}
Inst::XmmMinMaxSeq {
lhs,
rhs,
@ -1084,7 +1113,7 @@ impl PrettyPrint for Inst {
tmp_gpr2,
..
} => {
let src = pretty_print_reg(src.to_reg().to_reg(), 8, allocs);
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs);
let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8, allocs);
let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8, allocs);
@ -1114,7 +1143,7 @@ impl PrettyPrint for Inst {
tmp_gpr,
is_saturating,
} => {
let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs);
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs);
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs);
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs);
@ -1142,7 +1171,7 @@ impl PrettyPrint for Inst {
tmp_xmm,
is_saturating,
} => {
let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs);
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs);
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs);
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs);
@ -1424,9 +1453,19 @@ impl PrettyPrint for Inst {
not_taken.to_string()
),
Inst::JmpTableSeq { idx, .. } => {
Inst::JmpTableSeq {
idx, tmp1, tmp2, ..
} => {
let idx = pretty_print_reg(*idx, 8, allocs);
format!("{} {}", ljustify("br_table".into()), idx)
let tmp1 = pretty_print_reg(tmp1.to_reg(), 8, allocs);
let tmp2 = pretty_print_reg(tmp2.to_reg(), 8, allocs);
format!(
"{} {}, {}, {}",
ljustify("br_table".into()),
idx,
tmp1,
tmp2
)
}
Inst::JmpUnknown { target } => {
@ -1605,8 +1644,8 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
} => {
collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax());
collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax());
collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
if size.to_bits() > 8 {
collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx());
}
divisor.get_operands(collector);
@ -1634,10 +1673,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
} => {
collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax());
collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx());
collector.reg_mod(divisor.to_writable_reg());
collector.reg_use(divisor.to_reg());
collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax());
collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx());
if let Some(tmp) = tmp {
// Early def so that the temporary register does not
// conflict with inputs or outputs.
collector.reg_early_def(tmp.to_writable_reg());
}
}
@ -1718,13 +1759,25 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
dst,
..
} => {
match *op {
Avx512Opcode::Vpermi2b => collector.reg_mod(dst.to_writable_reg()),
_ => collector.reg_def(dst.to_writable_reg()),
}
assert_ne!(*op, Avx512Opcode::Vpermi2b);
collector.reg_def(dst.to_writable_reg());
collector.reg_use(src2.to_reg());
src1.get_operands(collector);
}
Inst::XmmRmREvex3 {
op,
src1,
src2,
src3,
dst,
..
} => {
assert_eq!(*op, Avx512Opcode::Vpermi2b);
collector.reg_reuse_def(dst.to_writable_reg(), 2); // Reuse `src3`.
collector.reg_use(src2.to_reg());
collector.reg_use(src3.to_reg());
src1.get_operands(collector);
}
Inst::XmmRmRImm {
op,
src1,
@ -1795,7 +1848,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
tmp_gpr2,
..
} => {
collector.reg_mod(src.to_writable_reg());
collector.reg_use(src.to_reg());
collector.reg_def(dst.to_writable_reg());
collector.reg_early_def(tmp_gpr1.to_writable_reg());
collector.reg_early_def(tmp_gpr2.to_writable_reg());
@ -1814,7 +1867,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
tmp_xmm,
..
} => {
collector.reg_mod(src.to_writable_reg());
collector.reg_use(src.to_reg());
collector.reg_def(dst.to_writable_reg());
collector.reg_early_def(tmp_gpr.to_writable_reg());
collector.reg_early_def(tmp_xmm.to_writable_reg());
@ -1911,7 +1964,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
..
} => {
collector.reg_use(*idx);
collector.reg_mod(*tmp1);
collector.reg_early_def(*tmp1);
collector.reg_early_def(*tmp2);
}

112
cranelift/codegen/src/isa/x64/lower/isle.rs

@ -955,40 +955,34 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
let is_div = kind.is_div();
let size = OperandSize::from_ty(ty);
self.lower_ctx.emit(MInst::gen_move(
Writable::from_reg(regs::rax()),
dividend.to_reg(),
ty,
));
let dst_quotient = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
// Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly.
if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem {
// A vcode meta-instruction is used to lower the inline checks, since they embed
// pc-relative offsets that must not change, thus requiring regalloc to not
// interfere by introducing spills and reloads.
//
// Note it keeps the result in $rax (for divide) or $rdx (for rem), so that
// regalloc is aware of the coalescing opportunity between rax/rdx and the
// destination register.
let divisor_copy = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
self.lower_ctx
.emit(MInst::gen_move(divisor_copy, divisor.to_reg(), types::I64));
let tmp = if *kind == DivOrRemKind::SignedDiv && size == OperandSize::Size64 {
Some(self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap())
} else {
None
};
// TODO use xor
self.lower_ctx.emit(MInst::imm(
let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
self.lower_ctx.emit(MInst::alu_rmi_r(
OperandSize::Size32,
0,
Writable::from_reg(regs::rdx()),
AluRmiROpcode::Xor,
RegMemImm::reg(dividend_hi.to_reg()),
dividend_hi,
));
self.lower_ctx.emit(MInst::checked_div_or_rem_seq(
kind.clone(),
size,
divisor_copy,
divisor.to_reg(),
Gpr::new(dividend.to_reg()).unwrap(),
Gpr::new(dividend_hi.to_reg()).unwrap(),
WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
tmp,
));
} else {
@ -997,51 +991,89 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
// divisor into a register instead.
let divisor = RegMem::reg(divisor.to_reg());
let dividend_hi = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
// Fill in the high parts:
if kind.is_signed() {
// sign-extend the sign-bit of al into ah for size 1, or rax into rdx, for
// signed opcodes.
self.lower_ctx.emit(MInst::sign_extend_data(size));
let dividend_lo = if kind.is_signed() && ty == types::I8 {
let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
// 8-bit div takes its dividend in only the `lo` reg.
self.lower_ctx.emit(MInst::sign_extend_data(
size,
Gpr::new(dividend.to_reg()).unwrap(),
WritableGpr::from_reg(Gpr::new(dividend_lo.to_reg()).unwrap()),
));
// `dividend_hi` is not used by the Div below, so we
// don't def it here.
dividend_lo.to_reg()
} else if kind.is_signed() {
// 16-bit and higher div takes its operand in hi:lo
// with half in each (64:64, 32:32 or 16:16).
self.lower_ctx.emit(MInst::sign_extend_data(
size,
Gpr::new(dividend.to_reg()).unwrap(),
WritableGpr::from_reg(Gpr::new(dividend_hi.to_reg()).unwrap()),
));
dividend.to_reg()
} else if ty == types::I8 {
let dividend_lo = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap();
self.lower_ctx.emit(MInst::movzx_rm_r(
ExtMode::BL,
RegMem::reg(regs::rax()),
Writable::from_reg(regs::rax()),
RegMem::reg(dividend.to_reg()),
dividend_lo,
));
dividend_lo.to_reg()
} else {
// zero for unsigned opcodes.
self.lower_ctx.emit(MInst::imm(
OperandSize::Size64,
0,
Writable::from_reg(regs::rdx()),
));
}
self.lower_ctx
.emit(MInst::imm(OperandSize::Size64, 0, dividend_hi));
dividend.to_reg()
};
// Emit the actual idiv.
self.lower_ctx
.emit(MInst::div(size, kind.is_signed(), divisor));
self.lower_ctx.emit(MInst::div(
size,
kind.is_signed(),
divisor,
Gpr::new(dividend_lo).unwrap(),
Gpr::new(dividend_hi.to_reg()).unwrap(),
WritableGpr::from_reg(Gpr::new(dst_quotient.to_reg()).unwrap()),
WritableGpr::from_reg(Gpr::new(dst_remainder.to_reg()).unwrap()),
));
}
// Move the result back into the destination reg.
if is_div {
// The quotient is in rax.
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
self.lower_ctx.emit(MInst::gen_move(
dst.to_writable_reg(),
dst_quotient.to_reg(),
ty,
));
} else {
if size == OperandSize::Size8 {
// The remainder is in AH. Right-shift by 8 bits then move from rax.
self.lower_ctx.emit(MInst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
Some(8),
Writable::from_reg(regs::rax()),
Imm8Gpr::new(Imm8Reg::Imm8 { imm: 8 }).unwrap(),
dst_quotient,
));
self.lower_ctx.emit(MInst::gen_move(
dst.to_writable_reg(),
dst_quotient.to_reg(),
ty,
));
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rax(), ty));
} else {
// The remainder is in rdx.
self.lower_ctx
.emit(MInst::gen_move(dst.to_writable_reg(), regs::rdx(), ty));
self.lower_ctx.emit(MInst::gen_move(
dst.to_writable_reg(),
dst_remainder.to_reg(),
ty,
));
}
}
}

55
cranelift/codegen/src/isa/x64/mod.rs

@ -427,37 +427,34 @@ mod test {
// 00000000 55 push rbp
// 00000001 4889E5 mov rbp,rsp
// 00000004 41B900000000 mov r9d,0x0
// 0000000A 83FF02 cmp edi,byte +0x2
// 0000000D 0F8320000000 jnc near 0x33
// 00000013 8BF7 mov esi,edi
// 00000015 490F43F1 cmovnc rsi,r9
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
// 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0]
// 00000025 4901F1 add r9,rsi
// 00000028 41FFE1 jmp r9
// 0000002B 1200 adc al,[rax]
// 0000002D 0000 add [rax],al
// 0000002F 1C00 sbb al,0x0
// 00000031 0000 add [rax],al
// 00000033 B803000000 mov eax,0x3
// 00000038 4889EC mov rsp,rbp
// 0000003B 5D pop rbp
// 0000003C C3 ret
// 0000003D B801000000 mov eax,0x1
// 00000042 4889EC mov rsp,rbp
// 00000045 5D pop rbp
// 00000046 C3 ret
// 00000047 B802000000 mov eax,0x2
// 0000004C 4889EC mov rsp,rbp
// 0000004F 5D pop rbp
// 00000050 C3 ret
// 00000004 83FF02 cmp edi,byte +0x2
// 00000007 0F8327000000 jnc near 0x34
// 0000000D 448BDF mov r11d,edi
// 00000010 41BA00000000 mov r10d,0x0
// 00000016 4D0F43DA cmovnc r11,r10
// 0000001A 4C8D150B000000 lea r10,[rel 0x2c]
// 00000021 4F635C9A00 movsxd r11,dword [r10+r11*4+0x0]
// 00000026 4D01DA add r10,r11
// 00000029 41FFE2 jmp r10
// 0000002C 120000001C000000 (jumptable data)
// 00000034 B803000000 mov eax,0x3
// 00000039 4889EC mov rsp,rbp
// 0000003C 5D pop rbp
// 0000003D C3 ret
// 0000003E B801000000 mov eax,0x1
// 00000043 4889EC mov rsp,rbp
// 00000046 5D pop rbp
// 00000047 C3 ret
// 00000048 B802000000 mov eax,0x2
// 0000004D 4889EC mov rsp,rbp
// 00000050 5D pop rbp
// 00000051 C3 ret
let golden = vec![
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73,
15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225,
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
85, 72, 137, 229, 131, 255, 2, 15, 131, 39, 0, 0, 0, 68, 139, 223, 65, 186, 0, 0, 0, 0,
77, 15, 67, 218, 76, 141, 21, 11, 0, 0, 0, 79, 99, 92, 154, 0, 77, 1, 218, 65, 255,
226, 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0,
72, 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
];
assert_eq!(code, &golden[..]);

3
cranelift/filetests/filetests/isa/x64/branches.clif

@ -205,9 +205,8 @@ block2:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r8d
; cmpl $2, %edi
; br_table %rdi
; br_table %rdi, %r9, %r10
; block1:
; jmp label3
; block2:

20
cranelift/filetests/filetests/isa/x64/div-checks.clif

@ -10,8 +10,9 @@ target x86_64
function %i8(i8, i8) -> i8 {
block0(v0: i8, v1: i8):
v2 = srem.i8 v0, v1
; check: movq %rdi, %rax
; nextln: movl $$0, %edx
; check: xorl %r11d, %r11d, %r11d
; nextln: movq %rdi, %rax
; nextln: movq %r11, %rdx
; nextln: srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
; nextln: shrq $$8, %rax, %rax
@ -21,8 +22,9 @@ block0(v0: i8, v1: i8):
function %i16(i16, i16) -> i16 {
block0(v0: i16, v1: i16):
v2 = srem.i16 v0, v1
; check: movq %rdi, %rax
; nextln: movl $$0, %edx
; check: xorl %r11d, %r11d, %r11d
; nextln: movq %rdi, %rax
; nextln: movq %r11, %rdx
; nextln: srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
; nextln: movq %rdx, %rax
@ -32,8 +34,9 @@ block0(v0: i16, v1: i16):
function %i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = srem.i32 v0, v1
; check: movq %rdi, %rax
; nextln: movl $$0, %edx
; check: xorl %r11d, %r11d, %r11d
; nextln: movq %rdi, %rax
; nextln: movq %r11, %rdx
; nextln: srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
; nextln: movq %rdx, %rax
@ -43,8 +46,9 @@ block0(v0: i32, v1: i32):
function %i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = srem.i64 v0, v1
; check: movq %rdi, %rax
; nextln: movl $$0, %edx
; check: xorl %r11d, %r11d, %r11d
; nextln: movq %rdi, %rax
; nextln: movq %r11, %rdx
; nextln: srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
; nextln: movq %rdx, %rax

50
cranelift/filetests/filetests/isa/x64/fcvt.clif

@ -146,16 +146,16 @@ block0(v0: i8, v1: i16, v2: i32, v3: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq %dil, %rax
; cvtsi2ss %rax, %xmm0
; movzwq %si, %rax
; cvtsi2ss %rax, %xmm6
; movl %edx, %eax
; cvtsi2ss %rax, %xmm7
; u64_to_f32_seq %rcx, %xmm4, %r8, %rdx
; movzbq %dil, %rdi
; cvtsi2ss %rdi, %xmm0
; movzwq %si, %rdi
; cvtsi2ss %rdi, %xmm5
; movl %edx, %edi
; cvtsi2ss %rdi, %xmm6
; u64_to_f32_seq %rcx, %xmm2, %rdi, %rax
; addss %xmm0, %xmm5, %xmm0
; addss %xmm0, %xmm6, %xmm0
; addss %xmm0, %xmm7, %xmm0
; addss %xmm0, %xmm4, %xmm0
; addss %xmm0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@ -209,7 +209,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_uint32_seq %xmm0, %eax, %r10, %xmm6
; cvt_float32_to_uint32_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -223,7 +223,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_uint64_seq %xmm0, %rax, %r10, %xmm6
; cvt_float32_to_uint64_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -237,7 +237,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_uint32_seq %xmm0, %eax, %r10, %xmm6
; cvt_float64_to_uint32_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -251,7 +251,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_uint64_seq %xmm0, %rax, %r10, %xmm6
; cvt_float64_to_uint64_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -265,7 +265,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r10, %xmm6
; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -279,7 +279,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6
; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -293,7 +293,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r10, %xmm6
; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -307,7 +307,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r10, %xmm6
; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -321,7 +321,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_sint32_seq %xmm0, %eax, %r10, %xmm6
; cvt_float32_to_sint32_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -335,7 +335,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_sint64_seq %xmm0, %rax, %r10, %xmm6
; cvt_float32_to_sint64_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -349,7 +349,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_sint32_seq %xmm0, %eax, %r10, %xmm6
; cvt_float64_to_sint32_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -363,7 +363,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_sint64_seq %xmm0, %rax, %r10, %xmm6
; cvt_float64_to_sint64_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -377,7 +377,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6
; cvt_float32_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -391,7 +391,7 @@ block0(v0: f32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6
; cvt_float32_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -405,7 +405,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r10, %xmm6
; cvt_float64_to_sint32_sat_seq %xmm0, %eax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret
@ -419,7 +419,7 @@ block0(v0: f64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r10, %xmm6
; cvt_float64_to_sint64_sat_seq %xmm0, %rax, %r8, %xmm4
; movq %rbp, %rsp
; popq %rbp
; ret

8
cranelift/filetests/filetests/isa/x64/sdiv.clif

@ -11,8 +11,9 @@ block0(v0: i8, v1: i8):
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; cbw %al, %dl
; idiv %al, (none), %sil, %al, %dl
; cbw %al, %al
; movq %rax, %rdi
; idiv %al, (none), %sil, %al, (none)
; movq %rbp, %rsp
; popq %rbp
; ret
@ -28,6 +29,7 @@ block0(v0: i16, v1: i16):
; block0:
; movq %rdi, %rax
; cwd %ax, %dx
; movq %rdx, %r8
; idiv %ax, %dx, %si, %ax, %dx
; movq %rbp, %rsp
; popq %rbp
@ -44,6 +46,7 @@ block0(v0: i32, v1: i32):
; block0:
; movq %rdi, %rax
; cdq %eax, %edx
; movq %rdx, %r8
; idiv %eax, %edx, %esi, %eax, %edx
; movq %rbp, %rsp
; popq %rbp
@ -60,6 +63,7 @@ block0(v0: i64, v1: i64):
; block0:
; movq %rdi, %rax
; cqo %rax, %rdx
; movq %rdx, %r8
; idiv %rax, %rdx, %rsi, %rax, %rdx
; movq %rbp, %rsp
; popq %rbp

19
cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif

@ -12,9 +12,10 @@ block0(v0: i8x16, v1: i8x16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm9
; movdqa %xmm0, %xmm6
; load_const VCodeConstant(0), %xmm0
; vpermi2b %xmm1, %xmm0, %xmm9
; movdqa %xmm6, %xmm8
; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@ -31,11 +32,12 @@ block0(v0: i8x16, v1: i8x16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm12
; movdqa %xmm0, %xmm9
; load_const VCodeConstant(1), %xmm0
; load_const VCodeConstant(0), %xmm7
; vpermi2b %xmm1, %xmm7, %xmm12
; andps %xmm0, %xmm7, %xmm0
; load_const VCodeConstant(0), %xmm8
; movdqa %xmm9, %xmm11
; vpermi2b %xmm1, %xmm11, %xmm8, %xmm8
; andps %xmm0, %xmm8, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@ -49,9 +51,10 @@ block0(v0: i8x16, v1: i8x16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm9
; movdqa %xmm0, %xmm6
; load_const VCodeConstant(0), %xmm0
; vpermi2b %xmm1, %xmm0, %xmm9
; movdqa %xmm6, %xmm8
; vpermi2b %xmm1, %xmm8, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

12
cranelift/filetests/filetests/isa/x64/srem.clif

@ -10,8 +10,9 @@ block0(v0: i8, v1: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; xorl %r11d, %r11d, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none)
; shrq $8, %rax, %rax
; movq %rbp, %rsp
@ -27,8 +28,9 @@ block0(v0: i16, v1: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; xorl %r11d, %r11d, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp
@ -44,8 +46,9 @@ block0(v0: i32, v1: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; xorl %r11d, %r11d, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp
@ -61,8 +64,9 @@ block0(v0: i64, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; xorl %r11d, %r11d, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none)
; movq %rdx, %rax
; movq %rbp, %rsp

15
cranelift/filetests/filetests/isa/x64/udiv.clif

@ -10,9 +10,9 @@ block0(v0: i8, v1: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movzbl %al, %eax
; div %al, (none), %sil, %al, %dl
; movzbl %dil, %r10d
; movq %r10, %rax
; div %al, (none), %sil, %al, (none)
; movq %rbp, %rsp
; popq %rbp
; ret
@ -26,8 +26,9 @@ block0(v0: i16, v1: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %ax, %dx, %si, %ax, %dx
; movq %rbp, %rsp
; popq %rbp
@ -42,8 +43,9 @@ block0(v0: i32, v1: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %eax, %edx, %esi, %eax, %edx
; movq %rbp, %rsp
; popq %rbp
@ -58,8 +60,9 @@ block0(v0: i64, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %rax, %rdx, %rsi, %rax, %rdx
; movq %rbp, %rsp
; popq %rbp

15
cranelift/filetests/filetests/isa/x64/urem.clif

@ -10,9 +10,9 @@ block0(v0: i8, v1: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; movzbl %al, %eax
; div %al, (none), %sil, %al, %dl
; movzbl %dil, %r10d
; movq %r10, %rax
; div %al, (none), %sil, %al, (none)
; shrq $8, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
@ -27,8 +27,9 @@ block0(v0: i16, v1: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %ax, %dx, %si, %ax, %dx
; movq %rdx, %rax
; movq %rbp, %rsp
@ -44,8 +45,9 @@ block0(v0: i32, v1: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %eax, %edx, %esi, %eax, %edx
; movq %rdx, %rax
; movq %rbp, %rsp
@ -61,8 +63,9 @@ block0(v0: i64, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl $0, %r11d
; movq %rdi, %rax
; movl $0, %edx
; movq %r11, %rdx
; div %rax, %rdx, %rsi, %rax, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp

Loading…
Cancel
Save