Browse Source

cranelift/x64: Fix XmmRmREvex pretty-printing (#8508)

* cranelift/x64: Fix XmmRmREvex pretty-printing

The operand collector had these operands in src1/src2/dst order, but the
pretty-printer fetched the allocations in dst/src1/src2 order instead.

Although our pretty-printer looked like it was printing src1/src2/dst,
because it consumed operands in the wrong order, what it actually
printed was src2/dst/src1.

Meanwhile, Capstone actually uses src2/src1/dst order in AT&T mode. (GNU
objdump agrees.)

In the only filetest covering the vpsraq instruction, our output agreed
with Capstone because register allocation picked the same register for
both src1 and dst, so the two orders were indistinguishable. I've
extended the filetest to force register allocation to pick different
registers.

This format is also used for vpmullq, but we didn't have any compile
filetests covering that instruction, so I've added one with the same
register allocation pattern.

Now our pretty-printer agrees with Capstone on both instructions.

* Fix emit-tests and vpermi2b

This test for vpmullq had what we have now determined to be the wrong
order for src1 and src2.

There were no emit-tests for vpsraq, so I added one.

The vpermi2b tests used the wrong form of the Inst enum, judging by the
assertions that are in x64_get_operands (which is not exercised by emit
tests) and the fact that we never use that form for that instruction
anywhere else.

Pretty-printing vpermi2b disagreed with Capstone in the same way as
vpsraq and vpmullq. I've fixed that form to agree with Capstone as well,
aside from the duplicated src1/dst operands, which are required to be
different before register allocation and equal afterward.
pull/8523/head
Jamey Sharp 6 months ago
committed by GitHub
parent
commit
c66c87411f
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 50
      cranelift/codegen/src/isa/x64/inst/emit_tests.rs
  2. 6
      cranelift/codegen/src/isa/x64/inst/mod.rs
  3. 4
      cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif
  4. 33
      cranelift/filetests/filetests/isa/x64/simd-i64x2-mul-avx512.clif
  5. 23
      cranelift/filetests/filetests/isa/x64/simd-i64x2-shift-avx512.clif

50
cranelift/codegen/src/isa/x64/inst/emit_tests.rs

@ -61,6 +61,7 @@ impl Inst {
}
fn xmm_rm_r_evex(op: Avx512Opcode, src1: Reg, src2: RegMem, dst: Writable<Reg>) -> Self {
debug_assert_ne!(op, Avx512Opcode::Vpermi2b);
src2.assert_regclass_is(RegClass::Float);
debug_assert!(src1.class() == RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
@ -72,6 +73,27 @@ impl Inst {
}
}
/// Construct an `Inst::XmmRmREvex3`: an EVEX-encoded instruction with
/// three register/memory sources and a separate destination.
///
/// Currently this form is only used for `vpermi2b` (enforced by the
/// assertion below); all other EVEX ops go through `xmm_rm_r_evex`.
/// NOTE(review): per the commit message, src1 and dst are presumably
/// required to be distinct before register allocation and equal after —
/// confirm against `x64_get_operands`.
fn xmm_rm_r_evex3(
op: Avx512Opcode,
src1: Reg,
src2: Reg,
src3: RegMem,
dst: Writable<Reg>,
) -> Self {
// This three-source form exists solely for vpermi2b.
debug_assert_eq!(op, Avx512Opcode::Vpermi2b);
// Every operand must live in the float (XMM) register class.
src3.assert_regclass_is(RegClass::Float);
debug_assert!(src1.class() == RegClass::Float);
debug_assert!(src2.class() == RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
Inst::XmmRmREvex3 {
op,
// `unwrap` is safe here: the asserts above guarantee XMM-class regs.
src1: Xmm::new(src1).unwrap(),
src2: Xmm::new(src2).unwrap(),
src3: XmmMem::new(src3).unwrap(),
dst: WritableXmm::from_writable_reg(dst).unwrap(),
}
}
// TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level)
fn xmm_mov(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
src.assert_regclass_is(RegClass::Float);
@ -4189,19 +4211,37 @@ fn test_x64_emit() {
insns.push((
Inst::xmm_rm_r_evex(Avx512Opcode::Vpmullq, xmm10, RegMem::reg(xmm14), w_xmm1),
"62D2AD0840CE",
"vpmullq %xmm10, %xmm14, %xmm1",
"vpmullq %xmm14, %xmm10, %xmm1",
));
insns.push((
Inst::xmm_rm_r_evex(Avx512Opcode::Vpermi2b, xmm10, RegMem::reg(xmm14), w_xmm1),
Inst::xmm_rm_r_evex(Avx512Opcode::Vpsraq, xmm10, RegMem::reg(xmm14), w_xmm1),
"62D1AD08E2CE",
"vpsraq %xmm14, %xmm10, %xmm1",
));
insns.push((
Inst::xmm_rm_r_evex3(
Avx512Opcode::Vpermi2b,
xmm1,
xmm10,
RegMem::reg(xmm14),
w_xmm1,
),
"62D22D0875CE",
"vpermi2b %xmm10, %xmm14, %xmm1",
"vpermi2b %xmm14, %xmm10, %xmm1, %xmm1",
));
insns.push((
Inst::xmm_rm_r_evex(Avx512Opcode::Vpermi2b, xmm0, RegMem::reg(xmm1), w_xmm2),
Inst::xmm_rm_r_evex3(
Avx512Opcode::Vpermi2b,
xmm2,
xmm0,
RegMem::reg(xmm1),
w_xmm2,
),
"62F27D0875D1",
"vpermi2b %xmm0, %xmm1, %xmm2",
"vpermi2b %xmm1, %xmm0, %xmm2, %xmm2",
));
insns.push((

6
cranelift/codegen/src/isa/x64/inst/mod.rs

@ -1179,11 +1179,11 @@ impl PrettyPrint for Inst {
dst,
..
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
let src2 = src2.pretty_print(8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let op = ljustify(op.to_string());
format!("{op} {src1}, {src2}, {dst}")
format!("{op} {src2}, {src1}, {dst}")
}
Inst::XmmRmREvex3 {
@ -1199,7 +1199,7 @@ impl PrettyPrint for Inst {
let src3 = src3.pretty_print(8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let op = ljustify(op.to_string());
format!("{op} {src1}, {src2}, {src3}, {dst}")
format!("{op} {src3}, {src2}, {src1}, {dst}")
}
Inst::XmmMinMaxSeq {

4
cranelift/filetests/filetests/isa/x64/shuffle-avx512.clif

@ -15,7 +15,7 @@ block0(v0: i8x16, v1: i8x16):
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm6
; vpermi2b %xmm0, %xmm6, %xmm1, %xmm0
; vpermi2b %xmm1, %xmm6, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
@ -54,7 +54,7 @@ block0(v0: i8x16, v1: i8x16):
; movdqa %xmm0, %xmm5
; movdqu const(0), %xmm0
; movdqa %xmm5, %xmm6
; vpermi2b %xmm0, %xmm6, %xmm1, %xmm0
; vpermi2b %xmm1, %xmm6, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret

33
cranelift/filetests/filetests/isa/x64/simd-i64x2-mul-avx512.clif

@ -0,0 +1,33 @@
test compile precise-output
target x86_64 sse42 has_avx has_avx2 has_avx512dq has_avx512vl
function %imul(i64x2, i64x2) -> i64x2, i64x2 {
block0(v0: i64x2, v1: i64x2):
;; Force register allocation to pick a different destination than
;; source for at least one of these instructions.
v2 = imul v0, v1
v3 = imul v2, v1
return v2, v3
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmullq %xmm1, %xmm0, %xmm0
; vpmullq %xmm1, %xmm0, %xmm1
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmullq %xmm1, %xmm0, %xmm0
; vpmullq %xmm1, %xmm0, %xmm1
; movq %rbp, %rsp
; popq %rbp
; retq

23
cranelift/filetests/filetests/isa/x64/simd-i64x2-shift-avx512.clif

@ -1,19 +1,26 @@
test compile precise-output
target x86_64 sse42 has_avx has_avx2 has_avx512f has_avx512vl
function %sshr(i64x2, i64) -> i64x2 {
function %sshr(i64x2, i64) -> i64x2, i64x2 {
block0(v0: i64x2, v1: i64):
;; Force register allocation to pick a different destination than
;; source for at least one of these instructions.
v2 = sshr v0, v1
return v2
v3 = sshr v2, v1
return v2, v3
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %r9
; andq %r9, $63, %r9
; vmovd %r9d, %xmm1
; vpsraq %xmm1, %xmm0, %xmm0
; andq %rdi, $63, %rdi
; vmovd %edi, %xmm5
; vpsraq %xmm5, %xmm0, %xmm0
; vmovd %edi, %xmm1
; vpsraq %xmm1, %xmm0, %xmm1
; movq %rbp, %rsp
; popq %rbp
; ret
@ -23,9 +30,13 @@ block0(v0: i64x2, v1: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %r9
; andq $0x3f, %r9
; vmovd %r9d, %xmm1
; vpsraq %xmm1, %xmm0, %xmm0
; andq $0x3f, %rdi
; vmovd %edi, %xmm5
; vpsraq %xmm5, %xmm0, %xmm0
; vmovd %edi, %xmm1
; vpsraq %xmm1, %xmm0, %xmm1
; movq %rbp, %rsp
; popq %rbp
; retq

Loading…
Cancel
Save