From 615a575da11da798f0f58645584863bcb4c1a731 Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Sun, 22 Nov 2020 20:23:00 -0800 Subject: [PATCH] Add support for x86_64 packed move lowering for the vcode backend --- cranelift/codegen/src/isa/x64/inst/args.rs | 36 +++++++++ cranelift/codegen/src/isa/x64/inst/emit.rs | 12 +++ .../codegen/src/isa/x64/inst/emit_tests.rs | 75 +++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index e992288560..2958dd56d0 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -507,6 +507,18 @@ pub enum SseOpcode { Pminuw, Pminud, Pmovmskb, + Pmovsxbd, + Pmovsxbw, + Pmovsxbq, + Pmovsxwd, + Pmovsxwq, + Pmovsxdq, + Pmovzxbd, + Pmovzxbw, + Pmovzxbq, + Pmovzxwd, + Pmovzxwq, + Pmovzxdq, Pmulld, Pmullw, Pmuludq, @@ -692,6 +704,18 @@ impl SseOpcode { | SseOpcode::Pminsd | SseOpcode::Pminuw | SseOpcode::Pminud + | SseOpcode::Pmovsxbd + | SseOpcode::Pmovsxbw + | SseOpcode::Pmovsxbq + | SseOpcode::Pmovsxwd + | SseOpcode::Pmovsxwq + | SseOpcode::Pmovsxdq + | SseOpcode::Pmovzxbd + | SseOpcode::Pmovzxbw + | SseOpcode::Pmovzxbq + | SseOpcode::Pmovzxwd + | SseOpcode::Pmovzxwq + | SseOpcode::Pmovzxdq | SseOpcode::Pmulld | SseOpcode::Ptest | SseOpcode::Roundss @@ -812,6 +836,18 @@ impl fmt::Debug for SseOpcode { SseOpcode::Pminuw => "pminuw", SseOpcode::Pminud => "pminud", SseOpcode::Pmovmskb => "pmovmskb", + SseOpcode::Pmovsxbd => "pmovsxbd", + SseOpcode::Pmovsxbw => "pmovsxbw", + SseOpcode::Pmovsxbq => "pmovsxbq", + SseOpcode::Pmovsxwd => "pmovsxwd", + SseOpcode::Pmovsxwq => "pmovsxwq", + SseOpcode::Pmovsxdq => "pmovsxdq", + SseOpcode::Pmovzxbd => "pmovzxbd", + SseOpcode::Pmovzxbw => "pmovzxbw", + SseOpcode::Pmovzxbq => "pmovzxbq", + SseOpcode::Pmovzxwd => "pmovzxwd", + SseOpcode::Pmovzxwq => "pmovzxwq", + SseOpcode::Pmovzxdq => "pmovzxdq", SseOpcode::Pmulld => "pmulld", SseOpcode::Pmullw => "pmullw", SseOpcode::Pmuludq => "pmuludq", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 7d15063ad4..7d04e0e800 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1802,6 +1802,18 @@ pub(crate) fn emit( SseOpcode::Pcmpgtw => (LegacyPrefixes::_66, 0x0F65, 2), SseOpcode::Pcmpgtd => (LegacyPrefixes::_66, 0x0F66, 2), SseOpcode::Pcmpgtq => (LegacyPrefixes::_66, 0x0F3837, 3), + SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3), + SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3), + SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3), + SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3), + SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3), + SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3), + SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3), + SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3), + SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3), + SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3), + SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3), + SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3), SseOpcode::Pmaxsb => (LegacyPrefixes::_66, 0x0F383C, 3), SseOpcode::Pmaxsw => (LegacyPrefixes::_66, 0x0FEE, 2), SseOpcode::Pmaxsd => (LegacyPrefixes::_66, 0x0F383D, 3), diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 6c2fe6f2d4..bfcd7a401f 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -3183,6 +3183,81 @@ fn test_x64_emit() { "cvttps2dq %xmm9, %xmm8", )); + // ======================================================== + // XMM_RM_R: Packed Move + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxbd, RegMem::reg(xmm6), w_xmm8), + "66440F3821C6", + "pmovsxbd %xmm6, %xmm8", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxbw, RegMem::reg(xmm9), w_xmm10), + "66450F3820D1", + "pmovsxbw %xmm9, %xmm10", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxbq, RegMem::reg(xmm1), w_xmm1), + "660F3822C9", + "pmovsxbq %xmm1, %xmm1", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxwd, RegMem::reg(xmm13), w_xmm10), + "66450F3823D5", + "pmovsxwd %xmm13, %xmm10", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxwq, RegMem::reg(xmm12), w_xmm12), + "66450F3824E4", + "pmovsxwq %xmm12, %xmm12", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovsxdq, RegMem::reg(xmm10), w_xmm8), + "66450F3825C2", + "pmovsxdq %xmm10, %xmm8", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxbd, RegMem::reg(xmm5), w_xmm6), + "660F3831F5", + "pmovzxbd %xmm5, %xmm6", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxbw, RegMem::reg(xmm5), w_xmm13), + "66440F3830ED", + "pmovzxbw %xmm5, %xmm13", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxbq, RegMem::reg(xmm10), w_xmm11), + "66450F3832DA", + "pmovzxbq %xmm10, %xmm11", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxwd, RegMem::reg(xmm2), w_xmm10), + "66440F3833D2", + "pmovzxwd %xmm2, %xmm10", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxwq, RegMem::reg(xmm7), w_xmm4), + "660F3834E7", + "pmovzxwq %xmm7, %xmm4", + )); + + insns.push(( + Inst::xmm_rm_r(SseOpcode::Pmovzxdq, RegMem::reg(xmm3), w_xmm4), + "660F3835E3", + "pmovzxdq %xmm3, %xmm4", + )); + // XMM_Mov_R_M: float stores insns.push(( Inst::xmm_mov_r_m(SseOpcode::Movss, xmm15, Amode::imm_reg(128, r12)),