
x64: Refactor and fill out some gpr-vs-xmm bits (#6058)

* x64: Add instruction helpers for `mov{d,q}`

These will soon grow AVX equivalents, so move them into instruction
helpers where AVX-specific rules can be added in the future.
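
For reference, each of these helpers follows the same shape: a base rule
that emits the SSE form, plus a higher-priority rule gated on
`use_avx_simd`, as in this sketch adapted from the `inst.isle` hunk
below.

    ;; Helper for creating `movd` instructions (GPR or memory to XMM).
    (decl x64_movd_to_xmm (GprMem) Xmm)
    (rule (x64_movd_to_xmm from)
          (gpr_to_xmm (SseOpcode.Movd) from (OperandSize.Size32)))
    (rule 1 (x64_movd_to_xmm from)
          (if-let $true (use_avx_simd))
          (gpr_to_xmm_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))

The `movq` and XMM-to-GPR helpers follow the same pattern, using
`OperandSize.Size64` and `xmm_to_gpr`/`xmm_to_gpr_vex` respectively.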

* x64: Don't auto-convert between RegMemImm and XmmMemImm

The previous conversion, `mov_rmi_to_xmm`, would move from GPR registers
to XMM registers, which isn't what many of the other `convert` statements
between these newtypes do. This seemed like a possible footgun, so I've
removed the auto-conversion and added an explicit helper that builds an
`XmmMemImm` from a `u32`.
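
Concretely, callers that previously leaned on the implicit conversion of
an immediate now construct the `XmmMemImm` explicitly; a minimal
before/after sketch taken from the `lower.isle` hunk below:

    ;; Before: the immediate was converted to an XmmMemImm implicitly.
    (x64_psrlq a0 (RegMemImm.Imm 32))
    ;; After: the immediate is wrapped explicitly with the new helper.
    (x64_psrlq a0 (xmi_imm 32))

Moving a GPR into an XMM register likewise requires an explicit call
(e.g. `mov_rmi_to_xmm` or `x64_movd_to_xmm`) rather than happening
through an implicit `convert`.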

* x64: Add AVX encodings of some more GPR-related insns

This commit adds more support for AVX instructions that mix GPRs with
XMM registers. This required a few new variants of `Inst` to handle the
new instructions.
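
The new variants, shown in full in the `inst.isle` hunk below, mirror
the existing SSE `GprToXmm`/`XmmToGpr` shapes:

    ;; XMM (scalar) unary op (from integer to float reg): vmovd, vmovq,
    ;; vcvtsi2s{s,d}
    (GprToXmmVex (op AvxOpcode)
                 (src GprMem)
                 (dst WritableXmm)
                 (src_size OperandSize))
    ;; XMM (scalar) unary op (from xmm to integer reg): vmovd, vmovq,
    ;; vcvtts{s,d}2si
    (XmmToGprVex (op AvxOpcode)
                 (src Xmm)
                 (dst WritableGpr)
                 (dst_size OperandSize))

Emission in `emit.rs` picks the VEX prefix/opcode per `AvxOpcode` and
sets the VEX `W` bit from the operand size, which is what distinguishes
the 32-bit `vmovd` form from the 64-bit `vmovq` form.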

* Fix vpmovmskb encoding

* Fix xmm-to-gpr encoding of vmovd/vmovq

* Fix typo

* Fix rebase conflict

* Fix rebase conflict with tests
Alex Crichton authored 2 years ago, committed by GitHub
commit 2fde25311e
14 changed files (number of changed lines in parentheses):

1. cranelift/codegen/src/isa/x64/inst.isle (110)
2. cranelift/codegen/src/isa/x64/inst/args.rs (9)
3. cranelift/codegen/src/isa/x64/inst/emit.rs (83)
4. cranelift/codegen/src/isa/x64/inst/mod.rs (30)
5. cranelift/codegen/src/isa/x64/lower.isle (38)
6. cranelift/codegen/src/isa/x64/lower/isle.rs (4)
7. cranelift/filetests/filetests/isa/x64/fcvt-avx.clif (104)
8. cranelift/filetests/filetests/isa/x64/float-bitcast-avx.clif (104)
9. cranelift/filetests/filetests/isa/x64/float-bitcast.clif (104)
10. cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif (48)
11. cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif (4)
12. cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif (16)
13. cranelift/filetests/filetests/isa/x64/simd-splat-avx2.clif (16)
14. cranelift/filetests/filetests/isa/x64/vhigh_bits-avx.clif (108)

cranelift/codegen/src/isa/x64/inst.isle (110)

@ -320,6 +320,21 @@
(dst WritableGpr)
(imm u8))
;; XMM (scalar) unary op (from integer to float reg): vmovd, vmovq,
;; vcvtsi2s{s,d}
(GprToXmmVex (op AvxOpcode)
(src GprMem)
(dst WritableXmm)
(src_size OperandSize))
;; XMM (scalar) unary op (from xmm to integer reg): vmovd, vmovq,
;; vcvtts{s,d}2si
(XmmToGprVex (op AvxOpcode)
(src Xmm)
(dst WritableGpr)
(dst_size OperandSize))
;; XMM (scalar or vector) binary op that relies on the EVEX
;; prefix. Takes two inputs.
(XmmRmREvex (op Avx512Opcode)
@ -1277,6 +1292,13 @@
Vpbroadcastw
Vpbroadcastd
Vbroadcastss
Vmovd
Vmovq
Vmovmskps
Vmovmskpd
Vpmovmskb
Vcvtsi2ss
Vcvtsi2sd
))
(type Avx512Opcode extern
@ -1539,6 +1561,10 @@
(decl lo_gpr (Value) Gpr)
(rule (lo_gpr regs) (gpr_new (lo_reg regs)))
;; Construct a new `XmmMemImm` from a 32-bit immediate.
(decl xmi_imm (u32) XmmMemImm)
(extern constructor xmi_imm xmi_imm)
;;;; Helpers for Working With Integer Comparison Codes ;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@ -1818,10 +1844,7 @@
(decl mov_rmi_to_xmm (RegMemImm) XmmMemImm)
(rule (mov_rmi_to_xmm rmi @ (RegMemImm.Mem _)) (xmm_mem_imm_new rmi))
(rule (mov_rmi_to_xmm rmi @ (RegMemImm.Imm _)) (xmm_mem_imm_new rmi))
(rule (mov_rmi_to_xmm (RegMemImm.Reg r))
(gpr_to_xmm (SseOpcode.Movd)
r
(OperandSize.Size32)))
(rule (mov_rmi_to_xmm (RegMemImm.Reg r)) (x64_movd_to_xmm r))
;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1941,9 +1964,37 @@
(if-let $true (use_avx_simd))
(xmm_movrm_vex (AvxOpcode.Vmovupd) addr data))
(decl x64_movd (Xmm) Gpr)
(rule (x64_movd from)
;; Helper for creating `movd` instructions.
(decl x64_movd_to_gpr (Xmm) Gpr)
(rule (x64_movd_to_gpr from)
(xmm_to_gpr (SseOpcode.Movd) from (OperandSize.Size32)))
(rule 1 (x64_movd_to_gpr from)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))
;; Helper for creating `movd` instructions.
(decl x64_movd_to_xmm (GprMem) Xmm)
(rule (x64_movd_to_xmm from)
(gpr_to_xmm (SseOpcode.Movd) from (OperandSize.Size32)))
(rule 1 (x64_movd_to_xmm from)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))
;; Helper for creating `movq` instructions.
(decl x64_movq_to_xmm (GprMem) Xmm)
(rule (x64_movq_to_xmm src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
(rule 1 (x64_movq_to_xmm from)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vmovq) from (OperandSize.Size64)))
;; Helper for creating `movq` instructions.
(decl x64_movq_to_gpr (Xmm) Gpr)
(rule (x64_movq_to_gpr src)
(xmm_to_gpr (SseOpcode.Movq) src (OperandSize.Size64)))
(rule 1 (x64_movq_to_gpr from)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovq) from (OperandSize.Size64)))
(decl x64_movdqu_load (XmmMem) Xmm)
(rule (x64_movdqu_load from)
@ -2186,15 +2237,11 @@
;; `f32` immediates.
(rule 2 (imm $F32 (u64_nonzero bits))
(gpr_to_xmm (SseOpcode.Movd)
(imm $I32 bits)
(OperandSize.Size32)))
(x64_movd_to_xmm (imm $I32 bits)))
;; `f64` immediates.
(rule 2 (imm $F64 (u64_nonzero bits))
(gpr_to_xmm (SseOpcode.Movq)
(imm $I64 bits)
(OperandSize.Size64)))
(x64_movq_to_xmm (imm $I64 bits)))
;; Special case for when a 64-bit immediate fits into 32-bits. We can use a
;; 32-bit move that zero-extends the value, which has a smaller encoding.
@ -3663,20 +3710,44 @@
(_ Unit (emit (MInst.XmmToGprImmVex op src dst imm))))
dst))
;; Helper for creating `MInst.XmmToGprVex` instructions.
(decl xmm_to_gpr_vex (AvxOpcode Xmm OperandSize) Gpr)
(rule (xmm_to_gpr_vex op src size)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.XmmToGprVex op src dst size))))
dst))
;; Helper for creating `MInst.GprToXmmVex` instructions.
(decl gpr_to_xmm_vex (AvxOpcode GprMem OperandSize) Xmm)
(rule (gpr_to_xmm_vex op src size)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.GprToXmmVex op src dst size))))
dst))
;; Helper for creating `pmovmskb` instructions.
(decl x64_pmovmskb (OperandSize Xmm) Gpr)
(rule (x64_pmovmskb size src)
(xmm_to_gpr (SseOpcode.Pmovmskb) src size))
(rule 1 (x64_pmovmskb size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vpmovmskb) src size))
;; Helper for creating `movmskps` instructions.
(decl x64_movmskps (OperandSize Xmm) Gpr)
(rule (x64_movmskps size src)
(xmm_to_gpr (SseOpcode.Movmskps) src size))
(rule 1 (x64_movmskps size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovmskps) src size))
;; Helper for creating `movmskpd` instructions.
(decl x64_movmskpd (OperandSize Xmm) Gpr)
(rule (x64_movmskpd size src)
(xmm_to_gpr (SseOpcode.Movmskpd) src size))
(rule 1 (x64_movmskpd size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovmskpd) src size))
;; Helper for creating `MInst.GprToXmm` instructions.
(decl gpr_to_xmm (SseOpcode GprMem OperandSize) Xmm)
@ -3973,11 +4044,17 @@
(decl x64_cvtsi2ss (Type GprMem) Xmm)
(rule (x64_cvtsi2ss ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2ss) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2ss ty x)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2ss) x (raw_operand_size_of_type ty)))
;; Helper for creating `cvtsi2sd` instructions.
(decl x64_cvtsi2sd (Type GprMem) Xmm)
(rule (x64_cvtsi2sd ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2sd) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2sd ty x)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2sd) x (raw_operand_size_of_type ty)))
;; Helper for creating `cvttps2dq` instructions.
(decl x64_cvttps2dq (XmmMem) Xmm)
@ -4486,15 +4563,15 @@
(decl bitcast_xmm_to_gpr (Type Xmm) Gpr)
(rule (bitcast_xmm_to_gpr $F32 src)
(xmm_to_gpr (SseOpcode.Movd) src (OperandSize.Size32)))
(x64_movd_to_gpr src))
(rule (bitcast_xmm_to_gpr $F64 src)
(xmm_to_gpr (SseOpcode.Movq) src (OperandSize.Size64)))
(x64_movq_to_gpr src))
(decl bitcast_gpr_to_xmm (Type Gpr) Xmm)
(rule (bitcast_gpr_to_xmm $I32 src)
(gpr_to_xmm (SseOpcode.Movd) src (OperandSize.Size32)))
(x64_movd_to_xmm src))
(rule (bitcast_gpr_to_xmm $I64 src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
(x64_movq_to_xmm src))
;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -4678,7 +4755,6 @@
(convert Reg XmmMem reg_to_xmm_mem)
(convert Reg RegMemImm reg_to_reg_mem_imm)
(convert RegMem XmmMem reg_mem_to_xmm_mem)
(convert RegMemImm XmmMemImm mov_rmi_to_xmm)
(convert Xmm XmmMem xmm_to_xmm_mem)
(convert Xmm XmmMemImm xmm_to_xmm_mem_imm)
(convert Xmm XmmMemAligned xmm_to_xmm_mem_aligned)

cranelift/codegen/src/isa/x64/inst/args.rs (9)

@ -1715,7 +1715,14 @@ impl AvxOpcode {
| AvxOpcode::Vpextrq
| AvxOpcode::Vpblendw
| AvxOpcode::Vmovddup
| AvxOpcode::Vbroadcastss => {
| AvxOpcode::Vbroadcastss
| AvxOpcode::Vmovd
| AvxOpcode::Vmovq
| AvxOpcode::Vmovmskps
| AvxOpcode::Vmovmskpd
| AvxOpcode::Vpmovmskb
| AvxOpcode::Vcvtsi2ss
| AvxOpcode::Vcvtsi2sd => {
smallvec![InstructionSet::AVX]
}

cranelift/codegen/src/isa/x64/inst/emit.rs (83)

@ -2515,6 +2515,89 @@ pub(crate) fn emit(
.encode(sink);
}
Inst::XmmToGprVex {
op,
src,
dst,
dst_size,
} => {
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
let (prefix, map, opcode) = match op {
// vmovd/vmovq are differentiated by `w`
AvxOpcode::Vmovd | AvxOpcode::Vmovq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x7E),
AvxOpcode::Vmovmskps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x50),
AvxOpcode::Vmovmskpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x50),
AvxOpcode::Vpmovmskb => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xD7),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match dst_size {
OperandSize::Size64 => true,
_ => false,
};
let mut vex = VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode);
vex = match op {
// The `vmovq/vmovd` reverse the order of the destination/source
// relative to other opcodes using this shape of instruction.
AvxOpcode::Vmovd | AvxOpcode::Vmovq => vex
.rm(dst.to_real_reg().unwrap().hw_enc())
.reg(src.to_real_reg().unwrap().hw_enc()),
_ => vex
.rm(src.to_real_reg().unwrap().hw_enc())
.reg(dst.to_real_reg().unwrap().hw_enc()),
};
vex.encode(sink);
}
Inst::GprToXmmVex {
op,
src,
dst,
src_size,
} => {
let dst = allocs.next(dst.to_reg().to_reg());
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};
let (prefix, map, opcode) = match op {
// vmovd/vmovq are differentiated by `w`
AvxOpcode::Vmovd | AvxOpcode::Vmovq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x6E),
AvxOpcode::Vcvtsi2ss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x2A),
AvxOpcode::Vcvtsi2sd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x2A),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match src_size {
OperandSize::Size64 => true,
_ => false,
};
let mut insn = VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode)
.rm(src)
.reg(dst.to_real_reg().unwrap().hw_enc());
// These opcodes technically take a second operand which is the
// upper bits to preserve during the float conversion. We don't
// actually use this in this backend right now so reuse the
// destination register. This at least matches what LLVM does.
if let AvxOpcode::Vcvtsi2ss | AvxOpcode::Vcvtsi2sd = op {
insn = insn.vvvv(dst.to_real_reg().unwrap().hw_enc());
}
insn.encode(sink);
}
Inst::XmmRmREvex {
op,
src1,

cranelift/codegen/src/isa/x64/inst/mod.rs (30)

@ -158,7 +158,9 @@ impl Inst {
| Inst::XmmUnaryRmRImmVex { op, .. }
| Inst::XmmMovRMVex { op, .. }
| Inst::XmmMovRMImmVex { op, .. }
| Inst::XmmToGprImmVex { op, .. } => op.available_from(),
| Inst::XmmToGprImmVex { op, .. }
| Inst::XmmToGprVex { op, .. }
| Inst::GprToXmmVex { op, .. } => op.available_from(),
}
}
}
@ -1202,6 +1204,18 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::XmmToGprVex {
op,
src,
dst,
dst_size,
} => {
let dst_size = dst_size.to_bytes();
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs);
format!("{} {src}, {dst}", ljustify(op.to_string()))
}
Inst::XmmToGprImm { op, src, dst, imm } => {
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
@ -1225,6 +1239,17 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::GprToXmmVex {
op,
src,
src_size,
dst,
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(src_size.to_bytes(), allocs);
format!("{} {src}, {dst}", ljustify(op.to_string()))
}
Inst::XmmCmpRmR { op, src, dst } => {
let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
@ -2082,12 +2107,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_fixed_nonallocatable(*dst);
}
Inst::XmmToGpr { src, dst, .. }
| Inst::XmmToGprVex { src, dst, .. }
| Inst::XmmToGprImm { src, dst, .. }
| Inst::XmmToGprImmVex { src, dst, .. } => {
collector.reg_use(src.to_reg());
collector.reg_def(dst.to_writable_reg());
}
Inst::GprToXmm { src, dst, .. } => {
Inst::GprToXmm { src, dst, .. } | Inst::GprToXmmVex { src, dst, .. } => {
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}

cranelift/codegen/src/isa/x64/lower.isle (38)

@ -883,17 +883,17 @@
(let ((a0 Xmm a)
(b0 Xmm b)
;; a_hi = A >> 32
(a_hi Xmm (x64_psrlq a0 (RegMemImm.Imm 32)))
(a_hi Xmm (x64_psrlq a0 (xmi_imm 32)))
;; ah_bl = Ah * Bl
(ah_bl Xmm (x64_pmuludq a_hi b0))
;; b_hi = B >> 32
(b_hi Xmm (x64_psrlq b0 (RegMemImm.Imm 32)))
(b_hi Xmm (x64_psrlq b0 (xmi_imm 32)))
;; al_bh = Al * Bh
(al_bh Xmm (x64_pmuludq a0 b_hi))
;; aa_bb = ah_bl + al_bh
(aa_bb Xmm (x64_paddq ah_bl al_bh))
;; aa_bb_shifted = aa_bb << 32
(aa_bb_shifted Xmm (x64_psllq aa_bb (RegMemImm.Imm 32)))
(aa_bb_shifted Xmm (x64_psllq aa_bb (xmi_imm 32)))
;; al_bl = Al * Bl
(al_bl Xmm (x64_pmuludq a0 b0)))
;; al_bl + aa_bb_shifted
@ -1087,14 +1087,12 @@
;; Special case for `f32x4.abs`.
(rule (lower (has_type $F32X4 (fabs x)))
(x64_andps x
(x64_psrld (vector_all_ones)
(RegMemImm.Imm 1))))
(x64_psrld (vector_all_ones) (xmi_imm 1))))
;; Special case for `f64x2.abs`.
(rule (lower (has_type $F64X2 (fabs x)))
(x64_andpd x
(x64_psrlq (vector_all_ones)
(RegMemImm.Imm 1))))
(x64_psrlq (vector_all_ones) (xmi_imm 1))))
;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1106,13 +1104,11 @@
(rule (lower (has_type $F32X4 (fneg x)))
(x64_xorps x
(x64_pslld (vector_all_ones)
(RegMemImm.Imm 31))))
(x64_pslld (vector_all_ones) (xmi_imm 31))))
(rule (lower (has_type $F64X2 (fneg x)))
(x64_xorpd x
(x64_psllq (vector_all_ones)
(RegMemImm.Imm 63))))
(x64_psllq (vector_all_ones) (xmi_imm 63))))
;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1918,7 +1914,7 @@
;; Note that this is a 16x8 shift, but that's OK; we mask
;; off anything that traverses from one byte to the next
;; with the low_mask below.
(shifted_src Xmm (x64_psrlw src (RegMemImm.Imm 4)))
(shifted_src Xmm (x64_psrlw src (xmi_imm 4)))
(high_nibbles Xmm (sse_and $I8X16 shifted_src low_mask))
(lookup Xmm (x64_xmm_load_const $I8X16 (popcount_4bit_table)))
(bit_counts_low Xmm (x64_pshufb lookup low_nibbles))
@ -2237,7 +2233,7 @@
;; All-ones for NaN, shifted down to leave 10 top bits (1
;; sign, 8 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (RegMemImm.Imm 10)))
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (xmi_imm 10)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@ -2254,7 +2250,7 @@
(min_or Xmm (x64_orpd min1 min2))
(is_nan_mask Xmm (x64_cmppd min1 min2 (FcmpImm.Unordered)))
(min_or_2 Xmm (x64_orpd min_or is_nan_mask))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (RegMemImm.Imm 13)))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (xmi_imm 13)))
(final Xmm (x64_andnpd nan_fraction_mask min_or_2)))
final))
@ -2302,7 +2298,7 @@
;; All-ones for NaN, shifted down to leave 10 top bits (1
;; sign, 8 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (RegMemImm.Imm 10)))
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (xmi_imm 10)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@ -2346,7 +2342,7 @@
;; All-ones for NaN, shifted down to leave 13 top bits (1
;; sign, 11 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (RegMemImm.Imm 13)))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (xmi_imm 13)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@ -3011,8 +3007,8 @@
(let ((a Xmm val)
;; get the low 16 bits
(a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
(a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
(a_lo Xmm (x64_pslld a (xmi_imm 16)))
(a_lo Xmm (x64_psrld a_lo (xmi_imm 16)))
;; get the high 16 bits
(a_hi Xmm (x64_psubd a a_lo))
@ -3022,7 +3018,7 @@
;; shift the high bits by 1, convert, and double to get the correct
;; value
(a_hi Xmm (x64_psrld a_hi (RegMemImm.Imm 1)))
(a_hi Xmm (x64_psrld a_hi (xmi_imm 1)))
(a_hi Xmm (x64_cvtdq2ps a_hi))
(a_hi Xmm (x64_addps a_hi a_hi)))
@ -3060,7 +3056,7 @@
;; Set top bit only if < 0
(tmp Xmm (x64_pand dst tmp))
(tmp Xmm (x64_psrad tmp (RegMemImm.Imm 31))))
(tmp Xmm (x64_psrad tmp (xmi_imm 31))))
;; On overflow 0x80000000 is returned to a lane.
;; Below sets positive overflow lanes to 0x7FFFFFFF
@ -3130,7 +3126,7 @@
;; integer that it can represent. In the case of INT_MAX, this value gets
;; represented as 0x4f000000 which is the integer value (INT_MAX+1).
(tmp2 Xmm (x64_pcmpeqd tmp2 tmp2))
(tmp2 Xmm (x64_psrld tmp2 (RegMemImm.Imm 1)))
(tmp2 Xmm (x64_psrld tmp2 (xmi_imm 1)))
(tmp2 Xmm (x64_cvtdq2ps tmp2))
;; Make a copy of these lanes and then do the first conversion.

cranelift/codegen/src/isa/x64/lower/isle.rs (4)

@ -1038,6 +1038,10 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
| bit(h, 7)?,
)
}
fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
XmmMemImm::new(RegMemImm::imm(imm)).unwrap()
}
}
impl IsleContext<'_, '_, MInst, X64Backend> {

cranelift/filetests/filetests/isa/x64/fcvt-avx.clif (104)

@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx
function %f3(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_sint.f32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2ss %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2ssl %edi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f4(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_sint.f32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2ss %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2ssq %rdi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f7(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_sint.f64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2sd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2sdl %edi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f8(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2sd %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2sdq %rdi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/float-bitcast-avx.clif (104)

@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx
function %i32_to_f32(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_to_f64(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_to_i32(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_to_i64(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/float-bitcast.clif (104)

@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64
function %i32_to_f32(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_to_f64(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
function %f32_to_i32(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %f64_to_i64(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; retq

cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif (48)

@ -920,7 +920,7 @@ block0(v0: i8x16, v1: i32):
; vpunpcklbw %xmm0, %xmm0, %xmm5
; vpunpckhbw %xmm0, %xmm0, %xmm7
; addl %r9d, $8, %r9d
; movd %r9d, %xmm11
; vmovd %r9d, %xmm11
; vpsraw %xmm5, %xmm11, %xmm13
; vpsraw %xmm7, %xmm11, %xmm15
; vpacksswb %xmm13, %xmm15, %xmm0
@ -938,7 +938,7 @@ block0(v0: i8x16, v1: i32):
; vpunpcklbw %xmm0, %xmm0, %xmm5
; vpunpckhbw %xmm0, %xmm0, %xmm7
; addl $8, %r9d
; movd %r9d, %xmm11
; vmovd %r9d, %xmm11
; vpsraw %xmm11, %xmm5, %xmm13
; vpsraw %xmm11, %xmm7, %xmm15
; vpacksswb %xmm15, %xmm13, %xmm0
@ -992,7 +992,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsraw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1005,7 +1005,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsraw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1049,7 +1049,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrad %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1062,7 +1062,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrad %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1315,7 +1315,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; uninit %xmm4
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm2, %xmm6, %xmm0
@ -1328,7 +1328,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm6, %xmm2, %xmm0
; movq %rbp, %rsp
@ -1389,7 +1389,7 @@ block0(v0: i8x16, v1: i32):
; block0:
; movq %rdi, %r10
; andq %r10, $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsllw %xmm0, %xmm5, %xmm7
; lea const(0), %rsi
; shlq $4, %r10, %r10
@ -1406,7 +1406,7 @@ block0(v0: i8x16, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %r10
; andq $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsllw %xmm5, %xmm0, %xmm7
; leaq 0x15(%rip), %rsi
; shlq $4, %r10
@ -1461,7 +1461,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1474,7 +1474,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1518,7 +1518,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpslld %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1531,7 +1531,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpslld %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1575,7 +1575,7 @@ block0(v0: i64x2, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $63, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllq %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1588,7 +1588,7 @@ block0(v0: i64x2, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x3f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllq %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1632,7 +1632,7 @@ block0(v0: i8x16, v1: i32):
; block0:
; movq %rdi, %r10
; andq %r10, $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsrlw %xmm0, %xmm5, %xmm7
; lea const(0), %rsi
; shlq $4, %r10, %r10
@ -1648,7 +1648,7 @@ block0(v0: i8x16, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %r10
; andq $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsrlw %xmm5, %xmm0, %xmm7
; leaq 0x15(%rip), %rsi
; shlq $4, %r10
@ -1713,7 +1713,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1726,7 +1726,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1770,7 +1770,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrld %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1783,7 +1783,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrld %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1827,7 +1827,7 @@ block0(v0: i64x2, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $63, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlq %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -1840,7 +1840,7 @@ block0(v0: i64x2, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x3f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlq %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp

cranelift/filetests/filetests/isa/x64/simd-bitwise-avx.clif (4)

@ -41,7 +41,7 @@ block0(v0: i64):
; movq %rsp, %rbp
; block0:
; movl $-2147483648, %eax
; movd %eax, %xmm4
; vmovd %eax, %xmm4
; vandnps %xmm4, const(0), %xmm6
; vandps %xmm4, 0(%rdi), %xmm8
; vorps %xmm6, %xmm8, %xmm0
@ -55,7 +55,7 @@ block0(v0: i64):
; movq %rsp, %rbp
; block1: ; offset 0x4
; movl $0x80000000, %eax
; movd %eax, %xmm4
; vmovd %eax, %xmm4
; vandnps 0x1b(%rip), %xmm4, %xmm6
; vandps (%rdi), %xmm4, %xmm8
; vorps %xmm8, %xmm6, %xmm0

cranelift/filetests/filetests/isa/x64/simd-splat-avx.clif (16)

@ -12,7 +12,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; uninit %xmm4
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm2, %xmm6, %xmm0
@ -25,7 +25,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm6, %xmm2, %xmm0
; movq %rbp, %rsp
@ -42,7 +42,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshuflw $0, %xmm2, %xmm4
; vpshufd $0, %xmm4, %xmm0
; movq %rbp, %rsp
@ -54,7 +54,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshuflw $0, %xmm2, %xmm4
; vpshufd $0, %xmm4, %xmm0
; movq %rbp, %rsp
@ -71,7 +71,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshufd $0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -82,7 +82,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshufd $0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -98,7 +98,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -109,7 +109,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp

cranelift/filetests/filetests/isa/x64/simd-splat-avx2.clif (16)

@ -12,7 +12,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastb %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -23,7 +23,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastb %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -39,7 +39,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -50,7 +50,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -66,7 +66,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastd %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -77,7 +77,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastd %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -93,7 +93,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@ -104,7 +104,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp

108
cranelift/filetests/filetests/isa/x64/vhigh_bits-avx.clif

@ -0,0 +1,108 @@
test compile precise-output
set enable_simd
target x86_64 has_avx
function %f1(i8x16) -> i8 {
block0(v0: i8x16):
v1 = vhigh_bits.i8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovmskb %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovmskb %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %f3(i16x8) -> i8 {
block0(v0: i16x8):
v1 = vhigh_bits.i8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpacksswb %xmm0, %xmm0, %xmm2
; vpmovmskb %xmm2, %eax
; shrq $8, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpacksswb %xmm0, %xmm0, %xmm2
; vpmovmskb %xmm2, %eax
; shrq $8, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %f4(i32x4) -> i8 {
block0(v0: i32x4):
v1 = vhigh_bits.i8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovmskps %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovmskps %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %f5(i64x2) -> i8 {
block0(v0: i64x2):
v1 = vhigh_bits.i8 v0
return v1
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovmskpd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovmskpd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq