Browse Source

x64: Fix codegen for the `i8x16.swizzle` instruction (#4318)

This commit fixes a mistake in the `Swizzle` opcode implementation in
the x64 backend of Cranelift. Previously an input register was casted to
a writable register and then modified, which I believe instructions are
not supposed to do. This was discovered as part of my investigation
into #4315.
pull/4325/head
Alex Crichton 2 years ago
committed by GitHub
parent
commit
dc2fe0ac67
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 7
      cranelift/codegen/src/isa/x64/lower.rs

7
cranelift/codegen/src/isa/x64/lower.rs

@ -2554,17 +2554,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));
// Use the `zero_mask` on a writable `swizzle_mask`.
let swizzle_mask = Writable::from_reg(swizzle_mask);
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddusb,
RegMem::from(zero_mask),
swizzle_mask,
swizzle_mask_tmp,
));
// Shuffle `dst` using the fixed-up `swizzle_mask`.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(swizzle_mask),
RegMem::from(swizzle_mask_tmp),
dst,
));
}

Loading…
Cancel
Save