From 1799b0947f98f7a4933e2b6933e228044d1fbdf6 Mon Sep 17 00:00:00 2001
From: Andrew Brown
Date: Wed, 7 Oct 2020 15:48:25 -0700
Subject: [PATCH] [machinst x64]: implement packed bitselect

---
Review notes (text between the `---` line and the first `diff --git` line is
not part of the commit message and is skipped when the patch is applied):

* inst/mod.rs: `Inst::equals` and `Inst::xor` move from the middle of
  `impl Inst` to its end. `equals` additionally accepts the B8X16, B16X8,
  B32X4 and B64X2 types, and the integer-vector arm of `xor` now also requires
  `ty.bits() == 128`. `Inst::and`, `Inst::and_not` and `Inst::or` are new and
  follow the same shape: ANDPS/ANDNPS/ORPS for F32X4, ANDPD/ANDNPD/ORPD for
  F64X2, PAND/PANDN/POR for any other 128-bit vector type, and
  `unimplemented!` otherwise.
* lower.rs: a vector `Opcode::Bitselect` is lowered through two temporaries:
      tmp1 = if_true;   tmp1 = and(condition, tmp1)
      tmp2 = condition; tmp2 = and_not(if_false, tmp2)
      dst  = tmp2;      dst  = or(tmp1, dst)
  Assuming the usual x86 semantics of PANDN/ANDNPS/ANDNPD (destination is
  inverted, then ANDed with the source -- confirm against the ISA reference),
  this computes (if_true & condition) | (if_false & !condition).
  Non-vector types reach `unimplemented!("scalar bitselect")`.

 cranelift/codegen/src/isa/x64/inst/mod.rs | 84 +++++++++++++++--------
 cranelift/codegen/src/isa/x64/lower.rs    | 23 +++++++
 2 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 46373bee76..1fe0de6941 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -1105,33 +1105,6 @@ impl Inst {
         }
     }
 
-    /// Choose which instruction to use for comparing two values for equality.
-    pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
-        match ty {
-            types::I8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to),
-            types::I16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to),
-            types::I32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to),
-            types::I64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to),
-            types::F32X4 => {
-                Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false)
-            }
-            types::F64X2 => {
-                Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false)
-            }
-            _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
-        }
-    }
-
-    /// Choose which instruction to use for computing a bitwise XOR on two values.
-    pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
-        match ty {
-            types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to),
-            types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to),
-            _ if ty.is_vector() => Inst::xmm_rm_r(SseOpcode::Pxor, from, to),
-            _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
-        }
-    }
-
     /// Choose which instruction to use for loading a register value from memory. For loads smaller
     /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
     /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
@@ -1256,6 +1229,63 @@ impl Inst {
             _ => false,
         }
     }
+
+    /// Choose which instruction to use for comparing two values for equality.
+    pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+        match ty {
+            types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to),
+            types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to),
+            types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to),
+            types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to),
+            types::F32X4 => {
+                Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false)
+            }
+            types::F64X2 => {
+                Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false)
+            }
+            _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
+        }
+    }
+
+    /// Choose which instruction to use for computing a bitwise AND on two values.
+    pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+        match ty {
+            types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to),
+            types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to),
+            _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to),
+            _ => unimplemented!("unimplemented type for Inst::and: {}", ty),
+        }
+    }
+
+    /// Choose which instruction to use for computing a bitwise AND NOT on two values.
+    pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+        match ty {
+            types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to),
+            types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to),
+            _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to),
+            _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
+        }
+    }
+
+    /// Choose which instruction to use for computing a bitwise OR on two values.
+    pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+        match ty {
+            types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to),
+            types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to),
+            _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to),
+            _ => unimplemented!("unimplemented type for Inst::or: {}", ty),
+        }
+    }
+
+    /// Choose which instruction to use for computing a bitwise XOR on two values.
+    pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
+        match ty {
+            types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to),
+            types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to),
+            _ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to),
+            _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
+        }
+    }
 }
 
 //=============================================================================
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 8df89a6643..108072b97c 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -908,6 +908,29 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
 
+        Opcode::Bitselect => {
+            let ty = ty.unwrap();
+            let condition = put_input_in_reg(ctx, inputs[0]);
+            let if_true = put_input_in_reg(ctx, inputs[1]);
+            let if_false = input_to_reg_mem(ctx, inputs[2]);
+            let dst = get_output_reg(ctx, outputs[0]);
+
+            if ty.is_vector() {
+                let tmp1 = ctx.alloc_tmp(RegClass::V128, ty);
+                ctx.emit(Inst::gen_move(tmp1, if_true, ty));
+                ctx.emit(Inst::and(ty, RegMem::reg(condition.clone()), tmp1));
+
+                let tmp2 = ctx.alloc_tmp(RegClass::V128, ty);
+                ctx.emit(Inst::gen_move(tmp2, condition, ty));
+                ctx.emit(Inst::and_not(ty, if_false, tmp2));
+
+                ctx.emit(Inst::gen_move(dst, tmp2.to_reg(), ty));
+                ctx.emit(Inst::or(ty, RegMem::from(tmp1), dst));
+            } else {
+                unimplemented!("scalar bitselect")
+            }
+        }
+
         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
             let dst_ty = ctx.output_ty(insn, 0);
             debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);