@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::CodegenResult;
use crate::{CodegenError, CodegenResult};

use crate::isa::aarch64::abi::*;
use crate::isa::aarch64::inst::*;
@@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
@@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
ctx,
inputs[1],
@@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let narrow_mode = if is_signed {
NarrowValueMode::SignExtend64
} else {
@@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ineg => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = zero_reg();
let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
@@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
ctx.emit(Inst::AluRRRR {
alu_op,
@@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp1,
rn,
size: VectorSize::Size32x2,
high_half: false,
});

// Sum the respective high half components.
@@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd: tmp2,
rn: rm,
size: VectorSize::Size32x2,
high_half: false,
});

// Shift the high half components, into the high half.
@@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Bnot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
// NOT rd, rm ==> ORR_NOT rd, zero, rm
@@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::BxorNot => {
let rd = get_output_reg(ctx, outputs[0]);
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
let alu_op = match op {
@@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
if ty_bits(ty) < 128 {
if !ty.is_vector() {
let size = OperandSize::from_bits(ty_bits(ty));
let narrow_mode = match (op, size) {
(Opcode::Ishl, _) => NarrowValueMode::None,
@@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
t,
rd,
rn: rd.to_reg(),
high_half: false,
});
}
}
@@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

Opcode::Bitselect | Opcode::Vselect => {
let ty = ty.unwrap();
if ty_bits(ty) < 128 {
if !ty.is_vector() {
debug_assert_ne!(Opcode::Vselect, op);
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
let rd = get_output_reg(ctx, outputs[0]);
@@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
@@ -1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

if ty_bits(ty) < 128 {
if !ty.is_vector() {
match ty_bits(ty) {
32 => {
ctx.emit(Inst::FpuCmp32 { rn, rm });
@@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Fadd, 32) => FPUOp2::Add32,
(Opcode::Fadd, 64) => FPUOp2::Add64,
@@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let bits = ty_bits(ty);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
if bits < 128 {
if !ty.is_vector() {
let fpu_op = match (op, bits) {
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
@@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ctx.output_ty(insn, 0));
let ty = ty.unwrap();
let signed = op == Opcode::FcvtFromSint;
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rd = get_output_reg(ctx, outputs[0]);
ctx.emit(Inst::IntToFpu { op, rd, rn });

if ty.is_vector() {
let op = if signed {
VecMisc2::Scvtf
} else {
VecMisc2::Ucvtf
};
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);

ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
let in_bits = ty_bits(ctx.input_ty(insn, 0));
let out_bits = ty_bits(ty);
let op = match (signed, in_bits, out_bits) {
(false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
(true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
(false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
(true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
(false, 64, 32) => IntToFpuOp::U64ToF32,
(true, 64, 32) => IntToFpuOp::I64ToF32,
(false, 64, 64) => IntToFpuOp::U64ToF64,
(true, 64, 64) => IntToFpuOp::I64ToF64,
_ => panic!("Unknown input/output-bits combination"),
};
let narrow_mode = match (signed, in_bits) {
(false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
(true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
(false, 64) => NarrowValueMode::ZeroExtend64,
(true, 64) => NarrowValueMode::SignExtend64,
_ => panic!("Unknown input size"),
};
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
ctx.emit(Inst::IntToFpu { op, rd, rn });
}
}

Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_ty = ctx.output_ty(insn, 0);
let out_bits = ty_bits(out_ty);
let ty = ty.unwrap();
let out_signed = op == Opcode::FcvtToSintSat;
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);

// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);
if ty.is_vector() {
let op = if out_signed {
VecMisc2::Fcvtzs
} else {
VecMisc2::Fcvtzu
};

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: max as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: max,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rn,
rm: rtmp1.to_reg(),
});
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
ctx.emit(Inst::VecMisc {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
if out_signed {
let in_ty = ctx.input_ty(insn, 0);
let in_bits = ty_bits(in_ty);
let out_bits = ty_bits(ty);
// FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
// FMIN Vtmp2, Vin, Vtmp1
// FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
// FMAX Vtmp2, Vtmp2, Vtmp1
// (if signed) FIMM Vtmp1, 0
// FCMP Vin, Vin
// FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
// convert Rout, Vtmp2

assert!(in_bits == 32 || in_bits == 64);
assert!(out_bits == 32 || out_bits == 64);

let min: f64 = match (out_bits, out_signed) {
(32, true) => std::i32::MIN as f64,
(32, false) => 0.0,
(64, true) => std::i64::MIN as f64,
(64, false) => 0.0,
_ => unreachable!(),
};

let max = match (out_bits, out_signed) {
(32, true) => std::i32::MAX as f64,
(32, false) => std::u32::MAX as f64,
(64, true) => std::i64::MAX as f64,
(64, false) => std::u64::MAX as f64,
_ => unreachable!(),
};

let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
const_data: max as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
const_data: max,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rn,
rm: rtmp1.to_reg(),
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: min as f32,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: min,
});
}
ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(),
});
}
if out_signed {
if in_bits == 32 {
ctx.emit(Inst::LoadFpuConst32 {
rd: rtmp1,
const_data: 0.0,
});
} else {
ctx.emit(Inst::LoadFpuConst64 {
rd: rtmp1,
const_data: 0.0,
});
}
}
if in_bits == 32 {
ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel32 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
} else {
ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
ctx.emit(Inst::FpuCSel64 {
rd: rtmp2,
rn: rtmp1.to_reg(),
rm: rtmp2.to_reg(),
cond: Cond::Ne,
});
}

let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
let cvt = match (in_bits, out_bits, out_signed) {
(32, 32, false) => FpuToIntOp::F32ToU32,
(32, 32, true) => FpuToIntOp::F32ToI32,
(32, 64, false) => FpuToIntOp::F32ToU64,
(32, 64, true) => FpuToIntOp::F32ToI64,
(64, 32, false) => FpuToIntOp::F64ToU32,
(64, 32, true) => FpuToIntOp::F64ToI32,
(64, 64, false) => FpuToIntOp::F64ToU64,
(64, 64, true) => FpuToIntOp::F64ToI64,
_ => unreachable!(),
};
ctx.emit(Inst::FpuToInt {
op: cvt,
rd,
rn: rtmp2.to_reg(),
});
}
}

Opcode::IaddIfcout => {
@@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Snarrow
| Opcode::Unarrow
| Opcode::SwidenLow
| Opcode::SwidenHigh
| Opcode::UwidenLow
| Opcode::UwidenHigh => unimplemented!(),
Opcode::Snarrow | Opcode::Unarrow => {
let op = if op == Opcode::Snarrow {
VecMiscNarrowOp::Sqxtn
} else {
VecMiscNarrowOp::Sqxtun
};
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();

ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn,
size: VectorSize::from_ty(ty),
high_half: false,
});
ctx.emit(Inst::VecMiscNarrow {
op,
rd,
rn: rn2,
size: VectorSize::from_ty(ty),
high_half: true,
});
}

Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
let lane_type = ty.unwrap().lane_type();
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let (t, high_half) = match (lane_type, op) {
(I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
(I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
(I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
(I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
(I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
(I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
(I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
(I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
_ => {
return Err(CodegenError::Unsupported(format!(
"Unsupported SIMD vector lane type: {:?}",
lane_type
)));
}
};

ctx.emit(Inst::VecExtend {
t,
rd,
rn,
high_half,
});
}

Opcode::TlsValue => unimplemented!(),
}