
AArch64: Implement SIMD conversions

Copyright (c) 2020, Arm Limited.
pull/2155/head
Anton Kirilov, 4 years ago
commit b895ac0e40
  1. build.rs (33 lines changed)
  2. cranelift/codegen/src/isa/aarch64/inst/args.rs (9 lines changed)
  3. cranelift/codegen/src/isa/aarch64/inst/emit.rs (51 lines changed)
  4. cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs (87 lines changed)
  5. cranelift/codegen/src/isa/aarch64/inst/mod.rs (78 lines changed)
  6. cranelift/codegen/src/isa/aarch64/lower_inst.rs (368 lines changed)

build.rs (33 lines changed)

@@ -196,7 +196,6 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
 /// Ignore tests that aren't supported yet.
 fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
-    let target = env::var("TARGET").unwrap();
     match strategy {
         #[cfg(feature = "lightbeam")]
         "Lightbeam" => match (testsuite, testname) {
@@ -207,38 +206,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             _ => (),
         },
         "Cranelift" => match (testsuite, testname) {
-            ("simd", "simd_address") => return false,
-            ("simd", "simd_align") => return false,
-            ("simd", "simd_bitwise") => return false,
-            ("simd", "simd_bit_shift") => return false,
-            ("simd", "simd_boolean") => return false,
-            ("simd", "simd_const") => return false,
-            ("simd", "simd_f32x4") => return false,
-            ("simd", "simd_f32x4_arith") => return false,
-            ("simd", "simd_f32x4_cmp") => return false,
-            ("simd", "simd_f64x2") => return false,
-            ("simd", "simd_f64x2_arith") => return false,
-            ("simd", "simd_f64x2_cmp") => return false,
-            ("simd", "simd_i8x16_arith") => return false,
-            ("simd", "simd_i8x16_arith2") => return false,
-            ("simd", "simd_i8x16_cmp") => return false,
-            ("simd", "simd_i8x16_sat_arith") => return false,
-            ("simd", "simd_i16x8_arith") => return false,
-            ("simd", "simd_i16x8_arith2") => return false,
-            ("simd", "simd_i16x8_cmp") => return false,
-            ("simd", "simd_i16x8_sat_arith") => return false,
-            ("simd", "simd_i32x4_arith") => return false,
-            ("simd", "simd_i32x4_arith2") => return false,
-            ("simd", "simd_i32x4_cmp") => return false,
-            ("simd", "simd_i64x2_arith") => return false,
-            ("simd", "simd_lane") => return false,
-            ("simd", "simd_load_extend") => return false,
-            ("simd", "simd_load_splat") => return false,
-            ("simd", "simd_store") => return false,
-            // Most simd tests are known to fail on aarch64 for now, it's going
-            // to be a big chunk of work to implement them all there!
-            ("simd", _) if target.contains("aarch64") => return true,
             // TODO(#1886): Ignore reference types tests if this isn't x64,
             // because Cranelift only supports reference types on x64.
             ("reference_types", _) => {

cranelift/codegen/src/isa/aarch64/inst/args.rs (9 lines changed)

@@ -671,6 +671,15 @@ impl VectorSize {
             VectorSize::Size64x2 => unreachable!(),
         }
     }
+
+    pub fn halve(&self) -> VectorSize {
+        match self {
+            VectorSize::Size8x16 => VectorSize::Size8x8,
+            VectorSize::Size16x8 => VectorSize::Size16x4,
+            VectorSize::Size32x4 => VectorSize::Size32x2,
+            _ => *self,
+        }
+    }
 }

 //=============================================================================

cranelift/codegen/src/isa/aarch64/inst/emit.rs (51 lines changed)

@@ -1400,6 +1400,22 @@ impl MachInstEmit for Inst {
                         debug_assert!(!size.is_128bits());
                         (0b1, 0b10011, enc_size)
                     }
+                    VecMisc2::Fcvtzs => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b0, 0b11011, enc_size)
+                    }
+                    VecMisc2::Fcvtzu => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b1, 0b11011, enc_size)
+                    }
+                    VecMisc2::Scvtf => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b0, 0b11101, enc_size & 0b1)
+                    }
+                    VecMisc2::Ucvtf => {
+                        debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
+                        (0b1, 0b11101, enc_size & 0b1)
+                    }
                 };
                 sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
             }
@@ -1644,7 +1660,12 @@ impl MachInstEmit for Inst {
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
-            &Inst::VecExtend { t, rd, rn } => {
+            &Inst::VecExtend {
+                t,
+                rd,
+                rn,
+                high_half,
+            } => {
                 let (u, immh) = match t {
                     VecExtendOp::Sxtl8 => (0b0, 0b001),
                     VecExtendOp::Sxtl16 => (0b0, 0b010),
@@ -1655,22 +1676,38 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(
                     0b000_011110_0000_000_101001_00000_00000
+                        | ((high_half as u32) << 30)
                         | (u << 29)
                         | (immh << 19)
                         | (machreg_to_vec(rn) << 5)
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
-            &Inst::VecMiscNarrow { op, rd, rn, size } => {
-                debug_assert!(!size.is_128bits());
-                let size = match size.widen() {
-                    VectorSize::Size64x2 => 0b10,
-                    _ => unimplemented!(),
+            &Inst::VecMiscNarrow {
+                op,
+                rd,
+                rn,
+                size,
+                high_half,
+            } => {
+                let size = match size.lane_size() {
+                    ScalarSize::Size8 => 0b00,
+                    ScalarSize::Size16 => 0b01,
+                    ScalarSize::Size32 => 0b10,
+                    _ => panic!("Unexpected vector operand lane size!"),
                 };
                 let (u, bits_12_16) = match op {
                     VecMiscNarrowOp::Xtn => (0b0, 0b10010),
+                    VecMiscNarrowOp::Sqxtn => (0b0, 0b10100),
+                    VecMiscNarrowOp::Sqxtun => (0b1, 0b10010),
                 };
-                sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
+                sink.put4(enc_vec_rr_misc(
+                    ((high_half as u32) << 1) | u,
+                    size,
+                    bits_12_16,
+                    rd,
+                    rn,
+                ));
            }
            &Inst::VecMovElement {
                rd,
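Note: for readers without the NEON reference handy, the per-lane behaviour of the four conversions whose encodings are added above can be modelled in scalar Rust. This is purely illustrative and not part of the patch; the helper names are invented. Rust's float-to-integer `as` casts happen to have the same semantics as fcvtzs/fcvtzu: round toward zero, saturate on overflow, and map NaN to zero.

    // Scalar model of one lane of each new conversion (illustrative only).
    fn fcvtzs_lane(x: f32) -> i32 {
        x as i32 // round toward zero, saturating, NaN -> 0
    }

    fn fcvtzu_lane(x: f32) -> u32 {
        x as u32 // negative inputs and NaN -> 0, overflow -> u32::MAX
    }

    fn scvtf_lane(x: i32) -> f32 {
        x as f32 // signed integer to the nearest representable float
    }

    fn ucvtf_lane(x: u32) -> f32 {
        x as f32 // unsigned integer to the nearest representable float
    }

    fn main() {
        assert_eq!(fcvtzs_lane(f32::NAN), 0);
        assert_eq!(fcvtzs_lane(3.9), 3);
        assert_eq!(fcvtzs_lane(f32::MAX), i32::MAX);
        assert_eq!(fcvtzu_lane(-1.0), 0);
    }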

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs (87 lines changed)

@@ -2008,6 +2008,7 @@ fn test_aarch64_binemit() {
             t: VecExtendOp::Sxtl8,
             rd: writable_vreg(4),
             rn: vreg(27),
+            high_half: false,
         },
         "64A7080F",
         "sxtl v4.8h, v27.8b",
@@ -2017,15 +2018,17 @@ fn test_aarch64_binemit() {
             t: VecExtendOp::Sxtl16,
             rd: writable_vreg(17),
             rn: vreg(19),
+            high_half: true,
         },
-        "71A6100F",
-        "sxtl v17.4s, v19.4h",
+        "71A6104F",
+        "sxtl2 v17.4s, v19.8h",
     ));
     insns.push((
         Inst::VecExtend {
             t: VecExtendOp::Sxtl32,
             rd: writable_vreg(30),
             rn: vreg(6),
+            high_half: false,
         },
         "DEA4200F",
         "sxtl v30.2d, v6.2s",
@@ -2035,15 +2038,17 @@ fn test_aarch64_binemit() {
             t: VecExtendOp::Uxtl8,
             rd: writable_vreg(3),
             rn: vreg(29),
+            high_half: true,
         },
-        "A3A7082F",
-        "uxtl v3.8h, v29.8b",
+        "A3A7086F",
+        "uxtl2 v3.8h, v29.16b",
     ));
     insns.push((
         Inst::VecExtend {
             t: VecExtendOp::Uxtl16,
             rd: writable_vreg(15),
             rn: vreg(12),
+            high_half: false,
         },
         "8FA5102F",
         "uxtl v15.4s, v12.4h",
@@ -2053,9 +2058,10 @@ fn test_aarch64_binemit() {
             t: VecExtendOp::Uxtl32,
             rd: writable_vreg(28),
             rn: vreg(2),
+            high_half: true,
         },
-        "5CA4202F",
-        "uxtl v28.2d, v2.2s",
+        "5CA4206F",
+        "uxtl2 v28.2d, v2.4s",
     ));
     insns.push((
@@ -2088,11 +2094,36 @@ fn test_aarch64_binemit() {
             rd: writable_vreg(22),
             rn: vreg(8),
             size: VectorSize::Size32x2,
+            high_half: false,
         },
         "1629A10E",
         "xtn v22.2s, v8.2d",
     ));
+    insns.push((
+        Inst::VecMiscNarrow {
+            op: VecMiscNarrowOp::Sqxtn,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            size: VectorSize::Size16x8,
+            high_half: true,
+        },
+        "1F48614E",
+        "sqxtn2 v31.8h, v0.4s",
+    ));
+    insns.push((
+        Inst::VecMiscNarrow {
+            op: VecMiscNarrowOp::Sqxtun,
+            rd: writable_vreg(16),
+            rn: vreg(23),
+            size: VectorSize::Size8x16,
+            high_half: false,
+        },
+        "F02A212E",
+        "sqxtun v16.8b, v23.8h",
+    ));
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Sqadd,
@@ -3322,6 +3353,50 @@ fn test_aarch64_binemit() {
         "shll v1.2d, v10.2s, #32",
     ));
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fcvtzs,
+            rd: writable_vreg(4),
+            rn: vreg(22),
+            size: VectorSize::Size32x4,
+        },
+        "C4BAA14E",
+        "fcvtzs v4.4s, v22.4s",
+    ));
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fcvtzu,
+            rd: writable_vreg(29),
+            rn: vreg(15),
+            size: VectorSize::Size64x2,
+        },
+        "FDB9E16E",
+        "fcvtzu v29.2d, v15.2d",
+    ));
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Scvtf,
+            rd: writable_vreg(20),
+            rn: vreg(8),
+            size: VectorSize::Size32x4,
+        },
+        "14D9214E",
+        "scvtf v20.4s, v8.4s",
+    ));
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Ucvtf,
+            rd: writable_vreg(10),
+            rn: vreg(19),
+            size: VectorSize::Size64x2,
+        },
+        "6ADA616E",
+        "ucvtf v10.2d, v19.2d",
+    ));
     insns.push((
         Inst::VecLanes {
             op: VecLanesOp::Uminv,
78
cranelift/codegen/src/isa/aarch64/inst/mod.rs

@ -308,6 +308,14 @@ pub enum VecMisc2 {
Rev64, Rev64,
/// Shift left long (by element size) /// Shift left long (by element size)
Shll, Shll,
/// Floating-point convert to signed integer, rounding toward zero
Fcvtzs,
/// Floating-point convert to unsigned integer, rounding toward zero
Fcvtzu,
/// Signed integer convert to floating-point
Scvtf,
/// Unsigned integer convert to floating-point
Ucvtf,
} }
/// A Vector narrowing operation with two registers. /// A Vector narrowing operation with two registers.
@ -315,6 +323,10 @@ pub enum VecMisc2 {
pub enum VecMiscNarrowOp { pub enum VecMiscNarrowOp {
/// Extract Narrow /// Extract Narrow
Xtn, Xtn,
/// Signed saturating extract narrow
Sqxtn,
/// Signed saturating extract unsigned narrow
Sqxtun,
} }
/// An operation across the lanes of vectors. /// An operation across the lanes of vectors.
@ -884,6 +896,7 @@ pub enum Inst {
t: VecExtendOp, t: VecExtendOp,
rd: Writable<Reg>, rd: Writable<Reg>,
rn: Reg, rn: Reg,
high_half: bool,
}, },
/// Move vector element to another vector element. /// Move vector element to another vector element.
@ -901,6 +914,7 @@ pub enum Inst {
rd: Writable<Reg>, rd: Writable<Reg>,
rn: Reg, rn: Reg,
size: VectorSize, size: VectorSize,
high_half: bool,
}, },
/// A vector ALU op. /// A vector ALU op.
@ -1628,9 +1642,16 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_mod(rd); collector.add_mod(rd);
collector.add_use(rn); collector.add_use(rn);
} }
&Inst::VecMiscNarrow { rd, rn, .. } => { &Inst::VecMiscNarrow {
collector.add_def(rd); rd, rn, high_half, ..
} => {
collector.add_use(rn); collector.add_use(rn);
if high_half {
collector.add_mod(rd);
} else {
collector.add_def(rd);
}
} }
&Inst::VecRRR { &Inst::VecRRR {
alu_op, rd, rn, rm, .. alu_op, rd, rn, rm, ..
@ -2300,10 +2321,16 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
&mut Inst::VecMiscNarrow { &mut Inst::VecMiscNarrow {
ref mut rd, ref mut rd,
ref mut rn, ref mut rn,
high_half,
.. ..
} => { } => {
map_def(mapper, rd);
map_use(mapper, rn); map_use(mapper, rn);
if high_half {
map_mod(mapper, rd);
} else {
map_def(mapper, rd);
}
} }
&mut Inst::VecRRR { &mut Inst::VecRRR {
alu_op, alu_op,
@ -3155,14 +3182,20 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, 0, size); let rn = show_vreg_element(rn, mb_rru, 0, size);
format!("dup {}, {}", rd, rn) format!("dup {}, {}", rd, rn)
} }
&Inst::VecExtend { t, rd, rn } => { &Inst::VecExtend { t, rd, rn, high_half } => {
let (op, dest, src) = match t { let (op, dest, src) = match (t, high_half) {
VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), (VecExtendOp::Sxtl8, false) => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), (VecExtendOp::Sxtl8, true) => ("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2), (VecExtendOp::Sxtl16, false) => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), (VecExtendOp::Sxtl16, true) => ("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), (VecExtendOp::Sxtl32, false) => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), (VecExtendOp::Sxtl32, true) => ("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
(VecExtendOp::Uxtl8, false) => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
(VecExtendOp::Uxtl8, true) => ("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16),
(VecExtendOp::Uxtl16, false) => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
(VecExtendOp::Uxtl16, true) => ("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8),
(VecExtendOp::Uxtl32, false) => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
(VecExtendOp::Uxtl32, true) => ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4),
}; };
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src); let rn = show_vreg_vector(rn, mb_rru, src);
@ -3179,11 +3212,22 @@ impl Inst {
let rn = show_vreg_element(rn, mb_rru, idx2, size); let rn = show_vreg_element(rn, mb_rru, idx2, size);
format!("mov {}, {}", rd, rn) format!("mov {}, {}", rd, rn)
} }
&Inst::VecMiscNarrow { op, rd, rn, size } => { &Inst::VecMiscNarrow { op, rd, rn, size, high_half } => {
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); let dest_size = if high_half {
assert!(size.is_128bits());
size
} else {
size.halve()
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
let rn = show_vreg_vector(rn, mb_rru, size.widen()); let rn = show_vreg_vector(rn, mb_rru, size.widen());
let op = match op { let op = match (op, high_half) {
VecMiscNarrowOp::Xtn => "xtn", (VecMiscNarrowOp::Xtn, false) => "xtn",
(VecMiscNarrowOp::Xtn, true) => "xtn2",
(VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
(VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
(VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
(VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
}; };
format!("{} {}, {}", op, rd, rn) format!("{} {}, {}", op, rd, rn)
} }
@ -3267,6 +3311,10 @@ impl Inst {
VecMisc2::Fsqrt => ("fsqrt", size), VecMisc2::Fsqrt => ("fsqrt", size),
VecMisc2::Rev64 => ("rev64", size), VecMisc2::Rev64 => ("rev64", size),
VecMisc2::Shll => ("shll", size), VecMisc2::Shll => ("shll", size),
VecMisc2::Fcvtzs => ("fcvtzs", size),
VecMisc2::Fcvtzu => ("fcvtzu", size),
VecMisc2::Scvtf => ("scvtf", size),
VecMisc2::Ucvtf => ("ucvtf", size),
}; };
let rd_size = if is_shll { size.widen() } else { size }; let rd_size = if is_shll { size.widen() } else { size };
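Note: the doc comments on the new narrowing ops are terse, so here is a scalar model of what they do per lane, using i32 to 16-bit lanes as the example. It is illustrative only; the function names are invented for this sketch.

    // sqxtn: signed source, saturate to the signed range of the half-width lane.
    fn sqxtn_lane(x: i32) -> i16 {
        x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
    }

    // sqxtun: signed source, saturate to the *unsigned* range of the half-width lane.
    fn sqxtun_lane(x: i32) -> u16 {
        x.clamp(0, u16::MAX as i32) as u16
    }

    fn main() {
        assert_eq!(sqxtn_lane(40_000), i16::MAX);
        assert_eq!(sqxtn_lane(-40_000), i16::MIN);
        assert_eq!(sqxtun_lane(-5), 0);
        assert_eq!(sqxtun_lane(70_000), u16::MAX);
    }

The "2" forms of these instructions write only the upper 64 bits of the destination and leave the lower half intact, which is why the register-usage and register-mapping code above treats rd as a modified register rather than a pure definition when high_half is set.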

cranelift/codegen/src/isa/aarch64/lower_inst.rs (368 lines changed)

@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
 use crate::machinst::lower::*;
 use crate::machinst::*;
-use crate::CodegenResult;
+use crate::{CodegenError, CodegenResult};

 use crate::isa::aarch64::abi::*;
 use crate::isa::aarch64::inst::*;
@@ -66,7 +66,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rd = get_output_reg(ctx, outputs[0]);
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
                     ctx,
                     inputs[1],
@@ -94,7 +94,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rd = get_output_reg(ctx, outputs[0]);
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
                     ctx,
                     inputs[1],
@@ -124,7 +124,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
             let ty = ty.unwrap();
             let rd = get_output_reg(ctx, outputs[0]);
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let narrow_mode = if is_signed {
                     NarrowValueMode::SignExtend64
                 } else {
@@ -180,7 +180,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Ineg => {
             let rd = get_output_reg(ctx, outputs[0]);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let rn = zero_reg();
                 let rm = put_input_in_rse_imm12(ctx, inputs[0], NarrowValueMode::None);
                 let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
@@ -201,7 +201,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let alu_op = choose_32_64(ty, ALUOp::MAdd32, ALUOp::MAdd64);
                 ctx.emit(Inst::AluRRRR {
                     alu_op,
@@ -274,6 +274,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd: tmp1,
                     rn,
                     size: VectorSize::Size32x2,
+                    high_half: false,
                 });

                 // Sum the respective high half components.
@@ -293,6 +294,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd: tmp2,
                     rn: rm,
                     size: VectorSize::Size32x2,
+                    high_half: false,
                 });

                 // Shift the high half components, into the high half.
@@ -570,7 +572,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Bnot => {
             let rd = get_output_reg(ctx, outputs[0]);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
                 let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
                 // NOT rd, rm ==> ORR_NOT rd, zero, rm
@@ -594,7 +596,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::BxorNot => {
             let rd = get_output_reg(ctx, outputs[0]);
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                 let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
                 let alu_op = match op {
@@ -633,7 +635,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
             let ty = ty.unwrap();
             let rd = get_output_reg(ctx, outputs[0]);
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let size = OperandSize::from_bits(ty_bits(ty));
                 let narrow_mode = match (op, size) {
                     (Opcode::Ishl, _) => NarrowValueMode::None,
@@ -1159,6 +1161,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     t,
                     rd,
                     rn: rd.to_reg(),
+                    high_half: false,
                 });
             }
         }
@@ -1433,7 +1436,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Bitselect | Opcode::Vselect => {
             let ty = ty.unwrap();
-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 debug_assert_ne!(Opcode::Vselect, op);
                 let tmp = ctx.alloc_tmp(RegClass::I64, I64);
                 let rd = get_output_reg(ctx, outputs[0]);
@@ -1696,7 +1699,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             };
             let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);

-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
                 let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
                 ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
@@ -1716,7 +1719,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);

-            if ty_bits(ty) < 128 {
+            if !ty.is_vector() {
                 match ty_bits(ty) {
                     32 => {
                         ctx.emit(Inst::FpuCmp32 { rn, rm });
@@ -2106,7 +2109,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);
-            if bits < 128 {
+            if !ty.is_vector() {
                 let fpu_op = match (op, bits) {
                     (Opcode::Fadd, 32) => FPUOp2::Add32,
                     (Opcode::Fadd, 64) => FPUOp2::Add64,
@@ -2149,7 +2152,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let bits = ty_bits(ty);
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);
-            if bits < 128 {
+            if !ty.is_vector() {
                 let fpu_op = match (op, bits) {
                     (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
                     (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
@@ -2414,153 +2417,186 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
-            let in_bits = ty_bits(ctx.input_ty(insn, 0));
-            let out_bits = ty_bits(ctx.output_ty(insn, 0));
+            let ty = ty.unwrap();
             let signed = op == Opcode::FcvtFromSint;
-            let op = match (signed, in_bits, out_bits) {
-                (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
-                (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
-                (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
-                (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
-                (false, 64, 32) => IntToFpuOp::U64ToF32,
-                (true, 64, 32) => IntToFpuOp::I64ToF32,
-                (false, 64, 64) => IntToFpuOp::U64ToF64,
-                (true, 64, 64) => IntToFpuOp::I64ToF64,
-                _ => panic!("Unknown input/output-bits combination"),
-            };
-            let narrow_mode = match (signed, in_bits) {
-                (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
-                (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
-                (false, 64) => NarrowValueMode::ZeroExtend64,
-                (true, 64) => NarrowValueMode::SignExtend64,
-                _ => panic!("Unknown input size"),
-            };
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::IntToFpu { op, rd, rn });
+
+            if ty.is_vector() {
+                let op = if signed {
+                    VecMisc2::Scvtf
+                } else {
+                    VecMisc2::Ucvtf
+                };
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+                ctx.emit(Inst::VecMisc {
+                    op,
+                    rd,
+                    rn,
+                    size: VectorSize::from_ty(ty),
+                });
+            } else {
+                let in_bits = ty_bits(ctx.input_ty(insn, 0));
+                let out_bits = ty_bits(ty);
+                let op = match (signed, in_bits, out_bits) {
+                    (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
+                    (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
+                    (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
+                    (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
+                    (false, 64, 32) => IntToFpuOp::U64ToF32,
+                    (true, 64, 32) => IntToFpuOp::I64ToF32,
+                    (false, 64, 64) => IntToFpuOp::U64ToF64,
+                    (true, 64, 64) => IntToFpuOp::I64ToF64,
+                    _ => panic!("Unknown input/output-bits combination"),
+                };
+                let narrow_mode = match (signed, in_bits) {
+                    (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
+                    (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
+                    (false, 64) => NarrowValueMode::ZeroExtend64,
+                    (true, 64) => NarrowValueMode::SignExtend64,
+                    _ => panic!("Unknown input size"),
+                };
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+                ctx.emit(Inst::IntToFpu { op, rd, rn });
+            }
         }

         Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
-            let in_ty = ctx.input_ty(insn, 0);
-            let in_bits = ty_bits(in_ty);
-            let out_ty = ctx.output_ty(insn, 0);
-            let out_bits = ty_bits(out_ty);
+            let ty = ty.unwrap();
             let out_signed = op == Opcode::FcvtToSintSat;
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rd = get_output_reg(ctx, outputs[0]);

+            if ty.is_vector() {
+                let op = if out_signed {
+                    VecMisc2::Fcvtzs
+                } else {
+                    VecMisc2::Fcvtzu
+                };
+
+                ctx.emit(Inst::VecMisc {
+                    op,
+                    rd,
+                    rn,
+                    size: VectorSize::from_ty(ty),
+                });
+            } else {
+                let in_ty = ctx.input_ty(insn, 0);
+                let in_bits = ty_bits(in_ty);
+                let out_bits = ty_bits(ty);
+
                 // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
                 // FMIN Vtmp2, Vin, Vtmp1
                 // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
                 // FMAX Vtmp2, Vtmp2, Vtmp1
                 // (if signed) FIMM Vtmp1, 0
                 // FCMP Vin, Vin
                 // FCSEL Vtmp2, Vtmp1, Vtmp2, NE // on NaN, select 0
                 // convert Rout, Vtmp2

                 assert!(in_bits == 32 || in_bits == 64);
                 assert!(out_bits == 32 || out_bits == 64);

                 let min: f64 = match (out_bits, out_signed) {
                     (32, true) => std::i32::MIN as f64,
                     (32, false) => 0.0,
                     (64, true) => std::i64::MIN as f64,
                     (64, false) => 0.0,
                     _ => unreachable!(),
                 };

                 let max = match (out_bits, out_signed) {
                     (32, true) => std::i32::MAX as f64,
                     (32, false) => std::u32::MAX as f64,
                     (64, true) => std::i64::MAX as f64,
                     (64, false) => std::u64::MAX as f64,
                     _ => unreachable!(),
                 };

                 let rtmp1 = ctx.alloc_tmp(RegClass::V128, in_ty);
                 let rtmp2 = ctx.alloc_tmp(RegClass::V128, in_ty);

                 if in_bits == 32 {
                     ctx.emit(Inst::LoadFpuConst32 {
                         rd: rtmp1,
                         const_data: max as f32,
                     });
                 } else {
                     ctx.emit(Inst::LoadFpuConst64 {
                         rd: rtmp1,
                         const_data: max,
                     });
                 }
                 ctx.emit(Inst::FpuRRR {
                     fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
                     rd: rtmp2,
                     rn: rn,
                     rm: rtmp1.to_reg(),
                 });
                 if in_bits == 32 {
                     ctx.emit(Inst::LoadFpuConst32 {
                         rd: rtmp1,
                         const_data: min as f32,
                     });
                 } else {
                     ctx.emit(Inst::LoadFpuConst64 {
                         rd: rtmp1,
                         const_data: min,
                     });
                 }
                 ctx.emit(Inst::FpuRRR {
                     fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
                     rd: rtmp2,
                     rn: rtmp2.to_reg(),
                     rm: rtmp1.to_reg(),
                 });
                 if out_signed {
                     if in_bits == 32 {
                         ctx.emit(Inst::LoadFpuConst32 {
                             rd: rtmp1,
                             const_data: 0.0,
                         });
                     } else {
                         ctx.emit(Inst::LoadFpuConst64 {
                             rd: rtmp1,
                             const_data: 0.0,
                         });
                     }
                 }
                 if in_bits == 32 {
                     ctx.emit(Inst::FpuCmp32 { rn: rn, rm: rn });
                     ctx.emit(Inst::FpuCSel32 {
                         rd: rtmp2,
                         rn: rtmp1.to_reg(),
                         rm: rtmp2.to_reg(),
                         cond: Cond::Ne,
                     });
                 } else {
                     ctx.emit(Inst::FpuCmp64 { rn: rn, rm: rn });
                     ctx.emit(Inst::FpuCSel64 {
                         rd: rtmp2,
                         rn: rtmp1.to_reg(),
                         rm: rtmp2.to_reg(),
                         cond: Cond::Ne,
                     });
                 }

                 let cvt = match (in_bits, out_bits, out_signed) {
                     (32, 32, false) => FpuToIntOp::F32ToU32,
                     (32, 32, true) => FpuToIntOp::F32ToI32,
                     (32, 64, false) => FpuToIntOp::F32ToU64,
                     (32, 64, true) => FpuToIntOp::F32ToI64,
                     (64, 32, false) => FpuToIntOp::F64ToU32,
                     (64, 32, true) => FpuToIntOp::F64ToI32,
                     (64, 64, false) => FpuToIntOp::F64ToU64,
                     (64, 64, true) => FpuToIntOp::F64ToI64,
                     _ => unreachable!(),
                 };
                 ctx.emit(Inst::FpuToInt {
                     op: cvt,
                     rd,
                     rn: rtmp2.to_reg(),
                 });
+            }
         }

         Opcode::IaddIfcout => {
@@ -2689,12 +2725,62 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }

-        Opcode::Snarrow
-        | Opcode::Unarrow
-        | Opcode::SwidenLow
-        | Opcode::SwidenHigh
-        | Opcode::UwidenLow
-        | Opcode::UwidenHigh => unimplemented!(),
+        Opcode::Snarrow | Opcode::Unarrow => {
+            let op = if op == Opcode::Snarrow {
+                VecMiscNarrowOp::Sqxtn
+            } else {
+                VecMiscNarrowOp::Sqxtun
+            };
+            let rd = get_output_reg(ctx, outputs[0]);
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let ty = ty.unwrap();
+
+            ctx.emit(Inst::VecMiscNarrow {
+                op,
+                rd,
+                rn,
+                size: VectorSize::from_ty(ty),
+                high_half: false,
+            });
+            ctx.emit(Inst::VecMiscNarrow {
+                op,
+                rd,
+                rn: rn2,
+                size: VectorSize::from_ty(ty),
+                high_half: true,
+            });
+        }
+
+        Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
+            let lane_type = ty.unwrap().lane_type();
+            let rd = get_output_reg(ctx, outputs[0]);
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let (t, high_half) = match (lane_type, op) {
+                (I16, Opcode::SwidenLow) => (VecExtendOp::Sxtl8, false),
+                (I16, Opcode::SwidenHigh) => (VecExtendOp::Sxtl8, true),
+                (I16, Opcode::UwidenLow) => (VecExtendOp::Uxtl8, false),
+                (I16, Opcode::UwidenHigh) => (VecExtendOp::Uxtl8, true),
+                (I32, Opcode::SwidenLow) => (VecExtendOp::Sxtl16, false),
+                (I32, Opcode::SwidenHigh) => (VecExtendOp::Sxtl16, true),
+                (I32, Opcode::UwidenLow) => (VecExtendOp::Uxtl16, false),
+                (I32, Opcode::UwidenHigh) => (VecExtendOp::Uxtl16, true),
+                _ => {
+                    return Err(CodegenError::Unsupported(format!(
+                        "Unsupported SIMD vector lane type: {:?}",
+                        lane_type
+                    )));
+                }
+            };
+
+            ctx.emit(Inst::VecExtend {
+                t,
+                rd,
+                rn,
+                high_half,
+            });
+        }

         Opcode::TlsValue => unimplemented!(),
     }
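Note: to make the new lowerings above concrete, snarrow/unarrow combine two 128-bit inputs into one result by writing the destination's low half with the plain narrowing instruction and its high half with the "2" form, and the widening opcodes select sxtl/uxtl for the low half of the input lanes or sxtl2/uxtl2 for the high half. The sketch below is a scalar model of the widening behaviour only; the helper names and fixed i8x16-to-i16x8 shapes are invented for illustration.

    // swiden_low: sign-extend the low eight lanes (what sxtl produces).
    fn swiden_low_i8x16(v: [i8; 16]) -> [i16; 8] {
        let mut out = [0i16; 8];
        for i in 0..8 {
            out[i] = v[i] as i16;
        }
        out
    }

    // uwiden_high: zero-extend the high eight lanes (what uxtl2 produces).
    fn uwiden_high_i8x16(v: [u8; 16]) -> [u16; 8] {
        let mut out = [0u16; 8];
        for i in 0..8 {
            out[i] = v[i + 8] as u16;
        }
        out
    }

    fn main() {
        assert_eq!(swiden_low_i8x16([-1; 16]), [-1i16; 8]);
        assert_eq!(uwiden_high_i8x16([200; 16]), [200u16; 8]);
    }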
