Implement vector element extensions for AArch64

This commit also includes the load-and-extend operations. Both are
prerequisites for enabling further SIMD spec tests.

Copyright (c) 2020, Arm Limited.
Author: Anton Kirilov
Branch: pull/1802/head
Commit: 51a551fb39
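
For background, the sxtl and uxtl instructions widen each element in the low 64 bits of the source vector to twice its width, signed and unsigned respectively; that is what the new VecExtend instruction below emits. A minimal scalar model of the 8-bit signed case (an illustrative sketch, not part of the commit):

fn sxtl8(lanes: [i8; 8]) -> [i16; 8] {
    // Sign-extend each 8-bit lane to 16 bits; the unsigned variant (uxtl)
    // would use `u8 as u16` instead.
    lanes.map(|lane| lane as i16)
}

fn main() {
    assert_eq!(sxtl8([-1, 2, -3, 4, -5, 6, -7, 8]), [-1, 2, -3, 4, -5, 6, -7, 8]);
}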
 build.rs                                              |  2
 cranelift/codegen/src/isa/aarch64/abi.rs              |  2
 cranelift/codegen/src/isa/aarch64/inst/emit.rs        | 17
 cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs  | 54
 cranelift/codegen/src/isa/aarch64/inst/mod.rs         | 54
 cranelift/codegen/src/isa/aarch64/inst/regs.rs        |  6
 cranelift/codegen/src/isa/aarch64/lower.rs            |  5
 cranelift/codegen/src/isa/aarch64/lower_inst.rs       | 37
 8 files changed

build.rs

@@ -180,8 +180,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
_ => (),
},
"Cranelift" => match (testsuite, testname) {
("simd", "simd_store") => return false,
("simd", "simd_i8x16_cmp") => return false,
("simd", "simd_store") => return false,
// Most simd tests are known to fail on aarch64 for now, it's going
// to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true,

cranelift/codegen/src/isa/aarch64/abi.rs

@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
fn in_vec_reg(ty: ir::Type) -> bool {
match ty {
-types::F32 | types::F64 | types::I8X16 => true,
+types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
_ => false,
}
}

cranelift/codegen/src/isa/aarch64/inst/emit.rs

@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
| machreg_to_gpr(rd.to_reg()),
);
}
+&Inst::VecExtend { t, rd, rn } => {
+let (u, immh) = match t {
+VecExtendOp::Sxtl8 => (0b0, 0b001),
+VecExtendOp::Sxtl16 => (0b0, 0b010),
+VecExtendOp::Sxtl32 => (0b0, 0b100),
+VecExtendOp::Uxtl8 => (0b1, 0b001),
+VecExtendOp::Uxtl16 => (0b1, 0b010),
+VecExtendOp::Uxtl32 => (0b1, 0b100),
+};
+sink.put4(
+0b000_011110_0000_000_101001_00000_00000
+| (u << 29)
+| (immh << 19)
+| (machreg_to_vec(rn) << 5)
+| machreg_to_vec(rd.to_reg()),
+);
+}
&Inst::VecRRR {
rd,
rn,
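
The 32-bit constant in the VecExtend arm follows the Advanced SIMD "shift by immediate" layout: sxtl and uxtl are aliases of sshll and ushll with a shift amount of zero, the U bit (bit 29) selects the unsigned variant, and immh (bits 22:19) encodes the source element size. The following standalone sketch reproduces the computation and checks it against the first emit test below (the helper name is hypothetical):

fn vec_extend_enc(u: u32, immh: u32, rn: u32, rd: u32) -> u32 {
    // Same bit layout as the sink.put4(...) expression above.
    0b000_011110_0000_000_101001_00000_00000 | (u << 29) | (immh << 19) | (rn << 5) | rd
}

fn main() {
    // sxtl v4.8h, v27.8b: u = 0, immh = 0b001 (8-bit elements), rn = 27, rd = 4.
    // The emit tests list the expected bytes in little-endian order: "64A7080F".
    assert_eq!(vec_extend_enc(0, 0b001, 27, 4).to_le_bytes(), [0x64, 0xA7, 0x08, 0x0F]);
}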

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
"E5979F9A",
"cset x5, hi",
));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Sxtl8,
+rd: writable_vreg(4),
+rn: vreg(27),
+},
+"64A7080F",
+"sxtl v4.8h, v27.8b",
+));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Sxtl16,
+rd: writable_vreg(17),
+rn: vreg(19),
+},
+"71A6100F",
+"sxtl v17.4s, v19.4h",
+));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Sxtl32,
+rd: writable_vreg(30),
+rn: vreg(6),
+},
+"DEA4200F",
+"sxtl v30.2d, v6.2s",
+));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Uxtl8,
+rd: writable_vreg(3),
+rn: vreg(29),
+},
+"A3A7082F",
+"uxtl v3.8h, v29.8b",
+));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Uxtl16,
+rd: writable_vreg(15),
+rn: vreg(12),
+},
+"8FA5102F",
+"uxtl v15.4s, v12.4h",
+));
+insns.push((
+Inst::VecExtend {
+t: VecExtendOp::Uxtl32,
+rd: writable_vreg(28),
+rn: vreg(2),
+},
+"5CA4202F",
+"uxtl v28.2d, v2.2s",
+));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
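
Each test tuple pairs an Inst with its expected machine code and its disassembly; the hex string lists the four encoded bytes in memory (little-endian) order rather than as the instruction word printed in the architecture manual. A one-line check of that relationship for the first new test (a standalone sketch):

fn main() {
    // The bytes "64A7080F" form the 32-bit instruction word 0x0F08A764.
    assert_eq!(u32::from_le_bytes([0x64, 0xA7, 0x08, 0x0F]), 0x0F08_A764);
}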

cranelift/codegen/src/isa/aarch64/inst/mod.rs

@@ -5,7 +5,8 @@
use crate::binemit::CodeOffset;
use crate::ir::types::{
-B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
+B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
+I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
Nearest64,
}
+
+/// Type of vector element extensions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecExtendOp {
+/// Signed extension of 8-bit elements
+Sxtl8,
+/// Signed extension of 16-bit elements
+Sxtl16,
+/// Signed extension of 32-bit elements
+Sxtl32,
+/// Unsigned extension of 8-bit elements
+Uxtl8,
+/// Unsigned extension of 16-bit elements
+Uxtl16,
+/// Unsigned extension of 32-bit elements
+Uxtl32,
+}
/// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
@@ -667,6 +685,13 @@ pub enum Inst {
rn: Reg,
},
+
+/// Vector extend.
+VecExtend {
+t: VecExtendOp,
+rd: Writable<Reg>,
+rn: Reg,
+},
/// A vector ALU op.
VecRRR {
alu_op: VecALUOp,
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
+&Inst::VecExtend { rd, rn, .. } => {
+collector.add_def(rd);
+collector.add_use(rn);
+}
&Inst::VecRRR { rd, rn, rm, .. } => {
collector.add_def(rd);
collector.add_use(rn);
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
+&mut Inst::VecExtend {
+ref mut rd,
+ref mut rn,
+..
+} => {
+map_def(mapper, rd);
+map_use(mapper, rn);
+}
&mut Inst::VecRRR {
ref mut rd,
ref mut rn,
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
-I8X16 => Ok(RegClass::V128),
+I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
B8X16 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
let rn = rn.show_rru(mb_rru);
format!("mov {}, {}.d[0]", rd, rn)
}
+&Inst::VecExtend { t, rd, rn } => {
+let (op, dest, src) = match t {
+VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
+VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
+VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
+VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
+VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
+VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
+};
+let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
+let rn = show_vreg_vector(rn, mb_rru, src);
+format!("{} {}, {}", op, rd, rn)
+}
&Inst::VecRRR {
rd,
rn,

cranelift/codegen/src/isa/aarch64/inst/regs.rs

@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
match ty {
I8X16 => s.push_str(".16b"),
F32X2 => s.push_str(".2s"),
+I8X8 => s.push_str(".8b"),
+I16X4 => s.push_str(".4h"),
+I16X8 => s.push_str(".8h"),
+I32X2 => s.push_str(".2s"),
+I32X4 => s.push_str(".4s"),
+I64X2 => s.push_str(".2d"),
_ => unimplemented!(),
}

cranelift/codegen/src/isa/aarch64/lower.rs

@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
B64 | I64 | F64 => 64,
B128 | I128 => 128,
IFLAGS | FFLAGS => 32,
-I8X16 | B8X16 => 128,
+I8X8 | I16X4 | I32X2 => 64,
+B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty),
}
}
@@ -724,7 +725,7 @@
pub(crate) fn ty_is_int(ty: Type) -> bool {
match ty {
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
-F32 | F64 | B128 | I128 | I8X16 => false,
+F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
}

cranelift/codegen/src/isa/aarch64/lower_inst.rs

@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex
| Opcode::Sload16Complex
| Opcode::Uload32Complex
-| Opcode::Sload32Complex => {
+| Opcode::Sload32Complex
+| Opcode::Sload8x8
+| Opcode::Uload8x8
+| Opcode::Sload16x4
+| Opcode::Uload16x4
+| Opcode::Sload32x2
+| Opcode::Uload32x2 => {
let off = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Sload32Complex
| Opcode::Uload32Complex => I32,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
+Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
+Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
+Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
_ => unreachable!(),
};
let sign_extend = match op {
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
+// Note that we treat some of the vector loads as scalar floating-point loads,
+// which is correct in a little endian environment.
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
_ => panic!("Unsupported size in load"),
});
+let vec_extend = match op {
+Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
+Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
+Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
+Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
+Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
+Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
+_ => None,
+};
+if let Some(t) = vec_extend {
+ctx.emit(Inst::VecExtend {
+t,
+rd,
+rn: rd.to_reg(),
+});
+}
}
Opcode::Store
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Extractlane
| Opcode::ScalarToVector
| Opcode::Swizzle
-| Opcode::Uload8x8
-| Opcode::Uload8x8Complex
-| Opcode::Sload8x8
-| Opcode::Sload8x8Complex
-| Opcode::Uload16x4
-| Opcode::Uload16x4Complex
-| Opcode::Sload16x4
-| Opcode::Sload16x4Complex
-| Opcode::Uload32x2
-| Opcode::Uload32x2Complex
-| Opcode::Sload32x2
-| Opcode::Sload32x2Complex => {
+| Opcode::Uload8x8Complex
+| Opcode::Sload8x8Complex
+| Opcode::Uload16x4Complex
+| Opcode::Sload16x4Complex
+| Opcode::Uload32x2Complex
+| Opcode::Sload32x2Complex => {
// TODO
panic!("Vector ops not implemented.");
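
As the comment in the previous hunk notes, each widening load is emitted as a 64-bit scalar floating-point load (FpuLoad64) followed by a VecExtend of the loaded register, which is only equivalent to a 64-bit vector load on a little-endian target. A host-side model of that endianness argument (an illustrative sketch, not part of the commit):

fn main() {
    let mem = [1u8, 2, 3, 4, 5, 6, 7, 8];
    // A 64-bit scalar load reads the eight bytes as one little-endian integer...
    let scalar = u64::from_le_bytes(mem);
    // ...and viewing that value as 8-bit lanes puts lane 0 in the least
    // significant byte, matching the order the lanes had in memory.
    assert_eq!(scalar.to_le_bytes(), mem);
}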
