Browse Source

cranelift(aarch64): Add single precision fmov (#8453)

This commit is a follow-up to https://github.com/bytecodealliance/wasmtime/pull/8365/files#r1565962730, enabling emission of 32-bit fmov from Winch.

I opted to introduce a new instruction rather than refactoring the existing `FpuMove64` to be more generic, in order to keep things simple, but I'm definitely open to exploring a refactoring if that's preferred.

Encoding reference: https://developer.arm.com/documentation/ddi0602/2024-03/SIMD-FP-Instructions/FMOV--register---Floating-point-Move-register-without-conversion-?lang=en
pull/8455/head
Saúl Cabrera 7 months ago
committed by GitHub
parent
commit
bfb759d7ae
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 5
      cranelift/codegen/src/isa/aarch64/inst.isle
  2. 5
      cranelift/codegen/src/isa/aarch64/inst/emit.rs
  3. 9
      cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
  4. 9
      cranelift/codegen/src/isa/aarch64/inst/mod.rs

5
cranelift/codegen/src/isa/aarch64/inst.isle

@ -331,6 +331,11 @@
;; Consumption of speculative data barrier.
(Csdb)
;; FPU 32-bit move.
(FpuMove32
(rd WritableReg)
(rn Reg))
;; FPU move. Note that this is distinct from a vector-register
;; move; moving just 64 bits seems to be significantly faster.
(FpuMove64

5
cranelift/codegen/src/isa/aarch64/inst/emit.rs

@ -1831,6 +1831,11 @@ impl MachInstEmit for Inst {
&Inst::Csdb {} => {
sink.put4(0xd503229f);
}
&Inst::FpuMove32 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
sink.put4(enc_fpurr(0b000_11110_00_1_000000_10000, rd, rn));
}
&Inst::FpuMove64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);

9
cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

@ -6135,6 +6135,15 @@ fn test_aarch64_binemit() {
"fmov d8, d4",
));
insns.push((
Inst::FpuMove32 {
rd: writable_vreg(8),
rn: vreg(4),
},
"8840201E",
"fmov s8, s4",
));
insns.push((
Inst::FpuMove128 {
rd: writable_vreg(17),

9
cranelift/codegen/src/isa/aarch64/inst/mod.rs

@ -595,6 +595,10 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(rt);
}
&Inst::Fence {} | &Inst::Csdb {} => {}
&Inst::FpuMove32 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuMove64 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
@ -1718,6 +1722,11 @@ impl Inst {
&Inst::Csdb {} => {
format!("csdb")
}
&Inst::FpuMove32 { rd, rn } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs);
let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMove64 { rd, rn } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs);
let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs);

Loading…
Cancel
Save