
Add initial f16 and f128 support to the aarch64 backend (#9076)

beetrees committed 3 months ago (committed by GitHub)
commit 3f5c21bff4 · pull/9138/head
31 changed files:

cranelift/codegen/meta/src/isa/arm64.rs | 6
cranelift/codegen/src/isa/aarch64/abi.rs | 9
cranelift/codegen/src/isa/aarch64/inst.isle | 80
cranelift/codegen/src/isa/aarch64/inst/emit.rs | 15
cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs | 37
cranelift/codegen/src/isa/aarch64/inst/imms.rs | 57
cranelift/codegen/src/isa/aarch64/inst/mod.rs | 55
cranelift/codegen/src/isa/aarch64/lower.isle | 34
cranelift/codegen/src/isa/aarch64/lower/isle.rs | 4
cranelift/codegen/src/isa/aarch64/pcc.rs | 6
cranelift/codegen/src/isa/aarch64/settings.rs | 4
cranelift/codegen/src/isle_prelude.rs | 9
cranelift/codegen/src/prelude.isle | 7
cranelift/filetests/filetests/isa/aarch64/bitcast-fp16.clif | 35
cranelift/filetests/filetests/isa/aarch64/bitcast.clif | 112
cranelift/filetests/filetests/isa/aarch64/call.clif | 61
cranelift/filetests/filetests/isa/aarch64/constants-fp16.clif | 70
cranelift/filetests/filetests/isa/aarch64/constants.clif | 152
cranelift/filetests/filetests/isa/aarch64/load-f16-f128.clif | 36
cranelift/filetests/filetests/isa/aarch64/select-fp16.clif | 21
cranelift/filetests/filetests/isa/aarch64/select.clif | 75
cranelift/filetests/filetests/isa/aarch64/store-f16-f128.clif | 36
cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif | 2
cranelift/filetests/filetests/runtests/f128const.clif | 1
cranelift/filetests/filetests/runtests/f16const.clif | 2
cranelift/filetests/filetests/runtests/select-f16-f128.clif | 2
cranelift/native/src/lib.rs | 4
crates/fuzzing/src/generators/codegen_settings.rs | 1
crates/wasmtime/src/config.rs | 1
crates/wasmtime/src/engine.rs | 1
src/commands/compile.rs | 1

cranelift/codegen/meta/src/isa/arm64.rs

@@ -18,6 +18,12 @@ pub(crate) fn define() -> TargetIsa {
"",
false,
);
settings.add_bool(
"has_fp16",
"Use half-precision floating point (FEAT_FP16) instructions.",
"",
false,
);
settings.add_bool(
"sign_return_address_all",
"If function return address signing is enabled, then apply it to all \

cranelift/codegen/src/isa/aarch64/abi.rs

@@ -102,7 +102,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
fn compute_arg_locs(
call_conv: isa::CallConv,
_flags: &settings::Flags,
flags: &settings::Flags,
params: &[ir::AbiParam],
args_or_rets: ArgsOrRets,
add_ret_area_ptr: bool,
@@ -161,6 +161,13 @@ impl ABIMachineSpec for AArch64MachineDeps {
param.value_type
);
if is_apple_cc && param.value_type == types::F128 && !flags.enable_llvm_abi_extensions()
{
panic!(
"f128 args/return values not supported for apple_aarch64 unless LLVM ABI extensions are enabled"
);
}
let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
if matches!(

cranelift/codegen/src/isa/aarch64/inst.isle

@@ -407,6 +407,18 @@
(rn Reg)
(rm Reg))
;; Floating-point load, half-precision (16 bit).
(FpuLoad16
(rd WritableReg)
(mem AMode)
(flags MemFlags))
;; Floating-point store, half-precision (16 bit).
(FpuStore16
(rd Reg)
(mem AMode)
(flags MemFlags))
;; Floating-point load, single-precision (32 bit).
(FpuLoad32
(rd WritableReg)
@@ -483,6 +495,14 @@
(rd WritableReg)
(rn Reg))
;; FP conditional select, 16 bit.
;; Requires FEAT_FP16.
(FpuCSel16
(rd WritableReg)
(rn Reg)
(rm Reg)
(cond Cond))
;; FP conditional select, 32 bit.
(FpuCSel32
(rd WritableReg)
@@ -504,8 +524,8 @@
(rn Reg))
;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane
;; of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit
;; transactions are supported.
;; of the destination, and all other lanes are zeroed out. Currently 16-, 32- and 64-bit
;; transactions are supported. 16-bit moves require FEAT_FP16.
(MovToFpu
(rd WritableReg)
(rn Reg)
@@ -1701,6 +1721,9 @@
(decl use_lse () Inst)
(extern extractor use_lse use_lse)
(decl pure use_fp16 () bool)
(extern constructor use_fp16 use_fp16)
;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl pure partial move_wide_const_from_u64 (Type u64) MoveWideConst)
@@ -2221,9 +2244,19 @@
(_ Unit (emit (MInst.VecRRLong op dst src high_half))))
dst))
;; Helper for emitting `MInst.FpuCSel32` / `MInst.FpuCSel64`
;; Helper for emitting `MInst.FpuCSel16` / `MInst.FpuCSel32` / `MInst.FpuCSel64`
;; instructions.
(decl fpu_csel (Type Cond Reg Reg) ConsumesFlags)
(rule (fpu_csel $F16 cond if_true if_false)
(fpu_csel $F32 cond if_true if_false))
(rule 1 (fpu_csel $F16 cond if_true if_false)
(if-let $true (use_fp16))
(let ((dst WritableReg (temp_writable_reg $F16)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.FpuCSel16 dst if_true if_false cond)
dst)))
(rule (fpu_csel $F32 cond if_true if_false)
(let ((dst WritableReg (temp_writable_reg $F32)))
(ConsumesFlags.ConsumesFlagsReturnsReg
@@ -2268,6 +2301,9 @@
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.MovToFpu dst x size))))
dst))
(rule 1 (mov_to_fpu x (ScalarSize.Size16))
(if-let $false (use_fp16))
(mov_to_fpu x (ScalarSize.Size32)))
;; Helper for emitting `MInst.FpuMoveFPImm` instructions.
(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg)
@@ -2849,6 +2885,11 @@
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.ULoad64 dst amode flags))))
dst))
(decl aarch64_fpuload16 (AMode MemFlags) Reg)
(rule (aarch64_fpuload16 amode flags)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.FpuLoad16 dst amode flags))))
dst))
(decl aarch64_fpuload32 (AMode MemFlags) Reg)
(rule (aarch64_fpuload32 amode flags)
(let ((dst WritableReg (temp_writable_reg $F64))
@@ -2885,6 +2926,9 @@
(decl aarch64_store64 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_store64 amode flags val)
(SideEffectNoResult.Inst (MInst.Store64 val amode flags)))
(decl aarch64_fpustore16 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_fpustore16 amode flags val)
(SideEffectNoResult.Inst (MInst.FpuStore16 val amode flags)))
(decl aarch64_fpustore32 (AMode MemFlags Reg) SideEffectNoResult)
(rule (aarch64_fpustore32 amode flags val)
(SideEffectNoResult.Inst (MInst.FpuStore32 val amode flags)))
@@ -3229,19 +3273,41 @@
(rule 1 (add_imm_to_addr val (imm12_from_u64 imm)) (add_imm $I64 val imm))
(rule 0 (add_imm_to_addr val offset) (add $I64 val (imm $I64 (ImmExtend.Zero) offset)))
;; Lower a constant f16.
;;
;; Note that we must make sure that all bits outside the lowest 16 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f16 (u16) Reg)
(rule 3 (constant_f16 n)
(if-let $false (use_fp16))
(constant_f32 n))
(rule 2 (constant_f16 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
(rule 1 (constant_f16 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size16)))
(fpu_move_fp_imm imm (ScalarSize.Size16)))
(rule (constant_f16 n)
(mov_to_fpu (imm $I16 (ImmExtend.Zero) n) (ScalarSize.Size16)))
;; Lower a constant f32.
;;
;; Note that we must make sure that all bits outside the lowest 32 are set to 0
;; because this function is also used to load wider constants (that have zeros
;; in their most significant bits).
(decl constant_f32 (u32) Reg)
(rule 2 (constant_f32 0)
(rule 3 (constant_f32 0)
(vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32))
$false
(VectorSize.Size32x2)))
(rule 1 (constant_f32 n)
(rule 2 (constant_f32 n)
(if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32)))
(fpu_move_fp_imm imm (ScalarSize.Size32)))
(rule 1 (constant_f32 (u32_as_u16 n))
(if-let $true (use_fp16))
(constant_f16 n))
(rule (constant_f32 n)
(mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32)))
@@ -4063,8 +4129,10 @@
;; Helpers for generating select instruction sequences.
(decl lower_select (ProducesFlags Cond Type Value Value) ValueRegs)
(rule 2 (lower_select flags cond (ty_scalar_float ty) rn rm)
(rule 2 (lower_select flags cond (ty_scalar_float (fits_in_64 ty)) rn rm)
(with_flags flags (fpu_csel ty cond rn rm)))
(rule 4 (lower_select flags cond $F128 rn rm)
(with_flags flags (vec_csel cond rn rm)))
(rule 3 (lower_select flags cond (ty_vec128 ty) rn rm)
(with_flags flags (vec_csel cond rn rm)))
(rule (lower_select flags cond ty rn rm)

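Review note on the `constant_f16` rules above: the priorities fall back from the FEAT_FP16-specific encodings to a plain GPR-to-FPU move. Below is a self-contained Rust sketch of that decision order (illustrative only, not the generated ISLE code; `use_fp16` and `encodable_as_fp8` stand in for the ISLE `use_fp16` flag and a successful `asimd_fp_mod_imm_from_u64` match):

/// Sketch of the `constant_f16` rule order; returns the instruction shape
/// each rule would pick for the raw f16 bit pattern `bits`.
fn lower_f16_const(bits: u16, use_fp16: bool, encodable_as_fp8: bool) -> &'static str {
    if !use_fp16 {
        // Rule 3: no 16-bit `fmov` without FEAT_FP16, so reuse the f32 path;
        // the upper bits stay zero, as the zeroing note above requires.
        "constant_f32(bits as u32)"
    } else if bits == 0 {
        // Rule 2: +0.0 uses the all-zero vector move.
        "movi v<d>.2s, #0"
    } else if encodable_as_fp8 {
        // Rule 1: 8-bit FP modified immediate, e.g. 1.0 or -16.0.
        "fmov h<d>, #imm8"
    } else {
        // Base rule: materialize the bits in a GPR, then move to the FPU.
        "movz w<t>, #bits; fmov h<d>, w<t>"
    }
}

fn main() {
    // 0x5240 is 50.0 in f16 and is not FP8-encodable (cf. constants-fp16.clif).
    assert_eq!(lower_f16_const(0x5240, true, false), "movz w<t>, #bits; fmov h<d>, w<t>");
    // 0x3c00 is 1.0, which is FP8-encodable: `fmov h0, #1` in the tests below.
    assert_eq!(lower_f16_const(0x3c00, true, true), "fmov h<d>, #imm8");
}
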
cranelift/codegen/src/isa/aarch64/inst/emit.rs

@@ -958,6 +958,7 @@ impl MachInstEmit for Inst {
| &Inst::ULoad64 {
rd, ref mem, flags, ..
}
| &Inst::FpuLoad16 { rd, ref mem, flags }
| &Inst::FpuLoad32 { rd, ref mem, flags }
| &Inst::FpuLoad64 { rd, ref mem, flags }
| &Inst::FpuLoad128 { rd, ref mem, flags } => {
@@ -983,6 +984,7 @@
Inst::ULoad32 { .. } => 0b1011100001,
Inst::SLoad32 { .. } => 0b1011100010,
Inst::ULoad64 { .. } => 0b1111100001,
Inst::FpuLoad16 { .. } => 0b0111110001,
Inst::FpuLoad32 { .. } => 0b1011110001,
Inst::FpuLoad64 { .. } => 0b1111110001,
Inst::FpuLoad128 { .. } => 0b0011110011,
@@ -1098,6 +1100,7 @@ impl MachInstEmit for Inst {
| &Inst::Store16 { rd, ref mem, flags }
| &Inst::Store32 { rd, ref mem, flags }
| &Inst::Store64 { rd, ref mem, flags }
| &Inst::FpuStore16 { rd, ref mem, flags }
| &Inst::FpuStore32 { rd, ref mem, flags }
| &Inst::FpuStore64 { rd, ref mem, flags }
| &Inst::FpuStore128 { rd, ref mem, flags } => {
@@ -1114,6 +1117,7 @@
Inst::Store16 { .. } => 0b0111100000,
Inst::Store32 { .. } => 0b1011100000,
Inst::Store64 { .. } => 0b1111100000,
Inst::FpuStore16 { .. } => 0b0111110000,
Inst::FpuStore32 { .. } => 0b1011110000,
Inst::FpuStore64 { .. } => 0b1111110000,
Inst::FpuStore128 { .. } => 0b0011110010,
@@ -2213,6 +2217,9 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_inttofpu(top16, rd, rn));
}
&Inst::FpuCSel16 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size16));
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32));
}
@@ -2234,6 +2241,7 @@
}
&Inst::MovToFpu { rd, rn, size } => {
let template = match size {
ScalarSize::Size16 => 0b000_11110_11_1_00_111_000000_00000_00000,
ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000,
ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000,
_ => unreachable!(),
@@ -2241,14 +2249,9 @@
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
let size_code = match size {
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => unimplemented!(),
};
sink.put4(
0b000_11110_00_1_00_000_000100_00000_00000
| size_code << 22
| size.ftype() << 22
| ((imm.enc_bits() as u32) << 13)
| machreg_to_vec(rd.to_reg()),
);

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

@@ -6699,6 +6699,19 @@ fn test_aarch64_binemit() {
"fcmp d23, d24",
));
insns.push((
Inst::FpuLoad16 {
rd: writable_vreg(16),
mem: AMode::RegScaled {
rn: xreg(8),
rm: xreg(9),
},
flags: MemFlags::trusted(),
},
"1079697C",
"ldr h16, [x8, x9, LSL #1]",
));
insns.push((
Inst::FpuLoad32 {
rd: writable_vreg(16),
@@ -6774,6 +6787,19 @@
"ldr q16, pc+8",
));
insns.push((
Inst::FpuStore16 {
rd: vreg(16),
mem: AMode::RegScaled {
rn: xreg(8),
rm: xreg(9),
},
flags: MemFlags::trusted(),
},
"1079297C",
"str h16, [x8, x9, LSL #1]",
));
insns.push((
Inst::FpuStore32 {
rd: vreg(16),
@@ -6973,6 +6999,17 @@
"stp q18, q22, [sp], #304",
));
insns.push((
Inst::FpuCSel16 {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(3),
cond: Cond::Hi,
},
"418CE31E",
"fcsel h1, h2, h3, hi",
));
insns.push((
Inst::FpuCSel32 {
rd: writable_vreg(1),

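Review note: the hex strings in these tests are the encoded instruction bytes in little-endian order. A self-contained sketch (illustrative, not part of the test suite) connecting the two new f16 vectors above to the FpuLoad16/FpuStore16 opcode values added in emit.rs; for this register-offset form, bits 31..22 of the word carry that 10-bit opcode:

// Decode an emit-test hex string into its 32-bit instruction word;
// the bytes are listed in little-endian order.
fn word_from_test_hex(hex: &str) -> u32 {
    let bytes: Vec<u8> = (0..hex.len())
        .step_by(2)
        .map(|i| u8::from_str_radix(&hex[i..i + 2], 16).unwrap())
        .collect();
    u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
}

fn main() {
    // "1079697C" is `ldr h16, [x8, x9, LSL #1]`.
    assert_eq!(word_from_test_hex("1079697C") >> 22, 0b0111110001);
    // "1079297C" is `str h16, [x8, x9, LSL #1]`; only the lowest opcode bit differs.
    assert_eq!(word_from_test_hex("1079297C") >> 22, 0b0111110000);
    // The Size16 `MovToFpu` template from emit.rs is the base word of
    // `fmov h0, w0` (FMOV general, half-precision), with rn = w0 and rd = h0.
    assert_eq!(0b000_11110_11_1_00_111_000000_00000_00000u32, 0x1EE7_0000);
}
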
cranelift/codegen/src/isa/aarch64/inst/imms.rs

@@ -750,7 +750,7 @@ impl ASIMDMovModImm {
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ASIMDFPModImm {
imm: u8,
is_64bit: bool,
size: ScalarSize,
}
impl ASIMDFPModImm {
@@ -759,6 +759,21 @@ impl ASIMDFPModImm {
// In all cases immediates are encoded as an 8-bit number 0b_abcdefgh;
// let `D` be the inverse of the digit `d`.
match size {
ScalarSize::Size16 => {
// In this case the representable immediates are 16-bit numbers of the form
// 0b_aBbb_cdef_gh00_0000.
let value = value as u16;
let b0_5 = (value >> 6) & 0b111111;
let b6 = (value >> 6) & (1 << 6);
let b7 = (value >> 8) & (1 << 7);
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value16(imm) {
Some(ASIMDFPModImm { imm, size })
} else {
None
}
}
ScalarSize::Size32 => {
// In this case the representable immediates are 32-bit numbers of the form
// 0b_aBbb_bbbc_defg_h000 shifted to the left by 16.
@@ -769,10 +784,7 @@
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value32(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: false,
})
Some(ASIMDFPModImm { imm, size })
} else {
None
}
@@ -786,10 +798,7 @@
let imm = (b0_5 | b6 | b7) as u8;
if value == Self::value64(imm) {
Some(ASIMDFPModImm {
imm,
is_64bit: true,
})
Some(ASIMDFPModImm { imm, size })
} else {
None
}
@@ -803,6 +812,17 @@
self.imm
}
/// Returns the 16-bit value that corresponds to an 8-bit encoding.
fn value16(imm: u8) -> u16 {
let imm = imm as u16;
let b0_5 = imm & 0b111111;
let b6 = (imm >> 6) & 1;
let b6_inv = b6 ^ 1;
let b7 = (imm >> 7) & 1;
b0_5 << 6 | (b6 * 0b11) << 12 | b6_inv << 14 | b7 << 15
}
/// Returns the 32-bit value that corresponds to an 8-bit encoding.
fn value32(imm: u8) -> u32 {
let imm = imm as u32;
@@ -931,10 +951,21 @@ impl PrettyPrint for ASIMDMovModImm {
impl PrettyPrint for ASIMDFPModImm {
fn pretty_print(&self, _: u8) -> String {
if self.is_64bit {
format!("#{}", f64::from_bits(Self::value64(self.imm)))
} else {
format!("#{}", f32::from_bits(Self::value32(self.imm)))
match self.size {
ScalarSize::Size16 => {
// FIXME(#8312): Use `f16` once it is stable.
// `value` will always be a normal number. Convert it to a `f32`.
let value: u32 = Self::value16(self.imm).into();
let sign = (value & 0x8000) << 16;
// Adjust the exponent for the difference between the `f16` exponent bias and the
// `f32` exponent bias.
let exponent = ((value & 0x7c00) + ((127 - 15) << 10)) << 13;
let significand = (value & 0x3ff) << 13;
format!("#{}", f32::from_bits(sign | exponent | significand))
}
ScalarSize::Size32 => format!("#{}", f32::from_bits(Self::value32(self.imm))),
ScalarSize::Size64 => format!("#{}", f64::from_bits(Self::value64(self.imm))),
_ => unreachable!(),
}
}
}

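Review note: both new f16 helpers in this file are easy to check in isolation. A self-contained sketch mirroring the code above (valid for normal half-precision values only, per the comment in `pretty_print`): `value16` expands the 8-bit FP modified immediate abcdefgh into the half-precision pattern aBbbcdefgh000000 (B = NOT b), and `f16_bits_to_f32` is the bit-level widening used for printing. The asserts use the immediates that appear in the constants filetests below.

fn value16(imm: u8) -> u16 {
    let imm = imm as u16;
    let b0_5 = imm & 0b111111; // cdefgh
    let b6 = (imm >> 6) & 1; // b
    let b6_inv = b6 ^ 1; // B = NOT b
    let b7 = (imm >> 7) & 1; // a (sign)
    b0_5 << 6 | (b6 * 0b11) << 12 | b6_inv << 14 | b7 << 15
}

fn f16_bits_to_f32(value: u16) -> f32 {
    let value: u32 = value.into();
    let sign = (value & 0x8000) << 16;
    // Re-bias the 5-bit exponent (bias 15) to the 8-bit f32 exponent (bias 127).
    let exponent = ((value & 0x7c00) + ((127 - 15) << 10)) << 13;
    let significand = (value & 0x3ff) << 13;
    f32::from_bits(sign | exponent | significand)
}

fn main() {
    // imm8 0b0111_0000 expands to 0x3c00, which is 1.0 in half precision;
    // 0b1011_0000 expands to 0xcc00, i.e. -16.0 (cf. constants-fp16.clif).
    assert_eq!(value16(0b0111_0000), 0x3c00);
    assert_eq!(value16(0b1011_0000), 0xcc00);
    assert_eq!(f16_bits_to_f32(0x3c00), 1.0);
    assert_eq!(f16_bits_to_f32(0xcc00), -16.0);
}
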
cranelift/codegen/src/isa/aarch64/inst/mod.rs

@@ -1,7 +1,7 @@
//! This module defines aarch64-specific machine instruction types.
use crate::binemit::{Addend, CodeOffset, Reloc};
use crate::ir::types::{F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};
use crate::ir::types::{F128, F16, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64};
use crate::ir::{types, ExternalName, MemFlags, Type};
use crate::isa::{CallConv, FunctionAlignment};
use crate::machinst::*;
@@ -281,6 +281,11 @@ impl Inst {
mem,
flags,
},
F16 => Inst::FpuLoad16 {
rd: into_reg,
mem,
flags,
},
F32 => Inst::FpuLoad32 {
rd: into_reg,
mem,
@@ -292,7 +297,7 @@
flags,
},
_ => {
if ty.is_vector() {
if ty.is_vector() || ty.is_float() {
let bits = ty_bits(ty);
let rd = into_reg;
@@ -332,6 +337,11 @@
mem,
flags,
},
F16 => Inst::FpuStore16 {
rd: from_reg,
mem,
flags,
},
F32 => Inst::FpuStore32 {
rd: from_reg,
mem,
@@ -343,7 +353,7 @@
flags,
},
_ => {
if ty.is_vector() {
if ty.is_vector() || ty.is_float() {
let bits = ty_bits(ty);
let rd = from_reg;
@@ -372,6 +382,7 @@ impl Inst {
Inst::ULoad32 { .. } => Some(I32),
Inst::SLoad32 { .. } => Some(I32),
Inst::ULoad64 { .. } => Some(I64),
Inst::FpuLoad16 { .. } => Some(F16),
Inst::FpuLoad32 { .. } => Some(F32),
Inst::FpuLoad64 { .. } => Some(F64),
Inst::FpuLoad128 { .. } => Some(I8X16),
@@ -379,6 +390,7 @@
Inst::Store16 { .. } => Some(I16),
Inst::Store32 { .. } => Some(I32),
Inst::Store64 { .. } => Some(I64),
Inst::FpuStore16 { .. } => Some(F16),
Inst::FpuStore32 { .. } => Some(F32),
Inst::FpuStore64 { .. } => Some(F64),
Inst::FpuStore128 { .. } => Some(I8X16),
@@ -697,6 +709,10 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_use(rn);
collector.reg_use(rm);
}
Inst::FpuLoad16 { rd, mem, .. } => {
collector.reg_def(rd);
memarg_operands(mem, collector);
}
Inst::FpuLoad32 { rd, mem, .. } => {
collector.reg_def(rd);
memarg_operands(mem, collector);
@@ -709,6 +725,10 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_def(rd);
memarg_operands(mem, collector);
}
Inst::FpuStore16 { rd, mem, .. } => {
collector.reg_use(rd);
memarg_operands(mem, collector);
}
Inst::FpuStore32 { rd, mem, .. } => {
collector.reg_use(rd);
memarg_operands(mem, collector);
@@ -749,7 +769,9 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
collector.reg_def(rd);
collector.reg_use(rn);
}
Inst::FpuCSel32 { rd, rn, rm, .. } | Inst::FpuCSel64 { rd, rn, rm, .. } => {
Inst::FpuCSel16 { rd, rn, rm, .. }
| Inst::FpuCSel32 { rd, rn, rm, .. }
| Inst::FpuCSel64 { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
@@ -1060,6 +1082,7 @@ impl MachInst for Inst {
| &Inst::SLoad32 { .. }
| &Inst::ULoad64 { .. }
| &Inst::LoadP64 { .. }
| &Inst::FpuLoad16 { .. }
| &Inst::FpuLoad32 { .. }
| &Inst::FpuLoad64 { .. }
| &Inst::FpuLoad128 { .. }
@@ -1070,6 +1093,7 @@
| &Inst::Store32 { .. }
| &Inst::Store64 { .. }
| &Inst::StoreP64 { .. }
| &Inst::FpuStore16 { .. }
| &Inst::FpuStore32 { .. }
| &Inst::FpuStore64 { .. }
| &Inst::FpuStore128 { .. } => true,
@@ -1134,8 +1158,10 @@ impl MachInst for Inst {
I64 => Ok((&[RegClass::Int], &[I64])),
R32 => panic!("32-bit reftype pointer should never be seen on AArch64"),
R64 => Ok((&[RegClass::Int], &[R64])),
F16 => Ok((&[RegClass::Float], &[F16])),
F32 => Ok((&[RegClass::Float], &[F32])),
F64 => Ok((&[RegClass::Float], &[F64])),
F128 => Ok((&[RegClass::Float], &[F128])),
I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])),
_ if ty.is_vector() => {
assert!(ty.bits() <= 128);
@@ -1809,6 +1835,13 @@ impl Inst {
let rm = pretty_print_vreg_scalar(rm, size);
format!("fcmp {rn}, {rm}")
}
&Inst::FpuLoad16 { rd, ref mem, .. } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
let mem = mem.clone();
let access_ty = self.mem_type().unwrap();
let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
format!("{mem_str}ldr {rd}, {mem}")
}
&Inst::FpuLoad32 { rd, ref mem, .. } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
let mem = mem.clone();
@@ -1831,6 +1864,13 @@
let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
format!("{mem_str}ldr {rd}, {mem}")
}
&Inst::FpuStore16 { rd, ref mem, .. } => {
let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size16);
let mem = mem.clone();
let access_ty = self.mem_type().unwrap();
let (mem_str, mem) = mem_finalize_for_show(&mem, access_ty, state);
format!("{mem_str}str {rd}, {mem}")
}
&Inst::FpuStore32 { rd, ref mem, .. } => {
let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32);
let mem = mem.clone();
@@ -1923,6 +1963,13 @@ impl Inst {
let rn = pretty_print_ireg(rn, sizesrc);
format!("{op} {rd}, {rn}")
}
&Inst::FpuCSel16 { rd, rn, rm, cond } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size16);
let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size16);
let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size16);
let cond = cond.pretty_print(0);
format!("fcsel {rd}, {rn}, {rm}, {cond}")
}
&Inst::FpuCSel32 { rd, rn, rm, cond } => {
let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32);
let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32);

cranelift/codegen/src/isa/aarch64/lower.isle

@@ -24,6 +24,11 @@
(rule (lower (has_type ty (null)))
(imm ty (ImmExtend.Zero) 0))
;;;; Rules for `f16const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f16const (u16_from_ieee16 n)))
(constant_f16 n))
;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (f32const (u32_from_ieee32 n)))
@@ -34,6 +39,11 @@
(rule (lower (f64const (u64_from_ieee64 n)))
(constant_f64 n))
;;;; Rules for `f128const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F128 (f128const (u128_from_constant n))))
(constant_f128 n))
;;;; Rules for `nop` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (nop))
@@ -2329,12 +2339,18 @@
(rule (lower
(has_type $R64 (load flags address offset)))
(aarch64_uload64 (amode $I64 address offset) flags))
(rule (lower
(has_type $F16 (load flags address offset)))
(aarch64_fpuload16 (amode $F16 address offset) flags))
(rule (lower
(has_type $F32 (load flags address offset)))
(aarch64_fpuload32 (amode $F32 address offset) flags))
(rule (lower
(has_type $F64 (load flags address offset)))
(aarch64_fpuload64 (amode $F64 address offset) flags))
(rule (lower
(has_type $F128 (load flags address offset)))
(aarch64_fpuload128 (amode $F128 address offset) flags))
(rule (lower
(has_type $I128 (load flags address offset)))
(aarch64_loadp64 (pair_amode address offset) flags))
@@ -2447,6 +2463,10 @@
(side_effect
(aarch64_store32 (amode $I32 address offset) flags value)))
(rule (lower
(store flags value @ (value_type $F16) address offset))
(side_effect
(aarch64_fpustore16 (amode $F16 address offset) flags value)))
(rule (lower
(store flags value @ (value_type $F32) address offset))
(side_effect
@@ -2455,6 +2475,10 @@
(store flags value @ (value_type $F64) address offset))
(side_effect
(aarch64_fpustore64 (amode $F64 address offset) flags value)))
(rule (lower
(store flags value @ (value_type $F128) address offset))
(side_effect
(aarch64_fpustore128 (amode $F128 address offset) flags value)))
(rule (lower
(store flags value @ (value_type $I128) address offset))
@@ -2491,9 +2515,17 @@
;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; SIMD&FP <=> SIMD&FP
(rule 5 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type (ty_float_or_vec _)))))
(rule 7 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type (ty_float_or_vec _)))))
x)
; I128 => SIMD&FP
(rule 6 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type $I128))))
(mov_to_vec (mov_to_fpu (value_regs_get x 0) (ScalarSize.Size64)) (value_regs_get x 1) 1 (VectorSize.Size64x2)))
; SIMD&FP => I128
(rule 5 (lower (has_type $I128 (bitcast _ x @ (value_type (ty_float_or_vec _)))))
(value_regs (mov_from_vec x 0 (ScalarSize.Size64)) (mov_from_vec x 1 (ScalarSize.Size64))))
; GPR => SIMD&FP
(rule 4 (lower (has_type (ty_float_or_vec _) (bitcast _ x @ (value_type in_ty))))
(if (ty_int_ref_scalar_64 in_ty))

cranelift/codegen/src/isa/aarch64/lower/isle.rs

@@ -149,6 +149,10 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
}
}
fn use_fp16(&mut self) -> bool {
self.backend.isa_flags.has_fp16()
}
fn move_wide_const_from_u64(&mut self, ty: Type, n: u64) -> Option<MoveWideConst> {
let bits = ty.bits();
let n = if bits < 64 {

cranelift/codegen/src/isa/aarch64/pcc.rs

@@ -65,7 +65,8 @@ pub(crate) fn check(
let access_ty = inst.mem_type().unwrap();
check_load(ctx, Some(rd.to_reg()), flags, mem, vcode, access_ty)
}
Inst::FpuLoad32 { ref mem, flags, .. }
Inst::FpuLoad16 { ref mem, flags, .. }
| Inst::FpuLoad32 { ref mem, flags, .. }
| Inst::FpuLoad64 { ref mem, flags, .. }
| Inst::FpuLoad128 { ref mem, flags, .. } => {
let access_ty = inst.mem_type().unwrap();
@@ -91,7 +92,8 @@ pub(crate) fn check(
let access_ty = inst.mem_type().unwrap();
check_store(ctx, Some(rd), flags, mem, vcode, access_ty)
}
Inst::FpuStore32 { ref mem, flags, .. }
Inst::FpuStore16 { ref mem, flags, .. }
| Inst::FpuStore32 { ref mem, flags, .. }
| Inst::FpuStore64 { ref mem, flags, .. }
| Inst::FpuStore128 { ref mem, flags, .. } => {
let access_ty = inst.mem_type().unwrap();

cranelift/codegen/src/isa/aarch64/settings.rs

@@ -3,7 +3,7 @@
use crate::settings::{self, detail, Builder, Value};
use core::fmt;
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
// Include code generated by `cranelift/codegen/meta/src/gen_settings.rs:`. This file contains a
// public `Flags` struct with an impl for all of the settings defined in
// `cranelift-codegen/meta/src/isa/arm64/settings.rs`.
// `cranelift/codegen/meta/src/isa/arm64.rs`.
include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs"));

cranelift/codegen/src/isle_prelude.rs

@@ -28,6 +28,11 @@ macro_rules! isle_common_prelude_methods {
x as i16
}
#[inline]
fn u16_as_u32(&mut self, x: u16) -> u32 {
x.into()
}
#[inline]
fn u16_as_u64(&mut self, x: u16) -> u64 {
x.into()
@@ -910,6 +915,10 @@ macro_rules! isle_common_prelude_methods {
u32::try_from(val).ok()
}
fn u32_as_u16(&mut self, val: u32) -> Option<u16> {
val.try_into().ok()
}
fn u8_as_i8(&mut self, val: u8) -> i8 {
val as i8
}

cranelift/codegen/src/prelude.isle

@@ -83,6 +83,10 @@
(decl pure u16_as_i16 (u16) i16)
(extern constructor u16_as_i16 u16_as_i16)
(decl pure u16_as_u32 (u16) u32)
(extern constructor u16_as_u32 u16_as_u32)
(convert u16 u32 u16_as_u32)
(decl pure u16_as_u64 (u16) u64)
(extern constructor u16_as_u64 u16_as_u64)
(convert u16 u64 u16_as_u64)
@@ -134,6 +138,9 @@
(decl u64_as_u32 (u32) u64)
(extern extractor u64_as_u32 u64_as_u32)
(decl u32_as_u16 (u16) u32)
(extern extractor u32_as_u16 u32_as_u16)
(decl pure u64_as_i32 (u64) i32)
(extern constructor u64_as_i32 u64_as_i32)

cranelift/filetests/filetests/isa/aarch64/bitcast-fp16.clif

@@ -0,0 +1,35 @@
test compile precise-output
target aarch64 has_fp16
function %bitcast_f16_to_i16(f16) -> i16 {
block0(v0: f16):
v1 = bitcast.i16 v0
return v1
}
; VCode:
; block0:
; umov w0, v0.h[0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; umov w0, v0.h[0]
; ret
function %bitcast_i16_to_f16(i16) -> f16 {
block0(v0: i16):
v1 = bitcast.f16 v0
return v1
}
; VCode:
; block0:
; fmov h0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov h0, w0
; ret

cranelift/filetests/filetests/isa/aarch64/bitcast.clif

@@ -1,7 +1,39 @@
test compile precise-output
target aarch64
function %f1(f32) -> i32 {
function %bitcast_f16_to_i16(f16) -> i16 {
block0(v0: f16):
v1 = bitcast.i16 v0
return v1
}
; VCode:
; block0:
; umov w0, v0.h[0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; umov w0, v0.h[0]
; ret
function %bitcast_i16_to_f16(i16) -> f16 {
block0(v0: i16):
v1 = bitcast.f16 v0
return v1
}
; VCode:
; block0:
; fmov s0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov s0, w0
; ret
function %bitcast_f32_to_i32(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
@@ -17,7 +49,7 @@ block0(v0: f32):
; mov w0, v0.s[0]
; ret
function %f2(i32) -> f32 {
function %bitcast_i32_to_f32(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
@@ -33,7 +65,7 @@ block0(v0: i32):
; fmov s0, w0
; ret
function %f3(f64) -> i64 {
function %bitcast_f64_to_i64(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
@@ -49,7 +81,7 @@ block0(v0: f64):
; mov x0, v0.d[0]
; ret
function %f4(i64) -> f64 {
function %bitcast_i64_to_f64(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
@@ -65,3 +97,75 @@ block0(v0: i64):
; fmov d0, x0
; ret
function %bitcast_f128_to_i128(f128) -> i128 {
block0(v0: f128):
v1 = bitcast.i128 v0
return v1
}
; VCode:
; block0:
; mov x0, v0.d[0]
; mov x1, v0.d[1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov x0, v0.d[0]
; mov x1, v0.d[1]
; ret
function %bitcast_i128_to_f128(i128) -> f128 {
block0(v0: i128):
v1 = bitcast.f128 v0
return v1
}
; VCode:
; block0:
; fmov d0, x0
; mov v0.d[1], v0.d[1], x1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov d0, x0
; mov v0.d[1], x1
; ret
function %bitcast_i64x2_to_i128(i64x2) -> i128 {
block0(v0: i64x2):
v1 = bitcast.i128 little v0
return v1
}
; VCode:
; block0:
; mov x0, v0.d[0]
; mov x1, v0.d[1]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov x0, v0.d[0]
; mov x1, v0.d[1]
; ret
function %bitcast_i128_to_i64x2(i128) -> i64x2 {
block0(v0: i128):
v1 = bitcast.i64x2 little v0
return v1
}
; VCode:
; block0:
; fmov d0, x0
; mov v0.d[1], v0.d[1], x1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov d0, x0
; mov v0.d[1], x1
; ret

cranelift/filetests/filetests/isa/aarch64/call.clif

@@ -1,6 +1,7 @@
test compile precise-output
set unwind_info=false
set enable_probestack=false
set enable_llvm_abi_extensions
target aarch64
function %f1(i64) -> i64 {
@@ -933,3 +934,63 @@ block0:
; ldp x29, x30, [sp], #0x10
; ret
function %second_f16(f16, f16) -> f16 system_v {
block0(v0: f16, v1: f16):
return v1
}
; VCode:
; block0:
; mov v0.16b, v1.16b
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov v0.16b, v1.16b
; ret
function %second_f128(f128, f128) -> f128 system_v {
block0(v0: f128, v1: f128):
return v1
}
; VCode:
; block0:
; mov v0.16b, v1.16b
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov v0.16b, v1.16b
; ret
function %second_f16_apple(f16, f16) -> f16 apple_aarch64 {
block0(v0: f16, v1: f16):
return v1
}
; VCode:
; block0:
; mov v0.16b, v1.16b
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov v0.16b, v1.16b
; ret
function %second_f128_apple(f128, f128) -> f128 apple_aarch64 {
block0(v0: f128, v1: f128):
return v1
}
; VCode:
; block0:
; mov v0.16b, v1.16b
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov v0.16b, v1.16b
; ret

cranelift/filetests/filetests/isa/aarch64/constants-fp16.clif

@@ -0,0 +1,70 @@
test compile precise-output
set unwind_info=false
target aarch64 has_fp16
function %f() -> f16 {
block0:
v0 = f16const 0x1.0
return v0
}
; VCode:
; block0:
; fmov h0, #1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov h0, #1.00000000
; ret
function %f() -> f16 {
block0:
v0 = f16const 0x32.0
return v0
}
; VCode:
; block0:
; movz w0, #21056
; fmov h0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w0, #0x5240
; fmov h0, w0
; ret
function %f() -> f16 {
block0:
v0 = f16const 0x0.0
return v0
}
; VCode:
; block0:
; movi v0.2s, #0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; movi v0.2s, #0
; ret
function %f() -> f16 {
block0:
v0 = f16const -0x10.0
return v0
}
; VCode:
; block0:
; fmov h0, #-16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; fmov h0, #-16.00000000
; ret

cranelift/filetests/filetests/isa/aarch64/constants.clif

@@ -316,6 +316,28 @@ block0:
; mov x0, #-9
; ret
function %f() -> f128 {
block0:
v0 = f128const 0x1.0
return v0
}
; VCode:
; block0:
; ldr q0, [const(0)]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr q0, #0x10
; ret
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0xff, 0x3f
function %f() -> f64 {
block0:
v0 = f64const 0x1.0
@@ -348,6 +370,46 @@ block0:
; fmov s0, #5.00000000
; ret
function %f() -> f16 {
block0:
v0 = f16const 0x1.0
return v0
}
; VCode:
; block0:
; movz w0, #15360
; fmov s0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w0, #0x3c00
; fmov s0, w0
; ret
function %f() -> f128 {
block0:
v0 = f128const 0x32.0
return v0
}
; VCode:
; block0:
; ldr q0, [const(0)]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr q0, #0x10
; ret
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x90, 0x04, 0x40
function %f() -> f64 {
block0:
v0 = f64const 0x32.0
@@ -384,6 +446,40 @@ block0:
; fmov s0, w0
; ret
function %f() -> f16 {
block0:
v0 = f16const 0x32.0
return v0
}
; VCode:
; block0:
; movz w0, #21056
; fmov s0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w0, #0x5240
; fmov s0, w0
; ret
function %f() -> f128 {
block0:
v0 = f128const 0x0.0
return v0
}
; VCode:
; block0:
; movi v0.16b, #0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; movi v0.16b, #0
; ret
function %f() -> f64 {
block0:
v0 = f64const 0x0.0
@@ -416,6 +512,44 @@ block0:
; movi v0.2s, #0
; ret
function %f() -> f16 {
block0:
v0 = f16const 0x0.0
return v0
}
; VCode:
; block0:
; movi v0.2s, #0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; movi v0.2s, #0
; ret
function %f() -> f128 {
block0:
v0 = f128const -0x10.0
return v0
}
; VCode:
; block0:
; ldr q0, [const(0)]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr q0, #0x10
; ret
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x03, 0xc0
function %f() -> f64 {
block0:
v0 = f64const -0x10.0
@@ -448,3 +582,21 @@ block0:
; fmov s0, #-16.00000000
; ret
function %f() -> f16 {
block0:
v0 = f16const -0x10.0
return v0
}
; VCode:
; block0:
; movz w0, #52224
; fmov s0, w0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mov w0, #0xcc00
; fmov s0, w0
; ret

cranelift/filetests/filetests/isa/aarch64/load-f16-f128.clif

@@ -0,0 +1,36 @@
test compile precise-output
set unwind_info=false
target aarch64
function %load_f16(i64) -> f16 {
block0(v0: i64):
v1 = load.f16 v0
return v1
}
; VCode:
; block0:
; ldr h0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr h0, [x0] ; trap: heap_oob
; ret
function %load_f128(i64) -> f128 {
block0(v0: i64):
v1 = load.f128 v0
return v1
}
; VCode:
; block0:
; ldr q0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; ldr q0, [x0] ; trap: heap_oob
; ret

cranelift/filetests/filetests/isa/aarch64/select-fp16.clif

@@ -0,0 +1,21 @@
test compile precise-output
target aarch64 has_fp16
function %select_f16(i8, f16, f16) -> f16 {
block0(v0: i8, v1: f16, v2: f16):
v3 = select.f16 v0, v1, v2
return v3
}
; VCode:
; block0:
; ands wzr, w0, #255
; fcsel h0, h0, h1, ne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; tst w0, #0xff
; fcsel h0, h0, h1, ne
; ret

cranelift/filetests/filetests/isa/aarch64/select.clif

@@ -41,3 +41,78 @@ block0(v0: f32, v1: f32, v2: i64, v3: i64):
; csel x0, x0, x1, eq
; ret
function %select_f16(i8, f16, f16) -> f16 {
block0(v0: i8, v1: f16, v2: f16):
v3 = select.f16 v0, v1, v2
return v3
}
; VCode:
; block0:
; ands wzr, w0, #255
; fcsel s0, s0, s1, ne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; tst w0, #0xff
; fcsel s0, s0, s1, ne
; ret
function %select_f32(i8, f32, f32) -> f32 {
block0(v0: i8, v1: f32, v2: f32):
v3 = select.f32 v0, v1, v2
return v3
}
; VCode:
; block0:
; ands wzr, w0, #255
; fcsel s0, s0, s1, ne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; tst w0, #0xff
; fcsel s0, s0, s1, ne
; ret
function %select_f64(i8, f64, f64) -> f64 {
block0(v0: i8, v1: f64, v2: f64):
v3 = select.f64 v0, v1, v2
return v3
}
; VCode:
; block0:
; ands wzr, w0, #255
; fcsel d0, d0, d1, ne
; ret
;
; Disassembled:
; block0: ; offset 0x0
; tst w0, #0xff
; fcsel d0, d0, d1, ne
; ret
function %select_f128(i8, f128, f128) -> f128 {
block0(v0: i8, v1: f128, v2: f128):
v3 = select.f128 v0, v1, v2
return v3
}
; VCode:
; block0:
; ands wzr, w0, #255
; vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; tst w0, #0xff
; b.ne #0x10
; mov v0.16b, v1.16b
; b #0x14
; mov v0.16b, v0.16b
; ret

cranelift/filetests/filetests/isa/aarch64/store-f16-f128.clif

@@ -0,0 +1,36 @@
test compile precise-output
set unwind_info=false
target aarch64
function %store_f16(f16, i64) {
block0(v0: f16, v1: i64):
store.f16 v0, v1
return
}
; VCode:
; block0:
; str h0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; str h0, [x0] ; trap: heap_oob
; ret
function %store_f128(f128, i64) {
block0(v0: f128, v1: i64):
store.f128 v0, v1
return
}
; VCode:
; block0:
; str q0, [x0]
; ret
;
; Disassembled:
; block0: ; offset 0x0
; str q0, [x0] ; trap: heap_oob
; ret

cranelift/filetests/filetests/runtests/bitcast-f16-f128.clif

@@ -2,6 +2,8 @@ test interpret
test run
set enable_llvm_abi_extensions
target x86_64
target aarch64
target aarch64 has_fp16
function %bitcast_i16_f16(i16) -> f16 fast {
block0(v0: i16):

cranelift/filetests/filetests/runtests/f128const.clif

@@ -2,6 +2,7 @@ test interpret
test run
set enable_llvm_abi_extensions
target x86_64
target aarch64
;; These values are special for RISC-V since it has a dedicated

cranelift/filetests/filetests/runtests/f16const.clif

@@ -2,6 +2,8 @@ test interpret
test run
set enable_llvm_abi_extensions
target x86_64
target aarch64
target aarch64 has_fp16
;; These values are special for RISC-V since it has a dedicated

cranelift/filetests/filetests/runtests/select-f16-f128.clif

@@ -2,6 +2,8 @@ test interpret
test run
set enable_llvm_abi_extensions
target x86_64
target aarch64
target aarch64 has_fp16
function %select_icmp_i8_f16(i8, f16, f16) -> f16 {
block0(v0: i8, v1: f16, v2: f16):

cranelift/native/src/lib.rs

@@ -109,6 +109,10 @@ pub fn infer_native_flags(isa_builder: &mut dyn Configurable) -> Result<(), &'st
isa_builder.enable("has_pauth").unwrap();
}
if std::arch::is_aarch64_feature_detected!("fp16") {
isa_builder.enable("has_fp16").unwrap();
}
if cfg!(target_os = "macos") {
// Pointer authentication is always available on Apple Silicon.
isa_builder.enable("sign_return_address").unwrap();

crates/fuzzing/src/generators/codegen_settings.rs

@@ -123,6 +123,7 @@ impl<'a> Arbitrary<'a> for CodegenSettings {
std: "bti" => clif: "use_bti",
std: "lse" => clif: "has_lse",
std: "fp16" => clif: "has_fp16",
// even though the natural correspondence seems to be
// between "paca" and "has_pauth", the latter has no effect
// in isolation, so we actually use the setting that affects

crates/wasmtime/src/config.rs

@@ -2812,6 +2812,7 @@ fn detect_host_feature(feature: &str) -> Option<bool> {
return match feature {
"lse" => Some(std::arch::is_aarch64_feature_detected!("lse")),
"paca" => Some(std::arch::is_aarch64_feature_detected!("paca")),
"fp16" => Some(std::arch::is_aarch64_feature_detected!("fp16")),
_ => None,
};

crates/wasmtime/src/engine.rs

@@ -391,6 +391,7 @@ impl Engine {
// aarch64 features to detect
"has_lse" => "lse",
"has_pauth" => "paca",
"has_fp16" => "fp16",
// aarch64 features which don't need detection
// No effect on its own.

src/commands/compile.rs

@@ -202,6 +202,7 @@ mod test {
"-Dlogging=n",
"-Ccranelift-has-lse",
"-Ccranelift-has-pauth",
"-Ccranelift-has-fp16",
"-Ccranelift-sign-return-address",
"-Ccranelift-sign-return-address-all",
"-Ccranelift-sign-return-address-with-bkey",

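Review note: with the last change above, the new setting is also exercised on the command line like the existing Cranelift feature flags. An invocation along these lines (illustrative, with a made-up module name) would request FEAT_FP16 code generation when compiling for a non-native target:

wasmtime compile --target aarch64-unknown-linux-gnu -Ccranelift-has-fp16 module.wasm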