Cranelift AArch64: Migrate Splat to ISLE (#4521)

Copyright (c) 2022, Arm Limited.
pull/4533/head
Anton Kirilov, 2 years ago (committed by GitHub)
commit ead6edb0c5
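This commit moves the lowering of the CLIF `splat` instruction on AArch64 from open-coded Rust in `lower_inst.rs` to declarative ISLE rules. For orientation: an ISLE lowering rule matches a typed CLIF instruction on the left-hand side and builds machine instructions through helper constructors on the right. The two central rules added by this change (quoted from the `lower.isle` hunk below) are:

    ;; Splat of a 64-bit-or-narrower scalar integer/bool/ref: GPR-to-vector dup.
    (rule (lower (has_type ty (splat x @ (value_type in_ty))))
          (if (ty_int_bool_ref_scalar_64 in_ty))
          (vec_dup x (vector_size ty)))

    ;; Splat of a scalar float: dup from a vector/FPU lane.
    (rule (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
          (vec_dup_from_fpu x (vector_size ty)))

Further rules below handle constant inputs via `splat_const` and sinkable loads via `ld1r`.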
Changed files:

 1. cranelift/codegen/src/ir/types.rs (4)
 2. cranelift/codegen/src/isa/aarch64/inst.isle (45)
 3. cranelift/codegen/src/isa/aarch64/inst/emit.rs (13)
 4. cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs (16)
 5. cranelift/codegen/src/isa/aarch64/inst/mod.rs (11)
 6. cranelift/codegen/src/isa/aarch64/inst/regs.rs (51)
 7. cranelift/codegen/src/isa/aarch64/lower.isle (43)
 8. cranelift/codegen/src/isa/aarch64/lower/isle.rs (26)
 9. cranelift/codegen/src/isa/aarch64/lower_inst.rs (75)
10. cranelift/codegen/src/isa/s390x/inst.isle (9)
11. cranelift/codegen/src/isa/s390x/lower.isle (13)
12. cranelift/codegen/src/isa/s390x/lower/isle.rs (5)
13. cranelift/codegen/src/machinst/isle.rs (28)
14. cranelift/codegen/src/prelude.isle (24)
15. cranelift/filetests/filetests/isa/aarch64/bitops.clif (11)
16. cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif (91)
17. cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif (114)
18. cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif (44)
19. cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif (12)
20. cranelift/filetests/filetests/isa/aarch64/prologue.clif (87)
21. cranelift/filetests/filetests/runtests/simd-splat.clif (193)
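A recurring, easy-to-miss change in the `lower.isle` and `prelude.isle` hunks below: `ty_int_bool_ref_scalar_64` turns from an extractor into a pure constructor, so rules now test it with an `if` clause instead of matching it inside the pattern. A before/after sketch of that rewrite, using the `ireduce` rule from the AArch64 `lower.isle` hunk:

    ;; Before: the type predicate is an extractor, matched inside the pattern.
    (rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
          (value_regs_get src 0))

    ;; After: the predicate is a pure constructor, checked with an `if` clause.
    (rule (lower (has_type ty (ireduce src)))
          (if (ty_int_bool_ref_scalar_64 ty))
          (value_regs_get src 0))

The s390x rules for `insertlane`, `extractlane`, `splat`, and `scalar_to_vector` get the same treatment.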

cranelift/codegen/src/ir/types.rs (4)

@ -171,8 +171,8 @@ impl Type {
self.replace_lanes(match self.lane_type() {
I8 | B1 | B8 => I8,
I16 | B16 => I16,
I32 | B32 => I32,
I64 | B64 => I64,
I32 | B32 | F32 => I32,
I64 | B64 | F64 => I64,
I128 | B128 => I128,
_ => unimplemented!(),
})

cranelift/codegen/src/isa/aarch64/inst.isle (45)

@ -627,7 +627,8 @@
(VecLoadReplicate
(rd WritableReg)
(rn Reg)
(size VectorSize))
(size VectorSize)
(flags MemFlags))
;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
;; control-flow diamond.
@ -1376,6 +1377,16 @@
(decl cond_br_cond (Cond) CondBrKind)
(extern constructor cond_br_cond cond_br_cond)
;; Lower the address of a load or a store.
(decl amode (Type Inst u32) AMode)
;; TODO: Port lower_address() to ISLE.
(extern constructor amode amode)
;; Matches an `AMode` that is just a register.
(decl pure amode_is_reg (AMode) Reg)
;; TODO: Implement in ISLE.
(extern constructor amode_is_reg amode_is_reg)
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helper for creating the zero register.
@ -1481,6 +1492,13 @@
(_ Unit (emit (MInst.VecDup dst src size))))
dst))
;; Helper for emitting `MInst.VecDupFromFpu` instructions.
(decl vec_dup_from_fpu (Reg VectorSize) Reg)
(rule (vec_dup_from_fpu src size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecDupFromFpu dst src size))))
dst))
;; Helper for emitting `MInst.AluRRImm12` instructions.
(decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg)
(rule (alu_rr_imm12 op ty src imm)
@ -2167,7 +2185,7 @@
(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
(extern extractor sinkable_atomic_load sinkable_atomic_load)
;; Sink a `SinkableLoad` into a `Reg`.
;; Sink a `SinkableAtomicLoad` into a `Reg`.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
@ -2230,6 +2248,29 @@
(alu_rrr op ty x_lo y_lo)
(alu_rrr op ty x_hi y_hi))))
;; Helper for emitting `MInst.VecLoadReplicate` instructions.
(decl ld1r (Reg VectorSize MemFlags) Reg)
(rule (ld1r src size flags)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecLoadReplicate dst src size flags))))
dst))
;; Helper for emitting `MInst.LoadAddr` instructions.
(decl load_addr (AMode) Reg)
(rule (load_addr addr)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.LoadAddr dst addr))))
dst))
(rule (load_addr addr)
(if-let addr_reg (amode_is_reg addr))
addr_reg)
;; Lower a vector splat with a constant parameter.
(decl splat_const (u64 VectorSize) Reg)
;; TODO: Port lower_splat_const() to ISLE.
(extern constructor splat_const splat_const)
;; Generate comparison to zero operator from input condition code
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)

cranelift/codegen/src/isa/aarch64/inst/emit.rs (13)

@ -2258,10 +2258,10 @@ impl MachInstEmit for Inst {
ScalarSize::Size16 => 0b00010,
ScalarSize::Size32 => 0b00100,
ScalarSize::Size64 => 0b01000,
_ => unimplemented!("Unexpected VectorSize: {:?}", size),
_ => unreachable!(),
};
sink.put4(
0b000_01110000_00000_000011_00000_00000
0b0_0_0_01110000_00000_000011_00000_00000
| (q << 30)
| (imm5 << 16)
| (machreg_to_gpr(rn) << 5)
@ -2625,13 +2625,18 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
}
&Inst::VecLoadReplicate { rd, rn, size } => {
&Inst::VecLoadReplicate {
rd,
rn,
size,
flags,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (q, size) = size.enc_size();
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() {
if srcloc != SourceLoc::default() && !flags.notrap() {
// Register the offset at which the actual load instruction starts.
sink.add_trap(TrapCode::HeapOutOfBounds);
}

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs (16)

@ -2351,10 +2351,10 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(25),
rn: xreg(7),
size: VectorSize::Size8x16,
size: VectorSize::Size8x8,
},
"F90C014E",
"dup v25.16b, w7",
"F90C010E",
"dup v25.8b, w7",
));
insns.push((
Inst::VecDup {
@ -2387,10 +2387,10 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(0),
rn: xreg(28),
size: VectorSize::Size32x4,
size: VectorSize::Size32x2,
},
"800F044E",
"dup v0.4s, w28",
"800F040E",
"dup v0.2s, w28",
));
insns.push((
Inst::VecDup {
@ -5199,8 +5199,8 @@ fn test_aarch64_binemit() {
Inst::VecLoadReplicate {
rd: writable_vreg(31),
rn: xreg(0),
size: VectorSize::Size64x2,
flags: MemFlags::trusted(),
},
"1FCC404D",
"ld1r { v31.2d }, [x0]",
@ -5210,8 +5210,8 @@ fn test_aarch64_binemit() {
Inst::VecLoadReplicate {
rd: writable_vreg(0),
rn: xreg(25),
size: VectorSize::Size8x8,
flags: MemFlags::trusted(),
},
"20C3400D",
"ld1r { v0.8b }, [x25]",

cranelift/codegen/src/isa/aarch64/inst/mod.rs (11)

@ -530,17 +530,6 @@ impl Inst {
}
}
}
/// Generate a LoadAddr instruction (load address of an amode into
/// register). Elides when possible (when amode is just a register). Returns
/// destination register: either `rd` or a register directly from the amode.
pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
if let Some(r) = mem.is_reg() {
(r, None)
} else {
(rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
}
}
}
//=============================================================================

cranelift/codegen/src/isa/aarch64/inst/regs.rs (51)

@ -165,6 +165,8 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(xreg(14)),
preg(xreg(15)),
// x16 and x17 are spilltmp and tmp2 (see above).
// x18 could be used by the platform to carry inter-procedural state;
// conservatively assume so and make it not allocatable.
// x19-28 are callee-saved and so not preferred.
// x21 is the pinned register (if enabled) and not allocatable if so.
// x29 is FP, x30 is LR, x31 is SP/ZR.
@ -178,30 +180,7 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(vreg(5)),
preg(vreg(6)),
preg(vreg(7)),
preg(vreg(8)),
preg(vreg(9)),
preg(vreg(10)),
preg(vreg(11)),
preg(vreg(12)),
preg(vreg(13)),
preg(vreg(14)),
preg(vreg(15)),
],
],
non_preferred_regs_by_class: [
vec![
preg(xreg(19)),
preg(xreg(20)),
// x21 is pinned reg if enabled; we add to this list below if not.
preg(xreg(22)),
preg(xreg(23)),
preg(xreg(24)),
preg(xreg(25)),
preg(xreg(26)),
preg(xreg(27)),
preg(xreg(28)),
],
vec![
// v8-15 are callee-saved and so not preferred.
preg(vreg(16)),
preg(vreg(17)),
preg(vreg(18)),
@ -220,6 +199,30 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(vreg(31)),
],
],
non_preferred_regs_by_class: [
vec![
preg(xreg(19)),
preg(xreg(20)),
// x21 is pinned reg if enabled; we add to this list below if not.
preg(xreg(22)),
preg(xreg(23)),
preg(xreg(24)),
preg(xreg(25)),
preg(xreg(26)),
preg(xreg(27)),
preg(xreg(28)),
],
vec![
preg(vreg(8)),
preg(vreg(9)),
preg(vreg(10)),
preg(vreg(11)),
preg(vreg(12)),
preg(vreg(13)),
preg(vreg(14)),
preg(vreg(15)),
],
],
fixed_stack_slots: vec![],
};

cranelift/codegen/src/isa/aarch64/lower.isle (43)

@ -1423,7 +1423,8 @@
;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y)))
(rule (lower (has_type ty (bitselect c x y)))
(if (ty_int_bool_ref_scalar_64 ty))
(let ((tmp1 Reg (and_reg ty x c))
(tmp2 Reg (bic ty y c)))
(orr ty tmp1 tmp2)))
@ -1441,12 +1442,14 @@
;; T -> I{64,32,16,8}: We can simply pass through the value: values
;; are always stored with high bits undefined, so we can just leave
;; them be.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
(rule (lower (has_type ty (ireduce src)))
(if (ty_int_bool_ref_scalar_64 ty))
(value_regs_get src 0))
;; Likewise for breduce.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
(rule (lower (has_type ty (breduce src)))
(if (ty_int_bool_ref_scalar_64 ty))
(value_regs_get src 0))
@ -1515,6 +1518,39 @@
(let ((use_allocated_encoding bool (is_not_baldrdash_call_conv)))
(side_effect (udf use_allocated_encoding trap_code))))
;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (splat x @ (value_type in_ty))))
(if (ty_int_bool_ref_scalar_64 in_ty))
(vec_dup x (vector_size ty)))
(rule (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
(vec_dup_from_fpu x (vector_size ty)))
(rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (breduce (bconst (u64_from_bool n))))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (iconst (u64_from_imm64 n)))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
(splat_const n (vector_size ty)))
(rule (lower (has_type ty (splat x @ (load flags _addr offset))))
(if-let mem_op (is_sinkable_inst x))
(let ((_ Unit (sink_inst mem_op))
(addr AMode (amode (lane_type ty) mem_op offset))
(address Reg (load_addr addr)))
(ld1r address (vector_size ty) flags)))
;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr)))
@ -1527,7 +1563,6 @@
addr))
(side_effect (store_release ty src addr)))
;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (and (use_lse)

cranelift/codegen/src/isa/aarch64/lower/isle.rs (26)

@ -5,12 +5,13 @@ pub mod generated_code;
// Types that the generated ISLE code uses via `use super::*`.
use super::{
writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
NZCV,
};
use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
use crate::isa::aarch64::settings::Flags as IsaFlags;
use crate::machinst::{isle::*, InputSourceInst};
use crate::settings::Flags;
@ -442,4 +443,25 @@ where
_ => panic!(),
}
}
fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode {
lower_address(
self.lower_ctx,
ty,
&insn_inputs(self.lower_ctx, mem_op)[..],
offset as i32,
)
}
fn amode_is_reg(&mut self, address: &AMode) -> Option<Reg> {
address.is_reg()
}
fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
let rd = self.temp_writable_reg(I8X16);
lower_splat_const(self.lower_ctx, rd, value, *size);
rd.to_reg()
}
}

cranelift/codegen/src/isa/aarch64/lower_inst.rs (75)

@ -741,80 +741,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}
Opcode::Splat => {
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
// TODO: Handle SVE Dup.
let ty = if ty.is_dynamic_vector() {
dynamic_to_fixed(ty)
} else {
ty
};
let size = VectorSize::from_ty(ty);
if let Some((_, insn)) = maybe_input_insn_multi(
ctx,
inputs[0],
&[
Opcode::Bconst,
Opcode::F32const,
Opcode::F64const,
Opcode::Iconst,
],
) {
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
} else if let Some(insn) =
maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Iconst, Opcode::Ireduce)
{
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
} else if let Some(insn) =
maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Bconst, Opcode::Breduce)
{
lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
} else if let Some((_, insn)) = maybe_input_insn_multi(
ctx,
inputs[0],
&[
Opcode::Uload8,
Opcode::Sload8,
Opcode::Uload16,
Opcode::Sload16,
Opcode::Uload32,
Opcode::Sload32,
Opcode::Load,
],
) {
ctx.sink_inst(insn);
let load_inputs = insn_inputs(ctx, insn);
let load_outputs = insn_outputs(ctx, insn);
lower_load(
ctx,
insn,
&load_inputs[..],
load_outputs[0],
|ctx, _rd, _elem_ty, mem| {
let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
let (addr, addr_inst) = Inst::gen_load_addr(tmp, mem);
if let Some(addr_inst) = addr_inst {
ctx.emit(addr_inst);
}
ctx.emit(Inst::VecLoadReplicate { rd, rn: addr, size });
Ok(())
},
)?;
} else {
let input_ty = ctx.input_ty(insn, 0);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let inst = if ty_has_int_representation(input_ty) {
Inst::VecDup { rd, rn, size }
} else {
Inst::VecDupFromFpu { rd, rn, size }
};
ctx.emit(inst);
}
}
Opcode::Splat => implemented_in_isle(ctx),
Opcode::ScalarToVector => implemented_in_isle(ctx),

cranelift/codegen/src/isa/s390x/inst.isle (9)

@ -1641,15 +1641,6 @@
(decl sinkable_inst (Inst) Value)
(extern extractor sinkable_inst sinkable_inst)
;; Sink a sinkable instruction.
;;
;; This is a side-effectful operation that notifies the context that the
;; sinkable instruction has been sunk into another instruction, and no longer
;; needs to be lowered.
(decl sink_inst (Inst) Unit)
(extern constructor sink_inst sink_inst)
;; Sinkable big-endian load instruction.
(decl sinkable_load (Inst) Value)
(extractor (sinkable_load inst)

cranelift/codegen/src/isa/s390x/lower.isle (13)

@ -1656,8 +1656,9 @@
;; Insert vector lane from general-purpose register.
(rule (lower (insertlane x @ (value_type ty)
y @ (value_type (ty_int_bool_ref_scalar_64 _))
y @ (value_type in_ty)
(u8_from_uimm8 idx)))
(if (ty_int_bool_ref_scalar_64 in_ty))
(vec_insert_lane ty x y (be_lane_idx ty idx) (zero_reg)))
;; Insert vector lane from floating-point register.
@ -1771,8 +1772,9 @@
;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract vector lane to general-purpose register.
(rule (lower (has_type (ty_int_bool_ref_scalar_64 _)
(rule (lower (has_type out_ty
(extractlane x @ (value_type ty) (u8_from_uimm8 idx))))
(if (ty_int_bool_ref_scalar_64 out_ty))
(vec_extract_lane ty x (be_lane_idx ty idx) (zero_reg)))
;; Extract vector lane to floating-point register.
@ -1828,8 +1830,8 @@
;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load replicated value from general-purpose register.
(rule (lower (has_type ty (splat
x @ (value_type (ty_int_bool_ref_scalar_64 _)))))
(rule (lower (has_type ty (splat x @ (value_type in_ty))))
(if (ty_int_bool_ref_scalar_64 in_ty))
(vec_replicate_lane ty (vec_insert_lane_undef ty x 0 (zero_reg)) 0))
;; Load replicated value from floating-point register.
@ -1888,7 +1890,8 @@
;; Load scalar value from general-purpose register.
(rule (lower (has_type ty (scalar_to_vector
x @ (value_type (ty_int_bool_ref_scalar_64 _)))))
x @ (value_type in_ty))))
(if (ty_int_bool_ref_scalar_64 in_ty))
(vec_insert_lane ty (vec_imm ty 0) x (be_lane_idx ty 0) (zero_reg)))
;; Load scalar value from floating-point register.

cranelift/codegen/src/isa/s390x/lower/isle.rs (5)

@ -666,11 +666,6 @@ where
None
}
#[inline]
fn sink_inst(&mut self, inst: Inst) -> Unit {
self.lower_ctx.sink_inst(inst);
}
#[inline]
fn emit(&mut self, inst: &MInst) -> Unit {
self.lower_ctx.emit(inst.clone());

cranelift/codegen/src/machinst/isle.rs (28)

@ -11,7 +11,9 @@ pub use crate::ir::{
SigRef, StackSlot,
};
pub use crate::isa::unwind::UnwindInst;
pub use crate::machinst::{ABIArg, ABIArgSlot, ABISig, RealReg, Reg, RelocDistance, Writable};
pub use crate::machinst::{
ABIArg, ABIArgSlot, ABISig, InputSourceInst, RealReg, Reg, RelocDistance, Writable,
};
pub type Unit = ();
pub type ValueSlice = (ValueList, usize);
@ -425,6 +427,15 @@ macro_rules! isle_prelude_methods {
imm.bits() as u64
}
#[inline]
fn u64_from_bool(&mut self, b: bool) -> u64 {
if b {
u64::MAX
} else {
0
}
}
#[inline]
fn inst_results(&mut self, inst: Inst) -> ValueSlice {
(self.lower_ctx.dfg().inst_results_list(inst), 0)
@ -854,6 +865,21 @@ macro_rules! isle_prelude_methods {
fn real_reg_to_writable_reg(&mut self, reg: RealReg) -> WritableReg {
Writable::from_reg(Reg::from(reg))
}
fn is_sinkable_inst(&mut self, val: Value) -> Option<Inst> {
let input = self.lower_ctx.get_value_as_source_or_const(val);
if let InputSourceInst::UniqueUse(inst, _) = input.inst {
Some(inst)
} else {
None
}
}
#[inline]
fn sink_inst(&mut self, inst: Inst) {
self.lower_ctx.sink_inst(inst);
}
};
}

cranelift/codegen/src/prelude.isle (24)

@ -308,10 +308,10 @@
(decl fits_in_64 (Type) Type)
(extern extractor fits_in_64 fits_in_64)
;; An extractor that only matches scalar booleans, integers, and references that
;; can fit in 64 bits.
(decl ty_int_bool_ref_scalar_64 (Type) Type)
(extern extractor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)
;; A pure constructor that only matches scalar booleans, integers, and
;; references that can fit in 64 bits.
(decl pure ty_int_bool_ref_scalar_64 (Type) Type)
(extern constructor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)
;; An extractor that matches 32- and 64-bit types only.
(decl ty_32_or_64 (Type) Type)
@ -407,6 +407,10 @@
(decl u8_from_uimm8 (u8) Uimm8)
(extern extractor infallible u8_from_uimm8 u8_from_uimm8)
;; Extract a `u64` from a `bool`.
(decl u64_from_bool (u64) bool)
(extern extractor infallible u64_from_bool u64_from_bool)
;; Extract a `u64` from an `Imm64`.
(decl u64_from_imm64 (u64) Imm64)
(extern extractor infallible u64_from_imm64 u64_from_imm64)
@ -498,6 +502,10 @@
(decl pure zero_value (Value) Value)
(extern constructor zero_value zero_value)
;; Match a sinkable instruction from a value operand.
(decl pure is_sinkable_inst (Value) Inst)
(extern constructor is_sinkable_inst is_sinkable_inst)
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit an instruction.
@ -508,6 +516,14 @@
(decl emit (MInst) Unit)
(extern constructor emit emit)
;; Sink an instruction.
;;
;; This is a side-effectful operation that notifies the context that the
;; instruction has been sunk into another instruction, and no longer needs to
;; be lowered.
(decl sink_inst (Inst) Unit)
(extern constructor sink_inst sink_inst)
;; Constant pool emission.
(type VCodeConstant (primitive VCodeConstant))

cranelift/filetests/filetests/isa/aarch64/bitops.clif (11)

@ -244,18 +244,13 @@ block0(v0: i128):
return v1
}
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; stp d11, d13, [sp, #-16]!
; block0:
; fmov d6, x0
; mov v6.d[1], x1
; cnt v11.16b, v6.16b
; addv b13, v11.16b
; umov w0, v13.b[0]
; cnt v19.16b, v6.16b
; addv b21, v19.16b
; umov w0, v21.b[0]
; movz w1, #0
; ldp d11, d13, [sp], #16
; ldp fp, lr, [sp], #16
; ret
function %d(i64) -> i64 {

cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif (91)

@ -15,9 +15,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.8b, v7.8h
; ret
@ -35,9 +35,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; sqxtn v0.8b, v2.8h
; sqxtn2 v0.16b, v2.8h
; dup v6.8h, w0
; sqxtn v0.8b, v6.8h
; sqxtn2 v0.16b, v6.8h
; ret
function %snarrow_i32x2(i32) -> i16x4 {
@ -54,9 +54,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtn v0.4h, v7.4s
; ret
@ -74,9 +74,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; sqxtn v0.4h, v2.4s
; sqxtn2 v0.8h, v2.4s
; dup v6.4s, w0
; sqxtn v0.4h, v6.4s
; sqxtn2 v0.8h, v6.4s
; ret
function %snarrow_i64x2(i64) -> i32x4 {
@ -93,9 +93,9 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; sqxtn v0.2s, v2.2d
; sqxtn2 v0.4s, v2.2d
; dup v6.2d, x0
; sqxtn v0.2s, v6.2d
; sqxtn2 v0.4s, v6.2d
; ret
function %unarrow_i16x4(i16) -> i8x8 {
@ -112,9 +112,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.8b, v7.8h
; ret
@ -132,9 +132,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; sqxtun v0.8b, v2.8h
; sqxtun2 v0.16b, v2.8h
; dup v6.8h, w0
; sqxtun v0.8b, v6.8h
; sqxtun2 v0.16b, v6.8h
; ret
function %unarrow_i32x2(i32) -> i16x4 {
@ -151,9 +151,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; sqxtun v0.4h, v7.4s
; ret
@ -171,9 +171,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; sqxtun v0.4h, v2.4s
; sqxtun2 v0.8h, v2.4s
; dup v6.4s, w0
; sqxtun v0.4h, v6.4s
; sqxtun2 v0.8h, v6.4s
; ret
function %unarrow_i64x2(i64) -> i32x4 {
@ -190,9 +190,9 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; sqxtun v0.2s, v2.2d
; sqxtun2 v0.4s, v2.2d
; dup v6.2d, x0
; sqxtun v0.2s, v6.2d
; sqxtun2 v0.4s, v6.2d
; ret
function %uunarrow_i16x4(i16) -> i8x8 {
@ -209,9 +209,9 @@ block0(v0: i16):
}
; block0:
; dup v2.4h, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.4h, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.8b, v7.8h
; ret
@ -229,9 +229,9 @@ block0(v0: i16):
}
; block0:
; dup v2.8h, w0
; uqxtn v0.8b, v2.8h
; uqxtn2 v0.16b, v2.8h
; dup v6.8h, w0
; uqxtn v0.8b, v6.8h
; uqxtn2 v0.16b, v6.8h
; ret
function %uunarrow_i32x2(i32) -> i16x4 {
@ -248,9 +248,9 @@ block0(v0: i32):
}
; block0:
; dup v2.2s, w0
; mov v7.16b, v2.16b
; mov v7.d[1], v2.d[0]
; dup v6.2s, w0
; mov v7.16b, v6.16b
; mov v7.d[1], v6.d[0]
; uqxtn v0.4h, v7.4s
; ret
@ -268,9 +268,9 @@ block0(v0: i32):
}
; block0:
; dup v2.4s, w0
; uqxtn v0.4h, v2.4s
; uqxtn2 v0.8h, v2.4s
; dup v6.4s, w0
; uqxtn v0.4h, v6.4s
; uqxtn2 v0.8h, v6.4s
; ret
function %uunarrow_i64x2(i64) -> i32x4 {
@ -287,8 +287,7 @@ block0(v0: i64):
}
; block0:
; dup v2.2d, x0
; uqxtn v0.2s, v2.2d
; uqxtn2 v0.4s, v2.2d
; dup v6.2d, x0
; uqxtn v0.2s, v6.2d
; uqxtn2 v0.4s, v6.2d
; ret

cranelift/filetests/filetests/isa/aarch64/dynamic-simd-neon.clif (114)

@ -1,4 +1,4 @@
test compile
test compile precise-output
target aarch64
function %i8x16_splat_add(i8, i8) -> i8x16 {
@ -13,10 +13,11 @@ block0(v0: i8, v1: i8):
return v5
}
; check: dup v4.16b, w0
; nextln: dup v6.16b, w1
; nextln: add v0.16b, v4.16b, v6.16b
; nextln: ret
; block0:
; dup v16.16b, w0
; dup v17.16b, w1
; add v0.16b, v16.16b, v17.16b
; ret
function %i16x8_splat_add(i16, i16) -> i16x8 {
gv0 = dyn_scale_target_const.i16x8
@ -30,10 +31,11 @@ block0(v0: i16, v1: i16):
return v5
}
; check: dup v4.8h, w0
; nextln: dup v6.8h, w1
; nextln: add v0.8h, v4.8h, v6.8h
; nextln: ret
; block0:
; dup v16.8h, w0
; dup v17.8h, w1
; add v0.8h, v16.8h, v17.8h
; ret
function %i32x4_splat_mul(i32, i32) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@ -47,10 +49,11 @@ block0(v0: i32, v1: i32):
return v5
}
; check: dup v4.4s, w0
; nextln: dup v6.4s, w1
; nextln: mul v0.4s, v4.4s, v6.4s
; nextln: ret
; block0:
; dup v16.4s, w0
; dup v17.4s, w1
; mul v0.4s, v16.4s, v17.4s
; ret
function %i64x2_splat_sub(i64, i64) -> i64x2 {
gv0 = dyn_scale_target_const.i64x2
@ -64,10 +67,11 @@ block0(v0: i64, v1: i64):
return v5
}
; check: dup v4.2d, x0
; nextln: dup v6.2d, x1
; nextln: sub v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, x0
; dup v17.2d, x1
; sub v0.2d, v16.2d, v17.2d
; ret
function %f32x4_splat_add(f32, f32) -> f32x4 {
gv0 = dyn_scale_target_const.f32x4
@ -81,10 +85,11 @@ block0(v0: f32, v1: f32):
return v5
}
; check: dup v4.4s, v0.s[0]
; nextln: dup v6.4s, v1.s[0]
; nextln: fadd v0.4s, v4.4s, v6.4s
; nextln: ret
; block0:
; dup v16.4s, v0.s[0]
; dup v17.4s, v1.s[0]
; fadd v0.4s, v16.4s, v17.4s
; ret
function %f64x2_splat_sub(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -98,10 +103,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fsub v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fsub v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_mul(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -115,10 +121,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmul v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmul v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_div(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -132,10 +139,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fdiv v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fdiv v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_min(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -149,10 +157,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmin v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmin v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_max(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -166,10 +175,11 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fmax v0.2d, v4.2d, v6.2d
; nextln: ret
; block0:
; dup v16.2d, v0.d[0]
; dup v17.2d, v1.d[0]
; fmax v0.2d, v16.2d, v17.2d
; ret
function %f64x2_splat_min_pseudo(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -183,11 +193,12 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fcmgt v0.2d, v4.2d, v6.2d
; nextln: bsl v0.16b, v6.16b, v4.16b
; nextln: ret
; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v17.2d, v18.2d
; bsl v0.16b, v18.16b, v17.16b
; ret
function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
gv0 = dyn_scale_target_const.f64x2
@ -201,8 +212,9 @@ block0(v0: f64, v1: f64):
return v5
}
; check: dup v4.2d, v0.d[0]
; nextln: dup v6.2d, v1.d[0]
; nextln: fcmgt v0.2d, v6.2d, v4.2d
; nextln: bsl v0.16b, v6.16b, v4.16b
; nextln: ret
; block0:
; dup v17.2d, v0.d[0]
; dup v18.2d, v1.d[0]
; fcmgt v0.2d, v18.2d, v17.2d
; bsl v0.16b, v18.16b, v17.16b
; ret

cranelift/filetests/filetests/isa/aarch64/dynamic-simd-widen.clif (44)

@ -1,4 +1,4 @@
test compile
test compile precise-output
target aarch64
function %swidenhigh_i8x16(i8) -> i16x8 {
@ -14,9 +14,10 @@ block0(v0: i8):
return v3
}
; check: dup v2.16b, w0
; nextln: sxtl2 v0.8h, v2.16b
; nextln: ret
; block0:
; dup v5.16b, w0
; sxtl2 v0.8h, v5.16b
; ret
function %swidenhigh_i16x8(i16) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@ -31,9 +32,10 @@ block0(v0: i16):
return v3
}
; check: dup v2.8h, w0
; nextln: sxtl2 v0.4s, v2.8h
; nextln: ret
; block0:
; dup v5.8h, w0
; sxtl2 v0.4s, v5.8h
; ret
function %swidenhigh_i32x4(i32) -> i64x2 {
gv0 = dyn_scale_target_const.i32x4
@ -48,9 +50,10 @@ block0(v0: i32):
return v3
}
; check: dup v2.4s, w0
; nextln: sxtl2 v0.2d, v2.4s
; nextln: ret
; block0:
; dup v5.4s, w0
; sxtl2 v0.2d, v5.4s
; ret
function %swidenlow_i8x16(i8) -> i16x8 {
gv0 = dyn_scale_target_const.i16x8
@ -65,9 +68,10 @@ block0(v0: i8):
return v3
}
; check: dup v2.16b, w0
; nextln: sxtl v0.8h, v2.8b
; nextln: ret
; block0:
; dup v5.16b, w0
; sxtl v0.8h, v5.8b
; ret
function %swidenlow_i16x8(i16) -> i32x4 {
gv0 = dyn_scale_target_const.i32x4
@ -82,9 +86,10 @@ block0(v0: i16):
return v3
}
; check: dup v2.8h, w0
; nextln: sxtl v0.4s, v2.4h
; nextln: ret
; block0:
; dup v5.8h, w0
; sxtl v0.4s, v5.4h
; ret
function %swidenlow_i32x4(i32) -> i64x2 {
gv0 = dyn_scale_target_const.i32x4
@ -99,6 +104,7 @@ block0(v0: i32):
return v3
}
; check: dup v2.4s, w0
; nextln: sxtl v0.2d, v2.2s
; nextln: ret
; block0:
; dup v5.4s, w0
; sxtl v0.2d, v5.2s
; ret

cranelift/filetests/filetests/isa/aarch64/dynamic-slot.clif (12)

@ -58,9 +58,9 @@ block0(v0: i32):
; mov fp, sp
; sub sp, sp, #16
; block0:
; dup v2.4s, w0
; mov x4, sp
; str q2, [x4]
; dup v3.4s, w0
; mov x3, sp
; str q3, [x3]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret
@ -101,9 +101,9 @@ block0(v0: i32):
; mov fp, sp
; sub sp, sp, #16
; block0:
; dup v2.4s, w0
; mov x4, sp
; str q2, [x4]
; dup v3.4s, w0
; mov x3, sp
; str q3, [x3]
; add sp, sp, #16
; ldp fp, lr, [sp], #16
; ret

cranelift/filetests/filetests/isa/aarch64/prologue.clif (87)

@ -82,6 +82,14 @@ block0(v0: f64):
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; block0:
; fadd d24, d0, d0
; fadd d25, d0, d0
; fadd d26, d0, d0
; fadd d27, d0, d0
; fadd d28, d0, d0
; fadd d29, d0, d0
; fadd d30, d0, d0
; fadd d31, d0, d0
; fadd d1, d0, d0
; fadd d2, d0, d0
; fadd d3, d0, d0
@ -89,14 +97,6 @@ block0(v0: f64):
; fadd d5, d0, d0
; fadd d6, d0, d0
; fadd d7, d0, d0
; fadd d8, d0, d0
; fadd d9, d0, d0
; fadd d10, d0, d0
; fadd d11, d0, d0
; fadd d12, d0, d0
; fadd d13, d0, d0
; fadd d14, d0, d0
; fadd d15, d0, d0
; fadd d16, d0, d0
; fadd d17, d0, d0
; fadd d18, d0, d0
@ -105,45 +105,45 @@ block0(v0: f64):
; fadd d21, d0, d0
; fadd d22, d0, d0
; fadd d23, d0, d0
; fadd d24, d0, d0
; fadd d25, d0, d0
; fadd d26, d0, d0
; fadd d27, d0, d0
; fadd d28, d0, d0
; fadd d29, d0, d0
; fadd d30, d0, d0
; fadd d31, d0, d0
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d4, d8, d9
; fadd d5, d10, d11
; fadd d6, d12, d13
; fadd d7, d14, d15
; fadd d8, d16, d17
; fadd d9, d18, d19
; fadd d10, d20, d21
; fadd d11, d22, d23
; fadd d12, d24, d25
; fadd d13, d26, d27
; fadd d14, d28, d29
; fadd d15, d30, d31
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d8, d0, d0
; fadd d9, d0, d0
; fadd d10, d0, d0
; fadd d11, d0, d0
; fadd d12, d0, d0
; fadd d13, d0, d0
; fadd d14, d0, d0
; fadd d15, d0, d0
; fadd d24, d0, d24
; fadd d25, d25, d26
; fadd d26, d27, d28
; fadd d27, d29, d30
; fadd d28, d31, d1
; fadd d29, d2, d3
; fadd d30, d4, d5
; fadd d31, d6, d7
; fadd d0, d16, d17
; fadd d1, d18, d19
; fadd d2, d20, d21
; fadd d3, d22, d23
; fadd d4, d8, d9
; fadd d5, d10, d11
; fadd d6, d12, d13
; fadd d7, d14, d15
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d2, d4, d5
; fadd d3, d6, d7
; fadd d0, d0, d1
; fadd d1, d2, d3
; fadd d0, d0, d1
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d26, d28, d29
; fadd d27, d30, d31
; fadd d28, d0, d1
; fadd d29, d2, d3
; fadd d30, d4, d5
; fadd d31, d6, d7
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d26, d28, d29
; fadd d27, d30, d31
; fadd d24, d24, d25
; fadd d25, d26, d27
; fadd d0, d24, d25
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
@ -242,4 +242,3 @@ block0(v0: i64):
; ldr x28, [sp], #16
; ldp fp, lr, [sp], #16
; ret

cranelift/filetests/filetests/runtests/simd-splat.clif (193)

@ -1,4 +1,4 @@
test interpret
; test interpret TODO: Not yet implemented
test run
target aarch64
target s390x
@ -10,6 +10,8 @@ block0(v0: i8):
v1 = splat.i8x16 v0
return v1
}
; run: %splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
; run: %splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
; run: %splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %splat_i16x8(i16) -> i16x8 {
@ -17,6 +19,8 @@ block0(v0: i16):
v1 = splat.i16x8 v0
return v1
}
; run: %splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
; run: %splat_i16x8(0) == [0 0 0 0 0 0 0 0]
; run: %splat_i16x8(512) == [512 512 512 512 512 512 512 512]
function %splat_i32x4(i32) -> i32x4 {
@ -24,6 +28,8 @@ block0(v0: i32):
v1 = splat.i32x4 v0
return v1
}
; run: %splat_i32x4(-1) == [-1 -1 -1 -1]
; run: %splat_i32x4(0) == [0 0 0 0]
; run: %splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]
function %splat_i64x2(i64) -> i64x2 {
@ -31,4 +37,189 @@ block0(v0: i64):
v1 = splat.i64x2 v0
return v1
}
; run: %splat_i64x2(-1) == [-1 -1]
; run: %splat_i64x2(0) == [0 0]
; run: %splat_i64x2(5000000000) == [5000000000 5000000000]
function %splat_f32x4(f32) -> f32x4 {
block0(v0: f32):
v1 = splat.f32x4 v0
return v1
}
; run: %splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
; run: %splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
; run: %splat_f32x4(NaN) == [NaN NaN NaN NaN]
function %splat_f64x2(f64) -> f64x2 {
block0(v0: f64):
v1 = splat.f64x2 v0
return v1
}
; run: %splat_f64x2(0x0.0) == [0x0.0 0x0.0]
; run: %splat_f64x2(0x2.0) == [0x2.0 0x2.0]
; run: %splat_f64x2(NaN) == [NaN NaN]
; TODO: Test combinations of `bconst` and `splat`, potentially with `breduce` in
; the middle
function %splat_i8x16_2(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i8 116
v2 = splat.i8x16 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i8x16_2([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
function %splat_i8x16_3(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = iconst.i16 116
v2 = ireduce.i8 v1
v3 = splat.i8x16 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i8x16_3([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
function %splat_i16x8_2(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i16 42
v2 = splat.i16x8 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i16x8_2([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
function %splat_i16x8_3(i16x8) -> i16x8 {
block0(v0: i16x8):
v1 = iconst.i64 42
v2 = ireduce.i16 v1
v3 = splat.i16x8 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i16x8_3([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
function %splat_i32x4_2(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i32 1024
v2 = splat.i32x4 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i32x4_2([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
function %splat_i32x4_3(i32x4) -> i32x4 {
block0(v0: i32x4):
v1 = iconst.i64 1024
v2 = ireduce.i32 v1
v3 = splat.i32x4 v2
v4 = iadd v0, v3
return v4
}
; run: %splat_i32x4_3([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
function %splat_i64x2_2(i64x2) -> i64x2 {
block0(v0: i64x2):
v1 = iconst.i64 -1
v2 = splat.i64x2 v1
v3 = iadd v0, v2
return v3
}
; run: %splat_i64x2_2([-1 0]) == [-2 -1]
function %splat_f32x4_2(f32x4) -> f32x4 {
block0(v0: f32x4):
v1 = f32const 0x1.5
v2 = splat.f32x4 v1
v3 = fadd v0, v2
return v3
}
; run: %splat_f32x4_2([0x0.0 NaN 0x1.0 0x2.0]) == [0x1.5 NaN 0x2.5 0x3.5]
function %splat_f64x2_2(f64x2) -> f64x2 {
block0(v0: f64x2):
v1 = f64const 0x7.5
v2 = splat.f64x2 v1
v3 = fadd v0, v2
return v3
}
; run: %splat_f64x2_2([0x0.0 0x1.0]) == [0x7.5 0x8.5]
function %load_splat_i8x16(i8) -> i8x16 {
ss0 = explicit_slot 8
block0(v0: i8):
stack_store.i8 v0, ss0
v1 = stack_load.i8 ss0
v2 = splat.i8x16 v1
return v2
}
; run: %load_splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
; run: %load_splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
; run: %load_splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
function %load_splat_i16x8(i16) -> i16x8 {
ss0 = explicit_slot 8
block0(v0: i16):
stack_store.i16 v0, ss0
v1 = stack_load.i16 ss0
v2 = splat.i16x8 v1
return v2
}
; run: %load_splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
; run: %load_splat_i16x8(0) == [0 0 0 0 0 0 0 0]
; run: %load_splat_i16x8(512) == [512 512 512 512 512 512 512 512]
function %load_splat_i32x4(i32) -> i32x4 {
ss0 = explicit_slot 8
block0(v0: i32):
stack_store.i32 v0, ss0
v1 = stack_load.i32 ss0
v2 = splat.i32x4 v1
return v2
}
; run: %load_splat_i32x4(-1) == [-1 -1 -1 -1]
; run: %load_splat_i32x4(0) == [0 0 0 0]
; run: %load_splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]
function %load_splat_i64x2(i64) -> i64x2 {
ss0 = explicit_slot 8
block0(v0: i64):
stack_store.i64 v0, ss0
v1 = stack_load.i64 ss0
v2 = splat.i64x2 v1
return v2
}
; run: %load_splat_i64x2(-1) == [-1 -1]
; run: %load_splat_i64x2(0) == [0 0]
; run: %load_splat_i64x2(5000000000) == [5000000000 5000000000]
function %load_splat_f32x4(f32) -> f32x4 {
ss0 = explicit_slot 8
block0(v0: f32):
stack_store.f32 v0, ss0
v1 = stack_load.f32 ss0
v2 = splat.f32x4 v1
return v2
}
; run: %load_splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
; run: %load_splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
; run: %load_splat_f32x4(NaN) == [NaN NaN NaN NaN]
function %load_splat_f64x2(f64) -> f64x2 {
ss0 = explicit_slot 8
block0(v0: f64):
stack_store.f64 v0, ss0
v1 = stack_load.f64 ss0
v2 = splat.f64x2 v1
return v2
}
; run: %load_splat_f64x2(0x0.0) == [0x0.0 0x0.0]
; run: %load_splat_f64x2(0x2.0) == [0x2.0 0x2.0]
; run: %load_splat_f64x2(NaN) == [NaN NaN]
