diff --git a/Cargo.lock b/Cargo.lock index ec72915319..e010c20113 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -608,6 +608,7 @@ dependencies = [ "hashbrown 0.14.3", "log", "postcard", + "pulley-interpreter", "regalloc2", "rustc-hash", "serde", @@ -665,6 +666,7 @@ dependencies = [ "gimli", "log", "num_cpus", + "pulley-interpreter", "serde", "serde_derive", "similar", diff --git a/Cargo.toml b/Cargo.toml index 5c6b44ea12..97f91c07c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -218,6 +218,8 @@ wasmtime-fuzzing = { path = "crates/fuzzing" } wasmtime-jit-icache-coherence = { path = "crates/jit-icache-coherence", version = "=25.0.0" } wasmtime-wit-bindgen = { path = "crates/wit-bindgen", version = "=25.0.0" } test-programs-artifacts = { path = 'crates/test-programs/artifacts' } + +pulley-interpreter = { path = 'pulley', version = "=0.1.0" } pulley-interpreter-fuzz = { path = 'pulley/fuzz' } cranelift-wasm = { path = "cranelift/wasm", version = "0.112.0" } diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 7eb2105a46..1126c22336 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -30,6 +30,7 @@ target-lexicon = { workspace = true } log = { workspace = true } serde = { workspace = true, optional = true } serde_derive = { workspace = true, optional = true } +pulley-interpreter = { workspace = true, optional = true } postcard = { workspace = true, optional = true } gimli = { workspace = true, features = ["write", "std"], optional = true } smallvec = { workspace = true } @@ -81,6 +82,7 @@ x86 = [] arm64 = [] s390x = [] riscv64 = [] +pulley = ["dep:pulley-interpreter", "pulley-interpreter/encode", "pulley-interpreter/disas"] # Enable the ISA target for the host machine host-arch = [] @@ -89,7 +91,8 @@ all-arch = [ "x86", "arm64", "s390x", - "riscv64" + "riscv64", + "pulley", ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/meta/src/isa/mod.rs 
b/cranelift/codegen/meta/src/isa/mod.rs index ecda9b83d0..655b14a9c5 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -3,6 +3,7 @@ use crate::cdsl::isa::TargetIsa; use std::fmt; mod arm64; +mod pulley; mod riscv64; mod s390x; pub(crate) mod x86; @@ -14,6 +15,8 @@ pub enum Isa { Arm64, S390x, Riscv64, + Pulley32, + Pulley64, } impl Isa { @@ -32,13 +35,22 @@ impl Isa { "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), "riscv64" | "riscv64gc" | "riscv64imac" => Some(Isa::Riscv64), + "pulley32" => Some(Isa::Pulley32), + "pulley64" => Some(Isa::Pulley64), _ => None, } } /// Returns all supported isa targets. pub fn all() -> &'static [Isa] { - &[Isa::X86, Isa::Arm64, Isa::S390x, Isa::Riscv64] + &[ + Isa::X86, + Isa::Arm64, + Isa::S390x, + Isa::Riscv64, + Isa::Pulley32, + Isa::Pulley64, + ] } } @@ -50,6 +62,8 @@ impl fmt::Display for Isa { Isa::Arm64 => write!(f, "arm64"), Isa::S390x => write!(f, "s390x"), Isa::Riscv64 => write!(f, "riscv64"), + Isa::Pulley32 => write!(f, "pulley32"), + Isa::Pulley64 => write!(f, "pulley64"), } } } @@ -61,6 +75,7 @@ pub(crate) fn define(isas: &[Isa]) -> Vec { Isa::Arm64 => arm64::define(), Isa::S390x => s390x::define(), Isa::Riscv64 => riscv64::define(), + Isa::Pulley32 | Isa::Pulley64 => pulley::define(), }) .collect() } diff --git a/cranelift/codegen/meta/src/isa/pulley.rs b/cranelift/codegen/meta/src/isa/pulley.rs new file mode 100644 index 0000000000..7ae14ae682 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/pulley.rs @@ -0,0 +1,14 @@ +use crate::cdsl::{isa::TargetIsa, settings::SettingGroupBuilder}; + +pub(crate) fn define() -> TargetIsa { + let mut settings = SettingGroupBuilder::new("pulley"); + settings.add_enum( + "pointer_width", + "The width of pointers for this Pulley target", + "Supported values:\n\ + * 'pointer32'\n\ + * 'pointer64'\n", + vec!["pointer32", "pointer64"], + ); + TargetIsa::new("pulley", settings.build()) +} 
diff --git a/cranelift/codegen/meta/src/isle.rs b/cranelift/codegen/meta/src/isle.rs index a4d53cd895..3744c88075 100644 --- a/cranelift/codegen/meta/src/isle.rs +++ b/cranelift/codegen/meta/src/isle.rs @@ -33,12 +33,17 @@ pub fn get_isle_compilations( // Directory for mid-end optimizations. let src_opts = codegen_crate_dir.join("src").join("opts"); + // Directories for lowering backends. let src_isa_x64 = codegen_crate_dir.join("src").join("isa").join("x64"); let src_isa_aarch64 = codegen_crate_dir.join("src").join("isa").join("aarch64"); let src_isa_s390x = codegen_crate_dir.join("src").join("isa").join("s390x"); - let src_isa_risc_v = codegen_crate_dir.join("src").join("isa").join("riscv64"); + let src_isa_pulley_shared = codegen_crate_dir + .join("src") + .join("isa") + .join("pulley_shared"); + // This is a set of ISLE compilation units. // // The format of each entry is: @@ -121,6 +126,17 @@ pub fn get_isle_compilations( ], untracked_inputs: vec![clif_lower_isle.clone()], }, + // The Pulley instruction selector. + IsleCompilation { + output: gen_dir.join("isle_pulley_shared.rs"), + inputs: vec![ + prelude_isle.clone(), + prelude_lower_isle.clone(), + src_isa_pulley_shared.join("inst.isle"), + src_isa_pulley_shared.join("lower.isle"), + ], + untracked_inputs: vec![clif_lower_isle.clone()], + }, ], } } diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index ff1ccd9723..81cacb9e2a 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -722,12 +722,12 @@ impl ABIMachineSpec for AArch64MachineDeps { // present, resize the incoming argument area of the frame to accommodate those arguments. let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size; if incoming_args_diff > 0 { - // Decrement SP to account for the additional space required by a tail call + // Decrement SP to account for the additional space required by a tail call. 
insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32))); - // Move fp and lr down + // Move fp and lr down. if setup_frame { - // Reload the frame pointer from the stack + // Reload the frame pointer from the stack. insts.push(Inst::ULoad64 { rd: regs::writable_fp_reg(), mem: AMode::SPOffset { diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index fa4938389c..42df8204d7 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -16,7 +16,6 @@ use crate::ir::{condcodes, ArgumentExtension}; use crate::isa; use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm, ReturnCallInfo}; use crate::isa::aarch64::AArch64Backend; -use crate::isle_common_prelude_methods; use crate::machinst::isle::*; use crate::{ binemit::CodeOffset, diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 03ecdd2d54..5ba1b8610c 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -75,6 +75,13 @@ pub mod riscv64; #[cfg(feature = "s390x")] mod s390x; +#[cfg(feature = "pulley")] +mod pulley32; +#[cfg(feature = "pulley")] +mod pulley64; +#[cfg(feature = "pulley")] +mod pulley_shared; + pub mod unwind; mod call_conv; @@ -104,6 +111,8 @@ pub fn lookup(triple: Triple) -> Result { Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), Architecture::Riscv64 { .. 
} => isa_builder!(riscv64, (feature = "riscv64"), triple), + Architecture::Pulley32 => isa_builder!(pulley32, (feature = "pulley"), triple), + Architecture::Pulley64 => isa_builder!(pulley64, (feature = "pulley"), triple), _ => Err(LookupError::Unsupported), } } diff --git a/cranelift/codegen/src/isa/pulley32.rs b/cranelift/codegen/src/isa/pulley32.rs new file mode 100644 index 0000000000..b337274660 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley32.rs @@ -0,0 +1,13 @@ +pub use super::pulley_shared::isa_builder; + +use super::pulley_shared::PulleyTargetKind; +use crate::isa::pulley_shared::PointerWidth; + +#[derive(Debug, Default, Clone, Copy)] +pub(crate) struct Pulley32; + +impl PulleyTargetKind for Pulley32 { + fn pointer_width() -> PointerWidth { + PointerWidth::PointerWidth32 + } +} diff --git a/cranelift/codegen/src/isa/pulley64.rs b/cranelift/codegen/src/isa/pulley64.rs new file mode 100644 index 0000000000..31b1d08a04 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley64.rs @@ -0,0 +1,13 @@ +pub use super::pulley_shared::isa_builder; + +use super::pulley_shared::PulleyTargetKind; +use crate::isa::pulley_shared::PointerWidth; + +#[derive(Debug, Default, Clone, Copy)] +pub(crate) struct Pulley64; + +impl PulleyTargetKind for Pulley64 { + fn pointer_width() -> PointerWidth { + PointerWidth::PointerWidth64 + } +} diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs new file mode 100644 index 0000000000..a88fe2ac81 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -0,0 +1,821 @@ +//! Implementation of a standard Pulley ABI. 
+ +use super::{inst::*, PulleyFlags, PulleyTargetKind}; +use crate::{ + ir::{self, types::*, MemFlags, Signature}, + isa::{self, unwind::UnwindInst}, + machinst::*, + settings, CodegenError, CodegenResult, +}; +use alloc::{boxed::Box, vec::Vec}; +use core::marker::PhantomData; +use regalloc2::{MachineEnv, PReg, PRegSet}; +use smallvec::{smallvec, SmallVec}; +use std::sync::OnceLock; + +/// Support for the Pulley ABI from the callee side (within a function body). +pub(crate) type PulleyCallee

= Callee>; + +/// Support for the Pulley ABI from the caller side (at a callsite). +pub(crate) type PulleyABICallSite

= CallSite>; + +/// This is the limit for the size of argument and return-value areas on the +/// stack. We place a reasonable limit here to avoid integer overflow issues +/// with 32-bit arithmetic: for now, 128 MB. +static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; + +/// Pulley-specific ABI behavior. This struct just serves as an implementation +/// point for the trait; it is never actually instantiated. +pub struct PulleyMachineDeps

+where + P: PulleyTargetKind, +{ + _phantom: PhantomData

, +} + +impl

ABIMachineSpec for PulleyMachineDeps

+where + P: PulleyTargetKind, +{ + type I = InstAndKind

; + type F = PulleyFlags; + + fn word_bits() -> u32 { + P::pointer_width().bits().into() + } + + /// Return required stack alignment in bytes. + fn stack_align(_call_conv: isa::CallConv) -> u32 { + 16 + } + + fn compute_arg_locs( + call_conv: isa::CallConv, + _flags: &settings::Flags, + params: &[ir::AbiParam], + args_or_rets: ArgsOrRets, + add_ret_area_ptr: bool, + mut args: ArgsAccumulator, + ) -> CodegenResult<(u32, Option)> { + // NB: make sure this method stays in sync with + // `cranelift_pulley::interp::Vm::call`. + + let x_end = 15; + let f_end = 15; + let v_end = 15; + + let mut next_x_reg = 0; + let mut next_f_reg = 0; + let mut next_v_reg = 0; + let mut next_stack: u32 = 0; + + for param in params { + // Find the regclass(es) of the register(s) used to store a value of + // this type. + let (rcs, reg_tys) = Self::I::rc_for_type(param.value_type)?; + + let mut slots = ABIArgSlotVec::new(); + for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { + let next_reg = if (next_x_reg <= x_end) && *rc == RegClass::Int { + let x = Some(x_reg(next_x_reg)); + next_x_reg += 1; + x + } else if (next_f_reg <= f_end) && *rc == RegClass::Float { + let f = Some(f_reg(next_f_reg)); + next_f_reg += 1; + f + } else if (next_v_reg <= v_end) && *rc == RegClass::Vector { + let v = Some(v_reg(next_v_reg)); + next_v_reg += 1; + v + } else { + None + }; + + if let Some(reg) = next_reg { + slots.push(ABIArgSlot::Reg { + reg: reg.to_real_reg().unwrap(), + ty: *reg_ty, + extension: param.extension, + }); + } else { + // Compute size and 16-byte stack alignment happens + // separately after all args. + let size = reg_ty.bits() / 8; + let size = std::cmp::max(size, 8); + + // Align. 
+ debug_assert!(size.is_power_of_two()); + next_stack = align_to(next_stack, size); + + slots.push(ABIArgSlot::Stack { + offset: i64::from(next_stack), + ty: *reg_ty, + extension: param.extension, + }); + + next_stack += size; + } + } + + args.push(ABIArg::Slots { + slots, + purpose: param.purpose, + }); + } + + let pos = if add_ret_area_ptr { + assert!(ArgsOrRets::Args == args_or_rets); + if next_x_reg <= x_end { + let arg = ABIArg::reg( + x_reg(next_x_reg).to_real_reg().unwrap(), + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + } else { + let arg = ABIArg::stack( + next_stack as i64, + I64, + ir::ArgumentExtension::None, + ir::ArgumentPurpose::Normal, + ); + args.push(arg); + next_stack += 8; + } + Some(args.args().len() - 1) + } else { + None + }; + + next_stack = align_to(next_stack, Self::stack_align(call_conv)); + + // To avoid overflow issues, limit the arg/return size to something + // reasonable -- here, 128 MB. + if next_stack > STACK_ARG_RET_SIZE_LIMIT { + return Err(CodegenError::ImplLimitExceeded); + } + + Ok((next_stack, pos)) + } + + fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { + Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()).into() + } + + fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I { + Inst::gen_store(mem.into(), from_reg, ty, MemFlags::trusted()).into() + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self::I { + Self::I::gen_move(to_reg, from_reg, ty) + } + + fn gen_extend( + _to_reg: Writable, + _from_reg: Reg, + _signed: bool, + from_bits: u8, + to_bits: u8, + ) -> Self::I { + assert!(from_bits < to_bits); + todo!() + } + + fn get_ext_mode( + _call_conv: isa::CallConv, + specified: ir::ArgumentExtension, + ) -> ir::ArgumentExtension { + specified + } + + fn gen_args(args: Vec) -> Self::I { + Inst::Args { args }.into() + } + + fn gen_rets(rets: Vec) -> Self::I { + Inst::Rets { rets }.into() + } + + fn 
get_stacklimit_reg(_call_conv: isa::CallConv) -> Reg { + spilltmp_reg() + } + + fn gen_add_imm( + _call_conv: isa::CallConv, + _into_reg: Writable, + _from_reg: Reg, + _imm: u32, + ) -> SmallInstVec { + todo!() + } + + fn gen_stack_lower_bound_trap(_limit_reg: Reg) -> SmallInstVec { + todo!() + } + + fn gen_get_stack_addr(mem: StackAMode, dst: Writable) -> Self::I { + let dst = dst.to_reg(); + let dst = XReg::new(dst).unwrap(); + let dst = WritableXReg::from_reg(dst); + let mem = mem.into(); + Inst::LoadAddr { dst, mem }.into() + } + + fn gen_load_base_offset(into_reg: Writable, base: Reg, offset: i32, ty: Type) -> Self::I { + let offset = i64::from(offset); + let base = XReg::try_from(base).unwrap(); + let mem = Amode::RegOffset { base, offset }; + Inst::gen_load(into_reg, mem, ty, MemFlags::trusted()).into() + } + + fn gen_store_base_offset(_base: Reg, _offset: i32, _from_reg: Reg, _ty: Type) -> Self::I { + todo!() + } + + fn gen_sp_reg_adjust(amount: i32) -> SmallInstVec { + let temp = WritableXReg::try_from(writable_spilltmp_reg()).unwrap(); + + let imm = if let Ok(x) = i8::try_from(amount) { + Inst::Xconst8 { dst: temp, imm: x }.into() + } else if let Ok(x) = i16::try_from(amount) { + Inst::Xconst16 { dst: temp, imm: x }.into() + } else { + Inst::Xconst32 { + dst: temp, + imm: amount, + } + .into() + }; + + smallvec![ + imm, + Inst::Xadd32 { + dst: WritableXReg::try_from(writable_stack_reg()).unwrap(), + src1: XReg::new(stack_reg()).unwrap(), + src2: temp.to_reg(), + } + .into() + ] + } + + fn gen_prologue_frame_setup( + _call_conv: isa::CallConv, + flags: &settings::Flags, + _isa_flags: &PulleyFlags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let mut insts = SmallVec::new(); + + if frame_layout.setup_area_size > 0 { + // sp = sub sp, 16 ;; alloc stack space for frame pointer and return address. + // store sp+8, lr ;; save return address. + // store sp, fp ;; save old fp. + // mov sp, fp ;; set fp to sp. 
+ insts.extend(Self::gen_sp_reg_adjust(-16)); + insts.push( + Inst::gen_store( + Amode::SpOffset { offset: 8 }, + link_reg(), + I64, + MemFlags::trusted(), + ) + .into(), + ); + insts.push( + Inst::gen_store( + Amode::SpOffset { offset: 0 }, + fp_reg(), + I64, + MemFlags::trusted(), + ) + .into(), + ); + if flags.unwind_info() { + insts.push( + Inst::Unwind { + inst: UnwindInst::PushFrameRegs { + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + } + .into(), + ); + } + insts.push( + Inst::Xmov { + dst: Writable::from_reg(XReg::new(fp_reg()).unwrap()), + src: XReg::new(stack_reg()).unwrap(), + } + .into(), + ); + } + + insts + } + + /// Reverse of `gen_prologue_frame_setup`. + fn gen_epilogue_frame_restore( + _call_conv: isa::CallConv, + _flags: &settings::Flags, + _isa_flags: &PulleyFlags, + frame_layout: &FrameLayout, + ) -> SmallInstVec { + let mut insts = SmallVec::new(); + + if frame_layout.setup_area_size > 0 { + insts.push( + Inst::gen_load( + writable_link_reg(), + Amode::SpOffset { offset: 8 }, + I64, + MemFlags::trusted(), + ) + .into(), + ); + insts.push( + Inst::gen_load( + writable_fp_reg(), + Amode::SpOffset { offset: 0 }, + I64, + MemFlags::trusted(), + ) + .into(), + ); + insts.extend(Self::gen_sp_reg_adjust(16)); + } + + if frame_layout.tail_args_size > 0 { + insts.extend(Self::gen_sp_reg_adjust( + frame_layout.tail_args_size.try_into().unwrap(), + )); + } + + insts + } + + fn gen_return( + _call_conv: isa::CallConv, + _isa_flags: &PulleyFlags, + _frame_layout: &FrameLayout, + ) -> SmallInstVec { + smallvec![Inst::Ret {}.into()] + } + + fn gen_probestack(_insts: &mut SmallInstVec, _frame_size: u32) { + todo!() + } + + fn gen_clobber_save( + _call_conv: isa::CallConv, + flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Self::I; 16]> { + let mut insts = SmallVec::new(); + let setup_frame = frame_layout.setup_area_size > 0; + + let incoming_args_diff = frame_layout.tail_args_size - frame_layout.incoming_args_size; + 
if incoming_args_diff > 0 { + // Decrement SP by the amount of additional incoming argument space + // we need + insts.extend(Self::gen_sp_reg_adjust(-(incoming_args_diff as i32))); + + if setup_frame { + // Write the lr position on the stack again, as it hasn't + // changed since it was pushed in `gen_prologue_frame_setup` + insts.push( + Inst::gen_store( + Amode::SpOffset { offset: 8 }, + link_reg(), + I64, + MemFlags::trusted(), + ) + .into(), + ); + insts.push( + Inst::gen_load( + writable_fp_reg(), + Amode::SpOffset { + offset: i64::from(incoming_args_diff), + }, + I64, + MemFlags::trusted(), + ) + .into(), + ); + insts.push( + Inst::gen_store( + Amode::SpOffset { offset: 0 }, + fp_reg(), + I64, + MemFlags::trusted(), + ) + .into(), + ); + + // Finally, sync the frame pointer with SP. + insts.push(Self::I::gen_move(writable_fp_reg(), stack_reg(), I64)); + } + } + + if flags.unwind_info() && setup_frame { + // The *unwind* frame (but not the actual frame) starts at the + // clobbers, just below the saved FP/LR pair. + insts.push( + Inst::Unwind { + inst: UnwindInst::DefineNewFrame { + offset_downward_to_clobbers: frame_layout.clobber_size, + offset_upward_to_caller_sp: frame_layout.setup_area_size, + }, + } + .into(), + ); + } + + // Adjust the stack pointer downward for clobbers, the function fixed + // frame (spillslots and storage slots), and outgoing arguments. + let stack_size = frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + + // Store each clobbered register in order at offsets from SP, placing + // them above the fixed frame slots. 
+ if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(-i32::try_from(stack_size).unwrap())); + + let mut cur_offset = 8; + for reg in &frame_layout.clobbered_callee_saves { + let r_reg = reg.to_reg(); + let ty = match r_reg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unreachable!("no vector registers are callee-save"), + }; + insts.push( + Inst::gen_store( + Amode::SpOffset { + offset: i64::from(stack_size - cur_offset), + }, + Reg::from(reg.to_reg()), + ty, + MemFlags::trusted(), + ) + .into(), + ); + + if flags.unwind_info() { + insts.push( + Inst::Unwind { + inst: UnwindInst::SaveReg { + clobber_offset: frame_layout.clobber_size - cur_offset, + reg: r_reg, + }, + } + .into(), + ); + } + + cur_offset += 8 + } + } + + insts + } + + fn gen_clobber_restore( + _call_conv: isa::CallConv, + _flags: &settings::Flags, + frame_layout: &FrameLayout, + ) -> SmallVec<[Self::I; 16]> { + let mut insts = SmallVec::new(); + + let stack_size = frame_layout.clobber_size + + frame_layout.fixed_frame_storage_size + + frame_layout.outgoing_args_size; + + let mut cur_offset = 8; + for reg in &frame_layout.clobbered_callee_saves { + let rreg = reg.to_reg(); + let ty = match rreg.class() { + RegClass::Int => I64, + RegClass::Float => F64, + RegClass::Vector => unreachable!("vector registers are never callee-saved"), + }; + insts.push( + Inst::gen_load( + reg.map(Reg::from), + Amode::SpOffset { + offset: i64::from(stack_size - cur_offset), + }, + ty, + MemFlags::trusted(), + ) + .into(), + ); + cur_offset += 8 + } + + if stack_size > 0 { + insts.extend(Self::gen_sp_reg_adjust(stack_size as i32)); + } + + insts + } + + fn gen_call( + dest: &CallDest, + uses: CallArgList, + defs: CallRetList, + clobbers: PRegSet, + tmp: Writable, + callee_conv: isa::CallConv, + caller_conv: isa::CallConv, + callee_pop_size: u32, + ) -> SmallVec<[Self::I; 2]> { + if callee_conv == isa::CallConv::Tail || callee_conv == isa::CallConv::Fast { + match &dest { + 
&CallDest::ExtName(ref name, RelocDistance::Near) => smallvec![Inst::Call { + callee: Box::new(name.clone()), + info: Box::new(CallInfo { + uses, + defs, + clobbers, + callee_pop_size, + }), + } + .into()], + &CallDest::ExtName(ref name, RelocDistance::Far) => smallvec![ + Inst::LoadExtName { + dst: WritableXReg::try_from(tmp).unwrap(), + name: Box::new(name.clone()), + offset: 0, + } + .into(), + Inst::IndirectCall { + callee: XReg::new(tmp.to_reg()).unwrap(), + info: Box::new(CallInfo { + uses, + defs, + clobbers, + callee_pop_size, + }), + } + .into(), + ], + &CallDest::Reg(reg) => smallvec![Inst::IndirectCall { + callee: XReg::new(*reg).unwrap(), + info: Box::new(CallInfo { + uses, + defs, + clobbers, + callee_pop_size, + }), + } + .into()], + } + } else { + todo!("host calls? callee_conv = {callee_conv:?}; caller_conv = {caller_conv:?}") + } + } + + fn gen_memcpy Writable>( + _call_conv: isa::CallConv, + _dst: Reg, + _src: Reg, + _size: usize, + _alloc_tmp: F, + ) -> SmallVec<[Self::I; 8]> { + todo!() + } + + fn get_number_of_spillslots_for_value( + _rc: RegClass, + _target_vector_bytes: u32, + _isa_flags: &PulleyFlags, + ) -> u32 { + todo!() + } + + fn get_machine_env(_flags: &settings::Flags, _call_conv: isa::CallConv) -> &MachineEnv { + static MACHINE_ENV: OnceLock = OnceLock::new(); + MACHINE_ENV.get_or_init(create_reg_enviroment) + } + + fn get_regs_clobbered_by_call(_call_conv_of_callee: isa::CallConv) -> PRegSet { + DEFAULT_CLOBBERS + } + + fn compute_frame_layout( + _call_conv: isa::CallConv, + flags: &settings::Flags, + _sig: &Signature, + regs: &[Writable], + is_leaf: bool, + incoming_args_size: u32, + tail_args_size: u32, + fixed_frame_storage_size: u32, + outgoing_args_size: u32, + ) -> FrameLayout { + let mut regs: Vec> = regs + .iter() + .cloned() + .filter(|r| DEFAULT_CALLEE_SAVES.contains(r.to_reg().into())) + .collect(); + + regs.sort_unstable(); + + // Compute clobber size. 
+ let clobber_size = compute_clobber_size(®s); + + // Compute linkage frame size. + let setup_area_size = if flags.preserve_frame_pointers() + || !is_leaf + // The function arguments that are passed on the stack are addressed + // relative to the Frame Pointer. + || incoming_args_size > 0 + || clobber_size > 0 + || fixed_frame_storage_size > 0 + { + 16 // FP, LR + } else { + 0 + }; + + FrameLayout { + incoming_args_size, + tail_args_size, + setup_area_size, + clobber_size, + fixed_frame_storage_size, + outgoing_args_size, + clobbered_callee_saves: regs, + } + } + + fn gen_inline_probestack( + _insts: &mut SmallInstVec, + _call_conv: isa::CallConv, + _frame_size: u32, + _guard_size: u32, + ) { + todo!() + } +} + +impl

PulleyABICallSite

+where
+    P: PulleyTargetKind,
+{
+    /// Emit a tail call (`return_call`). Not implemented yet; panics via
+    /// `todo!()`.
+    pub fn emit_return_call(self, _ctx: &mut Lower<InstAndKind<P>>, _args: isle::ValueSlice) {
+        todo!()
+    }
+}
+
+/// Registers that a callee must preserve: the upper half (16..=31) of the
+/// integer and float register files. `compute_frame_layout` filters the
+/// function's clobbered registers against this set to decide what to save.
+const DEFAULT_CALLEE_SAVES: PRegSet = PRegSet::empty()
+    // Integer registers.
+    .with(px_reg(16))
+    .with(px_reg(17))
+    .with(px_reg(18))
+    .with(px_reg(19))
+    .with(px_reg(20))
+    .with(px_reg(21))
+    .with(px_reg(22))
+    .with(px_reg(23))
+    .with(px_reg(24))
+    .with(px_reg(25))
+    .with(px_reg(26))
+    .with(px_reg(27))
+    .with(px_reg(28))
+    .with(px_reg(29))
+    .with(px_reg(30))
+    .with(px_reg(31))
+    // Float registers. These must be built with `pf_reg`: repeating `px_reg`
+    // here only re-adds the integer registers and leaves every float
+    // register out of the callee-save set.
+    .with(pf_reg(16))
+    .with(pf_reg(17))
+    .with(pf_reg(18))
+    .with(pf_reg(19))
+    .with(pf_reg(20))
+    .with(pf_reg(21))
+    .with(pf_reg(22))
+    .with(pf_reg(23))
+    .with(pf_reg(24))
+    .with(pf_reg(25))
+    .with(pf_reg(26))
+    .with(pf_reg(27))
+    .with(pf_reg(28))
+    .with(pf_reg(29))
+    .with(pf_reg(30))
+    .with(pf_reg(31))
+    // Note: no vector registers are callee-saved.
+;
+
+/// Size in bytes of the clobber-save area for `clobbers`, rounded up to a
+/// multiple of 16.
+fn compute_clobber_size(clobbers: &[Writable<RealReg>]) -> u32 {
+    let mut clobbered_size = 0;
+    for reg in clobbers {
+        match reg.to_reg().class() {
+            // Integer and float callee-saves each occupy one 8-byte slot.
+            RegClass::Int | RegClass::Float => clobbered_size += 8,
+            RegClass::Vector => unimplemented!("Vector Size Clobbered"),
+        }
+    }
+    align_to(clobbered_size, 16)
+}
+
+/// Registers reported as clobbered by a call (returned from
+/// `get_regs_clobbered_by_call`).
+const DEFAULT_CLOBBERS: PRegSet = PRegSet::empty()
+    // Integer registers: the first 16 get clobbered.
+    .with(px_reg(0))
+    .with(px_reg(1))
+    .with(px_reg(2))
+    .with(px_reg(3))
+    .with(px_reg(4))
+    .with(px_reg(5))
+    .with(px_reg(6))
+    .with(px_reg(7))
+    .with(px_reg(8))
+    .with(px_reg(9))
+    .with(px_reg(10))
+    .with(px_reg(11))
+    .with(px_reg(12))
+    .with(px_reg(13))
+    .with(px_reg(14))
+    .with(px_reg(15))
+    // Float registers: the first 16 get clobbered.
+ .with(pf_reg(0)) + .with(pf_reg(1)) + .with(pf_reg(2)) + .with(pf_reg(3)) + .with(pf_reg(4)) + .with(pf_reg(5)) + .with(pf_reg(6)) + .with(pf_reg(7)) + .with(pf_reg(9)) + .with(pf_reg(10)) + .with(pf_reg(11)) + .with(pf_reg(12)) + .with(pf_reg(13)) + .with(pf_reg(14)) + .with(pf_reg(15)) + // All vector registers get clobbered. + .with(pv_reg(0)) + .with(pv_reg(1)) + .with(pv_reg(2)) + .with(pv_reg(3)) + .with(pv_reg(4)) + .with(pv_reg(5)) + .with(pv_reg(6)) + .with(pv_reg(7)) + .with(pv_reg(8)) + .with(pv_reg(9)) + .with(pv_reg(10)) + .with(pv_reg(11)) + .with(pv_reg(12)) + .with(pv_reg(13)) + .with(pv_reg(14)) + .with(pv_reg(15)) + .with(pv_reg(16)) + .with(pv_reg(17)) + .with(pv_reg(18)) + .with(pv_reg(19)) + .with(pv_reg(20)) + .with(pv_reg(21)) + .with(pv_reg(22)) + .with(pv_reg(23)) + .with(pv_reg(24)) + .with(pv_reg(25)) + .with(pv_reg(26)) + .with(pv_reg(27)) + .with(pv_reg(28)) + .with(pv_reg(29)) + .with(pv_reg(30)) + .with(pv_reg(31)); + +fn create_reg_enviroment() -> MachineEnv { + // Prefer caller-saved registers over callee-saved registers, because that + // way we don't need to emit code to save and restore them if we don't + // mutate them. 
+ + let preferred_regs_by_class: [Vec; 3] = { + let x_registers: Vec = (0..16).map(|x| px_reg(x)).collect(); + let f_registers: Vec = (0..16).map(|x| pf_reg(x)).collect(); + let v_registers: Vec = (0..32).map(|x| pv_reg(x)).collect(); + [x_registers, f_registers, v_registers] + }; + + let non_preferred_regs_by_class: [Vec; 3] = { + let x_registers: Vec = (16..32).map(|x| px_reg(x)).collect(); + let f_registers: Vec = (16..32).map(|x| pf_reg(x)).collect(); + let v_registers: Vec = vec![]; + [x_registers, f_registers, v_registers] + }; + + MachineEnv { + preferred_regs_by_class, + non_preferred_regs_by_class, + fixed_stack_slots: vec![], + scratch_by_class: [None, None, None], + } +} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle new file mode 100644 index 0000000000..9860851187 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -0,0 +1,562 @@ +;;;; Instruction definition ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Note: in the instructions below, we order destination registers first and +;; then source registers afterwards. +(type MInst + (enum + ;;;; Pseudo-Instructions ;;;; + + ;; A pseudo-instruction that captures register arguments in vregs. + (Args (args VecArgPair)) + + ;; A pseudo-instruction that moves vregs to return registers. + (Rets (rets VecRetPair)) + + ;; A pseudo-instruction to update unwind info. + (Unwind (inst UnwindInst)) + + ;;;; Actual Instructions ;;;; + + ;; Raise a trap. + (Trap (code TrapCode)) + + ;; Nothing. + (Nop) + + ;; Get the stack pointer. + (GetSp (dst WritableXReg)) + + ;; Return. + (Ret) + + ;; Load an external symbol's address into a register. + (LoadExtName (dst WritableXReg) + (name BoxExternalName) + (offset i64)) + + ;; A direct call to a known callee. + (Call (callee BoxExternalName) + (info BoxCallInfo)) + + ;; An indirect call to an unknown callee. 
+    (IndirectCall (callee XReg)
+                  (info BoxCallInfo))
+
+    ;; Unconditional jumps.
+    (Jump (label MachLabel))
+
+    ;; Jump to `taken` if `c` is nonzero, otherwise to `not_taken`.
+    (BrIf (c XReg) (taken MachLabel) (not_taken MachLabel))
+
+    ;; Compare-and-branch macro ops: compare the low 32 bits of `src1` and
+    ;; `src2`, jumping to `taken` if the comparison holds and to `not_taken`
+    ;; otherwise.
+    (BrIfXeq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+    (BrIfXneq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+    (BrIfXslt32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+    (BrIfXslteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+    (BrIfXult32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+    (BrIfXulteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel))
+
+    ;; Register-to-register moves.
+    (Xmov (dst WritableXReg) (src XReg))
+    (Fmov (dst WritableFReg) (src FReg))
+    (Vmov (dst WritableVReg) (src VReg))
+
+    ;; Integer constants. The immediates are signed and are sign-extended to
+    ;; 64 bits (the ABI code materializes negative stack adjustments through
+    ;; these).
+    (Xconst8 (dst WritableXReg) (imm i8))
+    (Xconst16 (dst WritableXReg) (imm i16))
+    (Xconst32 (dst WritableXReg) (imm i32))
+    (Xconst64 (dst WritableXReg) (imm i64))
+
+    ;; Integer arithmetic.
+    (Xadd32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xadd64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+
+    ;; Comparisons.
+    (Xeq64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xneq64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xslt64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xslteq64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xult64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xulteq64 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xeq32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xneq32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xslt32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xslteq32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xult32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+    (Xulteq32 (dst WritableXReg) (src1 XReg) (src2 XReg))
+
+    ;; Load the memory address referenced by `mem` into `dst`.
+ (LoadAddr (dst WritableXReg) (mem Amode)) + + ;; Loads. + (Load (dst WritableReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind)) + + ;; Stores. + (Store (mem Amode) (src Reg) (ty Type) (flags MemFlags)) + + ;; Bitcasts. + (BitcastIntFromFloat32 (dst WritableXReg) (src FReg)) + (BitcastIntFromFloat64 (dst WritableXReg) (src FReg)) + (BitcastFloatFromInt32 (dst WritableFReg) (src XReg)) + (BitcastFloatFromInt64 (dst WritableFReg) (src XReg)) + ) +) + +(type BoxCallInfo (primitive BoxCallInfo)) + +;;;; Address Modes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type StackAMode extern (enum)) + +(type Amode + (enum + (SpOffset (offset i64)) + (RegOffset (base XReg) (offset i64)) + (Stack (amode StackAMode)) + ) +) + +(type ExtKind (enum None Sign Zero)) + +;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(type XReg (primitive XReg)) +(type WritableXReg (primitive WritableXReg)) + +(type FReg (primitive FReg)) +(type WritableFReg (primitive WritableFReg)) + +(type VReg (primitive VReg)) +(type WritableVReg (primitive WritableVReg)) + +;; Construct a new `XReg` from a `Reg`. +;; +;; Asserts that the register has an integer RegClass. +(decl xreg_new (Reg) XReg) +(extern constructor xreg_new xreg_new) +(convert Reg XReg xreg_new) + +;; Construct a new `WritableXReg` from a `WritableReg`. +;; +;; Asserts that the register has an integer RegClass. +(decl writable_xreg_new (WritableReg) WritableXReg) +(extern constructor writable_xreg_new writable_xreg_new) +(convert WritableReg WritableXReg writable_xreg_new) + +;; Put a value into a XReg. +;; +;; Asserts that the value goes into a XReg. +(decl put_in_xreg (Value) XReg) +(rule (put_in_xreg val) (xreg_new (put_in_reg val))) +(convert Value XReg put_in_xreg) + +;; Construct an `InstOutput` out of a single XReg register. 
+(decl output_xreg (XReg) InstOutput)
+(rule (output_xreg x) (output_reg x))
+(convert XReg InstOutput output_xreg)
+
+;; Convert a `WritableXReg` to an `XReg`.
+(decl pure writable_xreg_to_xreg (WritableXReg) XReg)
+(extern constructor writable_xreg_to_xreg writable_xreg_to_xreg)
+(convert WritableXReg XReg writable_xreg_to_xreg)
+
+;; Convert a `WritableXReg` to a `WritableReg`.
+(decl pure writable_xreg_to_writable_reg (WritableXReg) WritableReg)
+(extern constructor writable_xreg_to_writable_reg writable_xreg_to_writable_reg)
+(convert WritableXReg WritableReg writable_xreg_to_writable_reg)
+
+;; Convert a `WritableXReg` to a `Reg`.
+(decl pure writable_xreg_to_reg (WritableXReg) Reg)
+(rule (writable_xreg_to_reg x) (writable_xreg_to_writable_reg x))
+(convert WritableXReg Reg writable_xreg_to_reg)
+
+;; Convert an `XReg` to a `Reg`.
+(decl pure xreg_to_reg (XReg) Reg)
+(extern constructor xreg_to_reg xreg_to_reg)
+(convert XReg Reg xreg_to_reg)
+
+;; Convert an `XReg` to a `ValueRegs`.
+;;
+;; The converter must name `xreg_to_value_regs`: `xreg_to_reg` produces a
+;; `Reg`, not a `ValueRegs`.
+(decl xreg_to_value_regs (XReg) ValueRegs)
+(rule (xreg_to_value_regs x) (value_reg x))
+(convert XReg ValueRegs xreg_to_value_regs)
+
+;; Convert a `WritableXReg` to a `ValueRegs`.
+(decl writable_xreg_to_value_regs (WritableXReg) ValueRegs)
+(rule (writable_xreg_to_value_regs x) (value_reg x))
+(convert WritableXReg ValueRegs writable_xreg_to_value_regs)
+
+;; Allocates a new `WritableXReg`.
+(decl temp_writable_xreg () WritableXReg)
+(rule (temp_writable_xreg) (temp_writable_reg $I64))
+
+;; Construct a new `FReg` from a `Reg`.
+;;
+;; Asserts that the register has a Float RegClass.
+(decl freg_new (Reg) FReg)
+(extern constructor freg_new freg_new)
+(convert Reg FReg freg_new)
+
+;; Construct a new `WritableFReg` from a `WritableReg`.
+;;
+;; Asserts that the register has a Float RegClass.
+(decl writable_freg_new (WritableReg) WritableFReg)
+(extern constructor writable_freg_new writable_freg_new)
+(convert WritableReg WritableFReg writable_freg_new)
+
+;; Put a value into an `FReg`.
+;;
+;; Asserts that the value goes into an `FReg`.
+(decl put_in_freg (Value) FReg)
+(rule (put_in_freg val) (freg_new (put_in_reg val)))
+(convert Value FReg put_in_freg)
+
+;; Construct an `InstOutput` out of a single FReg register.
+(decl output_freg (FReg) InstOutput)
+(rule (output_freg x) (output_reg x))
+(convert FReg InstOutput output_freg)
+
+;; Convert a `WritableFReg` to an `FReg`.
+(decl pure writable_freg_to_freg (WritableFReg) FReg)
+(extern constructor writable_freg_to_freg writable_freg_to_freg)
+(convert WritableFReg FReg writable_freg_to_freg)
+
+;; Convert a `WritableFReg` to a `WritableReg`.
+(decl pure writable_freg_to_writable_reg (WritableFReg) WritableReg)
+(extern constructor writable_freg_to_writable_reg writable_freg_to_writable_reg)
+(convert WritableFReg WritableReg writable_freg_to_writable_reg)
+
+;; Convert a `WritableFReg` to a `Reg`.
+(decl pure writable_freg_to_reg (WritableFReg) Reg)
+(rule (writable_freg_to_reg x) (writable_freg_to_writable_reg x))
+(convert WritableFReg Reg writable_freg_to_reg)
+
+;; Convert an `FReg` to a `Reg`.
+(decl pure freg_to_reg (FReg) Reg)
+(extern constructor freg_to_reg freg_to_reg)
+(convert FReg Reg freg_to_reg)
+
+;; Convert an `FReg` to a `ValueRegs`.
+;;
+;; The converter must name `freg_to_value_regs`: `xreg_to_reg` takes an
+;; `XReg` and produces a `Reg`, so it has neither the right input nor the
+;; right output type.
+(decl freg_to_value_regs (FReg) ValueRegs)
+(rule (freg_to_value_regs x) (value_reg x))
+(convert FReg ValueRegs freg_to_value_regs)
+
+;; Convert a `WritableFReg` to a `ValueRegs`.
+(decl writable_freg_to_value_regs (WritableFReg) ValueRegs)
+(rule (writable_freg_to_value_regs x) (value_reg x))
+(convert WritableFReg ValueRegs writable_freg_to_value_regs)
+
+;; Allocates a new `WritableFReg`.
+(decl temp_writable_freg () WritableFReg)
+(rule (temp_writable_freg) (temp_writable_reg $F64))
+
+;; Construct a new `VReg` from a `Reg`.
+;;
+;; Asserts that the register has a Vector RegClass.
+(decl vreg_new (Reg) VReg)
+(extern constructor vreg_new vreg_new)
+(convert Reg VReg vreg_new)
+
+;; Construct a new `WritableVReg` from a `WritableReg`.
+;;
+;; Asserts that the register has a Vector RegClass.
+(decl writable_vreg_new (WritableReg) WritableVReg)
+(extern constructor writable_vreg_new writable_vreg_new)
+(convert WritableReg WritableVReg writable_vreg_new)
+
+;; Put a value into a `VReg`.
+;;
+;; Asserts that the value goes into a `VReg`.
+(decl put_in_vreg (Value) VReg)
+(rule (put_in_vreg val) (vreg_new (put_in_reg val)))
+(convert Value VReg put_in_vreg)
+
+;; Construct an `InstOutput` out of a single VReg register.
+(decl output_vreg (VReg) InstOutput)
+(rule (output_vreg x) (output_reg x))
+(convert VReg InstOutput output_vreg)
+
+;; Convert a `WritableVReg` to a `VReg`.
+(decl pure writable_vreg_to_vreg (WritableVReg) VReg)
+(extern constructor writable_vreg_to_vreg writable_vreg_to_vreg)
+(convert WritableVReg VReg writable_vreg_to_vreg)
+
+;; Convert a `WritableVReg` to a `WritableReg`.
+(decl pure writable_vreg_to_writable_reg (WritableVReg) WritableReg)
+(extern constructor writable_vreg_to_writable_reg writable_vreg_to_writable_reg)
+(convert WritableVReg WritableReg writable_vreg_to_writable_reg)
+
+;; Convert a `WritableVReg` to a `Reg`.
+(decl pure writable_vreg_to_reg (WritableVReg) Reg)
+(rule (writable_vreg_to_reg x) (writable_vreg_to_writable_reg x))
+(convert WritableVReg Reg writable_vreg_to_reg)
+
+;; Convert a `VReg` to a `Reg`.
+(decl pure vreg_to_reg (VReg) Reg)
+(extern constructor vreg_to_reg vreg_to_reg)
+(convert VReg Reg vreg_to_reg)
+
+;; Convert a `VReg` to a `ValueRegs`.
+;;
+;; The converter must name `vreg_to_value_regs`: `xreg_to_reg` takes an
+;; `XReg` and produces a `Reg`, so it has neither the right input nor the
+;; right output type.
+(decl vreg_to_value_regs (VReg) ValueRegs)
+(rule (vreg_to_value_regs x) (value_reg x))
+(convert VReg ValueRegs vreg_to_value_regs)
+
+;; Convert a `WritableVReg` to a `ValueRegs`.
+(decl writable_vreg_to_value_regs (WritableVReg) ValueRegs)
+(rule (writable_vreg_to_value_regs x) (value_reg x))
+(convert WritableVReg ValueRegs writable_vreg_to_value_regs)
+
+;; Allocates a new `WritableVReg`.
+(decl temp_writable_vreg () WritableVReg)
+(rule (temp_writable_vreg) (temp_writable_reg $I8X16))
+
+;;;; Materializing Constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Lower a constant into a register.
+;;
+;; The constant is passed as raw `u64` bits. For integer types the rules
+;; below try the narrowest signed immediate first (higher rule priority wins),
+;; falling back to the per-type base cases at priority 0.
+(decl imm (Type u64) Reg)
+
+;; If a value can fit into a signed 8-bit immediate, then prioritize that.
+(rule 3 (imm (ty_int _) x)
+      (if-let y (i8_try_from_u64 x))
+      (pulley_xconst8 y))
+
+;; If a value can fit into a signed 16-bit immediate, then prioritize that.
+(rule 2 (imm (ty_int _) x)
+      (if-let y (i16_try_from_u64 x))
+      (pulley_xconst16 y))
+
+;; If a value can fit into a signed 32-bit immediate, then prioritize that.
+(rule 1 (imm (ty_int _) x)
+      (if-let y (i32_try_from_u64 x))
+      (pulley_xconst32 y))
+
+;; Base cases for integers.
+(rule 0 (imm $I8 x) (pulley_xconst8 (u8_as_i8 (u64_as_u8 x))))
+(rule 0 (imm $I16 x) (pulley_xconst16 (u16_as_i16 (u64_as_u16 x))))
+(rule 0 (imm $I32 x) (pulley_xconst32 (u64_as_i32 x)))
+(rule 0 (imm $I64 x) (pulley_xconst64 (u64_as_i64 x)))
+
+;; References are actually integers.
+(rule 0 (imm $R32 x) (imm $I32 x))
+(rule 0 (imm $R64 x) (imm $I64 x))
+
+;; Base cases for floats: materialize the bit pattern as an integer of the
+;; same width, then bitcast it from the integer type to the float type.
+(rule 0 (imm $F32 c) (gen_bitcast (imm $I32 c) $I32 $F32))
+(rule 0 (imm $F64 c) (gen_bitcast (imm $I64 c) $I64 $F64))
+
+;;;; Bitcasts ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Bitcast from the first type, into the second type.
+(decl gen_bitcast (Reg Type Type) Reg)
+;; The arguments are (value, from-type, to-type). A float source must go
+;; through `int_from_float` (which takes an `FReg` and yields an `XReg`), and
+;; an integer source through `float_from_int` (`XReg` in, `FReg` out).
+(rule (gen_bitcast r $F32 $I32) (pulley_bitcast_int_from_float_32 r))
+(rule (gen_bitcast r $F64 $I64) (pulley_bitcast_int_from_float_64 r))
+(rule (gen_bitcast r $I32 $F32) (pulley_bitcast_float_from_int_32 r))
+(rule (gen_bitcast r $I64 $F64) (pulley_bitcast_float_from_int_64 r))
+(rule (gen_bitcast r $I64 $R64) (copy_reg r $I64))
+(rule (gen_bitcast r $R64 $I64) (copy_reg r $I64))
+;; Lowest priority: a bitcast between identical types is the identity.
+(rule -1 (gen_bitcast r ty ty) r)
+
+;; Copy a register of the given type to a new register.
+;;
+;; Generally, regalloc should take care of this kind of thing for us. This is
+;; only useful for implementing things like `bitcast` from an `r64` to an `i64`
+;; to avoid conflicting constraints on a single aliased value by splitting the
+;; value into two parts.
+(decl copy_reg (Reg Type) Reg)
+(rule (copy_reg src (ty_int_ref_scalar_64 ty))
+      (let ((dst WritableReg (temp_writable_reg ty))
+            (_ Unit (emit (MInst.Xmov dst src))))
+        dst))
+
+;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl pulley_trap (TrapCode) SideEffectNoResult)
+(rule (pulley_trap code)
+      (SideEffectNoResult.Inst (MInst.Trap code)))
+
+(decl pulley_get_sp () XReg)
+(rule (pulley_get_sp)
+      (let ((reg WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.GetSp reg))))
+        reg))
+
+(decl pulley_xconst8 (i8) XReg)
+(rule (pulley_xconst8 x)
+      (let ((reg WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xconst8 reg x))))
+        reg))
+
+(decl pulley_xconst16 (i16) XReg)
+(rule (pulley_xconst16 x)
+      (let ((reg WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xconst16 reg x))))
+        reg))
+
+(decl pulley_xconst32 (i32) XReg)
+(rule (pulley_xconst32 x)
+      (let ((reg WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xconst32 reg x))))
+        reg))
+
+(decl pulley_xconst64 (i64) XReg)
+(rule (pulley_xconst64 x)
+      (let ((reg WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xconst64 reg x))))
+        reg))
+
+(decl pulley_jump
(MachLabel) SideEffectNoResult)
+(rule (pulley_jump label)
+      (SideEffectNoResult.Inst (MInst.Jump label)))
+
+;; Branch on the value in `c`; wraps `MInst.BrIf`.
+(decl pulley_br_if (XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if c taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIf c taken not_taken)))
+
+;; Compare-and-branch constructors. Each wraps the `MInst` variant of the same
+;; name, comparing `a` with `b` and carrying both branch targets.
+(decl pulley_br_if_xeq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xeq32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXeq32 a b taken not_taken)))
+
+(decl pulley_br_if_xneq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xneq32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXneq32 a b taken not_taken)))
+
+(decl pulley_br_if_xslt32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xslt32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXslt32 a b taken not_taken)))
+
+(decl pulley_br_if_xslteq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xslteq32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXslteq32 a b taken not_taken)))
+
+(decl pulley_br_if_xult32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xult32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXult32 a b taken not_taken)))
+
+(decl pulley_br_if_xulteq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult)
+(rule (pulley_br_if_xulteq32 a b taken not_taken)
+      (SideEffectNoResult.Inst (MInst.BrIfXulteq32 a b taken not_taken)))
+
+;; Integer addition: emits the add into a freshly allocated temporary and
+;; returns that temporary.
+(decl pulley_xadd32 (XReg XReg) XReg)
+(rule (pulley_xadd32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xadd32 dst a b))))
+        dst))
+
+(decl pulley_xadd64 (XReg XReg) XReg)
+(rule (pulley_xadd64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xadd64 dst a b))))
+        dst))
+
+;; Comparison constructors: each emits the named comparison of `a` and `b`
+;; into a freshly allocated temporary and returns that temporary.
+(decl pulley_xeq64 (XReg XReg) XReg)
+(rule (pulley_xeq64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xeq64 dst a b))))
+        dst))
+
+(decl pulley_xneq64 (XReg
XReg) XReg)
+(rule (pulley_xneq64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xneq64 dst a b))))
+        dst))
+
+(decl pulley_xslt64 (XReg XReg) XReg)
+(rule (pulley_xslt64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xslt64 dst a b))))
+        dst))
+
+(decl pulley_xslteq64 (XReg XReg) XReg)
+(rule (pulley_xslteq64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xslteq64 dst a b))))
+        dst))
+
+(decl pulley_xult64 (XReg XReg) XReg)
+(rule (pulley_xult64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xult64 dst a b))))
+        dst))
+
+(decl pulley_xulteq64 (XReg XReg) XReg)
+(rule (pulley_xulteq64 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xulteq64 dst a b))))
+        dst))
+
+;; 32-bit variants of the comparison constructors above.
+(decl pulley_xeq32 (XReg XReg) XReg)
+(rule (pulley_xeq32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xeq32 dst a b))))
+        dst))
+
+(decl pulley_xneq32 (XReg XReg) XReg)
+(rule (pulley_xneq32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xneq32 dst a b))))
+        dst))
+
+(decl pulley_xslt32 (XReg XReg) XReg)
+(rule (pulley_xslt32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xslt32 dst a b))))
+        dst))
+
+(decl pulley_xslteq32 (XReg XReg) XReg)
+(rule (pulley_xslteq32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xslteq32 dst a b))))
+        dst))
+
+(decl pulley_xult32 (XReg XReg) XReg)
+(rule (pulley_xult32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xult32 dst a b))))
+        dst))
+
+(decl pulley_xulteq32 (XReg XReg) XReg)
+(rule (pulley_xulteq32 a b)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.Xulteq32 dst a b))))
+        dst))
+
+;; Emit a load of type `ty` from `amode` into a fresh temporary of that type.
+(decl pulley_load (Amode Type MemFlags ExtKind) Reg)
+(rule (pulley_load amode ty flags ext)
+      (let ((dst WritableReg (temp_writable_reg ty))
+            (_ Unit (emit (MInst.Load dst
amode ty flags ext))))
+        dst))
+
+;; Build (without emitting) a store of `src` with type `ty` to `amode`.
+(decl pulley_store (Amode Reg Type MemFlags) SideEffectNoResult)
+(rule (pulley_store amode src ty flags)
+      (SideEffectNoResult.Inst (MInst.Store amode src ty flags)))
+
+;; Bitcast constructors: move the bits of `src` into a fresh temporary of the
+;; other register class.
+(decl pulley_bitcast_float_from_int_32 (XReg) FReg)
+(rule (pulley_bitcast_float_from_int_32 src)
+      (let ((dst WritableFReg (temp_writable_freg))
+            (_ Unit (emit (MInst.BitcastFloatFromInt32 dst src))))
+        dst))
+
+(decl pulley_bitcast_float_from_int_64 (XReg) FReg)
+(rule (pulley_bitcast_float_from_int_64 src)
+      (let ((dst WritableFReg (temp_writable_freg))
+            (_ Unit (emit (MInst.BitcastFloatFromInt64 dst src))))
+        dst))
+
+(decl pulley_bitcast_int_from_float_32 (FReg) XReg)
+(rule (pulley_bitcast_int_from_float_32 src)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.BitcastIntFromFloat32 dst src))))
+        dst))
+
+(decl pulley_bitcast_int_from_float_64 (FReg) XReg)
+(rule (pulley_bitcast_int_from_float_64 src)
+      (let ((dst WritableXReg (temp_writable_xreg))
+            (_ Unit (emit (MInst.BitcastIntFromFloat64 dst src))))
+        dst))
+
+;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Both call helpers are external constructors, i.e. implemented outside ISLE.
+(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
+(extern constructor gen_call gen_call)
+
+(decl gen_call_indirect (SigRef Value ValueSlice) InstOutput)
+(extern constructor gen_call_indirect gen_call_indirect)
diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs
new file mode 100644
index 0000000000..b0d73bc393
--- /dev/null
+++ b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs
@@ -0,0 +1,191 @@
+//! Pulley instruction arguments.
+
+use super::*;
+use crate::machinst::abi::StackAMode;
+
+/// A macro for defining a newtype of `Reg` that enforces some invariant about
+/// the wrapped `Reg` (such as that it is of a particular register class).
+macro_rules!
newtype_of_reg {
+    (
+        $newtype_reg:ident,
+        $newtype_writable_reg:ident,
+        |$check_reg:ident| $check:expr
+    ) => {
+        /// A newtype wrapper around `Reg`.
+        #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+        pub struct $newtype_reg(Reg);
+
+        // Allow comparing the newtype directly against a plain `Reg`.
+        impl PartialEq<Reg> for $newtype_reg {
+            fn eq(&self, other: &Reg) -> bool {
+                self.0 == *other
+            }
+        }
+
+        impl From<$newtype_reg> for Reg {
+            fn from(r: $newtype_reg) -> Self {
+                r.0
+            }
+        }
+
+        // Fallible conversion from a plain `Reg`; fails when `new` rejects it.
+        impl TryFrom<Reg> for $newtype_reg {
+            type Error = ();
+            fn try_from(r: Reg) -> Result<Self, Self::Error> {
+                Self::new(r).ok_or(())
+            }
+        }
+
+        impl $newtype_reg {
+            /// Create this newtype from the given register, or return `None` if the register
+            /// is not a valid instance of this newtype.
+            pub fn new($check_reg: Reg) -> Option<Self> {
+                if $check {
+                    Some(Self($check_reg))
+                } else {
+                    None
+                }
+            }
+
+            /// Get this newtype's underlying `Reg`.
+            pub fn to_reg(self) -> Reg {
+                self.0
+            }
+        }
+
+        // Convenience impl so that people working with this newtype can use it
+        // "just like" a plain `Reg`.
+        //
+        // NB: We cannot implement `DerefMut` because that would let people do
+        // nasty stuff like `*my_xreg.deref_mut() = some_freg`, breaking the
+        // invariants that `XReg` provides.
+        impl std::ops::Deref for $newtype_reg {
+            type Target = Reg;
+
+            fn deref(&self) -> &Reg {
+                &self.0
+            }
+        }
+
+        /// If you know what you're doing, you can explicitly mutably borrow the
+        /// underlying `Reg`. Don't make it point to the wrong type of register
+        /// please.
+        impl AsMut<Reg> for $newtype_reg {
+            fn as_mut(&mut self) -> &mut Reg {
+                &mut self.0
+            }
+        }
+
+        /// Writable Reg.
+        pub type $newtype_writable_reg = Writable<$newtype_reg>;
+
+        // Conversions into the interpreter's register type. These unwrap, so
+        // they panic if the register is not a real register or if its
+        // hardware encoding is rejected by the interpreter type's `new`.
+        impl From<$newtype_reg> for pulley_interpreter::regs::$newtype_reg {
+            fn from(r: $newtype_reg) -> Self {
+                Self::new(r.to_real_reg().unwrap().hw_enc()).unwrap()
+            }
+        }
+        impl<'a> From<&'a $newtype_reg> for pulley_interpreter::regs::$newtype_reg {
+            fn from(r: &'a $newtype_reg) -> Self {
+                Self::new(r.to_real_reg().unwrap().hw_enc()).unwrap()
+            }
+        }
+        impl From<$newtype_writable_reg> for pulley_interpreter::regs::$newtype_reg {
+            fn from(r: $newtype_writable_reg) -> Self {
+                Self::new(r.to_reg().to_real_reg().unwrap().hw_enc()).unwrap()
+            }
+        }
+        impl<'a> From<&'a $newtype_writable_reg> for pulley_interpreter::regs::$newtype_reg {
+            fn from(r: &'a $newtype_writable_reg) -> Self {
+                Self::new(r.to_reg().to_real_reg().unwrap().hw_enc()).unwrap()
+            }
+        }
+
+        // Fallible conversion from a plain writable register; fails when the
+        // wrapped register is rejected by the newtype's `new`.
+        impl TryFrom<Writable<Reg>> for $newtype_writable_reg {
+            type Error = ();
+            fn try_from(r: Writable<Reg>) -> Result<Self, Self::Error> {
+                let r = r.to_reg();
+                match $newtype_reg::new(r) {
+                    Some(r) => Ok(Writable::from_reg(r)),
+                    None => Err(()),
+                }
+            }
+        }
+    };
+}
+
+// Newtypes for register classes.
+newtype_of_reg!(XReg, WritableXReg, |reg| reg.class() == RegClass::Int);
+newtype_of_reg!(FReg, WritableFReg, |reg| reg.class() == RegClass::Float);
+newtype_of_reg!(VReg, WritableVReg, |reg| reg.class() == RegClass::Vector);
+
+pub use super::super::lower::isle::generated_code::ExtKind;
+
+pub use super::super::lower::isle::generated_code::Amode;
+
+impl Amode {
+    /// Add the registers referenced by this Amode to `collector`.
+    pub(crate) fn get_operands(&mut self, collector: &mut impl OperandVisitor) {
+        match self {
+            Amode::RegOffset { base, offset: _ } => collector.reg_use(base),
+            // Registers used in these modes aren't allocatable.
+            Amode::SpOffset { .. } | Amode::Stack { .. } => {}
+        }
+    }
+
+    /// Get the base register of this addressing mode.
+    ///
+    /// Every current mode has one: `RegOffset` reports its `base`, and the
+    /// stack-relative modes report `stack_reg()`.
+    pub(crate) fn get_base_register(&self) -> Option<Reg> {
+        match self {
+            Amode::RegOffset { base, offset: _ } => Some((*base).into()),
+            Amode::SpOffset { .. } | Amode::Stack { ..
} => Some(stack_reg()),
+        }
+    }
+
+    /// Compute the offset to apply to the base register.
+    ///
+    /// `RegOffset` and `SpOffset` carry their offset directly. Abstract stack
+    /// locations are resolved against `state`: incoming arguments use the
+    /// frame layout, and stack slots add the current virtual SP offset.
+    pub(crate) fn get_offset_with_state<P>(&self, state: &EmitState<P>) -> i64
+    where
+        P: PulleyTargetKind,
+    {
+        match self {
+            Amode::RegOffset { base: _, offset } | Amode::SpOffset { offset } => *offset,
+            Amode::Stack { amode } => match amode {
+                StackAMode::IncomingArg(offset, stack_args_size) => {
+                    let offset = i64::from(*stack_args_size) - *offset;
+                    let frame_layout = state.frame_layout();
+                    let sp_offset = frame_layout.tail_args_size
+                        + frame_layout.setup_area_size
+                        + frame_layout.clobber_size
+                        + frame_layout.fixed_frame_storage_size
+                        + frame_layout.outgoing_args_size;
+                    i64::from(sp_offset) - offset
+                }
+                StackAMode::Slot(offset) => *offset + state.virtual_sp_offset,
+                StackAMode::OutgoingArg(offset) => *offset,
+            },
+        }
+    }
+}
+
+/// Formats as `sp+N` / `spN` or `<reg>+N` / `<reg>N` (negative offsets carry
+/// their own sign); abstract stack modes fall back to their `Debug` output.
+impl core::fmt::Display for Amode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            Amode::SpOffset { offset } => {
+                if *offset >= 0 {
+                    write!(f, "sp+{offset}")
+                } else {
+                    write!(f, "sp{offset}")
+                }
+            }
+            Amode::RegOffset { base, offset } => {
+                let name = reg_name(**base);
+                if *offset >= 0 {
+                    write!(f, "{name}+{offset}")
+                } else {
+                    write!(f, "{name}{offset}")
+                }
+            }
+            Amode::Stack { amode } => core::fmt::Debug::fmt(amode, f),
+        }
+    }
+}
+
+impl From<StackAMode> for Amode {
+    fn from(amode: StackAMode) -> Self {
+        Amode::Stack { amode }
+    }
+}
diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
new file mode 100644
index 0000000000..d04ed2bfe0
--- /dev/null
+++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
@@ -0,0 +1,508 @@
+//! Pulley binary code emission.
+
+use super::*;
+use crate::binemit::StackMap;
+use crate::ir;
+use crate::isa::pulley_shared::abi::PulleyMachineDeps;
+use crate::isa::pulley_shared::PointerWidth;
+use crate::trace;
+use core::marker::PhantomData;
+use cranelift_control::ControlPlane;
+use pulley_interpreter::encode as enc;
+
+/// Settings handed to instruction emission.
+pub struct EmitInfo {
+    #[allow(dead_code)] // Will get used as we fill out this backend.
+    shared_flags: settings::Flags,
+
+    #[allow(dead_code)] // Will get used as we fill out this backend.
+    isa_flags: crate::isa::pulley_shared::settings::Flags,
+}
+
+impl EmitInfo {
+    pub(crate) fn new(
+        shared_flags: settings::Flags,
+        isa_flags: crate::isa::pulley_shared::settings::Flags,
+    ) -> Self {
+        Self {
+            shared_flags,
+            isa_flags,
+        }
+    }
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState<P>
+where
+    P: PulleyTargetKind,
+{
+    _phantom: PhantomData<P>,
+    ctrl_plane: ControlPlane,
+    // Stack maps recorded by `pre_safepoint` and consumed by the next call.
+    stack_map: Option<StackMap>,
+    user_stack_map: Option<ir::UserStackMap>,
+    pub virtual_sp_offset: i64,
+    frame_layout: FrameLayout,
+}
+
+impl<P> EmitState<P>
+where
+    P: PulleyTargetKind,
+{
+    /// Take both pending stack maps, leaving `None` in their place.
+    fn take_stack_map(&mut self) -> (Option<StackMap>, Option<ir::UserStackMap>) {
+        (self.stack_map.take(), self.user_stack_map.take())
+    }
+
+    /// Add `amount` to the virtual SP offset, tracing the old and new values.
+    pub(crate) fn adjust_virtual_sp_offset(&mut self, amount: i64) {
+        let old = self.virtual_sp_offset;
+        let new = self.virtual_sp_offset + amount;
+        trace!("adjust virtual sp offset by {amount:#x}: {old:#x} -> {new:#x}",);
+        self.virtual_sp_offset = new;
+    }
+}
+
+impl<P> MachInstEmitState<InstAndKind<P>> for EmitState<P>
+where
+    P: PulleyTargetKind,
+{
+    fn new(abi: &Callee<PulleyMachineDeps<P>>, ctrl_plane: ControlPlane) -> Self {
+        EmitState {
+            _phantom: PhantomData,
+            ctrl_plane,
+            stack_map: None,
+            user_stack_map: None,
+            virtual_sp_offset: 0,
+            frame_layout: abi.frame_layout().clone(),
+        }
+    }
+
+    fn pre_safepoint(
+        &mut self,
+        stack_map: Option<StackMap>,
+        user_stack_map: Option<ir::UserStackMap>,
+    ) {
+        self.stack_map = stack_map;
+        self.user_stack_map = user_stack_map;
+    }
+
+    fn ctrl_plane_mut(&mut self) -> &mut ControlPlane {
+        &mut self.ctrl_plane
+    }
+
+    fn take_ctrl_plane(self) -> ControlPlane {
+        self.ctrl_plane
+    }
+
+    fn frame_layout(&self) -> &FrameLayout {
+        &self.frame_layout
+    }
+}
+
+impl<P> MachInstEmit for InstAndKind<P>
+where
+    P: PulleyTargetKind,
+{
+    type State = EmitState<P>;
+    type Info = EmitInfo;
+
+    fn emit(&self, sink: &mut MachBuffer<Self>, emit_info: &Self::Info, state: &mut Self::State) {
+        // N.B.: we *must* not exceed the "worst-case size" used to compute
+        // where to insert islands, except when islands are explicitly triggered
+        // (with an `EmitIsland`). The `assert!` below enforces this for every
+        // instruction emitted through this method.
+        let start = sink.cur_offset();
+        pulley_emit(self, sink, emit_info, state, start);
+
+        let end = sink.cur_offset();
+        assert!(
+            (end - start) <= InstAndKind::<P>::worst_case_size(),
+            "encoded inst {self:?} longer than worst-case size: length: {}, Inst::worst_case_size() = {}",
+            end - start,
+            InstAndKind::<P>::worst_case_size()
+        );
+    }
+
+    fn pretty_print_inst(&self, state: &mut Self::State) -> String {
+        self.print_with_state(state)
+    }
+}
+
+/// Encode `inst` into `sink`.
+///
+/// `start_offset` is the buffer offset at which this instruction begins; the
+/// branch arms use it to compute where their label-relative fields live.
+fn pulley_emit<P>(
+    inst: &Inst,
+    sink: &mut MachBuffer<InstAndKind<P>>,
+    _emit_info: &EmitInfo,
+    state: &mut EmitState<P>,
+    start_offset: u32,
+) where
+    P: PulleyTargetKind,
+{
+    match inst {
+        // Pseudo-instructions that don't actually encode to anything.
+        Inst::Args { .. } | Inst::Rets { .. } | Inst::Unwind { .. } => {}
+
+        Inst::Trap { code } => {
+            sink.add_trap(*code);
+            enc::trap(sink);
+        }
+
+        Inst::Nop => todo!(),
+
+        Inst::GetSp { dst } => enc::get_sp(sink, dst),
+
+        Inst::Ret => enc::ret(sink),
+
+        Inst::LoadExtName { .. } => todo!(),
+
+        Inst::Call { callee, info } => {
+            let (stack_map, user_stack_map) = state.take_stack_map();
+            if let Some(s) = stack_map {
+                // 5 bytes: the 1-byte opcode plus the 4-byte immediate
+                // written by `put1` / `put4` below.
+                sink.add_stack_map(StackMapExtent::UpcomingBytes(5), s);
+            }
+            sink.put1(pulley_interpreter::Opcode::Call as u8);
+            sink.add_reloc(
+                // TODO: is it actually okay to reuse this reloc here?
+                Reloc::X86CallPCRel4,
+                &**callee,
+                // This addend adjusts for the difference between the start of
+                // the instruction and the beginning of the immediate field.
+                -1,
+            );
+            sink.put4(0);
+            if let Some(s) = user_stack_map {
+                let offset = sink.cur_offset();
+                sink.push_user_stack_map(state, offset, s);
+            }
+            sink.add_call_site();
+
+            let callee_pop_size = i64::from(info.callee_pop_size);
+            state.adjust_virtual_sp_offset(-callee_pop_size);
+        }
+
+        Inst::IndirectCall { .. } => todo!(),
+
+        Inst::Jump { label } => {
+            sink.use_label_at_offset(start_offset + 1, *label, LabelUse::Jump(1));
+            sink.add_uncond_branch(start_offset, start_offset + 5, *label);
+            enc::jump(sink, 0x00000000);
+        }
+
+        Inst::BrIf {
+            c,
+            taken,
+            not_taken,
+        } => {
+            // If taken.
+            // The 4-byte branch target field starts 2 bytes into the
+            // conditional branch (presumably opcode + one register byte;
+            // the `debug_assert`s below check the resulting length).
+            let taken_start = start_offset + 2;
+            let taken_end = taken_start + 4;
+
+            sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(2));
+            // Pre-encode the inverted branch so the buffer can flip the
+            // condition if it chooses to.
+            let mut inverted = SmallVec::<[u8; 16]>::new();
+            enc::br_if_not(&mut inverted, c, 0x00000000);
+            debug_assert_eq!(
+                inverted.len(),
+                usize::try_from(taken_end - start_offset).unwrap()
+            );
+
+            sink.add_cond_branch(start_offset, taken_end, *taken, &inverted);
+            enc::br_if(sink, c, 0x00000000);
+            debug_assert_eq!(sink.cur_offset(), taken_end);
+
+            // If not taken.
+            let not_taken_start = taken_end + 1;
+            let not_taken_end = not_taken_start + 4;
+
+            sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::Jump(1));
+            sink.add_uncond_branch(taken_end, not_taken_end, *not_taken);
+            enc::jump(sink, 0x00000000);
+        }
+
+        // Compare-and-branch macro ops. Each passes the encoder for its own
+        // condition plus one for the logically inverted condition.
+        Inst::BrIfXeq32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xeq32,
+                enc::br_if_xneq32,
+            );
+        }
+
+        Inst::BrIfXneq32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xneq32,
+                enc::br_if_xeq32,
+            );
+        }
+
+        // For the ordered comparisons the inverse is expressed by swapping
+        // the operands: `!(a < b)` is `b <= a`, and `!(a <= b)` is `b < a`.
+        Inst::BrIfXslt32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xslt32,
+                |s, src1, src2, x| enc::br_if_xslteq32(s, src2, src1, x),
+            );
+        }
+
+        Inst::BrIfXslteq32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xslteq32,
+                |s, src1, src2, x| enc::br_if_xslt32(s, src2, src1, x),
+            );
+        }
+
+        Inst::BrIfXult32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xult32,
+                |s, src1, src2, x| enc::br_if_xulteq32(s, src2, src1, x),
+            );
+        }
+
+        Inst::BrIfXulteq32 {
+            src1,
+            src2,
+            taken,
+            not_taken,
+        } => {
+            br_if_cond_helper(
+                sink,
+                start_offset,
+                *src1,
+                *src2,
+                taken,
+                not_taken,
+                enc::br_if_xulteq32,
+                |s, src1, src2, x| enc::br_if_xult32(s, src2, src1, x),
+            );
+        }
+
+        // The remaining arithmetic, move, constant and comparison
+        // instructions map one-to-one onto an encoder function.
+        Inst::Xmov { dst, src } => enc::xmov(sink, dst, src),
+        Inst::Fmov { dst, src } => enc::fmov(sink, dst, src),
+        Inst::Vmov { dst, src } => enc::vmov(sink, dst, src),
+
+        Inst::Xconst8 { dst, imm } => enc::xconst8(sink, dst, *imm),
+        Inst::Xconst16 { dst, imm } => enc::xconst16(sink, dst, *imm),
+        Inst::Xconst32 { dst, imm } => enc::xconst32(sink, dst, *imm),
+        Inst::Xconst64 { dst, imm } => enc::xconst64(sink, dst, *imm),
+
+        Inst::Xadd32 { dst, src1, src2 } => {
+            enc::xadd32(sink, dst, src1, src2);
+        }
+        Inst::Xadd64 { dst, src1, src2 } => {
+            enc::xadd64(sink, dst, src1, src2);
+        }
+
+        Inst::Xeq64 { dst, src1, src2 } => {
+            enc::xeq64(sink, dst, src1, src2);
+        }
+        Inst::Xneq64 { dst, src1, src2 } => {
+            enc::xneq64(sink, dst, src1, src2);
+        }
+        Inst::Xslt64 { dst, src1, src2 } => {
+            enc::xslt64(sink, dst, src1, src2);
+        }
+        Inst::Xslteq64 { dst, src1, src2 } => {
+            enc::xslteq64(sink, dst, src1, src2);
+        }
+        Inst::Xult64 { dst, src1, src2 } => {
+            enc::xult64(sink, dst, src1, src2);
+        }
+        Inst::Xulteq64 { dst, src1, src2 } => {
+            enc::xulteq64(sink, dst, src1, src2);
+        }
+        Inst::Xeq32 { dst, src1, src2 } => {
+            enc::xeq32(sink, dst, src1, src2);
+        }
+        Inst::Xneq32 { dst, src1, src2 } => {
+            enc::xneq32(sink, dst, src1, src2);
+        }
+        Inst::Xslt32 { dst, src1, src2 } => {
+            enc::xslt32(sink, dst, src1, src2);
+        }
+        Inst::Xslteq32 { dst, src1, src2 } => {
+            enc::xslteq32(sink, dst, src1, src2);
+        }
+        Inst::Xult32 { dst, src1, src2 } => {
+            enc::xult32(sink, dst, src1, src2);
+        }
+        Inst::Xulteq32 { dst, src1, src2 } => {
+            enc::xulteq32(sink, dst, src1, src2);
+        }
+
+        Inst::LoadAddr { dst, mem } => {
+            let base = mem.get_base_register();
+            let offset = mem.get_offset_with_state(state);
+
+            if let Some(base) = base {
+                let base = XReg::new(base).unwrap();
+
+                if offset == 0 {
+                    enc::xmov(sink,
dst, base);
+                } else {
+                    // Materialize the offset into `dst` using the narrowest
+                    // constant encoding that fits, then add the base to it.
+                    //
+                    // NOTE(review): the constant is written to `dst` before
+                    // `base` is read by the add, so this relies on `dst` and
+                    // `base` never being the same register -- confirm that
+                    // the operand constraints for `LoadAddr` guarantee it.
+                    if let Ok(offset) = i8::try_from(offset) {
+                        enc::xconst8(sink, dst, offset);
+                    } else if let Ok(offset) = i16::try_from(offset) {
+                        enc::xconst16(sink, dst, offset);
+                    } else if let Ok(offset) = i32::try_from(offset) {
+                        enc::xconst32(sink, dst, offset);
+                    } else {
+                        enc::xconst64(sink, dst, offset);
+                    }
+
+                    match P::pointer_width() {
+                        PointerWidth::PointerWidth32 => enc::xadd32(sink, dst, base, dst),
+                        PointerWidth::PointerWidth64 => enc::xadd64(sink, dst, base, dst),
+                    }
+                }
+            } else {
+                unreachable!("all pulley amodes have a base register right now")
+            }
+        }
+
+        Inst::Load {
+            dst,
+            mem,
+            ty,
+            flags: _,
+            ext,
+        } => {
+            use ExtKind as X;
+            let r = mem.get_base_register().unwrap();
+            let r = reg_to_pulley_xreg(r);
+            let dst = reg_to_pulley_xreg(dst.to_reg());
+            let x = mem.get_offset_with_state(state);
+            // Pick the encoding by extension, type and offset size: no offset
+            // field when the offset is 0, an 8-bit field when it fits in an
+            // `i8`, and the 64-bit field otherwise. In the `Ok(x)` arms `x` is
+            // the narrowed `i8`; in the `Err(_)` arms it is the original `i64`.
+            match (*ext, *ty, i8::try_from(x)) {
+                (X::Sign, types::I32, Ok(0)) => enc::load32_s(sink, dst, r),
+                (X::Sign, types::I32, Ok(x)) => enc::load32_s_offset8(sink, dst, r, x),
+                (X::Sign, types::I32, Err(_)) => enc::load32_s_offset64(sink, dst, r, x),
+
+                (X::Zero, types::I32, Ok(0)) => enc::load32_u(sink, dst, r),
+                (X::Zero, types::I32, Ok(x)) => enc::load32_u_offset8(sink, dst, r, x),
+                (X::Zero, types::I32, Err(_)) => enc::load32_u_offset64(sink, dst, r, x),
+
+                (_, types::I64, Ok(0)) => enc::load64(sink, dst, r),
+                (_, types::I64, Ok(x)) => enc::load64_offset8(sink, dst, r, x),
+                (_, types::I64, Err(_)) => enc::load64_offset64(sink, dst, r, x),
+
+                (..)
=> unimplemented!("load ext={ext:?} ty={ty}"),
+            }
+        }
+
+        Inst::Store {
+            mem,
+            src,
+            ty,
+            flags: _,
+        } => {
+            let r = mem.get_base_register().unwrap();
+            let r = reg_to_pulley_xreg(r);
+            let src = reg_to_pulley_xreg(*src);
+            let x = mem.get_offset_with_state(state);
+            // Same offset-size selection as for loads: none, 8-bit, or 64-bit.
+            match (*ty, i8::try_from(x)) {
+                (types::I32, Ok(0)) => enc::store32(sink, r, src),
+                (types::I32, Ok(x)) => enc::store32_offset8(sink, r, x, src),
+                (types::I32, Err(_)) => enc::store32_offset64(sink, r, x, src),
+
+                (types::I64, Ok(0)) => enc::store64(sink, r, src),
+                (types::I64, Ok(x)) => enc::store64_offset8(sink, r, x, src),
+                (types::I64, Err(_)) => enc::store64_offset64(sink, r, x, src),
+
+                (..) => todo!(),
+            }
+        }
+
+        Inst::BitcastIntFromFloat32 { dst, src } => enc::bitcast_int_from_float_32(sink, dst, src),
+        Inst::BitcastIntFromFloat64 { dst, src } => enc::bitcast_int_from_float_64(sink, dst, src),
+        Inst::BitcastFloatFromInt32 { dst, src } => enc::bitcast_float_from_int_32(sink, dst, src),
+        Inst::BitcastFloatFromInt64 { dst, src } => enc::bitcast_float_from_int_64(sink, dst, src),
+    }
+}
+
+/// Emit a two-register compare-and-branch followed by an unconditional jump.
+///
+/// `enc` encodes the branch on the requested condition into the buffer;
+/// `enc_inverted` encodes the branch on the opposite condition into a scratch
+/// vector that is handed to `add_cond_branch`. Both are called with a zero
+/// placeholder for the target, which is resolved through the label uses
+/// registered here.
+fn br_if_cond_helper<P>(
+    sink: &mut MachBuffer<InstAndKind<P>>,
+    start_offset: u32,
+    src1: XReg,
+    src2: XReg,
+    taken: &MachLabel,
+    not_taken: &MachLabel,
+    mut enc: impl FnMut(&mut MachBuffer<InstAndKind<P>>, XReg, XReg, i32),
+    mut enc_inverted: impl FnMut(&mut SmallVec<[u8; 16]>, XReg, XReg, i32),
+) where
+    P: PulleyTargetKind,
+{
+    // If taken.
+    // The 4-byte target field starts 3 bytes into the instruction; the
+    // `debug_assert`s below check the encoded length against this.
+    let taken_start = start_offset + 3;
+    let taken_end = taken_start + 4;
+
+    sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(3));
+    let mut inverted = SmallVec::<[u8; 16]>::new();
+    enc_inverted(&mut inverted, src1, src2, 0x00000000);
+    debug_assert_eq!(
+        inverted.len(),
+        usize::try_from(taken_end - start_offset).unwrap()
+    );
+
+    sink.add_cond_branch(start_offset, taken_end, *taken, &inverted);
+    enc(sink, src1, src2, 0x00000000);
+    debug_assert_eq!(sink.cur_offset(), taken_end);
+
+    // If not taken.
+    let not_taken_start = taken_end + 1;
+    let not_taken_end = not_taken_start + 4;
+
+    sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::Jump(1));
+    sink.add_uncond_branch(taken_end, not_taken_end, *not_taken);
+    enc::jump(sink, 0x00000000);
+}
+
+/// Convert a Cranelift `Reg` into the interpreter's `XReg`.
+///
+/// Panics if `r` is not a real register or if its hardware encoding is
+/// rejected by `pulley_interpreter::XReg::new`.
+fn reg_to_pulley_xreg(r: Reg) -> pulley_interpreter::XReg {
+    pulley_interpreter::XReg::new(r.to_real_reg().unwrap().hw_enc()).unwrap()
+}
diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
new file mode 100644
index 0000000000..bf6bae4c30
--- /dev/null
+++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
@@ -0,0 +1,918 @@
+//! This module defines Pulley-specific machine instruction types.
+ +use core::marker::PhantomData; + +use crate::binemit::{Addend, CodeOffset, Reloc}; +use crate::ir::types::{self, F32, F64, I128, I16, I32, I64, I8, I8X16, R32, R64}; +use crate::ir::{self, MemFlags, Type}; +use crate::isa::pulley_shared::abi::PulleyMachineDeps; +use crate::isa::FunctionAlignment; +use crate::{machinst::*, trace}; +use crate::{settings, CodegenError, CodegenResult}; +use alloc::string::{String, ToString}; +use regalloc2::{PRegSet, RegClass}; +use smallvec::SmallVec; + +pub mod regs; +pub use self::regs::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; + +//============================================================================= +// Instructions (top level): definition + +pub use crate::isa::pulley_shared::lower::isle::generated_code::MInst as Inst; + +use super::PulleyTargetKind; + +/// Additional information for direct and indirect call instructions. +/// +/// Left out of line to lower the size of the `Inst` enum. +#[derive(Clone, Debug)] +pub struct CallInfo { + pub uses: CallArgList, + pub defs: CallRetList, + pub clobbers: PRegSet, + pub callee_pop_size: u32, +} + +impl Inst { + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(dst: Writable, mem: Amode, ty: Type, flags: MemFlags) -> Inst { + Inst::Load { + dst, + mem, + ty, + flags, + ext: ExtKind::Zero, + } + } + + /// Generic constructor for a store. + pub fn gen_store(mem: Amode, from_reg: Reg, ty: Type, flags: MemFlags) -> Inst { + Inst::Store { + mem, + src: from_reg, + ty, + flags, + } + } +} + +fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { + match inst { + Inst::Args { args } => { + for ArgPair { vreg, preg } in args { + collector.reg_fixed_def(vreg, *preg); + } + } + Inst::Rets { rets } => { + for RetPair { vreg, preg } in rets { + collector.reg_fixed_use(vreg, *preg); + } + } + Inst::Ret => { + unreachable!("`ret` is only added after regalloc") + } + + Inst::Unwind { .. 
} | Inst::Trap { .. } | Inst::Nop => {} + + Inst::GetSp { dst } => { + collector.reg_def(dst); + } + + Inst::LoadExtName { + dst, + name: _, + offset: _, + } => { + collector.reg_def(dst); + } + + Inst::Call { callee: _, info } => { + let CallInfo { uses, defs, .. } = &mut **info; + for CallArgPair { vreg, preg } in uses { + collector.reg_fixed_use(vreg, *preg); + } + for CallRetPair { vreg, preg } in defs { + collector.reg_fixed_def(vreg, *preg); + } + collector.reg_clobbers(info.clobbers); + } + Inst::IndirectCall { callee, info } => { + collector.reg_use(callee); + let CallInfo { uses, defs, .. } = &mut **info; + for CallArgPair { vreg, preg } in uses { + collector.reg_fixed_use(vreg, *preg); + } + for CallRetPair { vreg, preg } in defs { + collector.reg_fixed_def(vreg, *preg); + } + collector.reg_clobbers(info.clobbers); + } + + Inst::Jump { .. } => {} + + Inst::BrIf { + c, + taken: _, + not_taken: _, + } => { + collector.reg_use(c); + } + + Inst::BrIfXeq32 { + src1, + src2, + taken: _, + not_taken: _, + } + | Inst::BrIfXneq32 { + src1, + src2, + taken: _, + not_taken: _, + } + | Inst::BrIfXslt32 { + src1, + src2, + taken: _, + not_taken: _, + } + | Inst::BrIfXslteq32 { + src1, + src2, + taken: _, + not_taken: _, + } + | Inst::BrIfXult32 { + src1, + src2, + taken: _, + not_taken: _, + } + | Inst::BrIfXulteq32 { + src1, + src2, + taken: _, + not_taken: _, + } => { + collector.reg_use(src1); + collector.reg_use(src2); + } + + Inst::Xmov { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + Inst::Fmov { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + Inst::Vmov { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + + Inst::Xconst8 { dst, imm: _ } + | Inst::Xconst16 { dst, imm: _ } + | Inst::Xconst32 { dst, imm: _ } + | Inst::Xconst64 { dst, imm: _ } => { + collector.reg_def(dst); + } + + Inst::Xadd32 { dst, src1, src2 } + | Inst::Xadd64 { dst, src1, src2 } + | Inst::Xeq64 { dst, src1, src2 } + | 
Inst::Xneq64 { dst, src1, src2 } + | Inst::Xslt64 { dst, src1, src2 } + | Inst::Xslteq64 { dst, src1, src2 } + | Inst::Xult64 { dst, src1, src2 } + | Inst::Xulteq64 { dst, src1, src2 } + | Inst::Xeq32 { dst, src1, src2 } + | Inst::Xneq32 { dst, src1, src2 } + | Inst::Xslt32 { dst, src1, src2 } + | Inst::Xslteq32 { dst, src1, src2 } + | Inst::Xult32 { dst, src1, src2 } + | Inst::Xulteq32 { dst, src1, src2 } => { + collector.reg_use(src1); + collector.reg_use(src2); + collector.reg_def(dst); + } + + Inst::LoadAddr { dst, mem } => { + collector.reg_def(dst); + mem.get_operands(collector); + } + + Inst::Load { + dst, + mem, + ty: _, + flags: _, + ext: _, + } => { + collector.reg_def(dst); + mem.get_operands(collector); + } + + Inst::Store { + mem, + src, + ty: _, + flags: _, + } => { + mem.get_operands(collector); + collector.reg_use(src); + } + + Inst::BitcastIntFromFloat32 { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + Inst::BitcastIntFromFloat64 { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + Inst::BitcastFloatFromInt32 { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + Inst::BitcastFloatFromInt64 { dst, src } => { + collector.reg_use(src); + collector.reg_def(dst); + } + } +} + +/// A newtype over a Pulley instruction that also carries a phantom type +/// parameter describing whether we are targeting 32- or 64-bit Pulley bytecode. +/// +/// Implements `Deref`, `DerefMut`, and `From`/`Into` for `Inst` to allow for +/// seamless conversion between `Inst` and `InstAndKind`. +#[derive(Clone, Debug)] +pub struct InstAndKind

+where + P: PulleyTargetKind, +{ + inst: Inst, + kind: PhantomData

, +} + +impl

From for InstAndKind

+where + P: PulleyTargetKind, +{ + fn from(inst: Inst) -> Self { + Self { + inst, + kind: PhantomData, + } + } +} + +impl

From> for Inst +where + P: PulleyTargetKind, +{ + fn from(inst: InstAndKind

) -> Self { + inst.inst + } +} + +impl

core::ops::Deref for InstAndKind

+where + P: PulleyTargetKind, +{ + type Target = Inst; + + fn deref(&self) -> &Self::Target { + &self.inst + } +} + +impl

core::ops::DerefMut for InstAndKind

+where + P: PulleyTargetKind, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inst + } +} + +impl

MachInst for InstAndKind

+where + P: PulleyTargetKind, +{ + type LabelUse = LabelUse; + type ABIMachineSpec = PulleyMachineDeps

; + + const TRAP_OPCODE: &'static [u8] = &[0]; + + fn gen_dummy_use(_reg: Reg) -> Self { + todo!() + } + + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + regalloc2::RegClass::Int => I64, + regalloc2::RegClass::Float => F64, + regalloc2::RegClass::Vector => I8X16, + } + } + + fn is_safepoint(&self) -> bool { + match self.inst { + Inst::Trap { .. } => true, + _ => false, + } + } + + fn get_operands(&mut self, collector: &mut impl OperandVisitor) { + pulley_get_operands(self, collector); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self.inst { + Inst::Xmov { dst, src } => Some((Writable::from_reg(*dst.to_reg()), *src)), + _ => None, + } + } + + fn is_included_in_clobbers(&self) -> bool { + self.is_args() + } + + fn is_trap(&self) -> bool { + match self.inst { + Inst::Trap { .. } => true, + _ => false, + } + } + + fn is_args(&self) -> bool { + match self.inst { + Inst::Args { .. } => true, + _ => false, + } + } + + fn is_term(&self) -> MachTerminator { + match self.inst { + Inst::Ret { .. } | Inst::Rets { .. } => MachTerminator::Ret, + Inst::Jump { .. } => MachTerminator::Uncond, + Inst::BrIf { .. } + | Inst::BrIfXeq32 { .. } + | Inst::BrIfXneq32 { .. } + | Inst::BrIfXslt32 { .. } + | Inst::BrIfXslteq32 { .. } + | Inst::BrIfXult32 { .. } + | Inst::BrIfXulteq32 { .. 
} => MachTerminator::Cond, + _ => MachTerminator::None, + } + } + + fn is_mem_access(&self) -> bool { + todo!() + } + + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self { + match ty { + ir::types::I8 | ir::types::I16 | ir::types::I32 | ir::types::I64 => Inst::Xmov { + dst: WritableXReg::try_from(to_reg).unwrap(), + src: XReg::new(from_reg).unwrap(), + } + .into(), + ir::types::F32 | ir::types::F64 => Inst::Fmov { + dst: WritableFReg::try_from(to_reg).unwrap(), + src: FReg::new(from_reg).unwrap(), + } + .into(), + _ if ty.is_vector() => Inst::Vmov { + dst: WritableVReg::try_from(to_reg).unwrap(), + src: VReg::new(from_reg).unwrap(), + } + .into(), + _ => panic!("don't know how to generate a move for type {ty}"), + } + } + + fn gen_nop(_preferred_size: usize) -> Self { + todo!() + } + + fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { + match ty { + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + R32 => Ok((&[RegClass::Int], &[R32])), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + _ if ty.is_vector() => { + debug_assert!(ty.bits() <= 512); + + // Here we only need to return a SIMD type with the same size as `ty`. + // We use these types for spills and reloads, so prefer types with lanes <= 31 + // since that fits in the immediate field of `vsetivli`. 
+ const SIMD_TYPES: [[Type; 1]; 6] = [ + [types::I8X2], + [types::I8X4], + [types::I8X8], + [types::I8X16], + [types::I16X16], + [types::I32X16], + ]; + let idx = (ty.bytes().ilog2() - 1) as usize; + let ty = &SIMD_TYPES[idx][..]; + + Ok((&[RegClass::Vector], ty)) + } + _ => Err(CodegenError::Unsupported(format!( + "Unexpected SSA-value type: {ty}" + ))), + } + } + + fn gen_jump(_target: MachLabel) -> Self { + todo!() + } + + fn worst_case_size() -> CodeOffset { + // `BrIfXeq32 { a, b, taken, not_taken }` expands to `br_if_xeq32 a, b, taken; jump not_taken`. + // + // The first instruction is seven bytes long: + // * 1 byte opcode + // * 1 byte `a` register encoding + // * 1 byte `b` register encoding + // * 4 byte `taken` displacement + // + // And the second instruction is five bytes long: + // * 1 byte opcode + // * 4 byte `not_taken` displacement + 12 + } + + fn ref_type_regclass(_settings: &settings::Flags) -> RegClass { + RegClass::Int + } + + fn function_alignment() -> FunctionAlignment { + FunctionAlignment { + minimum: 1, + preferred: 1, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. + +pub fn reg_name(reg: Reg) -> String { + match reg.to_real_reg() { + Some(real) => { + let n = real.hw_enc(); + match (real.class(), n) { + (RegClass::Int, 63) => format!("sp"), + (RegClass::Int, 62) => format!("lr"), + (RegClass::Int, 61) => format!("fp"), + (RegClass::Int, 60) => format!("tmp0"), + (RegClass::Int, 59) => format!("tmp1"), + + (RegClass::Int, _) => format!("x{n}"), + (RegClass::Float, _) => format!("f{n}"), + (RegClass::Vector, _) => format!("v{n}"), + } + } + None => { + format!("{reg:?}") + } + } +} + +impl Inst { + fn print_with_state

(&self, _state: &mut EmitState

) -> String + where + P: PulleyTargetKind, + { + use core::fmt::Write; + + let format_reg = |reg: Reg| -> String { reg_name(reg) }; + + let format_ext = |ext: ExtKind| -> &'static str { + match ext { + ExtKind::None => "", + ExtKind::Sign => "_s", + ExtKind::Zero => "_u", + } + }; + + match self { + Inst::Args { args } => { + let mut s = "args".to_string(); + for arg in args { + let preg = format_reg(arg.preg); + let def = format_reg(arg.vreg.to_reg()); + write!(&mut s, " {def}={preg}").unwrap(); + } + s + } + Inst::Rets { rets } => { + let mut s = "rets".to_string(); + for ret in rets { + let preg = format_reg(ret.preg); + let vreg = format_reg(ret.vreg); + write!(&mut s, " {vreg}={preg}").unwrap(); + } + s + } + + Inst::Unwind { inst } => format!("unwind {inst:?}"), + + Inst::Trap { code } => format!("trap // code = {code:?}"), + + Inst::Nop => format!("nop"), + + Inst::Ret => format!("ret"), + + Inst::GetSp { dst } => { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = get_sp") + } + + Inst::LoadExtName { dst, name, offset } => { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = load_ext_name {name:?}, {offset}") + } + + Inst::Call { callee, info } => { + format!("call {callee:?}, {info:?}") + } + + Inst::IndirectCall { callee, info } => { + let callee = format_reg(**callee); + format!("indirect_call {callee}, {info:?}") + } + + Inst::Jump { label } => format!("jump {}", label.to_string()), + + Inst::BrIf { + c, + taken, + not_taken, + } => { + let c = format_reg(**c); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if {c}, {taken}; jump {not_taken}") + } + + Inst::BrIfXeq32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 = format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xeq32 {src1}, {src2}, {taken}; jump {not_taken}") + } + Inst::BrIfXneq32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 
= format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xneq32 {src1}, {src2}, {taken}; jump {not_taken}") + } + Inst::BrIfXslt32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 = format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xslt32 {src1}, {src2}, {taken}; jump {not_taken}") + } + Inst::BrIfXslteq32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 = format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xslteq32 {src1}, {src2}, {taken}; jump {not_taken}") + } + Inst::BrIfXult32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 = format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xult32 {src1}, {src2}, {taken}; jump {not_taken}") + } + Inst::BrIfXulteq32 { + src1, + src2, + taken, + not_taken, + } => { + let src1 = format_reg(**src1); + let src2 = format_reg(**src2); + let taken = taken.to_string(); + let not_taken = not_taken.to_string(); + format!("br_if_xulteq32 {src1}, {src2}, {taken}; jump {not_taken}") + } + + Inst::Xmov { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = xmov {src}") + } + Inst::Fmov { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = fmov {src}") + } + Inst::Vmov { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = vmov {src}") + } + + Inst::Xconst8 { dst, imm } => { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = xconst8 {imm}") + } + Inst::Xconst16 { dst, imm } => { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = xconst16 {imm}") + } + Inst::Xconst32 { dst, imm } 
=> { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = xconst32 {imm}") + } + Inst::Xconst64 { dst, imm } => { + let dst = format_reg(*dst.to_reg()); + format!("{dst} = xconst64 {imm}") + } + + Inst::Xadd32 { dst, src1, src2 } => format!( + "{} = xadd32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xadd64 { dst, src1, src2 } => format!( + "{} = xadd64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + + Inst::Xeq64 { dst, src1, src2 } => format!( + "{} = xeq64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xneq64 { dst, src1, src2 } => format!( + "{} = xneq64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xslt64 { dst, src1, src2 } => format!( + "{} = xslt64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xslteq64 { dst, src1, src2 } => format!( + "{} = xslteq64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xult64 { dst, src1, src2 } => format!( + "{} = xult64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xulteq64 { dst, src1, src2 } => format!( + "{} = xulteq64 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xeq32 { dst, src1, src2 } => format!( + "{} = xeq32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xneq32 { dst, src1, src2 } => format!( + "{} = xneq32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xslt32 { dst, src1, src2 } => format!( + "{} = xslt32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xslteq32 { dst, src1, src2 } => format!( + "{} = xslteq32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + 
Inst::Xult32 { dst, src1, src2 } => format!( + "{} = xult32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + Inst::Xulteq32 { dst, src1, src2 } => format!( + "{} = xulteq32 {}, {}", + format_reg(*dst.to_reg()), + format_reg(**src1), + format_reg(**src2) + ), + + Inst::LoadAddr { dst, mem } => { + let dst = format_reg(*dst.to_reg()); + let mem = mem.to_string(); + format!("{dst} = load_addr {mem}") + } + + Inst::Load { + dst, + mem, + ty, + flags, + ext, + } => { + let dst = format_reg(dst.to_reg()); + let ty = ty.bits(); + let ext = format_ext(*ext); + let mem = mem.to_string(); + format!("{dst} = load{ty}{ext} {mem} // flags ={flags}") + } + + Inst::Store { + mem, + src, + ty, + flags, + } => { + let ty = ty.bits(); + let mem = mem.to_string(); + let src = format_reg(*src); + format!("store{ty} {mem}, {src} // flags = {flags}") + } + + Inst::BitcastIntFromFloat32 { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = bitcast_int_from_float32 {src}") + } + Inst::BitcastIntFromFloat64 { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = bitcast_int_from_float64 {src}") + } + Inst::BitcastFloatFromInt32 { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = bitcast_float_from_int32 {src}") + } + Inst::BitcastFloatFromInt64 { dst, src } => { + let dst = format_reg(*dst.to_reg()); + let src = format_reg(**src); + format!("{dst} = bitcast_float_from_int64 {src}") + } + } + } +} + +/// Different forms of label references for different instruction formats. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum LabelUse { + /// A PC-relative `jump`/`call`/etc... instruction with an `i32` relative + /// target. The payload value is an addend that describes the positive + /// offset from the start of the instruction to the offset being relocated. 
+ Jump(u32), +} + +impl MachInstLabelUse for LabelUse { + /// Alignment for veneer code. Pulley instructions don't require any + /// particular alignment. + const ALIGN: CodeOffset = 1; + + /// Maximum PC-relative range (positive), inclusive. + fn max_pos_range(self) -> CodeOffset { + match self { + Self::Jump(_) => 0x7fff_ffff, + } + } + + /// Maximum PC-relative range (negative). + fn max_neg_range(self) -> CodeOffset { + match self { + Self::Jump(_) => 0x8000_0000, + } + } + + /// Size of window into code needed to do the patch. + fn patch_size(self) -> CodeOffset { + match self { + Self::Jump(_) => 4, + } + } + + /// Perform the patch. + fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) { + let use_relative = (label_offset as i64) - (use_offset as i64); + debug_assert!(use_relative <= self.max_pos_range() as i64); + debug_assert!(use_relative >= -(self.max_neg_range() as i64)); + let pc_rel = i32::try_from(use_relative).unwrap() as u32; + match self { + Self::Jump(addend) => { + let value = pc_rel.wrapping_add(addend); + trace!( + "patching label use @ {use_offset:#x} to label {label_offset:#x} via \ + PC-relative offset {pc_rel:#x}" + ); + buffer.copy_from_slice(&value.to_le_bytes()[..]); + } + } + } + + /// Is a veneer supported for this label reference type? + fn supports_veneer(self) -> bool { + match self { + Self::Jump(_) => false, + } + } + + /// How large is the veneer, if supported? + fn veneer_size(self) -> CodeOffset { + match self { + Self::Jump(_) => 0, + } + } + + fn worst_case_veneer_size() -> CodeOffset { + 0 + } + + /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return + /// an offset and label-use for the veneer's use of the original label. 
+ fn generate_veneer( + self, + _buffer: &mut [u8], + _veneer_offset: CodeOffset, + ) -> (CodeOffset, LabelUse) { + match self { + Self::Jump(_) => panic!("veneer not supported for {self:?}"), + } + } + + fn from_reloc(reloc: Reloc, addend: Addend) -> Option { + match reloc { + Reloc::X86CallPCRel4 if addend < 0 => { + // We are always relocating some offset that is within an + // instruction, but pulley adds the offset relative to the PC + // pointing to the *start* of the instruction. Therefore, adjust + // back to the beginning of the instruction. + Some(LabelUse::Jump(i32::try_from(-addend).unwrap() as u32)) + } + _ => None, + } + } +} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/regs.rs b/cranelift/codegen/src/isa/pulley_shared/inst/regs.rs new file mode 100644 index 0000000000..8a6aa5eb56 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/inst/regs.rs @@ -0,0 +1,169 @@ +//! Pulley registers. + +use crate::machinst::{Reg, Writable}; +use regalloc2::{PReg, RegClass, VReg}; + +#[inline] +pub fn x_reg(enc: usize) -> Reg { + let p = PReg::new(enc, RegClass::Int); + let v = VReg::new(p.index(), p.class()); + Reg::from(v) +} + +#[inline] +pub const fn px_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Int) +} + +#[inline] +pub fn f_reg(enc: usize) -> Reg { + let p = PReg::new(enc, RegClass::Float); + let v = VReg::new(p.index(), p.class()); + Reg::from(v) +} + +#[inline] +pub const fn pf_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Float) +} + +#[inline] +pub fn v_reg(enc: usize) -> Reg { + let p = PReg::new(enc, RegClass::Vector); + let v = VReg::new(p.index(), p.class()); + Reg::from(v) +} + +#[inline] +pub const fn pv_reg(enc: usize) -> PReg { + PReg::new(enc, RegClass::Vector) +} + +macro_rules! 
define_registers { + ( + $( + $reg:expr => $readable:ident, $writable:ident; + )* + ) => { + $( + #[inline] + #[allow(dead_code)] + pub fn $readable() -> Reg { + $reg + } + + #[inline] + #[allow(dead_code)] + pub fn $writable() -> Writable { + Writable::from_reg($readable()) + } + )* + }; +} + +define_registers! { + x_reg(0) => x0, writable_x0; + x_reg(1) => x1, writable_x1; + x_reg(2) => x2, writable_x2; + x_reg(3) => x3, writable_x3; + x_reg(4) => x4, writable_x4; + x_reg(5) => x5, writable_x5; + x_reg(6) => x6, writable_x6; + x_reg(7) => x7, writable_x7; + x_reg(8) => x8, writable_x8; + x_reg(9) => x9, writable_x9; + x_reg(10) => x10, writable_x10; + x_reg(11) => x11, writable_x11; + x_reg(12) => x12, writable_x12; + x_reg(13) => x13, writable_x13; + x_reg(14) => x14, writable_x14; + x_reg(15) => x15, writable_x15; + x_reg(16) => x16, writable_x16; + x_reg(17) => x17, writable_x17; + x_reg(18) => x18, writable_x18; + x_reg(19) => x19, writable_x19; + x_reg(20) => x20, writable_x20; + x_reg(21) => x21, writable_x21; + x_reg(22) => x22, writable_x22; + x_reg(23) => x23, writable_x23; + x_reg(24) => x24, writable_x24; + x_reg(25) => x25, writable_x25; + x_reg(26) => x26, writable_x26; + x_reg(27) => x27, writable_x27; + x_reg(28) => x28, writable_x28; + x_reg(29) => x29, writable_x29; + x_reg(30) => x30, writable_x30; + x_reg(31) => x31, writable_x31; + + x_reg(32) => stack_reg, writable_stack_reg; + x_reg(33) => link_reg, writable_link_reg; + x_reg(34) => fp_reg, writable_fp_reg; + x_reg(35) => spilltmp_reg, writable_spilltmp_reg; + x_reg(36) => spilltmp2_reg, writable_spilltmp2_reg; + + f_reg(0) => f0, writable_f0; + f_reg(1) => f1, writable_f1; + f_reg(2) => f2, writable_f2; + f_reg(3) => f3, writable_f3; + f_reg(4) => f4, writable_f4; + f_reg(5) => f5, writable_f5; + f_reg(6) => f6, writable_f6; + f_reg(7) => f7, writable_f7; + f_reg(8) => f8, writable_f8; + f_reg(9) => f9, writable_f9; + f_reg(10) => f10, writable_f10; + f_reg(11) => f11, writable_f11; + 
f_reg(12) => f12, writable_f12; + f_reg(13) => f13, writable_f13; + f_reg(14) => f14, writable_f14; + f_reg(15) => f15, writable_f15; + f_reg(16) => f16, writable_f16; + f_reg(17) => f17, writable_f17; + f_reg(18) => f18, writable_f18; + f_reg(19) => f19, writable_f19; + f_reg(20) => f20, writable_f20; + f_reg(21) => f21, writable_f21; + f_reg(22) => f22, writable_f22; + f_reg(23) => f23, writable_f23; + f_reg(24) => f24, writable_f24; + f_reg(25) => f25, writable_f25; + f_reg(26) => f26, writable_f26; + f_reg(27) => f27, writable_f27; + f_reg(28) => f28, writable_f28; + f_reg(29) => f29, writable_f29; + f_reg(30) => f30, writable_f30; + f_reg(31) => f31, writable_f31; + + v_reg(0) => v0, writable_v0; + v_reg(1) => v1, writable_v1; + v_reg(2) => v2, writable_v2; + v_reg(3) => v3, writable_v3; + v_reg(4) => v4, writable_v4; + v_reg(5) => v5, writable_v5; + v_reg(6) => v6, writable_v6; + v_reg(7) => v7, writable_v7; + v_reg(8) => v8, writable_v8; + v_reg(9) => v9, writable_v9; + v_reg(10) => v10, writable_v10; + v_reg(11) => v11, writable_v11; + v_reg(12) => v12, writable_v12; + v_reg(13) => v13, writable_v13; + v_reg(14) => v14, writable_v14; + v_reg(15) => v15, writable_v15; + v_reg(16) => v16, writable_v16; + v_reg(17) => v17, writable_v17; + v_reg(18) => v18, writable_v18; + v_reg(19) => v19, writable_v19; + v_reg(20) => v20, writable_v20; + v_reg(21) => v21, writable_v21; + v_reg(22) => v22, writable_v22; + v_reg(23) => v23, writable_v23; + v_reg(24) => v24, writable_v24; + v_reg(25) => v25, writable_v25; + v_reg(26) => v26, writable_v26; + v_reg(27) => v27, writable_v27; + v_reg(28) => v28, writable_v28; + v_reg(29) => v29, writable_v29; + v_reg(30) => v30, writable_v30; + v_reg(31) => v31, writable_v31; +} diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle new file mode 100644 index 0000000000..11bf2b6214 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -0,0 +1,178 @@ 
+;; Pulley instruction selection and CLIF-to-MachInst lowering. + +;; The main lowering constructor term: takes a clif `Inst` and returns the +;; register(s) within which the lowered instruction's result values live. +(decl partial lower (Inst) InstOutput) + +;;;; Rules for Control Flow ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The main control-flow-lowering term: takes a control-flow instruction and +;; target(s) and emits the necessary instructions. +(decl partial lower_branch (Inst MachLabelSlice) Unit) + +;; Unconditional jumps. +(rule (lower_branch (jump _) (single_target label)) + (emit_side_effect (pulley_jump label))) + +;; Generic case for conditional branches. +(rule -1 (lower_branch (brif (maybe_uextend c) _ _) (two_targets then else)) + (emit_side_effect (pulley_br_if c then else))) + +;; Conditional branches on `icmp`s. +(rule (lower_branch (brif (maybe_uextend (icmp cc a b @ (value_type $I32))) _ _) + (two_targets then else)) + (emit_side_effect (lower_brif_of_icmp32 cc a b then else))) + +(decl lower_brif_of_icmp32 (IntCC Value Value MachLabel MachLabel) SideEffectNoResult) +(rule (lower_brif_of_icmp32 (IntCC.Equal) a b then else) + (pulley_br_if_xeq32 a b then else)) +(rule (lower_brif_of_icmp32 (IntCC.NotEqual) a b then else) + (pulley_br_if_xneq32 a b then else)) +(rule (lower_brif_of_icmp32 (IntCC.SignedLessThan) a b then else) + (pulley_br_if_xslt32 a b then else)) +(rule (lower_brif_of_icmp32 (IntCC.SignedLessThanOrEqual) a b then else) + (pulley_br_if_xslteq32 a b then else)) +(rule (lower_brif_of_icmp32 (IntCC.UnsignedLessThan) a b then else) + (pulley_br_if_xult32 a b then else)) +(rule (lower_brif_of_icmp32 (IntCC.UnsignedLessThanOrEqual) a b then else) + (pulley_br_if_xulteq32 a b then else)) + +;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the +;; operation. 
+(rule (lower_brif_of_icmp32 (IntCC.SignedGreaterThan) a b then else) + (lower_brif_of_icmp32 (IntCC.SignedLessThan) b a then else)) +(rule (lower_brif_of_icmp32 (IntCC.SignedGreaterThanOrEqual) a b then else) + (lower_brif_of_icmp32 (IntCC.SignedLessThanOrEqual) b a then else)) +(rule (lower_brif_of_icmp32 (IntCC.UnsignedGreaterThan) a b then else) + (lower_brif_of_icmp32 (IntCC.UnsignedLessThan) b a then else)) +(rule (lower_brif_of_icmp32 (IntCC.UnsignedGreaterThanOrEqual) a b then else) + (lower_brif_of_icmp32 (IntCC.UnsignedLessThanOrEqual) b a then else)) + +;; Branch tables. +(decl lower_br_table (Reg MachLabelSlice) Unit) +(extern constructor lower_br_table lower_br_table) +(rule (lower_branch (br_table index _) targets) + (lower_br_table index targets)) + +;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (trap code)) + (side_effect (pulley_trap code))) + +;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (get_stack_pointer)) + (pulley_get_sp)) + +;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; N.B.: the `ret` itself is generated by the ABI. 
+(rule (lower (return args)) + (lower_return args)) + +;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (call (func_ref_data sig_ref extname dist) inputs)) + (gen_call sig_ref extname dist inputs)) + +(rule (lower (call_indirect sig_ref val inputs)) + (gen_call_indirect sig_ref val inputs)) + +;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (return_call (func_ref_data sig_ref extname dist) args)) + (gen_return_call sig_ref extname dist args)) + +(rule (lower (return_call_indirect sig_ref callee args)) + (gen_return_call_indirect sig_ref callee args)) + +;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (iconst (u64_from_imm64 n)))) + (imm ty n)) + +;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (iadd a b))) + (pulley_xadd32 a b)) + +(rule (lower (has_type $I16 (iadd a b))) + (pulley_xadd32 a b)) + +(rule (lower (has_type $I32 (iadd a b))) + (pulley_xadd32 a b)) + +(rule (lower (has_type $I64 (iadd a b))) + (pulley_xadd64 a b)) + +;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (icmp cc a b @ (value_type $I64))) + (lower_icmp $I64 cc a b)) +(rule (lower (icmp cc a b @ (value_type (fits_in_32 _)))) + (lower_icmp $I32 cc a b)) + +(decl lower_icmp (Type IntCC Value Value) XReg) + +(rule (lower_icmp $I64 (IntCC.Equal) a b) + (pulley_xeq64 a b)) + +(rule (lower_icmp $I64 (IntCC.NotEqual) a b) + (pulley_xneq64 a b)) + +(rule (lower_icmp $I64 (IntCC.SignedLessThan) a b) + (pulley_xslt64 a b)) + +(rule (lower_icmp $I64 (IntCC.SignedLessThanOrEqual) a b) + (pulley_xslteq64 a b)) + +(rule (lower_icmp $I64 (IntCC.UnsignedLessThan) a b) + (pulley_xult64 a b)) + +(rule (lower_icmp $I64 (IntCC.UnsignedLessThanOrEqual) a b) + (pulley_xulteq64 a b)) + +(rule (lower_icmp $I32 (IntCC.Equal) a b) + (pulley_xeq32 a b)) 
+ +(rule (lower_icmp $I32 (IntCC.NotEqual) a b) + (pulley_xneq32 a b)) + +(rule (lower_icmp $I32 (IntCC.SignedLessThan) a b) + (pulley_xslt32 a b)) + +(rule (lower_icmp $I32 (IntCC.SignedLessThanOrEqual) a b) + (pulley_xslteq32 a b)) + +(rule (lower_icmp $I32 (IntCC.UnsignedLessThan) a b) + (pulley_xult32 a b)) + +(rule (lower_icmp $I32 (IntCC.UnsignedLessThanOrEqual) a b) + (pulley_xulteq32 a b)) + +;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the +;; operation. +(rule (lower_icmp ty (IntCC.SignedGreaterThan) a b) + (lower_icmp ty (IntCC.SignedLessThan) b a)) +(rule (lower_icmp ty (IntCC.SignedGreaterThanOrEqual) a b) + (lower_icmp ty (IntCC.SignedLessThanOrEqual) b a)) +(rule (lower_icmp ty (IntCC.UnsignedGreaterThan) a b) + (lower_icmp ty (IntCC.UnsignedLessThan) b a)) +(rule (lower_icmp ty (IntCC.UnsignedGreaterThanOrEqual) a b) + (lower_icmp ty (IntCC.UnsignedLessThanOrEqual) b a)) + +;;;; Rules for `load` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type ty (load flags addr (offset32 offset)))) + (pulley_load (Amode.RegOffset addr (i32_as_i64 offset)) + ty + flags + (ExtKind.Zero))) + +;;;; Rules for `store` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (store flags src @ (value_type ty) addr (offset32 offset))) + (side_effect (pulley_store (Amode.RegOffset addr (i32_as_i64 offset)) + src + ty + flags))) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.rs b/cranelift/codegen/src/isa/pulley_shared/lower.rs new file mode 100644 index 0000000000..9f8754d513 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/lower.rs @@ -0,0 +1,36 @@ +//! Lowering backend for Pulley. + +pub mod isle; + +use super::{inst::*, PulleyBackend, PulleyTargetKind}; +use crate::{ + ir, + machinst::{lower::*, *}, +}; + +impl

LowerBackend for PulleyBackend

+where + P: PulleyTargetKind, +{ + type MInst = InstAndKind

; + + fn lower(&self, ctx: &mut Lower, ir_inst: ir::Inst) -> Option { + isle::lower(ctx, self, ir_inst) + } + + fn lower_branch( + &self, + ctx: &mut Lower, + ir_inst: ir::Inst, + targets: &[MachLabel], + ) -> Option<()> { + isle::lower_branch(ctx, self, ir_inst, targets) + } + + fn maybe_pinned_reg(&self) -> Option { + // Pulley does not support this feature right now. + None + } + + type FactFlowState = (); +} diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs new file mode 100644 index 0000000000..51e17d29ce --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -0,0 +1,194 @@ +//! ISLE integration glue code for Pulley lowering. + +// Pull in the ISLE generated code. +pub mod generated_code; +use generated_code::MInst; +use inst::InstAndKind; + +// Types that the generated ISLE code uses via `use super::*`. +use crate::ir::{condcodes::*, immediates::*, types::*, *}; +use crate::isa::pulley_shared::{ + abi::*, + inst::{CallInfo, FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}, + *, +}; +use crate::machinst::{ + abi::{ArgPair, RetPair, StackAMode}, + isle::*, + IsTailCall, MachInst, Reg, VCodeConstant, VCodeConstantData, +}; +use alloc::boxed::Box; +use regalloc2::PReg; +type Unit = (); +type VecArgPair = Vec; +type VecRetPair = Vec; +type BoxCallInfo = Box; +type BoxExternalName = Box; + +pub(crate) struct PulleyIsleContext<'a, 'b, I, B> +where + I: VCodeInst, + B: LowerBackend, +{ + pub lower_ctx: &'a mut Lower<'b, I>, + pub backend: &'a B, +} + +impl<'a, 'b, P> PulleyIsleContext<'a, 'b, InstAndKind

, PulleyBackend

> +where + P: PulleyTargetKind, +{ + fn new(lower_ctx: &'a mut Lower<'b, InstAndKind

>, backend: &'a PulleyBackend

) -> Self { + Self { lower_ctx, backend } + } +} + +impl

generated_code::Context for PulleyIsleContext<'_, '_, InstAndKind

, PulleyBackend

> +where + P: PulleyTargetKind, +{ + crate::isle_lower_prelude_methods!(InstAndKind

); + crate::isle_prelude_caller_methods!(PulleyMachineDeps

, PulleyABICallSite

); + + fn gen_return_call( + &mut self, + callee_sig: SigRef, + callee: ExternalName, + distance: RelocDistance, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let call_site = PulleyABICallSite::from_func( + self.lower_ctx.sigs(), + callee_sig, + &callee, + IsTailCall::Yes, + distance, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn gen_return_call_indirect( + &mut self, + callee_sig: SigRef, + callee: Value, + args: ValueSlice, + ) -> InstOutput { + let caller_conv = isa::CallConv::Tail; + debug_assert_eq!( + self.lower_ctx.abi().call_conv(self.lower_ctx.sigs()), + caller_conv, + "Can only do `return_call`s from within a `tail` calling convention function" + ); + + let callee = self.put_in_reg(callee); + + let call_site = PulleyABICallSite::from_ptr( + self.lower_ctx.sigs(), + callee_sig, + callee, + IsTailCall::Yes, + caller_conv, + self.backend.flags().clone(), + ); + call_site.emit_return_call(self.lower_ctx, args); + + InstOutput::new() + } + + fn lower_br_table(&mut self, _index: Reg, _targets: &[MachLabel]) -> Unit { + todo!() + } + + fn vreg_new(&mut self, r: Reg) -> VReg { + VReg::new(r).unwrap() + } + fn writable_vreg_new(&mut self, r: WritableReg) -> WritableVReg { + r.map(|wr| VReg::new(wr).unwrap()) + } + fn writable_vreg_to_vreg(&mut self, arg0: WritableVReg) -> VReg { + arg0.to_reg() + } + fn writable_vreg_to_writable_reg(&mut self, arg0: WritableVReg) -> WritableReg { + arg0.map(|vr| vr.to_reg()) + } + fn vreg_to_reg(&mut self, arg0: VReg) -> Reg { + *arg0 + } + fn xreg_new(&mut self, r: Reg) -> XReg { + XReg::new(r).unwrap() + } + fn writable_xreg_new(&mut self, r: WritableReg) -> WritableXReg { + r.map(|wr| XReg::new(wr).unwrap()) + } + fn 
writable_xreg_to_xreg(&mut self, arg0: WritableXReg) -> XReg { + arg0.to_reg() + } + fn writable_xreg_to_writable_reg(&mut self, arg0: WritableXReg) -> WritableReg { + arg0.map(|xr| xr.to_reg()) + } + fn xreg_to_reg(&mut self, arg0: XReg) -> Reg { + *arg0 + } + fn freg_new(&mut self, r: Reg) -> FReg { + FReg::new(r).unwrap() + } + fn writable_freg_new(&mut self, r: WritableReg) -> WritableFReg { + r.map(|wr| FReg::new(wr).unwrap()) + } + fn writable_freg_to_freg(&mut self, arg0: WritableFReg) -> FReg { + arg0.to_reg() + } + fn writable_freg_to_writable_reg(&mut self, arg0: WritableFReg) -> WritableReg { + arg0.map(|fr| fr.to_reg()) + } + fn freg_to_reg(&mut self, arg0: FReg) -> Reg { + *arg0 + } + + #[inline] + fn emit(&mut self, arg0: &MInst) -> Unit { + self.lower_ctx.emit(arg0.clone().into()); + } +} + +/// The main entry point for lowering with ISLE. +pub(crate) fn lower

( + lower_ctx: &mut Lower>, + backend: &PulleyBackend

, + inst: Inst, +) -> Option +where + P: PulleyTargetKind, +{ + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = PulleyIsleContext::new(lower_ctx, backend); + generated_code::constructor_lower(&mut isle_ctx, inst) +} + +/// The main entry point for branch lowering with ISLE. +pub(crate) fn lower_branch

( + lower_ctx: &mut Lower>, + backend: &PulleyBackend

, + branch: Inst, + targets: &[MachLabel], +) -> Option<()> +where + P: PulleyTargetKind, +{ + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = PulleyIsleContext::new(lower_ctx, backend); + generated_code::constructor_lower_branch(&mut isle_ctx, branch, targets) +} diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle/generated_code.rs new file mode 100644 index 0000000000..5467fc5ea6 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle/generated_code.rs @@ -0,0 +1,17 @@ +// See https://github.com/rust-lang/rust/issues/47995: we cannot use `#![...]` attributes inside of +// the generated ISLE source below because we include!() it. We must include!() it because its path +// depends on an environment variable; and also because of this, we can't do the `#[path = "..."] +// mod generated_code;` trick either. +#![allow( + dead_code, + unreachable_code, + unreachable_patterns, + unused_imports, + unused_variables, + non_snake_case, + unused_mut, + irrefutable_let_patterns, + clippy::all +)] + +include!(concat!(env!("ISLE_DIR"), "/isle_pulley_shared.rs")); diff --git a/cranelift/codegen/src/isa/pulley_shared/mod.rs b/cranelift/codegen/src/isa/pulley_shared/mod.rs new file mode 100644 index 0000000000..50e8c83749 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/mod.rs @@ -0,0 +1,281 @@ +//! Common support compiling to either 32- or 64-bit Pulley bytecode. 
+ +mod abi; +mod inst; +mod lower; +mod settings; + +use self::inst::EmitInfo; +use super::{Builder as IsaBuilder, FunctionAlignment}; +use crate::{ + dominator_tree::DominatorTree, + ir, + isa::{self, OwnedTargetIsa, TargetIsa}, + machinst::{self, CompiledCodeStencil, MachInst, SigSet, VCode}, + result::CodegenResult, + settings::{self as shared_settings, Flags}, + MachTextSectionBuilder, TextSectionBuilder, +}; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt::Debug; +use core::marker::PhantomData; +use cranelift_control::ControlPlane; +use target_lexicon::{Architecture, Triple}; + +pub use settings::Flags as PulleyFlags; + +/// A trait to abstract over the different kinds of Pulley targets that exist +/// (32- vs 64-bit). +pub trait PulleyTargetKind: 'static + Clone + Debug + Default + Send + Sync { + // Required types and methods. + + fn pointer_width() -> PointerWidth; + + // Provided methods. Don't overwrite. + + fn name() -> &'static str { + match Self::pointer_width() { + PointerWidth::PointerWidth32 => "pulley32", + PointerWidth::PointerWidth64 => "pulley64", + } + } +} + +pub enum PointerWidth { + PointerWidth32, + PointerWidth64, +} + +impl PointerWidth { + pub fn bits(self) -> u8 { + match self { + PointerWidth::PointerWidth32 => 32, + PointerWidth::PointerWidth64 => 64, + } + } +} + +/// A Pulley backend. +pub struct PulleyBackend

+where + P: PulleyTargetKind, +{ + pulley_target: PhantomData

, + triple: Triple, + flags: Flags, + isa_flags: PulleyFlags, +} + +impl

core::fmt::Debug for PulleyBackend

+where + P: PulleyTargetKind, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let PulleyBackend { + pulley_target: _, + triple, + flags: _, + isa_flags: _, + } = self; + f.debug_struct("PulleyBackend") + .field("triple", triple) + .finish_non_exhaustive() + } +} + +impl

core::fmt::Display for PulleyBackend

+where + P: PulleyTargetKind, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + core::fmt::Debug::fmt(self, f) + } +} + +impl

PulleyBackend

+where + P: PulleyTargetKind, +{ + /// Create a new pulley backend with the given (shared) flags. + pub fn new_with_flags( + triple: Triple, + flags: shared_settings::Flags, + isa_flags: PulleyFlags, + ) -> Self { + PulleyBackend { + pulley_target: PhantomData, + triple, + flags, + isa_flags, + } + } + + /// This performs lowering to VCode, register-allocates the code, computes block layout and + /// finalizes branches. The result is ready for binary emission. + fn compile_vcode( + &self, + func: &ir::Function, + domtree: &DominatorTree, + ctrl_plane: &mut ControlPlane, + ) -> CodegenResult<(VCode>, regalloc2::Output)> { + let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); + let sigs = SigSet::new::>(func, &self.flags)?; + let abi = abi::PulleyCallee::new(func, self, &self.isa_flags, &sigs)?; + machinst::compile::compile::(func, domtree, self, abi, emit_info, sigs, ctrl_plane) + } +} + +impl

TargetIsa for PulleyBackend

+where + P: PulleyTargetKind, +{ + fn name(&self) -> &'static str { + P::name() + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &Flags { + &self.flags + } + + fn isa_flags(&self) -> Vec { + self.isa_flags.iter().collect() + } + + fn dynamic_vector_bytes(&self, _dynamic_ty: ir::Type) -> u32 { + 512 + } + + fn page_size_align_log2(&self) -> u8 { + // Claim 64KiB pages to be conservative. + 16 + } + + fn compile_function( + &self, + func: &ir::Function, + domtree: &DominatorTree, + want_disasm: bool, + ctrl_plane: &mut cranelift_control::ControlPlane, + ) -> CodegenResult { + let (vcode, regalloc_result) = self.compile_vcode(func, domtree, ctrl_plane)?; + + let want_disasm = + want_disasm || (cfg!(feature = "trace-log") && log::log_enabled!(log::Level::Debug)); + let emit_result = vcode.emit(®alloc_result, want_disasm, &self.flags, ctrl_plane); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer; + let sized_stackslot_offsets = emit_result.sized_stackslot_offsets; + let dynamic_stackslot_offsets = emit_result.dynamic_stackslot_offsets; + + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } + + Ok(CompiledCodeStencil { + buffer, + frame_size, + vcode: emit_result.disasm, + value_labels_ranges, + sized_stackslot_offsets, + dynamic_stackslot_offsets, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, + }) + } + + fn emit_unwind_info( + &self, + _result: &crate::CompiledCode, + _kind: super::unwind::UnwindInfoKind, + ) -> CodegenResult> { + // TODO: actually support unwind info? + Ok(None) + } + + fn text_section_builder( + &self, + num_labeled_funcs: usize, + ) -> alloc::boxed::Box { + Box::new(MachTextSectionBuilder::>::new( + num_labeled_funcs, + )) + } + + fn function_alignment(&self) -> FunctionAlignment { + inst::InstAndKind::

::function_alignment() + } + + fn has_native_fma(&self) -> bool { + false + } + + fn has_x86_blendv_lowering(&self, _ty: ir::Type) -> bool { + false + } + + fn has_x86_pshufb_lowering(&self) -> bool { + false + } + + fn has_x86_pmulhrsw_lowering(&self) -> bool { + false + } + + fn has_x86_pmaddubsw_lowering(&self) -> bool { + false + } +} + +/// Create a new Pulley ISA builder. +pub fn isa_builder(triple: Triple) -> IsaBuilder { + assert!(matches!( + triple.architecture, + Architecture::Pulley32 | Architecture::Pulley64 + )); + let constructor = match triple.architecture { + Architecture::Pulley32 => isa_constructor_32, + Architecture::Pulley64 => isa_constructor_64, + _ => unreachable!(), + }; + IsaBuilder { + triple, + setup: self::settings::builder(), + constructor, + } +} + +fn isa_constructor_32( + triple: Triple, + shared_flags: Flags, + builder: &shared_settings::Builder, +) -> CodegenResult { + use crate::settings::Configurable; + let mut builder = builder.clone(); + builder.set("pointer_width", "pointer32").unwrap(); + let isa_flags = PulleyFlags::new(&shared_flags, &builder); + + let backend = + PulleyBackend::::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) +} + +fn isa_constructor_64( + triple: Triple, + shared_flags: Flags, + builder: &shared_settings::Builder, +) -> CodegenResult { + use crate::settings::Configurable; + let mut builder = builder.clone(); + builder.set("pointer_width", "pointer64").unwrap(); + let isa_flags = PulleyFlags::new(&shared_flags, &builder); + + let backend = + PulleyBackend::::new_with_flags(triple, shared_flags, isa_flags); + Ok(backend.wrapped()) +} diff --git a/cranelift/codegen/src/isa/pulley_shared/settings.rs b/cranelift/codegen/src/isa/pulley_shared/settings.rs new file mode 100644 index 0000000000..937384db14 --- /dev/null +++ b/cranelift/codegen/src/isa/pulley_shared/settings.rs @@ -0,0 +1,16 @@ +//! Pulley settings. + +// The generated settings include some dead code. 
+#![allow(dead_code)] + +use crate::{ + machinst::IsaFlags, + settings::{self, detail, Builder, Value}, +}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +include!(concat!(env!("OUT_DIR"), "/settings-pulley.rs")); + +impl IsaFlags for Flags {} diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 31af226586..8093f85099 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -421,7 +421,6 @@ impl Inst { sink.add_trap(trap_code); sink.put2(0x0000); } - // c.addi16sp // // c.addi16sp shares the opcode with c.lui, but has a destination field of x2. diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 6280d52141..e1e79e0759 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -7,6 +7,7 @@ use generated_code::MInst; // Types that the generated ISLE code uses via `use super::*`. 
use self::generated_code::{FpuOPWidth, VecAluOpRR, VecLmul}; +use crate::isa; use crate::isa::riscv64::abi::Riscv64ABICallSite; use crate::isa::riscv64::lower::args::{ FReg, VReg, WritableFReg, WritableVReg, WritableXReg, XReg, @@ -23,7 +24,6 @@ use crate::{ isa::riscv64::inst::*, machinst::{ArgPair, InstOutput, IsTailCall}, }; -use crate::{isa, isle_common_prelude_methods}; use regalloc2::PReg; use std::boxed::Box; use std::vec::Vec; diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 6934e8bed6..f205fe74f2 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -12,7 +12,6 @@ use crate::isa::s390x::inst::{ UImm16Shifted, UImm32Shifted, WritableRegPair, }; use crate::isa::s390x::S390xBackend; -use crate::isle_common_prelude_methods; use crate::machinst::isle::*; use crate::machinst::{MachLabel, Reg}; use crate::{ diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index a79016375a..34bfb93f99 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -2,7 +2,7 @@ // Pull in the ISLE generated code. pub(crate) mod generated_code; -use crate::{ir::types, ir::AtomicRmwOp, isa, isle_common_prelude_methods}; +use crate::{ir::types, ir::AtomicRmwOp, isa}; use generated_code::{Context, MInst, RegisterClass}; // Types that the generated ISLE code uses via `use super::*`. diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 82ff087aaa..dca232e816 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -23,6 +23,11 @@ macro_rules! isle_common_prelude_methods { x.into() } + #[inline] + fn u16_as_i16(&mut self, x: u16) -> i16 { + x as i16 + } + #[inline] fn u16_as_u64(&mut self, x: u16) -> u64 { x.into() @@ -43,6 +48,11 @@ macro_rules! 
isle_common_prelude_methods { x as i32 } + #[inline] + fn u64_as_i64(&mut self, x: u64) -> i64 { + x as i64 + } + #[inline] fn i32_as_i64(&mut self, x: i32) -> i64 { x.into() @@ -904,6 +914,34 @@ macro_rules! isle_common_prelude_methods { val as i8 } + fn u64_as_u8(&mut self, val: u64) -> u8 { + val as u8 + } + + fn u64_as_u16(&mut self, val: u64) -> u16 { + val as u16 + } + + fn u16_try_from_u64(&mut self, val: u64) -> Option { + u16::try_from(val).ok() + } + + fn u32_try_from_u64(&mut self, val: u64) -> Option { + u32::try_from(val).ok() + } + + fn i8_try_from_u64(&mut self, val: u64) -> Option { + i8::try_from(val).ok() + } + + fn i16_try_from_u64(&mut self, val: u64) -> Option { + i16::try_from(val).ok() + } + + fn i32_try_from_u64(&mut self, val: u64) -> Option { + i32::try_from(val).ok() + } + fn u128_replicated_u64(&mut self, val: u128) -> Option { let low64 = val as u64 as u128; if (low64 | (low64 << 64)) == val { diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 0a376d5891..738a7f24a7 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -810,7 +810,12 @@ impl MachBuffer { assert!(self.cur_offset() == start); debug_assert!(end > start); assert!(!self.pending_fixup_records.is_empty()); - debug_assert!(inverted.len() == (end - start) as usize); + debug_assert!( + inverted.len() == (end - start) as usize, + "branch length = {}, but inverted length = {}", + end - start, + inverted.len() + ); let fixup = self.pending_fixup_records.len() - 1; let inverted = Some(SmallVec::from(inverted)); self.lazily_clear_labels_at_tail(); @@ -1423,7 +1428,7 @@ impl MachBuffer { self.emit_veneer(label, offset, kind); } else { let slice = &mut self.data[start..end]; - trace!("patching in-range!"); + trace!("patching in-range! 
slice = {slice:?}; offset = {offset:#x}; label_offset = {label_offset:#x}"); kind.patch(slice, offset, label_offset); } } else { @@ -1706,6 +1711,14 @@ impl MachBuffer { } } +impl Extend for MachBuffer { + fn extend>(&mut self, iter: T) { + for b in iter { + self.put1(b); + } + } +} + impl MachBufferFinalized { /// Get a list of source location mapping tuples in sorted-by-start-offset order. pub fn get_srclocs_sorted(&self) -> &[T::MachSrcLocType] { diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index d4c5612451..aca90549a8 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -45,7 +45,10 @@ pub enum RangeView { #[doc(hidden)] macro_rules! isle_lower_prelude_methods { () => { - isle_common_prelude_methods!(); + crate::isle_lower_prelude_methods!(MInst); + }; + ($inst:ty) => { + crate::isle_common_prelude_methods!(); #[inline] fn value_type(&mut self, val: Value) -> Type { @@ -530,6 +533,7 @@ macro_rules! isle_lower_prelude_methods { self.lower_ctx .abi() .sized_stackslot_addr(stack_slot, offset, dst) + .into() } fn abi_dynamic_stackslot_addr( @@ -542,7 +546,10 @@ macro_rules! isle_lower_prelude_methods { .abi() .dynamic_stackslot_offsets() .is_valid(stack_slot)); - self.lower_ctx.abi().dynamic_stackslot_addr(stack_slot, dst) + self.lower_ctx + .abi() + .dynamic_stackslot_addr(stack_slot, dst) + .into() } fn real_reg_to_reg(&mut self, reg: RealReg) -> Reg { @@ -597,7 +604,7 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn gen_move(&mut self, ty: Type, dst: WritableReg, src: Reg) -> MInst { - MInst::gen_move(dst, src, ty) + <$inst>::gen_move(dst, src, ty).into() } /// Generate the return instruction. 
diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 375be30c1a..3a6b624651 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -179,6 +179,11 @@ impl Writable { self.reg } + /// Get a mutable borrow of the underlying register. + pub fn reg_mut(&mut self) -> &mut T { + &mut self.reg + } + /// Map the underlying register to another value or type. pub fn map(self, f: impl Fn(T) -> U) -> Writable { Writable { reg: f(self.reg) } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 54d8428230..c205d8fae7 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -80,10 +80,37 @@ (extern constructor u8_as_u64 u8_as_u64) (convert u8 u64 u8_as_u64) +(decl pure u16_as_i16 (u16) i16) +(extern constructor u16_as_i16 u16_as_i16) + (decl pure u16_as_u64 (u16) u64) (extern constructor u16_as_u64 u16_as_u64) (convert u16 u64 u16_as_u64) +(decl pure u64_as_u8 (u64) u8) +(extern constructor u64_as_u8 u64_as_u8) + +(decl pure u64_as_u16 (u64) u16) +(extern constructor u64_as_u16 u64_as_u16) + +(decl pure u64_as_i64 (u64) i64) +(extern constructor u64_as_i64 u64_as_i64) + +(decl pure partial u16_try_from_u64 (u64) u16) +(extern constructor u16_try_from_u64 u16_try_from_u64) + +(decl pure partial u32_try_from_u64 (u64) u32) +(extern constructor u32_try_from_u64 u32_try_from_u64) + +(decl pure partial i8_try_from_u64 (u64) i8) +(extern constructor i8_try_from_u64 i8_try_from_u64) + +(decl pure partial i16_try_from_u64 (u64) i16) +(extern constructor i16_try_from_u64 i16_try_from_u64) + +(decl pure partial i32_try_from_u64 (u64) i32) +(extern constructor i32_try_from_u64 i32_try_from_u64) + (decl pure u32_as_u64 (u32) u64) (extern constructor u32_as_u64 u32_as_u64) (convert u32 u64 u32_as_u64) diff --git a/cranelift/filetests/Cargo.toml b/cranelift/filetests/Cargo.toml index 74fd10e7bb..b6e0df9814 100644 --- 
a/cranelift/filetests/Cargo.toml +++ b/cranelift/filetests/Cargo.toml @@ -37,3 +37,4 @@ serde = { workspace = true } serde_derive = { workspace = true } cranelift.workspace = true smallvec = { workspace = true } +pulley-interpreter = { workspace = true, features = ["disas", "std"] } diff --git a/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif new file mode 100644 index 0000000000..e40370d9f2 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/brif-icmp.clif @@ -0,0 +1,345 @@ +test compile precise-output +target pulley32 + +function %brif_icmp_eq(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xeq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 05 00 01 0b 00 00 00 br_if_xeq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ne(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ne v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xneq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 06 00 01 0b 00 00 00 br_if_xneq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ult(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xult32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; 
+; Disassembled: +; 0: 09 00 01 0b 00 00 00 br_if_xult32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ule(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xulteq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 0a 00 01 0b 00 00 00 br_if_xulteq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_slt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 07 00 01 0b 00 00 00 br_if_xslt32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sle(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslteq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 08 00 01 0b 00 00 00 br_if_xslteq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ugt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ugt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xult32 x1, 
x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 09 01 00 0b 00 00 00 br_if_xult32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_uge(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp uge v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xulteq32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 0a 01 00 0b 00 00 00 br_if_xulteq32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sgt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sgt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 07 01 00 0b 00 00 00 br_if_xslt32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sge(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sge v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslteq32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 08 01 00 0b 00 00 00 br_if_xslteq32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_uextend_icmp_eq(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + v3 = uextend.i32 v2 + brif v3, block2, block1 + 
+block1: + v4 = iconst.i32 1 + return v4 + +block2: + v5 = iconst.i32 2 + return v5 +} + +; VCode: +; block0: +; br_if_xeq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 05 00 01 0b 00 00 00 br_if_xeq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif new file mode 100644 index 0000000000..1cbff38e40 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -0,0 +1,253 @@ +test compile precise-output +target pulley32 + +function %brif_i8(i8) -> i8 { +block0(v0: i8): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i16(i16) -> i8 { +block0(v0: i16): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i32(i32) -> i8 { +block0(v0: i32): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 
x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i64(i64) -> i8 { +block0(v0: i64): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_icmp_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp eq v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xeq32 x0, x1 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 1a 05 00 01 xeq32 x5, x0, x1 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + +function %brif_icmp_i16(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ne v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xneq32 x0, x1 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 1b 05 00 01 xneq32 x5, x0, x1 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + +function %brif_icmp_i32(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 07 00 01 0b 00 
00 00 br_if_xslt32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 00 xconst8 x0, 0 +; a: 00 ret +; b: 0e 00 01 xconst8 x0, 1 +; e: 00 ret + +function %brif_icmp_i64(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp uge v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xulteq64 x1, x0 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 19 05 01 00 xulteq64 x5, x1, x0 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif new file mode 100644 index 0000000000..15e22724c5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -0,0 +1,409 @@ +test compile precise-output +target pulley32 + +function %colocated_args_i64_rets_i64() -> i64 { + fn0 = colocated %g(i64) -> i64 + +block0: + v0 = iconst.i64 0 + v1 = call fn0(v0) + v2 = iconst.i64 1 + return v2 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [1204185006387685820006398, 4294967295] }, callee_pop_size: 0 } +; x0 = xconst8 1 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 
0 +; 14: 01 00 00 00 00 call 0x0 // target = 0x14 +; 19: 0e 00 01 xconst8 x0, 1 +; 1c: 25 21 20 08 load64_offset8 lr, sp, 8 +; 20: 22 22 20 load64 fp, sp +; 23: 0e 23 10 xconst8 spilltmp0, 16 +; 26: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 2a: 00 ret + +function %colocated_args_i32_rets_i32() -> i32 { + fn0 = colocated %g(i32) -> i32 + +block0: + v0 = iconst.i32 0 + v1 = call fn0(v0) + v2 = iconst.i32 1 + return v2 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [1204185006387685820006398, 4294967295] }, callee_pop_size: 0 } +; x0 = xconst8 1 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 0 +; 14: 01 00 00 00 00 call 0x0 // target = 0x14 +; 19: 0e 00 01 xconst8 x0, 1 +; 1c: 25 21 20 08 load64_offset8 lr, sp, 8 +; 20: 22 22 20 load64 fp, sp +; 23: 0e 23 10 xconst8 spilltmp0, 16 +; 26: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 2a: 00 ret + +function %colocated_args_i64_i32_i64_i32() { + fn0 = colocated %g(i64, i32, i64, i32) + +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 1 + v2 = iconst.i64 2 + v3 = iconst.i32 3 + call fn0(v0, v1, v2, v3) + return +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; x1 = xconst8 1 +; x2 = xconst8 2 +; x3 = xconst8 3 +; call TestCase(%g), 
CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [1204185006387685820006399, 4294967295] }, callee_pop_size: 0 } +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 0 +; 14: 0e 01 01 xconst8 x1, 1 +; 17: 0e 02 02 xconst8 x2, 2 +; 1a: 0e 03 03 xconst8 x3, 3 +; 1d: 01 00 00 00 00 call 0x0 // target = 0x1d +; 22: 25 21 20 08 load64_offset8 lr, sp, 8 +; 26: 22 22 20 load64 fp, sp +; 29: 0e 23 10 xconst8 spilltmp0, 16 +; 2c: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 30: 00 ret + +function %colocated_rets_i64_i64_i64_i64() -> i64 { + fn0 = colocated %g() -> i64, i64, i64, i64 + +block0: + v0, v1, v2, v3 = call fn0() + v4 = iadd v0, v2 + v5 = iadd v1, v3 + v6 = iadd v4, v5 + return v6 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; call TestCase(%g), CallInfo { uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [1204185006387685820006384, 4294967295] }, callee_pop_size: 0 } +; x4 = xadd64 x0, x2 +; x3 = xadd64 x1, x3 +; x0 = xadd64 x4, x3 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 
12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 01 00 00 00 00 call 0x0 // target = 0x11 +; 16: 13 04 00 02 xadd64 x4, x0, x2 +; 1a: 13 03 01 03 xadd64 x3, x1, x3 +; 1e: 13 00 04 03 xadd64 x0, x4, x3 +; 22: 25 21 20 08 load64_offset8 lr, sp, 8 +; 26: 22 22 20 load64 fp, sp +; 29: 0e 23 10 xconst8 spilltmp0, 16 +; 2c: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 30: 00 ret + +function %colocated_stack_args() { + fn0 = colocated %g(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) + +block0: + v0 = iconst.i64 0 + call fn0(v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0) + return +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; x35 = xconst8 -48 +; x32 = xadd32 x32, x35 +; block0: +; x15 = xconst8 0 +; store64 OutgoingArg(0), x15 // flags = notrap aligned +; store64 OutgoingArg(8), x15 // flags = notrap aligned +; store64 OutgoingArg(16), x15 // flags = notrap aligned +; store64 OutgoingArg(24), x15 // flags = notrap aligned +; store64 OutgoingArg(32), x15 // flags = notrap aligned +; store64 OutgoingArg(40), x15 // flags = notrap aligned +; x0 = xmov x15 +; x1 = xmov x15 +; x2 = xmov x15 +; x3 = xmov x15 +; x4 = xmov x15 +; x5 = xmov x15 +; x6 = xmov x15 +; x7 = xmov x15 +; x8 = xmov x15 +; x9 = xmov x15 +; x10 = xmov x15 +; x11 = xmov x15 +; x12 = xmov x15 +; x13 = xmov x15 +; x14 = xmov x15 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, 
preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [1204185006387685820006399, 4294967295] }, callee_pop_size: 0 } +; x35 = xconst8 48 +; x32 = xadd32 x32, x35 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 23 d0 xconst8 spilltmp0, -48 +; 14: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 18: 0e 0f 00 xconst8 x15, 0 +; 1b: 2a 20 0f store64 sp, x15 +; 1e: 2c 20 08 0f store64_offset8 sp, 8, x15 +; 22: 2c 20 10 0f store64_offset8 sp, 16, x15 +; 26: 2c 20 18 0f store64_offset8 sp, 24, x15 +; 2a: 2c 20 20 0f store64_offset8 sp, 32, x15 +; 2e: 2c 20 28 0f store64_offset8 sp, 40, x15 +; 32: 0b 00 0f xmov x0, x15 +; 35: 0b 01 0f xmov x1, x15 +; 38: 0b 02 0f xmov x2, x15 +; 3b: 0b 03 0f xmov x3, x15 +; 3e: 0b 04 0f xmov x4, x15 +; 41: 0b 05 0f xmov x5, x15 +; 44: 0b 06 0f xmov x6, x15 +; 47: 0b 07 0f xmov x7, x15 +; 4a: 0b 08 0f xmov x8, x15 +; 4d: 0b 09 0f xmov x9, x15 +; 50: 0b 0a 0f xmov x10, x15 +; 53: 0b 0b 0f xmov x11, x15 +; 56: 0b 0c 0f xmov x12, x15 +; 59: 0b 0d 0f xmov x13, x15 +; 5c: 0b 0e 0f xmov x14, x15 +; 5f: 01 00 00 00 00 call 0x0 // target = 0x5f +; 64: 0e 23 30 xconst8 spilltmp0, 48 +; 67: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 6b: 25 21 20 08 load64_offset8 lr, sp, 8 +; 6f: 22 22 20 load64 fp, sp +; 72: 0e 23 10 xconst8 spilltmp0, 16 +; 75: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 79: 00 ret + +function %colocated_stack_rets() -> i64 { + fn0 = colocated %g() -> i64, i64, i64, i64, 
i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 + +block0: + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20 = call fn0() + + v22 = iadd v0, v1 + v23 = iadd v2, v3 + v24 = iadd v4, v5 + v25 = iadd v6, v7 + v26 = iadd v8, v9 + v27 = iadd v10, v11 + v28 = iadd v12, v13 + v29 = iadd v14, v15 + v30 = iadd v16, v17 + v31 = iadd v17, v18 + v32 = iadd v19, v20 + + v33 = iadd v22, v23 + v34 = iadd v24, v25 + v35 = iadd v26, v27 + v36 = iadd v28, v29 + v37 = iadd v30, v31 + v38 = iadd v32, v32 + + v39 = iadd v33, v34 + v40 = iadd v35, v36 + v41 = iadd v37, v38 + + v42 = iadd v39, v40 + v43 = iadd v41, v41 + + v44 = iadd v42, v43 + return v44 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; x35 = xconst8 -64 +; x32 = xadd32 x32, x35 +; store64 sp+56, x16 // flags = notrap aligned +; store64 sp+48, x18 // flags = notrap aligned +; block0: +; x0 = load_addr OutgoingArg(0) +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: 
p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [1204185006387685819940864, 4294967295] }, callee_pop_size: 0 } +; x16 = xmov x13 +; x18 = xmov x11 +; x25 = load64_u OutgoingArg(0) // flags = notrap aligned +; x11 = load64_u OutgoingArg(8) // flags = notrap aligned +; x13 = load64_u OutgoingArg(16) // flags = notrap aligned +; x31 = load64_u OutgoingArg(24) // flags = notrap aligned +; x17 = load64_u OutgoingArg(32) // flags = notrap aligned +; x30 = xadd64 x0, x1 +; x29 = xadd64 x2, x3 +; x5 = xadd64 x4, x5 +; x6 = xadd64 x6, x7 +; x7 = xadd64 x8, x9 +; x0 = xmov x18 +; x4 = xadd64 x10, x0 +; x10 = xmov x16 +; x8 = xadd64 x12, x10 +; x14 = xadd64 x14, x15 +; x15 = xadd64 x25, x11 +; x13 = xadd64 x11, x13 +; x0 = xadd64 x31, x17 +; x1 = xadd64 x30, x29 +; x2 = xadd64 x5, x6 +; x3 = xadd64 x7, x4 +; x14 = xadd64 x8, x14 +; x13 = xadd64 x15, x13 +; x15 = xadd64 x0, x0 +; x0 = xadd64 x1, x2 +; x14 = xadd64 x3, x14 +; x13 = xadd64 x13, x15 +; x14 = xadd64 x0, x14 +; x13 = xadd64 x13, x13 +; x0 = xadd64 x14, x13 +; x16 = load64_u sp+56 // flags = notrap aligned +; x18 = load64_u sp+48 // flags = notrap aligned +; x35 = xconst8 64 +; x32 = xadd32 x32, x35 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 23 c0 xconst8 spilltmp0, -64 +; 14: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 18: 2c 20 38 10 store64_offset8 sp, 56, x16 +; 1c: 2c 20 30 12 store64_offset8 sp, 48, x18 +; 20: 0b 00 20 xmov x0, sp +; 23: 01 00 00 00 00 call 0x0 // target = 0x23 +; 28: 0b 10 0d xmov x16, x13 +; 2b: 0b 12 0b xmov x18, x11 +; 2e: 22 19 20 load64 x25, sp +; 31: 25 0b 20 08 load64_offset8 x11, sp, 8 +; 35: 25 0d 20 10 
load64_offset8 x13, sp, 16 +; 39: 25 1f 20 18 load64_offset8 x31, sp, 24 +; 3d: 25 11 20 20 load64_offset8 x17, sp, 32 +; 41: 13 1e 00 01 xadd64 x30, x0, x1 +; 45: 13 1d 02 03 xadd64 x29, x2, x3 +; 49: 13 05 04 05 xadd64 x5, x4, x5 +; 4d: 13 06 06 07 xadd64 x6, x6, x7 +; 51: 13 07 08 09 xadd64 x7, x8, x9 +; 55: 0b 00 12 xmov x0, x18 +; 58: 13 04 0a 00 xadd64 x4, x10, x0 +; 5c: 0b 0a 10 xmov x10, x16 +; 5f: 13 08 0c 0a xadd64 x8, x12, x10 +; 63: 13 0e 0e 0f xadd64 x14, x14, x15 +; 67: 13 0f 19 0b xadd64 x15, x25, x11 +; 6b: 13 0d 0b 0d xadd64 x13, x11, x13 +; 6f: 13 00 1f 11 xadd64 x0, x31, x17 +; 73: 13 01 1e 1d xadd64 x1, x30, x29 +; 77: 13 02 05 06 xadd64 x2, x5, x6 +; 7b: 13 03 07 04 xadd64 x3, x7, x4 +; 7f: 13 0e 08 0e xadd64 x14, x8, x14 +; 83: 13 0d 0f 0d xadd64 x13, x15, x13 +; 87: 13 0f 00 00 xadd64 x15, x0, x0 +; 8b: 13 00 01 02 xadd64 x0, x1, x2 +; 8f: 13 0e 03 0e xadd64 x14, x3, x14 +; 93: 13 0d 0d 0f xadd64 x13, x13, x15 +; 97: 13 0e 00 0e xadd64 x14, x0, x14 +; 9b: 13 0d 0d 0d xadd64 x13, x13, x13 +; 9f: 13 00 0e 0d xadd64 x0, x14, x13 +; a3: 25 10 20 38 load64_offset8 x16, sp, 56 +; a7: 25 12 20 30 load64_offset8 x18, sp, 48 +; ab: 0e 23 40 xconst8 spilltmp0, 64 +; ae: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; b2: 25 21 20 08 load64_offset8 lr, sp, 8 +; b6: 22 22 20 load64 fp, sp +; b9: 0e 23 10 xconst8 spilltmp0, 16 +; bc: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; c0: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/get_stack_pointer.clif b/cranelift/filetests/filetests/isa/pulley32/get_stack_pointer.clif new file mode 100644 index 0000000000..f2e7899af6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/get_stack_pointer.clif @@ -0,0 +1,18 @@ +test compile precise-output +target pulley32 + +function %get_stack_pointer() -> i32 { +block0: + v0 = get_stack_pointer.i32 + return v0 +} + +; VCode: +; block0: +; x0 = get_sp +; ret +; +; Disassembled: +; 0: 33 02 00 00 get_sp x0 +; 4: 00 ret + diff --git 
a/cranelift/filetests/filetests/isa/pulley32/iadd.clif b/cranelift/filetests/filetests/isa/pulley32/iadd.clif new file mode 100644 index 0000000000..2398ea0013 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/iadd.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley32 + +function %i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd64 x0, x1 +; ret +; +; Disassembled: +; 0: 13 00 00 01 xadd64 x0, x0, x1 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/icmp.clif b/cranelift/filetests/filetests/isa/pulley32/icmp.clif new file mode 100644 index 0000000000..5000bcc22b --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/icmp.clif @@ -0,0 +1,603 @@ +test compile precise-output +target pulley32 + +function %i8_eq(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i16_eq(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i32_eq(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + 
return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i64_eq(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq64 x0, x1 +; ret +; +; Disassembled: +; 0: 14 00 00 01 xeq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ne(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i16_ne(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i32_ne(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i64_ne(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq64 x0, x1 +; ret +; +; Disassembled: +; 0: 15 00 00 01 xneq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ult(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i16_ult(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i32_ult(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i64_ult(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ult v0, v1 
+ return v2 +} + +; VCode: +; block0: +; x0 = xult64 x0, x1 +; ret +; +; Disassembled: +; 0: 18 00 00 01 xult64 x0, x0, x1 +; 4: 00 ret + +function %i8_ule(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i16_ule(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i32_ule(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i64_ule(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq64 x0, x1 +; ret +; +; Disassembled: +; 0: 19 00 00 01 xulteq64 x0, x0, x1 +; 4: 00 ret + +function %i8_slt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i16_slt(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i32_slt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i64_slt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt64 x0, x1 +; ret +; +; Disassembled: +; 0: 16 00 00 01 xslt64 x0, x0, x1 +; 4: 00 ret + +function %i8_sle(i8, i8) -> i8 { +block0(v0: i8, v1: 
i8): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i16_sle(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i32_sle(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i64_sle(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq64 x0, x1 +; ret +; +; Disassembled: +; 0: 17 00 00 01 xslteq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ugt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i16_ugt(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i32_ugt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i64_ugt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult64 x1, x0 +; ret +; +; Disassembled: +; 0: 18 00 01 00 xult64 x0, x1, x0 +; 4: 00 ret + +function %i8_sgt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i16_sgt(i16, 
i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i32_sgt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i64_sgt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt64 x1, x0 +; ret +; +; Disassembled: +; 0: 16 00 01 00 xslt64 x0, x1, x0 +; 4: 00 ret + +function %i8_uge(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i16_uge(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i32_uge(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i64_uge(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq64 x1, x0 +; ret +; +; Disassembled: +; 0: 19 00 01 00 xulteq64 x0, x1, x0 +; 4: 00 ret + +function %i8_sge(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 +; 4: 00 ret + +function %i16_sge(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 
+; 4: 00 ret + +function %i32_sge(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 +; 4: 00 ret + +function %i64_sge(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq64 x1, x0 +; ret +; +; Disassembled: +; 0: 17 00 01 00 xslteq64 x0, x1, x0 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/iconst.clif b/cranelift/filetests/filetests/isa/pulley32/iconst.clif new file mode 100644 index 0000000000..f38de07e3d --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/iconst.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley32 + +function %i8() -> i8 { +block0: + v0 = iconst.i8 0xff + return v0 +} + +; VCode: +; block0: +; x0 = xconst16 255 +; ret +; +; Disassembled: +; 0: 0f 00 ff 00 xconst16 x0, 255 +; 4: 00 ret + +function %i16() -> i16 { +block0: + v0 = iconst.i16 0xffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst32 65535 +; ret +; +; Disassembled: +; 0: 10 00 ff ff 00 00 xconst32 x0, 65535 +; 6: 00 ret + +function %i32() -> i32 { +block0: + v0 = iconst.i32 0xffff_ffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst32 -1 +; ret +; +; Disassembled: +; 0: 10 00 ff ff ff ff xconst32 x0, -1 +; 6: 00 ret + +function %i64() -> i64 { +block0: + v0 = iconst.i64 0xffff_ffff_ffff_ffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst64 -1 +; ret +; +; Disassembled: +; 0: 11 00 ff ff ff ff ff ff ff ff xconst64 x0, -1 +; a: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/jump.clif b/cranelift/filetests/filetests/isa/pulley32/jump.clif new file mode 100644 index 0000000000..c216be08ed --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/jump.clif @@ -0,0 +1,38 @@ +test compile precise-output +target pulley32 + +function %jump(i8) -> i8 { +block0(v0: i8): + brif v0, block1, block2 + 
+block1: + v1 = iconst.i8 1 + jump block3(v1) + +block2: + v2 = iconst.i8 0 + jump block3(v2) + +block3(v3: i8): + return v3 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; jump label3 +; block2: +; x0 = xconst8 1 +; jump label3 +; block3: +; ret +; +; Disassembled: +; 0: 03 00 0e 00 00 00 br_if x0, 0xe // target = 0xe +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 02 08 00 00 00 jump 0x8 // target = 0x11 +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/load.clif b/cranelift/filetests/filetests/isa/pulley32/load.clif new file mode 100644 index 0000000000..427d1a4a0d --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/load.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley32 + +function %load_i32(i32) -> i32 { +block0(v0: i32): + v1 = load.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = load32_u x0+0 // flags = +; ret +; +; Disassembled: +; 0: 20 00 00 load32_u x0, x0 +; 3: 00 ret + +function %load_i64(i32) -> i64 { +block0(v0: i32): + v1 = load.i64 v0 + return v1 +} + +; VCode: +; block0: +; x0 = load64_u x0+0 // flags = +; ret +; +; Disassembled: +; 0: 22 00 00 load64 x0, x0 +; 3: 00 ret + +function %load_i32_with_offset(i32) -> i32 { +block0(v0: i32): + v1 = load.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = load32_u x0+4 // flags = +; ret +; +; Disassembled: +; 0: 23 00 00 04 load32_u_offset8 x0, x0, 4 +; 4: 00 ret + +function %load_i64_with_offset(i32) -> i64 { +block0(v0: i32): + v1 = load.i64 v0+8 + return v1 +} + +; VCode: +; block0: +; x0 = load64_u x0+8 // flags = +; ret +; +; Disassembled: +; 0: 25 00 00 08 load64_offset8 x0, x0, 8 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/store.clif b/cranelift/filetests/filetests/isa/pulley32/store.clif new file mode 100644 index 0000000000..57ec3de6f1 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/store.clif @@ -0,0 +1,63 @@ +test compile precise-output 
+target pulley32 + +function %store_i32(i32, i32) { +block0(v0: i32, v1: i32): + store v0, v1 + return +} + +; VCode: +; block0: +; store32 x1+0, x0 // flags = +; ret +; +; Disassembled: +; 0: 29 01 00 store32 x1, x0 +; 3: 00 ret + +function %store_i64(i64, i32) { +block0(v0: i64, v1: i32): + store v0, v1 + return +} + +; VCode: +; block0: +; store64 x1+0, x0 // flags = +; ret +; +; Disassembled: +; 0: 2a 01 00 store64 x1, x0 +; 3: 00 ret + +function %store_i32_with_offset(i32, i32) { +block0(v0: i32, v1: i32): + store v0, v1+4 + return +} + +; VCode: +; block0: +; store32 x1+4, x0 // flags = +; ret +; +; Disassembled: +; 0: 2b 01 04 00 store32_offset8 x1, 4, x0 +; 4: 00 ret + +function %store_i64_with_offset(i64, i32) { +block0(v0: i64, v1: i32): + store v0, v1+8 + return +} + +; VCode: +; block0: +; store64 x1+8, x0 // flags = +; ret +; +; Disassembled: +; 0: 2c 01 08 00 store64_offset8 x1, 8, x0 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif new file mode 100644 index 0000000000..12281fa134 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -0,0 +1,65 @@ +test compile precise-output +target pulley32 + +function %trap() { +block0: + trap user0 +} + +; VCode: +; block0: +; trap // code = User(0) +; +; Disassembled: +; 0: 33 00 00 trap + +function %trapnz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapnz v2, user0 + return +} + +; VCode: +; block0: +; x3 = xconst8 42 +; x3 = xeq64 x0, x3 +; br_if x3, label2; jump label1 +; block1: +; ret +; block2: +; trap // code = User(0) +; +; Disassembled: +; 0: 0e 03 2a xconst8 x3, 42 +; 3: 14 03 00 03 xeq64 x3, x0, x3 +; 7: 03 03 07 00 00 00 br_if x3, 0x7 // target = 0xe +; d: 00 ret +; e: 33 00 00 trap + +function %trapz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapz v2, user0 + return +} + +; VCode: +; block0: +; x3 = xconst8 42 +; x3 = xeq64 x0, x3 +; 
br_if x3, label2; jump label1 +; block2: +; ret +; block1: +; trap // code = User(0) +; +; Disassembled: +; 0: 0e 03 2a xconst8 x3, 42 +; 3: 14 03 00 03 xeq64 x3, x0, x3 +; 7: 04 03 07 00 00 00 br_if_not x3, 0x7 // target = 0xe +; d: 00 ret +; e: 33 00 00 trap + diff --git a/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif new file mode 100644 index 0000000000..ba51bf4aa4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/brif-icmp.clif @@ -0,0 +1,345 @@ +test compile precise-output +target pulley64 + +function %brif_icmp_eq(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xeq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 05 00 01 0b 00 00 00 br_if_xeq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ne(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ne v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xneq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 06 00 01 0b 00 00 00 br_if_xneq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ult(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xult32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 
09 00 01 0b 00 00 00 br_if_xult32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ule(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xulteq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 0a 00 01 0b 00 00 00 br_if_xulteq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_slt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 07 00 01 0b 00 00 00 br_if_xslt32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sle(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslteq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 08 00 01 0b 00 00 00 br_if_xslteq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_ugt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp ugt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xult32 x1, x0, label2; jump label1 
+; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 09 01 00 0b 00 00 00 br_if_xult32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_uge(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp uge v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xulteq32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 0a 01 00 0b 00 00 00 br_if_xulteq32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sgt(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sgt v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 07 01 00 0b 00 00 00 br_if_xslt32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_icmp_sge(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp sge v0, v1 + brif v2, block2, block1 + +block1: + v3 = iconst.i32 1 + return v3 + +block2: + v4 = iconst.i32 2 + return v4 +} + +; VCode: +; block0: +; br_if_xslteq32 x1, x0, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 08 01 00 0b 00 00 00 br_if_xslteq32 x1, x0, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + +function %brif_uextend_icmp_eq(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + v3 = uextend.i32 v2 + brif v3, block2, block1 + +block1: + v4 = iconst.i32 1 
+ return v4 + +block2: + v5 = iconst.i32 2 + return v5 +} + +; VCode: +; block0: +; br_if_xeq32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 1 +; ret +; block2: +; x0 = xconst8 2 +; ret +; +; Disassembled: +; 0: 05 00 01 0b 00 00 00 br_if_xeq32 x0, x1, 0xb // target = 0xb +; 7: 0e 00 01 xconst8 x0, 1 +; a: 00 ret +; b: 0e 00 02 xconst8 x0, 2 +; e: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif new file mode 100644 index 0000000000..2a33dd7bb8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -0,0 +1,253 @@ +test compile precise-output +target pulley64 + +function %brif_i8(i8) -> i8 { +block0(v0: i8): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i16(i16) -> i8 { +block0(v0: i16): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i32(i32) -> i8 { +block0(v0: i32): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 
01 xconst8 x0, 1 +; d: 00 ret + +function %brif_i64(i64) -> i8 { +block0(v0: i64): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + return v1 + +block2: + v2 = iconst.i8 0 + return v2 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 03 00 0a 00 00 00 br_if x0, 0xa // target = 0xa +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 00 ret +; a: 0e 00 01 xconst8 x0, 1 +; d: 00 ret + +function %brif_icmp_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp eq v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xeq32 x0, x1 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 1a 05 00 01 xeq32 x5, x0, x1 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + +function %brif_icmp_i16(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ne v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xneq32 x0, x1 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 1b 05 00 01 xneq32 x5, x0, x1 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + +function %brif_icmp_i32(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; br_if_xslt32 x0, x1, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 07 00 01 0b 00 00 00 br_if_xslt32 x0, x1, 0xb 
// target = 0xb +; 7: 0e 00 00 xconst8 x0, 0 +; a: 00 ret +; b: 0e 00 01 xconst8 x0, 1 +; e: 00 ret + +function %brif_icmp_i64(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp uge v0, v1 + brif v2, block1, block2 + +block1: + v3 = iconst.i8 1 + return v3 + +block2: + v4 = iconst.i8 0 + return v4 +} + +; VCode: +; block0: +; x5 = xulteq64 x1, x0 +; br_if x5, label2; jump label1 +; block1: +; x0 = xconst8 0 +; ret +; block2: +; x0 = xconst8 1 +; ret +; +; Disassembled: +; 0: 19 05 01 00 xulteq64 x5, x1, x0 +; 4: 03 05 0a 00 00 00 br_if x5, 0xa // target = 0xe +; a: 0e 00 00 xconst8 x0, 0 +; d: 00 ret +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif new file mode 100644 index 0000000000..5b633b7b64 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -0,0 +1,409 @@ +test compile precise-output +target pulley64 + +function %colocated_args_i64_rets_i64() -> i64 { + fn0 = colocated %g(i64) -> i64 + +block0: + v0 = iconst.i64 0 + v1 = call fn0(v0) + v2 = iconst.i64 1 + return v2 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [1204185006387685820006398, 4294967295] }, callee_pop_size: 0 } +; x0 = xconst8 1 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 0 +; 14: 01 00 00 00 00 call 
0x0 // target = 0x14 +; 19: 0e 00 01 xconst8 x0, 1 +; 1c: 25 21 20 08 load64_offset8 lr, sp, 8 +; 20: 22 22 20 load64 fp, sp +; 23: 0e 23 10 xconst8 spilltmp0, 16 +; 26: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 2a: 00 ret + +function %colocated_args_i32_rets_i32() -> i32 { + fn0 = colocated %g(i32) -> i32 + +block0: + v0 = iconst.i32 0 + v1 = call fn0(v0) + v2 = iconst.i32 1 + return v2 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [1204185006387685820006398, 4294967295] }, callee_pop_size: 0 } +; x0 = xconst8 1 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 0 +; 14: 01 00 00 00 00 call 0x0 // target = 0x14 +; 19: 0e 00 01 xconst8 x0, 1 +; 1c: 25 21 20 08 load64_offset8 lr, sp, 8 +; 20: 22 22 20 load64 fp, sp +; 23: 0e 23 10 xconst8 spilltmp0, 16 +; 26: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 2a: 00 ret + +function %colocated_args_i64_i32_i64_i32() { + fn0 = colocated %g(i64, i32, i64, i32) + +block0: + v0 = iconst.i64 0 + v1 = iconst.i32 1 + v2 = iconst.i64 2 + v3 = iconst.i32 3 + call fn0(v0, v1, v2, v3) + return +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; x0 = xconst8 0 +; x1 = xconst8 1 +; x2 = xconst8 2 +; x3 = xconst8 3 +; call TestCase(%g), CallInfo { uses: [CallArgPair { 
vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [1204185006387685820006399, 4294967295] }, callee_pop_size: 0 } +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 00 00 xconst8 x0, 0 +; 14: 0e 01 01 xconst8 x1, 1 +; 17: 0e 02 02 xconst8 x2, 2 +; 1a: 0e 03 03 xconst8 x3, 3 +; 1d: 01 00 00 00 00 call 0x0 // target = 0x1d +; 22: 25 21 20 08 load64_offset8 lr, sp, 8 +; 26: 22 22 20 load64 fp, sp +; 29: 0e 23 10 xconst8 spilltmp0, 16 +; 2c: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 30: 00 ret + +function %colocated_rets_i64_i64_i64_i64() -> i64 { + fn0 = colocated %g() -> i64, i64, i64, i64 + +block0: + v0, v1, v2, v3 = call fn0() + v4 = iadd v0, v2 + v5 = iadd v1, v3 + v6 = iadd v4, v5 + return v6 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; block0: +; call TestCase(%g), CallInfo { uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [1204185006387685820006384, 4294967295] }, callee_pop_size: 0 } +; x4 = xadd64 x0, x2 +; x3 = xadd64 x1, x3 +; x0 = xadd64 x4, x3 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, 
spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 01 00 00 00 00 call 0x0 // target = 0x11 +; 16: 13 04 00 02 xadd64 x4, x0, x2 +; 1a: 13 03 01 03 xadd64 x3, x1, x3 +; 1e: 13 00 04 03 xadd64 x0, x4, x3 +; 22: 25 21 20 08 load64_offset8 lr, sp, 8 +; 26: 22 22 20 load64 fp, sp +; 29: 0e 23 10 xconst8 spilltmp0, 16 +; 2c: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 30: 00 ret + +function %colocated_stack_args() { + fn0 = colocated %g(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) + +block0: + v0 = iconst.i64 0 + call fn0(v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0, v0) + return +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; x35 = xconst8 -48 +; x32 = xadd32 x32, x35 +; block0: +; x15 = xconst8 0 +; store64 OutgoingArg(0), x15 // flags = notrap aligned +; store64 OutgoingArg(8), x15 // flags = notrap aligned +; store64 OutgoingArg(16), x15 // flags = notrap aligned +; store64 OutgoingArg(24), x15 // flags = notrap aligned +; store64 OutgoingArg(32), x15 // flags = notrap aligned +; store64 OutgoingArg(40), x15 // flags = notrap aligned +; x0 = xmov x15 +; x1 = xmov x15 +; x2 = xmov x15 +; x3 = xmov x15 +; x4 = xmov x15 +; x5 = xmov x15 +; x6 = xmov x15 +; x7 = xmov x15 +; x8 = xmov x15 +; x9 = xmov x15 +; x10 = xmov x15 +; x11 = xmov x15 +; x12 = xmov x15 +; x13 = xmov x15 +; x14 = xmov x15 +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { 
vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [1204185006387685820006399, 4294967295] }, callee_pop_size: 0 } +; x35 = xconst8 48 +; x32 = xadd32 x32, x35 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 23 d0 xconst8 spilltmp0, -48 +; 14: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 18: 0e 0f 00 xconst8 x15, 0 +; 1b: 2a 20 0f store64 sp, x15 +; 1e: 2c 20 08 0f store64_offset8 sp, 8, x15 +; 22: 2c 20 10 0f store64_offset8 sp, 16, x15 +; 26: 2c 20 18 0f store64_offset8 sp, 24, x15 +; 2a: 2c 20 20 0f store64_offset8 sp, 32, x15 +; 2e: 2c 20 28 0f store64_offset8 sp, 40, x15 +; 32: 0b 00 0f xmov x0, x15 +; 35: 0b 01 0f xmov x1, x15 +; 38: 0b 02 0f xmov x2, x15 +; 3b: 0b 03 0f xmov x3, x15 +; 3e: 0b 04 0f xmov x4, x15 +; 41: 0b 05 0f xmov x5, x15 +; 44: 0b 06 0f xmov x6, x15 +; 47: 0b 07 0f xmov x7, x15 +; 4a: 0b 08 0f xmov x8, x15 +; 4d: 0b 09 0f xmov x9, x15 +; 50: 0b 0a 0f xmov x10, x15 +; 53: 0b 0b 0f xmov x11, x15 +; 56: 0b 0c 0f xmov x12, x15 +; 59: 0b 0d 0f xmov x13, x15 +; 5c: 0b 0e 0f xmov x14, x15 +; 5f: 01 00 00 00 00 call 0x0 // target = 0x5f +; 64: 0e 23 30 xconst8 spilltmp0, 48 +; 67: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 6b: 25 21 20 08 load64_offset8 lr, sp, 8 +; 6f: 22 22 20 load64 fp, sp +; 72: 0e 23 10 xconst8 spilltmp0, 16 +; 75: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 79: 00 ret + +function %colocated_stack_rets() -> i64 { + fn0 = colocated %g() -> i64, i64, i64, i64, i64, i64, i64, i64, i64, 
i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 + +block0: + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20 = call fn0() + + v22 = iadd v0, v1 + v23 = iadd v2, v3 + v24 = iadd v4, v5 + v25 = iadd v6, v7 + v26 = iadd v8, v9 + v27 = iadd v10, v11 + v28 = iadd v12, v13 + v29 = iadd v14, v15 + v30 = iadd v16, v17 + v31 = iadd v17, v18 + v32 = iadd v19, v20 + + v33 = iadd v22, v23 + v34 = iadd v24, v25 + v35 = iadd v26, v27 + v36 = iadd v28, v29 + v37 = iadd v30, v31 + v38 = iadd v32, v32 + + v39 = iadd v33, v34 + v40 = iadd v35, v36 + v41 = iadd v37, v38 + + v42 = iadd v39, v40 + v43 = iadd v41, v41 + + v44 = iadd v42, v43 + return v44 +} + +; VCode: +; x35 = xconst8 -16 +; x32 = xadd32 x32, x35 +; store64 sp+8, x33 // flags = notrap aligned +; store64 sp+0, x34 // flags = notrap aligned +; x34 = xmov x32 +; x35 = xconst8 -64 +; x32 = xadd32 x32, x35 +; store64 sp+56, x16 // flags = notrap aligned +; store64 sp+48, x18 // flags = notrap aligned +; block0: +; x0 = load_addr OutgoingArg(0) +; call TestCase(%g), CallInfo { uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, 
CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [1204185006387685819940864, 4294967295] }, callee_pop_size: 0 } +; x16 = xmov x13 +; x18 = xmov x11 +; x25 = load64_u OutgoingArg(0) // flags = notrap aligned +; x11 = load64_u OutgoingArg(8) // flags = notrap aligned +; x13 = load64_u OutgoingArg(16) // flags = notrap aligned +; x31 = load64_u OutgoingArg(24) // flags = notrap aligned +; x17 = load64_u OutgoingArg(32) // flags = notrap aligned +; x30 = xadd64 x0, x1 +; x29 = xadd64 x2, x3 +; x5 = xadd64 x4, x5 +; x6 = xadd64 x6, x7 +; x7 = xadd64 x8, x9 +; x0 = xmov x18 +; x4 = xadd64 x10, x0 +; x10 = xmov x16 +; x8 = xadd64 x12, x10 +; x14 = xadd64 x14, x15 +; x15 = xadd64 x25, x11 +; x13 = xadd64 x11, x13 +; x0 = xadd64 x31, x17 +; x1 = xadd64 x30, x29 +; x2 = xadd64 x5, x6 +; x3 = xadd64 x7, x4 +; x14 = xadd64 x8, x14 +; x13 = xadd64 x15, x13 +; x15 = xadd64 x0, x0 +; x0 = xadd64 x1, x2 +; x14 = xadd64 x3, x14 +; x13 = xadd64 x13, x15 +; x14 = xadd64 x0, x14 +; x13 = xadd64 x13, x13 +; x0 = xadd64 x14, x13 +; x16 = load64_u sp+56 // flags = notrap aligned +; x18 = load64_u sp+48 // flags = notrap aligned +; x35 = xconst8 64 +; x32 = xadd32 x32, x35 +; x33 = load64_u sp+8 // flags = notrap aligned +; x34 = load64_u sp+0 // flags = notrap aligned +; x35 = xconst8 16 +; x32 = xadd32 x32, x35 +; ret +; +; Disassembled: +; 0: 0e 23 f0 xconst8 spilltmp0, -16 +; 3: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 7: 2c 20 08 21 store64_offset8 sp, 8, lr +; b: 2a 20 22 store64 sp, fp +; e: 0b 22 20 xmov fp, sp +; 11: 0e 23 c0 xconst8 spilltmp0, -64 +; 14: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; 18: 2c 20 38 10 store64_offset8 sp, 56, x16 +; 1c: 2c 20 30 12 store64_offset8 sp, 48, x18 +; 20: 0b 00 20 xmov x0, sp +; 23: 01 00 00 00 00 call 0x0 // target = 0x23 +; 28: 0b 10 0d xmov x16, x13 +; 2b: 0b 12 0b xmov x18, x11 +; 2e: 22 19 20 load64 x25, sp +; 31: 25 0b 20 08 load64_offset8 x11, sp, 8 +; 35: 25 0d 20 10 load64_offset8 x13, sp, 16 +; 39: 
25 1f 20 18 load64_offset8 x31, sp, 24 +; 3d: 25 11 20 20 load64_offset8 x17, sp, 32 +; 41: 13 1e 00 01 xadd64 x30, x0, x1 +; 45: 13 1d 02 03 xadd64 x29, x2, x3 +; 49: 13 05 04 05 xadd64 x5, x4, x5 +; 4d: 13 06 06 07 xadd64 x6, x6, x7 +; 51: 13 07 08 09 xadd64 x7, x8, x9 +; 55: 0b 00 12 xmov x0, x18 +; 58: 13 04 0a 00 xadd64 x4, x10, x0 +; 5c: 0b 0a 10 xmov x10, x16 +; 5f: 13 08 0c 0a xadd64 x8, x12, x10 +; 63: 13 0e 0e 0f xadd64 x14, x14, x15 +; 67: 13 0f 19 0b xadd64 x15, x25, x11 +; 6b: 13 0d 0b 0d xadd64 x13, x11, x13 +; 6f: 13 00 1f 11 xadd64 x0, x31, x17 +; 73: 13 01 1e 1d xadd64 x1, x30, x29 +; 77: 13 02 05 06 xadd64 x2, x5, x6 +; 7b: 13 03 07 04 xadd64 x3, x7, x4 +; 7f: 13 0e 08 0e xadd64 x14, x8, x14 +; 83: 13 0d 0f 0d xadd64 x13, x15, x13 +; 87: 13 0f 00 00 xadd64 x15, x0, x0 +; 8b: 13 00 01 02 xadd64 x0, x1, x2 +; 8f: 13 0e 03 0e xadd64 x14, x3, x14 +; 93: 13 0d 0d 0f xadd64 x13, x13, x15 +; 97: 13 0e 00 0e xadd64 x14, x0, x14 +; 9b: 13 0d 0d 0d xadd64 x13, x13, x13 +; 9f: 13 00 0e 0d xadd64 x0, x14, x13 +; a3: 25 10 20 38 load64_offset8 x16, sp, 56 +; a7: 25 12 20 30 load64_offset8 x18, sp, 48 +; ab: 0e 23 40 xconst8 spilltmp0, 64 +; ae: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; b2: 25 21 20 08 load64_offset8 lr, sp, 8 +; b6: 22 22 20 load64 fp, sp +; b9: 0e 23 10 xconst8 spilltmp0, 16 +; bc: 12 20 20 23 xadd32 sp, sp, spilltmp0 +; c0: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/get_stack_pointer.clif b/cranelift/filetests/filetests/isa/pulley64/get_stack_pointer.clif new file mode 100644 index 0000000000..b95a382e49 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/get_stack_pointer.clif @@ -0,0 +1,17 @@ +test compile precise-output +target pulley64 + +function %get_stack_pointer() -> i64 { +block0: + v0 = get_stack_pointer.i64 + return v0 +} + +; VCode: +; block0: +; x0 = get_sp +; ret +; +; Disassembled: +; 0: 33 02 00 00 get_sp x0 +; 4: 00 ret diff --git a/cranelift/filetests/filetests/isa/pulley64/iadd.clif 
b/cranelift/filetests/filetests/isa/pulley64/iadd.clif new file mode 100644 index 0000000000..5a7fa7fe58 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/iadd.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley64 + +function %i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd32 x0, x1 +; ret +; +; Disassembled: +; 0: 12 00 00 01 xadd32 x0, x0, x1 +; 4: 00 ret + +function %i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xadd64 x0, x1 +; ret +; +; Disassembled: +; 0: 13 00 00 01 xadd64 x0, x0, x1 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/icmp.clif b/cranelift/filetests/filetests/isa/pulley64/icmp.clif new file mode 100644 index 0000000000..84372023c7 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/icmp.clif @@ -0,0 +1,603 @@ +test compile precise-output +target pulley64 + +function %i8_eq(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i16_eq(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i32_eq(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq32 x0, x1 
+; ret +; +; Disassembled: +; 0: 1a 00 00 01 xeq32 x0, x0, x1 +; 4: 00 ret + +function %i64_eq(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xeq64 x0, x1 +; ret +; +; Disassembled: +; 0: 14 00 00 01 xeq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ne(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i16_ne(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i32_ne(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1b 00 00 01 xneq32 x0, x0, x1 +; 4: 00 ret + +function %i64_ne(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ne v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xneq64 x0, x1 +; ret +; +; Disassembled: +; 0: 15 00 00 01 xneq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ult(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i16_ult(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i32_ult(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x0, x1 +; ret +; +; Disassembled: +; 0: 1e 00 00 01 xult32 x0, x0, x1 +; 4: 00 ret + +function %i64_ult(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ult v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult64 x0, 
x1 +; ret +; +; Disassembled: +; 0: 18 00 00 01 xult64 x0, x0, x1 +; 4: 00 ret + +function %i8_ule(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i16_ule(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i32_ule(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1f 00 00 01 xulteq32 x0, x0, x1 +; 4: 00 ret + +function %i64_ule(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ule v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq64 x0, x1 +; ret +; +; Disassembled: +; 0: 19 00 00 01 xulteq64 x0, x0, x1 +; 4: 00 ret + +function %i8_slt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i16_slt(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i32_slt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x0, x1 +; ret +; +; Disassembled: +; 0: 1c 00 00 01 xslt32 x0, x0, x1 +; 4: 00 ret + +function %i64_slt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp slt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt64 x0, x1 +; ret +; +; Disassembled: +; 0: 16 00 00 01 xslt64 x0, x0, x1 +; 4: 00 ret + +function %i8_sle(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; 
block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i16_sle(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i32_sle(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x0, x1 +; ret +; +; Disassembled: +; 0: 1d 00 00 01 xslteq32 x0, x0, x1 +; 4: 00 ret + +function %i64_sle(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sle v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq64 x0, x1 +; ret +; +; Disassembled: +; 0: 17 00 00 01 xslteq64 x0, x0, x1 +; 4: 00 ret + +function %i8_ugt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i16_ugt(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i32_ugt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult32 x1, x0 +; ret +; +; Disassembled: +; 0: 1e 00 01 00 xult32 x0, x1, x0 +; 4: 00 ret + +function %i64_ugt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp ugt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xult64 x1, x0 +; ret +; +; Disassembled: +; 0: 18 00 01 00 xult64 x0, x1, x0 +; 4: 00 ret + +function %i8_sgt(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i16_sgt(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sgt v0, 
v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i32_sgt(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt32 x1, x0 +; ret +; +; Disassembled: +; 0: 1c 00 01 00 xslt32 x0, x1, x0 +; 4: 00 ret + +function %i64_sgt(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sgt v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslt64 x1, x0 +; ret +; +; Disassembled: +; 0: 16 00 01 00 xslt64 x0, x1, x0 +; 4: 00 ret + +function %i8_uge(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i16_uge(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i32_uge(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1f 00 01 00 xulteq32 x0, x1, x0 +; 4: 00 ret + +function %i64_uge(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp uge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xulteq64 x1, x0 +; ret +; +; Disassembled: +; 0: 19 00 01 00 xulteq64 x0, x1, x0 +; 4: 00 ret + +function %i8_sge(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 +; 4: 00 ret + +function %i16_sge(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 +; 4: 00 ret + +function %i32_sge(i32, i32) -> i8 { 
+block0(v0: i32, v1: i32): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq32 x1, x0 +; ret +; +; Disassembled: +; 0: 1d 00 01 00 xslteq32 x0, x1, x0 +; 4: 00 ret + +function %i64_sge(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = icmp sge v0, v1 + return v2 +} + +; VCode: +; block0: +; x0 = xslteq64 x1, x0 +; ret +; +; Disassembled: +; 0: 17 00 01 00 xslteq64 x0, x1, x0 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/iconst.clif b/cranelift/filetests/filetests/isa/pulley64/iconst.clif new file mode 100644 index 0000000000..36ae9b0095 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/iconst.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley64 + +function %i8() -> i8 { +block0: + v0 = iconst.i8 0xff + return v0 +} + +; VCode: +; block0: +; x0 = xconst16 255 +; ret +; +; Disassembled: +; 0: 0f 00 ff 00 xconst16 x0, 255 +; 4: 00 ret + +function %i16() -> i16 { +block0: + v0 = iconst.i16 0xffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst32 65535 +; ret +; +; Disassembled: +; 0: 10 00 ff ff 00 00 xconst32 x0, 65535 +; 6: 00 ret + +function %i32() -> i32 { +block0: + v0 = iconst.i32 0xffff_ffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst32 -1 +; ret +; +; Disassembled: +; 0: 10 00 ff ff ff ff xconst32 x0, -1 +; 6: 00 ret + +function %i64() -> i64 { +block0: + v0 = iconst.i64 0xffff_ffff_ffff_ffff + return v0 +} + +; VCode: +; block0: +; x0 = xconst64 -1 +; ret +; +; Disassembled: +; 0: 11 00 ff ff ff ff ff ff ff ff xconst64 x0, -1 +; a: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/jump.clif b/cranelift/filetests/filetests/isa/pulley64/jump.clif new file mode 100644 index 0000000000..011984697d --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/jump.clif @@ -0,0 +1,38 @@ +test compile precise-output +target pulley64 + +function %jump(i8) -> i8 { +block0(v0: i8): + brif v0, block1, block2 + +block1: + v1 = iconst.i8 1 + jump block3(v1) + 
+block2: + v2 = iconst.i8 0 + jump block3(v2) + +block3(v3: i8): + return v3 +} + +; VCode: +; block0: +; br_if x0, label2; jump label1 +; block1: +; x0 = xconst8 0 +; jump label3 +; block2: +; x0 = xconst8 1 +; jump label3 +; block3: +; ret +; +; Disassembled: +; 0: 03 00 0e 00 00 00 br_if x0, 0xe // target = 0xe +; 6: 0e 00 00 xconst8 x0, 0 +; 9: 02 08 00 00 00 jump 0x8 // target = 0x11 +; e: 0e 00 01 xconst8 x0, 1 +; 11: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/load.clif b/cranelift/filetests/filetests/isa/pulley64/load.clif new file mode 100644 index 0000000000..a692da083b --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/load.clif @@ -0,0 +1,62 @@ +test compile precise-output +target pulley64 + +function %load_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = load32_u x0+0 // flags = +; ret +; +; Disassembled: +; 0: 20 00 00 load32_u x0, x0 +; 3: 00 ret + +function %load_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0 + return v1 +} + +; VCode: +; block0: +; x0 = load64_u x0+0 // flags = +; ret +; +; Disassembled: +; 0: 22 00 00 load64 x0, x0 +; 3: 00 ret + +function %load_i32_with_offset(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = load32_u x0+4 // flags = +; ret +; +; Disassembled: +; 0: 23 00 00 04 load32_u_offset8 x0, x0, 4 +; 4: 00 ret + +function %load_i64_with_offset(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0+8 + return v1 +} + +; VCode: +; block0: +; x0 = load64_u x0+8 // flags = +; ret +; +; Disassembled: +; 0: 25 00 00 08 load64_offset8 x0, x0, 8 +; 4: 00 ret diff --git a/cranelift/filetests/filetests/isa/pulley64/store.clif b/cranelift/filetests/filetests/isa/pulley64/store.clif new file mode 100644 index 0000000000..bec7e1a773 --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/store.clif @@ -0,0 +1,63 @@ +test compile precise-output +target pulley64 + +function %store_i32(i32, i64) 
{ +block0(v0: i32, v1: i64): + store v0, v1 + return +} + +; VCode: +; block0: +; store32 x1+0, x0 // flags = +; ret +; +; Disassembled: +; 0: 29 01 00 store32 x1, x0 +; 3: 00 ret + +function %store_i64(i64, i64) { +block0(v0: i64, v1: i64): + store v0, v1 + return +} + +; VCode: +; block0: +; store64 x1+0, x0 // flags = +; ret +; +; Disassembled: +; 0: 2a 01 00 store64 x1, x0 +; 3: 00 ret + +function %store_i32_with_offset(i32, i64) { +block0(v0: i32, v1: i64): + store v0, v1+4 + return +} + +; VCode: +; block0: +; store32 x1+4, x0 // flags = +; ret +; +; Disassembled: +; 0: 2b 01 04 00 store32_offset8 x1, 4, x0 +; 4: 00 ret + +function %store_i64_with_offset(i64, i64) { +block0(v0: i64, v1: i64): + store v0, v1+8 + return +} + +; VCode: +; block0: +; store64 x1+8, x0 // flags = +; ret +; +; Disassembled: +; 0: 2c 01 08 00 store64_offset8 x1, 8, x0 +; 4: 00 ret + diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif new file mode 100644 index 0000000000..9216fb9a5c --- /dev/null +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -0,0 +1,65 @@ +test compile precise-output +target pulley64 + +function %trap() { +block0: + trap user0 +} + +; VCode: +; block0: +; trap // code = User(0) +; +; Disassembled: +; 0: 33 00 00 trap + +function %trapnz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapnz v2, user0 + return +} + +; VCode: +; block0: +; x3 = xconst8 42 +; x3 = xeq64 x0, x3 +; br_if x3, label2; jump label1 +; block1: +; ret +; block2: +; trap // code = User(0) +; +; Disassembled: +; 0: 0e 03 2a xconst8 x3, 42 +; 3: 14 03 00 03 xeq64 x3, x0, x3 +; 7: 03 03 07 00 00 00 br_if x3, 0x7 // target = 0xe +; d: 00 ret +; e: 33 00 00 trap + +function %trapz(i64) { +block0(v0: i64): + v1 = iconst.i64 42 + v2 = icmp eq v0, v1 + trapz v2, user0 + return +} + +; VCode: +; block0: +; x3 = xconst8 42 +; x3 = xeq64 x0, x3 +; br_if x3, label2; jump label1 +; block2: +; ret +; 
block1: +; trap // code = User(0) +; +; Disassembled: +; 0: 0e 03 2a xconst8 x3, 42 +; 3: 14 03 00 03 xeq64 x3, x0, x3 +; 7: 04 03 07 00 00 00 br_if_not x3, 0x7 // target = 0xe +; d: 00 ret +; e: 33 00 00 trap + diff --git a/cranelift/filetests/src/test_compile.rs b/cranelift/filetests/src/test_compile.rs index c06875bd80..a99343b800 100644 --- a/cranelift/filetests/src/test_compile.rs +++ b/cranelift/filetests/src/test_compile.rs @@ -73,10 +73,19 @@ impl SubTest for TestCompile { info!("Generated {} bytes of code:\n{}", total_size, vcode); if self.precise_output { - let cs = isa - .to_capstone() - .map_err(|e| anyhow::format_err!("{}", e))?; - let dis = compiled_code.disassemble(Some(&params), &cs)?; + let dis = match isa.triple().architecture { + target_lexicon::Architecture::Pulley32 | target_lexicon::Architecture::Pulley64 => { + pulley_interpreter::disas::Disassembler::disassemble_all( + compiled_code.buffer.data(), + )? + } + _ => { + let cs = isa + .to_capstone() + .map_err(|e| anyhow::format_err!("{}", e))?; + compiled_code.disassemble(Some(&params), &cs)? + } + }; let actual = Vec::from_iter( std::iter::once("VCode:") @@ -84,6 +93,7 @@ impl SubTest for TestCompile { .chain(["", "Disassembled:"]) .chain(dis.lines()), ); + check_precise_output(&actual, context) } else { run_filecheck(&vcode, context) diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 9b221f56c3..f861db27ac 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -74,7 +74,7 @@ impl Vm { rets: impl IntoIterator<Item = RegType> + 'a, ) -> Result<impl Iterator<Item = Val> + 'a, *mut u8> { // NB: make sure this method stays in sync with - // `Pbc64MachineDeps::compute_arg_locs`! + // `PulleyMachineDeps::compute_arg_locs`! 
let mut x_args = (0..16).map(|x| XReg::unchecked_new(x)); let mut f_args = (0..16).map(|f| FReg::unchecked_new(f)); diff --git a/scripts/publish.rs b/scripts/publish.rs index b3fcf21541..c5fbac21e8 100644 --- a/scripts/publish.rs +++ b/scripts/publish.rs @@ -17,8 +17,9 @@ use std::time::Duration; // note that this list must be topologically sorted by dependencies const CRATES_TO_PUBLISH: &[&str] = &[ - // cranelift + // pulley "pulley-interpreter", + // cranelift "cranelift-bitset", "cranelift-isle", "cranelift-entity", @@ -140,6 +141,7 @@ fn main() { crates.push(root); find_crates("crates".as_ref(), &ws, &mut crates); find_crates("cranelift".as_ref(), &ws, &mut crates); + find_crates("pulley".as_ref(), &ws, &mut crates); find_crates("winch".as_ref(), &ws, &mut crates); let pos = CRATES_TO_PUBLISH diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index c2bc0c4794..dc0f411985 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -3833,6 +3833,12 @@ user-id = 3618 # David Tolnay (dtolnay) start = "2019-04-23" end = "2025-05-06" +[[trusted.pulley-interpreter]] +criteria = "safe-to-deploy" +user-id = 696 # Nick Fitzgerald (fitzgen) +start = "2024-07-30" +end = "2025-08-08" + [[trusted.quote]] criteria = "safe-to-deploy" user-id = 3618 # David Tolnay (dtolnay) diff --git a/supply-chain/imports.lock b/supply-chain/imports.lock index 449085802e..c3afbb6691 100644 --- a/supply-chain/imports.lock +++ b/supply-chain/imports.lock @@ -145,6 +145,10 @@ audited_as = "0.110.1" version = "0.112.0" audited_as = "0.110.1" +[[unpublished.pulley-interpreter]] +version = "0.1.0" +audited_as = "0.0.0" + [[unpublished.wasi-common]] version = "24.0.0" audited_as = "23.0.1" @@ -810,6 +814,13 @@ user-id = 3618 user-login = "dtolnay" user-name = "David Tolnay" +[[publisher.pulley-interpreter]] +version = "0.0.0" +when = "2024-07-30" +user-id = 696 +user-login = "fitzgen" +user-name = "Nick Fitzgerald" + [[publisher.quote]] version = "1.0.36" when = 
"2024-04-10"