diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 0784cdeb01..d2277b4ba6 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1369,6 +1369,7 @@ impl MachInstEmit for Inst { } &Inst::MovFromPReg { rd, rm } => { let rd = allocs.next_writable(rd); + allocs.next_fixed_nonallocatable(rm); let rm: Reg = rm.into(); debug_assert!([ regs::fp_reg(), @@ -1383,6 +1384,7 @@ impl MachInstEmit for Inst { Inst::Mov { size, rd, rm }.emit(&[], sink, emit_info, state); } &Inst::MovToPReg { rd, rm } => { + allocs.next_fixed_nonallocatable(rd); let rd: Writable = Writable::from_reg(rd.into()); let rm = allocs.next(rm); debug_assert!([ diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 29aeafc0ad..7e7c76f573 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -655,25 +655,13 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rm); } &Inst::MovFromPReg { rd, rm } => { - debug_assert!([ - regs::fp_reg(), - regs::stack_reg(), - regs::link_reg(), - regs::pinned_reg() - ] - .contains(&rm.into())); debug_assert!(rd.to_reg().is_virtual()); collector.reg_def(rd); + collector.reg_fixed_nonallocatable(rm); } &Inst::MovToPReg { rd, rm } => { - debug_assert!([ - regs::fp_reg(), - regs::stack_reg(), - regs::link_reg(), - regs::pinned_reg() - ] - .contains(&rd.into())); debug_assert!(rm.is_virtual()); + collector.reg_fixed_nonallocatable(rd); collector.reg_use(rm); } &Inst::MovK { rd, rn, .. 
} => { @@ -1568,10 +1556,12 @@ impl Inst { } &Inst::MovFromPReg { rd, rm } => { let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + allocs.next_fixed_nonallocatable(rm); let rm = show_ireg_sized(rm.into(), OperandSize::Size64); format!("mov {}, {}", rd, rm) } &Inst::MovToPReg { rd, rm } => { + allocs.next_fixed_nonallocatable(rd); let rd = show_ireg_sized(rd.into(), OperandSize::Size64); let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); format!("mov {}, {}", rd, rm) diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 445dddfce3..72459c89e9 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -57,20 +57,11 @@ impl AArch64Backend { fn compile_vcode( &self, func: &Function, - flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { - let emit_info = EmitInfo::new(flags.clone()); + let emit_info = EmitInfo::new(self.flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::AArch64Callee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::( - func, - flags, - self, - abi, - &self.machine_env, - emit_info, - sigs, - ) + compile::compile::(func, self, abi, emit_info, sigs) } } @@ -80,10 +71,13 @@ impl TargetIsa for AArch64Backend { func: &Function, want_disasm: bool, ) -> CodegenResult { - let flags = self.flags(); - let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func)?; - let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info()); + let emit_result = vcode.emit( + &regalloc_result, + want_disasm, + self.flags.machine_code_cfg_info(), + ); let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); @@ -119,6 +113,10 @@ impl TargetIsa for AArch64Backend { &self.flags } + fn machine_env(&self) -> &MachineEnv 
{ + &self.machine_env + } + fn isa_flags(&self) -> Vec { self.isa_flags.iter().collect() } diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index c319433f5d..68fef2da6f 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -227,6 +227,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-independent flags that were used to make this trait object. fn flags(&self) -> &settings::Flags; + /// Get the ISA-dependent MachineEnv for managing register allocation. + fn machine_env(&self) -> &regalloc2::MachineEnv; + /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs index ffff098611..5f10175514 100644 --- a/cranelift/codegen/src/isa/riscv64/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/mod.rs @@ -57,12 +57,11 @@ impl Riscv64Backend { fn compile_vcode( &self, func: &Function, - flags: shared_settings::Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { - let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); + let emit_info = EmitInfo::new(self.flags.clone(), self.isa_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::Riscv64Callee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::(func, flags, self, abi, &self.mach_env, emit_info, sigs) + compile::compile::(func, self, abi, emit_info, sigs) } } @@ -72,11 +71,14 @@ impl TargetIsa for Riscv64Backend { func: &Function, want_disasm: bool, ) -> CodegenResult { - let flags = self.flags(); - let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func)?; let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); - let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info()); + let emit_result = vcode.emit( + &regalloc_result, + want_disasm, 
+ self.flags.machine_code_cfg_info(), + ); let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); @@ -115,6 +117,10 @@ impl TargetIsa for Riscv64Backend { &self.flags } + fn machine_env(&self) -> &MachineEnv { + &self.mach_env + } + fn isa_flags(&self) -> Vec { self.isa_flags.iter().collect() } diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index a10ad17bef..cc45c5c012 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -60,15 +60,7 @@ impl S390xBackend { let emit_info = EmitInfo::new(self.isa_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::S390xCallee::new(func, self, &self.isa_flags, &sigs)?; - compile::compile::( - func, - self.flags.clone(), - self, - abi, - &self.machine_env, - emit_info, - sigs, - ) + compile::compile::(func, self, abi, emit_info, sigs) } } @@ -117,6 +109,10 @@ impl TargetIsa for S390xBackend { &self.flags } + fn machine_env(&self) -> &MachineEnv { + &self.machine_env + } + fn isa_flags(&self) -> Vec { self.isa_flags.iter().collect() } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 32f284a8c0..d773ae3816 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -699,6 +699,7 @@ pub(crate) fn emit( } Inst::MovFromPReg { src, dst } => { + allocs.next_fixed_nonallocatable(*src); let src: Reg = (*src).into(); debug_assert!([regs::rsp(), regs::rbp(), regs::pinned_reg()].contains(&src)); let src = Gpr::new(src).unwrap(); @@ -711,6 +712,7 @@ pub(crate) fn emit( Inst::MovToPReg { src, dst } => { let src = allocs.next(src.to_reg()); let src = Gpr::new(src).unwrap(); + allocs.next_fixed_nonallocatable(*dst); let dst: Reg = (*dst).into(); debug_assert!([regs::rsp(), regs::rbp(), regs::pinned_reg()].contains(&dst)); let dst = 
WritableGpr::from_writable_reg(Writable::from_reg(dst)).unwrap(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 6ee5e7a26a..34a9df84ae 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1263,6 +1263,7 @@ impl PrettyPrint for Inst { } Inst::MovFromPReg { src, dst } => { + allocs.next_fixed_nonallocatable(*src); let src: Reg = (*src).into(); let src = regs::show_ireg_sized(src, 8); let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); @@ -1271,6 +1272,7 @@ impl PrettyPrint for Inst { Inst::MovToPReg { src, dst } => { let src = pretty_print_reg(src.to_reg(), 8, allocs); + allocs.next_fixed_nonallocatable(*dst); let dst: Reg = (*dst).into(); let dst = regs::show_ireg_sized(dst, 8); format!("{} {}, {}", ljustify("movq".to_string()), src, dst) @@ -1919,14 +1921,14 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_def(dst.to_writable_reg()); } Inst::MovFromPReg { dst, src } => { - debug_assert!([regs::rsp(), regs::rbp(), regs::pinned_reg()].contains(&(*src).into())); debug_assert!(dst.to_reg().to_reg().is_virtual()); + collector.reg_fixed_nonallocatable(*src); collector.reg_def(dst.to_writable_reg()); } Inst::MovToPReg { dst, src } => { debug_assert!(src.to_reg().is_virtual()); - debug_assert!([regs::rsp(), regs::rbp(), regs::pinned_reg()].contains(&(*dst).into())); collector.reg_use(src.to_reg()); + collector.reg_fixed_nonallocatable(*dst); } Inst::XmmToGpr { src, dst, .. 
} => { collector.reg_use(src.to_reg()); diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 30d553ead8..a947e00f07 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -48,14 +48,13 @@ impl X64Backend { fn compile_vcode( &self, func: &Function, - flags: Flags, ) -> CodegenResult<(VCode, regalloc2::Output)> { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. - let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); + let emit_info = EmitInfo::new(self.flags.clone(), self.x64_flags.clone()); let sigs = SigSet::new::(func, &self.flags)?; let abi = abi::X64Callee::new(&func, self, &self.x64_flags, &sigs)?; - compile::compile::(&func, flags, self, abi, &self.reg_env, emit_info, sigs) + compile::compile::(&func, self, abi, emit_info, sigs) } } @@ -65,10 +64,13 @@ impl TargetIsa for X64Backend { func: &Function, want_disasm: bool, ) -> CodegenResult { - let flags = self.flags(); - let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func)?; - let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info()); + let emit_result = vcode.emit( + &regalloc_result, + want_disasm, + self.flags.machine_code_cfg_info(), + ); let frame_size = emit_result.frame_size; let value_labels_ranges = emit_result.value_labels_ranges; let buffer = emit_result.buffer.finish(); @@ -96,6 +98,10 @@ impl TargetIsa for X64Backend { &self.flags } + fn machine_env(&self) -> &MachineEnv { + &self.reg_env + } + fn isa_flags(&self) -> Vec { self.x64_flags.iter().collect() } diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index a140842a90..bf773eb30a 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -7,24 
+7,31 @@ use crate::timing; use crate::trace; use regalloc2::RegallocOptions; -use regalloc2::{self, MachineEnv}; /// Compile the given function down to VCode with allocated registers, ready /// for binary emission. pub fn compile( f: &Function, - flags: crate::settings::Flags, b: &B, abi: Callee<<::MInst as MachInst>::ABIMachineSpec>, - machine_env: &MachineEnv, emit_info: ::Info, sigs: SigSet, ) -> CodegenResult<(VCode, regalloc2::Output)> { + let machine_env = b.machine_env(); + // Compute lowered block order. let block_order = BlockLoweringOrder::new(f); // Build the lowering context. - let lower = crate::machinst::Lower::new(f, flags, abi, emit_info, block_order, sigs)?; + let lower = crate::machinst::Lower::new( + f, + b.flags().clone(), + machine_env, + abi, + emit_info, + block_order, + sigs, + )?; // Lower the IR. let vcode = { diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index e09b5585c8..97fcf909eb 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -21,10 +21,11 @@ use crate::machinst::{ }; use crate::{trace, CodegenResult}; use alloc::vec::Vec; +use regalloc2::{MachineEnv, PRegSet}; use smallvec::{smallvec, SmallVec}; use std::fmt::Debug; -use super::{VCodeBuildDirection, VRegAllocator}; +use super::{preg_set_from_machine_env, VCodeBuildDirection, VRegAllocator}; /// An "instruction color" partitions CLIF instructions by side-effecting ops. /// All instructions with the same "color" are guaranteed not to be separated by @@ -149,6 +150,9 @@ pub struct Lower<'func, I: VCodeInst> { /// Machine-independent flags. flags: crate::settings::Flags, + /// The set of allocatable registers. + allocatable: PRegSet, + /// Lowered machine instructions. 
vcode: VCodeBuilder, @@ -322,11 +326,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> { pub fn new( f: &'func Function, flags: crate::settings::Flags, + machine_env: &MachineEnv, abi: Callee, emit_info: I::Info, block_order: BlockLoweringOrder, sigs: SigSet, - ) -> CodegenResult> { + ) -> CodegenResult { let constants = VCodeConstants::with_capacity(f.dfg.constants.len()); let vcode = VCodeBuilder::new( sigs, @@ -412,6 +417,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { Ok(Lower { f, flags, + allocatable: preg_set_from_machine_env(machine_env), vcode, vregs, value_regs, @@ -1019,7 +1025,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { // Now that we've emitted all instructions into the // VCodeBuilder, let's build the VCode. - let vcode = self.vcode.build(self.vregs); + let vcode = self.vcode.build(self.allocatable, self.vregs); trace!("built vcode: {:?}", vcode); Ok(vcode) diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs index 626c64e39b..a23ddcecab 100644 --- a/cranelift/codegen/src/machinst/reg.rs +++ b/cranelift/codegen/src/machinst/reg.rs @@ -4,7 +4,7 @@ use alloc::{string::String, vec::Vec}; use core::{fmt::Debug, hash::Hash}; -use regalloc2::{Allocation, Operand, PReg, PRegSet, VReg}; +use regalloc2::{Allocation, MachineEnv, Operand, PReg, PRegSet, VReg}; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -38,6 +38,26 @@ pub fn first_user_vreg_index() -> usize { PINNED_VREGS } +/// Collect the registers from a regalloc2 MachineEnv into a PRegSet. +/// TODO: remove this once it's upstreamed in regalloc2 +pub fn preg_set_from_machine_env(machine_env: &MachineEnv) -> PRegSet { + let mut regs = PRegSet::default(); + + for class in machine_env.preferred_regs_by_class.iter() { + for reg in class.iter() { + regs.add(*reg); + } + } + + for class in machine_env.non_preferred_regs_by_class.iter() { + for reg in class.iter() { + regs.add(*reg); + } + } + + regs +} + /// A register named in an instruction. 
This register can be either a /// virtual register or a fixed physical register. It does not have /// any constraints applied to it: those can be added later in @@ -289,21 +309,30 @@ pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> { operands: &'a mut Vec, operands_start: usize, clobbers: PRegSet, + + /// The subset of physical registers that are allocatable. + allocatable: PRegSet, + renamer: F, } impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { /// Start gathering operands into one flattened operand array. - pub fn new(operands: &'a mut Vec, renamer: F) -> Self { + pub fn new(operands: &'a mut Vec, allocatable: PRegSet, renamer: F) -> Self { let operands_start = operands.len(); Self { operands, operands_start, clobbers: PRegSet::default(), + allocatable, renamer, } } + fn is_allocatable_preg(&self, reg: PReg) -> bool { + self.allocatable.contains(reg) + } + /// Add an operand. fn add_operand(&mut self, operand: Operand) { let vreg = (self.renamer)(operand.vreg()); @@ -320,6 +349,12 @@ impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> { ((start, end), self.clobbers) } + /// Add a use of a fixed, nonallocatable physical register. + pub fn reg_fixed_nonallocatable(&mut self, preg: PReg) { + debug_assert!(!self.is_allocatable_preg(preg)); + self.add_operand(Operand::fixed_nonallocatable(preg)) + } + /// Add a register use, at the start of the instruction (`Before` /// position). 
pub fn reg_use(&mut self, reg: Reg) { @@ -434,6 +469,19 @@ impl<'a> AllocationConsumer<'a> { } } + pub fn next_fixed_nonallocatable(&mut self, preg: PReg) { + let alloc = self.allocs.next(); + let alloc = alloc.map(|alloc| { + Reg::from( + alloc + .as_reg() + .expect("Should not have gotten a stack allocation"), + ) + }); + + assert_eq!(preg, alloc.unwrap().to_real_reg().unwrap().into()); + } + pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg { let alloc = self.allocs.next(); let alloc = alloc.map(|alloc| { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index c0423e2a26..443eab2839 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -541,7 +541,7 @@ impl VCodeBuilder { .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); } - fn collect_operands(&mut self) { + fn collect_operands(&mut self, allocatable: PRegSet) { for (i, insn) in self.vcode.insts.iter().enumerate() { // Push operands from the instruction onto the operand list. // @@ -555,9 +555,10 @@ impl VCodeBuilder { // its register fields (which is slow, branchy code) once. let vreg_aliases = &self.vcode.vreg_aliases; - let mut op_collector = OperandCollector::new(&mut self.vcode.operands, |vreg| { - Self::resolve_vreg_alias_impl(vreg_aliases, vreg) - }); + let mut op_collector = + OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| { + Self::resolve_vreg_alias_impl(vreg_aliases, vreg) + }); insn.get_operands(&mut op_collector); let (ops, clobbers) = op_collector.finish(); self.vcode.operand_ranges.push(ops); @@ -586,14 +587,14 @@ impl VCodeBuilder { } /// Build the final VCode. 
- pub fn build(mut self, vregs: VRegAllocator) -> VCode { + pub fn build(mut self, allocatable: PRegSet, vregs: VRegAllocator) -> VCode { self.vcode.vreg_types = vregs.vreg_types; self.vcode.reftyped_vregs = vregs.reftyped_vregs; if self.direction == VCodeBuildDirection::Backward { self.reverse_and_finalize(); } - self.collect_operands(); + self.collect_operands(allocatable); // Apply register aliases to the `reftyped_vregs` list since this list // will be returned directly to `regalloc2` eventually and all