diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 129b332295..b50c2a5edc 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -212,9 +212,14 @@ pub(crate) fn input_to_reg>( let from_bits = ty_bits(ty) as u8; let inputs = ctx.get_input(input.insn, input.input); let in_reg = if let Some(c) = inputs.constant { + let masked = if from_bits < 64 { + c & ((1u64 << from_bits) - 1) + } else { + c + }; // Generate constants fresh at each use to minimize long-range register pressure. let to_reg = ctx.alloc_tmp(Inst::rc_for_type(ty).unwrap(), ty); - for inst in Inst::gen_constant(to_reg, c, ty).into_iter() { + for inst in Inst::gen_constant(to_reg, masked, ty).into_iter() { ctx.emit(inst); } to_reg.to_reg() diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index daeb0c33c3..02f5feb998 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1252,7 +1252,15 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trapif | Opcode::Trapff => { let trap_info = (ctx.srcloc(insn), inst_trapcode(ctx.data(insn)).unwrap()); - let cond = if op == Opcode::Trapif { + let cond = if maybe_input_insn(ctx, inputs[0], Opcode::IaddIfcout).is_some() { + let condcode = inst_condcode(ctx.data(insn)).unwrap(); + let cond = lower_condcode(condcode); + // The flags must not have been clobbered by any other + // instruction between the iadd_ifcout and this instruction, as + // verified by the CLIF validator; so we can simply use the + // flags here. + cond + } else if op == Opcode::Trapif { let condcode = inst_condcode(ctx.data(insn)).unwrap(); let cond = lower_condcode(condcode); let is_signed = condcode_is_signed(condcode); @@ -1852,6 +1860,35 @@ pub(crate) fn lower_insn_to_regs>( }); } + Opcode::IaddIfcout => { + // This is a two-output instruction that is needed for the + // legalizer's explicit heap-check sequence, among possible other + // uses. Its second output is a flags output only ever meant to + // check for overflow using the + // `backend.unsigned_add_overflow_condition()` condition. + // + // Note that the CLIF validation will ensure that no flag-setting + // operation comes between this IaddIfcout and its use (e.g., a + // Trapif). Thus, we can rely on implicit communication through the + // processor flags rather than explicitly generating flags into a + // register. We simply use the variant of the add instruction that + // sets flags (`adds`) here. + + // Ensure that the second output isn't directly called for: it + // should only be used by a flags-consuming op, which will directly + // understand this instruction and merge the comparison. + assert!(!ctx.is_reg_needed(insn, ctx.get_output(insn, 1).to_reg())); + + // Now handle the iadd as above, except use an AddS opcode that sets + // flags. + let rd = output_to_reg(ctx, outputs[0]); + let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = input_to_rse_imm12(ctx, inputs[1], NarrowValueMode::None); + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::AddS32, ALUOp::AddS64); + ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm)); + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm @@ -1862,7 +1899,6 @@ pub(crate) fn lower_insn_to_regs>( | Opcode::IaddCin | Opcode::IaddIfcin | Opcode::IaddCout - | Opcode::IaddIfcout | Opcode::IaddCarry | Opcode::IaddIfcarry | Opcode::IsubBin diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 1b061045c0..99239c9469 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -1,5 +1,6 @@ //! ARM 64-bit Instruction Set Architecture. +use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::Builder as IsaBuilder; use crate::machinst::{ @@ -92,6 +93,19 @@ impl MachBackend for AArch64Backend { fn reg_universe(&self) -> &RealRegUniverse { &self.reg_universe } + + fn unsigned_add_overflow_condition(&self) -> IntCC { + // Unsigned `>=`; this corresponds to the carry flag set on aarch64, which happens on + // overflow of an add. + IntCC::UnsignedGreaterThanOrEqual + } + + fn unsigned_sub_overflow_condition(&self) -> IntCC { + // unsigned `<`; this corresponds to the carry flag cleared on aarch64, which happens on + // underflow of a subtract (aarch64 follows a carry-cleared-on-borrow convention, the + // opposite of x86). + IntCC::UnsignedLessThan + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 0e607dcfae..3b1652cb10 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -5,6 +5,7 @@ use alloc::boxed::Box; use regalloc::RealRegUniverse; use target_lexicon::Triple; +use crate::ir::condcodes::IntCC; use crate::ir::Function; use crate::isa::Builder as IsaBuilder; use crate::machinst::pretty_print::ShowWithRRU; @@ -84,6 +85,18 @@ impl MachBackend for X64Backend { fn reg_universe(&self) -> &RealRegUniverse { &self.reg_universe } + + fn unsigned_add_overflow_condition(&self) -> IntCC { + // Unsigned `>=`; this corresponds to the carry flag set on x86, which happens on + // overflow of an add. + IntCC::UnsignedGreaterThanOrEqual + } + + fn unsigned_sub_overflow_condition(&self) -> IntCC { + // unsigned `>=`; this corresponds to the carry flag set on x86, which happens on + // underflow of a subtract (carry is borrow for subtract). + IntCC::UnsignedGreaterThanOrEqual + } } /// Create a new `isa::Builder`. diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index a4bd09cfa0..a905449221 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -133,7 +133,7 @@ pub trait LowerCtx { /// Get the `idx`th output register of the given IR instruction. When /// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that /// the backend will write results to these output register(s). - fn get_output(&mut self, ir_inst: Inst, idx: usize) -> Writable; + fn get_output(&self, ir_inst: Inst, idx: usize) -> Writable; // Codegen primitives: allocate temps, emit instructions, set result registers, // ask for an input to be gen'd into a register. @@ -146,6 +146,10 @@ pub trait LowerCtx { /// `get_input()`. Codegen may not happen otherwise for the producing /// instruction if it has no side effects and no uses. fn use_input_reg(&mut self, input: LowerInput); + /// Is the given register output needed after the given instruction? Allows + /// instructions with multiple outputs to make fine-grained decisions on + /// which outputs to actually generate. + fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool; /// Retrieve constant data given a handle. fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData; } @@ -906,7 +910,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { self.get_input_for_val(ir_inst, val) } - fn get_output(&mut self, ir_inst: Inst, idx: usize) -> Writable { + fn get_output(&self, ir_inst: Inst, idx: usize) -> Writable { let val = self.f.dfg.inst_results(ir_inst)[idx]; Writable::from_reg(self.value_regs[val]) } @@ -928,6 +932,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { self.vreg_needed[input.reg.get_index()] = true; } + fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool { + self.inst_needed[ir_inst] || self.vreg_needed[reg.get_index()] + } + fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData { self.f.dfg.constants.get(constant_handle) } diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 8a29b06c0a..380a6fe37c 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -308,14 +308,10 @@ pub trait MachBackend { fn reg_universe(&self) -> &RealRegUniverse; /// Machine-specific condcode info needed by TargetIsa. - fn unsigned_add_overflow_condition(&self) -> IntCC { - // TODO: this is what x86 specifies. Is this right for arm64? - IntCC::UnsignedLessThan - } + /// Condition that will be true when an IaddIfcout overflows. + fn unsigned_add_overflow_condition(&self) -> IntCC; /// Machine-specific condcode info needed by TargetIsa. - fn unsigned_sub_overflow_condition(&self) -> IntCC { - // TODO: this is what x86 specifies. Is this right for arm64? - IntCC::UnsignedLessThan - } + /// Condition that will be true when an IsubIfcout overflows. + fn unsigned_sub_overflow_condition(&self) -> IntCC; } diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs index 7f2fce7cce..e3daeb0f7a 100644 --- a/cranelift/codegen/src/value_label.rs +++ b/cranelift/codegen/src/value_label.rs @@ -91,6 +91,11 @@ pub fn build_value_labels_ranges( where T: From + Deref + Ord + Copy, { + // FIXME(#1523): New-style backend does not yet have debug info. + if isa.get_mach_backend().is_some() { + return HashMap::new(); + } + let values_labels = build_value_labels_index::(func); let mut blocks = func.layout.blocks().collect::>(); diff --git a/cranelift/filetests/filetests/vcode/aarch64/constants.clif b/cranelift/filetests/filetests/vcode/aarch64/constants.clif index 80baeb1c98..09f0aeaa53 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/constants.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/constants.clif @@ -200,3 +200,16 @@ block0: ; nextln: mov sp, fp ; nextln: ldp fp, lr, [sp], #16 ; nextln: ret + +function %f() -> i32 { +block0: + v0 = iconst.i32 -1 + return v0 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: orr x0, xzr, #4294967295 +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret diff --git a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif new file mode 100644 index 0000000000..ab97cb5190 --- /dev/null +++ b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif @@ -0,0 +1,49 @@ +test compile +target aarch64 + +function %dynamic_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldur w2, [x0] +; nextln: add w2, w2, #0 +; nextln: subs wzr, w1, w2 +; nextln: b.ls label1 ; b label2 +; nextln: Block 1: +; check: add x0, x0, x1, UXTW +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret +; nextln: Block 2: +; check: udf + + +function %static_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs wzr, w1, #65536 +; nextln: b.ls label1 ; b label2 +; nextln: Block 1: +; check: add x0, x0, x1, UXTW +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret +; nextln: Block 2: +; check: udf + diff --git a/tests/all/fuzzing.rs b/tests/all/fuzzing.rs index 5904458807..8dbc845b58 100644 --- a/tests/all/fuzzing.rs +++ b/tests/all/fuzzing.rs @@ -21,7 +21,7 @@ fn instantiate_empty_module_with_memory() { } #[test] -#[cfg_attr(target_arch = "aarch64", should_panic)] // FIXME(#1523) +#[cfg_attr(target_arch = "aarch64", ignore)] // FIXME(#1523) fn instantiate_module_that_compiled_to_x64_has_register_32() { let mut config = Config::new(); config.debug_info(true);