From 010e028d671c436940beaec1fbb7c5c9aec0c99d Mon Sep 17 00:00:00 2001
From: Sam Parker
Date: Wed, 25 May 2022 09:19:24 +0100
Subject: [PATCH] [AArch64] Port AtomicCAS to isle (#4140)

Copyright (c) 2022, Arm Limited.
---
 cranelift/codegen/src/isa/aarch64/inst.isle   | 38 +++++++++++++++-
 cranelift/codegen/src/isa/aarch64/lower.isle  | 28 ++++++++----
 .../codegen/src/isa/aarch64/lower_inst.rs     | 45 +------------------
 3 files changed, 57 insertions(+), 54 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 1999a21fd9..12324ab7db 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -244,7 +244,9 @@
       (rn Reg)
       (ty Type))
 
-    ;; An atomic compare-and-swap operation. This instruction is sequentially consistent.
+    ;; An atomic compare-and-swap operation. These instructions require
+    ;; Large System Extension (LSE) support (FEAT_LSE) and have
+    ;; acquire-release semantics.
     (AtomicCAS
       (rs WritableReg)
       (rt Reg)
@@ -2129,6 +2131,16 @@
     )
     dst))
 
+;; Helper for emitting `MInst.AtomicCAS` instructions.
+(decl lse_atomic_cas (Reg Reg Reg Type) Reg)
+(rule (lse_atomic_cas addr expect replace ty)
+  (let (
+      (dst WritableReg (temp_writable_reg ty))
+      (_1 Unit (emit (MInst.Mov (operand_size ty) dst expect)))
+      (_2 Unit (emit (MInst.AtomicCAS dst replace addr ty)))
+    )
+    dst))
+
 ;; Helper for emitting `MInst.AtomicRMWLoop` instructions.
 ;; - Make sure that both args are in virtual regs, since in effect
 ;;   we have to do a parallel copy to get them safely to the AtomicRMW input
@@ -2145,3 +2157,27 @@
     (_ Unit (emit (MInst.AtomicRMWLoop ty op)))
   )
   (mov64_from_real 27)))
+
+;; Helper for emitting `MInst.AtomicCASLoop` instructions.
+;; This is very similar, but not identical, to the AtomicRmw case. Note
+;; that the AtomicCASLoop sequence does its own masking, so we don't need
+;; to worry about zero-extending narrow (I8/I16/I32) values here.
+;; Make sure that all three args are in virtual regs. See the corresponding
+;; comment for `atomic_rmw_loop` above.
+(decl atomic_cas_loop (Reg Reg Reg Type) Reg)
+(rule (atomic_cas_loop addr expect replace ty)
+  (let (
+      (v_addr Reg (ensure_in_vreg addr $I64))
+      (v_exp Reg (ensure_in_vreg expect $I64))
+      (v_rep Reg (ensure_in_vreg replace $I64))
+      ;; Move the args to the preordained AtomicCASLoop input regs.
+      (r_addr Reg (mov64_to_real 25 v_addr))
+      (r_exp Reg (mov64_to_real 26 v_exp))
+      (r_rep Reg (mov64_to_real 28 v_rep))
+      ;; Now the AtomicCASLoop itself, implemented in the normal way with a
+      ;; load-exclusive/store-exclusive loop.
+      (_ Unit (emit (MInst.AtomicCASLoop ty)))
+    )
+    ;; Finally, copy the preordained AtomicCASLoop output reg (x27) to its
+    ;; destination. Note that x24 and x28 are trashed by the sequence.
+    (mov64_from_real 27)))
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index fed903523a..b298a30509 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1225,39 +1225,39 @@
 
 ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Add) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Add) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Xor) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Eor) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Or) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Set) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Smax) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Smax) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Smin) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Smin) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Umax) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Umax) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Umin) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Umin) addr src ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.Sub) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Add) addr (sub ty (zero_reg) src) ty))
-(rule (lower (and (use_lse)
+(rule 1 (lower (and (use_lse)
                   (has_type (valid_atomic_transaction ty)
                             (atomic_rmw flags (AtomicRmwOp.And) addr src))))
       (lse_atomic_rmw (AtomicRMWOp.Clr) addr (eon ty src (zero_reg)) ty))
@@ -1296,3 +1296,13 @@
 (rule (lower (has_type (valid_atomic_transaction ty)
                        (atomic_rmw flags (AtomicRmwOp.Xchg) addr src)))
       (atomic_rmw_loop (AtomicRMWLoopOp.Xchg) addr src ty))
+
+;;;; Rules for `AtomicCAS` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule 1 (lower (and (use_lse)
+                  (has_type (valid_atomic_transaction ty)
+                            (atomic_cas flags addr src1 src2))))
+      (lse_atomic_cas addr src1 src2 ty))
+
+(rule (lower (and (has_type (valid_atomic_transaction ty)
+                            (atomic_cas flags addr src1 src2))))
+      (atomic_cas_loop addr src1 src2 ty))
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index f952785728..7618ed1b30 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -239,50 +239,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::AtomicRmw => implemented_in_isle(ctx),
 
-        Opcode::AtomicCas => {
-            let r_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
-            let ty_access = ty.unwrap();
-            assert!(is_valid_atomic_transaction_ty(ty_access));
-
-            if isa_flags.use_lse() {
-                ctx.emit(Inst::gen_move(r_dst, r_expected, ty_access));
-                ctx.emit(Inst::AtomicCAS {
-                    rs: r_dst,
-                    rt: r_replacement,
-                    rn: r_addr,
-                    ty: ty_access,
-                });
-            } else {
-                // This is very similar to, but not identical to, the AtomicRmw case. Note
-                // that the AtomicCASLoop sequence does its own masking, so we don't need to worry
-                // about zero-extending narrow (I8/I16/I32) values here.
-                // Make sure that all three args are in virtual regs. See corresponding comment
-                // for `Opcode::AtomicRmw` above.
-                r_addr = ctx.ensure_in_vreg(r_addr, I64);
-                r_expected = ctx.ensure_in_vreg(r_expected, I64);
-                r_replacement = ctx.ensure_in_vreg(r_replacement, I64);
-                // Move the args to the preordained AtomicCASLoop input regs
-                ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64));
-                ctx.emit(Inst::gen_move(
-                    Writable::from_reg(xreg(26)),
-                    r_expected,
-                    I64,
-                ));
-                ctx.emit(Inst::gen_move(
-                    Writable::from_reg(xreg(28)),
-                    r_replacement,
-                    I64,
-                ));
-                // Now the AtomicCASLoop itself, implemented in the normal way, with an LL-SC loop
-                ctx.emit(Inst::AtomicCASLoop { ty: ty_access });
-                // And finally, copy the preordained AtomicCASLoop output reg to its destination.
-                ctx.emit(Inst::gen_move(r_dst, xreg(27), I64));
-                // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that.
-            }
-        }
+        Opcode::AtomicCas => implemented_in_isle(ctx),
 
         Opcode::AtomicLoad => {
             let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
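
For context on the operation being lowered: Cranelift's `atomic_cas` compares the
value at `addr` against `expected` and, on a match, stores `replacement`; in both
cases it returns the value it observed, and the LSE `AtomicCAS` form carries the
acquire-release ordering noted in inst.isle above. Below is a minimal host-level
sketch of those semantics in plain Rust, not Cranelift code; the helper name
`atomic_cas_u32` is hypothetical, and nothing beyond `std::sync::atomic` is assumed.

    use std::sync::atomic::{AtomicU32, Ordering};

    // Compare-and-swap that always returns the value observed at the
    // location, mirroring the result of the lowered `atomic_cas`.
    fn atomic_cas_u32(cell: &AtomicU32, expected: u32, replacement: u32) -> u32 {
        // AcqRel on success matches the acquire-release semantics noted in
        // the `MInst.AtomicCAS` comment; failure needs only the acquire half.
        match cell.compare_exchange(expected, replacement, Ordering::AcqRel, Ordering::Acquire) {
            Ok(old) | Err(old) => old,
        }
    }

    fn main() {
        let cell = AtomicU32::new(5);
        assert_eq!(atomic_cas_u32(&cell, 5, 9), 5); // matched: swapped in 9
        assert_eq!(atomic_cas_u32(&cell, 5, 7), 9); // mismatch: cell untouched
        assert_eq!(cell.load(Ordering::Relaxed), 9);
    }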
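
lower.isle above keeps two lowerings for `atomic_cas`: the priority-1 rule selects
the single-instruction LSE form whenever `use_lse` matches, and the default
priority-0 rule falls back to `atomic_cas_loop`, whose `MInst.AtomicCASLoop`
expands to a load-exclusive/store-exclusive loop at emission time. The loop exists
because an exclusive pair can fail spuriously (for example, when the reservation
is lost), so the sequence must retry. A rough Rust analogy of that retry shape,
again illustrative rather than Cranelift's code (`cas_via_llsc_loop` is a made-up
name): `compare_exchange_weak` is allowed the same spurious failure as a single
exclusive pair, which makes the loop explicit.

    use std::sync::atomic::{AtomicU32, Ordering};

    // Retry shape behind the non-LSE fallback: retry only while the failure
    // was spurious; stop once the store succeeds or the observed value
    // genuinely differs from `expected`.
    fn cas_via_llsc_loop(cell: &AtomicU32, expected: u32, replacement: u32) -> u32 {
        loop {
            match cell.compare_exchange_weak(expected, replacement, Ordering::AcqRel, Ordering::Acquire) {
                Ok(old) => return old,                     // store-exclusive succeeded
                Err(old) if old != expected => return old, // real mismatch: give up
                Err(_) => continue,                        // spurious failure: retry
            }
        }
    }

    fn main() {
        let cell = AtomicU32::new(1);
        assert_eq!(cas_via_llsc_loop(&cell, 1, 2), 1);
        assert_eq!(cell.load(Ordering::Relaxed), 2);
    }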