diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index ddbcafcadf..f5d56b7113 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -109,10 +109,10 @@ fn get_intreg_for_arg(idx: usize) -> Option<Reg> { fn get_fltreg_for_arg(idx: usize) -> Option<Reg> { match idx { - 0 => Some(regs::fpr(0)), - 1 => Some(regs::fpr(2)), - 2 => Some(regs::fpr(4)), - 3 => Some(regs::fpr(6)), + 0 => Some(regs::vr(0)), + 1 => Some(regs::vr(2)), + 2 => Some(regs::vr(4)), + 3 => Some(regs::vr(6)), _ => None, } } @@ -130,11 +130,11 @@ fn get_intreg_for_ret(idx: usize) -> Option<Reg> { fn get_fltreg_for_ret(idx: usize) -> Option<Reg> { match idx { - 0 => Some(regs::fpr(0)), + 0 => Some(regs::vr(0)), // ABI extension to support multi-value returns: - 1 => Some(regs::fpr(2)), - 2 => Some(regs::fpr(4)), - 3 => Some(regs::fpr(6)), + 1 => Some(regs::vr(2)), + 2 => Some(regs::vr(4)), + 3 => Some(regs::vr(6)), _ => None, } } @@ -736,14 +736,30 @@ const fn clobbers() -> PRegSet { .with(gpr_preg(3)) .with(gpr_preg(4)) .with(gpr_preg(5)) - .with(fpr_preg(0)) - .with(fpr_preg(1)) - .with(fpr_preg(2)) - .with(fpr_preg(3)) - .with(fpr_preg(4)) - .with(fpr_preg(5)) - .with(fpr_preg(6)) - .with(fpr_preg(7)) + .with(vr_preg(0)) + .with(vr_preg(1)) + .with(vr_preg(2)) + .with(vr_preg(3)) + .with(vr_preg(4)) + .with(vr_preg(5)) + .with(vr_preg(6)) + .with(vr_preg(7)) + .with(vr_preg(16)) + .with(vr_preg(17)) + .with(vr_preg(18)) + .with(vr_preg(19)) + .with(vr_preg(20)) + .with(vr_preg(21)) + .with(vr_preg(22)) + .with(vr_preg(23)) + .with(vr_preg(24)) + .with(vr_preg(25)) + .with(vr_preg(26)) + .with(vr_preg(27)) + .with(vr_preg(28)) + .with(vr_preg(29)) + .with(vr_preg(30)) + .with(vr_preg(31)) } const CLOBBERS: PRegSet = clobbers(); diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index 23a2631c6b..11006ed643 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ 
b/cranelift/codegen/src/isa/s390x/inst.isle @@ -445,62 +445,68 @@ (cond Cond) (imm i16)) - ;; A 32-bit FPU move. + ;; A 32-bit FPU move possibly implemented as vector instruction. (FpuMove32 (rd WritableReg) (rn Reg)) - ;; A 64-bit FPU move. + ;; A 64-bit FPU move possibly implemented as vector instruction. (FpuMove64 (rd WritableReg) (rn Reg)) - ;; A 32-bit conditional move FPU instruction. + ;; A 32-bit conditional move FPU instruction, possibly as vector instruction. (FpuCMov32 (rd WritableReg) (cond Cond) (rm Reg)) - ;; A 64-bit conditional move FPU instruction. + ;; A 64-bit conditional move FPU instruction, possibly as vector instruction. (FpuCMov64 (rd WritableReg) (cond Cond) (rm Reg)) - ;; A 64-bit move instruction from GPR to FPR. - (MovToFpr + ;; A 32-bit move instruction from GPR to FPR or vector element. + (MovToFpr32 (rd WritableReg) (rn Reg)) - ;; A 64-bit move instruction from FPR to GPR. - (MovFromFpr + ;; A 64-bit move instruction from GPR to FPR or vector element. + (MovToFpr64 (rd WritableReg) (rn Reg)) - ;; 1-op FPU instruction. + ;; A 32-bit move instruction from FPR or vector element to GPR. + (MovFromFpr32 + (rd WritableReg) + (rn Reg)) + + ;; A 64-bit move instruction from FPR or vector element to GPR. + (MovFromFpr64 + (rd WritableReg) + (rn Reg)) + + ;; 1-op FPU instruction implemented as vector instruction with the W bit. (FpuRR (fpu_op FPUOp1) (rd WritableReg) (rn Reg)) - ;; 2-op FPU instruction. + ;; 2-op FPU instruction implemented as vector instruction with the W bit. (FpuRRR (fpu_op FPUOp2) (rd WritableReg) + (rn Reg) (rm Reg)) - ;; 3-op FPU instruction. + ;; 3-op FPU instruction implemented as vector instruction with the W bit. (FpuRRRR (fpu_op FPUOp3) (rd WritableReg) (rn Reg) - (rm Reg)) - - ;; FPU copy sign instruction. - (FpuCopysign - (rd WritableReg) - (rn Reg) - (rm Reg)) + (rm Reg) + (ra Reg)) ;; FPU comparison, single-precision (32 bit). 
(FpuCmp32 @@ -562,30 +568,19 @@ (rd WritableReg) (const_data u64)) - ;; Conversion FP -> integer. - (FpuToInt - (op FpuToIntOp) - (rd WritableReg) - (rn Reg)) - - ;; Conversion integer -> FP. - (IntToFpu - (op IntToFpuOp) - (rd WritableReg) - (rn Reg)) - - ;; Round to integer. + ;; 1-op FPU instruction with rounding mode. (FpuRound - (op FpuRoundMode) + (op FpuRoundOp) + (mode FpuRoundMode) (rd WritableReg) (rn Reg)) - ;; 2-op FPU instruction implemented as vector instruction with the W bit. - (FpuVecRRR - (fpu_op FPUOp2) + ;; Vector select instruction. + (VecSelect (rd WritableReg) (rn Reg) - (rm Reg)) + (rm Reg) + (ra Reg)) ;; A machine call instruction. (Call @@ -824,7 +819,6 @@ (Sqrt32) (Sqrt64) (Cvt32To64) - (Cvt64To32) )) ;; A floating-point unit (FPU) operation with two args. @@ -853,44 +847,32 @@ (MSub64) )) -;; A conversion from an FP to an integer value. -(type FpuToIntOp +;; A floating-point unit (FPU) operation with one arg, and rounding mode. +(type FpuRoundOp (enum - (F32ToU32) - (F32ToI32) - (F32ToU64) - (F32ToI64) - (F64ToU32) - (F64ToI32) - (F64ToU64) - (F64ToI64) -)) - -;; A conversion from an integer to an FP value. -(type IntToFpuOp - (enum - (U32ToF32) - (I32ToF32) - (U32ToF64) - (I32ToF64) - (U64ToF32) - (I64ToF32) - (U64ToF64) - (I64ToF64) + (Cvt64To32) + (Round32) + (Round64) + (ToSInt32) + (ToSInt64) + (ToUInt32) + (ToUInt64) + (FromSInt32) + (FromSInt64) + (FromUInt32) + (FromUInt64) )) -;; Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero -;; (trunc), or to nearest, and for 32- or 64-bit FP values. +;; Rounding modes for floating-point ops. (type FpuRoundMode (enum - (Minus32) - (Minus64) - (Plus32) - (Plus64) - (Zero32) - (Zero64) - (Nearest32) - (Nearest64) + (Current) + (ToNearest) + (ShorterPrecision) + (ToNearestTiesToEven) + (ToZero) + (ToPosInfinity) + (ToNegInfinity) )) @@ -1608,22 +1590,15 @@ ;; Helper for emitting `MInst.FpuRRR` instructions. 
(decl fpu_rrr (Type FPUOp2 Reg Reg) Reg) (rule (fpu_rrr ty op src1 src2) - (let ((dst WritableReg (copy_writable_reg ty src1)) - (_ Unit (emit (MInst.FpuRRR op dst src2)))) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.FpuRRR op dst src1 src2)))) dst)) ;; Helper for emitting `MInst.FpuRRRR` instructions. (decl fpu_rrrr (Type FPUOp3 Reg Reg Reg) Reg) (rule (fpu_rrrr ty op src1 src2 src3) - (let ((dst WritableReg (copy_writable_reg ty src1)) - (_ Unit (emit (MInst.FpuRRRR op dst src2 src3)))) - dst)) - -;; Helper for emitting `MInst.FpuCopysign` instructions. -(decl fpu_copysign (Type Reg Reg) Reg) -(rule (fpu_copysign ty src1 src2) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuCopysign dst src1 src2)))) + (_ Unit (emit (MInst.FpuRRRR op dst src1 src2 src3)))) dst)) ;; Helper for emitting `MInst.FpuCmp32` instructions. @@ -1636,46 +1611,39 @@ (rule (fpu_cmp64 src1 src2) (ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp64 src1 src2))) -;; Helper for emitting `MInst.FpuToInt` instructions. -(decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags) -(rule (fpu_to_int ty op src) - (let ((dst WritableReg (temp_writable_reg ty))) - (ProducesFlags.ProducesFlagsReturnsReg (MInst.FpuToInt op dst src) - dst))) - -;; Helper for emitting `MInst.IntToFpu` instructions. -(decl int_to_fpu (Type IntToFpuOp Reg) Reg) -(rule (int_to_fpu ty op src) - (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.IntToFpu op dst src)))) - dst)) - ;; Helper for emitting `MInst.FpuRound` instructions. -(decl fpu_round (Type FpuRoundMode Reg) Reg) -(rule (fpu_round ty mode src) +(decl fpu_round (Type FpuRoundOp FpuRoundMode Reg) Reg) +(rule (fpu_round ty op mode src) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuRound mode dst src)))) + (_ Unit (emit (MInst.FpuRound op mode dst src)))) dst)) -;; Helper for emitting `MInst.FpuVecRRR` instructions. 
-(decl fpuvec_rrr (Type FPUOp2 Reg Reg) Reg) -(rule (fpuvec_rrr ty op src1 src2) - (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.FpuVecRRR op dst src1 src2)))) +;; Helper for emitting `MInst.MovToFpr32` instructions. +(decl mov_to_fpr32 (Reg) Reg) +(rule (mov_to_fpr32 src) + (let ((dst WritableReg (temp_writable_reg $F32)) + (_ Unit (emit (MInst.MovToFpr32 dst src)))) dst)) -;; Helper for emitting `MInst.MovToFpr` instructions. -(decl mov_to_fpr (Reg) Reg) -(rule (mov_to_fpr src) +;; Helper for emitting `MInst.MovToFpr64` instructions. +(decl mov_to_fpr64 (Reg) Reg) +(rule (mov_to_fpr64 src) (let ((dst WritableReg (temp_writable_reg $F64)) - (_ Unit (emit (MInst.MovToFpr dst src)))) + (_ Unit (emit (MInst.MovToFpr64 dst src)))) + dst)) + +;; Helper for emitting `MInst.MovFromFpr32` instructions. +(decl mov_from_fpr32 (Reg) Reg) +(rule (mov_from_fpr32 src) + (let ((dst WritableReg (temp_writable_reg $I32)) + (_ Unit (emit (MInst.MovFromFpr32 dst src)))) dst)) -;; Helper for emitting `MInst.MovFromFpr` instructions. -(decl mov_from_fpr (Reg) Reg) -(rule (mov_from_fpr src) +;; Helper for emitting `MInst.MovFromFpr64` instructions. +(decl mov_from_fpr64 (Reg) Reg) +(rule (mov_from_fpr64 src) (let ((dst WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.MovFromFpr dst src)))) + (_ Unit (emit (MInst.MovFromFpr64 dst src)))) dst)) ;; Helper for emitting `MInst.FpuLoad32` instructions. @@ -1726,6 +1694,13 @@ (rule (fpu_storerev64 src addr) (SideEffectNoResult.Inst (MInst.FpuStoreRev64 src addr))) +;; Helper for emitting `MInst.VecSelect` instructions. +(decl vec_select (Type Reg Reg Reg) Reg) +(rule (vec_select ty src1 src2 src3) + (let ((dst WritableReg (temp_writable_reg ty)) + (_ Unit (emit (MInst.VecSelect dst src1 src2 src3)))) + dst)) + ;; Helper for emitting `MInst.LoadExtNameFar` instructions. 
(decl load_ext_name_far (ExternalName i64) Reg) (rule (load_ext_name_far name offset) @@ -2047,6 +2022,13 @@ (_ Unit (emit_imm ty dst n))) dst)) +;; Variant used for negative constants. +(decl imm32 (Type i32) Reg) +(rule (imm32 $I64 n) + (let ((dst WritableReg (temp_writable_reg $I64)) + (_ Unit (emit (MInst.Mov64SImm32 dst n)))) + (writable_reg_to_reg dst))) + ;; Place an immediate into the low half of a register pair. ;; The high half is taken from the input. (decl imm_regpair_lo (Type u64 RegPair) RegPair) @@ -2651,6 +2633,50 @@ dst)) +;; Helpers for generating saturating integer instructions ;;;;;;;;;;;;;;;;;;;;;; + +(decl uint_sat_reg (Type Type Reg) Reg) +(rule (uint_sat_reg ty ty reg) reg) +(rule (uint_sat_reg $I8 (ty_32_or_64 ty) reg) + (with_flags_reg (icmpu_uimm32 ty reg 255) + (cmov_imm ty (intcc_as_cond (IntCC.UnsignedGreaterThan)) 255 reg))) +(rule (uint_sat_reg $I16 (ty_32_or_64 ty) reg) + (with_flags_reg (icmpu_uimm32 ty reg 65535) + (cmov_imm ty (intcc_as_cond (IntCC.UnsignedGreaterThan)) -1 reg))) +(rule (uint_sat_reg $I32 $I64 reg) + (let ((bound Reg (imm $I64 4294967295)) + (cond ProducesBool + (bool (icmpu_reg $I64 reg bound) + (intcc_as_cond (IntCC.UnsignedGreaterThan))))) + (select_bool_reg $I64 cond bound reg))) + +(decl sint_sat_reg (Type Type Reg) Reg) +(rule (sint_sat_reg ty ty reg) reg) +(rule (sint_sat_reg $I8 (ty_32_or_64 ty) reg) + (let ((ub Reg (with_flags_reg (icmps_simm16 ty reg 127) + (cmov_imm ty + (intcc_as_cond (IntCC.SignedGreaterThan)) 127 reg)))) + (with_flags_reg (icmps_simm16 ty ub -128) + (cmov_imm ty (intcc_as_cond (IntCC.SignedLessThan)) -128 ub)))) +(rule (sint_sat_reg $I16 (ty_32_or_64 ty) reg) + (let ((ub Reg (with_flags_reg (icmps_simm16 ty reg 32767) + (cmov_imm ty + (intcc_as_cond (IntCC.SignedGreaterThan)) 32767 reg)))) + (with_flags_reg (icmps_simm16 ty ub -32768) + (cmov_imm ty (intcc_as_cond (IntCC.SignedLessThan)) -32768 ub)))) +(rule (sint_sat_reg $I32 $I64 reg) + (let ((u_bound Reg (imm32 $I64 2147483647)) 
+ (u_cond ProducesBool + (bool (icmps_reg $I64 reg u_bound) + (intcc_as_cond (IntCC.SignedGreaterThan)))) + (ub Reg (select_bool_reg $I64 u_cond u_bound reg)) + (l_bound Reg (imm32 $I64 -2147483648)) + (l_cond ProducesBool + (bool (icmps_reg $I64 ub l_bound) + (intcc_as_cond (IntCC.SignedLessThan))))) + (select_bool_reg $I64 l_cond l_bound ub))) + + ;; Helpers for generating `add` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl aluop_add (Type) ALUOp) @@ -3151,7 +3177,7 @@ (rule (fpuop2_min $F64) (FPUOp2.Min64)) (decl fmin_reg (Type Reg Reg) Reg) -(rule (fmin_reg ty x y) (fpuvec_rrr ty (fpuop2_min ty) x y)) +(rule (fmin_reg ty x y) (fpu_rrr ty (fpuop2_min ty) x y)) ;; Helpers for generating `fmax` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3161,7 +3187,7 @@ (rule (fpuop2_max $F64) (FPUOp2.Max64)) (decl fmax_reg (Type Reg Reg) Reg) -(rule (fmax_reg ty x y) (fpuvec_rrr ty (fpuop2_max ty) x y)) +(rule (fmax_reg ty x y) (fpu_rrr ty (fpuop2_max ty) x y)) ;; Helpers for generating `fma` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3171,7 +3197,7 @@ (rule (fpuop3_fma $F64) (FPUOp3.MAdd64)) (decl fma_reg (Type Reg Reg Reg) Reg) -(rule (fma_reg ty x y acc) (fpu_rrrr ty (fpuop3_fma ty) acc x y)) +(rule (fma_reg ty x y acc) (fpu_rrrr ty (fpuop3_fma ty) x y acc)) ;; Helpers for generating `sqrt` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -3204,124 +3230,136 @@ (rule (fabs_reg ty x) (fpu_rr ty (fpuop1_abs ty) x)) -;; Helpers for generating `ceil` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Helpers for generating `ceil`, `floor`, `trunc`, `nearest` instructions ;;;; -(decl fpuroundmode_ceil (Type) FpuRoundMode) -(rule (fpuroundmode_ceil $F32) (FpuRoundMode.Plus32)) -(rule (fpuroundmode_ceil $F64) (FpuRoundMode.Plus64)) +(decl fpuroundop_round (Type) FpuRoundOp) +(rule (fpuroundop_round $F32) (FpuRoundOp.Round32)) +(rule (fpuroundop_round $F64) (FpuRoundOp.Round64)) (decl ceil_reg (Type Reg) Reg) -(rule (ceil_reg ty x) (fpu_round ty 
(fpuroundmode_ceil ty) x)) - - -;; Helpers for generating `floor` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl fpuroundmode_floor (Type) FpuRoundMode) -(rule (fpuroundmode_floor $F32) (FpuRoundMode.Minus32)) -(rule (fpuroundmode_floor $F64) (FpuRoundMode.Minus64)) +(rule (ceil_reg ty x) (fpu_round ty (fpuroundop_round ty) + (FpuRoundMode.ToPosInfinity) x)) (decl floor_reg (Type Reg) Reg) -(rule (floor_reg ty x) (fpu_round ty (fpuroundmode_floor ty) x)) - - -;; Helpers for generating `trunc` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl fpuroundmode_trunc (Type) FpuRoundMode) -(rule (fpuroundmode_trunc $F32) (FpuRoundMode.Zero32)) -(rule (fpuroundmode_trunc $F64) (FpuRoundMode.Zero64)) +(rule (floor_reg ty x) (fpu_round ty (fpuroundop_round ty) + (FpuRoundMode.ToNegInfinity) x)) (decl trunc_reg (Type Reg) Reg) -(rule (trunc_reg ty x) (fpu_round ty (fpuroundmode_trunc ty) x)) - - -;; Helpers for generating `nearest` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(decl fpuroundmode_nearest (Type) FpuRoundMode) -(rule (fpuroundmode_nearest $F32) (FpuRoundMode.Nearest32)) -(rule (fpuroundmode_nearest $F64) (FpuRoundMode.Nearest64)) +(rule (trunc_reg ty x) (fpu_round ty (fpuroundop_round ty) + (FpuRoundMode.ToZero) x)) (decl nearest_reg (Type Reg) Reg) -(rule (nearest_reg ty x) (fpu_round ty (fpuroundmode_nearest ty) x)) +(rule (nearest_reg ty x) (fpu_round ty (fpuroundop_round ty) + (FpuRoundMode.ToNearestTiesToEven) x)) ;; Helpers for generating `fpromote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl fpuop1_promote (Type Type) FPUOp1) -(rule (fpuop1_promote $F64 $F32) (FPUOp1.Cvt32To64)) - (decl fpromote_reg (Type Type Reg) Reg) -(rule (fpromote_reg dst_ty src_ty x) - (fpu_rr dst_ty (fpuop1_promote dst_ty src_ty) x)) +(rule (fpromote_reg ty ty x) x) +(rule (fpromote_reg $F64 $F32 x) + (fpu_rr $F64 (FPUOp1.Cvt32To64) x)) ;; Helpers for generating `fdemote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl fpuop1_demote (Type Type) FPUOp1) 
-(rule (fpuop1_demote $F32 $F64) (FPUOp1.Cvt64To32)) - -(decl fdemote_reg (Type Type Reg) Reg) -(rule (fdemote_reg dst_ty src_ty x) - (fpu_rr dst_ty (fpuop1_demote dst_ty src_ty) x)) +(decl fdemote_reg (Type Type FpuRoundMode Reg) Reg) +(rule (fdemote_reg ty ty mode x) x) +(rule (fdemote_reg $F32 $F64 mode x) + (fpu_round $F32 (FpuRoundOp.Cvt64To32) mode x)) ;; Helpers for generating `fcvt_from_uint` instructions ;;;;;;;;;;;;;;;;;;;;;;;; -(decl uint_to_fpu_op (Type Type) IntToFpuOp) -(rule (uint_to_fpu_op $F32 $I32) (IntToFpuOp.U32ToF32)) -(rule (uint_to_fpu_op $F64 $I32) (IntToFpuOp.U32ToF64)) -(rule (uint_to_fpu_op $F32 $I64) (IntToFpuOp.U64ToF32)) -(rule (uint_to_fpu_op $F64 $I64) (IntToFpuOp.U64ToF64)) +(decl uint_to_fpu_op (Type) FpuRoundOp) +(rule (uint_to_fpu_op $F32) (FpuRoundOp.FromUInt32)) +(rule (uint_to_fpu_op $F64) (FpuRoundOp.FromUInt64)) -(decl fcvt_from_uint_reg (Type Type Reg) Reg) -(rule (fcvt_from_uint_reg dst_ty src_ty x) - (int_to_fpu dst_ty (uint_to_fpu_op dst_ty src_ty) x)) +(decl fcvt_from_uint_reg (Type FpuRoundMode Reg) Reg) +(rule (fcvt_from_uint_reg ty mode x) + (fpu_round ty (uint_to_fpu_op ty) mode x)) ;; Helpers for generating `fcvt_from_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;; -(decl sint_to_fpu_op (Type Type) IntToFpuOp) -(rule (sint_to_fpu_op $F32 $I32) (IntToFpuOp.I32ToF32)) -(rule (sint_to_fpu_op $F64 $I32) (IntToFpuOp.I32ToF64)) -(rule (sint_to_fpu_op $F32 $I64) (IntToFpuOp.I64ToF32)) -(rule (sint_to_fpu_op $F64 $I64) (IntToFpuOp.I64ToF64)) +(decl sint_to_fpu_op (Type) FpuRoundOp) +(rule (sint_to_fpu_op $F32) (FpuRoundOp.FromSInt32)) +(rule (sint_to_fpu_op $F64) (FpuRoundOp.FromSInt64)) -(decl fcvt_from_sint_reg (Type Type Reg) Reg) -(rule (fcvt_from_sint_reg dst_ty src_ty x) - (int_to_fpu dst_ty (sint_to_fpu_op dst_ty src_ty) x)) +(decl fcvt_from_sint_reg (Type FpuRoundMode Reg) Reg) +(rule (fcvt_from_sint_reg ty mode x) + (fpu_round ty (sint_to_fpu_op ty) mode x)) -;; Helpers for generating `fcvt_to_uint` instructions 
;;;;;;;;;;;;;;;;;;;;;;;; +;; Helpers for generating `fcvt_to_[us]int` instructions ;;;;;;;;;;;;;;;;;;;;;;; -(decl fpu_to_uint_op (Type Type) FpuToIntOp) -(rule (fpu_to_uint_op $I32 $F32) (FpuToIntOp.F32ToU32)) -(rule (fpu_to_uint_op $I32 $F64) (FpuToIntOp.F64ToU32)) -(rule (fpu_to_uint_op $I64 $F32) (FpuToIntOp.F32ToU64)) -(rule (fpu_to_uint_op $I64 $F64) (FpuToIntOp.F64ToU64)) +(decl fcvt_flt_ty (Type Type) Type) +(rule (fcvt_flt_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $F32) +(rule (fcvt_flt_ty (fits_in_64 ty) $F32) $F64) +(rule (fcvt_flt_ty (fits_in_64 ty) $F64) $F64) -(decl fcvt_to_uint_reg_with_flags (Type Type Reg) ProducesFlags) -(rule (fcvt_to_uint_reg_with_flags dst_ty src_ty x) - (fpu_to_int dst_ty (fpu_to_uint_op dst_ty src_ty) x)) +(decl fcvt_int_ty (Type Type) Type) +(rule (fcvt_int_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $I32) +(rule (fcvt_int_ty (fits_in_64 ty) $F32) $I64) +(rule (fcvt_int_ty (fits_in_64 ty) $F64) $I64) -(decl fcvt_to_uint_reg (Type Type Reg) Reg) -(rule (fcvt_to_uint_reg dst_ty src_ty x) - (drop_flags (fcvt_to_uint_reg_with_flags dst_ty src_ty x))) +;; Helpers for generating `fcvt_to_uint` instructions ;;;;;;;;;;;;;;;;;;;;;;;; -;; Helpers for generating `fcvt_to_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;; +(decl fcvt_to_uint_reg (Type FpuRoundMode Reg) Reg) +(rule (fcvt_to_uint_reg $F32 mode x) + (mov_from_fpr32 (fpu_round $F32 (FpuRoundOp.ToUInt32) mode x))) +(rule (fcvt_to_uint_reg $F64 mode x) + (mov_from_fpr64 (fpu_round $F64 (FpuRoundOp.ToUInt64) mode x))) -(decl fpu_to_sint_op (Type Type) FpuToIntOp) -(rule (fpu_to_sint_op $I32 $F32) (FpuToIntOp.F32ToI32)) -(rule (fpu_to_sint_op $I32 $F64) (FpuToIntOp.F64ToI32)) -(rule (fpu_to_sint_op $I64 $F32) (FpuToIntOp.F32ToI64)) -(rule (fpu_to_sint_op $I64 $F64) (FpuToIntOp.F64ToI64)) +(decl fcvt_to_uint_ub (Type Type) Reg) +(rule (fcvt_to_uint_ub $F32 dst_ty) + (imm $F32 (fcvt_to_uint_ub32 (ty_bits dst_ty)))) +(rule (fcvt_to_uint_ub $F64 dst_ty) + (imm $F64 
(fcvt_to_uint_ub64 (ty_bits dst_ty)))) -(decl fcvt_to_sint_reg_with_flags (Type Type Reg) ProducesFlags) -(rule (fcvt_to_sint_reg_with_flags dst_ty src_ty x) - (fpu_to_int dst_ty (fpu_to_sint_op dst_ty src_ty) x)) +(decl fcvt_to_uint_lb (Type) Reg) +(rule (fcvt_to_uint_lb $F32) (imm $F32 (fcvt_to_uint_lb32))) +(rule (fcvt_to_uint_lb $F64) (imm $F64 (fcvt_to_uint_lb64))) + +(decl fcvt_to_uint_ub32 (u8) u64) +(extern constructor fcvt_to_uint_ub32 fcvt_to_uint_ub32) +(decl fcvt_to_uint_lb32 () u64) +(extern constructor fcvt_to_uint_lb32 fcvt_to_uint_lb32) +(decl fcvt_to_uint_ub64 (u8) u64) +(extern constructor fcvt_to_uint_ub64 fcvt_to_uint_ub64) +(decl fcvt_to_uint_lb64 () u64) +(extern constructor fcvt_to_uint_lb64 fcvt_to_uint_lb64) + + +;; Helpers for generating `fcvt_to_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;; -(decl fcvt_to_sint_reg (Type Type Reg) Reg) -(rule (fcvt_to_sint_reg dst_ty src_ty x) - (drop_flags (fcvt_to_sint_reg_with_flags dst_ty src_ty x))) +(decl fcvt_to_sint_reg (Type FpuRoundMode Reg) Reg) +(rule (fcvt_to_sint_reg $F32 mode x) + (mov_from_fpr32 (fpu_round $F32 (FpuRoundOp.ToSInt32) mode x))) +(rule (fcvt_to_sint_reg $F64 mode x) + (mov_from_fpr64 (fpu_round $F64 (FpuRoundOp.ToSInt64) mode x))) + +(decl fcvt_to_sint_ub (Type Type) Reg) +(rule (fcvt_to_sint_ub $F32 dst_ty) + (imm $F32 (fcvt_to_sint_ub32 (ty_bits dst_ty)))) +(rule (fcvt_to_sint_ub $F64 dst_ty) + (imm $F64 (fcvt_to_sint_ub64 (ty_bits dst_ty)))) + +(decl fcvt_to_sint_lb (Type Type) Reg) +(rule (fcvt_to_sint_lb $F32 dst_ty) + (imm $F32 (fcvt_to_sint_lb32 (ty_bits dst_ty)))) +(rule (fcvt_to_sint_lb $F64 dst_ty) + (imm $F64 (fcvt_to_sint_lb64 (ty_bits dst_ty)))) + +(decl fcvt_to_sint_ub32 (u8) u64) +(extern constructor fcvt_to_sint_ub32 fcvt_to_sint_ub32) +(decl fcvt_to_sint_lb32 (u8) u64) +(extern constructor fcvt_to_sint_lb32 fcvt_to_sint_lb32) +(decl fcvt_to_sint_ub64 (u8) u64) +(extern constructor fcvt_to_sint_ub64 fcvt_to_sint_ub64) +(decl fcvt_to_sint_lb64 (u8) u64) +(extern 
constructor fcvt_to_sint_lb64 fcvt_to_sint_lb64) ;; Helpers for generating signed `icmp` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index e955607094..a75e6ffaf3 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -296,6 +296,38 @@ pub fn mem_imm16_emit( } } +pub fn mem_vrx_emit( + rd: Reg, + mem: &MemArg, + opcode: u16, + m3: u8, + add_trap: bool, + sink: &mut MachBuffer<Inst>, + emit_info: &EmitInfo, + state: &mut EmitState, +) { + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + for inst in mem_insts.into_iter() { + inst.emit(&[], sink, emit_info, state); + } + + if add_trap && mem.can_trap() { + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() { + sink.add_trap(TrapCode::HeapOutOfBounds); + } + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. + } => { + put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), m3)); + } + _ => unreachable!(), + } +} + //============================================================================= // Instructions and subcomponents: emission @@ -304,15 +336,50 @@ fn machreg_to_gpr(m: Reg) -> u8 { u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } -fn machreg_to_fpr(m: Reg) -> u8 { +fn machreg_to_vr(m: Reg) -> u8 { assert_eq!(m.class(), RegClass::Float); u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } -fn machreg_to_gpr_or_fpr(m: Reg) -> u8 { +fn machreg_to_fpr(m: Reg) -> u8 { + assert!(is_fpr(m)); u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } +fn machreg_to_gpr_or_fpr(m: Reg) -> u8 { + let reg = u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap(); + assert!(reg < 16); + reg +} + +fn rxb(v1: Option<Reg>, v2: Option<Reg>, v3: Option<Reg>, v4: Option<Reg>) -> u8 { + let mut rxb = 0; + + let is_high_vr = |reg| -> bool { + if let Some(reg) = reg { + if !is_fpr(reg) { + return true; + } + } + false + }; + + 
if is_high_vr(v1) { + rxb = rxb | 8; + } + if is_high_vr(v2) { + rxb = rxb | 4; + } + if is_high_vr(v3) { + rxb = rxb | 2; + } + if is_high_vr(v4) { + rxb = rxb | 1; + } + + rxb +} + /// E-type instructions. /// /// 15 @@ -785,19 +852,45 @@ fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] { enc } -/// VRR-type instructions. +/// VRRa-type instructions. +/// +/// 47 39 35 31 23 19 15 11 7 +/// opcode1 v1 v2 - m5 m4 m3 rxb opcode2 +/// 40 36 32 24 20 16 12 8 0 +/// +fn enc_vrr_a(opcode: u16, v1: Reg, v2: Reg, m3: u8, m4: u8, m5: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = rxb(Some(v1), Some(v2), None, None); + let v1 = machreg_to_vr(v1) & 0x0f; + let v2 = machreg_to_vr(v2) & 0x0f; + let m3 = m3 & 0x0f; + let m4 = m4 & 0x0f; + let m5 = m5 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | v2; + enc[2] = 0; + enc[3] = m5 << 4 | m4; + enc[4] = m3 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// VRRc-type instructions. /// /// 47 39 35 31 27 23 19 15 11 7 /// opcode1 v1 v2 v3 - m6 m5 m4 rxb opcode2 /// 40 36 32 28 24 20 16 12 8 0 /// -fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] { +fn enc_vrr_c(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] { let opcode1 = ((opcode >> 8) & 0xff) as u8; let opcode2 = (opcode & 0xff) as u8; - let rxb = 0; // FIXME - let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME - let v2 = machreg_to_fpr(v2) & 0x0f; // FIXME - let v3 = machreg_to_fpr(v3) & 0x0f; // FIXME + let rxb = rxb(Some(v1), Some(v2), Some(v3), None); + let v1 = machreg_to_vr(v1) & 0x0f; + let v2 = machreg_to_vr(v2) & 0x0f; + let v3 = machreg_to_vr(v3) & 0x0f; let m4 = m4 & 0x0f; let m5 = m5 & 0x0f; let m6 = m6 & 0x0f; @@ -812,6 +905,87 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u enc } +/// VRRe-type instructions. 
+/// +/// 47 39 35 31 27 23 19 15 11 7 +/// opcode1 v1 v2 v3 m6 - m5 v4 rxb opcode2 +/// 40 36 32 28 24 20 16 12 8 0 +/// +fn enc_vrr_e(opcode: u16, v1: Reg, v2: Reg, v3: Reg, v4: Reg, m5: u8, m6: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = rxb(Some(v1), Some(v2), Some(v3), Some(v4)); + let v1 = machreg_to_vr(v1) & 0x0f; + let v2 = machreg_to_vr(v2) & 0x0f; + let v3 = machreg_to_vr(v3) & 0x0f; + let v4 = machreg_to_vr(v4) & 0x0f; + let m5 = m5 & 0x0f; + let m6 = m6 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | v2; + enc[2] = v3 << 4 | m6; + enc[3] = m5; + enc[4] = v4 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// VRSb-type instructions. +/// +/// 47 39 35 31 27 15 11 7 +/// opcode1 v1 r3 b2 d2 m4 rxb opcode2 +/// 40 36 32 28 16 12 8 0 +/// +fn enc_vrs_b(opcode: u16, v1: Reg, b2: Reg, d2: u32, r3: Reg, m4: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = rxb(Some(v1), None, None, None); + let v1 = machreg_to_vr(v1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let r3 = machreg_to_gpr(r3) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + let m4 = m4 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | r3; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc[4] = m4 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// VRSc-type instructions. 
+/// +/// 47 39 35 31 27 15 11 7 +/// opcode1 r1 v3 b2 d2 m4 rxb opcode2 +/// 40 36 32 28 16 12 8 0 +/// +fn enc_vrs_c(opcode: u16, r1: Reg, b2: Reg, d2: u32, v3: Reg, m4: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = rxb(None, Some(v3), None, None); + let r1 = machreg_to_gpr(r1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let v3 = machreg_to_vr(v3) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + let m4 = m4 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = r1 << 4 | v3; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc[4] = m4 << 4 | rxb; + enc[5] = opcode2; + enc +} + /// VRX-type instructions. /// /// 47 39 35 31 27 15 11 7 @@ -821,8 +995,8 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u fn enc_vrx(opcode: u16, v1: Reg, b2: Reg, x2: Reg, d2: u32, m3: u8) -> [u8; 6] { let opcode1 = ((opcode >> 8) & 0xff) as u8; let opcode2 = (opcode & 0xff) as u8; - let rxb = 0; // FIXME - let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME + let rxb = rxb(Some(v1), None, None, None); + let v1 = machreg_to_vr(v1) & 0x0f; let b2 = machreg_to_gpr(b2) & 0x0f; let x2 = machreg_to_gpr(x2) & 0x0f; let d2_lo = (d2 & 0xff) as u8; @@ -1633,9 +1807,7 @@ impl MachInstEmit for Inst { | &Inst::Load64SExt32 { rd, ref mem } | &Inst::LoadRev16 { rd, ref mem } | &Inst::LoadRev32 { rd, ref mem } - | &Inst::LoadRev64 { rd, ref mem } - | &Inst::FpuLoad32 { rd, ref mem } - | &Inst::FpuLoad64 { rd, ref mem } => { + | &Inst::LoadRev64 { rd, ref mem } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); @@ -1655,8 +1827,6 @@ impl MachInstEmit for Inst { &Inst::LoadRev16 { .. } => (None, Some(0xe31f), None), // LRVH &Inst::LoadRev32 { .. } => (None, Some(0xe31e), None), // LRV &Inst::LoadRev64 { .. } => (None, Some(0xe30f), None), // LRVG - &Inst::FpuLoad32 { .. 
} => (Some(0x78), Some(0xed64), None), // LE(Y) - &Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), None), // LD(Y) _ => unreachable!(), }; let rd = rd.to_reg(); @@ -1664,36 +1834,27 @@ impl MachInstEmit for Inst { rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, ); } - &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { + &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } + | &Inst::FpuLoadRev32 { rd, ref mem } + | &Inst::FpuLoadRev64 { rd, ref mem } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); - let opcode = match self { - &Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF - &Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG + let (opcode_rx, opcode_rxy, opcode_vrx) = match self { + &Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), 0xe703), // LE(Y), VLEF + &Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), 0xe702), // LD(Y), VLEG + &Inst::FpuLoadRev32 { .. } => (None, None, 0xe603), // VLEBRF + &Inst::FpuLoadRev64 { .. } => (None, None, 0xe602), // VLEBRG _ => unreachable!(), }; - - let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true); - for inst in mem_insts.into_iter() { - inst.emit(&[], sink, emit_info, state); - } - - let srcloc = state.cur_srcloc(); - if srcloc != SourceLoc::default() && mem.can_trap() { - sink.add_trap(TrapCode::HeapOutOfBounds); - } - - match &mem { - &MemArg::BXD12 { - base, index, disp, .. 
- } => { - put( - sink, - &enc_vrx(opcode, rd.to_reg(), base, index, disp.bits(), 0), - ); - } - _ => unreachable!(), + let rd = rd.to_reg(); + if is_fpr(rd) && opcode_rx.is_some() { + mem_emit( + rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state, + ); + } else { + mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state); } } @@ -1703,9 +1864,7 @@ impl MachInstEmit for Inst { | &Inst::Store64 { rd, ref mem } | &Inst::StoreRev16 { rd, ref mem } | &Inst::StoreRev32 { rd, ref mem } - | &Inst::StoreRev64 { rd, ref mem } - | &Inst::FpuStore32 { rd, ref mem } - | &Inst::FpuStore64 { rd, ref mem } => { + | &Inst::StoreRev64 { rd, ref mem } => { let rd = allocs.next(rd); let mem = mem.with_allocs(&mut allocs); @@ -1717,8 +1876,6 @@ impl MachInstEmit for Inst { &Inst::StoreRev16 { .. } => (None, Some(0xe33f), None), // STRVH &Inst::StoreRev32 { .. } => (None, Some(0xe33e), None), // STRV &Inst::StoreRev64 { .. } => (None, Some(0xe32f), None), // STRVG - &Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), None), // STE(Y) - &Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), None), // STD(Y) _ => unreachable!(), }; mem_emit( @@ -1747,33 +1904,26 @@ impl MachInstEmit for Inst { }; mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state); } - &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { + &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } + | &Inst::FpuStoreRev32 { rd, ref mem } + | &Inst::FpuStoreRev64 { rd, ref mem } => { let rd = allocs.next(rd); let mem = mem.with_allocs(&mut allocs); - let opcode = match self { - &Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF - &Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG + let (opcode_rx, opcode_rxy, opcode_vrx) = match self { + &Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), 0xe70b), // STE(Y), VSTEF + &Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), 0xe70a), // STD(Y), VSTEG + &Inst::FpuStoreRev32 { .. 
} => (None, None, 0xe60b), // VSTEBRF + &Inst::FpuStoreRev64 { .. } => (None, None, 0xe60a), // VSTEBRG _ => unreachable!(), }; - - let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true); - for inst in mem_insts.into_iter() { - inst.emit(&[], sink, emit_info, state); - } - - let srcloc = state.cur_srcloc(); - if srcloc != SourceLoc::default() && mem.can_trap() { - sink.add_trap(TrapCode::HeapOutOfBounds); - } - - match &mem { - &MemArg::BXD12 { - base, index, disp, .. - } => { - put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), 0)); - } - _ => unreachable!(), + if is_fpr(rd) && opcode_rx.is_some() { + mem_emit( + rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state, + ); + } else { + mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state); } } @@ -1966,47 +2116,95 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = 0x38; // LER - put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + if is_fpr(rd.to_reg()) && is_fpr(rn) { + let opcode = 0x38; // LER + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } else { + let opcode = 0xe756; // VLR + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); + } } &Inst::FpuMove64 { rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = 0x28; // LDR - put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + if is_fpr(rd.to_reg()) && is_fpr(rn) { + let opcode = 0x28; // LDR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } else { + let opcode = 0xe756; // VLR + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0)); + } } &Inst::FpuCMov32 { rd, cond, rm } => { let rd = allocs.next_writable(rd); let rm = allocs.next(rm); - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); - let opcode = 0x38; // LER - put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + if is_fpr(rd.to_reg()) && is_fpr(rm) { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 
+ 2)); + let opcode = 0x38; // LER + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } else { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6)); + let opcode = 0xe756; // VLR + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); + } } &Inst::FpuCMov64 { rd, cond, rm } => { let rd = allocs.next_writable(rd); let rm = allocs.next(rm); - let opcode = 0xa74; // BCR - put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); - let opcode = 0x28; // LDR - put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + if is_fpr(rd.to_reg()) && is_fpr(rm) { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2)); + let opcode = 0x28; // LDR + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } else { + let opcode = 0xa74; // BCR + put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6)); + let opcode = 0xe756; // VLR + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0)); + } } - &Inst::MovToFpr { rd, rn } => { + &Inst::MovToFpr32 { rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = 0xb3c1; // LDGR - put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + let (opcode, m4) = (0xe722, 2); // VLVG + put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4)); } - &Inst::MovFromFpr { rd, rn } => { + &Inst::MovToFpr64 { rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = 0xb3cd; // LGDR - put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + if is_fpr(rd.to_reg()) { + let opcode = 0xb3c1; // LDGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } else { + let (opcode, m4) = (0xe722, 3); // VLVG + put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4)); + } + } + &Inst::MovFromFpr32 { rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + + let (opcode, m4) = (0xe721, 2); // VLGV + put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4)); + } + &Inst::MovFromFpr64 { rd, rn } => { + let rd = 
allocs.next_writable(rd); + let rn = allocs.next(rn); + + if is_fpr(rn) { + let opcode = 0xb3cd; // LGDR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } else { + let (opcode, m4) = (0xe721, 3); // VLGV + put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4)); + } } &Inst::LoadFpuConst32 { rd, const_data } => { let rd = allocs.next_writable(rd); @@ -2034,138 +2232,143 @@ impl MachInstEmit for Inst { }; inst.emit(&[], sink, emit_info, state); } - - &Inst::FpuCopysign { rd, rn, rm } => { - let rd = allocs.next_writable(rd); - let rn = allocs.next(rn); - let rm = allocs.next(rm); - - let opcode = 0xb372; // CPSDR - put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0)); - } &Inst::FpuRR { fpu_op, rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = match fpu_op { - FPUOp1::Abs32 => 0xb300, // LPEBR - FPUOp1::Abs64 => 0xb310, // LPDBR - FPUOp1::Neg32 => 0xb303, // LCEBR - FPUOp1::Neg64 => 0xb313, // LCDBR - FPUOp1::NegAbs32 => 0xb301, // LNEBR - FPUOp1::NegAbs64 => 0xb311, // LNDBR - FPUOp1::Sqrt32 => 0xb314, // SQEBR - FPUOp1::Sqrt64 => 0xb315, // SQDBR - FPUOp1::Cvt32To64 => 0xb304, // LDEBR - FPUOp1::Cvt64To32 => 0xb344, // LEDBR + let (opcode, m3, m5, opcode_fpr) = match fpu_op { + FPUOp1::Abs32 => (0xe7cc, 2, 2, 0xb300), // VFPSO, LPEBR + FPUOp1::Abs64 => (0xe7cc, 3, 2, 0xb310), // VFPSO, LPDBR + FPUOp1::Neg32 => (0xe7cc, 2, 0, 0xb303), // VFPSO, LCEBR + FPUOp1::Neg64 => (0xe7cc, 3, 0, 0xb313), // VFPSO, LCDBR + FPUOp1::NegAbs32 => (0xe7cc, 2, 1, 0xb301), // VFPSO, LNEBR + FPUOp1::NegAbs64 => (0xe7cc, 3, 1, 0xb311), // VFPSO, LNDBR + FPUOp1::Sqrt32 => (0xe7ce, 2, 0, 0xb314), // VFSQ, SQEBR + FPUOp1::Sqrt64 => (0xe7ce, 3, 0, 0xb315), // VFSQ, SQDBR + FPUOp1::Cvt32To64 => (0xe7c4, 2, 0, 0xb304), // VFLL, LDEBR }; - put(sink, &enc_rre(opcode, rd.to_reg(), rn)); - } - &Inst::FpuRRR { fpu_op, rd, rm } => { - let rd = allocs.next_writable(rd); - let rm = allocs.next(rm); - - let opcode = match fpu_op { - FPUOp2::Add32
=> 0xb30a, // AEBR - FPUOp2::Add64 => 0xb31a, // ADBR - FPUOp2::Sub32 => 0xb30b, // SEBR - FPUOp2::Sub64 => 0xb31b, // SDBR - FPUOp2::Mul32 => 0xb317, // MEEBR - FPUOp2::Mul64 => 0xb31c, // MDBR - FPUOp2::Div32 => 0xb30d, // DEBR - FPUOp2::Div64 => 0xb31d, // DDBR - _ => unimplemented!(), - }; - put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + if is_fpr(rd.to_reg()) && is_fpr(rn) { + put(sink, &enc_rre(opcode_fpr, rd.to_reg(), rn)); + } else { + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, m5)); + } } - &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); let rm = allocs.next(rm); - let opcode = match fpu_op { - FPUOp3::MAdd32 => 0xb30e, // MAEBR - FPUOp3::MAdd64 => 0xb31e, // MADBR - FPUOp3::MSub32 => 0xb30f, // MSEBR - FPUOp3::MSub64 => 0xb31f, // MSDBR + let (opcode, m4, m6, opcode_fpr) = match fpu_op { + FPUOp2::Add32 => (0xe7e3, 2, 0, Some(0xb30a)), // VFA, AEBR + FPUOp2::Add64 => (0xe7e3, 3, 0, Some(0xb31a)), // VFA, ADBR + FPUOp2::Sub32 => (0xe7e2, 2, 0, Some(0xb30b)), // VFS, SEBR + FPUOp2::Sub64 => (0xe7e2, 3, 0, Some(0xb31b)), // VFS, SDBR + FPUOp2::Mul32 => (0xe7e7, 2, 0, Some(0xb317)), // VFM, MEEBR + FPUOp2::Mul64 => (0xe7e7, 3, 0, Some(0xb31c)), // VFM, MDBR + FPUOp2::Div32 => (0xe7e5, 2, 0, Some(0xb30d)), // VFD, DEBR + FPUOp2::Div64 => (0xe7e5, 3, 0, Some(0xb31d)), // VFD, DDBR + FPUOp2::Max32 => (0xe7ef, 2, 1, None), // VFMAX + FPUOp2::Max64 => (0xe7ef, 3, 1, None), // VFMAX + FPUOp2::Min32 => (0xe7ee, 2, 1, None), // VFMIN + FPUOp2::Min64 => (0xe7ee, 3, 1, None), // VFMIN }; - put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn)); + if opcode_fpr.is_some() && rd.to_reg() == rn && is_fpr(rn) && is_fpr(rm) { + put(sink, &enc_rre(opcode_fpr.unwrap(), rd.to_reg(), rm)); + } else { + put(sink, &enc_vrr_c(opcode, rd.to_reg(), rn, rm, m4, 8, m6)); + } } - &Inst::FpuToInt { op, rd, rn } => { + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { let rd 
= allocs.next_writable(rd); let rn = allocs.next(rn); + let rm = allocs.next(rm); + let ra = allocs.next(ra); - let opcode = match op { - FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA - FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR - FpuToIntOp::F32ToI64 => 0xb3a8, // CGEBRA - FpuToIntOp::F32ToU64 => 0xb3ac, // CLGEBR - FpuToIntOp::F64ToI32 => 0xb399, // CFDBRA - FpuToIntOp::F64ToU32 => 0xb39d, // CLFDBR - FpuToIntOp::F64ToI64 => 0xb3a9, // CGDBRA - FpuToIntOp::F64ToU64 => 0xb3ad, // CLGDBR + let (opcode, m6, opcode_fpr) = match fpu_op { + FPUOp3::MAdd32 => (0xe78f, 2, 0xb30e), // VFMA, MAEBR + FPUOp3::MAdd64 => (0xe78f, 3, 0xb31e), // VFMA, MADBR + FPUOp3::MSub32 => (0xe78e, 2, 0xb30f), // VFMS, MSEBR + FPUOp3::MSub64 => (0xe78e, 3, 0xb31f), // VFMS, MSDBR }; - put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0)); + if rd.to_reg() == ra && is_fpr(rn) && is_fpr(rm) && is_fpr(ra) { + put(sink, &enc_rrd(opcode_fpr, rd.to_reg(), rm, rn)); + } else { + put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 8, m6)); + } } - &Inst::IntToFpu { op, rd, rn } => { + &Inst::FpuRound { op, mode, rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); - let opcode = match op { - IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA - IntToFpuOp::U32ToF32 => 0xb390, // CELFBR - IntToFpuOp::I64ToF32 => 0xb3a4, // CEGBRA - IntToFpuOp::U64ToF32 => 0xb3a0, // CELGBR - IntToFpuOp::I32ToF64 => 0xb395, // CDFBRA - IntToFpuOp::U32ToF64 => 0xb391, // CDLFBR - IntToFpuOp::I64ToF64 => 0xb3a5, // CDGBRA - IntToFpuOp::U64ToF64 => 0xb3a1, // CDLGBR + let mode = match mode { + FpuRoundMode::Current => 0, + FpuRoundMode::ToNearest => 1, + FpuRoundMode::ShorterPrecision => 3, + FpuRoundMode::ToNearestTiesToEven => 4, + FpuRoundMode::ToZero => 5, + FpuRoundMode::ToPosInfinity => 6, + FpuRoundMode::ToNegInfinity => 7, }; - put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0)); - } - &Inst::FpuRound { op, rd, rn } => { - let rd = allocs.next_writable(rd); - let rn = allocs.next(rn); - - let 
(opcode, m3) = match op { - FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR - FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR - FpuRoundMode::Plus32 => (0xb357, 6), // FIEBR - FpuRoundMode::Plus64 => (0xb35f, 6), // FIDBR - FpuRoundMode::Zero32 => (0xb357, 5), // FIEBR - FpuRoundMode::Zero64 => (0xb35f, 5), // FIDBR - FpuRoundMode::Nearest32 => (0xb357, 4), // FIEBR - FpuRoundMode::Nearest64 => (0xb35f, 4), // FIDBR + let (opcode, m3, opcode_fpr) = match op { + FpuRoundOp::Cvt64To32 => (0xe7c5, 3, Some(0xb344)), // VFLR, LEDBR(A) + FpuRoundOp::Round32 => (0xe7c7, 2, Some(0xb357)), // VFI, FIEBR + FpuRoundOp::Round64 => (0xe7c7, 3, Some(0xb35f)), // VFI, FIDBR + FpuRoundOp::ToSInt32 => (0xe7c2, 2, None), // VCSFP + FpuRoundOp::ToSInt64 => (0xe7c2, 3, None), // VCSFP + FpuRoundOp::ToUInt32 => (0xe7c0, 2, None), // VCLFP + FpuRoundOp::ToUInt64 => (0xe7c0, 3, None), // VCLFP + FpuRoundOp::FromSInt32 => (0xe7c3, 2, None), // VCFPS + FpuRoundOp::FromSInt64 => (0xe7c3, 3, None), // VCFPS + FpuRoundOp::FromUInt32 => (0xe7c1, 2, None), // VCFPL + FpuRoundOp::FromUInt64 => (0xe7c1, 3, None), // VCFPL }; - put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0)); + if opcode_fpr.is_some() && is_fpr(rd.to_reg()) && is_fpr(rn) { + put( + sink, + &enc_rrf_cde(opcode_fpr.unwrap(), rd.to_reg(), rn, mode, 0), + ); + } else { + put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, mode)); + } } - &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { - let rd = allocs.next_writable(rd); + &Inst::FpuCmp32 { rn, rm } => { let rn = allocs.next(rn); let rm = allocs.next(rm); - let (opcode, m4) = match fpu_op { - FPUOp2::Max32 => (0xe7ef, 2), // VFMAX - FPUOp2::Max64 => (0xe7ef, 3), // VFMAX - FPUOp2::Min32 => (0xe7ee, 2), // VFMIN - FPUOp2::Min64 => (0xe7ee, 3), // VFMIN - _ => unimplemented!(), - }; - put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1)); + if is_fpr(rn) && is_fpr(rm) { + let opcode = 0xb309; // CEBR + put(sink, &enc_rre(opcode, rn, rm)); + } else { + let opcode = 0xe7cb; // 
WFC + put(sink, &enc_vrr_a(opcode, rn, rm, 2, 0, 0)); + } } - &Inst::FpuCmp32 { rn, rm } => { + &Inst::FpuCmp64 { rn, rm } => { let rn = allocs.next(rn); let rm = allocs.next(rm); - let opcode = 0xb309; // CEBR - put(sink, &enc_rre(opcode, rn, rm)); + if is_fpr(rn) && is_fpr(rm) { + let opcode = 0xb319; // CDBR + put(sink, &enc_rre(opcode, rn, rm)); + } else { + let opcode = 0xe7cb; // WFC + put(sink, &enc_vrr_a(opcode, rn, rm, 3, 0, 0)); + } } - &Inst::FpuCmp64 { rn, rm } => { + &Inst::VecSelect { rd, rn, rm, ra } => { + let rd = allocs.next_writable(rd); let rn = allocs.next(rn); let rm = allocs.next(rm); + let ra = allocs.next(ra); - let opcode = 0xb319; // CDBR - put(sink, &enc_rre(opcode, rn, rm)); + let opcode = 0xe78d; // VSEL + put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 0, 0)); } &Inst::Call { link, ref info } => { diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs index 03147f9e5f..a398c798a9 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -6886,451 +6886,627 @@ fn test_s390x_binemit() { insns.push(( Inst::FpuMove32 { - rd: writable_fpr(8), - rn: fpr(4), + rd: writable_vr(8), + rn: vr(4), }, "3884", "ler %f8, %f4", )); + insns.push(( + Inst::FpuMove32 { + rd: writable_vr(8), + rn: vr(20), + }, + "E78400000456", + "vlr %v8, %v20", + )); insns.push(( Inst::FpuMove64 { - rd: writable_fpr(8), - rn: fpr(4), + rd: writable_vr(8), + rn: vr(4), }, "2884", "ldr %f8, %f4", )); + insns.push(( + Inst::FpuMove64 { + rd: writable_vr(8), + rn: vr(20), + }, + "E78400000456", + "vlr %v8, %v20", + )); insns.push(( Inst::FpuCMov32 { - rd: writable_fpr(8), - rm: fpr(4), + rd: writable_vr(8), + rm: vr(4), cond: Cond::from_mask(1), }, "A7E400033884", "jno 6 ; ler %f8, %f4", )); + insns.push(( + Inst::FpuCMov32 { + rd: writable_vr(8), + rm: vr(20), + cond: Cond::from_mask(1), + }, + "A7E40005E78400000456", + "jno 10 ; vlr %v8, 
%v20", + )); insns.push(( Inst::FpuCMov64 { - rd: writable_fpr(8), - rm: fpr(4), + rd: writable_vr(8), + rm: vr(4), cond: Cond::from_mask(1), }, "A7E400032884", "jno 6 ; ldr %f8, %f4", )); + insns.push(( + Inst::FpuCMov64 { + rd: writable_vr(8), + rm: vr(20), + cond: Cond::from_mask(1), + }, + "A7E40005E78400000456", + "jno 10 ; vlr %v8, %v20", + )); insns.push(( - Inst::MovToFpr { - rd: writable_fpr(8), + Inst::MovToFpr64 { + rd: writable_vr(8), rn: gpr(4), }, "B3C10084", "ldgr %f8, %r4", )); insns.push(( - Inst::MovFromFpr { + Inst::MovToFpr64 { + rd: writable_vr(24), + rn: gpr(4), + }, + "E78400003822", + "vlvgg %v24, %r4, 0", + )); + insns.push(( + Inst::MovToFpr32 { + rd: writable_vr(8), + rn: gpr(4), + }, + "E78400002022", + "vlvgf %v8, %r4, 0", + )); + insns.push(( + Inst::MovToFpr32 { + rd: writable_vr(24), + rn: gpr(4), + }, + "E78400002822", + "vlvgf %v24, %r4, 0", + )); + insns.push(( + Inst::MovFromFpr64 { rd: writable_gpr(8), - rn: fpr(4), + rn: vr(4), }, "B3CD0084", "lgdr %r8, %f4", )); + insns.push(( + Inst::MovFromFpr64 { + rd: writable_gpr(8), + rn: vr(20), + }, + "E78400003421", + "vlgvg %r8, %v20, 0", + )); + insns.push(( + Inst::MovFromFpr32 { + rd: writable_gpr(8), + rn: vr(4), + }, + "E78400002021", + "vlgvf %r8, %v4, 0", + )); + insns.push(( + Inst::MovFromFpr32 { + rd: writable_gpr(8), + rn: vr(20), + }, + "E78400002421", + "vlgvf %r8, %v20, 0", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Abs32, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B300008C", "lpebr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C002828CC", + "wflpsb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Abs64, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B310008C", "lpdbr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C002838CC", + "wflpdb %v24, 
%f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Neg32, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B303008C", "lcebr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C000828CC", + "wflcsb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Neg64, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B313008C", "lcdbr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C000838CC", + "wflcdb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::NegAbs32, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B301008C", "lnebr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs32, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828CC", + "wflnsb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::NegAbs64, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B311008C", "lndbr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs64, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838CC", + "wflndb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Sqrt32, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B314008C", "sqebr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C000828CE", + "wfsqsb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Sqrt64, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), }, "B315008C", "sqdbr %f8, %f12", )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C000838CE", + "wfsqdb %v24, %f12", + )); insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Cvt32To64, - rd: writable_fpr(8), - rn: fpr(12), + rd: writable_vr(8), + rn: vr(12), 
}, "B304008C", "ldebr %f8, %f12", )); insns.push(( Inst::FpuRR { - fpu_op: FPUOp1::Cvt64To32, - rd: writable_fpr(8), - rn: fpr(12), + fpu_op: FPUOp1::Cvt32To64, + rd: writable_vr(24), + rn: vr(12), }, - "B344008C", - "ledbr %f8, %f12", + "E78C000828C4", + "wldeb %v24, %f12", )); insns.push(( Inst::FpuRRR { fpu_op: FPUOp2::Add32, - rd: writable_fpr(8), - rm: fpr(12), + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, "B30A008C", "aebr %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Add64, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Add32, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31A008C", - "adbr %f8, %f12", + "E748C00828E3", + "wfasb %v20, %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Sub32, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Add64, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B30B008C", - "sebr %f8, %f12", + "B31A008C", + "adbr %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Sub64, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Add64, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31B008C", - "sdbr %f8, %f12", + "E748C00838E3", + "wfadb %v20, %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Mul32, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Sub32, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B317008C", - "meebr %f8, %f12", + "B30B008C", + "sebr %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Mul64, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Sub32, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31C008C", - "mdbr %f8, %f12", + "E748C00828E2", + "wfssb %v20, %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Div32, - rd: writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Sub64, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B30D008C", - "debr %f8, %f12", + "B31B008C", + "sdbr %f8, %f12", )); insns.push(( Inst::FpuRRR { - fpu_op: FPUOp2::Div64, - rd: 
writable_fpr(8), - rm: fpr(12), + fpu_op: FPUOp2::Sub64, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31D008C", - "ddbr %f8, %f12", + "E748C00838E2", + "wfsdb %v20, %f8, %f12", )); - insns.push(( - Inst::FpuRRRR { - fpu_op: FPUOp3::MAdd32, - rd: writable_fpr(8), - rn: fpr(12), - rm: fpr(13), + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B30E80CD", - "maebr %f8, %f12, %f13", + "B317008C", + "meebr %f8, %f12", )); insns.push(( - Inst::FpuRRRR { - fpu_op: FPUOp3::MAdd64, - rd: writable_fpr(8), - rn: fpr(12), - rm: fpr(13), + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31E80CD", - "madbr %f8, %f12, %f13", + "E748C00828E7", + "wfmsb %v20, %f8, %f12", )); insns.push(( - Inst::FpuRRRR { - fpu_op: FPUOp3::MSub32, - rd: writable_fpr(8), - rn: fpr(12), - rm: fpr(13), + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B30F80CD", - "msebr %f8, %f12, %f13", + "B31C008C", + "mdbr %f8, %f12", )); insns.push(( - Inst::FpuRRRR { - fpu_op: FPUOp3::MSub64, - rd: writable_fpr(8), - rn: fpr(12), - rm: fpr(13), + Inst::FpuRRR { + fpu_op: FPUOp2::Mul64, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B31F80CD", - "msdbr %f8, %f12, %f13", + "E748C00838E7", + "wfmdb %v20, %f8, %f12", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F32ToU32, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B39C5014", - "clfebr %r1, 5, %f4, 0", + "B30D008C", + "debr %f8, %f12", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F32ToU64, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B3AC5014", - "clgebr %r1, 5, %f4, 0", + "E748C00828E5", + "wfdsb %v20, %f8, %f12", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F32ToI32, - rd: writable_gpr(1), - rn: 
fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_vr(8), + rn: vr(8), + rm: vr(12), }, - "B3985014", - "cfebra %r1, 5, %f4, 0", + "B31D008C", + "ddbr %f8, %f12", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F32ToI64, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_vr(20), + rn: vr(8), + rm: vr(12), }, - "B3A85014", - "cgebra %r1, 5, %f4, 0", + "E748C00838E5", + "wfddb %v20, %f8, %f12", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F64ToU32, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Max32, + rd: writable_vr(4), + rn: vr(6), + rm: vr(8), }, - "B39D5014", - "clfdbr %r1, 5, %f4, 0", + "E746801820EF", + "wfmaxsb %f4, %f6, %f8, 1", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F64ToU64, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Max64, + rd: writable_vr(4), + rn: vr(6), + rm: vr(24), }, - "B3AD5014", - "clgdbr %r1, 5, %f4, 0", + "E746801832EF", + "wfmaxdb %f4, %f6, %v24, 1", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F64ToI32, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Min32, + rd: writable_vr(4), + rn: vr(6), + rm: vr(8), }, - "B3995014", - "cfdbra %r1, 5, %f4, 0", + "E746801820EE", + "wfminsb %f4, %f6, %f8, 1", )); - insns.push(( - Inst::FpuToInt { - op: FpuToIntOp::F64ToI64, - rd: writable_gpr(1), - rn: fpr(4), + Inst::FpuRRR { + fpu_op: FPUOp2::Min64, + rd: writable_vr(4), + rn: vr(6), + rm: vr(8), }, - "B3A95014", - "cgdbra %r1, 5, %f4, 0", + "E746801830EE", + "wfmindb %f4, %f6, %f8, 1", )); insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::U32ToF32, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(8), }, - "B3900014", - "celfbr %f1, 0, %r4, 0", + "B30E80CD", + "maebr %f8, %f12, %f13", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::I32ToF32, - rd: writable_fpr(1), - rn: gpr(4), 
+ Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(20), }, - "B3940014", - "cefbra %f1, 0, %r4, 0", + "E78CD208418F", + "wfmasb %f8, %f12, %f13, %v20", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::U32ToF64, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(8), }, - "B3910014", - "cdlfbr %f1, 0, %r4, 0", + "B31E80CD", + "madbr %f8, %f12, %f13", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::I32ToF64, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(20), }, - "B3950014", - "cdfbra %f1, 0, %r4, 0", + "E78CD308418F", + "wfmadb %f8, %f12, %f13, %v20", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::U64ToF32, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub32, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(8), }, - "B3A00014", - "celgbr %f1, 0, %r4, 0", + "B30F80CD", + "msebr %f8, %f12, %f13", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::I64ToF32, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub32, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(20), }, - "B3A40014", - "cegbra %f1, 0, %r4, 0", + "E78CD208418E", + "wfmssb %f8, %f12, %f13, %v20", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::U64ToF64, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub64, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(8), }, - "B3A10014", - "cdlgbr %f1, 0, %r4, 0", + "B31F80CD", + "msdbr %f8, %f12, %f13", )); - insns.push(( - Inst::IntToFpu { - op: IntToFpuOp::I64ToF64, - rd: writable_fpr(1), - rn: gpr(4), + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub64, + rd: writable_vr(8), + rn: vr(12), + rm: vr(13), + ra: vr(20), }, - "B3A50014", - "cdgbra %f1, 0, %r4, 0", + "E78CD308418E", + "wfmsdb %f8, %f12, 
%f13, %v20", )); insns.push(( - Inst::FpuCopysign { - rd: writable_fpr(4), - rn: fpr(8), - rm: fpr(12), + Inst::FpuCmp32 { + rn: vr(8), + rm: vr(12), }, - "B372C048", - "cpsdr %f4, %f12, %f8", + "B309008C", + "cebr %f8, %f12", )); - insns.push(( Inst::FpuCmp32 { - rn: fpr(8), - rm: fpr(12), + rn: vr(24), + rm: vr(12), }, - "B309008C", - "cebr %f8, %f12", + "E78C000028CB", + "wfcsb %v24, %f12", )); insns.push(( Inst::FpuCmp64 { - rn: fpr(8), - rm: fpr(12), + rn: vr(8), + rm: vr(12), }, "B319008C", "cdbr %f8, %f12", )); + insns.push(( + Inst::FpuCmp64 { + rn: vr(24), + rm: vr(12), + }, + "E78C000038CB", + "wfcdb %v24, %f12", + )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7343,7 +7519,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7356,7 +7532,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7369,7 +7545,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7382,7 +7558,33 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E71020000803", + "vlef %v17, 0(%r2), 0", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7102FFF0803", + "vlef %v17, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7395,7 
+7597,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7408,7 +7610,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7421,7 +7623,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7432,9 +7634,35 @@ fn test_s390x_binemit() { "ED123FFF7F64", "ley %f1, 524287(%r2,%r3)", )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E71230000803", + "vlef %v17, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7123FFF0803", + "vlef %v17, 4095(%r2,%r3), 0", + )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7447,7 +7675,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7460,7 +7688,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7473,7 +7701,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7486,7 +7714,33 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), 
+ flags: MemFlags::trusted(), + }, + }, + "E71020000802", + "vleg %v17, 0(%r2), 0", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7102FFF0802", + "vleg %v17, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7499,7 +7753,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7512,7 +7766,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7525,7 +7779,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoad64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7536,9 +7790,35 @@ fn test_s390x_binemit() { "ED123FFF7F65", "ldy %f1, 524287(%r2,%r3)", )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E71230000802", + "vleg %v17, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7123FFF0802", + "vleg %v17, 4095(%r2,%r3), 0", + )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7551,7 +7831,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7564,7 +7844,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), 
index: zero_reg(), @@ -7577,7 +7857,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7590,7 +7870,33 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E7102000080B", + "vstef %v17, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStore32 { + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7102FFF080B", + "vstef %v17, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStore32 { + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7603,7 +7909,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7616,7 +7922,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7629,7 +7935,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7640,9 +7946,35 @@ fn test_s390x_binemit() { "ED123FFF7F66", "stey %f1, 524287(%r2,%r3)", )); + insns.push(( + Inst::FpuStore32 { + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E7123000080B", + "vstef %v17, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStore32 { + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7123FFF080B", + "vstef %v17, 4095(%r2,%r3), 0", + )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ 
-7655,7 +7987,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7668,7 +8000,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7681,7 +8013,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7694,7 +8026,33 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E7102000080A", + "vsteg %v17, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStore64 { + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7102FFF080A", + "vsteg %v17, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStore64 { + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7707,7 +8065,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7720,7 +8078,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7733,7 +8091,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStore64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7744,10 +8102,36 @@ fn test_s390x_binemit() { "ED123FFF7F67", "stdy %f1, 524287(%r2,%r3)", )); + insns.push(( + Inst::FpuStore64 { + rd: vr(17), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E7123000080A", + "vsteg %v17, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStore64 { + rd: vr(17), + mem: 
MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E7123FFF080A", + "vsteg %v17, 4095(%r2,%r3), 0", + )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7760,7 +8144,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7773,7 +8157,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7786,7 +8170,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7799,7 +8183,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7812,7 +8196,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7825,7 +8209,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7838,7 +8222,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev32 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7851,7 +8235,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7864,7 +8248,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7877,7 +8261,7 @@ 
fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7890,7 +8274,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7903,7 +8287,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7916,7 +8300,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -7929,7 +8313,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7942,7 +8326,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuLoadRev64 { - rd: writable_fpr(1), + rd: writable_vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -7955,7 +8339,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7968,7 +8352,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -7981,7 +8365,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -7994,7 +8378,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -8007,7 +8391,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -8020,7 +8404,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - 
rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -8033,7 +8417,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -8046,7 +8430,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev32 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -8059,7 +8443,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -8072,7 +8456,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(2), index: zero_reg(), @@ -8085,7 +8469,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -8098,7 +8482,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(2), index: zero_reg(), @@ -8111,7 +8495,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -8124,7 +8508,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD12 { base: gpr(3), index: gpr(2), @@ -8137,7 +8521,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -8150,7 +8534,7 @@ fn test_s390x_binemit() { )); insns.push(( Inst::FpuStoreRev64 { - rd: fpr(1), + rd: vr(1), mem: MemArg::BXD20 { base: gpr(3), index: gpr(2), @@ -8164,133 +8548,297 @@ fn test_s390x_binemit() { insns.push(( Inst::LoadFpuConst32 { - rd: writable_fpr(8), + rd: writable_vr(8), const_data: 1.0_f32.to_bits(), }, "A71500043F80000078801000", "bras %r1, 8 ; data.f32 1 ; le %f8, 0(%r1)", )); + insns.push(( + 
Inst::LoadFpuConst32 { + rd: writable_vr(24), + const_data: 1.0_f32.to_bits(), + }, + "A71500043F800000E78010000803", + "bras %r1, 8 ; data.f32 1 ; vlef %v24, 0(%r1), 0", + )); insns.push(( Inst::LoadFpuConst64 { - rd: writable_fpr(8), + rd: writable_vr(8), const_data: 1.0_f64.to_bits(), }, "A71500063FF000000000000068801000", "bras %r1, 12 ; data.f64 1 ; ld %f8, 0(%r1)", )); + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_vr(24), + const_data: 1.0_f64.to_bits(), + }, + "A71500063FF0000000000000E78010000802", + "bras %r1, 12 ; data.f64 1 ; vleg %v24, 0(%r1), 0", + )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Minus32, + op: FpuRoundOp::Cvt64To32, + mode: FpuRoundMode::Current, + rd: writable_vr(8), + rn: vr(12), + }, + "B344008C", + "ledbra %f8, %f12, 0", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::Cvt64To32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C5", + "wledb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::Round32, + mode: FpuRoundMode::ToNegInfinity, + rd: writable_vr(8), + rn: vr(12), }, "B357708C", "fiebr %f8, %f12, 7", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Minus64, + op: FpuRoundOp::Round64, + mode: FpuRoundMode::ToNegInfinity, + rd: writable_vr(8), + rn: vr(12), }, "B35F708C", "fidbr %f8, %f12, 7", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Plus32, + op: FpuRoundOp::Round32, + mode: FpuRoundMode::ToPosInfinity, + rd: writable_vr(8), + rn: vr(12), }, "B357608C", "fiebr %f8, %f12, 6", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Plus64, + op: FpuRoundOp::Round64, + mode: FpuRoundMode::ToPosInfinity, + rd: writable_vr(8), + rn: vr(12), }, "B35F608C", "fidbr %f8, %f12, 6", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Zero32, + 
op: FpuRoundOp::Round32, + mode: FpuRoundMode::ToZero, + rd: writable_vr(8), + rn: vr(12), }, "B357508C", "fiebr %f8, %f12, 5", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Zero64, + op: FpuRoundOp::Round64, + mode: FpuRoundMode::ToZero, + rd: writable_vr(8), + rn: vr(12), }, "B35F508C", "fidbr %f8, %f12, 5", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Nearest32, + op: FpuRoundOp::Round32, + mode: FpuRoundMode::ToNearestTiesToEven, + rd: writable_vr(8), + rn: vr(12), }, "B357408C", "fiebr %f8, %f12, 4", )); insns.push(( Inst::FpuRound { - rd: writable_fpr(8), - rn: fpr(12), - op: FpuRoundMode::Nearest64, + op: FpuRoundOp::Round64, + mode: FpuRoundMode::ToNearestTiesToEven, + rd: writable_vr(8), + rn: vr(12), }, "B35F408C", "fidbr %f8, %f12, 4", )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::Round32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828C7", + "wfisb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::Round64, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C7", + "wfidb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::ToSInt32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828C2", + "wcfeb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::ToSInt64, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C2", + "wcgdb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::ToUInt32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828C0", + "wclfeb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::ToUInt64, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C0", + "wclgdb %v24, %f12, 0, 1", + )); + 
insns.push(( + Inst::FpuRound { + op: FpuRoundOp::FromSInt32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828C3", + "wcefb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::FromSInt64, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C3", + "wcdgb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::FromUInt32, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001828C1", + "wcelfb %v24, %f12, 0, 1", + )); + insns.push(( + Inst::FpuRound { + op: FpuRoundOp::FromUInt64, + mode: FpuRoundMode::ToNearest, + rd: writable_vr(24), + rn: vr(12), + }, + "E78C001838C1", + "wcdlgb %v24, %f12, 0, 1", + )); insns.push(( - Inst::FpuVecRRR { - fpu_op: FPUOp2::Max32, - rd: writable_fpr(4), - rn: fpr(6), - rm: fpr(8), + Inst::VecSelect { + rd: writable_vr(4), + rn: vr(6), + rm: vr(8), + ra: vr(10), }, - "E746801820EF", - "wfmaxsb %f4, %f6, %f8, 1", + "E7468000A08D", + "vsel %v4, %v6, %v8, %v10", )); insns.push(( - Inst::FpuVecRRR { - fpu_op: FPUOp2::Max64, - rd: writable_fpr(4), - rn: fpr(6), - rm: fpr(8), + Inst::VecSelect { + rd: writable_vr(20), + rn: vr(6), + rm: vr(8), + ra: vr(10), }, - "E746801830EF", - "wfmaxdb %f4, %f6, %f8, 1", + "E7468000A88D", + "vsel %v20, %v6, %v8, %v10", )); insns.push(( - Inst::FpuVecRRR { - fpu_op: FPUOp2::Min32, - rd: writable_fpr(4), - rn: fpr(6), - rm: fpr(8), + Inst::VecSelect { + rd: writable_vr(4), + rn: vr(22), + rm: vr(8), + ra: vr(10), }, - "E746801820EE", - "wfminsb %f4, %f6, %f8, 1", + "E7468000A48D", + "vsel %v4, %v22, %v8, %v10", )); insns.push(( - Inst::FpuVecRRR { - fpu_op: FPUOp2::Min64, - rd: writable_fpr(4), - rn: fpr(6), - rm: fpr(8), + Inst::VecSelect { + rd: writable_vr(4), + rn: vr(6), + rm: vr(24), + ra: vr(10), }, - "E746801830EE", - "wfmindb %f4, %f6, %f8, 1", + "E7468000A28D", + "vsel %v4, %v6, %v24, %v10", + )); + insns.push(( + Inst::VecSelect { + rd: 
writable_vr(4), + rn: vr(6), + rm: vr(8), + ra: vr(26), + }, + "E7468000A18D", + "vsel %v4, %v6, %v8, %v26", + )); + insns.push(( + Inst::VecSelect { + rd: writable_vr(20), + rn: vr(22), + rm: vr(24), + ra: vr(26), + }, + "E7468000AF8D", + "vsel %v20, %v22, %v24, %v26", )); let flags = settings::Flags::new(settings::builder()); diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 6d7d1abffa..676e0d4794 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -27,8 +27,8 @@ mod emit_tests; // Instructions (top level): definition pub use crate::isa::s390x::lower::isle::generated_code::{ - ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst, - RxSBGOp, ShiftOp, UnaryOp, + ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, MInst as Inst, RxSBGOp, + ShiftOp, UnaryOp, }; /// Additional information for (direct) Call instructions, left out of line to lower the size of @@ -156,12 +156,13 @@ impl Inst { | Inst::FpuMove64 { .. } | Inst::FpuCMov32 { .. } | Inst::FpuCMov64 { .. } - | Inst::MovToFpr { .. } - | Inst::MovFromFpr { .. } + | Inst::MovToFpr32 { .. } + | Inst::MovToFpr64 { .. } + | Inst::MovFromFpr32 { .. } + | Inst::MovFromFpr64 { .. } | Inst::FpuRR { .. } | Inst::FpuRRR { .. } | Inst::FpuRRRR { .. } - | Inst::FpuCopysign { .. } | Inst::FpuCmp32 { .. } | Inst::FpuCmp64 { .. } | Inst::FpuLoad32 { .. } @@ -170,10 +171,7 @@ impl Inst { | Inst::FpuStore64 { .. } | Inst::LoadFpuConst32 { .. } | Inst::LoadFpuConst64 { .. } - | Inst::FpuToInt { .. } - | Inst::IntToFpu { .. } - | Inst::FpuRound { .. } - | Inst::FpuVecRRR { .. } + | Inst::VecSelect { .. } | Inst::Call { .. } | Inst::CallInd { .. } | Inst::Ret { .. } @@ -206,6 +204,11 @@ impl Inst { UnaryOp::PopcntReg => InstructionSet::MIE2, _ => InstructionSet::Base, }, + Inst::FpuRound { op, .. 
} => match op { + FpuRoundOp::ToSInt32 | FpuRoundOp::FromSInt32 => InstructionSet::MIE2, + FpuRoundOp::ToUInt32 | FpuRoundOp::FromUInt32 => InstructionSet::MIE2, + _ => InstructionSet::Base, + }, // These are all part of VXRS_EXT2 Inst::FpuLoadRev32 { .. } @@ -576,7 +579,10 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_mod(rd); collector.reg_use(rm); } - &Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => { + &Inst::MovToFpr32 { rd, rn } + | &Inst::MovToFpr64 { rd, rn } + | &Inst::MovFromFpr32 { rd, rn } + | &Inst::MovFromFpr64 { rd, rn } => { collector.reg_def(rd); collector.reg_use(rn); } @@ -584,19 +590,16 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC collector.reg_def(rd); collector.reg_use(rn); } - &Inst::FpuRRR { rd, rm, .. } => { - collector.reg_mod(rd); - collector.reg_use(rm); - } - &Inst::FpuRRRR { rd, rn, rm, .. } => { - collector.reg_mod(rd); + &Inst::FpuRRR { rd, rn, rm, .. } => { + collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); } - &Inst::FpuCopysign { rd, rn, rm, .. } => { + &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); + collector.reg_use(ra); } &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { collector.reg_use(rn); @@ -637,22 +640,15 @@ fn s390x_get_operands VReg>(inst: &Inst, collector: &mut OperandC &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => { collector.reg_def(rd); } - &Inst::FpuToInt { rd, rn, .. } => { - collector.reg_def(rd); - collector.reg_use(rn); - } - &Inst::IntToFpu { rd, rn, .. } => { - collector.reg_def(rd); - collector.reg_use(rn); - } &Inst::FpuRound { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); } - &Inst::FpuVecRRR { rd, rn, rm, .. } => { + &Inst::VecSelect { rd, rn, rm, ra, .. } => { collector.reg_def(rd); collector.reg_use(rn); collector.reg_use(rm); + collector.reg_use(ra); } &Inst::Extend { rd, rn, .. 
} => { collector.reg_def(rd); @@ -1462,9 +1458,7 @@ impl Inst { | &Inst::Load64SExt32 { rd, ref mem } | &Inst::LoadRev16 { rd, ref mem } | &Inst::LoadRev32 { rd, ref mem } - | &Inst::LoadRev64 { rd, ref mem } - | &Inst::FpuLoad32 { rd, ref mem } - | &Inst::FpuLoad64 { rd, ref mem } => { + | &Inst::LoadRev64 { rd, ref mem } => { let (opcode_rx, opcode_rxy, opcode_ril) = match self { &Inst::Load32 { .. } => (Some("l"), Some("ly"), Some("lrl")), &Inst::Load32ZExt8 { .. } => (None, Some("llc"), None), @@ -1481,8 +1475,6 @@ impl Inst { &Inst::LoadRev16 { .. } => (None, Some("lrvh"), None), &Inst::LoadRev32 { .. } => (None, Some("lrv"), None), &Inst::LoadRev64 { .. } => (None, Some("lrvg"), None), - &Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), None), - &Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), None), _ => unreachable!(), }; @@ -1505,17 +1497,42 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } - &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); - let op = match self { - &Inst::FpuLoadRev32 { .. } => "vlebrf", - &Inst::FpuLoadRev64 { .. } => "vlebrg", + &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } + | &Inst::FpuLoadRev32 { rd, ref mem } + | &Inst::FpuLoadRev64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_vrx) = match self { + &Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), "vlef"), + &Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), "vleg"), + &Inst::FpuLoadRev32 { .. } => (None, None, "vlebrf"), + &Inst::FpuLoadRev64 { .. 
} => (None, None, "vlebrg"), _ => unreachable!(), }; - let mem = mem.pretty_print_default(); - format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); + if rd_fpr.is_some() && opcode_rx.is_some() { + let (mem_str, mem) = + mem_finalize_for_show(&mem, state, true, true, false, true); + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + _ => unreachable!(), + }; + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) + } else { + let (mem_str, mem) = + mem_finalize_for_show(&mem, state, true, false, false, true); + let mem = mem.pretty_print_default(); + format!( + "{}{} {}, {}, 0", + mem_str, + opcode_vrx, + rd_fpr.unwrap_or(rd), + mem + ) + } } &Inst::Store8 { rd, ref mem } | &Inst::Store16 { rd, ref mem } @@ -1523,9 +1540,7 @@ impl Inst { | &Inst::Store64 { rd, ref mem } | &Inst::StoreRev16 { rd, ref mem } | &Inst::StoreRev32 { rd, ref mem } - | &Inst::StoreRev64 { rd, ref mem } - | &Inst::FpuStore32 { rd, ref mem } - | &Inst::FpuStore64 { rd, ref mem } => { + | &Inst::StoreRev64 { rd, ref mem } => { let (opcode_rx, opcode_rxy, opcode_ril) = match self { &Inst::Store8 { .. } => (Some("stc"), Some("stcy"), None), &Inst::Store16 { .. } => (Some("sth"), Some("sthy"), Some("sthrl")), @@ -1534,8 +1549,6 @@ impl Inst { &Inst::StoreRev16 { .. } => (None, Some("strvh"), None), &Inst::StoreRev32 { .. } => (None, Some("strv"), None), &Inst::StoreRev64 { .. } => (None, Some("strvg"), None), - &Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), None), - &Inst::FpuStore64 { .. 
} => (Some("std"), Some("stdy"), None), _ => unreachable!(), }; @@ -1586,18 +1599,42 @@ impl Inst { format!("{}{} {}, {}", mem_str, op, mem, imm) } - &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { - let rd = pretty_print_reg(rd, allocs); - let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); - let op = match self { - &Inst::FpuStoreRev32 { .. } => "vstebrf", - &Inst::FpuStoreRev64 { .. } => "vstebrg", + &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } + | &Inst::FpuStoreRev32 { rd, ref mem } + | &Inst::FpuStoreRev64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_vrx) = match self { + &Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), "vstef"), + &Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), "vsteg"), + &Inst::FpuStoreRev32 { .. } => (None, None, "vstebrf"), + &Inst::FpuStoreRev64 { .. } => (None, None, "vstebrg"), _ => unreachable!(), }; - let mem = mem.pretty_print_default(); - format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + let (rd, rd_fpr) = pretty_print_fpr(rd, allocs); + let mem = mem.with_allocs(allocs); + if rd_fpr.is_some() && opcode_rx.is_some() { + let (mem_str, mem) = + mem_finalize_for_show(&mem, state, true, true, false, true); + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. 
} => opcode_rxy, + _ => unreachable!(), + }; + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) + } else { + let (mem_str, mem) = + mem_finalize_for_show(&mem, state, true, false, false, true); + let mem = mem.pretty_print_default(); + format!( + "{}{} {}, {}, 0", + mem_str, + opcode_vrx, + rd_fpr.unwrap_or(rd), + mem + ) + } } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); @@ -1704,177 +1741,278 @@ impl Inst { format!("locghi{} {}, {}", cond, rd, imm) } &Inst::FpuMove32 { rd, rn } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - format!("ler {}, {}", rd, rn) + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + if rd_fpr.is_some() && rn_fpr.is_some() { + format!("ler {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap()) + } else { + format!("vlr {}, {}", rd, rn) + } } &Inst::FpuMove64 { rd, rn } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - format!("ldr {}, {}", rd, rn) + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + if rd_fpr.is_some() && rn_fpr.is_some() { + format!("ldr {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap()) + } else { + format!("vlr {}, {}", rd, rn) + } } &Inst::FpuCMov32 { rd, cond, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rm = pretty_print_reg(rm, allocs); - let cond = cond.invert().pretty_print_default(); - format!("j{} 6 ; ler {}, {}", cond, rd, rm) + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + if rd_fpr.is_some() && rm_fpr.is_some() { + let cond = cond.invert().pretty_print_default(); + format!("j{} 6 ; ler {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap()) + } else { + let cond = cond.invert().pretty_print_default(); + format!("j{} 10 ; vlr {}, {}", cond, rd, 
rm) + } } &Inst::FpuCMov64 { rd, cond, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rm = pretty_print_reg(rm, allocs); - let cond = cond.invert().pretty_print_default(); - format!("j{} 6 ; ldr {}, {}", cond, rd, rm) + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + if rd_fpr.is_some() && rm_fpr.is_some() { + let cond = cond.invert().pretty_print_default(); + format!("j{} 6 ; ldr {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap()) + } else { + let cond = cond.invert().pretty_print_default(); + format!("j{} 10 ; vlr {}, {}", cond, rd, rm) + } } - &Inst::MovToFpr { rd, rn } => { + &Inst::MovToFpr32 { rd, rn } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); - format!("ldgr {}, {}", rd, rn) + format!("vlvgf {}, {}, 0", rd, rn) } - &Inst::MovFromFpr { rd, rn } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + &Inst::MovToFpr64 { rd, rn } => { + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); - format!("lgdr {}, {}", rd, rn) + if rd_fpr.is_some() { + format!("ldgr {}, {}", rd_fpr.unwrap(), rn) + } else { + format!("vlvgg {}, {}, 0", rd, rn) + } } - &Inst::FpuRR { fpu_op, rd, rn } => { - let op = match fpu_op { - FPUOp1::Abs32 => "lpebr", - FPUOp1::Abs64 => "lpdbr", - FPUOp1::Neg32 => "lcebr", - FPUOp1::Neg64 => "lcdbr", - FPUOp1::NegAbs32 => "lnebr", - FPUOp1::NegAbs64 => "lndbr", - FPUOp1::Sqrt32 => "sqebr", - FPUOp1::Sqrt64 => "sqdbr", - FPUOp1::Cvt32To64 => "ldebr", - FPUOp1::Cvt64To32 => "ledbr", - }; + &Inst::MovFromFpr32 { rd, rn } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); - format!("{} {}, {}", op, rd, rn) + format!("vlgvf {}, {}, 0", rd, rn) } - &Inst::FpuRRR { fpu_op, rd, rm } => { - let op = match fpu_op { - FPUOp2::Add32 => "aebr", - FPUOp2::Add64 => "adbr", - FPUOp2::Sub32 => "sebr", - FPUOp2::Sub64 => "sdbr", - FPUOp2::Mul32 => 
"meebr", - FPUOp2::Mul64 => "mdbr", - FPUOp2::Div32 => "debr", - FPUOp2::Div64 => "ddbr", - _ => unimplemented!(), - }; + &Inst::MovFromFpr64 { rd, rn } => { let rd = pretty_print_reg(rd.to_reg(), allocs); - let rm = pretty_print_reg(rm, allocs); - format!("{} {}, {}", op, rd, rm) + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + if rn_fpr.is_some() { + format!("lgdr {}, {}", rd, rn_fpr.unwrap()) + } else { + format!("vlgvg {}, {}, 0", rd, rn) + } } - &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { - let op = match fpu_op { - FPUOp3::MAdd32 => "maebr", - FPUOp3::MAdd64 => "madbr", - FPUOp3::MSub32 => "msebr", - FPUOp3::MSub64 => "msdbr", + &Inst::FpuRR { fpu_op, rd, rn } => { + let (op, op_fpr) = match fpu_op { + FPUOp1::Abs32 => ("wflpsb", "lpebr"), + FPUOp1::Abs64 => ("wflpdb", "lpdbr"), + FPUOp1::Neg32 => ("wflcsb", "lcebr"), + FPUOp1::Neg64 => ("wflcdb", "lcdbr"), + FPUOp1::NegAbs32 => ("wflnsb", "lnebr"), + FPUOp1::NegAbs64 => ("wflndb", "lndbr"), + FPUOp1::Sqrt32 => ("wfsqsb", "sqebr"), + FPUOp1::Sqrt64 => ("wfsqdb", "sqdbr"), + FPUOp1::Cvt32To64 => ("wldeb", "ldebr"), }; - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - let rm = pretty_print_reg(rm, allocs); - format!("{} {}, {}, {}", op, rd, rn, rm) + + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + if rd_fpr.is_some() && rn_fpr.is_some() { + format!("{} {}, {}", op_fpr, rd_fpr.unwrap(), rn_fpr.unwrap()) + } else { + format!("{} {}, {}", op, rd_fpr.unwrap_or(rd), rn_fpr.unwrap_or(rn)) + } } - &Inst::FpuCopysign { rd, rn, rm } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - let rm = pretty_print_reg(rm, allocs); - format!("cpsdr {}, {}, {}", rd, rm, rn) + &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let (op, opt_m6, op_fpr) = match fpu_op { + FPUOp2::Add32 => ("wfasb", "", Some("aebr")), + FPUOp2::Add64 => ("wfadb", "", Some("adbr")), + FPUOp2::Sub32 => 
("wfssb", "", Some("sebr")), + FPUOp2::Sub64 => ("wfsdb", "", Some("sdbr")), + FPUOp2::Mul32 => ("wfmsb", "", Some("meebr")), + FPUOp2::Mul64 => ("wfmdb", "", Some("mdbr")), + FPUOp2::Div32 => ("wfdsb", "", Some("debr")), + FPUOp2::Div64 => ("wfddb", "", Some("ddbr")), + FPUOp2::Max32 => ("wfmaxsb", ", 1", None), + FPUOp2::Max64 => ("wfmaxdb", ", 1", None), + FPUOp2::Min32 => ("wfminsb", ", 1", None), + FPUOp2::Min64 => ("wfmindb", ", 1", None), + }; + + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + if op_fpr.is_some() && rd == rn && rd_fpr.is_some() && rm_fpr.is_some() { + format!( + "{} {}, {}", + op_fpr.unwrap(), + rd_fpr.unwrap(), + rm_fpr.unwrap() + ) + } else { + format!( + "{} {}, {}, {}{}", + op, + rd_fpr.unwrap_or(rd), + rn_fpr.unwrap_or(rn), + rm_fpr.unwrap_or(rm), + opt_m6 + ) + } + } + &Inst::FpuRRRR { + fpu_op, + rd, + rn, + rm, + ra, + } => { + let (op, op_fpr) = match fpu_op { + FPUOp3::MAdd32 => ("wfmasb", "maebr"), + FPUOp3::MAdd64 => ("wfmadb", "madbr"), + FPUOp3::MSub32 => ("wfmssb", "msebr"), + FPUOp3::MSub64 => ("wfmsdb", "msdbr"), + }; + + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + let (ra, ra_fpr) = pretty_print_fpr(ra, allocs); + if rd == ra && rd_fpr.is_some() && rn_fpr.is_some() && rm_fpr.is_some() { + format!( + "{} {}, {}, {}", + op_fpr, + rd_fpr.unwrap(), + rn_fpr.unwrap(), + rm_fpr.unwrap() + ) + } else { + format!( + "{} {}, {}, {}, {}", + op, + rd_fpr.unwrap_or(rd), + rn_fpr.unwrap_or(rn), + rm_fpr.unwrap_or(rm), + ra_fpr.unwrap_or(ra) + ) + } } &Inst::FpuCmp32 { rn, rm } => { - let rn = pretty_print_reg(rn, allocs); - let rm = pretty_print_reg(rm, allocs); - format!("cebr {}, {}", rn, rm) + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + if 
rn_fpr.is_some() && rm_fpr.is_some() { + format!("cebr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap()) + } else { + format!("wfcsb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm)) + } } &Inst::FpuCmp64 { rn, rm } => { - let rn = pretty_print_reg(rn, allocs); - let rm = pretty_print_reg(rm, allocs); - format!("cdbr {}, {}", rn, rm) + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + let (rm, rm_fpr) = pretty_print_fpr(rm, allocs); + if rn_fpr.is_some() && rm_fpr.is_some() { + format!("cdbr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap()) + } else { + format!("wfcdb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm)) + } } &Inst::LoadFpuConst32 { rd, const_data } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); - format!( - "bras {}, 8 ; data.f32 {} ; le {}, 0({})", - tmp, - f32::from_bits(const_data), - rd, - tmp - ) + if rd_fpr.is_some() { + format!( + "bras {}, 8 ; data.f32 {} ; le {}, 0({})", + tmp, + f32::from_bits(const_data), + rd_fpr.unwrap(), + tmp + ) + } else { + format!( + "bras {}, 8 ; data.f32 {} ; vlef {}, 0({}), 0", + tmp, + f32::from_bits(const_data), + rd, + tmp + ) + } } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = pretty_print_reg(rd.to_reg(), allocs); + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); - format!( - "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", - tmp, - f64::from_bits(const_data), - rd, - tmp - ) - } - &Inst::FpuToInt { op, rd, rn } => { - let op = match op { - FpuToIntOp::F32ToI32 => "cfebra", - FpuToIntOp::F32ToU32 => "clfebr", - FpuToIntOp::F32ToI64 => "cgebra", - FpuToIntOp::F32ToU64 => "clgebr", - FpuToIntOp::F64ToI32 => "cfdbra", - FpuToIntOp::F64ToU32 => "clfdbr", - FpuToIntOp::F64ToI64 => "cgdbra", - FpuToIntOp::F64ToU64 => "clgdbr", - }; - let rd = pretty_print_reg(rd.to_reg(), 
allocs); - let rn = pretty_print_reg(rn, allocs); - format!("{} {}, 5, {}, 0", op, rd, rn) + if rd_fpr.is_some() { + format!( + "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", + tmp, + f64::from_bits(const_data), + rd_fpr.unwrap(), + tmp + ) + } else { + format!( + "bras {}, 12 ; data.f64 {} ; vleg {}, 0({}), 0", + tmp, + f64::from_bits(const_data), + rd, + tmp + ) + } } - &Inst::IntToFpu { op, rd, rn } => { - let op = match op { - IntToFpuOp::I32ToF32 => "cefbra", - IntToFpuOp::U32ToF32 => "celfbr", - IntToFpuOp::I64ToF32 => "cegbra", - IntToFpuOp::U64ToF32 => "celgbr", - IntToFpuOp::I32ToF64 => "cdfbra", - IntToFpuOp::U32ToF64 => "cdlfbr", - IntToFpuOp::I64ToF64 => "cdgbra", - IntToFpuOp::U64ToF64 => "cdlgbr", - }; - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - format!("{} {}, 0, {}, 0", op, rd, rn) - } - &Inst::FpuRound { op, rd, rn } => { - let (op, m3) = match op { - FpuRoundMode::Minus32 => ("fiebr", 7), - FpuRoundMode::Minus64 => ("fidbr", 7), - FpuRoundMode::Plus32 => ("fiebr", 6), - FpuRoundMode::Plus64 => ("fidbr", 6), - FpuRoundMode::Zero32 => ("fiebr", 5), - FpuRoundMode::Zero64 => ("fidbr", 5), - FpuRoundMode::Nearest32 => ("fiebr", 4), - FpuRoundMode::Nearest64 => ("fidbr", 4), + &Inst::FpuRound { op, mode, rd, rn } => { + let mode = match mode { + FpuRoundMode::Current => 0, + FpuRoundMode::ToNearest => 1, + FpuRoundMode::ShorterPrecision => 3, + FpuRoundMode::ToNearestTiesToEven => 4, + FpuRoundMode::ToZero => 5, + FpuRoundMode::ToPosInfinity => 6, + FpuRoundMode::ToNegInfinity => 7, }; - let rd = pretty_print_reg(rd.to_reg(), allocs); - let rn = pretty_print_reg(rn, allocs); - format!("{} {}, {}, {}", op, rd, rn, m3) - } - &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { - let op = match fpu_op { - FPUOp2::Max32 => "wfmaxsb", - FPUOp2::Max64 => "wfmaxdb", - FPUOp2::Min32 => "wfminsb", - FPUOp2::Min64 => "wfmindb", - _ => unimplemented!(), + let (opcode, opcode_fpr) = match op { + FpuRoundOp::Cvt64To32 => 
("wledb", Some("ledbra")), + FpuRoundOp::Round32 => ("wfisb", Some("fiebr")), + FpuRoundOp::Round64 => ("wfidb", Some("fidbr")), + FpuRoundOp::ToSInt32 => ("wcfeb", None), + FpuRoundOp::ToSInt64 => ("wcgdb", None), + FpuRoundOp::ToUInt32 => ("wclfeb", None), + FpuRoundOp::ToUInt64 => ("wclgdb", None), + FpuRoundOp::FromSInt32 => ("wcefb", None), + FpuRoundOp::FromSInt64 => ("wcdgb", None), + FpuRoundOp::FromUInt32 => ("wcelfb", None), + FpuRoundOp::FromUInt64 => ("wcdlgb", None), }; + + let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); + let (rn, rn_fpr) = pretty_print_fpr(rn, allocs); + if opcode_fpr.is_some() && rd_fpr.is_some() && rn_fpr.is_some() { + format!( + "{} {}, {}, {}", + opcode_fpr.unwrap(), + rd_fpr.unwrap(), + rn_fpr.unwrap(), + mode + ) + } else { + format!( + "{} {}, {}, 0, {}", + opcode, + rd_fpr.unwrap_or(rd), + rn_fpr.unwrap_or(rn), + mode + ) + } + } + &Inst::VecSelect { rd, rn, rm, ra } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); let rm = pretty_print_reg(rm, allocs); - format!("{} {}, {}, {}, 1", op, rd, rn, rm) + let ra = pretty_print_reg(ra, allocs); + format!("vsel {}, {}, {}, {}", rd, rn, rm, ra) } &Inst::Extend { rd, diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 445a8c71c8..e272ac083b 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -27,21 +27,28 @@ pub fn writable_gpr(num: u8) -> Writable { Writable::from_reg(gpr(num)) } -/// Get a reference to a FPR (floating-point register). -pub fn fpr(num: u8) -> Reg { - let preg = fpr_preg(num); +/// Get a reference to a VR (vector register). 
+pub fn vr(num: u8) -> Reg { + let preg = vr_preg(num); Reg::from(VReg::new(preg.index(), RegClass::Float)) } -pub(crate) const fn fpr_preg(num: u8) -> PReg { - assert!(num < 16); +pub(crate) const fn vr_preg(num: u8) -> PReg { + assert!(num < 32); PReg::new(num as usize, RegClass::Float) } -/// Get a writable reference to a FPR. +/// Get a writable reference to a VR. #[allow(dead_code)] // used by tests. -pub fn writable_fpr(num: u8) -> Writable { - Writable::from_reg(fpr(num)) +pub fn writable_vr(num: u8) -> Writable { + Writable::from_reg(vr(num)) +} + +/// Test whether a vector register is overlapping an FPR. +pub fn is_fpr(r: Reg) -> bool { + let r = r.to_real_reg().unwrap(); + assert!(r.class() == RegClass::Float); + return r.hw_enc() < 16; } /// Get a reference to the stack-pointer register. @@ -92,14 +99,30 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { preg(gpr(5)), ], vec![ - preg(fpr(0)), - preg(fpr(1)), - preg(fpr(2)), - preg(fpr(3)), - preg(fpr(4)), - preg(fpr(5)), - preg(fpr(6)), - preg(fpr(7)), + preg(vr(0)), + preg(vr(1)), + preg(vr(2)), + preg(vr(3)), + preg(vr(4)), + preg(vr(5)), + preg(vr(6)), + preg(vr(7)), + preg(vr(16)), + preg(vr(17)), + preg(vr(18)), + preg(vr(19)), + preg(vr(20)), + preg(vr(21)), + preg(vr(22)), + preg(vr(23)), + preg(vr(24)), + preg(vr(25)), + preg(vr(26)), + preg(vr(27)), + preg(vr(28)), + preg(vr(29)), + preg(vr(30)), + preg(vr(31)), ], ], non_preferred_regs_by_class: [ @@ -116,14 +139,14 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { // no r15; it is the stack pointer. 
], vec![ - preg(fpr(8)), - preg(fpr(9)), - preg(fpr(10)), - preg(fpr(11)), - preg(fpr(12)), - preg(fpr(13)), - preg(fpr(14)), - preg(fpr(15)), + preg(vr(8)), + preg(vr(9)), + preg(vr(10)), + preg(vr(11)), + preg(vr(12)), + preg(vr(13)), + preg(vr(14)), + preg(vr(15)), ], ], fixed_stack_slots: vec![], @@ -134,14 +157,28 @@ pub fn show_reg(reg: Reg) -> String { if let Some(rreg) = reg.to_real_reg() { match rreg.class() { RegClass::Int => format!("%r{}", rreg.hw_enc()), - RegClass::Float => format!("%f{}", rreg.hw_enc()), + RegClass::Float => format!("%v{}", rreg.hw_enc()), } } else { format!("%{:?}", reg) } } +pub fn maybe_show_fpr(reg: Reg) -> Option { + if let Some(rreg) = reg.to_real_reg() { + if is_fpr(reg) { + return Some(format!("%f{}", rreg.hw_enc())); + } + } + None +} + pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String { let reg = allocs.next(reg); show_reg(reg) } + +pub fn pretty_print_fpr(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> (String, Option) { + let reg = allocs.next(reg); + (show_reg(reg), maybe_show_fpr(reg)) +} diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index 8abf6c84a2..5702134dbf 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -45,7 +45,7 @@ pub fn map_reg(reg: Reg) -> Result { Register(14), Register(15), ]; - const FPR_MAP: [gimli::Register; 16] = [ + const VR_MAP: [gimli::Register; 32] = [ Register(16), Register(20), Register(17), @@ -62,11 +62,27 @@ pub fn map_reg(reg: Reg) -> Result { Register(30), Register(27), Register(31), + Register(68), + Register(72), + Register(69), + Register(73), + Register(70), + Register(74), + Register(71), + Register(75), + Register(76), + Register(80), + Register(77), + Register(81), + Register(78), + Register(82), + Register(79), + Register(83), ]; match reg.class() { RegClass::Int => 
Ok(GPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), - RegClass::Float => Ok(FPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), + RegClass::Float => Ok(VR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), } } diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index c5a60fa086..2585253e53 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -963,8 +963,10 @@ ;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Copysign of two registers. -(rule (lower (has_type ty (fcopysign x y))) - (fpu_copysign ty x y)) +(rule (lower (has_type $F32 (fcopysign x y))) + (vec_select $F32 x y (imm $F32 2147483647))) +(rule (lower (has_type $F64 (fcopysign x y))) + (vec_select $F64 x y (imm $F64 9223372036854775807))) ;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1034,120 +1036,148 @@ ;; Demote a register. (rule (lower (has_type dst_ty (fdemote x @ (value_type src_ty)))) - (fdemote_reg dst_ty src_ty x)) + (fdemote_reg dst_ty src_ty (FpuRoundMode.Current) x)) ;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Convert an unsigned integer value in a register to floating-point. -(rule (lower (has_type dst_ty (fcvt_from_uint x @ (value_type src_ty)))) - (fcvt_from_uint_reg dst_ty (ty_ext32 src_ty) - (put_in_reg_zext32 x))) +;; Convert a 32-bit or smaller unsigned integer to $F32 (z15 instruction). +(rule (lower (has_type $F32 + (fcvt_from_uint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty)))))) + (fcvt_from_uint_reg $F32 (FpuRoundMode.ToNearestTiesToEven) + (mov_to_fpr32 (put_in_reg_zext32 x)))) + +;; Convert a 64-bit or smaller unsigned integer to $F32, via an intermediate $F64. 
+(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_64 ty))))) + (fdemote_reg $F32 $F64 (FpuRoundMode.ToNearestTiesToEven) + (fcvt_from_uint_reg $F64 (FpuRoundMode.ShorterPrecision) + (mov_to_fpr64 (put_in_reg_zext64 x))))) + +;; Convert a 64-bit or smaller unsigned integer to $F64. +(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_64 ty))))) + (fcvt_from_uint_reg $F64 (FpuRoundMode.ToNearestTiesToEven) + (mov_to_fpr64 (put_in_reg_zext64 x)))) ;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Convert a signed integer value in a register to floating-point. -(rule (lower (has_type dst_ty (fcvt_from_sint x @ (value_type src_ty)))) - (fcvt_from_sint_reg dst_ty (ty_ext32 src_ty) - (put_in_reg_sext32 x))) +;; Convert a 32-bit or smaller signed integer to $F32 (z15 instruction). +(rule (lower (has_type $F32 + (fcvt_from_sint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty)))))) + (fcvt_from_sint_reg $F32 (FpuRoundMode.ToNearestTiesToEven) + (mov_to_fpr32 (put_in_reg_sext32 x)))) + +;; Convert a 64-bit or smaller signed integer to $F32, via an intermediate $F64. +(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_64 ty))))) + (fdemote_reg $F32 $F64 (FpuRoundMode.ToNearestTiesToEven) + (fcvt_from_sint_reg $F64 (FpuRoundMode.ShorterPrecision) + (mov_to_fpr64 (put_in_reg_sext64 x))))) + +;; Convert a 64-bit or smaller signed integer to $F64. +(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_64 ty))))) + (fcvt_from_sint_reg $F64 (FpuRoundMode.ToNearestTiesToEven) + (mov_to_fpr64 (put_in_reg_sext64 x)))) ;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a floating-point value in a register to an unsigned integer value. ;; Traps if the input cannot be represented in the output type. -;; FIXME: Add support for 8-/16-bit destination types (needs overflow check). 
-(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_uint x @ (value_type src_ty)))) - (let ((src Reg x) +(rule (lower (has_type dst_ty (fcvt_to_uint x @ (value_type src_ty)))) + (let ((src Reg (put_in_reg x)) ;; First, check whether the input is a NaN, and trap if so. - (_ Reg (trap_if (fcmp_reg src_ty src src) - (floatcc_as_cond (FloatCC.Unordered)) - (trap_code_bad_conversion_to_integer))) - ;; Perform the conversion. If this sets CC 3, we have a - ;; "special case". Since we already exluded the case where - ;; the input was a NaN, the only other option is that the - ;; conversion overflowed the target type. - (dst Reg (trap_if (fcvt_to_uint_reg_with_flags dst_ty src_ty src) - (floatcc_as_cond (FloatCC.Unordered)) - (trap_code_integer_overflow)))) - dst)) + (_1 Reg (trap_if (fcmp_reg src_ty src src) + (floatcc_as_cond (FloatCC.Unordered)) + (trap_code_bad_conversion_to_integer))) + ;; Now check whether the input is out of range for the target type. + (_2 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_uint_ub src_ty dst_ty)) + (floatcc_as_cond (FloatCC.GreaterThanOrEqual)) + (trap_code_integer_overflow))) + (_3 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_uint_lb src_ty)) + (floatcc_as_cond (FloatCC.LessThanOrEqual)) + (trap_code_integer_overflow))) + ;; Perform the conversion using the larger type size. + (flt_ty Type (fcvt_flt_ty dst_ty src_ty)) + (src_ext Reg (fpromote_reg flt_ty src_ty src))) + (fcvt_to_uint_reg flt_ty (FpuRoundMode.ToZero) src_ext))) ;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a floating-point value in a register to a signed integer value. ;; Traps if the input cannot be represented in the output type. -;; FIXME: Add support for 8-/16-bit destination types (needs overflow check). 
-(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_sint x @ (value_type src_ty)))) - (let ((src Reg x) +(rule (lower (has_type dst_ty (fcvt_to_sint x @ (value_type src_ty)))) + (let ((src Reg (put_in_reg x)) ;; First, check whether the input is a NaN, and trap if so. - (_ Reg (trap_if (fcmp_reg src_ty src src) - (floatcc_as_cond (FloatCC.Unordered)) - (trap_code_bad_conversion_to_integer))) - ;; Perform the conversion. If this sets CC 3, we have a - ;; "special case". Since we already exluded the case where - ;; the input was a NaN, the only other option is that the - ;; conversion overflowed the target type. - (dst Reg (trap_if (fcvt_to_sint_reg_with_flags dst_ty src_ty src) - (floatcc_as_cond (FloatCC.Unordered)) - (trap_code_integer_overflow)))) - dst)) + (_1 Reg (trap_if (fcmp_reg src_ty src src) + (floatcc_as_cond (FloatCC.Unordered)) + (trap_code_bad_conversion_to_integer))) + ;; Now check whether the input is out of range for the target type. + (_2 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_sint_ub src_ty dst_ty)) + (floatcc_as_cond (FloatCC.GreaterThanOrEqual)) + (trap_code_integer_overflow))) + (_3 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_sint_lb src_ty dst_ty)) + (floatcc_as_cond (FloatCC.LessThanOrEqual)) + (trap_code_integer_overflow))) + ;; Perform the conversion using the larger type size. + (flt_ty Type (fcvt_flt_ty dst_ty src_ty)) + (src_ext Reg (fpromote_reg flt_ty src_ty src))) + ;; Perform the conversion. + (fcvt_to_sint_reg flt_ty (FpuRoundMode.ToZero) src_ext))) ;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a floating-point value in a register to an unsigned integer value. -;; FIXME: Add support for 8-/16-bit destination types (needs overflow check). 
-(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_uint_sat x @ (value_type src_ty)))) - (let ((src Reg x) - (dst Reg (fcvt_to_uint_reg dst_ty src_ty src)) - ;; In most special cases, the Z instruction already yields the - ;; result expected by Cranelift semantics. The only exception - ;; it the case where the input was a NaN. We explicitly check - ;; for that and force the output to 0 in that case. - (sat Reg (with_flags_reg (fcmp_reg src_ty src src) - (cmov_imm dst_ty - (floatcc_as_cond (FloatCC.Unordered)) 0 dst)))) - sat)) +(rule (lower (has_type dst_ty (fcvt_to_uint_sat x @ (value_type src_ty)))) + (let ((src Reg (put_in_reg x)) + ;; Perform the conversion using the larger type size. + (flt_ty Type (fcvt_flt_ty dst_ty src_ty)) + (int_ty Type (fcvt_int_ty dst_ty src_ty)) + (src_ext Reg (fpromote_reg flt_ty src_ty src)) + (dst Reg (fcvt_to_uint_reg flt_ty (FpuRoundMode.ToZero) src_ext))) + ;; Clamp the output to the destination type bounds. + (uint_sat_reg dst_ty int_ty dst))) ;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a floating-point value in a register to a signed integer value. -;; FIXME: Add support for 8-/16-bit destination types (needs overflow check). -(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_sint_sat x @ (value_type src_ty)))) - (let ((src Reg x) - (dst Reg (fcvt_to_sint_reg dst_ty src_ty src)) +(rule (lower (has_type dst_ty (fcvt_to_sint_sat x @ (value_type src_ty)))) + (let ((src Reg (put_in_reg x)) + ;; Perform the conversion using the larger type size. + (flt_ty Type (fcvt_flt_ty dst_ty src_ty)) + (int_ty Type (fcvt_int_ty dst_ty src_ty)) + (src_ext Reg (fpromote_reg flt_ty src_ty src)) + (dst Reg (fcvt_to_sint_reg flt_ty (FpuRoundMode.ToZero) src_ext)) ;; In most special cases, the Z instruction already yields the ;; result expected by Cranelift semantics. The only exception ;; it the case where the input was a NaN. 
We explicitly check ;; for that and force the output to 0 in that case. (sat Reg (with_flags_reg (fcmp_reg src_ty src src) - (cmov_imm dst_ty - (floatcc_as_cond (FloatCC.Unordered)) 0 dst)))) - sat)) + (cmov_imm int_ty + (floatcc_as_cond (FloatCC.Unordered)) 0 dst)))) + ;; Clamp the output to the destination type bounds. + (sint_sat_reg dst_ty int_ty sat))) ;;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Reinterpret a 64-bit integer value as floating-point. (rule (lower (has_type $F64 (bitcast x @ (value_type $I64)))) - (mov_to_fpr x)) + (mov_to_fpr64 x)) ;; Reinterpret a 64-bit floating-point value as integer. (rule (lower (has_type $I64 (bitcast x @ (value_type $F64)))) - (mov_from_fpr x)) + (mov_from_fpr64 x)) ;; Reinterpret a 32-bit integer value as floating-point (via $I64). -;; Note that a 32-bit float is located in the high bits of the GPR. (rule (lower (has_type $F32 (bitcast x @ (value_type $I32)))) - (mov_to_fpr (lshl_imm $I64 x 32))) + (mov_to_fpr32 x)) ;; Reinterpret a 32-bit floating-point value as integer (via $I64). -;; Note that a 32-bit float is located in the high bits of the GPR. (rule (lower (has_type $I32 (bitcast x @ (value_type $F32)))) - (lshr_imm $I64 (mov_from_fpr x) 32)) + (mov_from_fpr32 x)) ;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1232,7 +1262,7 @@ (rule (lower (has_type (and (vxrs_ext2_disabled) $F32) (load flags @ (littleendian) addr offset))) (let ((gpr Reg (loadrev32 (lower_address flags addr offset)))) - (mov_to_fpr (lshl_imm $I64 gpr 32)))) + (mov_to_fpr32 gpr))) ;; Load 64-bit big-endian floating-point values. 
(rule (lower (has_type $F64 (load flags @ (bigendian) addr offset))) @@ -1247,7 +1277,7 @@ (rule (lower (has_type (and (vxrs_ext2_disabled) $F64) (load flags @ (littleendian) addr offset))) (let ((gpr Reg (loadrev64 (lower_address flags addr offset)))) - (mov_to_fpr gpr))) + (mov_to_fpr64 gpr))) ;;;; Rules for `uload8` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1390,7 +1420,7 @@ ;; Store 32-bit little-endian floating-point type (via GPR on z14). (rule (lower (store flags @ (littleendian) val @ (value_type (and $F32 (vxrs_ext2_disabled))) addr offset)) - (let ((gpr Reg (lshr_imm $I64 (mov_from_fpr (put_in_reg val)) 32))) + (let ((gpr Reg (mov_from_fpr32 (put_in_reg val)))) (side_effect (storerev32 gpr (lower_address flags addr offset))))) ;; Store 64-bit big-endian floating-point type. @@ -1408,7 +1438,7 @@ ;; Store 64-bit little-endian floating-point type (via GPR on z14). (rule (lower (store flags @ (littleendian) val @ (value_type (and $F64 (vxrs_ext2_disabled))) addr offset)) - (let ((gpr Reg (mov_from_fpr (put_in_reg val)))) + (let ((gpr Reg (mov_from_fpr64 (put_in_reg val)))) (side_effect (storerev64 gpr (lower_address flags addr offset))))) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index aad9c8ac7e..ec775e0b41 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -425,6 +425,48 @@ where i64::from(off) } + #[inline] + fn fcvt_to_uint_ub32(&mut self, size: u8) -> u64 { + (2.0_f32).powi(size.into()).to_bits() as u64 + } + + #[inline] + fn fcvt_to_uint_lb32(&mut self) -> u64 { + (-1.0_f32).to_bits() as u64 + } + + #[inline] + fn fcvt_to_uint_ub64(&mut self, size: u8) -> u64 { + (2.0_f64).powi(size.into()).to_bits() + } + + #[inline] + fn fcvt_to_uint_lb64(&mut self) -> u64 { + (-1.0_f64).to_bits() + } + + #[inline] + fn fcvt_to_sint_ub32(&mut self, size: u8) -> u64 { + (2.0_f32).powi((size - 1).into()).to_bits() as u64 + } 
+ + #[inline] + fn fcvt_to_sint_lb32(&mut self, size: u8) -> u64 { + let lb = (-2.0_f32).powi((size - 1).into()); + std::cmp::max(lb.to_bits() + 1, (lb - 1.0).to_bits()) as u64 + } + + #[inline] + fn fcvt_to_sint_ub64(&mut self, size: u8) -> u64 { + (2.0_f64).powi((size - 1).into()).to_bits() + } + + #[inline] + fn fcvt_to_sint_lb64(&mut self, size: u8) -> u64 { + let lb = (-2.0_f64).powi((size - 1).into()); + std::cmp::max(lb.to_bits() + 1, (lb - 1.0).to_bits()) + } + #[inline] fn littleendian(&mut self, flags: MemFlags) -> Option<()> { let endianness = flags.endianness(Endianness::Big); diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif b/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif new file mode 100644 index 0000000000..81fe456fb9 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif @@ -0,0 +1,736 @@ +test compile precise-output +target s390x arch13 + +function %fcvt_to_uint_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 256 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wclfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint.i8 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 128 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -129 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wcfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 65536 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 
+; wfcsb %f0, %v17 +; jnle 6 ; trap +; wclfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint.i16 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 32768 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -32769 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wcfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 4294967300 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wclfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 2147483600 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -2147484000 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wcfeb %v21, %f0, 0, 5 +; vlgvf %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 18446744000000000000 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wclgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_sint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 9223372000000000000 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -9223373000000000000 ; vlef %v17, 
0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wcgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_uint_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 256 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint.i8 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 128 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -129 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 65536 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint.i16 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 32768 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -32769 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 4294967296 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; 
wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 2147483648 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -2147483649 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 18446744073709552000 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 9223372036854776000 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -9223372036854778000 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_from_uint_i8_f32(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; llcr %r5, %r2 +; vlvgf %v5, %r5, 0 +; wcelfb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i8_f32(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; lbr %r5, %r2 +; vlvgf %v5, %r5, 0 +; wcefb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_uint_i16_f32(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; llhr %r5, %r2 +; vlvgf %v5, %r5, 0 +; wcelfb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i16_f32(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; lhr %r5, 
%r2 +; vlvgf %v5, %r5, 0 +; wcefb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_uint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; vlvgf %v3, %r2, 0 +; wcelfb %f0, %f3, 0, 4 +; br %r14 + +function %fcvt_from_sint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; vlvgf %v3, %r2, 0 +; wcefb %f0, %f3, 0, 4 +; br %r14 + +function %fcvt_from_uint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; ldgr %f3, %r2 +; wcdlgb %f5, %f3, 0, 3 +; ledbra %f0, %f5, 4 +; br %r14 + +function %fcvt_from_sint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; ldgr %f3, %r2 +; wcdgb %f5, %f3, 0, 3 +; ledbra %f0, %f5, 4 +; br %r14 + +function %fcvt_from_uint_i8_f64(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; llgcr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i8_f64(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; lgbr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_uint_i16_f64(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; llghr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i16_f64(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; lghr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_uint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; llgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; lgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 +; br %r14 + +function 
%fcvt_from_uint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; ldgr %f3, %r2 +; wcdlgb %f0, %f3, 0, 4 +; br %r14 + +function %fcvt_from_sint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; ldgr %f3, %r2 +; wcdgb %f0, %f3, 0, 4 +; br %r14 + +function %fcvt_to_uint_sat_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; wclfeb %f3, %f0, 0, 5 +; vlgvf %r3, %v3, 0 +; lgr %r2, %r3 +; clfi %r3, 256 +; lochih %r2, 255 +; br %r14 + +function %fcvt_to_sint_sat_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; wcfeb %f3, %f0, 0, 5 +; vlgvf %r3, %v3, 0 +; cebr %f0, %f0 +; lochio %r3, 0 +; lgr %r4, %r3 +; chi %r3, 127 +; lochih %r4, 127 +; lgr %r2, %r4 +; chi %r4, -128 +; lochil %r2, -128 +; br %r14 + +function %fcvt_to_uint_sat_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} + +; block0: +; wclfeb %f3, %f0, 0, 5 +; vlgvf %r3, %v3, 0 +; lgr %r2, %r3 +; clfi %r3, 65535 +; lochih %r2, -1 +; br %r14 + +function %fcvt_to_sint_sat_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; wcfeb %f3, %f0, 0, 5 +; vlgvf %r3, %v3, 0 +; cebr %f0, %f0 +; lochio %r3, 0 +; lgr %r4, %r3 +; chi %r3, 32767 +; lochih %r4, 32767 +; lgr %r2, %r4 +; chi %r4, -32768 +; lochil %r2, -32768 +; br %r14 + +function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; block0: +; wclfeb %f3, %f0, 0, 5 +; vlgvf %r2, %v3, 0 +; br %r14 + +function %fcvt_to_sint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; block0: +; wcfeb %f3, %f0, 0, 5 +; vlgvf %r2, %v3, 0 +; cebr %f0, %f0 +; lochio %r2, 0 +; br %r14 + +function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; 
block0: +; ldebr %f3, %f0 +; wclgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 +; br %r14 + +function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; block0: +; ldebr %f3, %f0 +; wcgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 +; cebr %f0, %f0 +; locghio %r2, 0 +; br %r14 + +function %fcvt_to_uint_sat_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; lgr %r2, %r3 +; clgfi %r3, 256 +; locghih %r2, 255 +; br %r14 + +function %fcvt_to_sint_sat_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; wcgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; cdbr %f0, %f0 +; locghio %r3, 0 +; lgr %r4, %r3 +; cghi %r3, 127 +; locghih %r4, 127 +; lgr %r2, %r4 +; cghi %r4, -128 +; locghil %r2, -128 +; br %r14 + +function %fcvt_to_uint_sat_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; lgr %r2, %r3 +; clgfi %r3, 65535 +; locghih %r2, -1 +; br %r14 + +function %fcvt_to_sint_sat_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; wcgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; cdbr %f0, %f0 +; locghio %r3, 0 +; lgr %r4, %r3 +; cghi %r3, 32767 +; locghih %r4, 32767 +; lgr %r2, %r4 +; cghi %r4, -32768 +; locghil %r2, -32768 +; br %r14 + +function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 +; llilf %r5, 4294967295 +; clgr %r2, %r5 +; locgrh %r2, %r5 +; br %r14 + +function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; block0: +; wcgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 +; cdbr %f0, %f0 +; locghio %r2, 0 +; lgfi %r5, 2147483647 +; cgr %r2, %r5 +; locgrh %r2, %r5 +; lgfi %r3, -2147483648 +; cgr %r2, %r3 +; locgrl %r2, 
%r3 +; br %r14 + +function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 +; br %r14 + +function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; block0: +; wcgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 +; cdbr %f0, %f0 +; locghio %r2, 0 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif index 11a5292c8d..4a8a84f37e 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -245,7 +245,7 @@ block0(v0: f64): } ; block0: -; ledbr %f0, %f0 +; ledbra %f0, %f0, 0 ; br %r14 function %ceil_f32(f32) -> f32 { @@ -335,9 +335,7 @@ block0(v0: f32, v1: f32, v2: f32): } ; block0: -; ldr %f1, %f0 -; ldr %f0, %f4 -; maebr %f0, %f1, %f2 +; wfmasb %f0, %f0, %f2, %f4 ; br %r14 function %fma_f64(f64, f64, f64) -> f64 { @@ -347,9 +345,7 @@ block0(v0: f64, v1: f64, v2: f64): } ; block0: -; ldr %f1, %f0 -; ldr %f0, %f4 -; madbr %f0, %f1, %f2 +; wfmadb %f0, %f0, %f2, %f4 ; br %r14 function %fcopysign_f32(f32, f32) -> f32 { @@ -359,7 +355,8 @@ block0(v0: f32, v1: f32): } ; block0: -; cpsdr %f0, %f2, %f0 +; bras %r1, 8 ; data.f32 NaN ; le %f5, 0(%r1) +; vsel %v0, %v0, %v2, %v5 ; br %r14 function %fcopysign_f64(f64, f64) -> f64 { @@ -369,7 +366,88 @@ block0(v0: f64, v1: f64): } ; block0: -; cpsdr %f0, %f2, %f0 +; bras %r1, 12 ; data.f64 NaN ; ld %f5, 0(%r1) +; vsel %v0, %v0, %v2, %v5 +; br %r14 + +function %fcvt_to_uint_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 256 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wclgdb %v23, %v21, 0, 5 
+; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_sint_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint.i8 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 128 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -129 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wcgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_uint_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 65536 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wclgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_sint_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint.i16 v0 + return v1 +} + +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; bras %r1, 8 ; data.f32 32768 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -32769 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wcgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 ; br %r14 function %fcvt_to_uint_f32_i32(f32) -> i32 { @@ -381,8 +459,15 @@ block0(v0: f32): ; block0: ; cebr %f0, %f0 ; jno 6 ; trap -; clfebr %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 8 ; data.f32 4294967300 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wclgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 ; br %r14 function %fcvt_to_sint_f32_i32(f32) -> i32 { @@ -394,8 +479,15 @@ block0(v0: f32): ; block0: ; cebr %f0, %f0 ; jno 6 ; trap -; cfebra %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 8 ; data.f32 2147483600 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -2147484000 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; 
jnle 6 ; trap +; wldeb %v21, %f0 +; wcgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 ; br %r14 function %fcvt_to_uint_f32_i64(f32) -> i64 { @@ -407,8 +499,15 @@ block0(v0: f32): ; block0: ; cebr %f0, %f0 ; jno 6 ; trap -; clgebr %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 8 ; data.f32 18446744000000000000 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wclgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 ; br %r14 function %fcvt_to_sint_f32_i64(f32) -> i64 { @@ -420,8 +519,91 @@ block0(v0: f32): ; block0: ; cebr %f0, %f0 ; jno 6 ; trap -; cgebra %r2, 5, %f0, 0 +; bras %r1, 8 ; data.f32 9223372000000000000 ; le %f5, 0(%r1) +; cebr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 8 ; data.f32 -9223373000000000000 ; vlef %v17, 0(%r1), 0 +; wfcsb %f0, %v17 +; jnle 6 ; trap +; wldeb %v21, %f0 +; wcgdb %v23, %v21, 0, 5 +; vlgvg %r2, %v23, 0 +; br %r14 + +function %fcvt_to_uint_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint.i8 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 256 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint.i8 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 128 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -129 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_uint_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint.i16 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; bras %r1, 12 ; data.f64 65536 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 
0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_to_sint_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint.i16 v0 + return v1 +} + +; block0: +; cdbr %f0, %f0 ; jno 6 ; trap +; bras %r1, 12 ; data.f64 32768 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -32769 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 ; br %r14 function %fcvt_to_uint_f64_i32(f64) -> i32 { @@ -433,8 +615,14 @@ block0(v0: f64): ; block0: ; cdbr %f0, %f0 ; jno 6 ; trap -; clfdbr %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 12 ; data.f64 4294967296 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 ; br %r14 function %fcvt_to_sint_f64_i32(f64) -> i32 { @@ -446,8 +634,14 @@ block0(v0: f64): ; block0: ; cdbr %f0, %f0 ; jno 6 ; trap -; cfdbra %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 12 ; data.f64 2147483648 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -2147483649 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 ; br %r14 function %fcvt_to_uint_f64_i64(f64) -> i64 { @@ -459,8 +653,14 @@ block0(v0: f64): ; block0: ; cdbr %f0, %f0 ; jno 6 ; trap -; clgdbr %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 12 ; data.f64 18446744073709552000 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wclgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 ; br %r14 function %fcvt_to_sint_f64_i64(f64) -> i64 { @@ -472,8 +672,66 @@ block0(v0: f64): ; block0: ; cdbr %f0, %f0 ; jno 6 ; trap -; cgdbra %r2, 5, %f0, 0 -; jno 6 ; trap +; bras %r1, 12 ; data.f64 9223372036854776000 ; ld %f5, 0(%r1) +; cdbr %f0, %f5 +; jnhe 6 ; trap +; bras %r1, 12 ; 
data.f64 -9223372036854778000 ; vleg %v17, 0(%r1), 0 +; wfcdb %f0, %v17 +; jnle 6 ; trap +; wcgdb %v21, %f0, 0, 5 +; vlgvg %r2, %v21, 0 +; br %r14 + +function %fcvt_from_uint_i8_f32(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; llgcr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 +; br %r14 + +function %fcvt_from_sint_i8_f32(i8) -> f32 { +block0(v0: i8): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; lgbr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 +; br %r14 + +function %fcvt_from_uint_i16_f32(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; block0: +; llghr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 +; br %r14 + +function %fcvt_from_sint_i16_f32(i16) -> f32 { +block0(v0: i16): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; block0: +; lghr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 ; br %r14 function %fcvt_from_uint_i32_f32(i32) -> f32 { @@ -483,7 +741,10 @@ block0(v0: i32): } ; block0: -; celfbr %f0, 0, %r2, 0 +; llgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 ; br %r14 function %fcvt_from_sint_i32_f32(i32) -> f32 { @@ -493,7 +754,10 @@ block0(v0: i32): } ; block0: -; cefbra %f0, 0, %r2, 0 +; lgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f7, %f5, 0, 3 +; ledbra %f0, %f7, 4 ; br %r14 function %fcvt_from_uint_i64_f32(i64) -> f32 { @@ -503,7 +767,9 @@ block0(v0: i64): } ; block0: -; celgbr %f0, 0, %r2, 0 +; ldgr %f3, %r2 +; wcdlgb %f5, %f3, 0, 3 +; ledbra %f0, %f5, 4 ; br %r14 function %fcvt_from_sint_i64_f32(i64) -> f32 { @@ -513,7 +779,57 @@ block0(v0: i64): } ; block0: -; cegbra %f0, 0, %r2, 0 +; ldgr %f3, %r2 +; wcdgb %f5, %f3, 0, 3 +; ledbra %f0, %f5, 4 +; br %r14 + +function %fcvt_from_uint_i8_f64(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; llgcr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, 
%f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i8_f64(i8) -> f64 { +block0(v0: i8): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; lgbr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_uint_i16_f64(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; block0: +; llghr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, %f5, 0, 4 +; br %r14 + +function %fcvt_from_sint_i16_f64(i16) -> f64 { +block0(v0: i16): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; block0: +; lghr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 ; br %r14 function %fcvt_from_uint_i32_f64(i32) -> f64 { @@ -523,7 +839,9 @@ block0(v0: i32): } ; block0: -; cdlfbr %f0, 0, %r2, 0 +; llgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdlgb %f0, %f5, 0, 4 ; br %r14 function %fcvt_from_sint_i32_f64(i32) -> f64 { @@ -533,7 +851,9 @@ block0(v0: i32): } ; block0: -; cdfbra %f0, 0, %r2, 0 +; lgfr %r5, %r2 +; ldgr %f5, %r5 +; wcdgb %f0, %f5, 0, 4 ; br %r14 function %fcvt_from_uint_i64_f64(i64) -> f64 { @@ -543,7 +863,8 @@ block0(v0: i64): } ; block0: -; cdlgbr %f0, 0, %r2, 0 +; ldgr %f3, %r2 +; wcdlgb %f0, %f3, 0, 4 ; br %r14 function %fcvt_from_sint_i64_f64(i64) -> f64 { @@ -553,7 +874,78 @@ block0(v0: i64): } ; block0: -; cdgbra %f0, 0, %r2, 0 +; ldgr %f3, %r2 +; wcdgb %f0, %f3, 0, 4 +; br %r14 + +function %fcvt_to_uint_sat_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; ldebr %f3, %f0 +; wclgdb %f5, %f3, 0, 5 +; lgdr %r5, %f5 +; lgr %r2, %r5 +; clgfi %r5, 256 +; locghih %r2, 255 +; br %r14 + +function %fcvt_to_sint_sat_f32_i8(f32) -> i8 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; ldebr %f3, %f0 +; wcgdb %f5, %f3, 0, 5 +; lgdr %r5, %f5 +; cebr %f0, %f0 +; locghio %r5, 0 +; lgr %r4, %r5 +; cghi %r5, 127 +; locghih %r4, 127 +; lgr %r2, %r4 +; cghi %r4, -128 +; locghil %r2, -128 +; br %r14 + +function %fcvt_to_uint_sat_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = 
fcvt_to_uint_sat.i16 v0 + return v1 +} + +; block0: +; ldebr %f3, %f0 +; wclgdb %f5, %f3, 0, 5 +; lgdr %r5, %f5 +; lgr %r2, %r5 +; clgfi %r5, 65535 +; locghih %r2, -1 +; br %r14 + +function %fcvt_to_sint_sat_f32_i16(f32) -> i16 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; ldebr %f3, %f0 +; wcgdb %f5, %f3, 0, 5 +; lgdr %r5, %f5 +; cebr %f0, %f0 +; locghio %r5, 0 +; lgr %r4, %r5 +; cghi %r5, 32767 +; locghih %r4, 32767 +; lgr %r2, %r4 +; cghi %r4, -32768 +; locghil %r2, -32768 ; br %r14 function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { @@ -563,9 +955,12 @@ block0(v0: f32): } ; block0: -; clfebr %r2, 5, %f0, 0 -; cebr %f0, %f0 -; lochio %r2, 0 +; ldebr %f3, %f0 +; wclgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 +; llilf %r3, 4294967295 +; clgr %r2, %r3 +; locgrh %r2, %r3 ; br %r14 function %fcvt_to_sint_sat_f32_i32(f32) -> i32 { @@ -575,9 +970,17 @@ block0(v0: f32): } ; block0: -; cfebra %r2, 5, %f0, 0 +; ldebr %f3, %f0 +; wcgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 ; cebr %f0, %f0 -; lochio %r2, 0 +; locghio %r2, 0 +; lgfi %r3, 2147483647 +; cgr %r2, %r3 +; locgrh %r2, %r3 +; lgfi %r5, -2147483648 +; cgr %r2, %r5 +; locgrl %r2, %r5 ; br %r14 function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { @@ -587,9 +990,9 @@ block0(v0: f32): } ; block0: -; clgebr %r2, 5, %f0, 0 -; cebr %f0, %f0 -; locghio %r2, 0 +; ldebr %f3, %f0 +; wclgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 ; br %r14 function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { @@ -599,11 +1002,79 @@ block0(v0: f32): } ; block0: -; cgebra %r2, 5, %f0, 0 +; ldebr %f3, %f0 +; wcgdb %f5, %f3, 0, 5 +; lgdr %r2, %f5 ; cebr %f0, %f0 ; locghio %r2, 0 ; br %r14 +function %fcvt_to_uint_sat_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i8 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; lgr %r2, %r3 +; clgfi %r3, 256 +; locghih %r2, 255 +; br %r14 + +function %fcvt_to_sint_sat_f64_i8(f64) -> i8 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i8 v0 + return v1 +} + +; block0: +; 
wcgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; cdbr %f0, %f0 +; locghio %r3, 0 +; lgr %r4, %r3 +; cghi %r3, 127 +; locghih %r4, 127 +; lgr %r2, %r4 +; cghi %r4, -128 +; locghil %r2, -128 +; br %r14 + +function %fcvt_to_uint_sat_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i16 v0 + return v1 +} + +; block0: +; wclgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; lgr %r2, %r3 +; clgfi %r3, 65535 +; locghih %r2, -1 +; br %r14 + +function %fcvt_to_sint_sat_f64_i16(f64) -> i16 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i16 v0 + return v1 +} + +; block0: +; wcgdb %f3, %f0, 0, 5 +; lgdr %r3, %f3 +; cdbr %f0, %f0 +; locghio %r3, 0 +; lgr %r4, %r3 +; cghi %r3, 32767 +; locghih %r4, 32767 +; lgr %r2, %r4 +; cghi %r4, -32768 +; locghil %r2, -32768 +; br %r14 + function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { block0(v0: f64): v1 = fcvt_to_uint_sat.i32 v0 @@ -611,9 +1082,11 @@ block0(v0: f64): } ; block0: -; clfdbr %r2, 5, %f0, 0 -; cdbr %f0, %f0 -; lochio %r2, 0 +; wclgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 +; llilf %r5, 4294967295 +; clgr %r2, %r5 +; locgrh %r2, %r5 ; br %r14 function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { @@ -623,9 +1096,16 @@ block0(v0: f64): } ; block0: -; cfdbra %r2, 5, %f0, 0 +; wcgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 ; cdbr %f0, %f0 -; lochio %r2, 0 +; locghio %r2, 0 +; lgfi %r5, 2147483647 +; cgr %r2, %r5 +; locgrh %r2, %r5 +; lgfi %r3, -2147483648 +; cgr %r2, %r3 +; locgrl %r2, %r3 ; br %r14 function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { @@ -635,9 +1115,8 @@ block0(v0: f64): } ; block0: -; clgdbr %r2, 5, %f0, 0 -; cdbr %f0, %f0 -; locghio %r2, 0 +; wclgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 ; br %r14 function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { @@ -647,7 +1126,8 @@ block0(v0: f64): } ; block0: -; cgdbra %r2, 5, %f0, 0 +; wcgdb %f3, %f0, 0, 5 +; lgdr %r2, %f3 ; cdbr %f0, %f0 ; locghio %r2, 0 ; br %r14 @@ -679,8 +1159,7 @@ block0(v0: i32): } ; block0: -; sllg %r5, %r2, 32 -; ldgr %f0, %r5 +; vlvgf %v0, %r2, 0 ; br %r14 function %bitcast_f32_i32(f32) -> 
i32 { @@ -690,7 +1169,6 @@ block0(v0: f32): } ; block0: -; lgdr %r5, %f0 -; srlg %r2, %r5, 32 +; vlgvf %r2, %v0, 0 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem.clif b/cranelift/filetests/filetests/isa/s390x/fpmem.clif index 49b1ff1aeb..577397097d 100644 --- a/cranelift/filetests/filetests/isa/s390x/fpmem.clif +++ b/cranelift/filetests/filetests/isa/s390x/fpmem.clif @@ -40,8 +40,7 @@ block0(v0: i64): ; block0: ; lrv %r5, 0(%r2) -; sllg %r3, %r5, 32 -; ldgr %f0, %r3 +; vlvgf %v0, %r5, 0 ; br %r14 function %store_f64(f64, i64) { @@ -82,8 +81,7 @@ block0(v0: f32, v1: i64): } ; block0: -; lgdr %r3, %f0 -; srlg %r4, %r3, 32 -; strv %r4, 0(%r2) +; vlgvf %r3, %v0, 0 +; strv %r3, 0(%r2) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif index dd58f5895f..72e076df19 100644 --- a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -76,9 +76,9 @@ block1: ; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) ; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) ; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) -; bras %r1, 12 ; data.f64 4 ; ld %f5, 0(%r1) -; bras %r1, 12 ; data.f64 5 ; ld %f7, 0(%r1) -; std %f5, 0(%r2) -; std %f7, 8(%r2) +; bras %r1, 12 ; data.f64 4 ; vleg %v28, 0(%r1), 0 +; bras %r1, 12 ; data.f64 5 ; vleg %v31, 0(%r1), 0 +; vsteg %v28, 0(%r2), 0 +; vsteg %v31, 8(%r2), 0 ; br %r14