Browse Source

s390x: use full vector register file for FP operations (#4360)

This defines the full set of 32 128-bit vector registers on s390x.
(Note that the VRs overlap the existing FPRs.)  In addition, this
adds support to use all 32 vector registers to implement floating-
point operations, by using vector floating-point instructions with
the 'W' bit set to operate only on the first element.

This part of the vector instruction set mostly matches the old FP
instruction set, with two exceptions:

- There is no vector version of the COPY SIGN instruction.  Instead,
  now use a VECTOR SELECT with an appropriate bit mask to implement
  the fcopysign operation.

- There are no vector versions of the float <-> int conversion
  instructions where source and target differ in bit size.  Use
  appropriate multiple conversion steps instead.  This also requires
  use of explicit checking to implement correct overflow handling.
  As a side effect, this version now also implements the i8 / i16
  variants of all conversions, which had been missing so far.

For all operations except those two above, we continue to use the
old FP instruction if applicable (i.e. if all operands happen to
have been allocated to the original FP register set), and use the
vector instruction otherwise.
pull/4362/head
Ulrich Weigand 2 years ago
committed by GitHub
parent
commit
ec83144c88
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 48
      cranelift/codegen/src/isa/s390x/abi.rs
  2. 424
      cranelift/codegen/src/isa/s390x/inst.isle
  3. 569
      cranelift/codegen/src/isa/s390x/inst/emit.rs
  4. 1234
      cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
  5. 518
      cranelift/codegen/src/isa/s390x/inst/mod.rs
  6. 87
      cranelift/codegen/src/isa/s390x/inst/regs.rs
  7. 20
      cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs
  8. 166
      cranelift/codegen/src/isa/s390x/lower.isle
  9. 42
      cranelift/codegen/src/isa/s390x/lower/isle.rs
  10. 736
      cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif
  11. 586
      cranelift/filetests/filetests/isa/s390x/floating-point.clif
  12. 8
      cranelift/filetests/filetests/isa/s390x/fpmem.clif
  13. 8
      cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif

48
cranelift/codegen/src/isa/s390x/abi.rs

@ -109,10 +109,10 @@ fn get_intreg_for_arg(idx: usize) -> Option<Reg> {
fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::fpr(0)),
1 => Some(regs::fpr(2)),
2 => Some(regs::fpr(4)),
3 => Some(regs::fpr(6)),
0 => Some(regs::vr(0)),
1 => Some(regs::vr(2)),
2 => Some(regs::vr(4)),
3 => Some(regs::vr(6)),
_ => None,
}
}
@ -130,11 +130,11 @@ fn get_intreg_for_ret(idx: usize) -> Option<Reg> {
fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
match idx {
0 => Some(regs::fpr(0)),
0 => Some(regs::vr(0)),
// ABI extension to support multi-value returns:
1 => Some(regs::fpr(2)),
2 => Some(regs::fpr(4)),
3 => Some(regs::fpr(6)),
1 => Some(regs::vr(2)),
2 => Some(regs::vr(4)),
3 => Some(regs::vr(6)),
_ => None,
}
}
@ -736,14 +736,30 @@ const fn clobbers() -> PRegSet {
.with(gpr_preg(3))
.with(gpr_preg(4))
.with(gpr_preg(5))
.with(fpr_preg(0))
.with(fpr_preg(1))
.with(fpr_preg(2))
.with(fpr_preg(3))
.with(fpr_preg(4))
.with(fpr_preg(5))
.with(fpr_preg(6))
.with(fpr_preg(7))
.with(vr_preg(0))
.with(vr_preg(1))
.with(vr_preg(2))
.with(vr_preg(3))
.with(vr_preg(4))
.with(vr_preg(5))
.with(vr_preg(6))
.with(vr_preg(7))
.with(vr_preg(16))
.with(vr_preg(17))
.with(vr_preg(18))
.with(vr_preg(19))
.with(vr_preg(20))
.with(vr_preg(21))
.with(vr_preg(22))
.with(vr_preg(23))
.with(vr_preg(24))
.with(vr_preg(25))
.with(vr_preg(26))
.with(vr_preg(27))
.with(vr_preg(28))
.with(vr_preg(29))
.with(vr_preg(30))
.with(vr_preg(31))
}
const CLOBBERS: PRegSet = clobbers();

424
cranelift/codegen/src/isa/s390x/inst.isle

@ -445,62 +445,68 @@
(cond Cond)
(imm i16))
;; A 32-bit FPU move.
;; A 32-bit FPU move possibly implemented as vector instruction.
(FpuMove32
(rd WritableReg)
(rn Reg))
;; A 64-bit FPU move.
;; A 64-bit FPU move possibly implemented as vector instruction.
(FpuMove64
(rd WritableReg)
(rn Reg))
;; A 32-bit conditional move FPU instruction.
;; A 32-bit conditional move FPU instruction, possibly as vector instruction.
(FpuCMov32
(rd WritableReg)
(cond Cond)
(rm Reg))
;; A 64-bit conditional move FPU instruction.
;; A 64-bit conditional move FPU instruction, possibly as vector instruction.
(FpuCMov64
(rd WritableReg)
(cond Cond)
(rm Reg))
;; A 64-bit move instruction from GPR to FPR.
(MovToFpr
;; A 32-bit move instruction from GPR to FPR or vector element.
(MovToFpr32
(rd WritableReg)
(rn Reg))
;; A 64-bit move instruction from FPR to GPR.
(MovFromFpr
;; A 64-bit move instruction from GPR to FPR or vector element.
(MovToFpr64
(rd WritableReg)
(rn Reg))
;; 1-op FPU instruction.
;; A 32-bit move instruction from FPR or vector element to GPR.
(MovFromFpr32
(rd WritableReg)
(rn Reg))
;; A 64-bit move instruction from FPR or vector element to GPR.
(MovFromFpr64
(rd WritableReg)
(rn Reg))
;; 1-op FPU instruction implemented as vector instruction with the W bit.
(FpuRR
(fpu_op FPUOp1)
(rd WritableReg)
(rn Reg))
;; 2-op FPU instruction.
;; 2-op FPU instruction implemented as vector instruction with the W bit.
(FpuRRR
(fpu_op FPUOp2)
(rd WritableReg)
(rn Reg)
(rm Reg))
;; 3-op FPU instruction.
;; 3-op FPU instruction implemented as vector instruction with the W bit.
(FpuRRRR
(fpu_op FPUOp3)
(rd WritableReg)
(rn Reg)
(rm Reg))
;; FPU copy sign instruction.
(FpuCopysign
(rd WritableReg)
(rn Reg)
(rm Reg))
(rm Reg)
(ra Reg))
;; FPU comparison, single-precision (32 bit).
(FpuCmp32
@ -562,30 +568,19 @@
(rd WritableReg)
(const_data u64))
;; Conversion FP -> integer.
(FpuToInt
(op FpuToIntOp)
(rd WritableReg)
(rn Reg))
;; Conversion integer -> FP.
(IntToFpu
(op IntToFpuOp)
(rd WritableReg)
(rn Reg))
;; Round to integer.
;; 1-op FPU instruction with rounding mode.
(FpuRound
(op FpuRoundMode)
(op FpuRoundOp)
(mode FpuRoundMode)
(rd WritableReg)
(rn Reg))
;; 2-op FPU instruction implemented as vector instruction with the W bit.
(FpuVecRRR
(fpu_op FPUOp2)
;; Vector select instruction.
(VecSelect
(rd WritableReg)
(rn Reg)
(rm Reg))
(rm Reg)
(ra Reg))
;; A machine call instruction.
(Call
@ -824,7 +819,6 @@
(Sqrt32)
(Sqrt64)
(Cvt32To64)
(Cvt64To32)
))
;; A floating-point unit (FPU) operation with two args.
@ -853,44 +847,32 @@
(MSub64)
))
;; A conversion from an FP to an integer value.
(type FpuToIntOp
;; A floating-point unit (FPU) operation with one arg, and rounding mode.
(type FpuRoundOp
(enum
(F32ToU32)
(F32ToI32)
(F32ToU64)
(F32ToI64)
(F64ToU32)
(F64ToI32)
(F64ToU64)
(F64ToI64)
))
;; A conversion from an integer to an FP value.
(type IntToFpuOp
(enum
(U32ToF32)
(I32ToF32)
(U32ToF64)
(I32ToF64)
(U64ToF32)
(I64ToF32)
(U64ToF64)
(I64ToF64)
(Cvt64To32)
(Round32)
(Round64)
(ToSInt32)
(ToSInt64)
(ToUInt32)
(ToUInt64)
(FromSInt32)
(FromSInt64)
(FromUInt32)
(FromUInt64)
))
;; Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero
;; (trunc), or to nearest, and for 32- or 64-bit FP values.
;; Rounding modes for floating-point ops.
(type FpuRoundMode
(enum
(Minus32)
(Minus64)
(Plus32)
(Plus64)
(Zero32)
(Zero64)
(Nearest32)
(Nearest64)
(Current)
(ToNearest)
(ShorterPrecision)
(ToNearestTiesToEven)
(ToZero)
(ToPosInfinity)
(ToNegInfinity)
))
@ -1608,22 +1590,15 @@
;; Helper for emitting `MInst.FpuRRR` instructions.
(decl fpu_rrr (Type FPUOp2 Reg Reg) Reg)
(rule (fpu_rrr ty op src1 src2)
(let ((dst WritableReg (copy_writable_reg ty src1))
(_ Unit (emit (MInst.FpuRRR op dst src2))))
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.FpuRRR op dst src1 src2))))
dst))
;; Helper for emitting `MInst.FpuRRRR` instructions.
(decl fpu_rrrr (Type FPUOp3 Reg Reg Reg) Reg)
(rule (fpu_rrrr ty op src1 src2 src3)
(let ((dst WritableReg (copy_writable_reg ty src1))
(_ Unit (emit (MInst.FpuRRRR op dst src2 src3))))
dst))
;; Helper for emitting `MInst.FpuCopysign` instructions.
(decl fpu_copysign (Type Reg Reg) Reg)
(rule (fpu_copysign ty src1 src2)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.FpuCopysign dst src1 src2))))
(_ Unit (emit (MInst.FpuRRRR op dst src1 src2 src3))))
dst))
;; Helper for emitting `MInst.FpuCmp32` instructions.
@ -1636,46 +1611,39 @@
(rule (fpu_cmp64 src1 src2)
(ProducesFlags.ProducesFlagsSideEffect (MInst.FpuCmp64 src1 src2)))
;; Helper for emitting `MInst.FpuToInt` instructions.
(decl fpu_to_int (Type FpuToIntOp Reg) ProducesFlags)
(rule (fpu_to_int ty op src)
(let ((dst WritableReg (temp_writable_reg ty)))
(ProducesFlags.ProducesFlagsReturnsReg (MInst.FpuToInt op dst src)
dst)))
;; Helper for emitting `MInst.IntToFpu` instructions.
(decl int_to_fpu (Type IntToFpuOp Reg) Reg)
(rule (int_to_fpu ty op src)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.IntToFpu op dst src))))
dst))
;; Helper for emitting `MInst.FpuRound` instructions.
(decl fpu_round (Type FpuRoundMode Reg) Reg)
(rule (fpu_round ty mode src)
(decl fpu_round (Type FpuRoundOp FpuRoundMode Reg) Reg)
(rule (fpu_round ty op mode src)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.FpuRound mode dst src))))
(_ Unit (emit (MInst.FpuRound op mode dst src))))
dst))
;; Helper for emitting `MInst.FpuVecRRR` instructions.
(decl fpuvec_rrr (Type FPUOp2 Reg Reg) Reg)
(rule (fpuvec_rrr ty op src1 src2)
(let ((dst WritableReg (temp_writable_reg ty))
(_ Unit (emit (MInst.FpuVecRRR op dst src1 src2))))
;; Helper for emitting `MInst.MovToFpr32` instructions.
(decl mov_to_fpr32 (Reg) Reg)
(rule (mov_to_fpr32 src)
(let ((dst WritableReg (temp_writable_reg $F32))
(_ Unit (emit (MInst.MovToFpr32 dst src))))
dst))
;; Helper for emitting `MInst.MovToFpr` instructions.
(decl mov_to_fpr (Reg) Reg)
(rule (mov_to_fpr src)
;; Helper for emitting `MInst.MovToFpr64` instructions.
(decl mov_to_fpr64 (Reg) Reg)
(rule (mov_to_fpr64 src)
(let ((dst WritableReg (temp_writable_reg $F64))
(_ Unit (emit (MInst.MovToFpr dst src))))
(_ Unit (emit (MInst.MovToFpr64 dst src))))
dst))
;; Helper for emitting `MInst.MovFromFpr32` instructions.
(decl mov_from_fpr32 (Reg) Reg)
(rule (mov_from_fpr32 src)
(let ((dst WritableReg (temp_writable_reg $I32))
(_ Unit (emit (MInst.MovFromFpr32 dst src))))
dst))
;; Helper for emitting `MInst.MovFromFpr` instructions.
(decl mov_from_fpr (Reg) Reg)
(rule (mov_from_fpr src)
;; Helper for emitting `MInst.MovFromFpr64` instructions.
(decl mov_from_fpr64 (Reg) Reg)
(rule (mov_from_fpr64 src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.MovFromFpr dst src))))
(_ Unit (emit (MInst.MovFromFpr64 dst src))))
dst))
;; Helper for emitting `MInst.FpuLoad32` instructions.
@ -1726,6 +1694,13 @@
(rule (fpu_storerev64 src addr)
(SideEffectNoResult.Inst (MInst.FpuStoreRev64 src addr)))
;; Helper for emitting `MInst.VecSelect` instructions.
;;
;; Allocates a fresh temporary of type `ty`, emits a single `VecSelect`
;; writing to it with the three source registers passed through in order,
;; and returns the temporary.
(decl vec_select (Type Reg Reg Reg) Reg)
(rule (vec_select ty a b mask)
      (let ((result WritableReg (temp_writable_reg ty))
            (_ Unit (emit (MInst.VecSelect result a b mask))))
        result))
;; Helper for emitting `MInst.LoadExtNameFar` instructions.
(decl load_ext_name_far (ExternalName i64) Reg)
(rule (load_ext_name_far name offset)
@ -2047,6 +2022,13 @@
(_ Unit (emit_imm ty dst n)))
dst))
;; Variant used for negative constants.
;;
;; Places the signed 32-bit immediate `value` into a fresh `$I64` temporary
;; using `MInst.Mov64SImm32`.  Only the `$I64` result type is handled here.
(decl imm32 (Type i32) Reg)
(rule (imm32 $I64 value)
      (let ((tmp WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.Mov64SImm32 tmp value))))
        (writable_reg_to_reg tmp)))
;; Place an immediate into the low half of a register pair.
;; The high half is taken from the input.
(decl imm_regpair_lo (Type u64 RegPair) RegPair)
@ -2651,6 +2633,50 @@
dst))
;; Helpers for generating saturating integer instructions ;;;;;;;;;;;;;;;;;;;;;;
;; Saturate an unsigned integer to the range of a narrower unsigned type.
;;
;; Arguments: the target type whose range is enforced, the type the value
;; currently has in `reg` (used for the comparison), and the value itself.
;; Values above the target type's maximum are replaced by that maximum.
(decl uint_sat_reg (Type Type Reg) Reg)

;; Source and target type are identical: nothing to clamp.
(rule (uint_sat_reg ty ty reg) reg)

;; Clamp to 0..=255.  The comparison must be against the maximum (255), not
;; 256: with `UnsignedGreaterThan`, comparing against 256 would let the value
;; 256 itself through, which truncates to 0 as an 8-bit result.
(rule (uint_sat_reg $I8 (ty_32_or_64 ty) reg)
      (with_flags_reg (icmpu_uimm32 ty reg 255)
                      (cmov_imm ty (intcc_as_cond (IntCC.UnsignedGreaterThan)) 255 reg)))

;; Clamp to 0..=65535.  The replacement immediate is written as -1;
;; presumably the conditional-move immediate is a signed 16-bit value whose
;; low 16 bits are 0xffff -- confirm against `cmov_imm`.
(rule (uint_sat_reg $I16 (ty_32_or_64 ty) reg)
      (with_flags_reg (icmpu_uimm32 ty reg 65535)
                      (cmov_imm ty (intcc_as_cond (IntCC.UnsignedGreaterThan)) -1 reg)))

;; Clamp a 64-bit value to 0..=4294967295.  The bound is materialized in a
;; register and reused as the replacement value.
(rule (uint_sat_reg $I32 $I64 reg)
      (let ((bound Reg (imm $I64 4294967295))
            (cond ProducesBool
                  (bool (icmpu_reg $I64 reg bound)
                        (intcc_as_cond (IntCC.UnsignedGreaterThan)))))
        (select_bool_reg $I64 cond bound reg)))
;; Saturate a signed integer to the range of a narrower signed type.
;;
;; Arguments: the target type whose range is enforced, the type the value
;; currently has in `reg` (used for the comparisons), and the value itself.
;; Each narrowing rule first clamps against the upper bound and then clamps
;; that intermediate result against the lower bound.
(decl sint_sat_reg (Type Type Reg) Reg)

;; Source and target type are identical: nothing to clamp.
(rule (sint_sat_reg ty ty reg) reg)

;; Clamp to -128..=127.
(rule (sint_sat_reg $I8 (ty_32_or_64 ty) reg)
      (let ((hi_clamped Reg
              (with_flags_reg (icmps_simm16 ty reg 127)
                              (cmov_imm ty (intcc_as_cond (IntCC.SignedGreaterThan)) 127 reg))))
        (with_flags_reg (icmps_simm16 ty hi_clamped -128)
                        (cmov_imm ty (intcc_as_cond (IntCC.SignedLessThan)) -128 hi_clamped))))

;; Clamp to -32768..=32767.
(rule (sint_sat_reg $I16 (ty_32_or_64 ty) reg)
      (let ((hi_clamped Reg
              (with_flags_reg (icmps_simm16 ty reg 32767)
                              (cmov_imm ty (intcc_as_cond (IntCC.SignedGreaterThan)) 32767 reg))))
        (with_flags_reg (icmps_simm16 ty hi_clamped -32768)
                        (cmov_imm ty (intcc_as_cond (IntCC.SignedLessThan)) -32768 hi_clamped))))

;; Clamp a 64-bit value to -2147483648..=2147483647.  Both bounds are
;; materialized in registers and reused as the replacement values.
(rule (sint_sat_reg $I32 $I64 reg)
      (let ((max_val Reg (imm32 $I64 2147483647))
            (above_max ProducesBool
                       (bool (icmps_reg $I64 reg max_val)
                             (intcc_as_cond (IntCC.SignedGreaterThan))))
            (hi_clamped Reg (select_bool_reg $I64 above_max max_val reg))
            (min_val Reg (imm32 $I64 -2147483648))
            (below_min ProducesBool
                       (bool (icmps_reg $I64 hi_clamped min_val)
                             (intcc_as_cond (IntCC.SignedLessThan)))))
        (select_bool_reg $I64 below_min min_val hi_clamped)))
;; Helpers for generating `add` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl aluop_add (Type) ALUOp)
@ -3151,7 +3177,7 @@
(rule (fpuop2_min $F64) (FPUOp2.Min64))
(decl fmin_reg (Type Reg Reg) Reg)
(rule (fmin_reg ty x y) (fpuvec_rrr ty (fpuop2_min ty) x y))
(rule (fmin_reg ty x y) (fpu_rrr ty (fpuop2_min ty) x y))
;; Helpers for generating `fmax` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -3161,7 +3187,7 @@
(rule (fpuop2_max $F64) (FPUOp2.Max64))
(decl fmax_reg (Type Reg Reg) Reg)
(rule (fmax_reg ty x y) (fpuvec_rrr ty (fpuop2_max ty) x y))
(rule (fmax_reg ty x y) (fpu_rrr ty (fpuop2_max ty) x y))
;; Helpers for generating `fma` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -3171,7 +3197,7 @@
(rule (fpuop3_fma $F64) (FPUOp3.MAdd64))
(decl fma_reg (Type Reg Reg Reg) Reg)
(rule (fma_reg ty x y acc) (fpu_rrrr ty (fpuop3_fma ty) acc x y))
(rule (fma_reg ty x y acc) (fpu_rrrr ty (fpuop3_fma ty) x y acc))
;; Helpers for generating `sqrt` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -3204,124 +3230,136 @@
(rule (fabs_reg ty x) (fpu_rr ty (fpuop1_abs ty) x))
;; Helpers for generating `ceil` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helpers for generating `ceil`, `floor`, `trunc`, `nearest` instructions ;;;;
(decl fpuroundmode_ceil (Type) FpuRoundMode)
(rule (fpuroundmode_ceil $F32) (FpuRoundMode.Plus32))
(rule (fpuroundmode_ceil $F64) (FpuRoundMode.Plus64))
(decl fpuroundop_round (Type) FpuRoundOp)
(rule (fpuroundop_round $F32) (FpuRoundOp.Round32))
(rule (fpuroundop_round $F64) (FpuRoundOp.Round64))
(decl ceil_reg (Type Reg) Reg)
(rule (ceil_reg ty x) (fpu_round ty (fpuroundmode_ceil ty) x))
;; Helpers for generating `floor` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl fpuroundmode_floor (Type) FpuRoundMode)
(rule (fpuroundmode_floor $F32) (FpuRoundMode.Minus32))
(rule (fpuroundmode_floor $F64) (FpuRoundMode.Minus64))
(rule (ceil_reg ty x) (fpu_round ty (fpuroundop_round ty)
(FpuRoundMode.ToPosInfinity) x))
(decl floor_reg (Type Reg) Reg)
(rule (floor_reg ty x) (fpu_round ty (fpuroundmode_floor ty) x))
;; Helpers for generating `trunc` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl fpuroundmode_trunc (Type) FpuRoundMode)
(rule (fpuroundmode_trunc $F32) (FpuRoundMode.Zero32))
(rule (fpuroundmode_trunc $F64) (FpuRoundMode.Zero64))
(rule (floor_reg ty x) (fpu_round ty (fpuroundop_round ty)
(FpuRoundMode.ToNegInfinity) x))
(decl trunc_reg (Type Reg) Reg)
(rule (trunc_reg ty x) (fpu_round ty (fpuroundmode_trunc ty) x))
;; Helpers for generating `nearest` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl fpuroundmode_nearest (Type) FpuRoundMode)
(rule (fpuroundmode_nearest $F32) (FpuRoundMode.Nearest32))
(rule (fpuroundmode_nearest $F64) (FpuRoundMode.Nearest64))
(rule (trunc_reg ty x) (fpu_round ty (fpuroundop_round ty)
(FpuRoundMode.ToZero) x))
(decl nearest_reg (Type Reg) Reg)
(rule (nearest_reg ty x) (fpu_round ty (fpuroundmode_nearest ty) x))
(rule (nearest_reg ty x) (fpu_round ty (fpuroundop_round ty)
(FpuRoundMode.ToNearestTiesToEven) x))
;; Helpers for generating `fpromote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl fpuop1_promote (Type Type) FPUOp1)
(rule (fpuop1_promote $F64 $F32) (FPUOp1.Cvt32To64))
(decl fpromote_reg (Type Type Reg) Reg)
(rule (fpromote_reg dst_ty src_ty x)
(fpu_rr dst_ty (fpuop1_promote dst_ty src_ty) x))
(rule (fpromote_reg ty ty x) x)
(rule (fpromote_reg $F64 $F32 x)
(fpu_rr $F64 (FPUOp1.Cvt32To64) x))
;; Helpers for generating `fdemote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl fpuop1_demote (Type Type) FPUOp1)
(rule (fpuop1_demote $F32 $F64) (FPUOp1.Cvt64To32))
(decl fdemote_reg (Type Type Reg) Reg)
(rule (fdemote_reg dst_ty src_ty x)
(fpu_rr dst_ty (fpuop1_demote dst_ty src_ty) x))
(decl fdemote_reg (Type Type FpuRoundMode Reg) Reg)
(rule (fdemote_reg ty ty mode x) x)
(rule (fdemote_reg $F32 $F64 mode x)
(fpu_round $F32 (FpuRoundOp.Cvt64To32) mode x))
;; Helpers for generating `fcvt_from_uint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
(decl uint_to_fpu_op (Type Type) IntToFpuOp)
(rule (uint_to_fpu_op $F32 $I32) (IntToFpuOp.U32ToF32))
(rule (uint_to_fpu_op $F64 $I32) (IntToFpuOp.U32ToF64))
(rule (uint_to_fpu_op $F32 $I64) (IntToFpuOp.U64ToF32))
(rule (uint_to_fpu_op $F64 $I64) (IntToFpuOp.U64ToF64))
(decl uint_to_fpu_op (Type) FpuRoundOp)
(rule (uint_to_fpu_op $F32) (FpuRoundOp.FromUInt32))
(rule (uint_to_fpu_op $F64) (FpuRoundOp.FromUInt64))
(decl fcvt_from_uint_reg (Type Type Reg) Reg)
(rule (fcvt_from_uint_reg dst_ty src_ty x)
(int_to_fpu dst_ty (uint_to_fpu_op dst_ty src_ty) x))
(decl fcvt_from_uint_reg (Type FpuRoundMode Reg) Reg)
(rule (fcvt_from_uint_reg ty mode x)
(fpu_round ty (uint_to_fpu_op ty) mode x))
;; Helpers for generating `fcvt_from_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
(decl sint_to_fpu_op (Type Type) IntToFpuOp)
(rule (sint_to_fpu_op $F32 $I32) (IntToFpuOp.I32ToF32))
(rule (sint_to_fpu_op $F64 $I32) (IntToFpuOp.I32ToF64))
(rule (sint_to_fpu_op $F32 $I64) (IntToFpuOp.I64ToF32))
(rule (sint_to_fpu_op $F64 $I64) (IntToFpuOp.I64ToF64))
(decl sint_to_fpu_op (Type) FpuRoundOp)
(rule (sint_to_fpu_op $F32) (FpuRoundOp.FromSInt32))
(rule (sint_to_fpu_op $F64) (FpuRoundOp.FromSInt64))
(decl fcvt_from_sint_reg (Type Type Reg) Reg)
(rule (fcvt_from_sint_reg dst_ty src_ty x)
(int_to_fpu dst_ty (sint_to_fpu_op dst_ty src_ty) x))
(decl fcvt_from_sint_reg (Type FpuRoundMode Reg) Reg)
(rule (fcvt_from_sint_reg ty mode x)
(fpu_round ty (sint_to_fpu_op ty) mode x))
;; Helpers for generating `fcvt_to_uint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
;; Helpers for generating `fcvt_to_[us]int` instructions ;;;;;;;;;;;;;;;;;;;;;;;
(decl fpu_to_uint_op (Type Type) FpuToIntOp)
(rule (fpu_to_uint_op $I32 $F32) (FpuToIntOp.F32ToU32))
(rule (fpu_to_uint_op $I32 $F64) (FpuToIntOp.F64ToU32))
(rule (fpu_to_uint_op $I64 $F32) (FpuToIntOp.F32ToU64))
(rule (fpu_to_uint_op $I64 $F64) (FpuToIntOp.F64ToU64))
(decl fcvt_flt_ty (Type Type) Type)
(rule (fcvt_flt_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $F32)
(rule (fcvt_flt_ty (fits_in_64 ty) $F32) $F64)
(rule (fcvt_flt_ty (fits_in_64 ty) $F64) $F64)
(decl fcvt_to_uint_reg_with_flags (Type Type Reg) ProducesFlags)
(rule (fcvt_to_uint_reg_with_flags dst_ty src_ty x)
(fpu_to_int dst_ty (fpu_to_uint_op dst_ty src_ty) x))
(decl fcvt_int_ty (Type Type) Type)
(rule (fcvt_int_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $I32)
(rule (fcvt_int_ty (fits_in_64 ty) $F32) $I64)
(rule (fcvt_int_ty (fits_in_64 ty) $F64) $I64)
(decl fcvt_to_uint_reg (Type Type Reg) Reg)
(rule (fcvt_to_uint_reg dst_ty src_ty x)
(drop_flags (fcvt_to_uint_reg_with_flags dst_ty src_ty x)))
;; Helpers for generating `fcvt_to_uint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
;; Helpers for generating `fcvt_to_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
(decl fcvt_to_uint_reg (Type FpuRoundMode Reg) Reg)
(rule (fcvt_to_uint_reg $F32 mode x)
(mov_from_fpr32 (fpu_round $F32 (FpuRoundOp.ToUInt32) mode x)))
(rule (fcvt_to_uint_reg $F64 mode x)
(mov_from_fpr64 (fpu_round $F64 (FpuRoundOp.ToUInt64) mode x)))
(decl fpu_to_sint_op (Type Type) FpuToIntOp)
(rule (fpu_to_sint_op $I32 $F32) (FpuToIntOp.F32ToI32))
(rule (fpu_to_sint_op $I32 $F64) (FpuToIntOp.F64ToI32))
(rule (fpu_to_sint_op $I64 $F32) (FpuToIntOp.F32ToI64))
(rule (fpu_to_sint_op $I64 $F64) (FpuToIntOp.F64ToI64))
(decl fcvt_to_uint_ub (Type Type) Reg)
(rule (fcvt_to_uint_ub $F32 dst_ty)
(imm $F32 (fcvt_to_uint_ub32 (ty_bits dst_ty))))
(rule (fcvt_to_uint_ub $F64 dst_ty)
(imm $F64 (fcvt_to_uint_ub64 (ty_bits dst_ty))))
(decl fcvt_to_sint_reg_with_flags (Type Type Reg) ProducesFlags)
(rule (fcvt_to_sint_reg_with_flags dst_ty src_ty x)
(fpu_to_int dst_ty (fpu_to_sint_op dst_ty src_ty) x))
(decl fcvt_to_uint_lb (Type) Reg)
(rule (fcvt_to_uint_lb $F32) (imm $F32 (fcvt_to_uint_lb32)))
(rule (fcvt_to_uint_lb $F64) (imm $F64 (fcvt_to_uint_lb64)))
(decl fcvt_to_uint_ub32 (u8) u64)
(extern constructor fcvt_to_uint_ub32 fcvt_to_uint_ub32)
(decl fcvt_to_uint_lb32 () u64)
(extern constructor fcvt_to_uint_lb32 fcvt_to_uint_lb32)
(decl fcvt_to_uint_ub64 (u8) u64)
(extern constructor fcvt_to_uint_ub64 fcvt_to_uint_ub64)
(decl fcvt_to_uint_lb64 () u64)
(extern constructor fcvt_to_uint_lb64 fcvt_to_uint_lb64)
;; Helpers for generating `fcvt_to_sint` instructions ;;;;;;;;;;;;;;;;;;;;;;;;
(decl fcvt_to_sint_reg (Type Type Reg) Reg)
(rule (fcvt_to_sint_reg dst_ty src_ty x)
(drop_flags (fcvt_to_sint_reg_with_flags dst_ty src_ty x)))
(decl fcvt_to_sint_reg (Type FpuRoundMode Reg) Reg)
(rule (fcvt_to_sint_reg $F32 mode x)
(mov_from_fpr32 (fpu_round $F32 (FpuRoundOp.ToSInt32) mode x)))
(rule (fcvt_to_sint_reg $F64 mode x)
(mov_from_fpr64 (fpu_round $F64 (FpuRoundOp.ToSInt64) mode x)))
(decl fcvt_to_sint_ub (Type Type) Reg)
(rule (fcvt_to_sint_ub $F32 dst_ty)
(imm $F32 (fcvt_to_sint_ub32 (ty_bits dst_ty))))
(rule (fcvt_to_sint_ub $F64 dst_ty)
(imm $F64 (fcvt_to_sint_ub64 (ty_bits dst_ty))))
(decl fcvt_to_sint_lb (Type Type) Reg)
(rule (fcvt_to_sint_lb $F32 dst_ty)
(imm $F32 (fcvt_to_sint_lb32 (ty_bits dst_ty))))
(rule (fcvt_to_sint_lb $F64 dst_ty)
(imm $F64 (fcvt_to_sint_lb64 (ty_bits dst_ty))))
(decl fcvt_to_sint_ub32 (u8) u64)
(extern constructor fcvt_to_sint_ub32 fcvt_to_sint_ub32)
(decl fcvt_to_sint_lb32 (u8) u64)
(extern constructor fcvt_to_sint_lb32 fcvt_to_sint_lb32)
(decl fcvt_to_sint_ub64 (u8) u64)
(extern constructor fcvt_to_sint_ub64 fcvt_to_sint_ub64)
(decl fcvt_to_sint_lb64 (u8) u64)
(extern constructor fcvt_to_sint_lb64 fcvt_to_sint_lb64)
;; Helpers for generating signed `icmp` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;

569
cranelift/codegen/src/isa/s390x/inst/emit.rs

@ -296,6 +296,38 @@ pub fn mem_imm16_emit(
}
}
/// Emits a VRX-format instruction that accesses memory through `mem`.
///
/// The memory argument is first finalized with `mem_finalize`; any
/// instructions that step returns are emitted ahead of the access itself.
/// When `add_trap` is set, the finalized address can trap, and the current
/// source location is not the default one, a `HeapOutOfBounds` trap record
/// is added to `sink` before the instruction bytes.
///
/// `rd` is the vector register operand, `opcode` the 16-bit opcode and `m3`
/// the mask field, all forwarded to `enc_vrx`.
///
/// # Panics
///
/// Panics via `unreachable!()` if the finalized memory argument is not in
/// `MemArg::BXD12` form.
pub fn mem_vrx_emit(
    rd: Reg,
    mem: &MemArg,
    opcode: u16,
    m3: u8,
    add_trap: bool,
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
) {
    let (setup_insts, finalized) = mem_finalize(mem, state, true, false, false, true);
    for setup in setup_insts {
        setup.emit(&[], sink, emit_info, state);
    }

    // The source location is only consulted once a trap is actually wanted.
    let wants_trap = add_trap && finalized.can_trap();
    if wants_trap && state.cur_srcloc() != SourceLoc::default() {
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    let (base, index, disp) = match &finalized {
        &MemArg::BXD12 {
            base, index, disp, ..
        } => (base, index, disp),
        _ => unreachable!(),
    };
    put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), m3));
}
//=============================================================================
// Instructions and subcomponents: emission
@ -304,15 +336,50 @@ fn machreg_to_gpr(m: Reg) -> u8 {
u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
}
fn machreg_to_fpr(m: Reg) -> u8 {
/// Returns the hardware encoding of a vector register as a `u8`.
///
/// # Panics
///
/// Panics if `m` is not of class `RegClass::Float`, is not a real register,
/// or has a hardware encoding that does not fit in a `u8`.
fn machreg_to_vr(m: Reg) -> u8 {
    assert_eq!(m.class(), RegClass::Float);
    let real = m.to_real_reg().unwrap();
    let hw = real.hw_enc();
    u8::try_from(hw).unwrap()
}
fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
/// Returns the hardware encoding of a floating-point register as a `u8`.
///
/// # Panics
///
/// Panics if `is_fpr(m)` is false, if `m` is not a real register, or if its
/// hardware encoding does not fit in a `u8`.
fn machreg_to_fpr(m: Reg) -> u8 {
    assert!(is_fpr(m));
    let real = m.to_real_reg().unwrap();
    let hw = real.hw_enc();
    u8::try_from(hw).unwrap()
}
/// Returns the hardware encoding of a register without checking its class,
/// requiring only that the encoding fits a 4-bit register field.
///
/// # Panics
///
/// Panics if `m` is not a real register, if its hardware encoding does not
/// fit in a `u8`, or if the encoding is 16 or greater.
fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
    let real = m.to_real_reg().unwrap();
    let enc = u8::try_from(real.hw_enc()).unwrap();
    assert!(enc < 16);
    enc
}
/// Computes the 4-bit RXB field for up to four vector register operands.
///
/// Each operand that is present and for which `is_fpr` returns false
/// contributes one bit: 8 for `v1`, 4 for `v2`, 2 for `v3` and 1 for `v4`.
/// Absent operands (`None`) contribute nothing.
fn rxb(v1: Option<Reg>, v2: Option<Reg>, v3: Option<Reg>, v4: Option<Reg>) -> u8 {
    // Yields `bit` when the operand exists and is not an FPR, else 0.
    let high_bit = |operand: Option<Reg>, bit: u8| -> u8 {
        match operand {
            Some(reg) if !is_fpr(reg) => bit,
            _ => 0,
        }
    };

    high_bit(v1, 8) | high_bit(v2, 4) | high_bit(v3, 2) | high_bit(v4, 1)
}
/// E-type instructions.
///
/// 15
@ -785,19 +852,45 @@ fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] {
enc
}
/// VRR-type instructions.
/// VRRa-type instructions.
///
///   47      39 35 31 23 19 15 11  7
///   opcode1 v1 v2 -  m5 m4 m3 rxb opcode2
///        40 36 32 24 20 16 12   8       0
///
/// `opcode` supplies `opcode1` in its high byte and `opcode2` in its low
/// byte.  Only the low four bits of each register number and mask value are
/// placed in the instruction; the RXB field is derived from `v1` and `v2`.
fn enc_vrr_a(opcode: u16, v1: Reg, v2: Reg, m3: u8, m4: u8, m5: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // RXB must be computed from the registers before they are reduced to
    // their 4-bit field values.
    let rxb_bits = rxb(Some(v1), Some(v2), None, None);
    let v1 = machreg_to_vr(v1) & 0x0f;
    let v2 = machreg_to_vr(v2) & 0x0f;
    let m3 = m3 & 0x0f;
    let m4 = m4 & 0x0f;
    let m5 = m5 & 0x0f;

    [
        opcode1,
        v1 << 4 | v2,
        0,
        m5 << 4 | m4,
        m3 << 4 | rxb_bits,
        opcode2,
    ]
}
/// VRRc-type instructions.
///
/// 47 39 35 31 27 23 19 15 11 7
/// opcode1 v1 v2 v3 - m6 m5 m4 rxb opcode2
/// 40 36 32 28 24 20 16 12 8 0
///
fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] {
fn enc_vrr_c(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] {
let opcode1 = ((opcode >> 8) & 0xff) as u8;
let opcode2 = (opcode & 0xff) as u8;
let rxb = 0; // FIXME
let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME
let v2 = machreg_to_fpr(v2) & 0x0f; // FIXME
let v3 = machreg_to_fpr(v3) & 0x0f; // FIXME
let rxb = rxb(Some(v1), Some(v2), Some(v3), None);
let v1 = machreg_to_vr(v1) & 0x0f;
let v2 = machreg_to_vr(v2) & 0x0f;
let v3 = machreg_to_vr(v3) & 0x0f;
let m4 = m4 & 0x0f;
let m5 = m5 & 0x0f;
let m6 = m6 & 0x0f;
@ -812,6 +905,87 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u
enc
}
/// VRRe-type instructions.
///
///   47      39 35 31 27 23 19 15 11  7
///   opcode1 v1 v2 v3 m6 -  m5 v4 rxb opcode2
///        40 36 32 28 24 20 16 12   8       0
///
/// `opcode` supplies `opcode1` in its high byte and `opcode2` in its low
/// byte.  Only the low four bits of each register number and mask value are
/// placed in the instruction; the RXB field is derived from all four vector
/// registers.
fn enc_vrr_e(opcode: u16, v1: Reg, v2: Reg, v3: Reg, v4: Reg, m5: u8, m6: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // RXB must be computed from the registers before they are reduced to
    // their 4-bit field values.
    let rxb_bits = rxb(Some(v1), Some(v2), Some(v3), Some(v4));
    let v1 = machreg_to_vr(v1) & 0x0f;
    let v2 = machreg_to_vr(v2) & 0x0f;
    let v3 = machreg_to_vr(v3) & 0x0f;
    let v4 = machreg_to_vr(v4) & 0x0f;
    let m5 = m5 & 0x0f;
    let m6 = m6 & 0x0f;

    [
        opcode1,
        v1 << 4 | v2,
        v3 << 4 | m6,
        m5,
        v4 << 4 | rxb_bits,
        opcode2,
    ]
}
/// VRSb-type instructions.
///
///   47      39 35 31 27 15 11  7
///   opcode1 v1 r3 b2 d2 m4 rxb opcode2
///        40 36 32 28 16 12   8       0
///
/// `opcode` supplies `opcode1` in its high byte and `opcode2` in its low
/// byte.  `v1` is a vector register, `b2` and `r3` are general-purpose
/// registers.  Only the low 12 bits of the displacement `d2` and the low
/// four bits of `m4` are encoded; the RXB field is derived from `v1`.
fn enc_vrs_b(opcode: u16, v1: Reg, b2: Reg, d2: u32, r3: Reg, m4: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // RXB must be computed from the register before it is reduced to its
    // 4-bit field value.
    let rxb_bits = rxb(Some(v1), None, None, None);
    let v1 = machreg_to_vr(v1) & 0x0f;
    let b2 = machreg_to_gpr(b2) & 0x0f;
    let r3 = machreg_to_gpr(r3) & 0x0f;
    // The 12-bit displacement is split into a 4-bit high part and a low byte.
    let d2_hi = ((d2 >> 8) & 0x0f) as u8;
    let d2_lo = (d2 & 0xff) as u8;
    let m4 = m4 & 0x0f;

    [
        opcode1,
        v1 << 4 | r3,
        b2 << 4 | d2_hi,
        d2_lo,
        m4 << 4 | rxb_bits,
        opcode2,
    ]
}
/// VRSc-type instructions.
///
///   47      39 35 31 27 15 11  7
///   opcode1 r1 v3 b2 d2 m4 rxb opcode2
///        40 36 32 28 16 12   8       0
///
/// `opcode` supplies `opcode1` in its high byte and `opcode2` in its low
/// byte.  `r1` and `b2` are general-purpose registers, `v3` is a vector
/// register.  Only the low 12 bits of the displacement `d2` and the low
/// four bits of `m4` are encoded.  `v3` is passed to `rxb` in the second
/// operand position, matching the field it occupies in the instruction.
fn enc_vrs_c(opcode: u16, r1: Reg, b2: Reg, d2: u32, v3: Reg, m4: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // RXB must be computed from the register before it is reduced to its
    // 4-bit field value.
    let rxb_bits = rxb(None, Some(v3), None, None);
    let r1 = machreg_to_gpr(r1) & 0x0f;
    let b2 = machreg_to_gpr(b2) & 0x0f;
    let v3 = machreg_to_vr(v3) & 0x0f;
    // The 12-bit displacement is split into a 4-bit high part and a low byte.
    let d2_hi = ((d2 >> 8) & 0x0f) as u8;
    let d2_lo = (d2 & 0xff) as u8;
    let m4 = m4 & 0x0f;

    [
        opcode1,
        r1 << 4 | v3,
        b2 << 4 | d2_hi,
        d2_lo,
        m4 << 4 | rxb_bits,
        opcode2,
    ]
}
/// VRX-type instructions.
///
/// 47 39 35 31 27 15 11 7
@ -821,8 +995,8 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u
fn enc_vrx(opcode: u16, v1: Reg, b2: Reg, x2: Reg, d2: u32, m3: u8) -> [u8; 6] {
let opcode1 = ((opcode >> 8) & 0xff) as u8;
let opcode2 = (opcode & 0xff) as u8;
let rxb = 0; // FIXME
let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME
let rxb = rxb(Some(v1), None, None, None);
let v1 = machreg_to_vr(v1) & 0x0f;
let b2 = machreg_to_gpr(b2) & 0x0f;
let x2 = machreg_to_gpr(x2) & 0x0f;
let d2_lo = (d2 & 0xff) as u8;
@ -1633,9 +1807,7 @@ impl MachInstEmit for Inst {
| &Inst::Load64SExt32 { rd, ref mem }
| &Inst::LoadRev16 { rd, ref mem }
| &Inst::LoadRev32 { rd, ref mem }
| &Inst::LoadRev64 { rd, ref mem }
| &Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem } => {
| &Inst::LoadRev64 { rd, ref mem } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
@ -1655,8 +1827,6 @@ impl MachInstEmit for Inst {
&Inst::LoadRev16 { .. } => (None, Some(0xe31f), None), // LRVH
&Inst::LoadRev32 { .. } => (None, Some(0xe31e), None), // LRV
&Inst::LoadRev64 { .. } => (None, Some(0xe30f), None), // LRVG
&Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), None), // LE(Y)
&Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), None), // LD(Y)
_ => unreachable!(),
};
let rd = rd.to_reg();
@ -1664,36 +1834,27 @@ impl MachInstEmit for Inst {
rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
);
}
&Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
&Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem }
| &Inst::FpuLoadRev32 { rd, ref mem }
| &Inst::FpuLoadRev64 { rd, ref mem } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
let opcode = match self {
&Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF
&Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), 0xe703), // LE(Y), VLEF
&Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), 0xe702), // LD(Y), VLEG
&Inst::FpuLoadRev32 { .. } => (None, None, 0xe603), // VLEBRF
&Inst::FpuLoadRev64 { .. } => (None, None, 0xe602), // VLEBRG
_ => unreachable!(),
};
let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
}
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem.can_trap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
match &mem {
&MemArg::BXD12 {
base, index, disp, ..
} => {
put(
sink,
&enc_vrx(opcode, rd.to_reg(), base, index, disp.bits(), 0),
);
}
_ => unreachable!(),
let rd = rd.to_reg();
if is_fpr(rd) && opcode_rx.is_some() {
mem_emit(
rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
);
} else {
mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state);
}
}
@ -1703,9 +1864,7 @@ impl MachInstEmit for Inst {
| &Inst::Store64 { rd, ref mem }
| &Inst::StoreRev16 { rd, ref mem }
| &Inst::StoreRev32 { rd, ref mem }
| &Inst::StoreRev64 { rd, ref mem }
| &Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem } => {
| &Inst::StoreRev64 { rd, ref mem } => {
let rd = allocs.next(rd);
let mem = mem.with_allocs(&mut allocs);
@ -1717,8 +1876,6 @@ impl MachInstEmit for Inst {
&Inst::StoreRev16 { .. } => (None, Some(0xe33f), None), // STRVH
&Inst::StoreRev32 { .. } => (None, Some(0xe33e), None), // STRV
&Inst::StoreRev64 { .. } => (None, Some(0xe32f), None), // STRVG
&Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), None), // STE(Y)
&Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), None), // STD(Y)
_ => unreachable!(),
};
mem_emit(
@ -1747,33 +1904,26 @@ impl MachInstEmit for Inst {
};
mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state);
}
&Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
&Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem }
| &Inst::FpuStoreRev32 { rd, ref mem }
| &Inst::FpuStoreRev64 { rd, ref mem } => {
let rd = allocs.next(rd);
let mem = mem.with_allocs(&mut allocs);
let opcode = match self {
&Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF
&Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), 0xe70b), // STE(Y), VSTEF
&Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), 0xe70a), // STD(Y), VSTEG
&Inst::FpuStoreRev32 { .. } => (None, None, 0xe60b), // VSTEBRF
&Inst::FpuStoreRev64 { .. } => (None, None, 0xe60a), // VSTEBRG
_ => unreachable!(),
};
let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
}
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem.can_trap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
match &mem {
&MemArg::BXD12 {
base, index, disp, ..
} => {
put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), 0));
}
_ => unreachable!(),
if is_fpr(rd) && opcode_rx.is_some() {
mem_emit(
rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
);
} else {
mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state);
}
}
@ -1966,47 +2116,95 @@ impl MachInstEmit for Inst {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
} else {
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0));
}
}
&Inst::FpuMove64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
} else {
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0));
}
}
&Inst::FpuCMov32 { rd, cond, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
if is_fpr(rd.to_reg()) && is_fpr(rm) {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
} else {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6));
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0));
}
}
&Inst::FpuCMov64 { rd, cond, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
if is_fpr(rd.to_reg()) && is_fpr(rm) {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
} else {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6));
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0));
}
}
&Inst::MovToFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0xb3c1; // LDGR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
let (opcode, m4) = (0xe722, 2); // VLVG
put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
&Inst::MovFromFpr { rd, rn } => {
&Inst::MovToFpr64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0xb3cd; // LGDR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) {
let opcode = 0xb3c1; // LDGR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
} else {
let (opcode, m4) = (0xe722, 3); // VLVG
put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
}
// Emit the move for `MovFromFpr32`. Only the vector instruction is used in this arm;
// unlike the 64-bit variant there is no branch on `is_fpr` selecting an older FP encoding.
&Inst::MovFromFpr32 { rd, rn } => {
// Allocations are consumed in operand order: destination first, then source.
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
// m4 = 2 is the form the pretty-printer in this backend renders as `vlgvf`
// (presumably the 32-bit element size -- confirm against the ISA manual).
let (opcode, m4) = (0xe721, 2); // VLGV
// `zero_reg()` and `0` fill the base/displacement slots of the VRS encoding; the
// disassembly shows them as the trailing `0` (presumably the element index).
put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
// Emit the move for `MovFromFpr64`. Two encodings are possible, chosen by where the
// source register `rn` was allocated.
&Inst::MovFromFpr64 { rd, rn } => {
// Allocations are consumed in operand order: destination first, then source.
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
if is_fpr(rn) {
// Source satisfies `is_fpr`: keep using the pre-existing RRE-format FP instruction.
let opcode = 0xb3cd; // LGDR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
} else {
// Otherwise fall back to the vector instruction. This is the same opcode as in the
// `MovFromFpr32` arm, with m4 = 3 (printed as `vlgvg`).
let (opcode, m4) = (0xe721, 3); // VLGV
// `zero_reg()` and `0` fill the base/displacement slots of the VRS encoding; the
// disassembly shows them as the trailing `0` (presumably the element index).
put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
}
&Inst::LoadFpuConst32 { rd, const_data } => {
let rd = allocs.next_writable(rd);
@ -2034,138 +2232,143 @@ impl MachInstEmit for Inst {
};
inst.emit(&[], sink, emit_info, state);
}
&Inst::FpuCopysign { rd, rn, rm } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = 0xb372; // CPSDR
put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = match fpu_op {
FPUOp1::Abs32 => 0xb300, // LPEBR
FPUOp1::Abs64 => 0xb310, // LPDBR
FPUOp1::Neg32 => 0xb303, // LCEBR
FPUOp1::Neg64 => 0xb313, // LCDBR
FPUOp1::NegAbs32 => 0xb301, // LNEBR
FPUOp1::NegAbs64 => 0xb311, // LNDBR
FPUOp1::Sqrt32 => 0xb314, // SQEBR
FPUOp1::Sqrt64 => 0xb315, // SQDBR
FPUOp1::Cvt32To64 => 0xb304, // LDEBR
FPUOp1::Cvt64To32 => 0xb344, // LEDBR
let (opcode, m3, m5, opcode_fpr) = match fpu_op {
FPUOp1::Abs32 => (0xe7cc, 2, 2, 0xb300), // VFPSO, LPEBR
FPUOp1::Abs64 => (0xe7cc, 3, 2, 0xb310), // VFPSO, LPDBR
FPUOp1::Neg32 => (0xe7cc, 2, 0, 0xb303), // VFPSO, LCEBR
FPUOp1::Neg64 => (0xe7cc, 3, 0, 0xb313), // VFPSO, LCDBR
FPUOp1::NegAbs32 => (0xe7cc, 2, 1, 0xb301), // VFPSO, LNEBR
FPUOp1::NegAbs64 => (0xe7cc, 3, 1, 0xb311), // VFPSO, LNDBR
FPUOp1::Sqrt32 => (0xe7ce, 2, 0, 0xb314), // VFSQ, SQEBR
FPUOp1::Sqrt64 => (0xe7ce, 3, 0, 0xb315), // VFSQ, SQDBR
FPUOp1::Cvt32To64 => (0xe7c4, 2, 0, 0xb304), // VFLL, LDEBR
};
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
}
&Inst::FpuRRR { fpu_op, rd, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = match fpu_op {
FPUOp2::Add32 => 0xb30a, // AEBR
FPUOp2::Add64 => 0xb31a, // ADBR
FPUOp2::Sub32 => 0xb30b, // SEBR
FPUOp2::Sub64 => 0xb31b, // SDBR
FPUOp2::Mul32 => 0xb317, // MEEBR
FPUOp2::Mul64 => 0xb31c, // MDBR
FPUOp2::Div32 => 0xb30d, // DEBR
FPUOp2::Div64 => 0xb31d, // DDBR
_ => unimplemented!(),
};
put(sink, &enc_rre(opcode, rd.to_reg(), rm));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
put(sink, &enc_rre(opcode_fpr, rd.to_reg(), rn));
} else {
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, m5));
}
}
&Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = match fpu_op {
FPUOp3::MAdd32 => 0xb30e, // MAEBR
FPUOp3::MAdd64 => 0xb31e, // MADBR
FPUOp3::MSub32 => 0xb30f, // MSEBR
FPUOp3::MSub64 => 0xb31f, // MSDBR
let (opcode, m4, m6, opcode_fpr) = match fpu_op {
FPUOp2::Add32 => (0xe7e3, 2, 0, Some(0xb30a)), // VFA, AEBR
FPUOp2::Add64 => (0xe7e3, 3, 0, Some(0xb31a)), // VFA, ADBR
FPUOp2::Sub32 => (0xe7e2, 2, 0, Some(0xb30b)), // VFS, SEBR
FPUOp2::Sub64 => (0xe7e2, 3, 0, Some(0xb31b)), // VFS, SDBR
FPUOp2::Mul32 => (0xe7e7, 2, 0, Some(0xb317)), // VFM, MEEBR
FPUOp2::Mul64 => (0xe7e7, 3, 0, Some(0xb31c)), // VFM, MDBR
FPUOp2::Div32 => (0xe7e5, 2, 0, Some(0xb30d)), // VFD, DEBR
FPUOp2::Div64 => (0xe7e5, 3, 0, Some(0xb31d)), // VFD, DDBR
FPUOp2::Max32 => (0xe7ef, 2, 1, None), // VFMAX
FPUOp2::Max64 => (0xe7ef, 3, 1, None), // VFMAX
FPUOp2::Min32 => (0xe7ee, 2, 1, None), // VFMIN
FPUOp2::Min64 => (0xe7ee, 3, 1, None), // VFMIN
};
put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn));
if opcode_fpr.is_some() && rd.to_reg() == rn && is_fpr(rn) && is_fpr(rm) {
put(sink, &enc_rre(opcode_fpr.unwrap(), rd.to_reg(), rm));
} else {
put(sink, &enc_vrr_c(opcode, rd.to_reg(), rn, rm, m4, 8, m6));
}
}
&Inst::FpuToInt { op, rd, rn } => {
&Inst::FpuRRRR {
fpu_op,
rd,
rn,
rm,
ra,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let ra = allocs.next(ra);
let opcode = match op {
FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA
FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR
FpuToIntOp::F32ToI64 => 0xb3a8, // CGEBRA
FpuToIntOp::F32ToU64 => 0xb3ac, // CLGEBR
FpuToIntOp::F64ToI32 => 0xb399, // CFDBRA
FpuToIntOp::F64ToU32 => 0xb39d, // CLFDBR
FpuToIntOp::F64ToI64 => 0xb3a9, // CGDBRA
FpuToIntOp::F64ToU64 => 0xb3ad, // CLGDBR
let (opcode, m6, opcode_fpr) = match fpu_op {
FPUOp3::MAdd32 => (0xe78f, 2, 0xb30e), // VFMA, MAEBR
FPUOp3::MAdd64 => (0xe78f, 3, 0xb31e), // VFMA, MADBR
FPUOp3::MSub32 => (0xe78e, 2, 0xb30f), // VFMS, MSEBR
FPUOp3::MSub64 => (0xe78e, 3, 0xb31f), // VFMS, MSDBR
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0));
if rd.to_reg() == ra && is_fpr(rn) && is_fpr(rm) && is_fpr(ra) {
put(sink, &enc_rrd(opcode_fpr, rd.to_reg(), rm, rn));
} else {
put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 8, m6));
}
}
&Inst::IntToFpu { op, rd, rn } => {
&Inst::FpuRound { op, mode, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = match op {
IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA
IntToFpuOp::U32ToF32 => 0xb390, // CELFBR
IntToFpuOp::I64ToF32 => 0xb3a4, // CEGBRA
IntToFpuOp::U64ToF32 => 0xb3a0, // CELGBR
IntToFpuOp::I32ToF64 => 0xb395, // CDFBRA
IntToFpuOp::U32ToF64 => 0xb391, // CDLFBR
IntToFpuOp::I64ToF64 => 0xb3a5, // CDGBRA
IntToFpuOp::U64ToF64 => 0xb3a1, // CDLGBR
let mode = match mode {
FpuRoundMode::Current => 0,
FpuRoundMode::ToNearest => 1,
FpuRoundMode::ShorterPrecision => 3,
FpuRoundMode::ToNearestTiesToEven => 4,
FpuRoundMode::ToZero => 5,
FpuRoundMode::ToPosInfinity => 6,
FpuRoundMode::ToNegInfinity => 7,
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0));
}
&Inst::FpuRound { op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (opcode, m3) = match op {
FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR
FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR
FpuRoundMode::Plus32 => (0xb357, 6), // FIEBR
FpuRoundMode::Plus64 => (0xb35f, 6), // FIDBR
FpuRoundMode::Zero32 => (0xb357, 5), // FIEBR
FpuRoundMode::Zero64 => (0xb35f, 5), // FIDBR
FpuRoundMode::Nearest32 => (0xb357, 4), // FIEBR
FpuRoundMode::Nearest64 => (0xb35f, 4), // FIDBR
let (opcode, m3, opcode_fpr) = match op {
FpuRoundOp::Cvt64To32 => (0xe7c5, 3, Some(0xb344)), // VFLR, LEDBR(A)
FpuRoundOp::Round32 => (0xe7c7, 2, Some(0xb357)), // VFI, FIEBR
FpuRoundOp::Round64 => (0xe7c7, 3, Some(0xb35f)), // VFI, FIDBR
FpuRoundOp::ToSInt32 => (0xe7c2, 2, None), // VCSFP
FpuRoundOp::ToSInt64 => (0xe7c2, 3, None), // VCSFP
FpuRoundOp::ToUInt32 => (0xe7c0, 2, None), // VCLFP
FpuRoundOp::ToUInt64 => (0xe7c0, 3, None), // VCLFP
FpuRoundOp::FromSInt32 => (0xe7c3, 2, None), // VCFPS
FpuRoundOp::FromSInt64 => (0xe7c3, 3, None), // VCFPS
FpuRoundOp::FromUInt32 => (0xe7c1, 2, None), // VCFPL
FpuRoundOp::FromUInt64 => (0xe7c1, 3, None), // VCFPL
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0));
if opcode_fpr.is_some() && is_fpr(rd.to_reg()) && is_fpr(rn) {
put(
sink,
&enc_rrf_cde(opcode_fpr.unwrap(), rd.to_reg(), rn, mode, 0),
);
} else {
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, mode));
}
}
&Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
let rd = allocs.next_writable(rd);
&Inst::FpuCmp32 { rn, rm } => {
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let (opcode, m4) = match fpu_op {
FPUOp2::Max32 => (0xe7ef, 2), // VFMAX
FPUOp2::Max64 => (0xe7ef, 3), // VFMAX
FPUOp2::Min32 => (0xe7ee, 2), // VFMIN
FPUOp2::Min64 => (0xe7ee, 3), // VFMIN
_ => unimplemented!(),
};
put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1));
if is_fpr(rn) && is_fpr(rm) {
let opcode = 0xb309; // CEBR
put(sink, &enc_rre(opcode, rn, rm));
} else {
let opcode = 0xe7cb; // WFC
put(sink, &enc_vrr_a(opcode, rn, rm, 2, 0, 0));
}
}
&Inst::FpuCmp32 { rn, rm } => {
&Inst::FpuCmp64 { rn, rm } => {
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = 0xb309; // CEBR
put(sink, &enc_rre(opcode, rn, rm));
if is_fpr(rn) && is_fpr(rm) {
let opcode = 0xb319; // CDBR
put(sink, &enc_rre(opcode, rn, rm));
} else {
let opcode = 0xe7cb; // WFC
put(sink, &enc_vrr_a(opcode, rn, rm, 3, 0, 0));
}
}
&Inst::FpuCmp64 { rn, rm } => {
&Inst::VecSelect { rd, rn, rm, ra } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let ra = allocs.next(ra);
let opcode = 0xb319; // CDBR
put(sink, &enc_rre(opcode, rn, rm));
let opcode = 0xe78d; // VSEL
put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 0, 0));
}
&Inst::Call { link, ref info } => {

1234
cranelift/codegen/src/isa/s390x/inst/emit_tests.rs

File diff suppressed because it is too large

518
cranelift/codegen/src/isa/s390x/inst/mod.rs

@ -27,8 +27,8 @@ mod emit_tests;
// Instructions (top level): definition
pub use crate::isa::s390x::lower::isle::generated_code::{
ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst,
RxSBGOp, ShiftOp, UnaryOp,
ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, MInst as Inst, RxSBGOp,
ShiftOp, UnaryOp,
};
/// Additional information for (direct) Call instructions, left out of line to lower the size of
@ -156,12 +156,13 @@ impl Inst {
| Inst::FpuMove64 { .. }
| Inst::FpuCMov32 { .. }
| Inst::FpuCMov64 { .. }
| Inst::MovToFpr { .. }
| Inst::MovFromFpr { .. }
| Inst::MovToFpr32 { .. }
| Inst::MovToFpr64 { .. }
| Inst::MovFromFpr32 { .. }
| Inst::MovFromFpr64 { .. }
| Inst::FpuRR { .. }
| Inst::FpuRRR { .. }
| Inst::FpuRRRR { .. }
| Inst::FpuCopysign { .. }
| Inst::FpuCmp32 { .. }
| Inst::FpuCmp64 { .. }
| Inst::FpuLoad32 { .. }
@ -170,10 +171,7 @@ impl Inst {
| Inst::FpuStore64 { .. }
| Inst::LoadFpuConst32 { .. }
| Inst::LoadFpuConst64 { .. }
| Inst::FpuToInt { .. }
| Inst::IntToFpu { .. }
| Inst::FpuRound { .. }
| Inst::FpuVecRRR { .. }
| Inst::VecSelect { .. }
| Inst::Call { .. }
| Inst::CallInd { .. }
| Inst::Ret { .. }
@ -206,6 +204,11 @@ impl Inst {
UnaryOp::PopcntReg => InstructionSet::MIE2,
_ => InstructionSet::Base,
},
// `FpuRound` covers rounding as well as float <-> integer conversions. Only the four
// 32-bit integer conversion ops are reported as needing more than the base instruction set.
// NOTE(review): these ops are emitted as vector instructions with the 32-bit element size;
// as far as I can tell those forms belong to the vector-enhancements facility 2 rather than
// MIE2 -- confirm that `MIE2` is the intended tag here.
Inst::FpuRound { op, .. } => match op {
FpuRoundOp::ToSInt32 | FpuRoundOp::FromSInt32 => InstructionSet::MIE2,
FpuRoundOp::ToUInt32 | FpuRoundOp::FromUInt32 => InstructionSet::MIE2,
// Cvt64To32, Round32/64 and the 64-bit integer conversions.
_ => InstructionSet::Base,
},
// These are all part of VXRS_EXT2
Inst::FpuLoadRev32 { .. }
@ -576,7 +579,10 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_mod(rd);
collector.reg_use(rm);
}
&Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn }
| &Inst::MovToFpr64 { rd, rn }
| &Inst::MovFromFpr32 { rd, rn }
| &Inst::MovFromFpr64 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
@ -584,19 +590,16 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuRRR { rd, rm, .. } => {
collector.reg_mod(rd);
collector.reg_use(rm);
}
&Inst::FpuRRRR { rd, rn, rm, .. } => {
collector.reg_mod(rd);
&Inst::FpuRRR { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::FpuCopysign { rd, rn, rm, .. } => {
&Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_use(ra);
}
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
collector.reg_use(rn);
@ -637,22 +640,15 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
&Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => {
collector.reg_def(rd);
}
&Inst::FpuToInt { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::IntToFpu { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuRound { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuVecRRR { rd, rn, rm, .. } => {
&Inst::VecSelect { rd, rn, rm, ra, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_use(ra);
}
&Inst::Extend { rd, rn, .. } => {
collector.reg_def(rd);
@ -1462,9 +1458,7 @@ impl Inst {
| &Inst::Load64SExt32 { rd, ref mem }
| &Inst::LoadRev16 { rd, ref mem }
| &Inst::LoadRev32 { rd, ref mem }
| &Inst::LoadRev64 { rd, ref mem }
| &Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem } => {
| &Inst::LoadRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_ril) = match self {
&Inst::Load32 { .. } => (Some("l"), Some("ly"), Some("lrl")),
&Inst::Load32ZExt8 { .. } => (None, Some("llc"), None),
@ -1481,8 +1475,6 @@ impl Inst {
&Inst::LoadRev16 { .. } => (None, Some("lrvh"), None),
&Inst::LoadRev32 { .. } => (None, Some("lrv"), None),
&Inst::LoadRev64 { .. } => (None, Some("lrvg"), None),
&Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), None),
&Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), None),
_ => unreachable!(),
};
@ -1505,17 +1497,42 @@ impl Inst {
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem)
}
&Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true);
let op = match self {
&Inst::FpuLoadRev32 { .. } => "vlebrf",
&Inst::FpuLoadRev64 { .. } => "vlebrg",
&Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem }
| &Inst::FpuLoadRev32 { rd, ref mem }
| &Inst::FpuLoadRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), "vlef"),
&Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), "vleg"),
&Inst::FpuLoadRev32 { .. } => (None, None, "vlebrf"),
&Inst::FpuLoadRev64 { .. } => (None, None, "vlebrg"),
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, 0", mem_str, op, rd, mem)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let mem = mem.with_allocs(allocs);
if rd_fpr.is_some() && opcode_rx.is_some() {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, true, false, true);
let op = match &mem {
&MemArg::BXD12 { .. } => opcode_rx,
&MemArg::BXD20 { .. } => opcode_rxy,
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem)
} else {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, false, false, true);
let mem = mem.pretty_print_default();
format!(
"{}{} {}, {}, 0",
mem_str,
opcode_vrx,
rd_fpr.unwrap_or(rd),
mem
)
}
}
&Inst::Store8 { rd, ref mem }
| &Inst::Store16 { rd, ref mem }
@ -1523,9 +1540,7 @@ impl Inst {
| &Inst::Store64 { rd, ref mem }
| &Inst::StoreRev16 { rd, ref mem }
| &Inst::StoreRev32 { rd, ref mem }
| &Inst::StoreRev64 { rd, ref mem }
| &Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem } => {
| &Inst::StoreRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_ril) = match self {
&Inst::Store8 { .. } => (Some("stc"), Some("stcy"), None),
&Inst::Store16 { .. } => (Some("sth"), Some("sthy"), Some("sthrl")),
@ -1534,8 +1549,6 @@ impl Inst {
&Inst::StoreRev16 { .. } => (None, Some("strvh"), None),
&Inst::StoreRev32 { .. } => (None, Some("strv"), None),
&Inst::StoreRev64 { .. } => (None, Some("strvg"), None),
&Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), None),
&Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), None),
_ => unreachable!(),
};
@ -1586,18 +1599,42 @@ impl Inst {
format!("{}{} {}, {}", mem_str, op, mem, imm)
}
&Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
let rd = pretty_print_reg(rd, allocs);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true);
let op = match self {
&Inst::FpuStoreRev32 { .. } => "vstebrf",
&Inst::FpuStoreRev64 { .. } => "vstebrg",
&Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem }
| &Inst::FpuStoreRev32 { rd, ref mem }
| &Inst::FpuStoreRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), "vstef"),
&Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), "vsteg"),
&Inst::FpuStoreRev32 { .. } => (None, None, "vstebrf"),
&Inst::FpuStoreRev64 { .. } => (None, None, "vstebrg"),
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, 0", mem_str, op, rd, mem)
let (rd, rd_fpr) = pretty_print_fpr(rd, allocs);
let mem = mem.with_allocs(allocs);
if rd_fpr.is_some() && opcode_rx.is_some() {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, true, false, true);
let op = match &mem {
&MemArg::BXD12 { .. } => opcode_rx,
&MemArg::BXD20 { .. } => opcode_rxy,
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem)
} else {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, false, false, true);
let mem = mem.pretty_print_default();
format!(
"{}{} {}, {}, 0",
mem_str,
opcode_vrx,
rd_fpr.unwrap_or(rd),
mem
)
}
}
&Inst::LoadMultiple64 { rt, rt2, ref mem } => {
let mem = mem.with_allocs(allocs);
@ -1704,177 +1741,278 @@ impl Inst {
format!("locghi{} {}, {}", cond, rd, imm)
}
&Inst::FpuMove32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ler {}, {}", rd, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("ler {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("vlr {}, {}", rd, rn)
}
}
&Inst::FpuMove64 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ldr {}, {}", rd, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("ldr {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("vlr {}, {}", rd, rn)
}
}
&Inst::FpuCMov32 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ler {}, {}", cond, rd, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ler {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap())
} else {
let cond = cond.invert().pretty_print_default();
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
}
}
&Inst::FpuCMov64 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ldr {}, {}", cond, rd, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ldr {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap())
} else {
let cond = cond.invert().pretty_print_default();
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
}
}
&Inst::MovToFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ldgr {}, {}", rd, rn)
format!("vlvgf {}, {}, 0", rd, rn)
}
&Inst::MovFromFpr { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
&Inst::MovToFpr64 { rd, rn } => {
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("lgdr {}, {}", rd, rn)
if rd_fpr.is_some() {
format!("ldgr {}, {}", rd_fpr.unwrap(), rn)
} else {
format!("vlvgg {}, {}, 0", rd, rn)
}
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let op = match fpu_op {
FPUOp1::Abs32 => "lpebr",
FPUOp1::Abs64 => "lpdbr",
FPUOp1::Neg32 => "lcebr",
FPUOp1::Neg64 => "lcdbr",
FPUOp1::NegAbs32 => "lnebr",
FPUOp1::NegAbs64 => "lndbr",
FPUOp1::Sqrt32 => "sqebr",
FPUOp1::Sqrt64 => "sqdbr",
FPUOp1::Cvt32To64 => "ldebr",
FPUOp1::Cvt64To32 => "ledbr",
};
&Inst::MovFromFpr32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, {}", op, rd, rn)
format!("vlgvf {}, {}, 0", rd, rn)
}
&Inst::FpuRRR { fpu_op, rd, rm } => {
let op = match fpu_op {
FPUOp2::Add32 => "aebr",
FPUOp2::Add64 => "adbr",
FPUOp2::Sub32 => "sebr",
FPUOp2::Sub64 => "sdbr",
FPUOp2::Mul32 => "meebr",
FPUOp2::Mul64 => "mdbr",
FPUOp2::Div32 => "debr",
FPUOp2::Div64 => "ddbr",
_ => unimplemented!(),
};
&Inst::MovFromFpr64 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}", op, rd, rm)
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rn_fpr.is_some() {
format!("lgdr {}, {}", rd, rn_fpr.unwrap())
} else {
format!("vlgvg {}, {}, 0", rd, rn)
}
}
&Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
let op = match fpu_op {
FPUOp3::MAdd32 => "maebr",
FPUOp3::MAdd64 => "madbr",
FPUOp3::MSub32 => "msebr",
FPUOp3::MSub64 => "msdbr",
&Inst::FpuRR { fpu_op, rd, rn } => {
let (op, op_fpr) = match fpu_op {
FPUOp1::Abs32 => ("wflpsb", "lpebr"),
FPUOp1::Abs64 => ("wflpdb", "lpdbr"),
FPUOp1::Neg32 => ("wflcsb", "lcebr"),
FPUOp1::Neg64 => ("wflcdb", "lcdbr"),
FPUOp1::NegAbs32 => ("wflnsb", "lnebr"),
FPUOp1::NegAbs64 => ("wflndb", "lndbr"),
FPUOp1::Sqrt32 => ("wfsqsb", "sqebr"),
FPUOp1::Sqrt64 => ("wfsqdb", "sqdbr"),
FPUOp1::Cvt32To64 => ("wldeb", "ldebr"),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("{} {}, {}", op_fpr, rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("{} {}, {}", op, rd_fpr.unwrap_or(rd), rn_fpr.unwrap_or(rn))
}
}
&Inst::FpuCopysign { rd, rn, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cpsdr {}, {}, {}", rd, rm, rn)
// Pretty-print a binary FP operation. Two textual forms exist:
//   * the two-operand FP mnemonic (`op_fpr rd, rm`), and
//   * the three-operand vector mnemonic (`op rd, rn, rm[, 1]`).
// The selection below mirrors the condition used when emitting this instruction.
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
// (vector mnemonic, optional trailing operand text, FP mnemonic if one exists).
// Min/Max have no FP mnemonic here and always print an extra ", 1" operand.
let (op, opt_m6, op_fpr) = match fpu_op {
FPUOp2::Add32 => ("wfasb", "", Some("aebr")),
FPUOp2::Add64 => ("wfadb", "", Some("adbr")),
FPUOp2::Sub32 => ("wfssb", "", Some("sebr")),
FPUOp2::Sub64 => ("wfsdb", "", Some("sdbr")),
FPUOp2::Mul32 => ("wfmsb", "", Some("meebr")),
FPUOp2::Mul64 => ("wfmdb", "", Some("mdbr")),
FPUOp2::Div32 => ("wfdsb", "", Some("debr")),
FPUOp2::Div64 => ("wfddb", "", Some("ddbr")),
FPUOp2::Max32 => ("wfmaxsb", ", 1", None),
FPUOp2::Max64 => ("wfmaxdb", ", 1", None),
FPUOp2::Min32 => ("wfminsb", ", 1", None),
FPUOp2::Min64 => ("wfmindb", ", 1", None),
};
// Each call yields the register's printed name plus an optional second name that is
// presumably `Some` only when the register has an FPR spelling -- see `pretty_print_fpr`.
// The calls consume `allocs` in operand order, so their sequence must not change.
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
// The FP form overwrites its first operand, so it is only usable when rd and rn print
// as the same register; rn needs no separate FPR test because it equals rd.
if op_fpr.is_some() && rd == rn && rd_fpr.is_some() && rm_fpr.is_some() {
format!(
"{} {}, {}",
op_fpr.unwrap(),
rd_fpr.unwrap(),
rm_fpr.unwrap()
)
} else {
// Vector form: each operand still uses its FPR name when one is available.
format!(
"{} {}, {}, {}{}",
op,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
rm_fpr.unwrap_or(rm),
opt_m6
)
}
}
// Pretty-print a ternary (multiply-and-add / multiply-and-subtract) FP operation.
// Either the three-operand FP mnemonic (`op_fpr rd, rn, rm`, with `ra` implied by `rd`)
// or the four-operand vector mnemonic (`op rd, rn, rm, ra`) is produced.
&Inst::FpuRRRR {
fpu_op,
rd,
rn,
rm,
ra,
} => {
// (vector mnemonic, FP mnemonic); every FPUOp3 variant has both.
let (op, op_fpr) = match fpu_op {
FPUOp3::MAdd32 => ("wfmasb", "maebr"),
FPUOp3::MAdd64 => ("wfmadb", "madbr"),
FPUOp3::MSub32 => ("wfmssb", "msebr"),
FPUOp3::MSub64 => ("wfmsdb", "msdbr"),
};
// Each call yields the register's printed name plus an optional second name that is
// presumably `Some` only when the register has an FPR spelling -- see `pretty_print_fpr`.
// The calls consume `allocs` in operand order, so their sequence must not change.
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
let (ra, ra_fpr) = pretty_print_fpr(ra, allocs);
// The FP form needs rd and ra to be the same register. `ra_fpr` is not tested
// separately: when rd == ra, the `rd_fpr` check covers it.
if rd == ra && rd_fpr.is_some() && rn_fpr.is_some() && rm_fpr.is_some() {
format!(
"{} {}, {}, {}",
op_fpr,
rd_fpr.unwrap(),
rn_fpr.unwrap(),
rm_fpr.unwrap()
)
} else {
// Vector form: each operand still uses its FPR name when one is available.
format!(
"{} {}, {}, {}, {}",
op,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
rm_fpr.unwrap_or(rm),
ra_fpr.unwrap_or(ra)
)
}
}
&Inst::FpuCmp32 { rn, rm } => {
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cebr {}, {}", rn, rm)
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rn_fpr.is_some() && rm_fpr.is_some() {
format!("cebr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap())
} else {
format!("wfcsb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm))
}
}
&Inst::FpuCmp64 { rn, rm } => {
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cdbr {}, {}", rn, rm)
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rn_fpr.is_some() && rm_fpr.is_some() {
format!("cdbr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap())
} else {
format!("wfcdb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm))
}
}
&Inst::LoadFpuConst32 { rd, const_data } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs);
format!(
"bras {}, 8 ; data.f32 {} ; le {}, 0({})",
tmp,
f32::from_bits(const_data),
rd,
tmp
)
if rd_fpr.is_some() {
format!(
"bras {}, 8 ; data.f32 {} ; le {}, 0({})",
tmp,
f32::from_bits(const_data),
rd_fpr.unwrap(),
tmp
)
} else {
format!(
"bras {}, 8 ; data.f32 {} ; vlef {}, 0({}), 0",
tmp,
f32::from_bits(const_data),
rd,
tmp
)
}
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs);
format!(
"bras {}, 12 ; data.f64 {} ; ld {}, 0({})",
tmp,
f64::from_bits(const_data),
rd,
tmp
)
}
&Inst::FpuToInt { op, rd, rn } => {
let op = match op {
FpuToIntOp::F32ToI32 => "cfebra",
FpuToIntOp::F32ToU32 => "clfebr",
FpuToIntOp::F32ToI64 => "cgebra",
FpuToIntOp::F32ToU64 => "clgebr",
FpuToIntOp::F64ToI32 => "cfdbra",
FpuToIntOp::F64ToU32 => "clfdbr",
FpuToIntOp::F64ToI64 => "cgdbra",
FpuToIntOp::F64ToU64 => "clgdbr",
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, 5, {}, 0", op, rd, rn)
if rd_fpr.is_some() {
format!(
"bras {}, 12 ; data.f64 {} ; ld {}, 0({})",
tmp,
f64::from_bits(const_data),
rd_fpr.unwrap(),
tmp
)
} else {
format!(
"bras {}, 12 ; data.f64 {} ; vleg {}, 0({}), 0",
tmp,
f64::from_bits(const_data),
rd,
tmp
)
}
}
&Inst::IntToFpu { op, rd, rn } => {
let op = match op {
IntToFpuOp::I32ToF32 => "cefbra",
IntToFpuOp::U32ToF32 => "celfbr",
IntToFpuOp::I64ToF32 => "cegbra",
IntToFpuOp::U64ToF32 => "celgbr",
IntToFpuOp::I32ToF64 => "cdfbra",
IntToFpuOp::U32ToF64 => "cdlfbr",
IntToFpuOp::I64ToF64 => "cdgbra",
IntToFpuOp::U64ToF64 => "cdlgbr",
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, 0, {}, 0", op, rd, rn)
}
&Inst::FpuRound { op, rd, rn } => {
let (op, m3) = match op {
FpuRoundMode::Minus32 => ("fiebr", 7),
FpuRoundMode::Minus64 => ("fidbr", 7),
FpuRoundMode::Plus32 => ("fiebr", 6),
FpuRoundMode::Plus64 => ("fidbr", 6),
FpuRoundMode::Zero32 => ("fiebr", 5),
FpuRoundMode::Zero64 => ("fidbr", 5),
FpuRoundMode::Nearest32 => ("fiebr", 4),
FpuRoundMode::Nearest64 => ("fidbr", 4),
&Inst::FpuRound { op, mode, rd, rn } => {
let mode = match mode {
FpuRoundMode::Current => 0,
FpuRoundMode::ToNearest => 1,
FpuRoundMode::ShorterPrecision => 3,
FpuRoundMode::ToNearestTiesToEven => 4,
FpuRoundMode::ToZero => 5,
FpuRoundMode::ToPosInfinity => 6,
FpuRoundMode::ToNegInfinity => 7,
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, {}, {}", op, rd, rn, m3)
}
&Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
let op = match fpu_op {
FPUOp2::Max32 => "wfmaxsb",
FPUOp2::Max64 => "wfmaxdb",
FPUOp2::Min32 => "wfminsb",
FPUOp2::Min64 => "wfmindb",
_ => unimplemented!(),
let (opcode, opcode_fpr) = match op {
FpuRoundOp::Cvt64To32 => ("wledb", Some("ledbra")),
FpuRoundOp::Round32 => ("wfisb", Some("fiebr")),
FpuRoundOp::Round64 => ("wfidb", Some("fidbr")),
FpuRoundOp::ToSInt32 => ("wcfeb", None),
FpuRoundOp::ToSInt64 => ("wcgdb", None),
FpuRoundOp::ToUInt32 => ("wclfeb", None),
FpuRoundOp::ToUInt64 => ("wclgdb", None),
FpuRoundOp::FromSInt32 => ("wcefb", None),
FpuRoundOp::FromSInt64 => ("wcdgb", None),
FpuRoundOp::FromUInt32 => ("wcelfb", None),
FpuRoundOp::FromUInt64 => ("wcdlgb", None),
};
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if opcode_fpr.is_some() && rd_fpr.is_some() && rn_fpr.is_some() {
format!(
"{} {}, {}, {}",
opcode_fpr.unwrap(),
rd_fpr.unwrap(),
rn_fpr.unwrap(),
mode
)
} else {
format!(
"{} {}, {}, 0, {}",
opcode,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
mode
)
}
}
&Inst::VecSelect { rd, rn, rm, ra } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}, {}, 1", op, rd, rn, rm)
let ra = pretty_print_reg(ra, allocs);
format!("vsel {}, {}, {}, {}", rd, rn, rm, ra)
}
&Inst::Extend {
rd,

87
cranelift/codegen/src/isa/s390x/inst/regs.rs

@ -27,21 +27,28 @@ pub fn writable_gpr(num: u8) -> Writable<Reg> {
Writable::from_reg(gpr(num))
}
/// Get a reference to a FPR (floating-point register).
pub fn fpr(num: u8) -> Reg {
let preg = fpr_preg(num);
/// Get a reference to a VR (vector register).
pub fn vr(num: u8) -> Reg {
let preg = vr_preg(num);
Reg::from(VReg::new(preg.index(), RegClass::Float))
}
pub(crate) const fn fpr_preg(num: u8) -> PReg {
assert!(num < 16);
pub(crate) const fn vr_preg(num: u8) -> PReg {
assert!(num < 32);
PReg::new(num as usize, RegClass::Float)
}
/// Get a writable reference to a FPR.
/// Get a writable reference to a VR.
#[allow(dead_code)] // used by tests.
pub fn writable_fpr(num: u8) -> Writable<Reg> {
Writable::from_reg(fpr(num))
pub fn writable_vr(num: u8) -> Writable<Reg> {
Writable::from_reg(vr(num))
}
/// Report whether a vector register aliases one of the floating-point registers.
///
/// Returns `true` when the register's hardware encoding is below 16.
///
/// # Panics
///
/// Panics if `r` is not a real register, or if it is not in the `Float`
/// register class.
pub fn is_fpr(r: Reg) -> bool {
    let real = r.to_real_reg().unwrap();
    assert!(real.class() == RegClass::Float);
    real.hw_enc() < 16
}
/// Get a reference to the stack-pointer register.
@ -92,14 +99,30 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
preg(gpr(5)),
],
vec![
preg(fpr(0)),
preg(fpr(1)),
preg(fpr(2)),
preg(fpr(3)),
preg(fpr(4)),
preg(fpr(5)),
preg(fpr(6)),
preg(fpr(7)),
preg(vr(0)),
preg(vr(1)),
preg(vr(2)),
preg(vr(3)),
preg(vr(4)),
preg(vr(5)),
preg(vr(6)),
preg(vr(7)),
preg(vr(16)),
preg(vr(17)),
preg(vr(18)),
preg(vr(19)),
preg(vr(20)),
preg(vr(21)),
preg(vr(22)),
preg(vr(23)),
preg(vr(24)),
preg(vr(25)),
preg(vr(26)),
preg(vr(27)),
preg(vr(28)),
preg(vr(29)),
preg(vr(30)),
preg(vr(31)),
],
],
non_preferred_regs_by_class: [
@ -116,14 +139,14 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
// no r15; it is the stack pointer.
],
vec![
preg(fpr(8)),
preg(fpr(9)),
preg(fpr(10)),
preg(fpr(11)),
preg(fpr(12)),
preg(fpr(13)),
preg(fpr(14)),
preg(fpr(15)),
preg(vr(8)),
preg(vr(9)),
preg(vr(10)),
preg(vr(11)),
preg(vr(12)),
preg(vr(13)),
preg(vr(14)),
preg(vr(15)),
],
],
fixed_stack_slots: vec![],
@ -134,14 +157,28 @@ pub fn show_reg(reg: Reg) -> String {
if let Some(rreg) = reg.to_real_reg() {
match rreg.class() {
RegClass::Int => format!("%r{}", rreg.hw_enc()),
RegClass::Float => format!("%f{}", rreg.hw_enc()),
RegClass::Float => format!("%v{}", rreg.hw_enc()),
}
} else {
format!("%{:?}", reg)
}
}
/// Produce the `%fN` spelling of `reg`, if it has one.
///
/// Yields `None` when `reg` is not a real register or when `is_fpr` reports
/// that it does not overlap an FPR. Note that `is_fpr` asserts the register
/// is in the `Float` class, so a real register of another class panics.
pub fn maybe_show_fpr(reg: Reg) -> Option<String> {
    let rreg = reg.to_real_reg()?;
    is_fpr(reg).then(|| format!("%f{}", rreg.hw_enc()))
}
/// Resolve `reg` through the allocation consumer and format the result
/// with `show_reg`.
pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String {
    show_reg(allocs.next(reg))
}
/// Resolve `reg` through the allocation consumer and return both of its names.
///
/// The first element is the `show_reg` spelling; the second is the
/// `maybe_show_fpr` spelling, which is `None` when the register has no
/// FPR name.
pub fn pretty_print_fpr(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> (String, Option<String>) {
    let allocated = allocs.next(reg);
    let full_name = show_reg(allocated);
    let fpr_name = maybe_show_fpr(allocated);
    (full_name, fpr_name)
}

20
cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs

@ -45,7 +45,7 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
Register(14),
Register(15),
];
const FPR_MAP: [gimli::Register; 16] = [
const VR_MAP: [gimli::Register; 32] = [
Register(16),
Register(20),
Register(17),
@ -62,11 +62,27 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
Register(30),
Register(27),
Register(31),
Register(68),
Register(72),
Register(69),
Register(73),
Register(70),
Register(74),
Register(71),
Register(75),
Register(76),
Register(80),
Register(77),
Register(81),
Register(78),
Register(82),
Register(79),
Register(83),
];
match reg.class() {
RegClass::Int => Ok(GPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
RegClass::Float => Ok(FPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
RegClass::Float => Ok(VR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
}
}

166
cranelift/codegen/src/isa/s390x/lower.isle

@ -963,8 +963,10 @@
;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Copysign of two registers.
(rule (lower (has_type ty (fcopysign x y)))
(fpu_copysign ty x y))
;; For $F32 the select mask is 2147483647 = 0x7fffffff, i.e. every bit except
;; the sign bit.  NOTE(review): presumably vec_select takes the mask's 1-bits
;; from x and its 0-bits from y, giving x's magnitude with y's sign -- confirm
;; against the VECTOR SELECT definition.
(rule (lower (has_type $F32 (fcopysign x y)))
(vec_select $F32 x y (imm $F32 2147483647)))
;; Same scheme for $F64: 9223372036854775807 = 0x7fffffffffffffff.
(rule (lower (has_type $F64 (fcopysign x y)))
(vec_select $F64 x y (imm $F64 9223372036854775807)))
;;;; Rules for `fma` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1034,120 +1036,148 @@
;; Demote a register.
(rule (lower (has_type dst_ty (fdemote x @ (value_type src_ty))))
(fdemote_reg dst_ty src_ty x))
(fdemote_reg dst_ty src_ty (FpuRoundMode.Current) x))
;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert an unsigned integer value in a register to floating-point.
(rule (lower (has_type dst_ty (fcvt_from_uint x @ (value_type src_ty))))
(fcvt_from_uint_reg dst_ty (ty_ext32 src_ty)
(put_in_reg_zext32 x)))
;; Convert a 32-bit or smaller unsigned integer to $F32 (z15 instruction).
(rule (lower (has_type $F32
(fcvt_from_uint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty))))))
(fcvt_from_uint_reg $F32 (FpuRoundMode.ToNearestTiesToEven)
(mov_to_fpr32 (put_in_reg_zext32 x))))
;; Convert a 64-bit or smaller unsigned integer to $F32, via an intermediate $F64.
(rule (lower (has_type $F32 (fcvt_from_uint x @ (value_type (fits_in_64 ty)))))
(fdemote_reg $F32 $F64 (FpuRoundMode.ToNearestTiesToEven)
(fcvt_from_uint_reg $F64 (FpuRoundMode.ShorterPrecision)
(mov_to_fpr64 (put_in_reg_zext64 x)))))
;; Convert a 64-bit or smaller unsigned integer to $F64.
(rule (lower (has_type $F64 (fcvt_from_uint x @ (value_type (fits_in_64 ty)))))
(fcvt_from_uint_reg $F64 (FpuRoundMode.ToNearestTiesToEven)
(mov_to_fpr64 (put_in_reg_zext64 x))))
;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert a signed integer value in a register to floating-point.
(rule (lower (has_type dst_ty (fcvt_from_sint x @ (value_type src_ty))))
(fcvt_from_sint_reg dst_ty (ty_ext32 src_ty)
(put_in_reg_sext32 x)))
;; Convert a 32-bit or smaller signed integer to $F32 (z15 instruction).
(rule (lower (has_type $F32
(fcvt_from_sint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty))))))
(fcvt_from_sint_reg $F32 (FpuRoundMode.ToNearestTiesToEven)
(mov_to_fpr32 (put_in_reg_sext32 x))))
;; Convert a 64-bit or smaller signed integer to $F32, via an intermediate $F64.
(rule (lower (has_type $F32 (fcvt_from_sint x @ (value_type (fits_in_64 ty)))))
(fdemote_reg $F32 $F64 (FpuRoundMode.ToNearestTiesToEven)
(fcvt_from_sint_reg $F64 (FpuRoundMode.ShorterPrecision)
(mov_to_fpr64 (put_in_reg_sext64 x)))))
;; Convert a 64-bit or smaller signed integer to $F64.
(rule (lower (has_type $F64 (fcvt_from_sint x @ (value_type (fits_in_64 ty)))))
(fcvt_from_sint_reg $F64 (FpuRoundMode.ToNearestTiesToEven)
(mov_to_fpr64 (put_in_reg_sext64 x))))
;;;; Rules for `fcvt_to_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert a floating-point value in a register to an unsigned integer value.
;; Traps if the input cannot be represented in the output type.
;; FIXME: Add support for 8-/16-bit destination types (needs overflow check).
(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_uint x @ (value_type src_ty))))
(let ((src Reg x)
(rule (lower (has_type dst_ty (fcvt_to_uint x @ (value_type src_ty))))
(let ((src Reg (put_in_reg x))
;; First, check whether the input is a NaN, and trap if so.
(_ Reg (trap_if (fcmp_reg src_ty src src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_bad_conversion_to_integer)))
;; Perform the conversion. If this sets CC 3, we have a
;; "special case". Since we already excluded the case where
;; the input was a NaN, the only other option is that the
;; conversion overflowed the target type.
(dst Reg (trap_if (fcvt_to_uint_reg_with_flags dst_ty src_ty src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_integer_overflow))))
dst))
(_1 Reg (trap_if (fcmp_reg src_ty src src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_bad_conversion_to_integer)))
;; Now check whether the input is out of range for the target type.
(_2 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_uint_ub src_ty dst_ty))
(floatcc_as_cond (FloatCC.GreaterThanOrEqual))
(trap_code_integer_overflow)))
(_3 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_uint_lb src_ty))
(floatcc_as_cond (FloatCC.LessThanOrEqual))
(trap_code_integer_overflow)))
;; Perform the conversion using the larger type size.
(flt_ty Type (fcvt_flt_ty dst_ty src_ty))
(src_ext Reg (fpromote_reg flt_ty src_ty src)))
(fcvt_to_uint_reg flt_ty (FpuRoundMode.ToZero) src_ext)))
;;;; Rules for `fcvt_to_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert a floating-point value in a register to a signed integer value.
;; Traps if the input cannot be represented in the output type.
;; FIXME: Add support for 8-/16-bit destination types (needs overflow check).
(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_sint x @ (value_type src_ty))))
(let ((src Reg x)
(rule (lower (has_type dst_ty (fcvt_to_sint x @ (value_type src_ty))))
(let ((src Reg (put_in_reg x))
;; First, check whether the input is a NaN, and trap if so.
(_ Reg (trap_if (fcmp_reg src_ty src src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_bad_conversion_to_integer)))
;; Perform the conversion. If this sets CC 3, we have a
;; "special case". Since we already excluded the case where
;; the input was a NaN, the only other option is that the
;; conversion overflowed the target type.
(dst Reg (trap_if (fcvt_to_sint_reg_with_flags dst_ty src_ty src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_integer_overflow))))
dst))
(_1 Reg (trap_if (fcmp_reg src_ty src src)
(floatcc_as_cond (FloatCC.Unordered))
(trap_code_bad_conversion_to_integer)))
;; Now check whether the input is out of range for the target type.
(_2 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_sint_ub src_ty dst_ty))
(floatcc_as_cond (FloatCC.GreaterThanOrEqual))
(trap_code_integer_overflow)))
(_3 Reg (trap_if (fcmp_reg src_ty src (fcvt_to_sint_lb src_ty dst_ty))
(floatcc_as_cond (FloatCC.LessThanOrEqual))
(trap_code_integer_overflow)))
;; Perform the conversion using the larger type size.
(flt_ty Type (fcvt_flt_ty dst_ty src_ty))
(src_ext Reg (fpromote_reg flt_ty src_ty src)))
;; Perform the conversion.
(fcvt_to_sint_reg flt_ty (FpuRoundMode.ToZero) src_ext)))
;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert a floating-point value in a register to an unsigned integer value.
;; FIXME: Add support for 8-/16-bit destination types (needs overflow check).
(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_uint_sat x @ (value_type src_ty))))
(let ((src Reg x)
(dst Reg (fcvt_to_uint_reg dst_ty src_ty src))
;; In most special cases, the Z instruction already yields the
;; result expected by Cranelift semantics. The only exception
;; is the case where the input was a NaN. We explicitly check
;; for that and force the output to 0 in that case.
(sat Reg (with_flags_reg (fcmp_reg src_ty src src)
(cmov_imm dst_ty
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
sat))
(rule (lower (has_type dst_ty (fcvt_to_uint_sat x @ (value_type src_ty))))
(let ((src Reg (put_in_reg x))
;; Perform the conversion using the larger type size.
(flt_ty Type (fcvt_flt_ty dst_ty src_ty))
(int_ty Type (fcvt_int_ty dst_ty src_ty))
(src_ext Reg (fpromote_reg flt_ty src_ty src))
(dst Reg (fcvt_to_uint_reg flt_ty (FpuRoundMode.ToZero) src_ext)))
;; Clamp the output to the destination type bounds.
(uint_sat_reg dst_ty int_ty dst)))
;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Convert a floating-point value in a register to a signed integer value.
;; FIXME: Add support for 8-/16-bit destination types (needs overflow check).
(rule (lower (has_type (ty_32_or_64 dst_ty) (fcvt_to_sint_sat x @ (value_type src_ty))))
(let ((src Reg x)
(dst Reg (fcvt_to_sint_reg dst_ty src_ty src))
(rule (lower (has_type dst_ty (fcvt_to_sint_sat x @ (value_type src_ty))))
(let ((src Reg (put_in_reg x))
;; Perform the conversion using the larger type size.
(flt_ty Type (fcvt_flt_ty dst_ty src_ty))
(int_ty Type (fcvt_int_ty dst_ty src_ty))
(src_ext Reg (fpromote_reg flt_ty src_ty src))
(dst Reg (fcvt_to_sint_reg flt_ty (FpuRoundMode.ToZero) src_ext))
;; In most special cases, the Z instruction already yields the
;; result expected by Cranelift semantics. The only exception
;; is the case where the input was a NaN. We explicitly check
;; for that and force the output to 0 in that case.
(sat Reg (with_flags_reg (fcmp_reg src_ty src src)
(cmov_imm dst_ty
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
sat))
(cmov_imm int_ty
(floatcc_as_cond (FloatCC.Unordered)) 0 dst))))
;; Clamp the output to the destination type bounds.
(sint_sat_reg dst_ty int_ty sat)))
;;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Reinterpret a 64-bit integer value as floating-point.
(rule (lower (has_type $F64 (bitcast x @ (value_type $I64))))
(mov_to_fpr x))
(mov_to_fpr64 x))
;; Reinterpret a 64-bit floating-point value as integer.
(rule (lower (has_type $I64 (bitcast x @ (value_type $F64))))
(mov_from_fpr x))
(mov_from_fpr64 x))
;; Reinterpret a 32-bit integer value as floating-point (via $I64).
;; Note that a 32-bit float is located in the high bits of the GPR.
(rule (lower (has_type $F32 (bitcast x @ (value_type $I32))))
(mov_to_fpr (lshl_imm $I64 x 32)))
(mov_to_fpr32 x))
;; Reinterpret a 32-bit floating-point value as integer (via $I64).
;; Note that a 32-bit float is located in the high bits of the GPR.
(rule (lower (has_type $I32 (bitcast x @ (value_type $F32))))
(lshr_imm $I64 (mov_from_fpr x) 32))
(mov_from_fpr32 x))
;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1232,7 +1262,7 @@
(rule (lower (has_type (and (vxrs_ext2_disabled) $F32)
(load flags @ (littleendian) addr offset)))
(let ((gpr Reg (loadrev32 (lower_address flags addr offset))))
(mov_to_fpr (lshl_imm $I64 gpr 32))))
(mov_to_fpr32 gpr)))
;; Load 64-bit big-endian floating-point values.
(rule (lower (has_type $F64 (load flags @ (bigendian) addr offset)))
@ -1247,7 +1277,7 @@
(rule (lower (has_type (and (vxrs_ext2_disabled) $F64)
(load flags @ (littleendian) addr offset)))
(let ((gpr Reg (loadrev64 (lower_address flags addr offset))))
(mov_to_fpr gpr)))
(mov_to_fpr64 gpr)))
;;;; Rules for `uload8` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -1390,7 +1420,7 @@
;; Store 32-bit little-endian floating-point type (via GPR on z14).
(rule (lower (store flags @ (littleendian)
val @ (value_type (and $F32 (vxrs_ext2_disabled))) addr offset))
(let ((gpr Reg (lshr_imm $I64 (mov_from_fpr (put_in_reg val)) 32)))
(let ((gpr Reg (mov_from_fpr32 (put_in_reg val))))
(side_effect (storerev32 gpr (lower_address flags addr offset)))))
;; Store 64-bit big-endian floating-point type.
@ -1408,7 +1438,7 @@
;; Store 64-bit little-endian floating-point type (via GPR on z14).
(rule (lower (store flags @ (littleendian)
val @ (value_type (and $F64 (vxrs_ext2_disabled))) addr offset))
(let ((gpr Reg (mov_from_fpr (put_in_reg val))))
(let ((gpr Reg (mov_from_fpr64 (put_in_reg val))))
(side_effect (storerev64 gpr (lower_address flags addr offset)))))

42
cranelift/codegen/src/isa/s390x/lower/isle.rs

@ -425,6 +425,48 @@ where
i64::from(off)
}
/// Bit pattern of the `f32` value 2^`size`, widened to `u64`.
///
/// NOTE(review): going by the name, this is presumably the exclusive upper
/// bound checked before an `f32` is converted to a `size`-bit unsigned
/// integer; confirm against the ISLE caller.
#[inline]
fn fcvt_to_uint_ub32(&mut self, size: u8) -> u64 {
    let exponent = i32::from(size);
    let bound = 2.0_f32.powi(exponent);
    u64::from(bound.to_bits())
}
/// Bit pattern of the `f32` value -1.0, widened to `u64`.
///
/// NOTE(review): going by the name, this is presumably the exclusive lower
/// bound checked before an `f32` is converted to an unsigned integer;
/// confirm against the ISLE caller.
#[inline]
fn fcvt_to_uint_lb32(&mut self) -> u64 {
    let bound: f32 = -1.0;
    u64::from(bound.to_bits())
}
/// Bit pattern of the `f64` value 2^`size`.
///
/// NOTE(review): going by the name, this is presumably the exclusive upper
/// bound checked before an `f64` is converted to a `size`-bit unsigned
/// integer; confirm against the ISLE caller.
#[inline]
fn fcvt_to_uint_ub64(&mut self, size: u8) -> u64 {
    let exponent = i32::from(size);
    let bound = 2.0_f64.powi(exponent);
    bound.to_bits()
}
/// Bit pattern of the `f64` value -1.0.
///
/// NOTE(review): going by the name, this is presumably the exclusive lower
/// bound checked before an `f64` is converted to an unsigned integer;
/// confirm against the ISLE caller.
#[inline]
fn fcvt_to_uint_lb64(&mut self) -> u64 {
    let bound: f64 = -1.0;
    bound.to_bits()
}
/// Bit pattern of the `f32` value 2^(`size` - 1), widened to `u64`.
///
/// The subtraction is performed on the `u8` before widening, exactly as the
/// exponent is formed from `size`.
///
/// NOTE(review): going by the name, this is presumably the exclusive upper
/// bound checked before an `f32` is converted to a `size`-bit signed
/// integer; confirm against the ISLE caller.
#[inline]
fn fcvt_to_sint_ub32(&mut self, size: u8) -> u64 {
    let exponent = i32::from(size - 1);
    let bound = 2.0_f32.powi(exponent);
    u64::from(bound.to_bits())
}
/// Bit pattern (widened to `u64`) of an `f32` just below (-2)^(`size` - 1).
///
/// Two candidate patterns are formed: the limit's own pattern plus one, and
/// the pattern of `limit - 1.0`. The numerically larger pattern is returned.
///
/// NOTE(review): for a negative limit (odd `size - 1`) a larger bit pattern
/// denotes a more negative float, so this appears to select `limit - 1.0`
/// when that is representable and the next float below the limit otherwise;
/// confirm the intended bound semantics against the ISLE caller.
#[inline]
fn fcvt_to_sint_lb32(&mut self, size: u8) -> u64 {
    let limit = (-2.0_f32).powi(i32::from(size - 1));
    let next_pattern = limit.to_bits() + 1;
    let minus_one_pattern = (limit - 1.0).to_bits();
    u64::from(next_pattern.max(minus_one_pattern))
}
/// Bit pattern of the `f64` value 2^(`size` - 1).
///
/// The subtraction is performed on the `u8` before widening, exactly as the
/// exponent is formed from `size`.
///
/// NOTE(review): going by the name, this is presumably the exclusive upper
/// bound checked before an `f64` is converted to a `size`-bit signed
/// integer; confirm against the ISLE caller.
#[inline]
fn fcvt_to_sint_ub64(&mut self, size: u8) -> u64 {
    let exponent = i32::from(size - 1);
    let bound = 2.0_f64.powi(exponent);
    bound.to_bits()
}
/// Bit pattern of an `f64` just below (-2)^(`size` - 1).
///
/// Two candidate patterns are formed: the limit's own pattern plus one, and
/// the pattern of `limit - 1.0`. The numerically larger pattern is returned.
///
/// NOTE(review): for a negative limit (odd `size - 1`) a larger bit pattern
/// denotes a more negative float, so this appears to select `limit - 1.0`
/// when that is representable and the next float below the limit otherwise;
/// confirm the intended bound semantics against the ISLE caller.
#[inline]
fn fcvt_to_sint_lb64(&mut self, size: u8) -> u64 {
    let limit = (-2.0_f64).powi(i32::from(size - 1));
    let next_pattern = limit.to_bits() + 1;
    let minus_one_pattern = (limit - 1.0).to_bits();
    next_pattern.max(minus_one_pattern)
}
#[inline]
fn littleendian(&mut self, flags: MemFlags) -> Option<()> {
let endianness = flags.endianness(Endianness::Big);

736
cranelift/filetests/filetests/isa/s390x/floating-point-arch13.clif

@ -0,0 +1,736 @@
test compile precise-output
target s390x arch13
function %fcvt_to_uint_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 256 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wclfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_sint.i8 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 128 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -129 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wcfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 65536 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wclfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_sint.i16 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 32768 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -32769 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wcfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f32_i32(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 4294967300 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wclfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f32_i32(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 2147483600 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -2147484000 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wcfeb %v21, %f0, 0, 5
; vlgvf %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f32_i64(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 18446744000000000000 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wclgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_sint_f32_i64(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 9223372000000000000 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -9223373000000000000 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wcgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_uint_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 256 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_sint.i8 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 128 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -129 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 65536 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_sint.i16 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 32768 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -32769 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i32(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint.i32 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 4294967296 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i32(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint.i32 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 2147483648 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -2147483649 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i64(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint.i64 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 18446744073709552000 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i64(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint.i64 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 9223372036854776000 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -9223372036854778000 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_from_uint_i8_f32(i8) -> f32 {
block0(v0: i8):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; llcr %r5, %r2
; vlvgf %v5, %r5, 0
; wcelfb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i8_f32(i8) -> f32 {
block0(v0: i8):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; lbr %r5, %r2
; vlvgf %v5, %r5, 0
; wcefb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i16_f32(i16) -> f32 {
block0(v0: i16):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; llhr %r5, %r2
; vlvgf %v5, %r5, 0
; wcelfb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i16_f32(i16) -> f32 {
block0(v0: i16):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; lhr %r5, %r2
; vlvgf %v5, %r5, 0
; wcefb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i32_f32(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; vlvgf %v3, %r2, 0
; wcelfb %f0, %f3, 0, 4
; br %r14
function %fcvt_from_sint_i32_f32(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; vlvgf %v3, %r2, 0
; wcefb %f0, %f3, 0, 4
; br %r14
function %fcvt_from_uint_i64_f32(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; ldgr %f3, %r2
; wcdlgb %f5, %f3, 0, 3
; ledbra %f0, %f5, 4
; br %r14
function %fcvt_from_sint_i64_f32(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; ldgr %f3, %r2
; wcdgb %f5, %f3, 0, 3
; ledbra %f0, %f5, 4
; br %r14
function %fcvt_from_uint_i8_f64(i8) -> f64 {
block0(v0: i8):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; llgcr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i8_f64(i8) -> f64 {
block0(v0: i8):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; lgbr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i16_f64(i16) -> f64 {
block0(v0: i16):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; llghr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i16_f64(i16) -> f64 {
block0(v0: i16):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; lghr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i32_f64(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; llgfr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i32_f64(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; lgfr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i64_f64(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; ldgr %f3, %r2
; wcdlgb %f0, %f3, 0, 4
; br %r14
function %fcvt_from_sint_i64_f64(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; ldgr %f3, %r2
; wcdgb %f0, %f3, 0, 4
; br %r14
function %fcvt_to_uint_sat_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i8 v0
return v1
}
; block0:
; wclfeb %f3, %f0, 0, 5
; vlgvf %r3, %v3, 0
; lgr %r2, %r3
; clfi %r3, 256
; lochih %r2, 255
; br %r14
function %fcvt_to_sint_sat_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i8 v0
return v1
}
; block0:
; wcfeb %f3, %f0, 0, 5
; vlgvf %r3, %v3, 0
; cebr %f0, %f0
; lochio %r3, 0
; lgr %r4, %r3
; chi %r3, 127
; lochih %r4, 127
; lgr %r2, %r4
; chi %r4, -128
; lochil %r2, -128
; br %r14
function %fcvt_to_uint_sat_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i16 v0
return v1
}
; block0:
; wclfeb %f3, %f0, 0, 5
; vlgvf %r3, %v3, 0
; lgr %r2, %r3
; clfi %r3, 65535
; lochih %r2, -1
; br %r14
function %fcvt_to_sint_sat_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i16 v0
return v1
}
; block0:
; wcfeb %f3, %f0, 0, 5
; vlgvf %r3, %v3, 0
; cebr %f0, %f0
; lochio %r3, 0
; lgr %r4, %r3
; chi %r3, 32767
; lochih %r4, 32767
; lgr %r2, %r4
; chi %r4, -32768
; lochil %r2, -32768
; br %r14
function %fcvt_to_uint_sat_f32_i32(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; wclfeb %f3, %f0, 0, 5
; vlgvf %r2, %v3, 0
; br %r14
function %fcvt_to_sint_sat_f32_i32(f32) -> i32 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; wcfeb %f3, %f0, 0, 5
; vlgvf %r2, %v3, 0
; cebr %f0, %f0
; lochio %r2, 0
; br %r14
function %fcvt_to_uint_sat_f32_i64(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wclgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; br %r14
function %fcvt_to_sint_sat_f32_i64(f32) -> i64 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wcgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; cebr %f0, %f0
; locghio %r2, 0
; br %r14
function %fcvt_to_uint_sat_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i8 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; lgr %r2, %r3
; clgfi %r3, 256
; locghih %r2, 255
; br %r14
function %fcvt_to_sint_sat_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i8 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; cdbr %f0, %f0
; locghio %r3, 0
; lgr %r4, %r3
; cghi %r3, 127
; locghih %r4, 127
; lgr %r2, %r4
; cghi %r4, -128
; locghil %r2, -128
; br %r14
function %fcvt_to_uint_sat_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i16 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; lgr %r2, %r3
; clgfi %r3, 65535
; locghih %r2, -1
; br %r14
function %fcvt_to_sint_sat_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i16 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; cdbr %f0, %f0
; locghio %r3, 0
; lgr %r4, %r3
; cghi %r3, 32767
; locghih %r4, 32767
; lgr %r2, %r4
; cghi %r4, -32768
; locghil %r2, -32768
; br %r14
function %fcvt_to_uint_sat_f64_i32(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i32 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; llilf %r5, 4294967295
; clgr %r2, %r5
; locgrh %r2, %r5
; br %r14
function %fcvt_to_sint_sat_f64_i32(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i32 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; cdbr %f0, %f0
; locghio %r2, 0
; lgfi %r5, 2147483647
; cgr %r2, %r5
; locgrh %r2, %r5
; lgfi %r3, -2147483648
; cgr %r2, %r3
; locgrl %r2, %r3
; br %r14
function %fcvt_to_uint_sat_f64_i64(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i64 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; br %r14
function %fcvt_to_sint_sat_f64_i64(f64) -> i64 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i64 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; cdbr %f0, %f0
; locghio %r2, 0
; br %r14

586
cranelift/filetests/filetests/isa/s390x/floating-point.clif

@ -245,7 +245,7 @@ block0(v0: f64):
}
; block0:
; ledbr %f0, %f0
; ledbra %f0, %f0, 0
; br %r14
function %ceil_f32(f32) -> f32 {
@ -335,9 +335,7 @@ block0(v0: f32, v1: f32, v2: f32):
}
; block0:
; ldr %f1, %f0
; ldr %f0, %f4
; maebr %f0, %f1, %f2
; wfmasb %f0, %f0, %f2, %f4
; br %r14
function %fma_f64(f64, f64, f64) -> f64 {
@ -347,9 +345,7 @@ block0(v0: f64, v1: f64, v2: f64):
}
; block0:
; ldr %f1, %f0
; ldr %f0, %f4
; madbr %f0, %f1, %f2
; wfmadb %f0, %f0, %f2, %f4
; br %r14
function %fcopysign_f32(f32, f32) -> f32 {
@ -359,7 +355,8 @@ block0(v0: f32, v1: f32):
}
; block0:
; cpsdr %f0, %f2, %f0
; bras %r1, 8 ; data.f32 NaN ; le %f5, 0(%r1)
; vsel %v0, %v0, %v2, %v5
; br %r14
function %fcopysign_f64(f64, f64) -> f64 {
@ -369,7 +366,88 @@ block0(v0: f64, v1: f64):
}
; block0:
; cpsdr %f0, %f2, %f0
; bras %r1, 12 ; data.f64 NaN ; ld %f5, 0(%r1)
; vsel %v0, %v0, %v2, %v5
; br %r14
function %fcvt_to_uint_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 256 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wclgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_sint_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_sint.i8 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 128 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -129 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wcgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_uint_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 65536 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wclgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_sint_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_sint.i16 v0
return v1
}
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 32768 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -32769 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wcgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_uint_f32_i32(f32) -> i32 {
@ -381,8 +459,15 @@ block0(v0: f32):
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; clfebr %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 4294967300 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wclgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_sint_f32_i32(f32) -> i32 {
@ -394,8 +479,15 @@ block0(v0: f32):
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; cfebra %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 2147483600 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -2147484000 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wcgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_uint_f32_i64(f32) -> i64 {
@ -407,8 +499,15 @@ block0(v0: f32):
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; clgebr %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 8 ; data.f32 18446744000000000000 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -1 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wclgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_sint_f32_i64(f32) -> i64 {
@ -420,8 +519,91 @@ block0(v0: f32):
; block0:
; cebr %f0, %f0
; jno 6 ; trap
; cgebra %r2, 5, %f0, 0
; bras %r1, 8 ; data.f32 9223372000000000000 ; le %f5, 0(%r1)
; cebr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 8 ; data.f32 -9223373000000000000 ; vlef %v17, 0(%r1), 0
; wfcsb %f0, %v17
; jnle 6 ; trap
; wldeb %v21, %f0
; wcgdb %v23, %v21, 0, 5
; vlgvg %r2, %v23, 0
; br %r14
function %fcvt_to_uint_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_uint.i8 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 256 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_sint.i8 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 128 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -129 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_uint.i16 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 65536 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_sint.i16 v0
return v1
}
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 32768 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -32769 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i32(f64) -> i32 {
@ -433,8 +615,14 @@ block0(v0: f64):
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; clfdbr %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 4294967296 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i32(f64) -> i32 {
@ -446,8 +634,14 @@ block0(v0: f64):
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; cfdbra %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 2147483648 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -2147483649 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_uint_f64_i64(f64) -> i64 {
@ -459,8 +653,14 @@ block0(v0: f64):
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; clgdbr %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 18446744073709552000 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -1 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wclgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_to_sint_f64_i64(f64) -> i64 {
@ -472,8 +672,66 @@ block0(v0: f64):
; block0:
; cdbr %f0, %f0
; jno 6 ; trap
; cgdbra %r2, 5, %f0, 0
; jno 6 ; trap
; bras %r1, 12 ; data.f64 9223372036854776000 ; ld %f5, 0(%r1)
; cdbr %f0, %f5
; jnhe 6 ; trap
; bras %r1, 12 ; data.f64 -9223372036854778000 ; vleg %v17, 0(%r1), 0
; wfcdb %f0, %v17
; jnle 6 ; trap
; wcgdb %v21, %f0, 0, 5
; vlgvg %r2, %v21, 0
; br %r14
function %fcvt_from_uint_i8_f32(i8) -> f32 {
block0(v0: i8):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; llgcr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_sint_i8_f32(i8) -> f32 {
block0(v0: i8):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; lgbr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_uint_i16_f32(i16) -> f32 {
block0(v0: i16):
v1 = fcvt_from_uint.f32 v0
return v1
}
; block0:
; llghr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_sint_i16_f32(i16) -> f32 {
block0(v0: i16):
v1 = fcvt_from_sint.f32 v0
return v1
}
; block0:
; lghr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_uint_i32_f32(i32) -> f32 {
@ -483,7 +741,10 @@ block0(v0: i32):
}
; block0:
; celfbr %f0, 0, %r2, 0
; llgfr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_sint_i32_f32(i32) -> f32 {
@ -493,7 +754,10 @@ block0(v0: i32):
}
; block0:
; cefbra %f0, 0, %r2, 0
; lgfr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f7, %f5, 0, 3
; ledbra %f0, %f7, 4
; br %r14
function %fcvt_from_uint_i64_f32(i64) -> f32 {
@ -503,7 +767,9 @@ block0(v0: i64):
}
; block0:
; celgbr %f0, 0, %r2, 0
; ldgr %f3, %r2
; wcdlgb %f5, %f3, 0, 3
; ledbra %f0, %f5, 4
; br %r14
function %fcvt_from_sint_i64_f32(i64) -> f32 {
@ -513,7 +779,57 @@ block0(v0: i64):
}
; block0:
; cegbra %f0, 0, %r2, 0
; ldgr %f3, %r2
; wcdgb %f5, %f3, 0, 3
; ledbra %f0, %f5, 4
; br %r14
function %fcvt_from_uint_i8_f64(i8) -> f64 {
block0(v0: i8):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; llgcr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i8_f64(i8) -> f64 {
block0(v0: i8):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; lgbr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i16_f64(i16) -> f64 {
block0(v0: i16):
v1 = fcvt_from_uint.f64 v0
return v1
}
; block0:
; llghr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i16_f64(i16) -> f64 {
block0(v0: i16):
v1 = fcvt_from_sint.f64 v0
return v1
}
; block0:
; lghr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i32_f64(i32) -> f64 {
@ -523,7 +839,9 @@ block0(v0: i32):
}
; block0:
; cdlfbr %f0, 0, %r2, 0
; llgfr %r5, %r2
; ldgr %f5, %r5
; wcdlgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_sint_i32_f64(i32) -> f64 {
@ -533,7 +851,9 @@ block0(v0: i32):
}
; block0:
; cdfbra %f0, 0, %r2, 0
; lgfr %r5, %r2
; ldgr %f5, %r5
; wcdgb %f0, %f5, 0, 4
; br %r14
function %fcvt_from_uint_i64_f64(i64) -> f64 {
@ -543,7 +863,8 @@ block0(v0: i64):
}
; block0:
; cdlgbr %f0, 0, %r2, 0
; ldgr %f3, %r2
; wcdlgb %f0, %f3, 0, 4
; br %r14
function %fcvt_from_sint_i64_f64(i64) -> f64 {
@ -553,7 +874,78 @@ block0(v0: i64):
}
; block0:
; cdgbra %f0, 0, %r2, 0
; ldgr %f3, %r2
; wcdgb %f0, %f3, 0, 4
; br %r14
function %fcvt_to_uint_sat_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i8 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wclgdb %f5, %f3, 0, 5
; lgdr %r5, %f5
; lgr %r2, %r5
; clgfi %r5, 256
; locghih %r2, 255
; br %r14
function %fcvt_to_sint_sat_f32_i8(f32) -> i8 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i8 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wcgdb %f5, %f3, 0, 5
; lgdr %r5, %f5
; cebr %f0, %f0
; locghio %r5, 0
; lgr %r4, %r5
; cghi %r5, 127
; locghih %r4, 127
; lgr %r2, %r4
; cghi %r4, -128
; locghil %r2, -128
; br %r14
function %fcvt_to_uint_sat_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_uint_sat.i16 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wclgdb %f5, %f3, 0, 5
; lgdr %r5, %f5
; lgr %r2, %r5
; clgfi %r5, 65535
; locghih %r2, -1
; br %r14
function %fcvt_to_sint_sat_f32_i16(f32) -> i16 {
block0(v0: f32):
v1 = fcvt_to_sint_sat.i16 v0
return v1
}
; block0:
; ldebr %f3, %f0
; wcgdb %f5, %f3, 0, 5
; lgdr %r5, %f5
; cebr %f0, %f0
; locghio %r5, 0
; lgr %r4, %r5
; cghi %r5, 32767
; locghih %r4, 32767
; lgr %r2, %r4
; cghi %r4, -32768
; locghil %r2, -32768
; br %r14
function %fcvt_to_uint_sat_f32_i32(f32) -> i32 {
@ -563,9 +955,12 @@ block0(v0: f32):
}
; block0:
; clfebr %r2, 5, %f0, 0
; cebr %f0, %f0
; lochio %r2, 0
; ldebr %f3, %f0
; wclgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; llilf %r3, 4294967295
; clgr %r2, %r3
; locgrh %r2, %r3
; br %r14
function %fcvt_to_sint_sat_f32_i32(f32) -> i32 {
@ -575,9 +970,17 @@ block0(v0: f32):
}
; block0:
; cfebra %r2, 5, %f0, 0
; ldebr %f3, %f0
; wcgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; cebr %f0, %f0
; lochio %r2, 0
; locghio %r2, 0
; lgfi %r3, 2147483647
; cgr %r2, %r3
; locgrh %r2, %r3
; lgfi %r5, -2147483648
; cgr %r2, %r5
; locgrl %r2, %r5
; br %r14
function %fcvt_to_uint_sat_f32_i64(f32) -> i64 {
@ -587,9 +990,9 @@ block0(v0: f32):
}
; block0:
; clgebr %r2, 5, %f0, 0
; cebr %f0, %f0
; locghio %r2, 0
; ldebr %f3, %f0
; wclgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; br %r14
function %fcvt_to_sint_sat_f32_i64(f32) -> i64 {
@ -599,11 +1002,79 @@ block0(v0: f32):
}
; block0:
; cgebra %r2, 5, %f0, 0
; ldebr %f3, %f0
; wcgdb %f5, %f3, 0, 5
; lgdr %r2, %f5
; cebr %f0, %f0
; locghio %r2, 0
; br %r14
function %fcvt_to_uint_sat_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i8 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; lgr %r2, %r3
; clgfi %r3, 256
; locghih %r2, 255
; br %r14
function %fcvt_to_sint_sat_f64_i8(f64) -> i8 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i8 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; cdbr %f0, %f0
; locghio %r3, 0
; lgr %r4, %r3
; cghi %r3, 127
; locghih %r4, 127
; lgr %r2, %r4
; cghi %r4, -128
; locghil %r2, -128
; br %r14
function %fcvt_to_uint_sat_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i16 v0
return v1
}
; block0:
; wclgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; lgr %r2, %r3
; clgfi %r3, 65535
; locghih %r2, -1
; br %r14
function %fcvt_to_sint_sat_f64_i16(f64) -> i16 {
block0(v0: f64):
v1 = fcvt_to_sint_sat.i16 v0
return v1
}
; block0:
; wcgdb %f3, %f0, 0, 5
; lgdr %r3, %f3
; cdbr %f0, %f0
; locghio %r3, 0
; lgr %r4, %r3
; cghi %r3, 32767
; locghih %r4, 32767
; lgr %r2, %r4
; cghi %r4, -32768
; locghil %r2, -32768
; br %r14
function %fcvt_to_uint_sat_f64_i32(f64) -> i32 {
block0(v0: f64):
v1 = fcvt_to_uint_sat.i32 v0
@ -611,9 +1082,11 @@ block0(v0: f64):
}
; block0:
; clfdbr %r2, 5, %f0, 0
; cdbr %f0, %f0
; lochio %r2, 0
; wclgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; llilf %r5, 4294967295
; clgr %r2, %r5
; locgrh %r2, %r5
; br %r14
function %fcvt_to_sint_sat_f64_i32(f64) -> i32 {
@ -623,9 +1096,16 @@ block0(v0: f64):
}
; block0:
; cfdbra %r2, 5, %f0, 0
; wcgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; cdbr %f0, %f0
; lochio %r2, 0
; locghio %r2, 0
; lgfi %r5, 2147483647
; cgr %r2, %r5
; locgrh %r2, %r5
; lgfi %r3, -2147483648
; cgr %r2, %r3
; locgrl %r2, %r3
; br %r14
function %fcvt_to_uint_sat_f64_i64(f64) -> i64 {
@ -635,9 +1115,8 @@ block0(v0: f64):
}
; block0:
; clgdbr %r2, 5, %f0, 0
; cdbr %f0, %f0
; locghio %r2, 0
; wclgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; br %r14
function %fcvt_to_sint_sat_f64_i64(f64) -> i64 {
@ -647,7 +1126,8 @@ block0(v0: f64):
}
; block0:
; cgdbra %r2, 5, %f0, 0
; wcgdb %f3, %f0, 0, 5
; lgdr %r2, %f3
; cdbr %f0, %f0
; locghio %r2, 0
; br %r14
@ -679,8 +1159,7 @@ block0(v0: i32):
}
; block0:
; sllg %r5, %r2, 32
; ldgr %f0, %r5
; vlvgf %v0, %r2, 0
; br %r14
function %bitcast_f32_i32(f32) -> i32 {
@ -690,7 +1169,6 @@ block0(v0: f32):
}
; block0:
; lgdr %r5, %f0
; srlg %r2, %r5, 32
; vlgvf %r2, %v0, 0
; br %r14

8
cranelift/filetests/filetests/isa/s390x/fpmem.clif

@ -40,8 +40,7 @@ block0(v0: i64):
; block0:
; lrv %r5, 0(%r2)
; sllg %r3, %r5, 32
; ldgr %f0, %r3
; vlvgf %v0, %r5, 0
; br %r14
function %store_f64(f64, i64) {
@ -82,8 +81,7 @@ block0(v0: f32, v1: i64):
}
; block0:
; lgdr %r3, %f0
; srlg %r4, %r3, 32
; strv %r4, 0(%r2)
; vlgvf %r3, %v0, 0
; strv %r3, 0(%r2)
; br %r14

8
cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif

@ -76,9 +76,9 @@ block1:
; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1)
; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1)
; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1)
; bras %r1, 12 ; data.f64 4 ; ld %f5, 0(%r1)
; bras %r1, 12 ; data.f64 5 ; ld %f7, 0(%r1)
; std %f5, 0(%r2)
; std %f7, 8(%r2)
; bras %r1, 12 ; data.f64 4 ; vleg %v28, 0(%r1), 0
; bras %r1, 12 ; data.f64 5 ; vleg %v31, 0(%r1), 0
; vsteg %v28, 0(%r2), 0
; vsteg %v31, 8(%r2), 0
; br %r14

Loading…
Cancel
Save