Browse Source

[AArch64] Merge 32- and 64-bit FPUOp2 (#4029)

And remove the unused saturating add/sub opcodes.

Copyright (c) 2022, Arm Limited.
pull/4031/head
Sam Parker 3 years ago
committed by GitHub
parent
commit
7c0ea28fc8
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
  1. 27
      cranelift/codegen/src/isa/aarch64/inst.isle
  2. 33
      cranelift/codegen/src/isa/aarch64/inst/emit.rs
  3. 80
      cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
  4. 32
      cranelift/codegen/src/isa/aarch64/inst/mod.rs
  5. 2
      cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
  6. 327
      cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
  7. 42
      cranelift/codegen/src/isa/aarch64/lower_inst.rs

27
cranelift/codegen/src/isa/aarch64/inst.isle

@ -323,6 +323,7 @@
;; 2-op FPU instruction. ;; 2-op FPU instruction.
(FpuRRR (FpuRRR
(fpu_op FPUOp2) (fpu_op FPUOp2)
(size ScalarSize)
(rd WritableReg) (rd WritableReg)
(rn Reg) (rn Reg)
(rm Reg)) (rm Reg))
@ -952,26 +953,12 @@
;; A floating-point unit (FPU) operation with two args. ;; A floating-point unit (FPU) operation with two args.
(type FPUOp2 (type FPUOp2
(enum (enum
(Add32) (Add)
(Add64) (Sub)
(Sub32) (Mul)
(Sub64) (Div)
(Mul32) (Max)
(Mul64) (Min)
(Div32)
(Div64)
(Max32)
(Max64)
(Min32)
(Min64)
;; Signed saturating add
(Sqadd64)
;; Unsigned saturating add
(Uqadd64)
;; Signed saturating subtract
(Sqsub64)
;; Unsigned saturating subtract
(Uqsub64)
)) ))
;; A floating-point unit (FPU) operation with three args. ;; A floating-point unit (FPU) operation with three args.

33
cranelift/codegen/src/isa/aarch64/inst/emit.rs

@ -1686,28 +1686,25 @@ impl MachInstEmit for Inst {
}; };
sink.put4(enc_fpurr(top22, rd, rn)); sink.put4(enc_fpurr(top22, rd, rn));
} }
&Inst::FpuRRR { fpu_op, rd, rn, rm } => { &Inst::FpuRRR {
fpu_op,
size,
rd,
rn,
rm,
} => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let rm = allocs.next(rm); let rm = allocs.next(rm);
let top22 = match fpu_op { let top22 = match fpu_op {
FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, FPUOp2::Add => 0b000_11110_00_1_00000_001010,
FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110, FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110, FPUOp2::Div => 0b000_11110_00_1_00000_000110,
FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010, FPUOp2::Max => 0b000_11110_00_1_00000_010010,
FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010, FPUOp2::Min => 0b000_11110_00_1_00000_010110,
FPUOp2::Div32 => 0b000_11110_00_1_00000_000110, };
FPUOp2::Div64 => 0b000_11110_01_1_00000_000110, let top22 = top22 | size.ftype() << 12;
FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
};
sink.put4(enc_fpurrr(top22, rd, rn, rm)); sink.put4(enc_fpurrr(top22, rd, rn, rm));
} }
&Inst::FpuRRI { fpu_op, rd, rn } => { &Inst::FpuRRI { fpu_op, rd, rn } => {

80
cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

@ -5428,7 +5428,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Add32, fpu_op: FPUOp2::Add,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5439,7 +5440,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Add64, fpu_op: FPUOp2::Add,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5450,7 +5452,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Sub32, fpu_op: FPUOp2::Sub,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5461,7 +5464,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Sub64, fpu_op: FPUOp2::Sub,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5472,7 +5476,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Mul32, fpu_op: FPUOp2::Mul,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5483,7 +5488,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Mul64, fpu_op: FPUOp2::Mul,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5494,7 +5500,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Div32, fpu_op: FPUOp2::Div,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5505,7 +5512,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Div64, fpu_op: FPUOp2::Div,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5516,7 +5524,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Max32, fpu_op: FPUOp2::Max,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5527,7 +5536,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Max64, fpu_op: FPUOp2::Max,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5538,7 +5548,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Min32, fpu_op: FPUOp2::Min,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5549,7 +5560,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRRR { Inst::FpuRRR {
fpu_op: FPUOp2::Min64, fpu_op: FPUOp2::Min,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
rm: vreg(31), rm: vreg(31),
@ -5558,50 +5570,6 @@ fn test_aarch64_binemit() {
"fmin d15, d30, d31", "fmin d15, d30, d31",
)); ));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Uqadd64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D50EF77E",
"uqadd d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Sqadd64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D50EF75E",
"sqadd d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Uqsub64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D52EF77E",
"uqsub d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Sqsub64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D52EF75E",
"sqsub d21, d22, d23",
));
insns.push(( insns.push((
Inst::FpuRRRR { Inst::FpuRRRR {
fpu_op: FPUOp3::MAdd32, fpu_op: FPUOp3::MAdd32,

32
cranelift/codegen/src/isa/aarch64/inst/mod.rs

@ -1690,24 +1690,20 @@ impl Inst {
let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs);
format!("{} {}, {}", op, rd, rn) format!("{} {}, {}", op, rd, rn)
} }
&Inst::FpuRRR { fpu_op, rd, rn, rm } => { &Inst::FpuRRR {
let (op, size) = match fpu_op { fpu_op,
FPUOp2::Add32 => ("fadd", ScalarSize::Size32), size,
FPUOp2::Add64 => ("fadd", ScalarSize::Size64), rd,
FPUOp2::Sub32 => ("fsub", ScalarSize::Size32), rn,
FPUOp2::Sub64 => ("fsub", ScalarSize::Size64), rm,
FPUOp2::Mul32 => ("fmul", ScalarSize::Size32), } => {
FPUOp2::Mul64 => ("fmul", ScalarSize::Size64), let op = match fpu_op {
FPUOp2::Div32 => ("fdiv", ScalarSize::Size32), FPUOp2::Add => "fadd",
FPUOp2::Div64 => ("fdiv", ScalarSize::Size64), FPUOp2::Sub => "fsub",
FPUOp2::Max32 => ("fmax", ScalarSize::Size32), FPUOp2::Mul => "fmul",
FPUOp2::Max64 => ("fmax", ScalarSize::Size64), FPUOp2::Div => "fdiv",
FPUOp2::Min32 => ("fmin", ScalarSize::Size32), FPUOp2::Max => "fmax",
FPUOp2::Min64 => ("fmin", ScalarSize::Size64), FPUOp2::Min => "fmin",
FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
}; };
let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs);
let rn = pretty_print_vreg_scalar(rn, size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs);

2
cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest

@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace src/clif.isle 443b34b797fc8ace
src/prelude.isle afd037c4d91c875c src/prelude.isle afd037c4d91c875c
src/isa/aarch64/inst.isle a44074e06f955750 src/isa/aarch64/inst.isle 54184fdac4e4ca23
src/isa/aarch64/lower.isle 71c7e603b0e4bdef src/isa/aarch64/lower.isle 71c7e603b0e4bdef

327
cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs

File diff suppressed because it is too large

42
cranelift/codegen/src/isa/aarch64/lower_inst.rs

@ -1591,32 +1591,26 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => { Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
let ty = ty.unwrap(); let ty = ty.unwrap();
let bits = ty_bits(ty);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if !ty.is_vector() { if !ty.is_vector() {
let fpu_op = match (op, bits) { let fpu_op = match op {
(Opcode::Fadd, 32) => FPUOp2::Add32, Opcode::Fadd => FPUOp2::Add,
(Opcode::Fadd, 64) => FPUOp2::Add64, Opcode::Fsub => FPUOp2::Sub,
(Opcode::Fsub, 32) => FPUOp2::Sub32, Opcode::Fmul => FPUOp2::Mul,
(Opcode::Fsub, 64) => FPUOp2::Sub64, Opcode::Fdiv => FPUOp2::Div,
(Opcode::Fmul, 32) => FPUOp2::Mul32, Opcode::Fmin => FPUOp2::Min,
(Opcode::Fmul, 64) => FPUOp2::Mul64, Opcode::Fmax => FPUOp2::Max,
(Opcode::Fdiv, 32) => FPUOp2::Div32, _ => unreachable!(),
(Opcode::Fdiv, 64) => FPUOp2::Div64,
(Opcode::Fmin, 32) => FPUOp2::Min32,
(Opcode::Fmin, 64) => FPUOp2::Min64,
(Opcode::Fmax, 32) => FPUOp2::Max32,
(Opcode::Fmax, 64) => FPUOp2::Max64,
_ => {
return Err(CodegenError::Unsupported(format!(
"{}: Unsupported type: {:?}",
op, ty
)))
}
}; };
ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm }); ctx.emit(Inst::FpuRRR {
fpu_op,
size: ScalarSize::from_ty(ty),
rd,
rn,
rm,
});
} else { } else {
let alu_op = match op { let alu_op = match op {
Opcode::Fadd => VecALUOp::Fadd, Opcode::Fadd => VecALUOp::Fadd,
@ -2149,7 +2143,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
lower_constant_f64(ctx, rtmp1, max); lower_constant_f64(ctx, rtmp1, max);
} }
ctx.emit(Inst::FpuRRR { ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64), fpu_op: FPUOp2::Min,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2, rd: rtmp2,
rn, rn,
rm: rtmp1.to_reg(), rm: rtmp1.to_reg(),
@ -2160,7 +2155,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
lower_constant_f64(ctx, rtmp1, min); lower_constant_f64(ctx, rtmp1, min);
} }
ctx.emit(Inst::FpuRRR { ctx.emit(Inst::FpuRRR {
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64), fpu_op: FPUOp2::Max,
size: ScalarSize::from_ty(in_ty),
rd: rtmp2, rd: rtmp2,
rn: rtmp2.to_reg(), rn: rtmp2.to_reg(),
rm: rtmp1.to_reg(), rm: rtmp1.to_reg(),

Loading…
Cancel
Save