Browse Source

[machinst x64]: enable packed saturated arithmetic

pull/2277/head
Andrew Brown 4 years ago
parent
commit
c8cce5d2d7
  1. 2
      build.rs
  2. 12
      cranelift/codegen/src/isa/x64/inst/args.rs
  3. 4
      cranelift/codegen/src/isa/x64/inst/emit.rs
  4. 24
      cranelift/codegen/src/isa/x64/inst/emit_tests.rs
  5. 12
      cranelift/codegen/src/isa/x64/lower.rs
  6. 36
      cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif

2
build.rs

@ -184,9 +184,11 @@ fn experimental_x64_should_panic(testsuite: &str, testname: &str, strategy: &str
("simd", "simd_i8x16_arith") => return false, ("simd", "simd_i8x16_arith") => return false,
("simd", "simd_i8x16_arith2") => return false, ("simd", "simd_i8x16_arith2") => return false,
("simd", "simd_i8x16_cmp") => return false, ("simd", "simd_i8x16_cmp") => return false,
("simd", "simd_i8x16_sat_arith") => return false,
("simd", "simd_i16x8_arith") => return false, ("simd", "simd_i16x8_arith") => return false,
("simd", "simd_i16x8_arith2") => return false, ("simd", "simd_i16x8_arith2") => return false,
("simd", "simd_i16x8_cmp") => return false, ("simd", "simd_i16x8_cmp") => return false,
("simd", "simd_i16x8_sat_arith") => return false,
("simd", "simd_i32x4_arith") => return false, ("simd", "simd_i32x4_arith") => return false,
("simd", "simd_i32x4_arith2") => return false, ("simd", "simd_i32x4_arith2") => return false,
("simd", "simd_i32x4_cmp") => return false, ("simd", "simd_i32x4_cmp") => return false,

12
cranelift/codegen/src/isa/x64/inst/args.rs

@ -459,6 +459,10 @@ pub enum SseOpcode {
Psubd, Psubd,
Psubq, Psubq,
Psubw, Psubw,
Psubsb,
Psubsw,
Psubusb,
Psubusw,
Ptest, Ptest,
Pxor, Pxor,
Rcpss, Rcpss,
@ -582,6 +586,10 @@ impl SseOpcode {
| SseOpcode::Psubd | SseOpcode::Psubd
| SseOpcode::Psubq | SseOpcode::Psubq
| SseOpcode::Psubw | SseOpcode::Psubw
| SseOpcode::Psubsb
| SseOpcode::Psubsw
| SseOpcode::Psubusb
| SseOpcode::Psubusw
| SseOpcode::Pxor | SseOpcode::Pxor
| SseOpcode::Sqrtpd | SseOpcode::Sqrtpd
| SseOpcode::Sqrtsd | SseOpcode::Sqrtsd
@ -736,6 +744,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Psubd => "psubd", SseOpcode::Psubd => "psubd",
SseOpcode::Psubq => "psubq", SseOpcode::Psubq => "psubq",
SseOpcode::Psubw => "psubw", SseOpcode::Psubw => "psubw",
SseOpcode::Psubsb => "psubsb",
SseOpcode::Psubsw => "psubsw",
SseOpcode::Psubusb => "psubusb",
SseOpcode::Psubusw => "psubusw",
SseOpcode::Ptest => "ptest", SseOpcode::Ptest => "ptest",
SseOpcode::Pxor => "pxor", SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss", SseOpcode::Rcpss => "rcpss",

4
cranelift/codegen/src/isa/x64/inst/emit.rs

@ -1798,6 +1798,10 @@ pub(crate) fn emit(
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2), SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2), SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2), SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
SseOpcode::Psubsb => (LegacyPrefixes::_66, 0x0FE8, 2),
SseOpcode::Psubsw => (LegacyPrefixes::_66, 0x0FE9, 2),
SseOpcode::Psubusb => (LegacyPrefixes::_66, 0x0FD8, 2),
SseOpcode::Psubusw => (LegacyPrefixes::_66, 0x0FD9, 2),
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2), SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2), SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),

24
cranelift/codegen/src/isa/x64/inst/emit_tests.rs

@ -3128,6 +3128,30 @@ fn test_x64_emit() {
"paddusw %xmm1, %xmm8", "paddusw %xmm1, %xmm8",
)); ));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5),
"66410FE8E9",
"psubsb %xmm9, %xmm5",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6),
"660FE9F7",
"psubsw %xmm7, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13),
"66450FD8EC",
"psubusb %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8),
"66440FD9C1",
"psubusw %xmm1, %xmm8",
));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13), Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
"66450FE0EC", "66450FE0EC",

12
cranelift/codegen/src/isa/x64/lower.rs

@ -546,6 +546,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::SaddSat | Opcode::SaddSat
| Opcode::UaddSat | Opcode::UaddSat
| Opcode::Isub | Opcode::Isub
| Opcode::SsubSat
| Opcode::UsubSat
| Opcode::Imul | Opcode::Imul
| Opcode::AvgRound | Opcode::AvgRound
| Opcode::Band | Opcode::Band
@ -578,6 +580,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
types::I64X2 => SseOpcode::Psubq, types::I64X2 => SseOpcode::Psubq,
_ => panic!("Unsupported type for packed isub instruction: {}", ty), _ => panic!("Unsupported type for packed isub instruction: {}", ty),
}, },
Opcode::SsubSat => match ty {
types::I8X16 => SseOpcode::Psubsb,
types::I16X8 => SseOpcode::Psubsw,
_ => panic!("Unsupported type for packed ssub_sat instruction: {}", ty),
},
Opcode::UsubSat => match ty {
types::I8X16 => SseOpcode::Psubusb,
types::I16X8 => SseOpcode::Psubusw,
_ => panic!("Unsupported type for packed usub_sat instruction: {}", ty),
},
Opcode::Imul => match ty { Opcode::Imul => match ty {
types::I16X8 => SseOpcode::Pmullw, types::I16X8 => SseOpcode::Pmullw,
types::I32X4 => SseOpcode::Pmulld, types::I32X4 => SseOpcode::Pmulld,

36
cranelift/filetests/filetests/isa/x64/simd-arithmetic-run.clif

@ -127,24 +127,24 @@ block0:
} }
; run ; run
;function %sub_sat_i8x16() -> b1 { function %sub_sat_i8x16() -> b1 {
;block0: block0:
; v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128 v0 = vconst.i8x16 [128 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] ; 128 == 0x80 == -128
; v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] v1 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
;
; v2 = ssub_sat v0, v1 v2 = ssub_sat v0, v1
; v3 = extractlane v2, 0 v3 = extractlane v2, 0
; v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128 v4 = icmp_imm eq v3, 0x80 ; 0x80 == -128
;
; ; now re-use 0x80 as an unsigned 128 ; now re-use 0x80 as an unsigned 128
; v5 = usub_sat v0, v2 v5 = usub_sat v0, v2
; v6 = extractlane v5, 0 v6 = extractlane v5, 0
; v7 = icmp_imm eq v6, 0 v7 = icmp_imm eq v6, 0
;
; v8 = band v4, v7 v8 = band v4, v7
; return v8 return v8
;} }
; _run ; run
;function %add_sub_f32x4() -> b1 { ;function %add_sub_f32x4() -> b1 {
;block0: ;block0:

Loading…
Cancel
Save