diff --git a/build.rs b/build.rs
index 8ac0808cf1..59d0914562 100644
--- a/build.rs
+++ b/build.rs
@@ -233,7 +233,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
                 | ("simd", "simd_i16x8_extmul_i8x16")
                 | ("simd", "simd_i32x4_extadd_pairwise_i16x8")
                 | ("simd", "simd_i32x4_extmul_i16x8")
-                | ("simd", "simd_i32x4_trunc_sat_f64x2")
                 | ("simd", "simd_i64x2_extmul_i32x4") => return true,
 
                 _ => {}
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 8759fd347d..06e20c6198 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3985,19 +3985,19 @@ pub(crate) fn define(
         .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
     );
 
-    let I16or32xN = &TypeVar::new(
-        "I16or32xN",
-        "A SIMD vector type containing integer lanes 16 or 32 bits wide",
+    let I16or32or64xN = &TypeVar::new(
+        "I16or32or64xN",
+        "A SIMD vector type containing integer lanes 16, 32, or 64 bits wide",
         TypeSetBuilder::new()
-            .ints(16..32)
-            .simd_lanes(4..8)
+            .ints(16..64)
+            .simd_lanes(2..8)
             .includes_scalars(false)
             .build(),
     );
 
-    let x = &Operand::new("x", I16or32xN);
-    let y = &Operand::new("y", I16or32xN);
-    let a = &Operand::new("a", &I16or32xN.split_lanes());
+    let x = &Operand::new("x", I16or32or64xN);
+    let y = &Operand::new("y", I16or32or64xN);
+    let a = &Operand::new("a", &I16or32or64xN.split_lanes());
 
     ig.push(
         Inst::new(
@@ -4036,6 +4036,25 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "uunarrow",
+            r#"
+        Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+        saturating overflowing values to the unsigned maximum and minimum.
+
+        Note that all input lanes are considered unsigned.
+
+        The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+        and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+        returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + let I8or16or32xN = &TypeVar::new( "I8or16or32xN", "A SIMD vector type containing integer lanes 8, 16, or 32 bits wide.", diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 18b1932f80..87ba64ee13 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1677,11 +1677,6 @@ impl MachInstEmit for Inst { debug_assert_ne!(VectorSize::Size64x2, size); (0b0, 0b00000, enc_size) } - VecMisc2::Shll => { - debug_assert_ne!(VectorSize::Size64x2, size); - debug_assert!(!size.is_128bits()); - (0b1, 0b10011, enc_size) - } VecMisc2::Fcvtzs => { debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2); (0b0, 0b11011, enc_size) @@ -2092,24 +2087,49 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecMiscNarrow { + &Inst::VecRRLong { op, rd, rn, - size, high_half, } => { - let size = match size.lane_size() { - ScalarSize::Size8 => 0b00, - ScalarSize::Size16 => 0b01, - ScalarSize::Size32 => 0b10, - _ => panic!("Unexpected vector operand lane size!"), + let (u, size, bits_12_16) = match op { + VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111), + VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111), + VecRRLongOp::Shll8 => (0b1, 0b00, 0b10011), + VecRRLongOp::Shll16 => (0b1, 0b01, 0b10011), + VecRRLongOp::Shll32 => (0b1, 0b10, 0b10011), }; - let (u, bits_12_16) = match op { - VecMiscNarrowOp::Xtn => (0b0, 0b10010), - VecMiscNarrowOp::Sqxtn => (0b0, 0b10100), - VecMiscNarrowOp::Sqxtun => (0b1, 0b10010), + + sink.put4(enc_vec_rr_misc( + ((high_half as u32) << 1) | u, + size, + bits_12_16, + rd, + rn, + )); + } + &Inst::VecRRNarrow { + op, + rd, + rn, + high_half, + } => { + let (u, size, bits_12_16) = match op { + VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010), + VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010), + VecRRNarrowOp::Xtn64 => (0b0, 0b10, 0b10010), + VecRRNarrowOp::Sqxtn16 => (0b0, 0b00, 0b10100), + VecRRNarrowOp::Sqxtn32 => (0b0, 0b01, 0b10100), + VecRRNarrowOp::Sqxtn64 => (0b0, 0b10, 0b10100), + VecRRNarrowOp::Sqxtun16 => (0b1, 0b00, 0b10010), + VecRRNarrowOp::Sqxtun32 => (0b1, 0b01, 0b10010), + VecRRNarrowOp::Sqxtun64 => (0b1, 0b10, 0b10010), + VecRRNarrowOp::Uqxtn16 => (0b1, 0b00, 0b10100), + VecRRNarrowOp::Uqxtn32 => (0b1, 0b01, 0b10100), + VecRRNarrowOp::Uqxtn64 => (0b1, 0b10, 0b10100), }; + sink.put4(enc_vec_rr_misc( ((high_half as u32) << 1) | u, size, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index a618b7e81c..2ed45ed78b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2425,11 +2425,87 @@ fn test_aarch64_binemit() { )); insns.push(( - Inst::VecMiscNarrow { - op: VecMiscNarrowOp::Xtn, + Inst::VecRRLong { + op: VecRRLongOp::Fcvtl16, + rd: writable_vreg(0), + rn: vreg(30), + high_half: false, + }, + "C07B210E", + "fcvtl v0.4s, v30.4h", + )); + + insns.push(( + Inst::VecRRLong { + op: VecRRLongOp::Fcvtl32, + rd: writable_vreg(16), + rn: vreg(1), + high_half: true, + }, + "3078614E", + "fcvtl2 v16.2d, v1.4s", + )); + + insns.push(( + Inst::VecRRLong { + op: VecRRLongOp::Shll8, + rd: writable_vreg(12), + rn: vreg(5), + high_half: false, + }, + "AC38212E", + "shll v12.8h, v5.8b, #8", + )); + + insns.push(( + Inst::VecRRLong { + op: VecRRLongOp::Shll16, + rd: writable_vreg(9), + rn: vreg(1), + high_half: true, 
+        },
+        "2938616E",
+        "shll2 v9.4s, v1.8h, #16",
+    ));
+
+    insns.push((
+        Inst::VecRRLong {
+            op: VecRRLongOp::Shll32,
+            rd: writable_vreg(1),
+            rn: vreg(10),
+            high_half: false,
+        },
+        "4139A12E",
+        "shll v1.2d, v10.2s, #32",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Xtn16,
+            rd: writable_vreg(25),
+            rn: vreg(17),
+            high_half: false,
+        },
+        "392A210E",
+        "xtn v25.8b, v17.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Xtn32,
+            rd: writable_vreg(3),
+            rn: vreg(10),
+            high_half: true,
+        },
+        "4329614E",
+        "xtn2 v3.8h, v10.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Xtn64,
             rd: writable_vreg(22),
             rn: vreg(8),
-            size: VectorSize::Size32x2,
             high_half: false,
         },
         "1629A10E",
@@ -2437,11 +2513,21 @@ fn test_aarch64_binemit() {
     ));
 
     insns.push((
-        Inst::VecMiscNarrow {
-            op: VecMiscNarrowOp::Sqxtn,
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtn16,
+            rd: writable_vreg(7),
+            rn: vreg(22),
+            high_half: true,
+        },
+        "C74A214E",
+        "sqxtn2 v7.16b, v22.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtn32,
             rd: writable_vreg(31),
             rn: vreg(0),
-            size: VectorSize::Size16x8,
             high_half: true,
         },
         "1F48614E",
@@ -2449,17 +2535,82 @@ fn test_aarch64_binemit() {
     ));
 
     insns.push((
-        Inst::VecMiscNarrow {
-            op: VecMiscNarrowOp::Sqxtun,
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtn64,
+            rd: writable_vreg(14),
+            rn: vreg(20),
+            high_half: false,
+        },
+        "8E4AA10E",
+        "sqxtn v14.2s, v20.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtun16,
             rd: writable_vreg(16),
             rn: vreg(23),
-            size: VectorSize::Size8x16,
             high_half: false,
         },
         "F02A212E",
         "sqxtun v16.8b, v23.8h",
     ));
 
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtun32,
+            rd: writable_vreg(28),
+            rn: vreg(9),
+            high_half: true,
+        },
+        "3C29616E",
+        "sqxtun2 v28.8h, v9.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Sqxtun64,
+            rd: writable_vreg(15),
+            rn: vreg(15),
+            high_half: false,
+        },
+        "EF29A12E",
+        "sqxtun v15.2s, v15.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Uqxtn16,
+            rd: writable_vreg(21),
+            rn: vreg(4),
+            high_half: true,
+        },
+        "9548216E",
+        "uqxtn2 v21.16b, v4.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Uqxtn32,
+            rd: writable_vreg(31),
+            rn: vreg(31),
+            high_half: false,
+        },
+        "FF4B612E",
+        "uqxtn v31.4h, v31.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRNarrow {
+            op: VecRRNarrowOp::Uqxtn64,
+            rd: writable_vreg(11),
+            rn: vreg(12),
+            high_half: true,
+        },
+        "8B49A16E",
+        "uqxtn2 v11.4s, v12.2d",
+    ));
+
     insns.push((
         Inst::VecRRPair {
             op: VecPairOp::Addp,
@@ -3810,39 +3961,6 @@ fn test_aarch64_binemit() {
         "rev64 v1.4s, v10.4s",
     ));
 
-    insns.push((
-        Inst::VecMisc {
-            op: VecMisc2::Shll,
-            rd: writable_vreg(12),
-            rn: vreg(5),
-            size: VectorSize::Size8x8,
-        },
-        "AC38212E",
-        "shll v12.8h, v5.8b, #8",
-    ));
-
-    insns.push((
-        Inst::VecMisc {
-            op: VecMisc2::Shll,
-            rd: writable_vreg(9),
-            rn: vreg(1),
-            size: VectorSize::Size16x4,
-        },
-        "2938612E",
-        "shll v9.4s, v1.4h, #16",
-    ));
-
-    insns.push((
-        Inst::VecMisc {
-            op: VecMisc2::Shll,
-            rd: writable_vreg(1),
-            rn: vreg(10),
-            size: VectorSize::Size32x2,
-        },
-        "4139A12E",
-        "shll v1.2d, v10.2s, #32",
-    ));
-
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Fcvtzs,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index d3df28982e..5f8f8f0eec 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -332,8 +332,6 @@ pub enum VecMisc2 {
     Fsqrt,
     /// Reverse elements in 64-bit doublewords
     Rev64,
-    /// Shift left long (by element size)
-    Shll,
     /// Floating-point convert to signed integer, rounding toward zero
     Fcvtzs,
     /// Floating-point convert to unsigned integer, rounding toward zero
@@ -356,15 +354,48 @@ pub enum VecMisc2 {
     Cmeq0,
 }
 
-/// A Vector narrowing operation with two registers.
+/// A vector widening operation with one argument.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub enum VecMiscNarrowOp {
-    /// Extract Narrow
-    Xtn,
-    /// Signed saturating extract narrow
-    Sqxtn,
-    /// Signed saturating extract unsigned narrow
-    Sqxtun,
+pub enum VecRRLongOp {
+    /// Floating-point convert to higher precision long, 16-bit elements
+    Fcvtl16,
+    /// Floating-point convert to higher precision long, 32-bit elements
+    Fcvtl32,
+    /// Shift left long (by element size), 8-bit elements
+    Shll8,
+    /// Shift left long (by element size), 16-bit elements
+    Shll16,
+    /// Shift left long (by element size), 32-bit elements
+    Shll32,
+}
+
+/// A vector narrowing operation with one argument.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecRRNarrowOp {
+    /// Extract narrow, 16-bit elements
+    Xtn16,
+    /// Extract narrow, 32-bit elements
+    Xtn32,
+    /// Extract narrow, 64-bit elements
+    Xtn64,
+    /// Signed saturating extract narrow, 16-bit elements
+    Sqxtn16,
+    /// Signed saturating extract narrow, 32-bit elements
+    Sqxtn32,
+    /// Signed saturating extract narrow, 64-bit elements
+    Sqxtn64,
+    /// Signed saturating extract unsigned narrow, 16-bit elements
+    Sqxtun16,
+    /// Signed saturating extract unsigned narrow, 32-bit elements
+    Sqxtun32,
+    /// Signed saturating extract unsigned narrow, 64-bit elements
+    Sqxtun64,
+    /// Unsigned saturating extract narrow, 16-bit elements
+    Uqxtn16,
+    /// Unsigned saturating extract narrow, 32-bit elements
+    Uqxtn32,
+    /// Unsigned saturating extract narrow, 64-bit elements
+    Uqxtn64,
 }
 
 /// A vector operation on a pair of elements with one register.
@@ -1029,12 +1060,19 @@ pub enum Inst {
         size: VectorSize,
     },
 
+    /// Vector widening operation.
+    VecRRLong {
+        op: VecRRLongOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        high_half: bool,
+    },
+
     /// Vector narrowing operation.
-    VecMiscNarrow {
-        op: VecMiscNarrowOp,
+    VecRRNarrow {
+        op: VecRRNarrowOp,
         rd: Writable<Reg>,
         rn: Reg,
-        size: VectorSize,
         high_half: bool,
     },
 
@@ -2073,7 +2111,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_mod(rd);
             collector.add_use(rn);
         }
-        &Inst::VecMiscNarrow {
+        &Inst::VecRRLong { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
+        &Inst::VecRRNarrow {
             rd, rn, high_half, ..
         } => {
             collector.add_use(rn);
@@ -2868,7 +2910,15 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_mod(mapper, rd);
             map_use(mapper, rn);
         }
-        &mut Inst::VecMiscNarrow {
+        &mut Inst::VecRRLong {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
+        &mut Inst::VecRRNarrow {
             ref mut rd,
             ref mut rn,
             high_half,
@@ -3901,29 +3951,132 @@ impl Inst {
                 let rn = show_vreg_element(rn, mb_rru, src_idx, size);
                 format!("mov {}, {}", rd, rn)
             }
-            &Inst::VecMiscNarrow {
+            &Inst::VecRRLong {
                 op,
                 rd,
                 rn,
-                size,
                 high_half,
             } => {
-                let dest_size = if high_half {
-                    assert!(size.is_128bits());
-                    size
-                } else {
-                    size.halve()
+                let (op, rd_size, size, suffix) = match (op, high_half) {
+                    (VecRRLongOp::Fcvtl16, false) => {
+                        ("fcvtl", VectorSize::Size32x4, VectorSize::Size16x4, "")
+                    }
+                    (VecRRLongOp::Fcvtl16, true) => {
+                        ("fcvtl2", VectorSize::Size32x4, VectorSize::Size16x8, "")
+                    }
+                    (VecRRLongOp::Fcvtl32, false) => {
+                        ("fcvtl", VectorSize::Size64x2, VectorSize::Size32x2, "")
+                    }
+                    (VecRRLongOp::Fcvtl32, true) => {
+                        ("fcvtl2", VectorSize::Size64x2, VectorSize::Size32x4, "")
+                    }
+                    (VecRRLongOp::Shll8, false) => {
+                        ("shll", VectorSize::Size16x8, VectorSize::Size8x8, ", #8")
+                    }
+                    (VecRRLongOp::Shll8, true) => {
+                        ("shll2", VectorSize::Size16x8, VectorSize::Size8x16, ", #8")
+                    }
+                    (VecRRLongOp::Shll16, false) => {
+                        ("shll", VectorSize::Size32x4, VectorSize::Size16x4, ", #16")
+                    }
+                    (VecRRLongOp::Shll16, true) => {
+                        ("shll2", VectorSize::Size32x4, VectorSize::Size16x8, ", #16")
+                    }
+                    (VecRRLongOp::Shll32, false) => {
+                        ("shll", VectorSize::Size64x2, VectorSize::Size32x2, ", #32")
+                    }
+                    (VecRRLongOp::Shll32, true) => {
+                        ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32")
+                    }
                 };
-                let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size);
-                let rn = show_vreg_vector(rn, mb_rru, size.widen());
-                let op = match (op, high_half) {
-                    (VecMiscNarrowOp::Xtn, false) => "xtn",
-                    (VecMiscNarrowOp::Xtn, true) => "xtn2",
-                    (VecMiscNarrowOp::Sqxtn, false) => "sqxtn",
-                    (VecMiscNarrowOp::Sqxtn, true) => "sqxtn2",
-                    (VecMiscNarrowOp::Sqxtun, false) => "sqxtun",
-                    (VecMiscNarrowOp::Sqxtun, true) => "sqxtun2",
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+                let rn = show_vreg_vector(rn, mb_rru, size);
+
+                format!("{} {}, {}{}", op, rd, rn, suffix)
+            }
+            &Inst::VecRRNarrow {
+                op,
+                rd,
+                rn,
+                high_half,
+            } => {
+                let (op, rd_size, size) = match (op, high_half) {
+                    (VecRRNarrowOp::Xtn16, false) => {
+                        ("xtn", VectorSize::Size8x8, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Xtn16, true) => {
+                        ("xtn2", VectorSize::Size8x16, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Xtn32, false) => {
+                        ("xtn", VectorSize::Size16x4, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Xtn32, true) => {
+                        ("xtn2", VectorSize::Size16x8, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Xtn64, false) => {
+                        ("xtn", VectorSize::Size32x2, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Xtn64, true) => {
+                        ("xtn2", VectorSize::Size32x4, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Sqxtn16, false) => {
+                        ("sqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Sqxtn16, true) => {
+                        ("sqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Sqxtn32, false) => {
+                        ("sqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Sqxtn32, true) => {
+                        ("sqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Sqxtn64, false) => {
+                        ("sqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Sqxtn64, true) => {
+                        ("sqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Sqxtun16, false) => {
+                        ("sqxtun", VectorSize::Size8x8, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Sqxtun16, true) => {
+                        ("sqxtun2", VectorSize::Size8x16, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Sqxtun32, false) => {
+                        ("sqxtun", VectorSize::Size16x4, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Sqxtun32, true) => {
+                        ("sqxtun2", VectorSize::Size16x8, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Sqxtun64, false) => {
+                        ("sqxtun", VectorSize::Size32x2, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Sqxtun64, true) => {
+                        ("sqxtun2", VectorSize::Size32x4, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Uqxtn16, false) => {
+                        ("uqxtn", VectorSize::Size8x8, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Uqxtn16, true) => {
+                        ("uqxtn2", VectorSize::Size8x16, VectorSize::Size16x8)
+                    }
+                    (VecRRNarrowOp::Uqxtn32, false) => {
+                        ("uqxtn", VectorSize::Size16x4, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Uqxtn32, true) => {
+                        ("uqxtn2", VectorSize::Size16x8, VectorSize::Size32x4)
+                    }
+                    (VecRRNarrowOp::Uqxtn64, false) => {
+                        ("uqxtn", VectorSize::Size32x2, VectorSize::Size64x2)
+                    }
+                    (VecRRNarrowOp::Uqxtn64, true) => {
+                        ("uqxtn2", VectorSize::Size32x4, VectorSize::Size64x2)
+                    }
                 };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+                let rn = show_vreg_vector(rn, mb_rru, size);
+
                 format!("{} {}, {}", op, rd, rn)
             }
             &Inst::VecRRPair { op, rd, rn } => {
@@ -3999,45 +4152,34 @@ impl Inst {
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
             &Inst::VecMisc { op, rd, rn, size } => {
-                let (op, rd_size, size, suffix) = match op {
-                    VecMisc2::Not => {
-                        let size = if size.is_128bits() {
+                let (op, size, suffix) = match op {
+                    VecMisc2::Not => (
+                        "mvn",
+                        if size.is_128bits() {
                             VectorSize::Size8x16
                         } else {
                             VectorSize::Size8x8
-                        };
-
-                        ("mvn", size, size, "")
-                    }
-                    VecMisc2::Neg => ("neg", size, size, ""),
-                    VecMisc2::Abs => ("abs", size, size, ""),
-                    VecMisc2::Fabs => ("fabs", size, size, ""),
-                    VecMisc2::Fneg => ("fneg", size, size, ""),
-                    VecMisc2::Fsqrt => ("fsqrt", size, size, ""),
-                    VecMisc2::Rev64 => ("rev64", size, size, ""),
-                    VecMisc2::Shll => (
-                        "shll",
-                        size.widen(),
-                        size,
-                        match size {
-                            VectorSize::Size8x8 => ", #8",
-                            VectorSize::Size16x4 => ", #16",
-                            VectorSize::Size32x2 => ", #32",
-                            _ => panic!("Unexpected vector size: {:?}", size),
                         },
+                        "",
                     ),
-                    VecMisc2::Fcvtzs => ("fcvtzs", size, size, ""),
-                    VecMisc2::Fcvtzu => ("fcvtzu", size, size, ""),
-                    VecMisc2::Scvtf => ("scvtf", size, size, ""),
-                    VecMisc2::Ucvtf => ("ucvtf", size, size, ""),
-                    VecMisc2::Frintn => ("frintn", size, size, ""),
-                    VecMisc2::Frintz => ("frintz", size, size, ""),
-                    VecMisc2::Frintm => ("frintm", size, size, ""),
-                    VecMisc2::Frintp => ("frintp", size, size, ""),
-                    VecMisc2::Cnt => ("cnt", size, size, ""),
-                    VecMisc2::Cmeq0 => ("cmeq", size, size, ", #0"),
+                    VecMisc2::Neg => ("neg", size, ""),
+                    VecMisc2::Abs => ("abs", size, ""),
+                    VecMisc2::Fabs => ("fabs", size, ""),
+                    VecMisc2::Fneg => ("fneg", size, ""),
+                    VecMisc2::Fsqrt => ("fsqrt", size, ""),
+                    VecMisc2::Rev64 => ("rev64", size, ""),
+                    VecMisc2::Fcvtzs => ("fcvtzs", size, ""),
+                    VecMisc2::Fcvtzu => ("fcvtzu", size, ""),
+                    VecMisc2::Scvtf => ("scvtf", size, ""),
+                    VecMisc2::Ucvtf => ("ucvtf", size, ""),
+                    VecMisc2::Frintn => ("frintn", size, ""),
+                    VecMisc2::Frintz => ("frintz", size, ""),
+                    VecMisc2::Frintm => ("frintm", size, ""),
+                    VecMisc2::Frintp => ("frintp", size, ""),
+                    VecMisc2::Cnt => ("cnt", size, ""),
+                    VecMisc2::Cmeq0 => ("cmeq", size, ", #0"),
                 };
-                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
                 let rn = show_vreg_vector(rn, mb_rru, size);
                 format!("{} {}, {}{}", op, rd, rn, suffix)
             }
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 6a5b70351c..8c46602cbd 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -365,11 +365,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
             // Extract the low half components of rn.
             //   tmp1 = |c|a|
-            ctx.emit(Inst::VecMiscNarrow {
-                op: VecMiscNarrowOp::Xtn,
+            ctx.emit(Inst::VecRRNarrow {
+                op: VecRRNarrowOp::Xtn64,
                 rd: tmp1,
                 rn,
-                size: VectorSize::Size32x2,
                 high_half: false,
             });
 
@@ -385,21 +384,20 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
             // Extract the low half components of rm.
             //   tmp2 = |g|e|
-            ctx.emit(Inst::VecMiscNarrow {
-                op: VecMiscNarrowOp::Xtn,
+            ctx.emit(Inst::VecRRNarrow {
+                op: VecRRNarrowOp::Xtn64,
                 rd: tmp2,
                 rn: rm,
-                size: VectorSize::Size32x2,
                 high_half: false,
             });
 
             // Shift the high half components, into the high half.
             //   rd = |dg+ch << 32|be+af << 32|
-            ctx.emit(Inst::VecMisc {
-                op: VecMisc2::Shll,
+            ctx.emit(Inst::VecRRLong {
+                op: VecRRLongOp::Shll32,
                 rd,
                 rn: rd.to_reg(),
-                size: VectorSize::Size32x2,
+                high_half: false,
             });
 
             // Multiply the low components together, and accumulate with the high
@@ -3439,31 +3437,48 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             });
         }
 
-        Opcode::Snarrow | Opcode::Unarrow => {
-            let op = if op == Opcode::Snarrow {
-                VecMiscNarrowOp::Sqxtn
-            } else {
-                VecMiscNarrowOp::Sqxtun
+        Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => {
+            let nonzero_high_half = maybe_input_insn(ctx, inputs[1], Opcode::Vconst)
+                .map_or(true, |insn| {
+                    const_param_to_u128(ctx, insn).expect("Invalid immediate bytes") != 0
+                });
+            let op = match (op, ty.unwrap().lane_type()) {
+                (Opcode::Snarrow, I8) => VecRRNarrowOp::Sqxtn16,
+                (Opcode::Snarrow, I16) => VecRRNarrowOp::Sqxtn32,
+                (Opcode::Snarrow, I32) => VecRRNarrowOp::Sqxtn64,
+                (Opcode::Unarrow, I8) => VecRRNarrowOp::Sqxtun16,
+                (Opcode::Unarrow, I16) => VecRRNarrowOp::Sqxtun32,
+                (Opcode::Unarrow, I32) => VecRRNarrowOp::Sqxtun64,
+                (Opcode::Uunarrow, I8) => VecRRNarrowOp::Uqxtn16,
+                (Opcode::Uunarrow, I16) => VecRRNarrowOp::Uqxtn32,
+                (Opcode::Uunarrow, I32) => VecRRNarrowOp::Uqxtn64,
+                (_, lane_type) => {
+                    return Err(CodegenError::Unsupported(format!(
+                        "Unsupported SIMD vector lane type: {:?}",
+                        lane_type
+                    )))
+                }
             };
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            let ty = ty.unwrap();
 
-            ctx.emit(Inst::VecMiscNarrow {
+            ctx.emit(Inst::VecRRNarrow {
                 op,
                 rd,
                 rn,
-                size: VectorSize::from_ty(ty),
                 high_half: false,
             });
-            ctx.emit(Inst::VecMiscNarrow {
-                op,
-                rd,
-                rn: rn2,
-                size: VectorSize::from_ty(ty),
-                high_half: true,
-            });
+
+            if nonzero_high_half {
+                let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+
+                ctx.emit(Inst::VecRRNarrow {
+                    op,
+                    rd,
+                    rn,
+                    high_half: true,
+                });
+            }
         }
 
         Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index 188d7884a3..8ab66add04 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2860,6 +2860,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::ScalarToVector
         | Opcode::Snarrow
        | Opcode::Unarrow
+        | Opcode::Uunarrow
         | Opcode::SwidenLow
         | Opcode::SwidenHigh
         | Opcode::UwidenLow
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 1cf1da4e9b..ad74062e7e 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -6001,7 +6001,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             unimplemented!("Vector split/concat ops not implemented.");
         }
 
-        Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op),
+        Opcode::SqmulRoundSat | Opcode::Uunarrow => {
+            unimplemented!("unimplemented lowering for opcode {:?}", op)
+        }
 
         // Opcodes that should be removed by legalization. These should
         // eventually be removed if/when we replace in-situ legalization with
diff --git a/cranelift/codegen/src/preopt.serialized b/cranelift/codegen/src/preopt.serialized
index a0d55ca753..95e9f3e2b9 100644
Binary files a/cranelift/codegen/src/preopt.serialized and b/cranelift/codegen/src/preopt.serialized differ
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 76ec43a814..c6ecaf50ec 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -530,6 +530,7 @@ where
             arg(0)?,
             ValueConversionKind::Truncate(ctrl_ty),
         )?),
+        Opcode::Uunarrow => unimplemented!("Uunarrow"),
         Opcode::Uextend => assign(Value::convert(
             arg(0)?,
             ValueConversionKind::ZeroExtend(ctrl_ty),
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index 259d301df2..6741de2c64 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1790,10 +1790,26 @@ pub fn translate_operator(
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
         }
+        Operator::I32x4TruncSatF64x2SZero => {
+            let a = pop1_with_bitcast(state, F64X2, builder);
+            let converted_a = builder.ins().fcvt_to_sint_sat(I64X2, a);
+            let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
+            let zero = builder.ins().vconst(I64X2, handle);
+
+            state.push1(builder.ins().snarrow(converted_a, zero));
+        }
         Operator::I32x4TruncSatF32x4U => {
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_uint_sat(I32X4, a))
         }
+        Operator::I32x4TruncSatF64x2UZero => {
+            let a = pop1_with_bitcast(state, F64X2, builder);
+            let converted_a = builder.ins().fcvt_to_uint_sat(I64X2, a);
+            let handle = builder.func.dfg.constants.insert(vec![0u8; 16].into());
+            let zero = builder.ins().vconst(I64X2, handle);
+
+            state.push1(builder.ins().uunarrow(converted_a, zero));
+        }
         Operator::I8x16NarrowI16x8S => {
             let (a, b) = pop2_with_bitcast(state, I16X8, builder);
             state.push1(builder.ins().snarrow(a, b))
@@ -1906,9 +1922,7 @@ pub fn translate_operator(
         | Operator::I16x8ExtAddPairwiseI8x16U
         | Operator::I32x4ExtAddPairwiseI16x8S
         | Operator::I32x4ExtAddPairwiseI16x8U
-        | Operator::F64x2ConvertLowI32x4U
-        | Operator::I32x4TruncSatF64x2SZero
-        | Operator::I32x4TruncSatF64x2UZero => {
+        | Operator::F64x2ConvertLowI32x4U => {
             return Err(wasm_unsupported!("proposed simd operator {:?}", op));
         }
         Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {