cranelift: Fix `v{all,any}_true` and `vhigh_bits` instructions in the interpreter (#6985)

* cranelift: Implement `vall_true` for floats in the interpreter * cranelift: Implement `vany_true` for floats in the interpreter * cranelift: Implement `vhigh_bits` for floats in the interpreter * cranelift: Forbid vector return types for `vhigh_bits` This instruction doesen't really make sense with a vector return type. The description also states that it returns a scalar integer so I suspect it wasn't intended to allow vector integers. * fuzzgen: Enable `v{all,any}_true` and `vhigh_bits`
1 year ago · d8db07faf6
6 changed files with 81 additions and 60 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@ -1591,7 +1591,7 @@ pub(crate) fn define(
            &formats.unary,
        )
        .operands_in(vec![Operand::new("a", TxN)])
-        .operands_out(vec![Operand::new("x", Int)]),
+        .operands_out(vec![Operand::new("x", NarrowInt)]),
    );

    ig.push(
--- a/cranelift/filetests/filetests/runtests/simd-valltrue.clif
+++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif
@ -52,3 +52,23 @@ block0(v0: i64x2):
 ; run: %vall_true_i64x2([-1 0]) == 0
 ; run: %vall_true_i64x2([-1 -1]) == 1
 ; run: %vall_true_i64x2([0xffffffff_00000000 -1]) == 1
+
+function %vall_true_f32x4(f32x4) -> i8 {
+block0(v0: f32x4):
+    v1 = vall_true v0
+    return v1
+}
+; run: %vall_true_f32x4([0.0 0.0 0.0 0.0]) == 0
+; run: %vall_true_f32x4([0.0 -0.0 0.0 0.0]) == 0
+; run: %vall_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1
+; run: %vall_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1
+
+function %vall_true_f64x2(f64x2) -> i8 {
+block0(v0: f64x2):
+    v1 = vall_true v0
+    return v1
+}
+; run: %vall_true_f64x2([0.0 0.0]) == 0
+; run: %vall_true_f64x2([0.0 -0.0]) == 0
+; run: %vall_true_f64x2([-0.0 -0.0]) == 1
+; run: %vall_true_f64x2([0x1.0 0x1.0]) == 1
--- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
+++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
@ -45,3 +45,24 @@ block0(v0: i64x2):
 ; run: %vany_true_i64x2([0 0]) == 0
 ; run: %vany_true_i64x2([-1 0]) == 1
 ; run: %vany_true_i64x2([-1 -1]) == 1
+
+function %vany_true_f32x4(f32x4) -> i8 {
+block0(v0: f32x4):
+    v1 = vany_true v0
+    return v1
+}
+; run: %vany_true_f32x4([0.0 0.0 0.0 0.0]) == 0
+; run: %vany_true_f32x4([0.0 -0.0 0.0 0.0]) == 1
+; run: %vany_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1
+; run: %vany_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1
+
+
+function %vany_true_f64x2(f64x2) -> i8 {
+block0(v0: f64x2):
+    v1 = vany_true v0
+    return v1
+}
+; run: %vany_true_f64x2([0.0 0.0]) == 0
+; run: %vany_true_f64x2([0.0 -0.0]) == 1
+; run: %vany_true_f64x2([-0.0 -0.0]) == 1
+; run: %vany_true_f64x2([0x1.0 0x1.0]) == 1
--- a/cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif
+++ b/cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif
@ -0,0 +1,26 @@
+test interpret
+test run
+target s390x
+target x86_64 has_sse3 has_ssse3 has_sse41
+target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
+target riscv64gc has_v
+
+function %vhighbits_f32x4(f32x4) -> i8 {
+block0(v0: f32x4):
+    v1 = vhigh_bits.i8 v0
+    return v1
+}
+; run: %vhighbits_f32x4([0.0 0.0 0.0 0.0]) == 0
+; run: %vhighbits_f32x4([0.0 -0.0 0.0 0.0]) == 2
+; run: %vhighbits_f32x4([-0.0 -0.0 -0.0 -0.0]) == 0xF
+; run: %vhighbits_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 0
+
+function %vhighbits_f64x2(f64x2) -> i8 {
+block0(v0: f64x2):
+    v1 = vhigh_bits.i8 v0
+    return v1
+}
+; run: %vhighbits_f64x2([0.0 0.0]) == 0
+; run: %vhighbits_f64x2([0.0 -0.0]) == 2
+; run: %vhighbits_f64x2([-0.0 -0.0]) == 3
+; run: %vhighbits_f64x2([0x1.0 0x1.0]) == 0
--- a/cranelift/fuzzgen/src/function_generator.rs
+++ b/cranelift/fuzzgen/src/function_generator.rs
@ -670,6 +670,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
                ),
                // TODO
                (Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
+                (Opcode::VhighBits, &[F32X4 | F64X2]),
            )
        }

@ -896,7 +897,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
                (Opcode::TableAddr),
                (Opcode::Null),
                (Opcode::X86Blendv),
-                (Opcode::VallTrue),
                (Opcode::IcmpImm),
                (Opcode::X86Pmulhrsw),
                (Opcode::IaddImm),
@ -924,58 +924,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
                (Opcode::ScalarToVector),
                (Opcode::X86Pmaddubsw),
                (Opcode::X86Cvtt2dq),
-                (Opcode::VanyTrue, &[F32X4], &[I8]),
-                (Opcode::VanyTrue, &[F64X2], &[I8]),
-                (Opcode::VhighBits, &[F32X4], &[I8]),
-                (Opcode::VhighBits, &[F64X2], &[I8]),
-                (Opcode::VhighBits, &[I8X16], &[I16]),
-                (Opcode::VhighBits, &[I16X8], &[I16]),
-                (Opcode::VhighBits, &[I32X4], &[I16]),
-                (Opcode::VhighBits, &[I64X2], &[I16]),
-                (Opcode::VhighBits, &[F32X4], &[I16]),
-                (Opcode::VhighBits, &[F64X2], &[I16]),
-                (Opcode::VhighBits, &[I8X16], &[I32]),
-                (Opcode::VhighBits, &[I16X8], &[I32]),
-                (Opcode::VhighBits, &[I32X4], &[I32]),
-                (Opcode::VhighBits, &[I64X2], &[I32]),
-                (Opcode::VhighBits, &[F32X4], &[I32]),
-                (Opcode::VhighBits, &[F64X2], &[I32]),
-                (Opcode::VhighBits, &[I8X16], &[I64]),
-                (Opcode::VhighBits, &[I16X8], &[I64]),
-                (Opcode::VhighBits, &[I32X4], &[I64]),
-                (Opcode::VhighBits, &[I64X2], &[I64]),
-                (Opcode::VhighBits, &[F32X4], &[I64]),
-                (Opcode::VhighBits, &[F64X2], &[I64]),
-                (Opcode::VhighBits, &[I8X16], &[I128]),
-                (Opcode::VhighBits, &[I16X8], &[I128]),
-                (Opcode::VhighBits, &[I32X4], &[I128]),
-                (Opcode::VhighBits, &[I64X2], &[I128]),
-                (Opcode::VhighBits, &[F32X4], &[I128]),
-                (Opcode::VhighBits, &[F64X2], &[I128]),
-                (Opcode::VhighBits, &[I8X16], &[I8X16]),
-                (Opcode::VhighBits, &[I16X8], &[I8X16]),
-                (Opcode::VhighBits, &[I32X4], &[I8X16]),
-                (Opcode::VhighBits, &[I64X2], &[I8X16]),
-                (Opcode::VhighBits, &[F32X4], &[I8X16]),
-                (Opcode::VhighBits, &[F64X2], &[I8X16]),
-                (Opcode::VhighBits, &[I8X16], &[I16X8]),
-                (Opcode::VhighBits, &[I16X8], &[I16X8]),
-                (Opcode::VhighBits, &[I32X4], &[I16X8]),
-                (Opcode::VhighBits, &[I64X2], &[I16X8]),
-                (Opcode::VhighBits, &[F32X4], &[I16X8]),
-                (Opcode::VhighBits, &[F64X2], &[I16X8]),
-                (Opcode::VhighBits, &[I8X16], &[I32X4]),
-                (Opcode::VhighBits, &[I16X8], &[I32X4]),
-                (Opcode::VhighBits, &[I32X4], &[I32X4]),
-                (Opcode::VhighBits, &[I64X2], &[I32X4]),
-                (Opcode::VhighBits, &[F32X4], &[I32X4]),
-                (Opcode::VhighBits, &[F64X2], &[I32X4]),
-                (Opcode::VhighBits, &[I8X16], &[I64X2]),
-                (Opcode::VhighBits, &[I16X8], &[I64X2]),
-                (Opcode::VhighBits, &[I32X4], &[I64X2]),
-                (Opcode::VhighBits, &[I64X2], &[I64X2]),
-                (Opcode::VhighBits, &[F32X4], &[I64X2]),
-                (Opcode::VhighBits, &[F64X2], &[I64X2]),
                (Opcode::Umulhi, &[I128, I128], &[I128]),
                (Opcode::Smulhi, &[I128, I128], &[I128]),
                // https://github.com/bytecodealliance/wasmtime/issues/6073
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@ -1009,7 +1009,10 @@ where
        Opcode::VhighBits => {
            // `ctrl_ty` controls the return type for this, so the input type
            // must be retrieved via `inst_context`.
-            let vector_type = inst_context.type_of(inst_context.args()[0]).unwrap();
+            let vector_type = inst_context
+                .type_of(inst_context.args()[0])
+                .unwrap()
+                .as_int();
            let a = extractlanes(&arg(0), vector_type)?;
            let mut result: u128 = 0;
            for (i, val) in a.into_iter().enumerate() {
@ -1019,15 +1022,18 @@ where
            assign(DataValueExt::int(result as i128, ctrl_ty)?)
        }
        Opcode::VanyTrue => {
-            let lane_ty = ctrl_ty.lane_type();
+            let simd_ty = ctrl_ty.as_int();
+            let lane_ty = simd_ty.lane_type();
            let init = DataValue::bool(false, true, lane_ty)?;
-            let any = fold_vector(arg(0), ctrl_ty, init.clone(), |acc, lane| acc.or(lane))?;
+            let any = fold_vector(arg(0), simd_ty, init.clone(), |acc, lane| acc.or(lane))?;
            assign(DataValue::bool(any != init, false, types::I8)?)
        }
        Opcode::VallTrue => assign(DataValue::bool(
-            !(arg(0).iter_lanes(ctrl_ty)?.try_fold(false, |acc, lane| {
-                Ok::<bool, ValueError>(acc | lane.is_zero()?)
-            })?),
+            !(arg(0)
+                .iter_lanes(ctrl_ty.as_int())?
+                .try_fold(false, |acc, lane| {
+                    Ok::<bool, ValueError>(acc | lane.is_zero()?)
+                })?),
            false,
            types::I8,
        )?),