From d8db07faf6620581168bd289c79fb08df39ac768 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Sat, 9 Sep 2023 18:42:53 +0100 Subject: [PATCH] cranelift: Fix `v{all,any}_true` and `vhigh_bits` instructions in the interpreter (#6985) * cranelift: Implement `vall_true` for floats in the interpreter * cranelift: Implement `vany_true` for floats in the interpreter * cranelift: Implement `vhigh_bits` for floats in the interpreter * cranelift: Forbid vector return types for `vhigh_bits` This instruction doesen't really make sense with a vector return type. The description also states that it returns a scalar integer so I suspect it wasn't intended to allow vector integers. * fuzzgen: Enable `v{all,any}_true` and `vhigh_bits` --- .../codegen/meta/src/shared/instructions.rs | 2 +- .../filetests/runtests/simd-valltrue.clif | 20 +++++++ .../filetests/runtests/simd-vanytrue.clif | 21 ++++++++ .../runtests/simd-vhighbits-float.clif | 26 +++++++++ cranelift/fuzzgen/src/function_generator.rs | 54 +------------------ cranelift/interpreter/src/step.rs | 18 ++++--- 6 files changed, 81 insertions(+), 60 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 89aa5bb0c3..6727d9c8c3 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1591,7 +1591,7 @@ pub(crate) fn define( &formats.unary, ) .operands_in(vec![Operand::new("a", TxN)]) - .operands_out(vec![Operand::new("x", Int)]), + .operands_out(vec![Operand::new("x", NarrowInt)]), ); ig.push( diff --git a/cranelift/filetests/filetests/runtests/simd-valltrue.clif b/cranelift/filetests/filetests/runtests/simd-valltrue.clif index 870687b770..436ad476f3 100644 --- a/cranelift/filetests/filetests/runtests/simd-valltrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-valltrue.clif @@ -52,3 +52,23 @@ block0(v0: i64x2): ; run: %vall_true_i64x2([-1 0]) == 0 ; run: %vall_true_i64x2([-1 -1]) == 1 ; run: %vall_true_i64x2([0xffffffff_00000000 -1]) == 1 + +function %vall_true_f32x4(f32x4) -> i8 { +block0(v0: f32x4): + v1 = vall_true v0 + return v1 +} +; run: %vall_true_f32x4([0.0 0.0 0.0 0.0]) == 0 +; run: %vall_true_f32x4([0.0 -0.0 0.0 0.0]) == 0 +; run: %vall_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1 +; run: %vall_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1 + +function %vall_true_f64x2(f64x2) -> i8 { +block0(v0: f64x2): + v1 = vall_true v0 + return v1 +} +; run: %vall_true_f64x2([0.0 0.0]) == 0 +; run: %vall_true_f64x2([0.0 -0.0]) == 0 +; run: %vall_true_f64x2([-0.0 -0.0]) == 1 +; run: %vall_true_f64x2([0x1.0 0x1.0]) == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif index cc9389a507..7095523975 100644 --- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif +++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif @@ -45,3 +45,24 @@ block0(v0: i64x2): ; run: %vany_true_i64x2([0 0]) == 0 ; run: %vany_true_i64x2([-1 0]) == 1 ; run: %vany_true_i64x2([-1 -1]) == 1 + +function %vany_true_f32x4(f32x4) -> i8 { +block0(v0: f32x4): + v1 = vany_true v0 + return v1 +} +; run: %vany_true_f32x4([0.0 0.0 0.0 0.0]) == 0 +; run: %vany_true_f32x4([0.0 -0.0 0.0 0.0]) == 1 +; run: %vany_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1 +; run: %vany_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1 + + +function %vany_true_f64x2(f64x2) -> i8 { +block0(v0: f64x2): + v1 = vany_true v0 + return v1 +} +; run: %vany_true_f64x2([0.0 0.0]) == 0 +; run: %vany_true_f64x2([0.0 -0.0]) == 1 +; run: %vany_true_f64x2([-0.0 -0.0]) == 1 +; run: %vany_true_f64x2([0x1.0 0x1.0]) == 1 diff --git a/cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif b/cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif new file mode 100644 index 0000000000..228e5cf060 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif @@ -0,0 +1,26 @@ +test interpret +test run +target s390x +target x86_64 has_sse3 has_ssse3 has_sse41 +target x86_64 has_sse3 has_ssse3 has_sse41 has_avx +target riscv64gc has_v + +function %vhighbits_f32x4(f32x4) -> i8 { +block0(v0: f32x4): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_f32x4([0.0 0.0 0.0 0.0]) == 0 +; run: %vhighbits_f32x4([0.0 -0.0 0.0 0.0]) == 2 +; run: %vhighbits_f32x4([-0.0 -0.0 -0.0 -0.0]) == 0xF +; run: %vhighbits_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 0 + +function %vhighbits_f64x2(f64x2) -> i8 { +block0(v0: f64x2): + v1 = vhigh_bits.i8 v0 + return v1 +} +; run: %vhighbits_f64x2([0.0 0.0]) == 0 +; run: %vhighbits_f64x2([0.0 -0.0]) == 2 +; run: %vhighbits_f64x2([-0.0 -0.0]) == 3 +; run: %vhighbits_f64x2([0x1.0 0x1.0]) == 0 diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 4ef6432052..a264155cad 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -670,6 +670,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - ), // TODO (Opcode::Bitselect, &[_, _, _], &[F32 | F64]), + (Opcode::VhighBits, &[F32X4 | F64X2]), ) } @@ -896,7 +897,6 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::TableAddr), (Opcode::Null), (Opcode::X86Blendv), - (Opcode::VallTrue), (Opcode::IcmpImm), (Opcode::X86Pmulhrsw), (Opcode::IaddImm), @@ -924,58 +924,6 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::ScalarToVector), (Opcode::X86Pmaddubsw), (Opcode::X86Cvtt2dq), - (Opcode::VanyTrue, &[F32X4], &[I8]), - (Opcode::VanyTrue, &[F64X2], &[I8]), - (Opcode::VhighBits, &[F32X4], &[I8]), - (Opcode::VhighBits, &[F64X2], &[I8]), - (Opcode::VhighBits, &[I8X16], &[I16]), - (Opcode::VhighBits, &[I16X8], &[I16]), - (Opcode::VhighBits, &[I32X4], &[I16]), - (Opcode::VhighBits, &[I64X2], &[I16]), - (Opcode::VhighBits, &[F32X4], &[I16]), - (Opcode::VhighBits, &[F64X2], &[I16]), - (Opcode::VhighBits, &[I8X16], &[I32]), - (Opcode::VhighBits, &[I16X8], &[I32]), - (Opcode::VhighBits, &[I32X4], &[I32]), - (Opcode::VhighBits, &[I64X2], &[I32]), - (Opcode::VhighBits, &[F32X4], &[I32]), - (Opcode::VhighBits, &[F64X2], &[I32]), - (Opcode::VhighBits, &[I8X16], &[I64]), - (Opcode::VhighBits, &[I16X8], &[I64]), - (Opcode::VhighBits, &[I32X4], &[I64]), - (Opcode::VhighBits, &[I64X2], &[I64]), - (Opcode::VhighBits, &[F32X4], &[I64]), - (Opcode::VhighBits, &[F64X2], &[I64]), - (Opcode::VhighBits, &[I8X16], &[I128]), - (Opcode::VhighBits, &[I16X8], &[I128]), - (Opcode::VhighBits, &[I32X4], &[I128]), - (Opcode::VhighBits, &[I64X2], &[I128]), - (Opcode::VhighBits, &[F32X4], &[I128]), - (Opcode::VhighBits, &[F64X2], &[I128]), - (Opcode::VhighBits, &[I8X16], &[I8X16]), - (Opcode::VhighBits, &[I16X8], &[I8X16]), - (Opcode::VhighBits, &[I32X4], &[I8X16]), - (Opcode::VhighBits, &[I64X2], &[I8X16]), - (Opcode::VhighBits, &[F32X4], &[I8X16]), - (Opcode::VhighBits, &[F64X2], &[I8X16]), - (Opcode::VhighBits, &[I8X16], &[I16X8]), - (Opcode::VhighBits, &[I16X8], &[I16X8]), - (Opcode::VhighBits, &[I32X4], &[I16X8]), - (Opcode::VhighBits, &[I64X2], &[I16X8]), - (Opcode::VhighBits, &[F32X4], &[I16X8]), - (Opcode::VhighBits, &[F64X2], &[I16X8]), - (Opcode::VhighBits, &[I8X16], &[I32X4]), - (Opcode::VhighBits, &[I16X8], &[I32X4]), - (Opcode::VhighBits, &[I32X4], &[I32X4]), - (Opcode::VhighBits, &[I64X2], &[I32X4]), - (Opcode::VhighBits, &[F32X4], &[I32X4]), - (Opcode::VhighBits, &[F64X2], &[I32X4]), - (Opcode::VhighBits, &[I8X16], &[I64X2]), - (Opcode::VhighBits, &[I16X8], &[I64X2]), - (Opcode::VhighBits, &[I32X4], &[I64X2]), - (Opcode::VhighBits, &[I64X2], &[I64X2]), - (Opcode::VhighBits, &[F32X4], &[I64X2]), - (Opcode::VhighBits, &[F64X2], &[I64X2]), (Opcode::Umulhi, &[I128, I128], &[I128]), (Opcode::Smulhi, &[I128, I128], &[I128]), // https://github.com/bytecodealliance/wasmtime/issues/6073 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index a8ae06f906..8f437587e1 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -1009,7 +1009,10 @@ where Opcode::VhighBits => { // `ctrl_ty` controls the return type for this, so the input type // must be retrieved via `inst_context`. - let vector_type = inst_context.type_of(inst_context.args()[0]).unwrap(); + let vector_type = inst_context + .type_of(inst_context.args()[0]) + .unwrap() + .as_int(); let a = extractlanes(&arg(0), vector_type)?; let mut result: u128 = 0; for (i, val) in a.into_iter().enumerate() { @@ -1019,15 +1022,18 @@ where assign(DataValueExt::int(result as i128, ctrl_ty)?) } Opcode::VanyTrue => { - let lane_ty = ctrl_ty.lane_type(); + let simd_ty = ctrl_ty.as_int(); + let lane_ty = simd_ty.lane_type(); let init = DataValue::bool(false, true, lane_ty)?; - let any = fold_vector(arg(0), ctrl_ty, init.clone(), |acc, lane| acc.or(lane))?; + let any = fold_vector(arg(0), simd_ty, init.clone(), |acc, lane| acc.or(lane))?; assign(DataValue::bool(any != init, false, types::I8)?) } Opcode::VallTrue => assign(DataValue::bool( - !(arg(0).iter_lanes(ctrl_ty)?.try_fold(false, |acc, lane| { - Ok::(acc | lane.is_zero()?) - })?), + !(arg(0) + .iter_lanes(ctrl_ty.as_int())? + .try_fold(false, |acc, lane| { + Ok::(acc | lane.is_zero()?) + })?), false, types::I8, )?),