Browse Source

cranelift: Fix `v{all,any}_true` and `vhigh_bits` instructions in the interpreter (#6985)

* cranelift: Implement `vall_true` for floats in the interpreter

* cranelift: Implement `vany_true` for floats in the interpreter

* cranelift: Implement `vhigh_bits` for floats in the interpreter

* cranelift: Forbid vector return types for `vhigh_bits`

This instruction doesen't really make sense with a vector return type.
The description also states that it returns a scalar integer so I suspect
it wasn't intended to allow vector integers.

* fuzzgen: Enable `v{all,any}_true` and `vhigh_bits`
pull/6999/head
Afonso Bordado 1 year ago
committed by GitHub
parent
commit
d8db07faf6
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      cranelift/codegen/meta/src/shared/instructions.rs
  2. 20
      cranelift/filetests/filetests/runtests/simd-valltrue.clif
  3. 21
      cranelift/filetests/filetests/runtests/simd-vanytrue.clif
  4. 26
      cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif
  5. 54
      cranelift/fuzzgen/src/function_generator.rs
  6. 18
      cranelift/interpreter/src/step.rs

2
cranelift/codegen/meta/src/shared/instructions.rs

@ -1591,7 +1591,7 @@ pub(crate) fn define(
&formats.unary,
)
.operands_in(vec![Operand::new("a", TxN)])
.operands_out(vec![Operand::new("x", Int)]),
.operands_out(vec![Operand::new("x", NarrowInt)]),
);
ig.push(

20
cranelift/filetests/filetests/runtests/simd-valltrue.clif

@ -52,3 +52,23 @@ block0(v0: i64x2):
; run: %vall_true_i64x2([-1 0]) == 0
; run: %vall_true_i64x2([-1 -1]) == 1
; run: %vall_true_i64x2([0xffffffff_00000000 -1]) == 1
function %vall_true_f32x4(f32x4) -> i8 {
block0(v0: f32x4):
v1 = vall_true v0
return v1
}
; run: %vall_true_f32x4([0.0 0.0 0.0 0.0]) == 0
; run: %vall_true_f32x4([0.0 -0.0 0.0 0.0]) == 0
; run: %vall_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1
; run: %vall_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1
function %vall_true_f64x2(f64x2) -> i8 {
block0(v0: f64x2):
v1 = vall_true v0
return v1
}
; run: %vall_true_f64x2([0.0 0.0]) == 0
; run: %vall_true_f64x2([0.0 -0.0]) == 0
; run: %vall_true_f64x2([-0.0 -0.0]) == 1
; run: %vall_true_f64x2([0x1.0 0x1.0]) == 1

21
cranelift/filetests/filetests/runtests/simd-vanytrue.clif

@ -45,3 +45,24 @@ block0(v0: i64x2):
; run: %vany_true_i64x2([0 0]) == 0
; run: %vany_true_i64x2([-1 0]) == 1
; run: %vany_true_i64x2([-1 -1]) == 1
function %vany_true_f32x4(f32x4) -> i8 {
block0(v0: f32x4):
v1 = vany_true v0
return v1
}
; run: %vany_true_f32x4([0.0 0.0 0.0 0.0]) == 0
; run: %vany_true_f32x4([0.0 -0.0 0.0 0.0]) == 1
; run: %vany_true_f32x4([-0.0 -0.0 -0.0 -0.0]) == 1
; run: %vany_true_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 1
function %vany_true_f64x2(f64x2) -> i8 {
block0(v0: f64x2):
v1 = vany_true v0
return v1
}
; run: %vany_true_f64x2([0.0 0.0]) == 0
; run: %vany_true_f64x2([0.0 -0.0]) == 1
; run: %vany_true_f64x2([-0.0 -0.0]) == 1
; run: %vany_true_f64x2([0x1.0 0x1.0]) == 1

26
cranelift/filetests/filetests/runtests/simd-vhighbits-float.clif

@ -0,0 +1,26 @@
test interpret
test run
target s390x
target x86_64 has_sse3 has_ssse3 has_sse41
target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
target riscv64gc has_v
function %vhighbits_f32x4(f32x4) -> i8 {
block0(v0: f32x4):
v1 = vhigh_bits.i8 v0
return v1
}
; run: %vhighbits_f32x4([0.0 0.0 0.0 0.0]) == 0
; run: %vhighbits_f32x4([0.0 -0.0 0.0 0.0]) == 2
; run: %vhighbits_f32x4([-0.0 -0.0 -0.0 -0.0]) == 0xF
; run: %vhighbits_f32x4([0x1.0 0x1.0 0x1.0 0x1.0]) == 0
function %vhighbits_f64x2(f64x2) -> i8 {
block0(v0: f64x2):
v1 = vhigh_bits.i8 v0
return v1
}
; run: %vhighbits_f64x2([0.0 0.0]) == 0
; run: %vhighbits_f64x2([0.0 -0.0]) == 2
; run: %vhighbits_f64x2([-0.0 -0.0]) == 3
; run: %vhighbits_f64x2([0x1.0 0x1.0]) == 0

54
cranelift/fuzzgen/src/function_generator.rs

@ -670,6 +670,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
),
// TODO
(Opcode::Bitselect, &[_, _, _], &[F32 | F64]),
(Opcode::VhighBits, &[F32X4 | F64X2]),
)
}
@ -896,7 +897,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
(Opcode::TableAddr),
(Opcode::Null),
(Opcode::X86Blendv),
(Opcode::VallTrue),
(Opcode::IcmpImm),
(Opcode::X86Pmulhrsw),
(Opcode::IaddImm),
@ -924,58 +924,6 @@ static OPCODE_SIGNATURES: Lazy<Vec<OpcodeSignature>> = Lazy::new(|| {
(Opcode::ScalarToVector),
(Opcode::X86Pmaddubsw),
(Opcode::X86Cvtt2dq),
(Opcode::VanyTrue, &[F32X4], &[I8]),
(Opcode::VanyTrue, &[F64X2], &[I8]),
(Opcode::VhighBits, &[F32X4], &[I8]),
(Opcode::VhighBits, &[F64X2], &[I8]),
(Opcode::VhighBits, &[I8X16], &[I16]),
(Opcode::VhighBits, &[I16X8], &[I16]),
(Opcode::VhighBits, &[I32X4], &[I16]),
(Opcode::VhighBits, &[I64X2], &[I16]),
(Opcode::VhighBits, &[F32X4], &[I16]),
(Opcode::VhighBits, &[F64X2], &[I16]),
(Opcode::VhighBits, &[I8X16], &[I32]),
(Opcode::VhighBits, &[I16X8], &[I32]),
(Opcode::VhighBits, &[I32X4], &[I32]),
(Opcode::VhighBits, &[I64X2], &[I32]),
(Opcode::VhighBits, &[F32X4], &[I32]),
(Opcode::VhighBits, &[F64X2], &[I32]),
(Opcode::VhighBits, &[I8X16], &[I64]),
(Opcode::VhighBits, &[I16X8], &[I64]),
(Opcode::VhighBits, &[I32X4], &[I64]),
(Opcode::VhighBits, &[I64X2], &[I64]),
(Opcode::VhighBits, &[F32X4], &[I64]),
(Opcode::VhighBits, &[F64X2], &[I64]),
(Opcode::VhighBits, &[I8X16], &[I128]),
(Opcode::VhighBits, &[I16X8], &[I128]),
(Opcode::VhighBits, &[I32X4], &[I128]),
(Opcode::VhighBits, &[I64X2], &[I128]),
(Opcode::VhighBits, &[F32X4], &[I128]),
(Opcode::VhighBits, &[F64X2], &[I128]),
(Opcode::VhighBits, &[I8X16], &[I8X16]),
(Opcode::VhighBits, &[I16X8], &[I8X16]),
(Opcode::VhighBits, &[I32X4], &[I8X16]),
(Opcode::VhighBits, &[I64X2], &[I8X16]),
(Opcode::VhighBits, &[F32X4], &[I8X16]),
(Opcode::VhighBits, &[F64X2], &[I8X16]),
(Opcode::VhighBits, &[I8X16], &[I16X8]),
(Opcode::VhighBits, &[I16X8], &[I16X8]),
(Opcode::VhighBits, &[I32X4], &[I16X8]),
(Opcode::VhighBits, &[I64X2], &[I16X8]),
(Opcode::VhighBits, &[F32X4], &[I16X8]),
(Opcode::VhighBits, &[F64X2], &[I16X8]),
(Opcode::VhighBits, &[I8X16], &[I32X4]),
(Opcode::VhighBits, &[I16X8], &[I32X4]),
(Opcode::VhighBits, &[I32X4], &[I32X4]),
(Opcode::VhighBits, &[I64X2], &[I32X4]),
(Opcode::VhighBits, &[F32X4], &[I32X4]),
(Opcode::VhighBits, &[F64X2], &[I32X4]),
(Opcode::VhighBits, &[I8X16], &[I64X2]),
(Opcode::VhighBits, &[I16X8], &[I64X2]),
(Opcode::VhighBits, &[I32X4], &[I64X2]),
(Opcode::VhighBits, &[I64X2], &[I64X2]),
(Opcode::VhighBits, &[F32X4], &[I64X2]),
(Opcode::VhighBits, &[F64X2], &[I64X2]),
(Opcode::Umulhi, &[I128, I128], &[I128]),
(Opcode::Smulhi, &[I128, I128], &[I128]),
// https://github.com/bytecodealliance/wasmtime/issues/6073

18
cranelift/interpreter/src/step.rs

@ -1009,7 +1009,10 @@ where
Opcode::VhighBits => {
// `ctrl_ty` controls the return type for this, so the input type
// must be retrieved via `inst_context`.
let vector_type = inst_context.type_of(inst_context.args()[0]).unwrap();
let vector_type = inst_context
.type_of(inst_context.args()[0])
.unwrap()
.as_int();
let a = extractlanes(&arg(0), vector_type)?;
let mut result: u128 = 0;
for (i, val) in a.into_iter().enumerate() {
@ -1019,15 +1022,18 @@ where
assign(DataValueExt::int(result as i128, ctrl_ty)?)
}
Opcode::VanyTrue => {
let lane_ty = ctrl_ty.lane_type();
let simd_ty = ctrl_ty.as_int();
let lane_ty = simd_ty.lane_type();
let init = DataValue::bool(false, true, lane_ty)?;
let any = fold_vector(arg(0), ctrl_ty, init.clone(), |acc, lane| acc.or(lane))?;
let any = fold_vector(arg(0), simd_ty, init.clone(), |acc, lane| acc.or(lane))?;
assign(DataValue::bool(any != init, false, types::I8)?)
}
Opcode::VallTrue => assign(DataValue::bool(
!(arg(0).iter_lanes(ctrl_ty)?.try_fold(false, |acc, lane| {
Ok::<bool, ValueError>(acc | lane.is_zero()?)
})?),
!(arg(0)
.iter_lanes(ctrl_ty.as_int())?
.try_fold(false, |acc, lane| {
Ok::<bool, ValueError>(acc | lane.is_zero()?)
})?),
false,
types::I8,
)?),

Loading…
Cancel
Save