From 02ff19f2fc1d6e43e4778fd563ac41d95f8062ae Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 8 Sep 2021 09:18:38 +0100 Subject: [PATCH 1/2] Implement `SqmulRoundSat` for interpreter Implemented `SqmulRoundSat` for the Cranelift interpreter, performing QN-format fixed point multiplication for 16 and 32-bit integers in SIMD vectors. Copyright (c) 2021, Arm Limited --- .../runtests/simd-sqmulroundsat-aarch64.clif | 12 ++++++++ .../runtests/simd-sqmulroundsat.clif | 13 +++++++++ cranelift/interpreter/src/step.rs | 29 ++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif new file mode 100644 index 0000000000..f6809ddc5c --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat-aarch64.clif @@ -0,0 +1,12 @@ +test interpret +test run +target aarch64 +;; x86_64 hasn't implemented this for `i32x4` + +function %sqmulrs_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i32x4([1000 2000 3000 4000], [10000 100000 1000000 10000000]) == [0 0 1 19] +; run: %sqmulrs_i32x4([2147483647 -2147483648 -2147483648 0], [2147483647 -2147483648 2147483647 0]) == [2147483646 2147483647 -2147483647 0] diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif new file mode 100644 index 0000000000..1faa3592ad --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -0,0 +1,13 @@ +test interpret +test run +target aarch64 +set enable_simd +target x86_64 + +function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { +block0(v0: i16x8, v1: i16x8): + v2 = sqmul_round_sat v0, v1 + return v2 +} +; run: %sqmulrs_i16x8([1 2 3 4 5 6 7 8], [1 10 100 1000 10000 15000 20000 25000]) == [0 0 0 0 2 3 4 6] +; run: %sqmulrs_i16x8([32767 32767 -32768 -32768 -32768 -32768 0 0], [32767 32767 -32768 -32768 32767 32767 0 0]) == [32766 32766 32767 32767 -32767 -32767 0 0] diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index 7fc16b06f7..8699f484b0 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -941,7 +941,34 @@ where .collect::>>()?; assign(vectorizelanes(&new_vec, new_type)?) } - Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"), + Opcode::SqmulRoundSat => { + let lane_type = ctrl_ty.lane_type(); + let double_width = ctrl_ty.double_width().unwrap().lane_type(); + let arg0 = extractlanes(&arg(0)?, lane_type)?; + let arg1 = extractlanes(&arg(1)?, lane_type)?; + let (min, max) = lane_type.bounds(true); + let min: V = Value::int(min as i128, double_width)?; + let max: V = Value::int(max as i128, double_width)?; + let new_vec = arg0 + .into_iter() + .zip(arg1.into_iter()) + .map(|(x, y)| { + let x = x.into_int()?; + let y = y.into_int()?; + // temporarily double width of the value to avoid overflow. + let z: V = Value::int( + (x * y + (1 << (lane_type.bits() - 2))) >> (lane_type.bits() - 1), + double_width, + )?; + // check bounds, saturate, and truncate to correct width. + let z = Value::min(z, max.clone())?; + let z = Value::max(z, min.clone())?; + let z = z.convert(ValueConversionKind::Truncate(lane_type))?; + Ok(z) + }) + .collect::>>()?; + assign(vectorizelanes(&new_vec, ctrl_ty)?) + } Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), // TODO: these instructions should be removed once the new backend makes these obsolete From faaf6b537a5622efd1702693b0b04842a434df33 Mon Sep 17 00:00:00 2001 From: dheaton-arm Date: Wed, 22 Sep 2021 13:50:31 +0100 Subject: [PATCH 2/2] Prevent running tests on legacy backend. Copyright (c) 2021, Arm Limited --- cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif index 1faa3592ad..a6ada04f22 100644 --- a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -2,7 +2,7 @@ test interpret test run target aarch64 set enable_simd -target x86_64 +target x86_64 machinst function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { block0(v0: i16x8, v1: i16x8):