
wasi-nn: update openvino backend to v0.7.2, use API 2.0 (#8790)

* update openvino backend, use API 2.0

* update openvino crate to 0.7.2

* minor formatting, cargo lock with newer ov

* prtest:full

* fix incompatibilities with the `Id` enum and other minor fixes

* vet: audit updates to `openvino` crates

* implement traits for TensorType and ElementType

---------

Co-authored-by: Andrew Brown <andrew.brown@intel.com>
Rahul committed 895183c9cd
6 changed files:

  1. Cargo.lock (12 lines changed)
  2. crates/wasi-nn/Cargo.toml (2 lines changed)
  3. crates/wasi-nn/src/backend/mod.rs (2 lines changed)
  4. crates/wasi-nn/src/backend/openvino.rs (152 lines changed)
  5. crates/wasi-nn/tests/check/openvino.rs (8 lines changed)
  6. supply-chain/audits.toml (15 lines changed)
Cargo.lock (12 lines changed)

@@ -1973,9 +1973,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
 [[package]]
 name = "openvino"
-version = "0.6.0"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24bd3a7ef39968e6a4f1b1206c1c876f9bd50cf739ccbcd69f8539bbac5dcc7a"
+checksum = "aee013796927eec6012a344f10ecdc06bf26de79c626a2395e3f115464907ef6"
 dependencies = [
  "openvino-finder",
  "openvino-sys",
@@ -1984,9 +1984,9 @@ dependencies = [
 [[package]]
 name = "openvino-finder"
-version = "0.6.0"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05d234d1394a413ea8adaf0c40806b9ad1946be6310b441f688840654a331973"
+checksum = "af4c6841df4cd60fef743015f3348f81b6b225bd255ed0c4cab6e8c479e45eaa"
 dependencies = [
  "cfg-if",
  "log",
@@ -1994,9 +1994,9 @@ dependencies = [
 [[package]]
 name = "openvino-sys"
-version = "0.6.0"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44c98acf37fc84ad9d7da4dc6c18f0f60ad209b43a6f555be01f9003d0a2a43d"
+checksum = "f62fc2bd6882f2300a6b5017eaad292586d70995d333582aabcf1f1121cd147c"
 dependencies = [
  "env_logger",
  "libloading",

crates/wasi-nn/Cargo.toml (2 lines changed)

@@ -29,7 +29,7 @@ wasmtime = { workspace = true, features = [
 # These dependencies are necessary for the wasi-nn implementation:
 tracing = { workspace = true }
 thiserror = { workspace = true }
-openvino = { version = "0.6.0", features = [
+openvino = { version = "0.7.2", features = [
     "runtime-linking",
 ], optional = true }

crates/wasi-nn/src/backend/mod.rs (2 lines changed)

@@ -107,6 +107,8 @@ pub enum BackendError {
     InvalidNumberOfBuilders(usize, usize),
     #[error("Not enough memory to copy tensor data of size: {0}")]
     NotEnoughMemory(usize),
+    #[error("Unsupported tensor type: {0}")]
+    UnsupportedTensorType(String),
 }

 /// Read a file into a byte vector.
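For context, the new variant is what the `TryFrom<ElementType>` conversion introduced later in this diff returns when OpenVINO reports an element type that wasi-nn cannot express. A minimal sketch of handling it at a call site (the `describe` helper is hypothetical; `Display` on `BackendError` comes from the `thiserror` derive):

    // Hypothetical call-site handling; not part of this diff.
    fn describe(err: &BackendError) -> String {
        match err {
            BackendError::UnsupportedTensorType(ty) => {
                format!("no wasi-nn tensor type for OpenVINO element type `{ty}`")
            }
            other => other.to_string(), // `Display` is derived via `thiserror`.
        }
    }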

crates/wasi-nn/src/backend/openvino.rs (152 lines changed)

@@ -3,9 +3,9 @@
 use super::{
     read, BackendError, BackendExecutionContext, BackendFromDir, BackendGraph, BackendInner, Id,
 };
-use crate::wit::{self, ExecutionTarget, GraphEncoding, Tensor, TensorType};
+use crate::wit::{ExecutionTarget, GraphEncoding, Tensor, TensorType};
 use crate::{ExecutionContext, Graph};
-use openvino::{InferenceError, Layout, Precision, SetupError, TensorDesc};
+use openvino::{DeviceType, ElementType, InferenceError, SetupError, Shape, Tensor as OvTensor};
 use std::path::Path;
 use std::sync::{Arc, Mutex};
@@ -23,41 +23,33 @@ impl BackendInner for OpenvinoBackend {
         if builders.len() != 2 {
            return Err(BackendError::InvalidNumberOfBuilders(2, builders.len()).into());
         }

         // Construct the context if none is present; this is done lazily (i.e.
         // upon actually loading a model) because it may fail to find and load
         // the OpenVINO libraries. The laziness limits the extent of the error
         // only to wasi-nn users, not all WASI users.
         if self.0.is_none() {
-            self.0.replace(openvino::Core::new(None)?);
+            self.0.replace(openvino::Core::new()?);
         }

         // Read the guest array.
-        let xml = &builders[0];
-        let weights = &builders[1];
+        let xml = builders[0];
+        let weights = builders[1];

-        // Construct OpenVINO graph structures: `cnn_network` contains the graph
-        // structure, `exec_network` can perform inference.
+        // Construct a new tensor for the model weights.
+        let shape = Shape::new(&[1, weights.len() as i64])?;
+        let mut weights_tensor = OvTensor::new(ElementType::U8, &shape)?;
+        let buffer = weights_tensor.get_raw_data_mut()?;
+        buffer.copy_from_slice(&weights);

+        // Construct OpenVINO graph structures: `model` contains the graph
+        // structure, `compiled_model` can perform inference.
         let core = self
             .0
             .as_mut()
             .expect("openvino::Core was previously constructed");
-        let mut cnn_network = core.read_network_from_buffer(&xml, &weights)?;
-
-        // TODO: this is a temporary workaround. We need a more elegant way to
-        // specify the layout in the long run. However, without this newer
-        // versions of OpenVINO will fail due to parameter mismatch.
-        for i in 0..cnn_network.get_inputs_len()? {
-            let name = cnn_network.get_input_name(i)?;
-            cnn_network.set_input_layout(&name, Layout::NHWC)?;
-        }
-
-        let exec_network =
-            core.load_network(&cnn_network, map_execution_target_to_string(target))?;
-        let box_: Box<dyn BackendGraph> = Box::new(OpenvinoGraph(
-            Arc::new(cnn_network),
-            Arc::new(Mutex::new(exec_network)),
-        ));
+        let model = core.read_model_from_buffer(&xml, Some(&weights_tensor))?;
+        let compiled_model = core.compile_model(&model, target.into())?;
+        let box_: Box<dyn BackendGraph> =
+            Box::new(OpenvinoGraph(Arc::new(Mutex::new(compiled_model))));
         Ok(box_.into())
     }
@@ -78,63 +70,62 @@ impl BackendFromDir for OpenvinoBackend {
     }
 }

-struct OpenvinoGraph(
-    Arc<openvino::CNNNetwork>,
-    Arc<Mutex<openvino::ExecutableNetwork>>,
-);
+struct OpenvinoGraph(Arc<Mutex<openvino::CompiledModel>>);

 unsafe impl Send for OpenvinoGraph {}
 unsafe impl Sync for OpenvinoGraph {}

 impl BackendGraph for OpenvinoGraph {
     fn init_execution_context(&self) -> Result<ExecutionContext, BackendError> {
-        let mut network = self.1.lock().unwrap();
-        let infer_request = network.create_infer_request()?;
+        let mut compiled_model = self.0.lock().unwrap();
+        let infer_request = compiled_model.create_infer_request()?;
         let box_: Box<dyn BackendExecutionContext> =
-            Box::new(OpenvinoExecutionContext(self.0.clone(), infer_request));
+            Box::new(OpenvinoExecutionContext(infer_request));
         Ok(box_.into())
     }
 }

-struct OpenvinoExecutionContext(Arc<openvino::CNNNetwork>, openvino::InferRequest);
+struct OpenvinoExecutionContext(openvino::InferRequest);

 impl BackendExecutionContext for OpenvinoExecutionContext {
     fn set_input(&mut self, id: Id, tensor: &Tensor) -> Result<(), BackendError> {
-        let input_name = match id {
-            Id::Index(i) => self.0.get_input_name(i as usize)?,
-            Id::Name(name) => name,
-        };
-
-        // Construct the blob structure. TODO: there must be some good way to
-        // discover the layout here; `desc` should not have to default to NHWC.
-        let precision = map_tensor_type_to_precision(tensor.ty);
+        // Construct the tensor.
+        let precision = tensor.ty.into();
         let dimensions = tensor
             .dimensions
             .iter()
-            .map(|&d| d as usize)
+            .map(|&d| d as i64)
             .collect::<Vec<_>>();
-        let desc = TensorDesc::new(Layout::NHWC, &dimensions, precision);
-        let blob = openvino::Blob::new(&desc, &tensor.data)?;
-
-        // Actually assign the blob to the request.
-        self.1.set_blob(&input_name, &blob)?;
+        let shape = Shape::new(&dimensions)?;
+        let mut new_tensor = OvTensor::new(precision, &shape)?;
+        let buffer = new_tensor.get_raw_data_mut()?;
+        buffer.copy_from_slice(&tensor.data);
+
+        // Assign the tensor to the request.
+        match id {
+            Id::Index(i) => self.0.set_input_tensor_by_index(i as usize, &new_tensor)?,
+            Id::Name(name) => self.0.set_tensor(&name, &new_tensor)?,
+        };
         Ok(())
     }

     fn compute(&mut self) -> Result<(), BackendError> {
-        self.1.infer()?;
+        self.0.infer()?;
         Ok(())
     }

     fn get_output(&mut self, id: Id) -> Result<Tensor, BackendError> {
         let output_name = match id {
-            Id::Index(i) => self.0.get_output_name(i as usize)?,
-            Id::Name(name) => name,
+            Id::Index(i) => self.0.get_output_tensor_by_index(i as usize)?,
+            Id::Name(name) => self.0.get_tensor(&name)?,
         };
-
-        let dimensions = vec![]; // TODO: get actual shape
-        let ty = wit::TensorType::Fp32; // TODO: get actual type.
-        let blob = self.1.get_blob(&output_name)?;
-        let data = blob.buffer()?.to_vec();
+        let dimensions = output_name
+            .get_shape()?
+            .get_dimensions()
+            .iter()
+            .map(|&dim| dim as u32)
+            .collect::<Vec<u32>>();
+        let ty = output_name.get_element_type()?.try_into()?;
+        let data = output_name.get_raw_data()?.to_vec();
         Ok(Tensor {
             dimensions,
             ty,
@@ -157,24 +148,49 @@ impl From<SetupError> for BackendError {
 /// Return the execution target string expected by OpenVINO from the
 /// `ExecutionTarget` enum provided by wasi-nn.
-fn map_execution_target_to_string(target: ExecutionTarget) -> &'static str {
-    match target {
-        ExecutionTarget::Cpu => "CPU",
-        ExecutionTarget::Gpu => "GPU",
-        ExecutionTarget::Tpu => unimplemented!("OpenVINO does not support TPU execution targets"),
+impl From<ExecutionTarget> for DeviceType<'static> {
+    fn from(target: ExecutionTarget) -> Self {
+        match target {
+            ExecutionTarget::Cpu => DeviceType::CPU,
+            ExecutionTarget::Gpu => DeviceType::GPU,
+            ExecutionTarget::Tpu => {
+                unimplemented!("OpenVINO does not support TPU execution targets")
+            }
+        }
     }
 }

 /// Return OpenVINO's precision type for the `TensorType` enum provided by
 /// wasi-nn.
-fn map_tensor_type_to_precision(tensor_type: TensorType) -> openvino::Precision {
-    match tensor_type {
-        TensorType::Fp16 => Precision::FP16,
-        TensorType::Fp32 => Precision::FP32,
-        TensorType::Fp64 => Precision::FP64,
-        TensorType::U8 => Precision::U8,
-        TensorType::I32 => Precision::I32,
-        TensorType::I64 => Precision::I64,
-        TensorType::Bf16 => todo!("not yet supported in `openvino` bindings"),
+impl From<TensorType> for ElementType {
+    fn from(tensor_type: TensorType) -> Self {
+        match tensor_type {
+            TensorType::Fp16 => ElementType::F16,
+            TensorType::Fp32 => ElementType::F32,
+            TensorType::Fp64 => ElementType::F64,
+            TensorType::U8 => ElementType::U8,
+            TensorType::I32 => ElementType::I32,
+            TensorType::I64 => ElementType::I64,
+            TensorType::Bf16 => ElementType::Bf16,
+        }
     }
 }
+
+/// Return the `TensorType` enum provided by wasi-nn for OpenVINO's precision type.
+impl TryFrom<ElementType> for TensorType {
+    type Error = BackendError;
+    fn try_from(element_type: ElementType) -> Result<Self, Self::Error> {
+        match element_type {
+            ElementType::F16 => Ok(TensorType::Fp16),
+            ElementType::F32 => Ok(TensorType::Fp32),
+            ElementType::F64 => Ok(TensorType::Fp64),
+            ElementType::U8 => Ok(TensorType::U8),
+            ElementType::I32 => Ok(TensorType::I32),
+            ElementType::I64 => Ok(TensorType::I64),
+            ElementType::Bf16 => Ok(TensorType::Bf16),
+            _ => Err(BackendError::UnsupportedTensorType(
+                element_type.to_string(),
+            )),
+        }
+    }
+}
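Taken together, the migration replaces the deprecated `CNNNetwork`/`ExecutableNetwork`/`Blob` pipeline with API 2.0's `Model`/`CompiledModel`/`Tensor` types. A minimal end-to-end sketch of the new flow, using only calls that appear in this diff; the input shape, single input/output indices, and the boxed error type are placeholder assumptions, not part of the change:

    use openvino::{Core, DeviceType, ElementType, Shape, Tensor};

    fn infer(xml: &[u8], weights: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        // API 2.0: `Core::new` no longer takes a plugin-config argument.
        let mut core = Core::new()?;

        // Model weights are now handed over as a U8 tensor, not a raw buffer.
        let shape = Shape::new(&[1, weights.len() as i64])?;
        let mut weights_tensor = Tensor::new(ElementType::U8, &shape)?;
        weights_tensor.get_raw_data_mut()?.copy_from_slice(weights);

        // `read_model_from_buffer` + `compile_model` replace the old
        // `read_network_from_buffer` + `load_network` pair; no NHWC layout
        // workaround is needed anymore.
        let model = core.read_model_from_buffer(xml, Some(&weights_tensor))?;
        let mut compiled = core.compile_model(&model, DeviceType::CPU)?;

        // Requests now take tensors directly instead of `Blob`s. The input
        // shape here is a placeholder; a real caller fills it with its data.
        let input = Tensor::new(ElementType::F32, &Shape::new(&[1, 3, 224, 224])?)?;
        let mut request = compiled.create_infer_request()?;
        request.set_input_tensor_by_index(0, &input)?;
        request.infer()?;

        // Outputs expose their shape, element type, and raw bytes directly.
        let output = request.get_output_tensor_by_index(0)?;
        Ok(output.get_raw_data()?.to_vec())
    }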

crates/wasi-nn/tests/check/openvino.rs (8 lines changed)

@@ -4,8 +4,12 @@ use std::fs;
 /// Return `Ok` if we find a working OpenVINO installation.
 pub fn is_installed() -> Result<()> {
-    match std::panic::catch_unwind(|| println!("> found openvino version: {}", openvino::version()))
-    {
+    match std::panic::catch_unwind(|| {
+        println!(
+            "> found openvino version: {}",
+            openvino::version().build_number
+        )
+    }) {
         Ok(_) => Ok(()),
         Err(e) => bail!(
             "unable to find an OpenVINO installation: {:?}",

supply-chain/audits.toml (15 lines changed)

@@ -2109,6 +2109,11 @@ who = "Iceber Gu <caiwei95@hotmail.com>"
 criteria = "safe-to-deploy"
 delta = "0.5.0 -> 0.6.0"

+[[audits.openvino]]
+who = "Andrew Brown <andrew.brown@intel.com>"
+criteria = "safe-to-deploy"
+delta = "0.6.0 -> 0.7.2"
+
 [[audits.openvino-finder]]
 who = "Matthew Tamayo-Rios <matthew@geekbeast.com>"
 criteria = "safe-to-deploy"
@@ -2127,6 +2132,11 @@ who = "Iceber Gu <caiwei95@hotmail.com>"
 criteria = "safe-to-deploy"
 delta = "0.5.0 -> 0.6.0"

+[[audits.openvino-finder]]
+who = "Andrew Brown <andrew.brown@intel.com>"
+criteria = "safe-to-deploy"
+delta = "0.6.0 -> 0.7.2"
+
 [[audits.openvino-sys]]
 who = "Matthew Tamayo-Rios <matthew@geekbeast.com>"
 criteria = "safe-to-deploy"
@@ -2145,6 +2155,11 @@ who = "Iceber Gu <caiwei95@hotmail.com>"
 criteria = "safe-to-deploy"
 delta = "0.5.0 -> 0.6.0"

+[[audits.openvino-sys]]
+who = "Andrew Brown <andrew.brown@intel.com>"
+criteria = "safe-to-deploy"
+delta = "0.6.0 -> 0.7.2"
+
 [[audits.ort]]
 who = "Andrew Brown <andrew.brown@intel.com>"
 criteria = "safe-to-deploy"
