Support loading models by name (#38)

Co-authored-by: Matthew Tamayo-Rios <[email protected]>
WebAssembly · Aug 9, 2023 · c1ff124 · c1ff124
1 parent f47f35c
commit c1ff124
Show file tree

Hide file tree

Showing 4 changed files with 111 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -123,7 +123,7 @@ detailed examples.
 
 ### Detailed design discussion
 
-For the details of the API, see [wasi-nn.wit.md](wasi-nn.wit.md).
+For the details of the API, see [wasi-nn.wit](wit/wasi-nn.wit).
 
 <!--
 This section should mostly refer to the .wit.md file that specifies the API. This section is for

diff --git a/wasi-nn.wit.md → legacy_wit/wasi-nn.wit.md b/wasi-nn.wit.md → legacy_wit/wasi-nn.wit.md
diff --git a/wasi-nn.witx b/wasi-nn.witx
@@ -2,6 +2,8 @@
 ;; version for the official specification and documentation.
 
 (typename $buffer_size u32)
+(typename $status u32)
+
 (typename $nn_errno
   (enum (@witx tag u16)
     $success
@@ -10,6 +12,9 @@
     $missing_memory
     $busy
     $runtime_error
+    $unsupported_operation
+    $model_too_large
+    $model_not_found
   )
 )
 (typename $tensor_dimensions (list u32))
@@ -39,6 +44,7 @@
     $tensorflow
     $pytorch
     $tensorflowlite
+    $autodetect
   )
 )
 (typename $execution_target
@@ -58,6 +64,17 @@
     (param $target $execution_target)
     (result $error (expected $graph (error $nn_errno)))
   )
+  ;;; Load a graph by name instead of from raw bytes.
+  ;;;
+  ;;; The name is looked up in the model registry; on success the loaded `$graph` is returned. Failures are reported
+  ;;; as `$nn_errno` values (presumably `$model_not_found` for an unknown name -- TODO confirm).
+  (@interface func (export "load_by_name")
+    ;;; The name of the model to load from the model registry
+    (param $model_name string)
+
+    (result $error (expected $graph (error $nn_errno)))
+  )
+
   (@interface func (export "init_execution_context")
     (param $graph $graph)
     (result $error (expected $graph_execution_context (error $nn_errno)))

diff --git a/wit/wasi-nn.wit b/wit/wasi-nn.wit
@@ -0,0 +1,93 @@
+package wasi:nn
+
+// The `inference` world: it imports the `tensor`, `graph`, `execution`, and
+// `errors` interfaces declared in this file.
+world inference {
+    import tensor
+    import graph
+    import execution
+    import errors
+}
+
+// Types describing a tensor: its dimensions, its element type, and its data.
+interface tensor {
+    // The size of each dimension of a tensor, as a list of `u32` values.
+    type tensor-dimensions = list<u32>
+    // The contents of a tensor, carried as a list of bytes.
+    type tensor-data = list<u8>
+
+    // The element types a tensor may declare.
+    // NOTE(review): `up8` and `ip32` presumably denote 8-bit unsigned and 32-bit
+    // signed integers -- confirm against the specification.
+    enum tensor-type {
+        fp16,
+        fp32,
+        bf16,
+        up8,
+        ip32
+    }
+
+    record tensor {
+        // Describe the size of the tensor (e.g., 2x2x2x2 -> [2, 2, 2, 2]). To represent a tensor
+        // containing a single value, use `[1]` for the tensor dimensions.
+        dimensions: tensor-dimensions,
+
+        // Describe the type of element in the tensor (e.g., fp32).
+        tensor-type: tensor-type,
+
+        // Contains the tensor data.
+        data: tensor-data,
+    }
+}
+
+// Types and functions for loading a graph, either from bytes or by name.
+interface graph {
+    use errors.{error}
+    // One graph-builder is a list of bytes; `load` accepts an array of them.
+    type graph-builder = list<u8>
+    type graph-builder-array = list<graph-builder>
+    use tensor.{tensor}
+
+    // A graph is represented as a `u32` (presumably an opaque handle -- TODO confirm).
+    type graph = u32
+
+    // The encodings that can be passed to `load`.
+    enum graph-encoding {
+        openvino,
+        onnx,
+        tensorflow,
+        pytorch,
+        tensorflowlite,
+        autodetect,
+    }
+
+    // The execution targets that can be passed to `load`.
+    enum execution-target {
+        cpu,
+        gpu,
+        tpu
+    }
+
+    // Load a graph from the given builders, with the given encoding and execution target.
+    // Returns the graph on success or an `error` on failure.
+    load: func(builder: graph-builder-array, encoding: graph-encoding, target: execution-target) -> result<graph, error>
+    // Load a graph identified by `name`. Returns the graph on success or an `error` on failure.
+    load-named-model: func(name: string) -> result<graph, error>
+}
+
+// Functions for creating an execution context from a graph, binding inputs,
+// running a computation, and reading outputs. Every function returns an
+// `error` on failure.
+interface execution {
+    use errors.{error}
+    use tensor.{tensor, tensor-data}
+    use graph.{graph}
+
+    // An execution context is represented as a `u32` (presumably an opaque handle -- TODO confirm).
+    type graph-execution-context = u32
+    // Create an execution context for `graph`.
+    init-execution-context: func(graph: graph) -> result<graph-execution-context, error>
+    // Provide `tensor` as the input at position `index` of context `ctx`.
+    set-input: func(ctx: graph-execution-context, index: u32, tensor: tensor) -> result<_, error>
+    // Provide `tensor` as the input called `name` of context `ctx`.
+    set-input-by-name: func(ctx: graph-execution-context, name: string, tensor: tensor) -> result<_, error>
+    // Run the computation for context `ctx`; no value is returned on success.
+    compute: func(ctx: graph-execution-context) -> result<_, error>
+    // Retrieve the output at position `index` of context `ctx`.
+    // NOTE(review): the success type is `list<tensor-data>` (a list of byte lists), not a
+    // single `tensor-data` -- confirm this is intended.
+    get-output: func(ctx: graph-execution-context, index: u32) -> result<list<tensor-data>, error>
+    // Take a list of tensors and return a list of tensors.
+    // NOTE(review): no graph or execution context is passed -- confirm how the model is selected.
+    eval: func(tensors: list<tensor>) -> result<list<tensor>, error>
+
+}
+
+// The error cases returned by the functions of the `graph` and `execution` interfaces.
+interface errors {
+    enum error {
+        // Caller module passed an invalid argument.
+        invalid-argument,
+        // Invalid encoding.
+        invalid-encoding,
+        // Busy (NOTE(review): presumably a device or resource is busy -- confirm).
+        busy,
+        // Runtime Error.
+        runtime-error,
+        // Unsupported operation
+        unsupported-operation,
+        // Model too large
+        model-too-large,
+        // Model not found
+        model-not-found
+    }
+}