[WebNN EP] Enable IO Bindings with MLBuffer
Enables using MLBuffers to pass data between models. This reduces the number of
copies between the CPU and devices, as well as between the renderer and the GPU
process in Chromium.
egalli committed Aug 16, 2024
1 parent b9f3a5d commit 8cc8632
Showing 33 changed files with 1,116 additions and 62 deletions.
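As a rough illustration of the API added by this change, here is a minimal sketch of binding WebNN MLBuffers to a session so inputs and outputs stay on the device. The model path, the input/output names, the shapes, and the pre-created `inputMlBuffer`/`outputMlBuffer` are assumptions for the example; the MLBuffers themselves come from the WebNN API, whose MLBuffer surface is still in flux.

import * as ort from 'onnxruntime-web';

// Hypothetical pre-created WebNN buffers; in real code these come from the WebNN API.
declare const inputMlBuffer: unknown;
declare const outputMlBuffer: unknown;

const session = await ort.InferenceSession.create('model.onnx', {
  executionProviders: ['webnn'],
});

// Wrap the device buffers as tensors; no copy to the CPU is made here.
const input = ort.Tensor.fromMLBuffer(inputMlBuffer, { dataType: 'float32', dims: [1, 3, 224, 224] });
const output = ort.Tensor.fromMLBuffer(outputMlBuffer, { dataType: 'float32', dims: [1, 1000] });

// Pre-binding the output tensor keeps the result in its MLBuffer as well.
const results = await session.run({ input }, { output });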
1 change: 1 addition & 0 deletions include/onnxruntime/core/framework/allocator.h
@@ -51,6 +51,7 @@ constexpr const char* HIP_PINNED = "HipPinned";
constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
constexpr const char* WEBNN_BUFFER = "WebNN_Buffer";

constexpr size_t kAllocAlignment = 256;

12 changes: 12 additions & 0 deletions js/common/lib/tensor-factory-impl.ts
@@ -11,6 +11,7 @@ import {
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLBufferOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
} from './tensor-factory.js';
@@ -310,6 +311,17 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes
return new Tensor({ location: 'gpu-buffer', type: dataType ?? 'float32', gpuBuffer, dims, download, dispose });
};

/**
* implementation of Tensor.fromMLBuffer().
*/
export const tensorFromMLBuffer = <T extends TensorInterface.MLBufferDataTypes>(
mlBuffer: TensorInterface.MLBufferType,
options: TensorFromMLBufferOptions<T>,
): Tensor => {
const { dataType, dims, download, dispose } = options;
return new Tensor({ location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose });
};

/**
* implementation of Tensor.fromPinnedBuffer().
*/
46 changes: 46 additions & 0 deletions js/common/lib/tensor-factory.ts
@@ -86,6 +86,20 @@ export interface GpuBufferConstructorParameters<T extends Tensor.GpuBufferDataTy
readonly gpuBuffer: Tensor.GpuBufferType;
}

export interface MLBufferConstructorParameters<T extends Tensor.MLBufferDataTypes = Tensor.MLBufferDataTypes>
extends CommonConstructorParameters<T>,
GpuResourceConstructorParameters<T> {
/**
* Specify the location of the data to be 'ml-buffer'.
*/
readonly location: 'ml-buffer';

/**
* Specify the WebNN buffer that holds the tensor data.
*/
readonly mlBuffer: Tensor.MLBufferType;
}

// #endregion

// the following region contains type definitions of each individual options.
@@ -219,6 +233,15 @@ export interface TensorFromGpuBufferOptions<T extends Tensor.GpuBufferDataTypes>
dataType?: T;
}

export interface TensorFromMLBufferOptions<T extends Tensor.MLBufferDataTypes>
extends Pick<Tensor, 'dims'>,
GpuResourceConstructorParameters<T> {
/**
* Describes the data type of the tensor.
*/
dataType?: T;
}

// #endregion

/**
@@ -336,6 +359,29 @@ export interface TensorFactory {
options: TensorFromGpuBufferOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a WebNN MLBuffer
*
* @param buffer - the MLBuffer object to create tensor from
* @param options - An optional object representing options for creating tensor from a WebNN MLBuffer.
*
* The options include following properties:
* - `dataType`: the data type of the tensor. If omitted, assume 'float32'.
* - `dims`: the dimension of the tensor. Required.
* - `download`: an optional function to download the tensor data from the MLBuffer to the CPU. If omitted, the
* MLBuffer data cannot be downloaded. Usually, this is provided by the WebNN backend for the inference outputs.
* Users don't need to provide this function.
* - `dispose`: an optional function to dispose the tensor data on the WebNN MLBuffer. If omitted, the MLBuffer will
* not be disposed. Usually, this is provided by the WebNN backend for the inference outputs. Users don't need to
* provide this function.
*
* @returns a tensor object
*/
fromMLBuffer<T extends Tensor.MLBufferDataTypes>(
buffer: Tensor.MLBufferType,
options: TensorFromMLBufferOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a pre-allocated buffer. The buffer will be used as a pinned buffer.
*
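To make the role of the optional `download` and `dispose` callbacks documented above for `fromMLBuffer` concrete, a hedged sketch follows. `readBackFromDevice` and `releaseDeviceBuffer` are hypothetical placeholders for whatever mechanism actually reads back and frees an MLBuffer; in practice the WebNN backend wires these callbacks up for inference outputs, so users rarely supply them.

import { Tensor } from 'onnxruntime-web';

// Hypothetical device helpers and buffer, used only to illustrate the option shape.
declare const someMlBuffer: unknown;
declare function readBackFromDevice(buffer: unknown): Promise<Float32Array>;
declare function releaseDeviceBuffer(buffer: unknown): void;

const tensor = Tensor.fromMLBuffer(someMlBuffer, {
  dataType: 'float32',
  dims: [2, 2],
  download: () => readBackFromDevice(someMlBuffer), // used by getData()/download paths
  dispose: () => releaseDeviceBuffer(someMlBuffer), // used by dispose()
});

// getData(true) downloads through the callback and then releases the tensor's data.
const cpuData = await tensor.getData(true);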
59 changes: 57 additions & 2 deletions js/common/lib/tensor-impl.ts
@@ -6,16 +6,19 @@ import { TensorToDataUrlOptions, TensorToImageDataOptions } from './tensor-conve
import {
tensorFromGpuBuffer,
tensorFromImage,
tensorFromMLBuffer,
tensorFromPinnedBuffer,
tensorFromTexture,
} from './tensor-factory-impl.js';
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLBufferConstructorParameters,
TensorFromGpuBufferOptions,
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLBufferOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
TextureConstructorParameters,
@@ -37,6 +40,7 @@ type TensorDataType = TensorInterface.DataType;
type TensorDataLocation = TensorInterface.DataLocation;
type TensorTextureType = TensorInterface.TextureType;
type TensorGpuBufferType = TensorInterface.GpuBufferType;
type TensorMLBufferType = TensorInterface.MLBufferType;

/**
* the implementation of Tensor interface.
@@ -83,6 +87,15 @@ export class Tensor implements TensorInterface {
*/
constructor(params: GpuBufferConstructorParameters);

/**
* Construct a new tensor object from the WebNN buffer with the given type and dims.
*
* Tensor's location will be set to 'ml-buffer'.
*
* @param params - Specify the parameters to construct the tensor.
*/
constructor(params: MLBufferConstructorParameters);

/**
* implementation.
*/
@@ -94,7 +107,8 @@ export class Tensor implements TensorInterface {
| readonly boolean[]
| CpuPinnedConstructorParameters
| TextureConstructorParameters
| GpuBufferConstructorParameters,
| GpuBufferConstructorParameters
| MLBufferConstructorParameters,
arg1?: TensorDataType | readonly number[] | readonly string[] | readonly boolean[],
arg2?: readonly number[],
) {
@@ -149,6 +163,25 @@ export class Tensor implements TensorInterface {
this.disposer = arg0.dispose;
break;
}
case 'ml-buffer': {
if (
type !== 'float32' &&
type !== 'float16' &&
type !== 'int32' &&
type !== 'int64' &&
type !== 'uint32' &&
type !== 'uint64' &&
type !== 'int8' &&
type !== 'uint8' &&
type !== 'bool'
) {
throw new TypeError(`unsupported type "${type}" to create tensor from MLBuffer`);
}
this.mlBufferData = arg0.mlBuffer;
this.downloader = arg0.download;
this.disposer = arg0.dispose;
break;
}
default:
throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`);
}
@@ -301,6 +334,13 @@ export class Tensor implements TensorInterface {
return tensorFromGpuBuffer(gpuBuffer, options);
}

static fromMLBuffer<T extends TensorInterface.MLBufferDataTypes>(
mlBuffer: TensorMLBufferType,
options: TensorFromMLBufferOptions<T>,
): TensorInterface {
return tensorFromMLBuffer(mlBuffer, options);
}

static fromPinnedBuffer<T extends TensorInterface.CpuPinnedDataTypes>(
type: T,
buffer: TensorInterface.DataTypeMap[T],
@@ -349,6 +389,11 @@ export class Tensor implements TensorInterface {
*/
private gpuBufferData?: TensorGpuBufferType;

/**
* stores the underlying WebNN MLBuffer when location is 'ml-buffer'. otherwise empty.
*/
private mlBufferData?: TensorMLBufferType;

/**
* stores an optional downloader function to download data from GPU to CPU.
*/
@@ -396,6 +441,14 @@ export class Tensor implements TensorInterface {
}
return this.gpuBufferData;
}

get mlBuffer(): TensorMLBufferType {
this.ensureValid();
if (!this.mlBufferData) {
throw new Error('The data is not stored as a WebNN buffer.');
}
return this.mlBufferData;
}
// #endregion

// #region methods
@@ -407,7 +460,8 @@ export class Tensor implements TensorInterface {
case 'cpu-pinned':
return this.data;
case 'texture':
case 'gpu-buffer': {
case 'gpu-buffer':
case 'ml-buffer': {
if (!this.downloader) {
throw new Error('The current tensor is not created with a specified data downloader.');
}
@@ -448,6 +502,7 @@ export class Tensor implements TensorInterface {
this.cpuData = undefined;
this.gpuTextureData = undefined;
this.gpuBufferData = undefined;
this.mlBufferData = undefined;
this.downloader = undefined;
this.isDownloading = undefined;

8 changes: 8 additions & 0 deletions js/common/lib/tensor-utils-impl.ts
@@ -4,6 +4,7 @@
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLBufferConstructorParameters,
TextureConstructorParameters,
} from './tensor-factory.js';
import { Tensor } from './tensor-impl.js';
@@ -56,6 +57,13 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
type: tensor.type as GpuBufferConstructorParameters['type'],
dims,
});
case 'ml-buffer':
return new Tensor({
location: 'ml-buffer',
mlBuffer: tensor.mlBuffer,
type: tensor.type as MLBufferConstructorParameters['type'],
dims,
});
default:
throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`);
}
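The new 'ml-buffer' branch above means a reshape never copies device data: it just creates a new Tensor object that points at the same underlying MLBuffer with different `dims`. A small sketch of what that looks like from the public API, with a hypothetical `someMlBuffer` whose element count matches the shapes used:

import { Tensor } from 'onnxruntime-web';

declare const someMlBuffer: unknown; // hypothetical WebNN buffer holding 6 float32 values

const flat = Tensor.fromMLBuffer(someMlBuffer, { dataType: 'float32', dims: [6] });
const matrix = flat.reshape([2, 3]);

console.log(matrix.location);                   // 'ml-buffer'
console.log(matrix.mlBuffer === flat.mlBuffer); // true: same device buffer, no copy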
30 changes: 29 additions & 1 deletion js/common/lib/tensor.ts
@@ -42,6 +42,13 @@ interface TypedTensorBase<T extends Tensor.Type> {
*/
readonly gpuBuffer: Tensor.GpuBufferType;

/**
* Get the WebNN buffer that holds the tensor data.
*
* If the data is not in a WebNN MLBuffer, throw error.
*/
readonly mlBuffer: Tensor.MLBufferType;

/**
* Get the buffer data of the tensor.
*
@@ -132,15 +139,36 @@ export declare namespace Tensor {
*/
export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };

/**
* type alias for WebNN MLBuffer
*
* The specification for WebNN's MLBuffer is currently in flux.
*/
export type MLBufferType = unknown;

/**
* supported data types for constructing a tensor from a WebGPU buffer
*/
export type GpuBufferDataTypes = 'float32' | 'float16' | 'int32' | 'int64' | 'uint32' | 'uint8' | 'bool';

/**
* supported data types for constructing a tensor from a WebNN MLBuffer
*/
export type MLBufferDataTypes =
| 'float32'
| 'float16'
| 'int8'
| 'uint8'
| 'int32'
| 'uint32'
| 'int64'
| 'uint64'
| 'bool';

/**
* represent where the tensor data is stored
*/
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer';
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer' | 'ml-buffer';

/**
* represent the data type of a tensor