[WebNN EP] Enable IO Bindings with MLBuffer
Enables using MLBuffers to pass data between models. This reduces the number of
copies between the CPU and devices, as well as between the renderer and the GPU
process in Chromium.
egalli committed Aug 16, 2024
1 parent b9f3a5d commit 8cc8632
Showing 33 changed files with 1,116 additions and 62 deletions.
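As a rough illustration of the API added by this change, here is a minimal sketch of binding WebNN MLBuffers to a session so inputs and outputs stay on the device. The model path, the input/output names, the shapes, and the pre-created `inputMlBuffer`/`outputMlBuffer` are assumptions for the example; the MLBuffers themselves come from the WebNN API, whose MLBuffer surface is still in flux.

import * as ort from 'onnxruntime-web';

// Hypothetical pre-created WebNN buffers; in real code these come from the WebNN API.
declare const inputMlBuffer: unknown;
declare const outputMlBuffer: unknown;

const session = await ort.InferenceSession.create('model.onnx', {
  executionProviders: ['webnn'],
});

// Wrap the device buffers as tensors; no copy to the CPU is made here.
const input = ort.Tensor.fromMLBuffer(inputMlBuffer, { dataType: 'float32', dims: [1, 3, 224, 224] });
const output = ort.Tensor.fromMLBuffer(outputMlBuffer, { dataType: 'float32', dims: [1, 1000] });

// Pre-binding the output tensor keeps the result in its MLBuffer as well.
const results = await session.run({ input }, { output });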
1 change: 1 addition & 0 deletions include/onnxruntime/core/framework/allocator.h
@@ -51,6 +51,7 @@ constexpr const char* HIP_PINNED = "HipPinned";
constexpr const char* OpenVINO_CPU = "OpenVINO_CPU";
constexpr const char* OpenVINO_GPU = "OpenVINO_GPU";
constexpr const char* WEBGPU_BUFFER = "WebGPU_Buffer";
constexpr const char* WEBNN_BUFFER = "WebNN_Buffer";

constexpr size_t kAllocAlignment = 256;

12 changes: 12 additions & 0 deletions js/common/lib/tensor-factory-impl.ts
@@ -11,6 +11,7 @@ import {
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLBufferOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
} from './tensor-factory.js';
@@ -310,6 +311,17 @@ export const tensorFromGpuBuffer = <T extends TensorInterface.GpuBufferDataTypes
return new Tensor({ location: 'gpu-buffer', type: dataType ?? 'float32', gpuBuffer, dims, download, dispose });
};

/**
* implementation of Tensor.fromMLBuffer().
*/
export const tensorFromMLBuffer = <T extends TensorInterface.MLBufferDataTypes>(
mlBuffer: TensorInterface.MLBufferType,
options: TensorFromMLBufferOptions<T>,
): Tensor => {
const { dataType, dims, download, dispose } = options;
return new Tensor({ location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose });
};

/**
* implementation of Tensor.fromPinnedBuffer().
*/
46 changes: 46 additions & 0 deletions js/common/lib/tensor-factory.ts
@@ -86,6 +86,20 @@ export interface GpuBufferConstructorParameters<T extends Tensor.GpuBufferDataTy
readonly gpuBuffer: Tensor.GpuBufferType;
}

export interface MLBufferConstructorParameters<T extends Tensor.MLBufferDataTypes = Tensor.MLBufferDataTypes>
extends CommonConstructorParameters<T>,
GpuResourceConstructorParameters<T> {
/**
* Specify the location of the data to be 'ml-buffer'.
*/
readonly location: 'ml-buffer';

/**
* Specify the WebNN buffer that holds the tensor data.
*/
readonly mlBuffer: Tensor.MLBufferType;
}

// #endregion

// the following region contains type definitions of each individual options.
@@ -219,6 +233,15 @@ export interface TensorFromGpuBufferOptions<T extends Tensor.GpuBufferDataTypes>
dataType?: T;
}

export interface TensorFromMLBufferOptions<T extends Tensor.MLBufferDataTypes>
extends Pick<Tensor, 'dims'>,
GpuResourceConstructorParameters<T> {
/**
* Describes the data type of the tensor.
*/
dataType?: T;
}

// #endregion

/**
@@ -336,6 +359,29 @@ export interface TensorFactory {
options: TensorFromGpuBufferOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a WebNN MLBuffer
*
* @param buffer - the MLBuffer object to create tensor from
* @param options - An optional object representing options for creating tensor from a WebNN MLBuffer.
*
* The options include following properties:
* - `dataType`: the data type of the tensor. If omitted, assume 'float32'.
* - `dims`: the dimension of the tensor. Required.
* - `download`: an optional function to download the tensor data from the MLBuffer to the CPU. If omitted, the
* MLBuffer data cannot be downloaded. Usually, this is provided by the WebNN backend for the inference outputs.
* Users don't need to provide this function.
* - `dispose`: an optional function to dispose the tensor data on the WebNN MLBuffer. If omitted, the MLBuffer will
* not be disposed. Usually, this is provided by the WebNN backend for the inference outputs. Users don't need to
* provide this function.
*
* @returns a tensor object
*/
fromMLBuffer<T extends Tensor.MLBufferDataTypes>(
buffer: Tensor.MLBufferType,
options: TensorFromMLBufferOptions<T>,
): TypedTensor<T>;

/**
* create a tensor from a pre-allocated buffer. The buffer will be used as a pinned buffer.
*
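To make the role of the optional `download` and `dispose` callbacks documented above for `fromMLBuffer` concrete, a hedged sketch follows. `readBackFromDevice` and `releaseDeviceBuffer` are hypothetical placeholders for whatever mechanism actually reads back and frees an MLBuffer; in practice the WebNN backend wires these callbacks up for inference outputs, so users rarely supply them.

import { Tensor } from 'onnxruntime-web';

// Hypothetical device helpers and buffer, used only to illustrate the option shape.
declare const someMlBuffer: unknown;
declare function readBackFromDevice(buffer: unknown): Promise<Float32Array>;
declare function releaseDeviceBuffer(buffer: unknown): void;

const tensor = Tensor.fromMLBuffer(someMlBuffer, {
  dataType: 'float32',
  dims: [2, 2],
  download: () => readBackFromDevice(someMlBuffer), // used by getData()/download paths
  dispose: () => releaseDeviceBuffer(someMlBuffer), // used by dispose()
});

// getData(true) downloads through the callback and then releases the tensor's data.
const cpuData = await tensor.getData(true);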
59 changes: 57 additions & 2 deletions js/common/lib/tensor-impl.ts
@@ -6,16 +6,19 @@ import { TensorToDataUrlOptions, TensorToImageDataOptions } from './tensor-conve
import {
tensorFromGpuBuffer,
tensorFromImage,
tensorFromMLBuffer,
tensorFromPinnedBuffer,
tensorFromTexture,
} from './tensor-factory-impl.js';
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLBufferConstructorParameters,
TensorFromGpuBufferOptions,
TensorFromImageBitmapOptions,
TensorFromImageDataOptions,
TensorFromImageElementOptions,
TensorFromMLBufferOptions,
TensorFromTextureOptions,
TensorFromUrlOptions,
TextureConstructorParameters,
@@ -37,6 +40,7 @@ type TensorDataType = TensorInterface.DataType;
type TensorDataLocation = TensorInterface.DataLocation;
type TensorTextureType = TensorInterface.TextureType;
type TensorGpuBufferType = TensorInterface.GpuBufferType;
type TensorMLBufferType = TensorInterface.MLBufferType;

/**
* the implementation of Tensor interface.
@@ -83,6 +87,15 @@ export class Tensor implements TensorInterface {
*/
constructor(params: GpuBufferConstructorParameters);

/**
* Construct a new tensor object from the WebNN buffer with the given type and dims.
*
* Tensor's location will be set to 'ml-buffer'.
*
* @param params - Specify the parameters to construct the tensor.
*/
constructor(params: MLBufferConstructorParameters);

/**
* implementation.
*/
@@ -94,7 +107,8 @@ export class Tensor implements TensorInterface {
| readonly boolean[]
| CpuPinnedConstructorParameters
| TextureConstructorParameters
| GpuBufferConstructorParameters,
| GpuBufferConstructorParameters
| MLBufferConstructorParameters,
arg1?: TensorDataType | readonly number[] | readonly string[] | readonly boolean[],
arg2?: readonly number[],
) {
@@ -149,6 +163,25 @@ export class Tensor implements TensorInterface {
this.disposer = arg0.dispose;
break;
}
case 'ml-buffer': {
if (
type !== 'float32' &&
type !== 'float16' &&
type !== 'int32' &&
type !== 'int64' &&
type !== 'uint32' &&
type !== 'uint64' &&
type !== 'int8' &&
type !== 'uint8' &&
type !== 'bool'
) {
throw new TypeError(`unsupported type "${type}" to create tensor from MLBuffer`);
}
this.mlBufferData = arg0.mlBuffer;
this.downloader = arg0.download;
this.disposer = arg0.dispose;
break;
}
default:
throw new Error(`Tensor constructor: unsupported location '${this.dataLocation}'`);
}
@@ -301,6 +334,13 @@ export class Tensor implements TensorInterface {
return tensorFromGpuBuffer(gpuBuffer, options);
}

static fromMLBuffer<T extends TensorInterface.MLBufferDataTypes>(
mlBuffer: TensorMLBufferType,
options: TensorFromMLBufferOptions<T>,
): TensorInterface {
return tensorFromMLBuffer(mlBuffer, options);
}

static fromPinnedBuffer<T extends TensorInterface.CpuPinnedDataTypes>(
type: T,
buffer: TensorInterface.DataTypeMap[T],
@@ -349,6 +389,11 @@ export class Tensor implements TensorInterface {
*/
private gpuBufferData?: TensorGpuBufferType;

/**
* stores the underlying WebNN MLBuffer when location is 'ml-buffer'. otherwise empty.
*/
private mlBufferData?: TensorMLBufferType;

/**
* stores an optional downloader function to download data from GPU to CPU.
*/
@@ -396,6 +441,14 @@ export class Tensor implements TensorInterface {
}
return this.gpuBufferData;
}

get mlBuffer(): TensorMLBufferType {
this.ensureValid();
if (!this.mlBufferData) {
throw new Error('The data is not stored as a WebNN buffer.');
}
return this.mlBufferData;
}
// #endregion

// #region methods
@@ -407,7 +460,8 @@ export class Tensor implements TensorInterface {
case 'cpu-pinned':
return this.data;
case 'texture':
case 'gpu-buffer': {
case 'gpu-buffer':
case 'ml-buffer': {
if (!this.downloader) {
throw new Error('The current tensor is not created with a specified data downloader.');
}
@@ -448,6 +502,7 @@ export class Tensor implements TensorInterface {
this.cpuData = undefined;
this.gpuTextureData = undefined;
this.gpuBufferData = undefined;
this.mlBufferData = undefined;
this.downloader = undefined;
this.isDownloading = undefined;

8 changes: 8 additions & 0 deletions js/common/lib/tensor-utils-impl.ts
@@ -4,6 +4,7 @@
import {
CpuPinnedConstructorParameters,
GpuBufferConstructorParameters,
MLBufferConstructorParameters,
TextureConstructorParameters,
} from './tensor-factory.js';
import { Tensor } from './tensor-impl.js';
@@ -56,6 +57,13 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
type: tensor.type as GpuBufferConstructorParameters['type'],
dims,
});
case 'ml-buffer':
return new Tensor({
location: 'ml-buffer',
mlBuffer: tensor.mlBuffer,
type: tensor.type as MLBufferConstructorParameters['type'],
dims,
});
default:
throw new Error(`tensorReshape: tensor location ${tensor.location} is not supported`);
}
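The new 'ml-buffer' branch above means a reshape never copies device data: it just creates a new Tensor object that points at the same underlying MLBuffer with different `dims`. A small sketch of what that looks like from the public API, with a hypothetical `someMlBuffer` whose element count matches the shapes used:

import { Tensor } from 'onnxruntime-web';

declare const someMlBuffer: unknown; // hypothetical WebNN buffer holding 6 float32 values

const flat = Tensor.fromMLBuffer(someMlBuffer, { dataType: 'float32', dims: [6] });
const matrix = flat.reshape([2, 3]);

console.log(matrix.location);                   // 'ml-buffer'
console.log(matrix.mlBuffer === flat.mlBuffer); // true: same device buffer, no copy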
30 changes: 29 additions & 1 deletion js/common/lib/tensor.ts
@@ -42,6 +42,13 @@ interface TypedTensorBase<T extends Tensor.Type> {
*/
readonly gpuBuffer: Tensor.GpuBufferType;

/**
* Get the WebNN buffer that holds the tensor data.
*
* If the data is not in a WebNN MLBuffer, throw error.
*/
readonly mlBuffer: Tensor.MLBufferType;

/**
* Get the buffer data of the tensor.
*
@@ -132,15 +139,36 @@ export declare namespace Tensor {
*/
export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };

/**
* type alias for WebNN MLBuffer
*
* The specification for WebNN's MLBuffer is currently in flux.
*/
export type MLBufferType = unknown;

/**
* supported data types for constructing a tensor from a WebGPU buffer
*/
export type GpuBufferDataTypes = 'float32' | 'float16' | 'int32' | 'int64' | 'uint32' | 'uint8' | 'bool';

/**
* supported data types for constructing a tensor from a WebNN MLBuffer
*/
export type MLBufferDataTypes =
| 'float32'
| 'float16'
| 'int8'
| 'uint8'
| 'int32'
| 'uint32'
| 'int64'
| 'uint64'
| 'bool';

/**
* represent where the tensor data is stored
*/
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer';
export type DataLocation = 'none' | 'cpu' | 'cpu-pinned' | 'texture' | 'gpu-buffer' | 'ml-buffer';

/**
* represent the data type of a tensor