From 1e07f85e4b69cf83613e6af9b4ac325ba278f12e Mon Sep 17 00:00:00 2001
From: Enrico Galli
Date: Wed, 17 Jul 2024 15:27:11 -0700
Subject: [PATCH] PR feedback

* Fixed issues when building under debug
* Disabled MLBuffer on CPU device types
* Renamed MlBuffer and MlContext to match specification
---
 js/common/lib/tensor-factory-impl.ts          |  6 +--
 js/common/lib/tensor-factory.ts               |  8 +--
 js/common/lib/tensor-impl.ts                  | 20 ++++----
 js/common/lib/tensor-utils-impl.ts            |  4 +-
 js/common/lib/tensor.ts                       |  6 +--
 js/web/lib/wasm/jsep/backend-webnn.ts         | 26 +++++-----
 js/web/lib/wasm/jsep/init.ts                  | 17 ++++++-
 js/web/lib/wasm/proxy-messages.ts             | 10 ++--
 js/web/lib/wasm/session-handler-inference.ts  |  6 +--
 js/web/lib/wasm/wasm-common.ts                |  2 +-
 js/web/lib/wasm/wasm-core-impl.ts             | 20 ++++----
 js/web/lib/wasm/wasm-types.ts                 | 22 +++++---
 js/web/test/test-runner.ts                    | 10 ++--
 onnxruntime/core/providers/webnn/allocator.cc |  1 -
 onnxruntime/core/providers/webnn/allocator.h  |  1 -
 .../core/providers/webnn/builders/helper.cc   | 18 ++++++-
 .../core/providers/webnn/builders/helper.h    |  4 +-
 .../core/providers/webnn/builders/model.cc    | 10 ++--
 .../core/providers/webnn/builders/model.h     |  4 +-
 .../providers/webnn/builders/model_builder.cc |  2 +-
 .../core/providers/webnn/data_transfer.cc     |  6 ++-
 .../core/providers/webnn/data_transfer.h      |  1 -
 .../webnn/webnn_execution_provider.cc         | 20 +++----
 onnxruntime/wasm/pre-jsep.js                  | 50 +++++++++----------
 24 files changed, 154 insertions(+), 120 deletions(-)

diff --git a/js/common/lib/tensor-factory-impl.ts b/js/common/lib/tensor-factory-impl.ts
index e8db9c754910f..ad255999cb96c 100644
--- a/js/common/lib/tensor-factory-impl.ts
+++ b/js/common/lib/tensor-factory-impl.ts
@@ -275,10 +275,10 @@ export const tensorFromGpuBuffer = (
-    mlBuffer: TensorInterface.MlBufferType, options: TensorFromGpuBufferOptions): Tensor => {
+export const tensorFromMLBuffer = (
+    mlBuffer: TensorInterface.MLBufferType, options: TensorFromGpuBufferOptions): Tensor => {
   const {dataType, dims, download, dispose} = options;
   return new Tensor({location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose});
 };
diff --git a/js/common/lib/tensor-factory.ts b/js/common/lib/tensor-factory.ts
index 663a833a1cf8d..68a79353d3f20 100644
--- a/js/common/lib/tensor-factory.ts
+++ b/js/common/lib/tensor-factory.ts
@@ -84,7 +84,7 @@ export interface GpuBufferConstructorParameters extends
+export interface MLBufferConstructorParameters extends
     CommonConstructorParameters, GpuResourceConstructorParameters {
   /**
    * Specify the location of the data to be 'ml-buffer'.
@@ -94,7 +94,7 @@ export interface MlBufferConstructorParameters
   dataType?: T;
 }
 
-export interface TensorFromMlBufferOptions extends
+export interface TensorFromMLBufferOptions extends
     Pick, GpuResourceConstructorParameters {
   /**
    * Describes the data type of the tensor.
@@ -345,7 +345,7 @@ export interface TensorFactory {
    *
    * @returns a tensor object
    */
-  fromMlBuffer(buffer: Tensor.MlBufferType, options: TensorFromMlBufferOptions):
+  fromMLBuffer(buffer: Tensor.MLBufferType, options: TensorFromMLBufferOptions):
       TypedTensor;
 
   /**
diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts
index 4e4057ee2a13a..a0a03074e83fb 100644
--- a/js/common/lib/tensor-impl.ts
+++ b/js/common/lib/tensor-impl.ts
@@ -3,8 +3,8 @@
 import {tensorToDataURL, tensorToImageData} from './tensor-conversion-impl.js';
 import {TensorToDataUrlOptions, TensorToImageDataOptions} from './tensor-conversion.js';
-import {tensorFromGpuBuffer, tensorFromImage, tensorFromMlBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MlBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
+import {tensorFromGpuBuffer, tensorFromImage, tensorFromMLBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
 import {checkTypedArray, NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP, NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, SupportedTypedArray, SupportedTypedArrayConstructors} from './tensor-impl-type-mapping.js';
 import {calculateSize, tensorReshape} from './tensor-utils-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';
@@ -16,7 +16,7 @@ type TensorDataType = TensorInterface.DataType;
 type TensorDataLocation = TensorInterface.DataLocation;
 type TensorTextureType = TensorInterface.TextureType;
 type TensorGpuBufferType = TensorInterface.GpuBufferType;
-type TensorMlBufferType = TensorInterface.MlBufferType;
+type TensorMLBufferType = TensorInterface.MLBufferType;
 
 /**
  * the implementation of Tensor interface.
@@ -68,14 +68,14 @@ export class Tensor implements TensorInterface {
    *
    * @param params - Specify the parameters to construct the tensor.
    */
-  constructor(params: MlBufferConstructorParameters);
+  constructor(params: MLBufferConstructorParameters);
 
   /**
   * implementation.
   */
  constructor(
      arg0: TensorType|TensorDataType|readonly string[]|readonly boolean[]|CpuPinnedConstructorParameters|
-          TextureConstructorParameters|GpuBufferConstructorParameters|MlBufferConstructorParameters,
+          TextureConstructorParameters|GpuBufferConstructorParameters|MLBufferConstructorParameters,
      arg1?: TensorDataType|readonly number[]|readonly string[]|readonly boolean[], arg2?: readonly number[]) {
    // perform one-time check for BigInt/Float16Array support
    checkTypedArray();
@@ -273,9 +273,9 @@ export class Tensor implements TensorInterface {
     return tensorFromGpuBuffer(gpuBuffer, options);
   }
 
-  static fromMlBuffer(
-      mlBuffer: TensorMlBufferType, options: TensorFromGpuBufferOptions): TensorInterface {
-    return tensorFromMlBuffer(mlBuffer, options);
+  static fromMLBuffer(
+      mlBuffer: TensorMLBufferType, options: TensorFromGpuBufferOptions): TensorInterface {
+    return tensorFromMLBuffer(mlBuffer, options);
   }
 
   static fromPinnedBuffer(
@@ -326,7 +326,7 @@ export class Tensor implements TensorInterface {
   /**
    * stores the underlying WebNN MLBuffer when location is 'ml-buffer'. otherwise empty.
    */
-  private mlBufferData?: TensorMlBufferType;
+  private mlBufferData?: TensorMLBufferType;
 
   /**
@@ -376,7 +376,7 @@ export class Tensor implements TensorInterface {
     return this.gpuBufferData;
   }
 
-  get mlBuffer(): TensorMlBufferType {
+  get mlBuffer(): TensorMLBufferType {
     this.ensureValid();
     if (!this.mlBufferData) {
       throw new Error('The data is not stored as a WebNN buffer.');
diff --git a/js/common/lib/tensor-utils-impl.ts b/js/common/lib/tensor-utils-impl.ts
index 48580d5ebb756..f504af281a58b 100644
--- a/js/common/lib/tensor-utils-impl.ts
+++ b/js/common/lib/tensor-utils-impl.ts
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MlBufferConstructorParameters, TextureConstructorParameters} from './tensor-factory.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TextureConstructorParameters} from './tensor-factory.js';
 import {Tensor} from './tensor-impl.js';
 
 /**
@@ -56,7 +56,7 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
       return new Tensor({
         location: 'ml-buffer',
         mlBuffer: tensor.mlBuffer,
-        type: tensor.type as MlBufferConstructorParameters['type'],
+        type: tensor.type as MLBufferConstructorParameters['type'],
         dims,
       });
     default:
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index 88b318e374dce..689508daa1055 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -47,7 +47,7 @@ interface TypedTensorBase {
    *
    * If the data is not in a WebNN MLBuffer, throw error.
    */
-  readonly mlBuffer: Tensor.MlBufferType;
+  readonly mlBuffer: Tensor.MLBufferType;
 
   /**
    * Get the buffer data of the tensor.
@@ -144,7 +144,7 @@ export declare namespace Tensor {
   *
   * The specification for WebNN's ML Buffer is currently in flux.
   */
-  export type MlBufferType = unknown;
+  export type MLBufferType = unknown;
 
  /**
   * supported data types for constructing a tensor from a WebGPU buffer
@@ -154,7 +154,7 @@ export declare namespace Tensor {
  /**
   * supported data types for constructing a tensor from a WebNN MLBuffer
   */
-  export type MlBufferDataTypes = 'float32'|'float16'|'int8'|'uint8'|'int32'|'uint32'|'int64'|'uint64'|'bool';
+  export type MLBufferDataTypes = 'float32'|'float16'|'int8'|'uint8'|'int32'|'uint32'|'int64'|'uint64'|'bool';
 
  /**
   * represent where the tensor data is stored
diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts
index 0dcc8643d272f..d510972c236f0 100644
--- a/js/web/lib/wasm/jsep/backend-webnn.ts
+++ b/js/web/lib/wasm/jsep/backend-webnn.ts
@@ -54,7 +54,7 @@ export class WebNNBackend {
   /**
    * Maps from MLContext to session ids.
    */
-  private sessionIdsByMlContext = new Map>();
+  private sessionIdsByMLContext = new Map>();
   /**
    * Current session id.
    */
@@ -68,38 +68,38 @@ export class WebNNBackend {
     if (this.currentSessionId === undefined) {
       throw new Error('No active session');
     }
-    return this.getMlContext(this.currentSessionId);
+    return this.getMLContext(this.currentSessionId);
   }
 
-  public registerMlContext(sessionId: number, mlContext: MLContext): void {
+  public registerMLContext(sessionId: number, mlContext: MLContext): void {
     this.mlContextBySessionId.set(sessionId, mlContext);
-    let sessionIds = this.sessionIdsByMlContext.get(mlContext);
+    let sessionIds = this.sessionIdsByMLContext.get(mlContext);
     if (!sessionIds) {
       sessionIds = new Set();
-      this.sessionIdsByMlContext.set(mlContext, sessionIds);
+      this.sessionIdsByMLContext.set(mlContext, sessionIds);
     }
     sessionIds.add(sessionId);
   }
 
-  public unregisterMlContext(sessionId: number): void {
+  public unregisterMLContext(sessionId: number): void {
     const mlContext = this.mlContextBySessionId.get(sessionId)!;
     if (!mlContext) {
       throw new Error(`No MLContext found for session ${sessionId}`);
     }
     this.mlContextBySessionId.delete(sessionId);
-    const sessionIds = this.sessionIdsByMlContext.get(mlContext)!;
+    const sessionIds = this.sessionIdsByMLContext.get(mlContext)!;
     sessionIds.delete(sessionId);
     if (sessionIds.size === 0) {
-      this.sessionIdsByMlContext.delete(mlContext);
+      this.sessionIdsByMLContext.delete(mlContext);
     }
   }
 
   public onReleaseSession(sessionId: number): void {
-    this.unregisterMlContext(sessionId);
-    this.bufferManager.releaseBuffersForContext(this.getMlContext(sessionId));
+    this.unregisterMLContext(sessionId);
+    this.bufferManager.releaseBuffersForContext(this.getMLContext(sessionId));
   }
 
-  public getMlContext(sessionId: number): MLContext {
+  public getMLContext(sessionId: number): MLContext {
     return this.mlContextBySessionId.get(sessionId)!;
   }
@@ -137,14 +137,14 @@ export class WebNNBackend {
     return this.bufferManager.download(bufferId);
   }
 
-  public createMlBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise {
+  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise {
     return async () => {
       const data = await this.bufferManager.download(bufferId);
       return createView(data, type);
     };
   }
 
-  public registerMlBuffer(buffer: MLBuffer): BufferId {
+  public registerMLBuffer(buffer: MLBuffer): BufferId {
     return this.bufferManager.registerBuffer(this.currentContext, buffer);
   }
diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts
index 0d86177d2e01f..ea9fe6f35b6b8 100644
--- a/js/web/lib/wasm/jsep/init.ts
+++ b/js/web/lib/wasm/jsep/init.ts
@@ -239,6 +239,21 @@ export const init =
       ]);
     } else {
       const backend = new WebNNBackend();
-      jsepInit('webnn', [backend]);
+      jsepInit('webnn', [
+        backend,
+        // jsepReserveBufferId
+        () => backend.reserveBufferId(),
+        // jsepReleaseBufferId,
+        (bufferId: number) => backend.releaseBufferId(bufferId),
+        // jsepEnsureBuffer
+        (bufferId: number, onnxDataType: number, dimensions: number[]) =>
+            backend.ensureBuffer(bufferId, onnxDataType, dimensions),
+        // jsepUploadBuffer
+        (bufferId: number, data: Uint8Array) => {
+          backend.uploadBuffer(bufferId, data);
+        },
+        // jsepDownloadBuffer
+        async (bufferId: number) => backend.downloadBuffer(bufferId),
+      ]);
     }
   };
diff --git a/js/web/lib/wasm/proxy-messages.ts b/js/web/lib/wasm/proxy-messages.ts
index 7bcc608740239..d8f569f6952e6 100644
--- a/js/web/lib/wasm/proxy-messages.ts
+++ b/js/web/lib/wasm/proxy-messages.ts
@@ -15,9 +15,9 @@ export type GpuBufferMetadata = {
   dispose?: () => void;
 };
 
-export type MlBufferMetadata = {
-  mlBuffer: Tensor.MlBufferType;
-  download?: () => Promise;
+export type MLBufferMetadata = {
+  mlBuffer: Tensor.MLBufferType;
+  download?: () => Promise;
   dispose?: () => void;
 };
 
@@ -26,7 +26,7 @@ export type MlBufferMetadata = {
  */
 export type UnserializableTensorMetadata =
     [dataType: Tensor.Type, dims: readonly number[], data: GpuBufferMetadata, location: 'gpu-buffer']|
-    [dataType: Tensor.Type, dims: readonly number[], data: MlBufferMetadata, location: 'ml-buffer']|
+    [dataType: Tensor.Type, dims: readonly number[], data: MLBufferMetadata, location: 'ml-buffer']|
    [dataType: Tensor.Type, dims: readonly number[], data: Tensor.DataType, location: 'cpu-pinned'];
 
 /**
@@ -37,7 +37,7 @@ export type UnserializableTensorMetadata =
  * - cpu: Uint8Array
  * - cpu-pinned: Uint8Array
  * - gpu-buffer: GpuBufferMetadata
- * - ml-buffer: MlBufferMetadata
+ * - ml-buffer: MLBufferMetadata
  * - location: tensor data location
  */
 export type TensorMetadata = SerializableTensorMetadata|UnserializableTensorMetadata;
diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts
index 69b4b93028da7..cb8faa1675e51 100644
--- a/js/web/lib/wasm/session-handler-inference.ts
+++ b/js/web/lib/wasm/session-handler-inference.ts
@@ -5,7 +5,7 @@ import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor, TRACE
 
 import {SerializableInternalBuffer, TensorMetadata} from './proxy-messages';
 import {copyFromExternalBuffer, createSession, endProfiling, releaseSession, run} from './proxy-wrapper';
-import {isGpuBufferSupportedType, isMlBufferSupportedType} from './wasm-common';
+import {isGpuBufferSupportedType, isMLBufferSupportedType} from './wasm-common';
 import {isNode} from './wasm-utils-env';
 import {loadFile} from './wasm-utils-load-file';
 
@@ -36,11 +36,11 @@ export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => {
     }
     case 'ml-buffer': {
       const dataType = tensor[0];
-      if (!isMlBufferSupportedType(dataType)) {
+      if (!isMLBufferSupportedType(dataType)) {
         throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`);
       }
       const {mlBuffer, download, dispose} = tensor[2];
-      return Tensor.fromMlBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
+      return Tensor.fromMLBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
     }
     default:
       throw new Error(`invalid data location: ${tensor[3]}`);
diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts
index 5904d9b2992fb..d4bff214ee260 100644
--- a/js/web/lib/wasm/wasm-common.ts
+++ b/js/web/lib/wasm/wasm-common.ts
@@ -182,7 +182,7 @@ export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuB
 /**
  * Check whether the given tensor type is supported by WebNN MLBuffer
  */
-export const isMlBufferSupportedType = (type: Tensor.Type): type is Tensor.MlBufferDataTypes => type === 'float32' ||
+export const isMLBufferSupportedType = (type: Tensor.Type): type is Tensor.MLBufferDataTypes => type === 'float32' ||
     type === 'float16' || type === 'int32' || type === 'int64' || type === 'uint32' || type === 'uint64' ||
     type === 'int8' || type === 'uint8' || type === 'bool';
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index d5ccb26553f86..b23518dd20e73 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -11,7 +11,7 @@ import {Env, InferenceSession, Tensor} from 'onnxruntime-common';
 
 import {SerializableInternalBuffer, SerializableSessionMetadata, SerializableTensorMetadata, TensorMetadata} from './proxy-messages';
 import {setRunOptions} from './run-options';
 import {setSessionOptions} from './session-options';
-import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, isMlBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
+import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, isMLBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
 import {getInstance} from './wasm-factory';
 import {allocWasmString, checkLastError} from './wasm-utils';
 import {loadFile} from './wasm-utils-load-file';
@@ -292,7 +292,7 @@ export const createSession = async(
   // clear current MLContext after session creation
   if (wasm.currentContext) {
-    wasm.jsepRegisterMlContext!(sessionHandle, wasm.currentContext);
+    wasm.jsepRegisterMLContext!(sessionHandle, wasm.currentContext);
     wasm.currentContext = undefined;
   }
 
@@ -446,11 +446,11 @@ export const prepareInputOutputTensor =
         const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType))!;
         dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
 
-        const registerMlBuffer = wasm.jsepRegisterMlBuffer;
-        if (!registerMlBuffer) {
+        const registerMLBuffer = wasm.jsepRegisterMLBuffer;
+        if (!registerMLBuffer) {
           throw new Error('Tensor location "ml-buffer" is not supported without using WebNN.');
         }
-        rawData = registerMlBuffer(mlBuffer);
+        rawData = registerMLBuffer(mlBuffer);
       } else {
         const data = tensor[2];
@@ -691,13 +691,13 @@ export const run = async(
           'gpu-buffer'
         ]);
       } else if (preferredLocation === 'ml-buffer' && size > 0) {
-        const getMlBuffer = wasm.jsepGetMlBuffer;
-        if (!getMlBuffer) {
+        const getMLBuffer = wasm.jsepGetMLBuffer;
+        if (!getMLBuffer) {
          throw new Error('preferredLocation "ml-buffer" is not supported without using WebNN.');
         }
-        const mlBuffer = getMlBuffer(dataOffset);
+        const mlBuffer = getMLBuffer(dataOffset);
         const elementSize = getTensorElementSize(dataType);
-        if (elementSize === undefined || !isMlBufferSupportedType(type)) {
+        if (elementSize === undefined || !isMLBufferSupportedType(type)) {
           throw new Error(`Unsupported data type: ${type}`);
         }
 
@@ -707,7 +707,7 @@ export const run = async(
         output.push([
           type, dims, {
            mlBuffer,
-            download: wasm.jsepCreateMlBufferDownloader!(dataOffset, type),
+            download: wasm.jsepCreateMLBufferDownloader!(dataOffset, type),
            dispose: () => {
              wasm.jsepReleaseBufferId!(dataOffset);
              wasm._OrtReleaseTensor(tensor);
diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts
index b88b287c3fea2..afca278422edd 100644
--- a/js/web/lib/wasm/wasm-types.ts
+++ b/js/web/lib/wasm/wasm-types.ts
@@ -23,6 +23,11 @@ export declare namespace JSEP {
   type CaptureBeginFunction = () => void;
   type CaptureEndFunction = () => void;
   type ReplayFunction = () => void;
+  type ReserveBufferIdFunction = () => number;
+  type ReleaseBufferIdFunction = (bufferId: number) => void;
+  type EnsureBufferFunction = (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
+  type UploadBufferFunction = (bufferId: number, data: Uint8Array) => void;
+  type DownloadBufferFunction = (bufferId: number) => Promise;
 
   export interface Module extends WebGpuModule, WebNnModule {
     /**
@@ -47,7 +52,10 @@ export declare namespace JSEP {
         download: DownloadFunction, createKernel: CreateKernelFunction, releaseKernel: ReleaseKernelFunction,
         run: RunFunction, captureBegin: CaptureBeginFunction, captureEnd: CaptureEndFunction, replay: ReplayFunction
       ]): void;
-    jsepInit(name: 'webnn', initParams: [backend: BackendType]): void;
+    jsepInit(name: 'webnn', initParams: [
+      backend: BackendType, reserveBufferId: ReserveBufferIdFunction, releaseBufferId: ReleaseBufferIdFunction,
+      ensureBuffer: EnsureBufferFunction, uploadBuffer: UploadBufferFunction, downloadBuffer: DownloadBufferFunction
+    ]): void;
   }
 
   export interface WebGpuModule {
@@ -124,13 +132,13 @@ export declare namespace JSEP {
      * @param context - specify the MLContext.
      * @returns
      */
-    jsepRegisterMlContext: (sessionId: number, context: MLContext) => void;
+    jsepRegisterMLContext: (sessionId: number, context: MLContext) => void;
     /**
      * [exported from pre-jsep.js] Get MLContext for a session.
     * @param sessionId - specify the session ID.
      * @returns the MLContext.
      */
-    jsepGetMlContext: (sessionId: number) => MLContext;
+    jsepGetMLContext: (sessionId: number) => MLContext;
     /**
      * [exported from pre-jsep.js] Reserve a MLBuffer ID attached to the current session.
      * @returns the MLBuffer ID.
@@ -147,7 +155,7 @@ export declare namespace JSEP {
     * @param bufferId - specify the MLBuffer ID.
      * @returns the MLBuffer.
      */
-    jsepGetMlBuffer: (bufferId: number) => MLBuffer;
+    jsepGetMLBuffer: (bufferId: number) => MLBuffer;
     /**
      * [exported from pre-jsep.js] Ensure MLBuffer has been created with the correct type and dimensions.
      * @param bufferId - specify the MLBuffer ID.
@@ -176,15 +184,15 @@ export declare namespace JSEP {
     * @param type - specify the data type.
      * @returns the downloader function.
      */
-    jsepCreateMlBufferDownloader:
+    jsepCreateMLBufferDownloader:
        (bufferId: number,
-         type: Tensor.MlBufferDataTypes) => () => Promise;
+         type: Tensor.MLBufferDataTypes) => () => Promise;
     /**
     * [exported from pre-jsep.js] Register MLBuffer for a session.
     * @param mlBuffer - specify the MLBuffer.
     * @returns the MLBuffer ID.
     */
-    jsepRegisterMlBuffer: (buffer: MLBuffer) => number;
+    jsepRegisterMLBuffer: (buffer: MLBuffer) => number;
   }
 }
diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts
index 12c1923f46247..dcf1330b2d481 100644
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@@ -20,7 +20,7 @@ import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
 import {Tensor} from '../lib/onnxjs/tensor';
 import {ProtoUtil} from '../lib/onnxjs/util';
 import {createView} from '../lib/wasm/jsep/tensor-view';
-import {getTensorElementSize, isGpuBufferSupportedType, isMlBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
+import {getTensorElementSize, isGpuBufferSupportedType, isMLBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
 import {base64toBuffer, createMockGraph, readFile} from './test-shared';
 import {Test} from './test-types';
 
@@ -577,7 +577,7 @@ const getContext = (() => {
 })();
 
 async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
-  if (!isMlBufferSupportedType(type)) {
+  if (!isMLBufferSupportedType(type)) {
     throw new Error(`createMlTensorForOutput can not work with ${type} tensor`);
   }
 
@@ -586,7 +586,7 @@ async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly num
   const context = await getContext();
   const mlBuffer = context.createBuffer({dataType, dimensions: dims as number[]});
 
-  return ort.Tensor.fromMlBuffer(mlBuffer, {
+  return ort.Tensor.fromMLBuffer(mlBuffer, {
     dataType: type,
     dims,
     dispose: () => mlBuffer.destroy(),
@@ -598,14 +598,14 @@ async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly num
 }
 
 async function createMlTensorForInput(cpuTensor: ort.Tensor): Promise {
-  if (!isMlBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
+  if (!isMLBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
     throw new Error(`createMlTensorForInput can not work with ${cpuTensor.type} tensor`);
   }
   const context = await getContext();
   const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
  const mlBuffer = context.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
   context.writeBuffer(mlBuffer, cpuTensor.data);
-  return ort.Tensor.fromMlBuffer(
+  return ort.Tensor.fromMLBuffer(
       mlBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => mlBuffer.destroy()});
 }
diff --git a/onnxruntime/core/providers/webnn/allocator.cc b/onnxruntime/core/providers/webnn/allocator.cc
index c09377253ddbb..355ee7e48b9f4 100644
--- a/onnxruntime/core/providers/webnn/allocator.cc
+++ b/onnxruntime/core/providers/webnn/allocator.cc
@@ -3,7 +3,6 @@
 
 #include "core/providers/webnn/allocator.h"
 
-#include "core/framework/session_state.h"
 #include "core/common/safeint.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/webnn/allocator.h b/onnxruntime/core/providers/webnn/allocator.h
index edb7e1ea77217..9c3eff53fa842 100644
--- a/onnxruntime/core/providers/webnn/allocator.h
+++ b/onnxruntime/core/providers/webnn/allocator.h
@@ -8,7 +8,6 @@
 
 #include "core/common/inlined_containers.h"
 #include "core/framework/allocator.h"
-#include "core/framework/execution_provider.h"
 #include "core/framework/ortdevice.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc
index 2b622c6b942cf..0a1b157e8f730 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.cc
+++ b/onnxruntime/core/providers/webnn/builders/helper.cc
@@ -12,6 +12,19 @@
 namespace onnxruntime {
 namespace webnn {
 
+WebnnDeviceType DeviceTypeFromString(const std::string& device_type) {
+  if (device_type == "gpu") {
+    return WebnnDeviceType::GPU;
+  }
+  if (device_type == "cpu") {
+    return WebnnDeviceType::CPU;
+  }
+  if (device_type == "npu") {
+    return WebnnDeviceType::NPU;
+  }
+  ORT_THROW("Unknown WebNN deviceType.");
+}
+
 InitializedTensorSet CollectAllInitializedTensors(const GraphViewer& graph_viewer) {
   InitializedTensorSet all_initializers;
   if (graph_viewer.IsSubgraph()) {
@@ -198,9 +211,10 @@ bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
   }
 }
 
-bool IsMlBufferSupported() {
+bool IsMLBufferSupported(WebnnDeviceType device_type) {
   static bool is_supported = !emscripten::val::global("MLBuffer").isUndefined();
-  return is_supported;
+  // The current MLBuffer implementation only supports GPU and NPU devices.
+  return is_supported && device_type != WebnnDeviceType::CPU;
 }
 
 }  // namespace webnn
diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h
index 9dba7801cd6a5..dfe033af249d1 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.h
+++ b/onnxruntime/core/providers/webnn/builders/helper.h
@@ -31,6 +31,8 @@ enum class WebnnDeviceType {
   NPU,
 };
 
+WebnnDeviceType DeviceTypeFromString(const std::string& device_type);
+
 typedef struct {
   std::string opName;
   bool isCpuSupported;  // The WebNN CPU backend XNNPack supports it (not about the CPU EP).
@@ -283,7 +285,7 @@ bool GetBidirectionalBroadcastShape(std::vector& shape_a,
 
 bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type);
 
-bool IsMlBufferSupported();
+bool IsMLBufferSupported(WebnnDeviceType device_type);
 
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/model.cc b/onnxruntime/core/providers/webnn/builders/model.cc
index c47fad5f26224..151a5ed559d3b 100644
--- a/onnxruntime/core/providers/webnn/builders/model.cc
+++ b/onnxruntime/core/providers/webnn/builders/model.cc
@@ -11,22 +11,22 @@
 #include "core/common/safeint.h"
 #include "core/graph/onnx_protobuf.h"
 #include "core/providers/common.h"
-#include "core/providers/webnn/builders/helper.h"
 
 #include "model.h"
 
 namespace onnxruntime {
 namespace webnn {
 
-Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger)
+Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger, bool use_dispatch)
     : wnn_context_(context),
       wnn_graph_(graph),
-      logger_(logger) {}
+      logger_(logger),
+      use_dispatch_(use_dispatch) {}
 
 Model::~Model() {}
 
 Status Model::Predict(const InlinedHashMap& inputs,
                       const InlinedHashMap& outputs) {
-  if (webnn::IsMlBufferSupported()) {
+  if (use_dispatch_) {
     return Dispatch(inputs, outputs);
   } else {
@@ -201,7 +201,7 @@ void Model::SetOutputMap(InlinedHashMap&& output_map) {
 // Pre-allocate the input and output buffers for the WebNN graph.
 void Model::AllocateInputOutputBuffers() {
   // We don't need to allocate JS array buffers if the WebNN API supports MLBuffer.
-  if (webnn::IsMlBufferSupported()) {
+  if (use_dispatch_) {
     return;
   }
   for (const auto& input : inputs_) {
diff --git a/onnxruntime/core/providers/webnn/builders/model.h b/onnxruntime/core/providers/webnn/builders/model.h
index 8333d841f1a7c..f5ca137f5f6b5 100644
--- a/onnxruntime/core/providers/webnn/builders/model.h
+++ b/onnxruntime/core/providers/webnn/builders/model.h
@@ -83,7 +83,9 @@ class Model {
 
   OrtMutex mutex_;
 
-  Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger);
+  bool use_dispatch_;
+
+  Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger, bool use_dispatch);
 
   void SetInputOutputInfo(InlinedHashMap&& input_output_info) {
     input_output_info_ = std::move(input_output_info);
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 6b0e1495f552d..e7fcfeb27671f 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -332,7 +332,7 @@ Status ModelBuilder::Compile(std::unique_ptr& model) {
   if (!wnn_graph.as()) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to build WebNN graph.");
   }
-  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_));
+  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_, IsMLBufferSupported(wnn_device_type_)));
   model->SetInputs(std::move(input_names_));
   model->SetOutputs(std::move(output_names_));
   model->SetScalarOutputs(std::move(scalar_outputs_));
diff --git a/onnxruntime/core/providers/webnn/data_transfer.cc b/onnxruntime/core/providers/webnn/data_transfer.cc
index 36c436f1ef3b5..66096c74a7950 100644
--- a/onnxruntime/core/providers/webnn/data_transfer.cc
+++ b/onnxruntime/core/providers/webnn/data_transfer.cc
@@ -4,6 +4,8 @@
 #include "core/providers/webnn/data_transfer.h"
"core/providers/webnn/data_transfer.h" #include +#include "core/framework/tensor.h" + namespace onnxruntime { namespace webnn { @@ -24,7 +26,7 @@ common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const { if (dst_device.Type() == OrtDevice::GPU) { EM_ASM({ - Module.jsepUploadBuffer($0, Module.HEAPU8.subarray($1, $1 + $2)); + Module.jsepUploadBuffer($0, HEAPU8.subarray($1, $1 + $2)); }, dst_data, reinterpret_cast(src_data), bytes); } else { @@ -33,7 +35,7 @@ common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const { EM_ASM({ const buffer = Emval.toValue($0); const src_array = new Uint8Array(buffer, 0, $2); - Module.HEAPU8.set(src_array, $1); + HEAPU8.set(src_array, $1); }, buffer.as_handle(), reinterpret_cast(dst_data), bytes); } diff --git a/onnxruntime/core/providers/webnn/data_transfer.h b/onnxruntime/core/providers/webnn/data_transfer.h index 11ae4b74f351f..03cfada46d1a0 100644 --- a/onnxruntime/core/providers/webnn/data_transfer.h +++ b/onnxruntime/core/providers/webnn/data_transfer.h @@ -6,7 +6,6 @@ #include #include "core/framework/data_transfer.h" -#include "core/framework/execution_provider.h" namespace onnxruntime { namespace webnn { diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index b76da43839f3c..36494da48f23d 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -23,20 +23,16 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f : IExecutionProvider{ onnxruntime::kWebNNExecutionProvider, // If MLBuffer is supported, we force all the tensors to be allocated as MLBuffer. - OrtDevice(webnn::IsMlBufferSupported() ? OrtDevice::GPU : OrtDevice::CPU, OrtDevice::MemType::DEFAULT, 0)} { + OrtDevice( + webnn::IsMLBufferSupported(webnn::DeviceTypeFromString(webnn_device_flags)) ? OrtDevice::GPU : OrtDevice::CPU, + OrtDevice::MemType::DEFAULT, + 0)}, + wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)) { // WebNN EP uses NHWC layout for CPU XNNPACK backend and NCHW for GPU DML backend. 
-  if (webnn_device_flags.compare("cpu") == 0) {
+  if (wnn_device_type_ == webnn::WebnnDeviceType::CPU) {
     preferred_layout_ = DataLayout::NHWC;
-    wnn_device_type_ = webnn::WebnnDeviceType::CPU;
   } else {
     preferred_layout_ = DataLayout::NCHW;
-    if (webnn_device_flags.compare("gpu") == 0) {
-      wnn_device_type_ = webnn::WebnnDeviceType::GPU;
-    } else if (webnn_device_flags.compare("npu") == 0) {
-      wnn_device_type_ = webnn::WebnnDeviceType::NPU;
-    } else {
-      ORT_THROW("Unknown WebNN deviceType.");
-    }
   }
 
   wnn_context_ = emscripten::val::module_property("currentContext");
@@ -382,14 +378,14 @@ WebNNExecutionProvider::GetKernelRegistry() const {
 }
 
 std::unique_ptr WebNNExecutionProvider::GetDataTransfer() const {
-  if (!webnn::IsMlBufferSupported()) {
+  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
     return nullptr;
   }
   return std::make_unique();
 }
 
 std::vector WebNNExecutionProvider::CreatePreferredAllocators() {
-  if (!webnn::IsMlBufferSupported()) {
+  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
     return {};
   }
   AllocatorCreationInfo customAllocatorCreationInfo([&](OrtDevice::DeviceId) {
diff --git a/onnxruntime/wasm/pre-jsep.js b/onnxruntime/wasm/pre-jsep.js
index 2147c9f67e1a2..c5bc4cd8c6a59 100644
--- a/onnxruntime/wasm/pre-jsep.js
+++ b/onnxruntime/wasm/pre-jsep.js
@@ -199,45 +199,43 @@ Module['jsepInit'] = (name, params) => {
       return backend['onRunStart'](sessionId);
     };
   } else if(name === 'webnn') {
-    [Module.jsepBackend] = params;
+    // Functions called from EM_ASM need to be assigned in a way that can be minified.
+    [Module.jsepBackend,
+     Module.jsepReserveBufferId,
+     Module.jsepReleaseBufferId,
+     Module.jsepEnsureBuffer,
+     Module.jsepUploadBuffer,
+     Module.jsepDownloadBuffer,
+    ] = params;
+
-    // expose webnn backend functions
+    // Functions called via emscripten::val::module_property need to be assigned in a way that the minifier won't
+    // change the name
+    Module['jsepEnsureBuffer'] = Module.jsepEnsureBuffer;
+    Module['jsepDownloadBuffer'] = Module.jsepDownloadBuffer;
+
+    // Functions called from JS also need to have explicit names.
     const backend = Module.jsepBackend;
     Module['jsepOnRunStart'] = sessionId => {
       return backend['onRunStart'](sessionId);
     };
-    Module['jsepRegisterMlContext'] = (sessionId, mlContext) => {
-      backend['registerMlContext'](sessionId, mlContext);
+    Module['jsepRegisterMLContext'] = (sessionId, mlContext) => {
+      backend['registerMLContext'](sessionId, mlContext);
     };
     Module['jsepOnReleaseSession'] = sessionId => {
       backend['onReleaseSession'](sessionId);
     };
-    Module['jsepGetMlContext'] = sessionId => {
-      return backend['getMlContext'](sessionId);
+    Module['jsepGetMLContext'] = sessionId => {
+      return backend['getMLContext'](sessionId);
     };
-    Module['jsepReserveBufferId'] = () => {
-      return backend['reserveBufferId']();
-    }
-    Module['jsepReleaseBufferId'] = (bufferId) => {
-      backend['releaseBufferId'](bufferId);
-    }
-    Module['jsepGetMlBuffer'] = (bufferId) => {
+    Module['jsepGetMLBuffer'] = (bufferId) => {
       return backend['getBuffer'](bufferId);
     }
-    Module['jsepEnsureBuffer'] = (bufferId, dataType, dimensions) => {
-      return backend['ensureBuffer'](bufferId, dataType, dimensions);
-    }
-    Module['jsepUploadBuffer'] = (bufferId, data) => {
-      backend['uploadBuffer'](bufferId, data);
-    }
-    Module['jsepDownloadBuffer'] = (bufferId) => {
-      return backend['downloadBuffer'](bufferId);
-    }
-    Module['jsepCreateMlBufferDownloader'] = (bufferId, type) => {
-      return backend['createMlBufferDownloader'](bufferId, type);
+    Module['jsepCreateMLBufferDownloader'] = (bufferId, type) => {
+      return backend['createMLBufferDownloader'](bufferId, type);
    }
-    Module['jsepRegisterMlBuffer'] = (buffer) => {
-      return backend['registerMlBuffer'](buffer);
+    Module['jsepRegisterMLBuffer'] = (buffer) => {
+      return backend['registerMLBuffer'](buffer);
    }
  }
};
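
For reference, a minimal usage sketch of the renamed factory, mirroring the updated test-runner code above. This is illustrative only and not part of the patch: it assumes a WebNN-capable browser exposing navigator.ml (with WebNN typings available), the onnxruntime-web build produced from this change, and the provisional MLBuffer API surface, which is still in flux.

import * as ort from 'onnxruntime-web';

// Sketch: wrap a WebNN MLBuffer in an ort.Tensor via the renamed fromMLBuffer factory.
const createMLBufferInput = async (): Promise<ort.Tensor> => {
  // MLBuffer is only enabled for 'gpu'/'npu' device types after this change.
  const context = await navigator.ml.createContext({deviceType: 'gpu'});
  const mlBuffer = context.createBuffer({dataType: 'float32', dimensions: [1, 4]});
  context.writeBuffer(mlBuffer, new Float32Array([1, 2, 3, 4]));

  // dispose() lets the tensor destroy the underlying MLBuffer when it is released.
  return ort.Tensor.fromMLBuffer(mlBuffer, {
    dataType: 'float32',
    dims: [1, 4],
    dispose: () => mlBuffer.destroy(),
  });
};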