From 1e07f85e4b69cf83613e6af9b4ac325ba278f12e Mon Sep 17 00:00:00 2001
From: Enrico Galli
Date: Wed, 17 Jul 2024 15:27:11 -0700
Subject: [PATCH] PR feedback

* Fixed issues when building under debug
* Disabled MLBuffer on CPU device types
* Renamed MlBuffer and MlContext to match specification
---
 js/common/lib/tensor-factory-impl.ts          |  6 +--
 js/common/lib/tensor-factory.ts               |  8 +--
 js/common/lib/tensor-impl.ts                  | 20 ++++----
 js/common/lib/tensor-utils-impl.ts            |  4 +-
 js/common/lib/tensor.ts                       |  6 +--
 js/web/lib/wasm/jsep/backend-webnn.ts         | 26 +++++-----
 js/web/lib/wasm/jsep/init.ts                  | 17 ++++++-
 js/web/lib/wasm/proxy-messages.ts             | 10 ++--
 js/web/lib/wasm/session-handler-inference.ts  |  6 +--
 js/web/lib/wasm/wasm-common.ts                |  2 +-
 js/web/lib/wasm/wasm-core-impl.ts             | 20 ++++----
 js/web/lib/wasm/wasm-types.ts                 | 22 +++++---
 js/web/test/test-runner.ts                    | 10 ++--
 onnxruntime/core/providers/webnn/allocator.cc |  1 -
 onnxruntime/core/providers/webnn/allocator.h  |  1 -
 .../core/providers/webnn/builders/helper.cc   | 18 ++++++-
 .../core/providers/webnn/builders/helper.h    |  4 +-
 .../core/providers/webnn/builders/model.cc    | 10 ++--
 .../core/providers/webnn/builders/model.h     |  4 +-
 .../providers/webnn/builders/model_builder.cc |  2 +-
 .../core/providers/webnn/data_transfer.cc     |  6 ++-
 .../core/providers/webnn/data_transfer.h      |  1 -
 .../webnn/webnn_execution_provider.cc         | 20 +++----
 onnxruntime/wasm/pre-jsep.js                  | 50 +++++++++----------
 24 files changed, 154 insertions(+), 120 deletions(-)

diff --git a/js/common/lib/tensor-factory-impl.ts b/js/common/lib/tensor-factory-impl.ts
index e8db9c754910f..ad255999cb96c 100644
--- a/js/common/lib/tensor-factory-impl.ts
+++ b/js/common/lib/tensor-factory-impl.ts
@@ -275,10 +275,10 @@ export const tensorFromGpuBuffer = (
-    mlBuffer: TensorInterface.MlBufferType, options: TensorFromGpuBufferOptions): Tensor => {
+export const tensorFromMLBuffer = (
+    mlBuffer: TensorInterface.MLBufferType, options: TensorFromGpuBufferOptions): Tensor => {
   const {dataType, dims, download, dispose} = options;
   return new Tensor({location: 'ml-buffer', type: dataType ?? 'float32', mlBuffer, dims, download, dispose});
 };
diff --git a/js/common/lib/tensor-factory.ts b/js/common/lib/tensor-factory.ts
index 663a833a1cf8d..68a79353d3f20 100644
--- a/js/common/lib/tensor-factory.ts
+++ b/js/common/lib/tensor-factory.ts
@@ -84,7 +84,7 @@ export interface GpuBufferConstructorParameters extends
+export interface MLBufferConstructorParameters extends
     CommonConstructorParameters, GpuResourceConstructorParameters {
   /**
    * Specify the location of the data to be 'ml-buffer'.
@@ -94,7 +94,7 @@ export interface MlBufferConstructorParameters
   dataType?: T;
 }
 
-export interface TensorFromMlBufferOptions extends
+export interface TensorFromMLBufferOptions extends
     Pick, GpuResourceConstructorParameters {
   /**
    * Describes the data type of the tensor.
@@ -345,7 +345,7 @@ export interface TensorFactory {
    *
    * @returns a tensor object
    */
-  fromMlBuffer(buffer: Tensor.MlBufferType, options: TensorFromMlBufferOptions):
+  fromMLBuffer(buffer: Tensor.MLBufferType, options: TensorFromMLBufferOptions):
       TypedTensor;
 
   /**
diff --git a/js/common/lib/tensor-impl.ts b/js/common/lib/tensor-impl.ts
index 4e4057ee2a13a..a0a03074e83fb 100644
--- a/js/common/lib/tensor-impl.ts
+++ b/js/common/lib/tensor-impl.ts
@@ -3,8 +3,8 @@
 import {tensorToDataURL, tensorToImageData} from './tensor-conversion-impl.js';
 import {TensorToDataUrlOptions, TensorToImageDataOptions} from './tensor-conversion.js';
-import {tensorFromGpuBuffer, tensorFromImage, tensorFromMlBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MlBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
+import {tensorFromGpuBuffer, tensorFromImage, tensorFromMLBuffer, tensorFromPinnedBuffer, tensorFromTexture} from './tensor-factory-impl.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TensorFromGpuBufferOptions, TensorFromImageBitmapOptions, TensorFromImageDataOptions, TensorFromImageElementOptions, TensorFromTextureOptions, TensorFromUrlOptions, TextureConstructorParameters} from './tensor-factory.js';
 import {checkTypedArray, NUMERIC_TENSOR_TYPE_TO_TYPEDARRAY_MAP, NUMERIC_TENSOR_TYPEDARRAY_TO_TYPE_MAP, SupportedTypedArray, SupportedTypedArrayConstructors} from './tensor-impl-type-mapping.js';
 import {calculateSize, tensorReshape} from './tensor-utils-impl.js';
 import {Tensor as TensorInterface} from './tensor.js';
@@ -16,7 +16,7 @@ type TensorDataType = TensorInterface.DataType;
 type TensorDataLocation = TensorInterface.DataLocation;
 type TensorTextureType = TensorInterface.TextureType;
 type TensorGpuBufferType = TensorInterface.GpuBufferType;
-type TensorMlBufferType = TensorInterface.MlBufferType;
+type TensorMLBufferType = TensorInterface.MLBufferType;
 
 /**
  * the implementation of Tensor interface.
@@ -68,14 +68,14 @@ export class Tensor implements TensorInterface {
    *
    * @param params - Specify the parameters to construct the tensor.
    */
-  constructor(params: MlBufferConstructorParameters);
+  constructor(params: MLBufferConstructorParameters);
 
   /**
   * implementation.
   */
  constructor(
      arg0: TensorType|TensorDataType|readonly string[]|readonly boolean[]|CpuPinnedConstructorParameters|
-          TextureConstructorParameters|GpuBufferConstructorParameters|MlBufferConstructorParameters,
+          TextureConstructorParameters|GpuBufferConstructorParameters|MLBufferConstructorParameters,
      arg1?: TensorDataType|readonly number[]|readonly string[]|readonly boolean[], arg2?: readonly number[]) {
    // perform one-time check for BigInt/Float16Array support
    checkTypedArray();
@@ -273,9 +273,9 @@ export class Tensor implements TensorInterface {
     return tensorFromGpuBuffer(gpuBuffer, options);
   }
 
-  static fromMlBuffer(
-      mlBuffer: TensorMlBufferType, options: TensorFromGpuBufferOptions): TensorInterface {
-    return tensorFromMlBuffer(mlBuffer, options);
+  static fromMLBuffer(
+      mlBuffer: TensorMLBufferType, options: TensorFromGpuBufferOptions): TensorInterface {
+    return tensorFromMLBuffer(mlBuffer, options);
   }
 
   static fromPinnedBuffer(
@@ -326,7 +326,7 @@ export class Tensor implements TensorInterface {
   /**
    * stores the underlying WebNN MLBuffer when location is 'ml-buffer'. otherwise empty.
    */
-  private mlBufferData?: TensorMlBufferType;
+  private mlBufferData?: TensorMLBufferType;
 
   /**
@@ -376,7 +376,7 @@ export class Tensor implements TensorInterface {
     return this.gpuBufferData;
   }
 
-  get mlBuffer(): TensorMlBufferType {
+  get mlBuffer(): TensorMLBufferType {
     this.ensureValid();
     if (!this.mlBufferData) {
       throw new Error('The data is not stored as a WebNN buffer.');
diff --git a/js/common/lib/tensor-utils-impl.ts b/js/common/lib/tensor-utils-impl.ts
index 48580d5ebb756..f504af281a58b 100644
--- a/js/common/lib/tensor-utils-impl.ts
+++ b/js/common/lib/tensor-utils-impl.ts
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MlBufferConstructorParameters, TextureConstructorParameters} from './tensor-factory.js';
+import {CpuPinnedConstructorParameters, GpuBufferConstructorParameters, MLBufferConstructorParameters, TextureConstructorParameters} from './tensor-factory.js';
 import {Tensor} from './tensor-impl.js';
 
 /**
@@ -56,7 +56,7 @@ export const tensorReshape = (tensor: Tensor, dims: readonly number[]): Tensor =
       return new Tensor({
         location: 'ml-buffer',
         mlBuffer: tensor.mlBuffer,
-        type: tensor.type as MlBufferConstructorParameters['type'],
+        type: tensor.type as MLBufferConstructorParameters['type'],
         dims,
       });
     default:
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index 88b318e374dce..689508daa1055 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -47,7 +47,7 @@ interface TypedTensorBase {
    *
    * If the data is not in a WebNN MLBuffer, throw error.
    */
-  readonly mlBuffer: Tensor.MlBufferType;
+  readonly mlBuffer: Tensor.MLBufferType;
 
   /**
    * Get the buffer data of the tensor.
@@ -144,7 +144,7 @@ export declare namespace Tensor {
   *
   * The specification for WebNN's ML Buffer is currently in flux.
   */
-  export type MlBufferType = unknown;
+  export type MLBufferType = unknown;
 
  /**
   * supported data types for constructing a tensor from a WebGPU buffer
@@ -154,7 +154,7 @@ export declare namespace Tensor {
  /**
   * supported data types for constructing a tensor from a WebNN MLBuffer
   */
-  export type MlBufferDataTypes = 'float32'|'float16'|'int8'|'uint8'|'int32'|'uint32'|'int64'|'uint64'|'bool';
+  export type MLBufferDataTypes = 'float32'|'float16'|'int8'|'uint8'|'int32'|'uint32'|'int64'|'uint64'|'bool';
 
  /**
   * represent where the tensor data is stored
diff --git a/js/web/lib/wasm/jsep/backend-webnn.ts b/js/web/lib/wasm/jsep/backend-webnn.ts
index 0dcc8643d272f..d510972c236f0 100644
--- a/js/web/lib/wasm/jsep/backend-webnn.ts
+++ b/js/web/lib/wasm/jsep/backend-webnn.ts
@@ -54,7 +54,7 @@ export class WebNNBackend {
   /**
    * Maps from MLContext to session ids.
    */
-  private sessionIdsByMlContext = new Map>();
+  private sessionIdsByMLContext = new Map>();
   /**
    * Current session id.
    */
@@ -68,38 +68,38 @@ export class WebNNBackend {
     if (this.currentSessionId === undefined) {
       throw new Error('No active session');
     }
-    return this.getMlContext(this.currentSessionId);
+    return this.getMLContext(this.currentSessionId);
   }
 
-  public registerMlContext(sessionId: number, mlContext: MLContext): void {
+  public registerMLContext(sessionId: number, mlContext: MLContext): void {
     this.mlContextBySessionId.set(sessionId, mlContext);
-    let sessionIds = this.sessionIdsByMlContext.get(mlContext);
+    let sessionIds = this.sessionIdsByMLContext.get(mlContext);
     if (!sessionIds) {
       sessionIds = new Set();
-      this.sessionIdsByMlContext.set(mlContext, sessionIds);
+      this.sessionIdsByMLContext.set(mlContext, sessionIds);
     }
     sessionIds.add(sessionId);
   }
 
-  public unregisterMlContext(sessionId: number): void {
+  public unregisterMLContext(sessionId: number): void {
     const mlContext = this.mlContextBySessionId.get(sessionId)!;
     if (!mlContext) {
       throw new Error(`No MLContext found for session ${sessionId}`);
     }
     this.mlContextBySessionId.delete(sessionId);
-    const sessionIds = this.sessionIdsByMlContext.get(mlContext)!;
+    const sessionIds = this.sessionIdsByMLContext.get(mlContext)!;
     sessionIds.delete(sessionId);
     if (sessionIds.size === 0) {
-      this.sessionIdsByMlContext.delete(mlContext);
+      this.sessionIdsByMLContext.delete(mlContext);
     }
   }
 
   public onReleaseSession(sessionId: number): void {
-    this.unregisterMlContext(sessionId);
-    this.bufferManager.releaseBuffersForContext(this.getMlContext(sessionId));
+    this.unregisterMLContext(sessionId);
+    this.bufferManager.releaseBuffersForContext(this.getMLContext(sessionId));
   }
 
-  public getMlContext(sessionId: number): MLContext {
+  public getMLContext(sessionId: number): MLContext {
     return this.mlContextBySessionId.get(sessionId)!;
   }
@@ -137,14 +137,14 @@ export class WebNNBackend {
     return this.bufferManager.download(bufferId);
   }
 
-  public createMlBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise {
+  public createMLBufferDownloader(bufferId: BufferId, type: Tensor.GpuBufferDataTypes): () => Promise {
     return async () => {
       const data = await this.bufferManager.download(bufferId);
       return createView(data, type);
     };
   }
 
-  public registerMlBuffer(buffer: MLBuffer): BufferId {
+  public registerMLBuffer(buffer: MLBuffer): BufferId {
     return this.bufferManager.registerBuffer(this.currentContext, buffer);
   }
diff --git a/js/web/lib/wasm/jsep/init.ts b/js/web/lib/wasm/jsep/init.ts
index 0d86177d2e01f..ea9fe6f35b6b8 100644
--- a/js/web/lib/wasm/jsep/init.ts
+++ b/js/web/lib/wasm/jsep/init.ts
@@ -239,6 +239,21 @@ export const init =
       ]);
     } else {
       const backend = new WebNNBackend();
-      jsepInit('webnn', [backend]);
+      jsepInit('webnn', [
+        backend,
+        // jsepReserveBufferId
+        () => backend.reserveBufferId(),
+        // jsepReleaseBufferId,
+        (bufferId: number) => backend.releaseBufferId(bufferId),
+        // jsepEnsureBuffer
+        (bufferId: number, onnxDataType: number, dimensions: number[]) =>
+            backend.ensureBuffer(bufferId, onnxDataType, dimensions),
+        // jsepUploadBuffer
+        (bufferId: number, data: Uint8Array) => {
+          backend.uploadBuffer(bufferId, data);
+        },
+        // jsepDownloadBuffer
+        async (bufferId: number) => backend.downloadBuffer(bufferId),
+      ]);
     }
   };
diff --git a/js/web/lib/wasm/proxy-messages.ts b/js/web/lib/wasm/proxy-messages.ts
index 7bcc608740239..d8f569f6952e6 100644
--- a/js/web/lib/wasm/proxy-messages.ts
+++ b/js/web/lib/wasm/proxy-messages.ts
@@ -15,9 +15,9 @@ export type GpuBufferMetadata = {
   dispose?: () => void;
 };
 
-export type MlBufferMetadata = {
-  mlBuffer: Tensor.MlBufferType;
-  download?: () => Promise;
+export type MLBufferMetadata = {
+  mlBuffer: Tensor.MLBufferType;
+  download?: () => Promise;
   dispose?: () => void;
 };
 
@@ -26,7 +26,7 @@ export type MlBufferMetadata = {
  */
 export type UnserializableTensorMetadata =
     [dataType: Tensor.Type, dims: readonly number[], data: GpuBufferMetadata, location: 'gpu-buffer']|
-    [dataType: Tensor.Type, dims: readonly number[], data: MlBufferMetadata, location: 'ml-buffer']|
+    [dataType: Tensor.Type, dims: readonly number[], data: MLBufferMetadata, location: 'ml-buffer']|
    [dataType: Tensor.Type, dims: readonly number[], data: Tensor.DataType, location: 'cpu-pinned'];
 
 /**
@@ -37,7 +37,7 @@ export type UnserializableTensorMetadata =
  * - cpu: Uint8Array
  * - cpu-pinned: Uint8Array
  * - gpu-buffer: GpuBufferMetadata
- * - ml-buffer: MlBufferMetadata
+ * - ml-buffer: MLBufferMetadata
  * - location: tensor data location
  */
 export type TensorMetadata = SerializableTensorMetadata|UnserializableTensorMetadata;
diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts
index 69b4b93028da7..cb8faa1675e51 100644
--- a/js/web/lib/wasm/session-handler-inference.ts
+++ b/js/web/lib/wasm/session-handler-inference.ts
@@ -5,7 +5,7 @@ import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor, TRACE
 
 import {SerializableInternalBuffer, TensorMetadata} from './proxy-messages';
 import {copyFromExternalBuffer, createSession, endProfiling, releaseSession, run} from './proxy-wrapper';
-import {isGpuBufferSupportedType, isMlBufferSupportedType} from './wasm-common';
+import {isGpuBufferSupportedType, isMLBufferSupportedType} from './wasm-common';
 import {isNode} from './wasm-utils-env';
 import {loadFile} from './wasm-utils-load-file';
 
@@ -36,11 +36,11 @@ export const decodeTensorMetadata = (tensor: TensorMetadata): Tensor => {
     }
     case 'ml-buffer': {
       const dataType = tensor[0];
-      if (!isMlBufferSupportedType(dataType)) {
+      if (!isMLBufferSupportedType(dataType)) {
         throw new Error(`not supported data type: ${dataType} for deserializing GPU tensor`);
       }
       const {mlBuffer, download, dispose} = tensor[2];
-      return Tensor.fromMlBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
+      return Tensor.fromMLBuffer(mlBuffer, {dataType, dims: tensor[1], download, dispose});
     }
     default:
       throw new Error(`invalid data location: ${tensor[3]}`);
diff --git a/js/web/lib/wasm/wasm-common.ts b/js/web/lib/wasm/wasm-common.ts
index 5904d9b2992fb..d4bff214ee260 100644
--- a/js/web/lib/wasm/wasm-common.ts
+++ b/js/web/lib/wasm/wasm-common.ts
@@ -182,7 +182,7 @@ export const isGpuBufferSupportedType = (type: Tensor.Type): type is Tensor.GpuB
 /**
  * Check whether the given tensor type is supported by WebNN MLBuffer
  */
-export const isMlBufferSupportedType = (type: Tensor.Type): type is Tensor.MlBufferDataTypes => type === 'float32' ||
+export const isMLBufferSupportedType = (type: Tensor.Type): type is Tensor.MLBufferDataTypes => type === 'float32' ||
     type === 'float16' || type === 'int32' || type === 'int64' || type === 'uint32' || type === 'uint64' ||
     type === 'int8' || type === 'uint8' || type === 'bool';
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index d5ccb26553f86..b23518dd20e73 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -11,7 +11,7 @@ import {Env, InferenceSession, Tensor} from 'onnxruntime-common';
 
 import {SerializableInternalBuffer, SerializableSessionMetadata, SerializableTensorMetadata, TensorMetadata} from './proxy-messages';
 import {setRunOptions} from './run-options';
 import {setSessionOptions} from './session-options';
-import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, isMlBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
+import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, isMLBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common';
 import {getInstance} from './wasm-factory';
 import {allocWasmString, checkLastError} from './wasm-utils';
 import {loadFile} from './wasm-utils-load-file';
@@ -292,7 +292,7 @@ export const createSession = async(
   // clear current MLContext after session creation
   if (wasm.currentContext) {
-    wasm.jsepRegisterMlContext!(sessionHandle, wasm.currentContext);
+    wasm.jsepRegisterMLContext!(sessionHandle, wasm.currentContext);
     wasm.currentContext = undefined;
   }
 
@@ -446,11 +446,11 @@ export const prepareInputOutputTensor =
         const elementSizeInBytes = getTensorElementSize(tensorDataTypeStringToEnum(dataType))!;
         dataByteLength = dims.reduce((a, b) => a * b, 1) * elementSizeInBytes;
 
-        const registerMlBuffer = wasm.jsepRegisterMlBuffer;
-        if (!registerMlBuffer) {
+        const registerMLBuffer = wasm.jsepRegisterMLBuffer;
+        if (!registerMLBuffer) {
           throw new Error('Tensor location "ml-buffer" is not supported without using WebNN.');
         }
-        rawData = registerMlBuffer(mlBuffer);
+        rawData = registerMLBuffer(mlBuffer);
       } else {
         const data = tensor[2];
@@ -691,13 +691,13 @@ export const run = async(
           'gpu-buffer'
         ]);
       } else if (preferredLocation === 'ml-buffer' && size > 0) {
-        const getMlBuffer = wasm.jsepGetMlBuffer;
-        if (!getMlBuffer) {
+        const getMLBuffer = wasm.jsepGetMLBuffer;
+        if (!getMLBuffer) {
          throw new Error('preferredLocation "ml-buffer" is not supported without using WebNN.');
         }
-        const mlBuffer = getMlBuffer(dataOffset);
+        const mlBuffer = getMLBuffer(dataOffset);
         const elementSize = getTensorElementSize(dataType);
-        if (elementSize === undefined || !isMlBufferSupportedType(type)) {
+        if (elementSize === undefined || !isMLBufferSupportedType(type)) {
           throw new Error(`Unsupported data type: ${type}`);
         }
 
@@ -707,7 +707,7 @@ export const run = async(
         output.push([
           type, dims, {
            mlBuffer,
-            download: wasm.jsepCreateMlBufferDownloader!(dataOffset, type),
+            download: wasm.jsepCreateMLBufferDownloader!(dataOffset, type),
            dispose: () => {
              wasm.jsepReleaseBufferId!(dataOffset);
              wasm._OrtReleaseTensor(tensor);
diff --git a/js/web/lib/wasm/wasm-types.ts b/js/web/lib/wasm/wasm-types.ts
index b88b287c3fea2..afca278422edd 100644
--- a/js/web/lib/wasm/wasm-types.ts
+++ b/js/web/lib/wasm/wasm-types.ts
@@ -23,6 +23,11 @@ export declare namespace JSEP {
   type CaptureBeginFunction = () => void;
   type CaptureEndFunction = () => void;
   type ReplayFunction = () => void;
+  type ReserveBufferIdFunction = () => number;
+  type ReleaseBufferIdFunction = (bufferId: number) => void;
+  type EnsureBufferFunction = (bufferId: number, dataType: number|MLOperandDataType, dimensions: number[]) => MLBuffer;
+  type UploadBufferFunction = (bufferId: number, data: Uint8Array) => void;
+  type DownloadBufferFunction = (bufferId: number) => Promise;
 
   export interface Module extends WebGpuModule, WebNnModule {
     /**
@@ -47,7 +52,10 @@ export declare namespace JSEP {
         download: DownloadFunction, createKernel: CreateKernelFunction, releaseKernel: ReleaseKernelFunction,
         run: RunFunction, captureBegin: CaptureBeginFunction, captureEnd: CaptureEndFunction, replay: ReplayFunction
       ]): void;
-    jsepInit(name: 'webnn', initParams: [backend: BackendType]): void;
+    jsepInit(name: 'webnn', initParams: [
+      backend: BackendType, reserveBufferId: ReserveBufferIdFunction, releaseBufferId: ReleaseBufferIdFunction,
+      ensureBuffer: EnsureBufferFunction, uploadBuffer: UploadBufferFunction, downloadBuffer: DownloadBufferFunction
+    ]): void;
   }
 
   export interface WebGpuModule {
@@ -124,13 +132,13 @@ export declare namespace JSEP {
      * @param context - specify the MLContext.
      * @returns
      */
-    jsepRegisterMlContext: (sessionId: number, context: MLContext) => void;
+    jsepRegisterMLContext: (sessionId: number, context: MLContext) => void;
     /**
      * [exported from pre-jsep.js] Get MLContext for a session.
     * @param sessionId - specify the session ID.
      * @returns the MLContext.
      */
-    jsepGetMlContext: (sessionId: number) => MLContext;
+    jsepGetMLContext: (sessionId: number) => MLContext;
     /**
      * [exported from pre-jsep.js] Reserve a MLBuffer ID attached to the current session.
      * @returns the MLBuffer ID.
@@ -147,7 +155,7 @@ export declare namespace JSEP {
     * @param bufferId - specify the MLBuffer ID.
      * @returns the MLBuffer.
      */
-    jsepGetMlBuffer: (bufferId: number) => MLBuffer;
+    jsepGetMLBuffer: (bufferId: number) => MLBuffer;
     /**
      * [exported from pre-jsep.js] Ensure MLBuffer has been created with the correct type and dimensions.
      * @param bufferId - specify the MLBuffer ID.
@@ -176,15 +184,15 @@ export declare namespace JSEP {
     * @param type - specify the data type.
      * @returns the downloader function.
      */
-    jsepCreateMlBufferDownloader:
+    jsepCreateMLBufferDownloader:
        (bufferId: number,
-         type: Tensor.MlBufferDataTypes) => () => Promise;
+         type: Tensor.MLBufferDataTypes) => () => Promise;
     /**
     * [exported from pre-jsep.js] Register MLBuffer for a session.
     * @param mlBuffer - specify the MLBuffer.
     * @returns the MLBuffer ID.
     */
-    jsepRegisterMlBuffer: (buffer: MLBuffer) => number;
+    jsepRegisterMLBuffer: (buffer: MLBuffer) => number;
   }
 }
diff --git a/js/web/test/test-runner.ts b/js/web/test/test-runner.ts
index 12c1923f46247..dcf1330b2d481 100644
--- a/js/web/test/test-runner.ts
+++ b/js/web/test/test-runner.ts
@@ -20,7 +20,7 @@ import {onnx} from '../lib/onnxjs/ort-schema/protobuf/onnx';
 import {Tensor} from '../lib/onnxjs/tensor';
 import {ProtoUtil} from '../lib/onnxjs/util';
 import {createView} from '../lib/wasm/jsep/tensor-view';
-import {getTensorElementSize, isGpuBufferSupportedType, isMlBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
+import {getTensorElementSize, isGpuBufferSupportedType, isMLBufferSupportedType, tensorDataTypeStringToEnum} from '../lib/wasm/wasm-common';
 import {base64toBuffer, createMockGraph, readFile} from './test-shared';
 import {Test} from './test-types';
 
@@ -577,7 +577,7 @@ const getContext = (() => {
 })();
 
 async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly number[]) {
-  if (!isMlBufferSupportedType(type)) {
+  if (!isMLBufferSupportedType(type)) {
     throw new Error(`createMlTensorForOutput can not work with ${type} tensor`);
   }
 
@@ -586,7 +586,7 @@ async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly num
   const context = await getContext();
   const mlBuffer = context.createBuffer({dataType, dimensions: dims as number[]});
 
-  return ort.Tensor.fromMlBuffer(mlBuffer, {
+  return ort.Tensor.fromMLBuffer(mlBuffer, {
     dataType: type,
     dims,
     dispose: () => mlBuffer.destroy(),
@@ -598,14 +598,14 @@ async function createMlTensorForOutput(type: ort.Tensor.Type, dims: readonly num
 }
 
 async function createMlTensorForInput(cpuTensor: ort.Tensor): Promise {
-  if (!isMlBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
+  if (!isMLBufferSupportedType(cpuTensor.type) || Array.isArray(cpuTensor.data)) {
     throw new Error(`createMlTensorForInput can not work with ${cpuTensor.type} tensor`);
   }
   const context = await getContext();
   const dataType = cpuTensor.type === 'bool' ? 'uint8' : cpuTensor.type;
  const mlBuffer = context.createBuffer({dataType, dimensions: cpuTensor.dims as number[]});
   context.writeBuffer(mlBuffer, cpuTensor.data);
-  return ort.Tensor.fromMlBuffer(
+  return ort.Tensor.fromMLBuffer(
       mlBuffer, {dataType: cpuTensor.type, dims: cpuTensor.dims, dispose: () => mlBuffer.destroy()});
 }
diff --git a/onnxruntime/core/providers/webnn/allocator.cc b/onnxruntime/core/providers/webnn/allocator.cc
index c09377253ddbb..355ee7e48b9f4 100644
--- a/onnxruntime/core/providers/webnn/allocator.cc
+++ b/onnxruntime/core/providers/webnn/allocator.cc
@@ -3,7 +3,6 @@
 
 #include "core/providers/webnn/allocator.h"
 
-#include "core/framework/session_state.h"
 #include "core/common/safeint.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/webnn/allocator.h b/onnxruntime/core/providers/webnn/allocator.h
index edb7e1ea77217..9c3eff53fa842 100644
--- a/onnxruntime/core/providers/webnn/allocator.h
+++ b/onnxruntime/core/providers/webnn/allocator.h
@@ -8,7 +8,6 @@
 
 #include "core/common/inlined_containers.h"
 #include "core/framework/allocator.h"
-#include "core/framework/execution_provider.h"
 #include "core/framework/ortdevice.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/webnn/builders/helper.cc b/onnxruntime/core/providers/webnn/builders/helper.cc
index 2b622c6b942cf..0a1b157e8f730 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.cc
+++ b/onnxruntime/core/providers/webnn/builders/helper.cc
@@ -12,6 +12,19 @@
 namespace onnxruntime {
 namespace webnn {
 
+WebnnDeviceType DeviceTypeFromString(const std::string& device_type) {
+  if (device_type == "gpu") {
+    return WebnnDeviceType::GPU;
+  }
+  if (device_type == "cpu") {
+    return WebnnDeviceType::CPU;
+  }
+  if (device_type == "npu") {
+    return WebnnDeviceType::NPU;
+  }
+  ORT_THROW("Unknown WebNN deviceType.");
+}
+
 InitializedTensorSet CollectAllInitializedTensors(const GraphViewer& graph_viewer) {
   InitializedTensorSet all_initializers;
   if (graph_viewer.IsSubgraph()) {
@@ -198,9 +211,10 @@ bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type) {
   }
 }
 
-bool IsMlBufferSupported() {
+bool IsMLBufferSupported(WebnnDeviceType device_type) {
   static bool is_supported = !emscripten::val::global("MLBuffer").isUndefined();
-  return is_supported;
+  // The current MLBuffer implementation only supports GPU and NPU devices.
+  return is_supported && device_type != WebnnDeviceType::CPU;
 }
 
 }  // namespace webnn
diff --git a/onnxruntime/core/providers/webnn/builders/helper.h b/onnxruntime/core/providers/webnn/builders/helper.h
index 9dba7801cd6a5..dfe033af249d1 100644
--- a/onnxruntime/core/providers/webnn/builders/helper.h
+++ b/onnxruntime/core/providers/webnn/builders/helper.h
@@ -31,6 +31,8 @@ enum class WebnnDeviceType {
   NPU,
 };
 
+WebnnDeviceType DeviceTypeFromString(const std::string& device_type);
+
 typedef struct {
   std::string opName;
   bool isCpuSupported;  // The WebNN CPU backend XNNPack supports it (not about the CPU EP).
@@ -283,7 +285,7 @@ bool GetBidirectionalBroadcastShape(std::vector& shape_a,
 
 bool SetWebnnDataType(emscripten::val& desc, const int32_t data_type);
 
-bool IsMlBufferSupported();
+bool IsMLBufferSupported(WebnnDeviceType device_type);
 
 }  // namespace webnn
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/webnn/builders/model.cc b/onnxruntime/core/providers/webnn/builders/model.cc
index c47fad5f26224..151a5ed559d3b 100644
--- a/onnxruntime/core/providers/webnn/builders/model.cc
+++ b/onnxruntime/core/providers/webnn/builders/model.cc
@@ -11,22 +11,22 @@
 #include "core/common/safeint.h"
 #include "core/graph/onnx_protobuf.h"
 #include "core/providers/common.h"
-#include "core/providers/webnn/builders/helper.h"
 
 #include "model.h"
 
 namespace onnxruntime {
 namespace webnn {
 
-Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger)
+Model::Model(const emscripten::val& context, const emscripten::val& graph, const logging::Logger& logger, bool use_dispatch)
     : wnn_context_(context),
       wnn_graph_(graph),
-      logger_(logger) {}
+      logger_(logger),
+      use_dispatch_(use_dispatch) {}
 
 Model::~Model() {}
 
 Status Model::Predict(const InlinedHashMap& inputs,
                       const InlinedHashMap& outputs) {
-  if (webnn::IsMlBufferSupported()) {
+  if (use_dispatch_) {
     return Dispatch(inputs, outputs);
   } else {
@@ -201,7 +201,7 @@ void Model::SetOutputMap(InlinedHashMap&& output_map) {
 // Pre-allocate the input and output buffers for the WebNN graph.
 void Model::AllocateInputOutputBuffers() {
   // We don't need to allocate JS array buffers if the WebNN API supports MLBuffer.
-  if (webnn::IsMlBufferSupported()) {
+  if (use_dispatch_) {
     return;
   }
   for (const auto& input : inputs_) {
diff --git a/onnxruntime/core/providers/webnn/builders/model.h b/onnxruntime/core/providers/webnn/builders/model.h
index 8333d841f1a7c..f5ca137f5f6b5 100644
--- a/onnxruntime/core/providers/webnn/builders/model.h
+++ b/onnxruntime/core/providers/webnn/builders/model.h
@@ -83,7 +83,9 @@ class Model {
 
   OrtMutex mutex_;
 
-  Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger);
+  bool use_dispatch_;
+
+  Model(const emscripten::val& context, const emscripten::val& path, const logging::Logger& logger, bool use_dispatch);
 
   void SetInputOutputInfo(InlinedHashMap&& input_output_info) {
     input_output_info_ = std::move(input_output_info);
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc
index 6b0e1495f552d..e7fcfeb27671f 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc
@@ -332,7 +332,7 @@ Status ModelBuilder::Compile(std::unique_ptr& model) {
   if (!wnn_graph.as()) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to build WebNN graph.");
   }
-  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_));
+  model.reset(new Model(std::move(wnn_context_), std::move(wnn_graph), logger_, IsMLBufferSupported(wnn_device_type_)));
   model->SetInputs(std::move(input_names_));
   model->SetOutputs(std::move(output_names_));
   model->SetScalarOutputs(std::move(scalar_outputs_));
diff --git a/onnxruntime/core/providers/webnn/data_transfer.cc b/onnxruntime/core/providers/webnn/data_transfer.cc
index 36c436f1ef3b5..66096c74a7950 100644
--- a/onnxruntime/core/providers/webnn/data_transfer.cc
+++ b/onnxruntime/core/providers/webnn/data_transfer.cc
@@ -4,6 +4,8 @@
 #include "core/providers/webnn/data_transfer.h"
"core/providers/webnn/data_transfer.h" #include +#include "core/framework/tensor.h" + namespace onnxruntime { namespace webnn { @@ -24,7 +26,7 @@ common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const { if (dst_device.Type() == OrtDevice::GPU) { EM_ASM({ - Module.jsepUploadBuffer($0, Module.HEAPU8.subarray($1, $1 + $2)); + Module.jsepUploadBuffer($0, HEAPU8.subarray($1, $1 + $2)); }, dst_data, reinterpret_cast(src_data), bytes); } else { @@ -33,7 +35,7 @@ common::Status DataTransfer::CopyTensor(const Tensor& src, Tensor& dst) const { EM_ASM({ const buffer = Emval.toValue($0); const src_array = new Uint8Array(buffer, 0, $2); - Module.HEAPU8.set(src_array, $1); + HEAPU8.set(src_array, $1); }, buffer.as_handle(), reinterpret_cast(dst_data), bytes); } diff --git a/onnxruntime/core/providers/webnn/data_transfer.h b/onnxruntime/core/providers/webnn/data_transfer.h index 11ae4b74f351f..03cfada46d1a0 100644 --- a/onnxruntime/core/providers/webnn/data_transfer.h +++ b/onnxruntime/core/providers/webnn/data_transfer.h @@ -6,7 +6,6 @@ #include #include "core/framework/data_transfer.h" -#include "core/framework/execution_provider.h" namespace onnxruntime { namespace webnn { diff --git a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc index b76da43839f3c..36494da48f23d 100644 --- a/onnxruntime/core/providers/webnn/webnn_execution_provider.cc +++ b/onnxruntime/core/providers/webnn/webnn_execution_provider.cc @@ -23,20 +23,16 @@ WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_f : IExecutionProvider{ onnxruntime::kWebNNExecutionProvider, // If MLBuffer is supported, we force all the tensors to be allocated as MLBuffer. - OrtDevice(webnn::IsMlBufferSupported() ? OrtDevice::GPU : OrtDevice::CPU, OrtDevice::MemType::DEFAULT, 0)} { + OrtDevice( + webnn::IsMLBufferSupported(webnn::DeviceTypeFromString(webnn_device_flags)) ? OrtDevice::GPU : OrtDevice::CPU, + OrtDevice::MemType::DEFAULT, + 0)}, + wnn_device_type_(webnn::DeviceTypeFromString(webnn_device_flags)) { // WebNN EP uses NHWC layout for CPU XNNPACK backend and NCHW for GPU DML backend. 
-  if (webnn_device_flags.compare("cpu") == 0) {
+  if (wnn_device_type_ == webnn::WebnnDeviceType::CPU) {
     preferred_layout_ = DataLayout::NHWC;
-    wnn_device_type_ = webnn::WebnnDeviceType::CPU;
   } else {
     preferred_layout_ = DataLayout::NCHW;
-    if (webnn_device_flags.compare("gpu") == 0) {
-      wnn_device_type_ = webnn::WebnnDeviceType::GPU;
-    } else if (webnn_device_flags.compare("npu") == 0) {
-      wnn_device_type_ = webnn::WebnnDeviceType::NPU;
-    } else {
-      ORT_THROW("Unknown WebNN deviceType.");
-    }
   }
 
   wnn_context_ = emscripten::val::module_property("currentContext");
@@ -382,14 +378,14 @@ WebNNExecutionProvider::GetKernelRegistry() const {
 }
 
 std::unique_ptr WebNNExecutionProvider::GetDataTransfer() const {
-  if (!webnn::IsMlBufferSupported()) {
+  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
     return nullptr;
   }
   return std::make_unique();
 }
 
 std::vector WebNNExecutionProvider::CreatePreferredAllocators() {
-  if (!webnn::IsMlBufferSupported()) {
+  if (!webnn::IsMLBufferSupported(wnn_device_type_)) {
     return {};
   }
   AllocatorCreationInfo customAllocatorCreationInfo([&](OrtDevice::DeviceId) {
diff --git a/onnxruntime/wasm/pre-jsep.js b/onnxruntime/wasm/pre-jsep.js
index 2147c9f67e1a2..c5bc4cd8c6a59 100644
--- a/onnxruntime/wasm/pre-jsep.js
+++ b/onnxruntime/wasm/pre-jsep.js
@@ -199,45 +199,43 @@ Module['jsepInit'] = (name, params) => {
       return backend['onRunStart'](sessionId);
     };
   } else if(name === 'webnn') {
-    [Module.jsepBackend] = params;
+    // Functions called from EM_ASM need to be assigned in a way that can be minified.
+    [Module.jsepBackend,
+     Module.jsepReserveBufferId,
+     Module.jsepReleaseBufferId,
+     Module.jsepEnsureBuffer,
+     Module.jsepUploadBuffer,
+     Module.jsepDownloadBuffer,
+    ] = params;
+
-    // expose webnn backend functions
+    // Functions called via emscripten::val::module_property need to be assigned in a way that the minifier won't
+    // change the name
+    Module['jsepEnsureBuffer'] = Module.jsepEnsureBuffer;
+    Module['jsepDownloadBuffer'] = Module.jsepDownloadBuffer;
+
+    // Functions called from JS also need to have explicit names.
     const backend = Module.jsepBackend;
     Module['jsepOnRunStart'] = sessionId => {
       return backend['onRunStart'](sessionId);
     };
-    Module['jsepRegisterMlContext'] = (sessionId, mlContext) => {
-      backend['registerMlContext'](sessionId, mlContext);
+    Module['jsepRegisterMLContext'] = (sessionId, mlContext) => {
+      backend['registerMLContext'](sessionId, mlContext);
     };
     Module['jsepOnReleaseSession'] = sessionId => {
       backend['onReleaseSession'](sessionId);
     };
-    Module['jsepGetMlContext'] = sessionId => {
-      return backend['getMlContext'](sessionId);
+    Module['jsepGetMLContext'] = sessionId => {
+      return backend['getMLContext'](sessionId);
     };
-    Module['jsepReserveBufferId'] = () => {
-      return backend['reserveBufferId']();
-    }
-    Module['jsepReleaseBufferId'] = (bufferId) => {
-      backend['releaseBufferId'](bufferId);
-    }
-    Module['jsepGetMlBuffer'] = (bufferId) => {
+    Module['jsepGetMLBuffer'] = (bufferId) => {
       return backend['getBuffer'](bufferId);
     }
-    Module['jsepEnsureBuffer'] = (bufferId, dataType, dimensions) => {
-      return backend['ensureBuffer'](bufferId, dataType, dimensions);
-    }
-    Module['jsepUploadBuffer'] = (bufferId, data) => {
-      backend['uploadBuffer'](bufferId, data);
-    }
-    Module['jsepDownloadBuffer'] = (bufferId) => {
-      return backend['downloadBuffer'](bufferId);
-    }
-    Module['jsepCreateMlBufferDownloader'] = (bufferId, type) => {
-      return backend['createMlBufferDownloader'](bufferId, type);
+    Module['jsepCreateMLBufferDownloader'] = (bufferId, type) => {
+      return backend['createMLBufferDownloader'](bufferId, type);
    }
-    Module['jsepRegisterMlBuffer'] = (buffer) => {
-      return backend['registerMlBuffer'](buffer);
+    Module['jsepRegisterMLBuffer'] = (buffer) => {
+      return backend['registerMLBuffer'](buffer);
    }
  }
};
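
For reference, a minimal usage sketch of the renamed factory, mirroring the updated test-runner code above. This is illustrative only and not part of the patch: it assumes a WebNN-capable browser exposing navigator.ml (with WebNN typings available), the onnxruntime-web build produced from this change, and the provisional MLBuffer API surface, which is still in flux.

import * as ort from 'onnxruntime-web';

// Sketch: wrap a WebNN MLBuffer in an ort.Tensor via the renamed fromMLBuffer factory.
const createMLBufferInput = async (): Promise<ort.Tensor> => {
  // MLBuffer is only enabled for 'gpu'/'npu' device types after this change.
  const context = await navigator.ml.createContext({deviceType: 'gpu'});
  const mlBuffer = context.createBuffer({dataType: 'float32', dimensions: [1, 4]});
  context.writeBuffer(mlBuffer, new Float32Array([1, 2, 3, 4]));

  // dispose() lets the tensor destroy the underlying MLBuffer when it is released.
  return ort.Tensor.fromMLBuffer(mlBuffer, {
    dataType: 'float32',
    dims: [1, 4],
    dispose: () => mlBuffer.destroy(),
  });
};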