diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts index c7760692eed00..edc32535fc64d 100644 --- a/js/common/lib/inference-session.ts +++ b/js/common/lib/inference-session.ts @@ -2,6 +2,7 @@ // Licensed under the MIT License. import {InferenceSession as InferenceSessionImpl} from './inference-session-impl.js'; +import {OnnxModelOptions} from './onnx-model.js'; import {OnnxValue, OnnxValueDataLocation} from './onnx-value.js'; /* eslint-disable @typescript-eslint/no-redeclare */ @@ -43,7 +44,7 @@ export declare namespace InferenceSession { /** * A set of configurations for session behavior. */ - export interface SessionOptions { + export interface SessionOptions extends OnnxModelOptions { /** * An array of execution provider options. * diff --git a/js/common/lib/onnx-model.ts b/js/common/lib/onnx-model.ts new file mode 100644 index 0000000000000..1cd3eedb6fcca --- /dev/null +++ b/js/common/lib/onnx-model.ts @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +/** + * A string that represents a file's URL or path. + * + * Path is available only in onnxruntime-node or onnxruntime-web running in Node.js. + */ +export type FileUrlOrPath = string; + +/** + * A Blob object that represents a file. + */ +export type FileBlob = Blob; + +/** + * A Uint8Array, ArrayBuffer or SharedArrayBuffer object that represents a file content. + * + * When it is an ArrayBuffer or SharedArrayBuffer, the whole buffer is assumed to be the file content. + */ +export type FileData = Uint8Array|ArrayBufferLike; + +/** + * Represents a file that can be loaded by the ONNX Runtime JavaScript API. + */ +export type FileType = FileUrlOrPath|FileBlob|FileData; + +/** + * Represents an external data file. + */ +export interface ExternalDataFileDescription { + /** + * Specify the external data file. + */ + data: FileType; + /** + * Specify the file path. 
+ */ + path: string; +} + +/** + * Represents an external data file. + * + * When using a string, it should be a file URL or path that is in the same directory as the model file. + */ +export type ExternalDataFileType = ExternalDataFileDescription|FileUrlOrPath; + +/** + * Options for model loading. + */ +export interface OnnxModelOptions { + /** + * Specifies a list of files that represent the external data. + */ + externalData?: readonly ExternalDataFileType[]; +} diff --git a/js/web/lib/wasm/binding/ort-wasm.d.ts b/js/web/lib/wasm/binding/ort-wasm.d.ts index 6c55dcc1bfd32..9d4d5875310b7 100644 --- a/js/web/lib/wasm/binding/ort-wasm.d.ts +++ b/js/web/lib/wasm/binding/ort-wasm.d.ts @@ -115,6 +115,11 @@ export interface OrtWasmModule extends EmscriptenModule { mainScriptUrlOrBlob?: string|Blob; // #endregion + // #region external data API + mountExternalData?(externalDataFilePath: string, externalDataFileData: Uint8Array): void; + unmountExternalData?(): void; + // #endregion + // #region JSEP /** * This is the entry of JSEP initialization. This function is called once when initializing ONNX Runtime. 
diff --git a/js/web/lib/wasm/proxy-worker/main.ts b/js/web/lib/wasm/proxy-worker/main.ts index 4df524cdcfb22..6cbd38c76ccc8 100644 --- a/js/web/lib/wasm/proxy-worker/main.ts +++ b/js/web/lib/wasm/proxy-worker/main.ts @@ -79,8 +79,14 @@ self.onmessage = (ev: MessageEvent): void => { } case 'create': { const {model, options} = message!; - const sessionMetadata = createSession(model, options); - postMessage({type, out: sessionMetadata} as OrtWasmMessage); + createSession(model, options) + .then( + sessionMetadata => { + postMessage({type, out: sessionMetadata} as OrtWasmMessage); + }, + err => { + postMessage({type, err}); + }); break; } case 'release': diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts index e17ec37e3e612..2bece248669f5 100644 --- a/js/web/lib/wasm/session-handler-inference.ts +++ b/js/web/lib/wasm/session-handler-inference.ts @@ -1,12 +1,12 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -import {readFile} from 'node:fs/promises'; import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor, TRACE_FUNC_BEGIN, TRACE_FUNC_END} from 'onnxruntime-common'; import {SerializableInternalBuffer, TensorMetadata} from './proxy-messages'; import {copyFromExternalBuffer, createSession, endProfiling, releaseSession, run} from './proxy-wrapper'; import {isGpuBufferSupportedType} from './wasm-common'; +import {loadFile} from './wasm-utils-load-file'; export const encodeTensorMetadata = (tensor: Tensor, getName: () => string): TensorMetadata => { switch (tensor.location) { @@ -43,14 +43,8 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHan outputNames: string[]; async fetchModelAndCopyToWasmMemory(path: string): Promise { - // fetch model from url and move to wasm heap. 
The arraybufffer that held the http - // response is freed once we return - const response = await fetch(path); - if (response.status !== 200) { - throw new Error(`failed to load model: ${path}`); - } - const arrayBuffer = await response.arrayBuffer(); - return copyFromExternalBuffer(new Uint8Array(arrayBuffer)); + // fetch model from url and move to wasm heap. + return copyFromExternalBuffer(await loadFile(path)); } async loadModel(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions): Promise { @@ -60,7 +54,7 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHan if (typeof pathOrBuffer === 'string') { if (typeof process !== 'undefined' && process.versions && process.versions.node) { // node - model = await readFile(pathOrBuffer); + model = await loadFile(pathOrBuffer); } else { // browser // fetch model and copy to wasm heap. diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts index a9dfd9218bb6f..5821fac3c468f 100644 --- a/js/web/lib/wasm/wasm-core-impl.ts +++ b/js/web/lib/wasm/wasm-core-impl.ts @@ -9,6 +9,7 @@ import {setSessionOptions} from './session-options'; import {dataLocationStringToEnum, getTensorElementSize, isGpuBufferSupportedType, logLevelStringToEnum, tensorDataTypeEnumToString, tensorDataTypeStringToEnum, tensorTypeToTypedArrayConstructor} from './wasm-common'; import {getInstance} from './wasm-factory'; import {allocWasmString, checkLastError} from './wasm-utils'; +import {loadFile} from './wasm-utils-load-file'; // #region Initializations @@ -187,108 +188,124 @@ export const copyFromExternalBuffer = (model: Uint8Array): [number, number] => { * @param options an optional session options object. 
* @returns a 3-elements tuple containing [session handle, input names, output names] */ -export const createSession = - (modelData: Uint8Array|SerializableInternalBuffer, - options?: InferenceSession.SessionOptions): SerializableSessionMetadata => { - let modelDataOffset: number, modelDataLength: number; - const wasm = getInstance(); - - if (Array.isArray(modelData)) { - // if model data is an array, it must be a 2-elements tuple containing the pointer and size of the model data - [modelDataOffset, modelDataLength] = modelData; - } else if (modelData.buffer === wasm.HEAPU8.buffer) { - // if model data uses the same buffer as the WASM heap, we don't need to copy it. - [modelDataOffset, modelDataLength] = [modelData.byteOffset, modelData.byteLength]; - } else { - // otherwise, copy the model data to the WASM heap. - [modelDataOffset, modelDataLength] = copyFromExternalBuffer(modelData); - } +export const createSession = async( + modelData: Uint8Array|SerializableInternalBuffer, + options?: InferenceSession.SessionOptions): Promise => { + let modelDataOffset: number, modelDataLength: number; + const wasm = getInstance(); - let sessionHandle = 0; - let sessionOptionsHandle = 0; - let ioBindingHandle = 0; - let allocs: number[] = []; - const inputNamesUTF8Encoded = []; - const outputNamesUTF8Encoded = []; + if (Array.isArray(modelData)) { + // if model data is an array, it must be a 2-elements tuple containing the pointer and size of the model data + [modelDataOffset, modelDataLength] = modelData; + } else if (modelData.buffer === wasm.HEAPU8.buffer) { + // if model data uses the same buffer as the WASM heap, we don't need to copy it. + [modelDataOffset, modelDataLength] = [modelData.byteOffset, modelData.byteLength]; + } else { + // otherwise, copy the model data to the WASM heap. 
+ [modelDataOffset, modelDataLength] = copyFromExternalBuffer(modelData); + } - try { - [sessionOptionsHandle, allocs] = setSessionOptions(options); + let sessionHandle = 0; + let sessionOptionsHandle = 0; + let ioBindingHandle = 0; + let allocs: number[] = []; + const inputNamesUTF8Encoded = []; + const outputNamesUTF8Encoded = []; - sessionHandle = wasm._OrtCreateSession(modelDataOffset, modelDataLength, sessionOptionsHandle); - if (sessionHandle === 0) { - checkLastError('Can\'t create a session.'); - } + try { + [sessionOptionsHandle, allocs] = setSessionOptions(options); + + if (options?.externalData && wasm.mountExternalData) { + const loadingPromises = []; + for (const file of options.externalData) { + const path = typeof file === 'string' ? file : file.path; + loadingPromises.push(loadFile(typeof file === 'string' ? file : file.data).then(data => { + wasm.mountExternalData!(path, data); + })); + } - const [inputCount, outputCount] = getSessionInputOutputCount(sessionHandle); + // wait for all external data files to be loaded + await Promise.all(loadingPromises); + } - const inputNames = []; - const outputNames = []; - const outputPreferredLocations: SupportedTensorDataLocationForInputOutput[] = []; - for (let i = 0; i < inputCount; i++) { - const name = wasm._OrtGetInputName(sessionHandle, i); - if (name === 0) { - checkLastError('Can\'t get an input name.'); - } - inputNamesUTF8Encoded.push(name); - inputNames.push(wasm.UTF8ToString(name)); - } - for (let i = 0; i < outputCount; i++) { - const name = wasm._OrtGetOutputName(sessionHandle, i); - if (name === 0) { - checkLastError('Can\'t get an output name.'); - } - outputNamesUTF8Encoded.push(name); - const nameString = wasm.UTF8ToString(name); - outputNames.push(nameString); - - if (!BUILD_DEFS.DISABLE_WEBGPU) { - const location = typeof options?.preferredOutputLocation === 'string' ? - options.preferredOutputLocation : - options?.preferredOutputLocation?.[nameString] ?? 
'cpu'; - if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer') { - throw new Error(`Not supported preferred output location: ${location}.`); - } - outputPreferredLocations.push(location); - } - } + sessionHandle = wasm._OrtCreateSession(modelDataOffset, modelDataLength, sessionOptionsHandle); + if (sessionHandle === 0) { + checkLastError('Can\'t create a session.'); + } - // use IO binding only when at least one output is preffered to be on GPU. - let bindingState: IOBindingState|null = null; - if (!BUILD_DEFS.DISABLE_WEBGPU && outputPreferredLocations.some(l => l === 'gpu-buffer')) { - ioBindingHandle = wasm._OrtCreateBinding(sessionHandle); - if (ioBindingHandle === 0) { - checkLastError('Can\'t create IO binding.'); - } + const [inputCount, outputCount] = getSessionInputOutputCount(sessionHandle); - bindingState = { - handle: ioBindingHandle, - outputPreferredLocations, - outputPreferredLocationsEncoded: outputPreferredLocations.map(l => dataLocationStringToEnum(l)), - }; + const inputNames = []; + const outputNames = []; + const outputPreferredLocations: SupportedTensorDataLocationForInputOutput[] = []; + for (let i = 0; i < inputCount; i++) { + const name = wasm._OrtGetInputName(sessionHandle, i); + if (name === 0) { + checkLastError('Can\'t get an input name.'); + } + inputNamesUTF8Encoded.push(name); + inputNames.push(wasm.UTF8ToString(name)); + } + for (let i = 0; i < outputCount; i++) { + const name = wasm._OrtGetOutputName(sessionHandle, i); + if (name === 0) { + checkLastError('Can\'t get an output name.'); + } + outputNamesUTF8Encoded.push(name); + const nameString = wasm.UTF8ToString(name); + outputNames.push(nameString); + + if (!BUILD_DEFS.DISABLE_WEBGPU) { + const location = typeof options?.preferredOutputLocation === 'string' ? + options.preferredOutputLocation : + options?.preferredOutputLocation?.[nameString] ?? 
'cpu'; + if (location !== 'cpu' && location !== 'cpu-pinned' && location !== 'gpu-buffer') { + throw new Error(`Not supported preferred output location: ${location}.`); } + outputPreferredLocations.push(location); + } + } - activeSessions.set(sessionHandle, [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, bindingState]); - return [sessionHandle, inputNames, outputNames]; - } catch (e) { - inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf)); - outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf)); + // use IO binding only when at least one output is preferred to be on GPU. + let bindingState: IOBindingState|null = null; + if (!BUILD_DEFS.DISABLE_WEBGPU && outputPreferredLocations.some(l => l === 'gpu-buffer')) { + ioBindingHandle = wasm._OrtCreateBinding(sessionHandle); + if (ioBindingHandle === 0) { + checkLastError('Can\'t create IO binding.'); + } - if (ioBindingHandle !== 0) { - wasm._OrtReleaseBinding(ioBindingHandle); - } + bindingState = { + handle: ioBindingHandle, + outputPreferredLocations, + outputPreferredLocationsEncoded: outputPreferredLocations.map(l => dataLocationStringToEnum(l)), + }; + } - if (sessionHandle !== 0) { - wasm._OrtReleaseSession(sessionHandle); - } - throw e; - } finally { - wasm._free(modelDataOffset); - if (sessionOptionsHandle !== 0) { - wasm._OrtReleaseSessionOptions(sessionOptionsHandle); - } - allocs.forEach(alloc => wasm._free(alloc)); - } - }; + activeSessions.set(sessionHandle, [sessionHandle, inputNamesUTF8Encoded, outputNamesUTF8Encoded, bindingState]); + return [sessionHandle, inputNames, outputNames]; + } catch (e) { + inputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf)); + outputNamesUTF8Encoded.forEach(buf => wasm._OrtFree(buf)); + + if (ioBindingHandle !== 0) { + wasm._OrtReleaseBinding(ioBindingHandle); + } + + if (sessionHandle !== 0) { + wasm._OrtReleaseSession(sessionHandle); + } + throw e; + } finally { + wasm._free(modelDataOffset); + if (sessionOptionsHandle !== 0) { + 
wasm._OrtReleaseSessionOptions(sessionOptionsHandle); + } + allocs.forEach(alloc => wasm._free(alloc)); + + // unmount external data if necessary + wasm.unmountExternalData?.(); + } +}; export const releaseSession = (sessionId: number): void => { const wasm = getInstance(); diff --git a/js/web/lib/wasm/wasm-utils-load-file.ts b/js/web/lib/wasm/wasm-utils-load-file.ts new file mode 100644 index 0000000000000..abe480a43c790 --- /dev/null +++ b/js/web/lib/wasm/wasm-utils-load-file.ts @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import * as fs from 'fs'; +import {readFile} from 'node:fs/promises'; + +/** + * Load a file into a Uint8Array. + * + * @param file - the file to load. Can be a URL/path, a Blob, an ArrayBuffer, or a Uint8Array. + * @returns a Uint8Array containing the file data. + */ +export const loadFile = async(file: string|Blob|ArrayBufferLike|Uint8Array): Promise => { + if (typeof file === 'string') { + if (typeof process !== 'undefined' && process.versions && process.versions.node) { + // load file into ArrayBuffer in Node.js + try { + return new Uint8Array(await readFile(file)); + } catch (e) { + if (e.code === 'ERR_FS_FILE_TOO_LARGE') { + // file is too large, use fs.createReadStream instead + const stream = fs.createReadStream(file); + const chunks: Uint8Array[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + return new Uint8Array(Buffer.concat(chunks)); + } + throw e; + } + } else { + // load file into ArrayBuffer in browsers + const response = await fetch(file); + if (!response.ok) { + throw new Error(`failed to load external data file: ${file}`); + } + const contentLengthHeader = response.headers.get('Content-Length'); + const fileSize = contentLengthHeader ? parseInt(contentLengthHeader, 10) : 0; + if (fileSize < 1073741824 /* 1GB */) { + // when Content-Length header is not set, we cannot determine the file size. 
We assume it is small enough to + // load into memory. + return new Uint8Array(await response.arrayBuffer()); + } else { + // file is too large, use stream instead + if (!response.body) { + throw new Error(`failed to load external data file: ${file}, no response body.`); + } + const reader = response.body.getReader(); + + // use WebAssembly Memory to allocate larger ArrayBuffer + const pages = Math.ceil(fileSize / 65536); + const buffer = new WebAssembly.Memory({initial: pages, maximum: pages}).buffer; + + let offset = 0; + // eslint-disable-next-line no-constant-condition + while (true) { + const {done, value} = await reader.read(); + if (done) { + break; + } + const chunkSize = value.byteLength; + const chunk = new Uint8Array(buffer, offset, chunkSize); + chunk.set(value); + offset += chunkSize; + } + return new Uint8Array(buffer, 0, fileSize); + } + } + + } else if (file instanceof Blob) { + return new Uint8Array(await file.arrayBuffer()); + } else if (file instanceof Uint8Array) { + return file; + } else { + return new Uint8Array(file); + } +}; diff --git a/js/web/script/build.ts b/js/web/script/build.ts index a52ac4454a5c1..ea0c122cb51de 100644 --- a/js/web/script/build.ts +++ b/js/web/script/build.ts @@ -122,7 +122,11 @@ async function buildOrt({ case 'node:fs/promises': case 'node:fs': case 'fs': - return {contents: 'export const readFile = undefined;'}; + return { + contents: 'export const readFile = undefined;' + + 'export const readFileSync = undefined;' + + 'export const createReadStream = undefined;' + }; case 'node:os': case 'os': return {contents: 'export const cpus = undefined;'}; diff --git a/js/web/test/e2e/browser-test-webgpu-external-data.js b/js/web/test/e2e/browser-test-webgpu-external-data.js new file mode 100644 index 0000000000000..8fb0b4d6ec545 --- /dev/null +++ b/js/web/test/e2e/browser-test-webgpu-external-data.js @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +'use strict'; + +it('Browser E2E testing - WebGPU backend with external data', async function() { + const session = await ort.InferenceSession.create('./model_with_orig_ext_data.onnx', { + executionProviders: ['webgpu'], + externalData: [{data: './model_with_orig_ext_data.bin', path: 'model_with_orig_ext_data.bin'}] + }); + + const fetches = await session.run({X: new ort.Tensor('float32', [1, 1], [1, 2])}); + + const Y = fetches.Y; + + assert(Y instanceof ort.Tensor); + assert(Y.dims.length === 2 && Y.dims[0] === 2 && Y.dims[1] === 3); + assert(Y.data[0] === 1); + assert(Y.data[1] === 1); + assert(Y.data[2] === 0); + assert(Y.data[3] === 0); + assert(Y.data[4] === 0); + assert(Y.data[5] === 0); +}); diff --git a/js/web/test/e2e/karma.conf.js b/js/web/test/e2e/karma.conf.js index b7ff408fa29c6..b541d9d120110 100644 --- a/js/web/test/e2e/karma.conf.js +++ b/js/web/test/e2e/karma.conf.js @@ -15,6 +15,8 @@ if (typeof USER_DATA !== 'string') { throw new Error('flag --user-data= is required'); } +const flags = ['--ignore-gpu-blocklist', '--gpu-vendor-id=0x10de']; + module.exports = function(config) { const distPrefix = SELF_HOST ? 
'./node_modules/onnxruntime-web/dist/' : 'http://localhost:8081/dist/'; config.set({ @@ -25,10 +27,14 @@ module.exports = function(config) { {pattern: TEST_MAIN}, {pattern: './node_modules/onnxruntime-web/dist/*.wasm', included: false, nocache: true}, {pattern: './model.onnx', included: false}, + {pattern: './model_with_orig_ext_data.onnx', included: false}, + {pattern: './model_with_orig_ext_data.bin', included: false}, ], plugins: [require('@chiragrupani/karma-chromium-edge-launcher'), ...config.plugins], proxies: { '/model.onnx': '/base/model.onnx', + '/model_with_orig_ext_data.onnx': '/base/model_with_orig_ext_data.onnx', + '/model_with_orig_ext_data.bin': '/base/model_with_orig_ext_data.bin', '/test-wasm-path-override/ort-wasm.wasm': '/base/node_modules/onnxruntime-web/dist/ort-wasm.wasm', '/test-wasm-path-override/renamed.wasm': '/base/node_modules/onnxruntime-web/dist/ort-wasm.wasm', }, @@ -43,10 +49,11 @@ module.exports = function(config) { hostname: 'localhost', browsers: [], customLaunchers: { - Chrome_default: {base: 'ChromeHeadless', chromeDataDir: USER_DATA}, + Chrome_default: {base: 'Chrome', flags, chromeDataDir: USER_DATA}, Chrome_no_threads: { - base: 'ChromeHeadless', + base: 'Chrome', chromeDataDir: USER_DATA, + flags // TODO: no-thread flags }, Edge_default: {base: 'Edge', edgeDataDir: USER_DATA} diff --git a/js/web/test/e2e/model_with_orig_ext_data.bin b/js/web/test/e2e/model_with_orig_ext_data.bin new file mode 100644 index 0000000000000..d69e6beeff85d Binary files /dev/null and b/js/web/test/e2e/model_with_orig_ext_data.bin differ diff --git a/js/web/test/e2e/model_with_orig_ext_data.onnx b/js/web/test/e2e/model_with_orig_ext_data.onnx new file mode 100644 index 0000000000000..6f9cce0bc5b4f --- /dev/null +++ b/js/web/test/e2e/model_with_orig_ext_data.onnx @@ -0,0 +1,19 @@ +  onnx-example:æ +: +X +model_with_orig_ext_dataY"Pad* +mode"constant  +test-model*JBmodel_with_orig_ext_dataj( +locationmodel_with_orig_ext_data.binpZ +X +  + +Z& 
+model_with_orig_ext_data + + +b +Y +  + +B \ No newline at end of file diff --git a/js/web/test/e2e/run.js b/js/web/test/e2e/run.js index 2776f6dff46ab..46c04792f1b97 100644 --- a/js/web/test/e2e/run.js +++ b/js/web/test/e2e/run.js @@ -119,6 +119,7 @@ async function testAllBrowserCases({hostInKarma}) { await runKarma({hostInKarma, main: './browser-test-wasm-path-override-prefix.js'}); await runKarma({hostInKarma, main: './browser-test-wasm-path-override-prefix.js', ortMain: 'ort.wasm.min.js'}); await runKarma({hostInKarma, main: './browser-test-wasm-image-tensor-image.js'}); + await runKarma({hostInKarma, main: './browser-test-webgpu-external-data.js', ortMain: 'ort.webgpu.min.js'}); } async function runKarma({hostInKarma, main, browser = BROWSER, ortMain = 'ort.min.js'}) { diff --git a/js/web/test/e2e/simple-http-server.js b/js/web/test/e2e/simple-http-server.js index 1244aaddafd23..6a6162855df83 100644 --- a/js/web/test/e2e/simple-http-server.js +++ b/js/web/test/e2e/simple-http-server.js @@ -16,6 +16,7 @@ const validRequests = { '/dist/ort-wasm-simd.wasm': ['dist/ort-wasm-simd.wasm', 'application/wasm'], '/dist/ort-wasm-threaded.wasm': ['dist/ort-wasm-threaded.wasm', 'application/wasm'], '/dist/ort-wasm-simd-threaded.wasm': ['dist/ort-wasm-simd-threaded.wasm', 'application/wasm'], + '/dist/ort-wasm-simd.jsep.wasm': ['dist/ort-wasm-simd.jsep.wasm', 'application/wasm'], // proxied .wasm files: '/test-wasm-path-override/ort-wasm.wasm': ['dist/ort-wasm.wasm', 'application/wasm'], @@ -25,6 +26,7 @@ const validRequests = { '/dist/ort.min.js': ['dist/ort.min.js', 'text/javascript'], '/dist/ort.js': ['dist/ort.js', 'text/javascript'], '/dist/ort.webgl.min.js': ['dist/ort.webgl.min.js', 'text/javascript'], + '/dist/ort.webgpu.min.js': ['dist/ort.webgpu.min.js', 'text/javascript'], '/dist/ort.wasm.min.js': ['dist/ort.wasm.min.js', 'text/javascript'], '/dist/ort.wasm-core.min.js': ['dist/ort.wasm-core.min.js', 'text/javascript'], }; diff --git 
a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index fd32aaedcc2ee..8a2db6d5728af 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -7,6 +7,10 @@ #include #include +#if defined(__wasm__) +#include +#endif + #include "core/common/gsl.h" #include "core/common/logging/logging.h" #include "core/common/narrow.h" @@ -769,6 +773,7 @@ static void DeleteCharArray(void* param) noexcept { delete[] arr; } +#if !defined(__wasm__) static Status GetFileContent( const Env& env, const ORTCHAR_T* file_path, FileOffsetType offset, size_t length, void*& raw_buffer, OrtCallback& deleter) { @@ -797,6 +802,7 @@ static Status GetFileContent( raw_buffer = buffer.release(); return Status::OK(); } +#endif Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, @@ -819,6 +825,69 @@ Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, ext_data_len = raw_data_safe_len; ext_data_deleter = OrtCallback{nullptr, nullptr}; } else { +#if defined(__wasm__) + ORT_RETURN_IF(file_offset < 0 || file_offset + raw_data_safe_len >= 4294967296, + "External initializer: ", tensor_proto.name(), + " offset: ", file_offset, " size to read: ", static_cast(raw_data_safe_len), + " are out of bounds or can not be read in full (>4GB)."); + + auto buffer = std::make_unique(raw_data_safe_len); + ext_data_deleter = OrtCallback{DeleteCharArray, buffer.get()}; + ext_data_buf = buffer.release(); + ext_data_len = raw_data_safe_len; + + // In WebAssembly, try to use a simplified preloaded file map when available. + auto err_code = EM_ASM_INT(({ + // If available, "Module.MountedFiles" is a Map for all preloaded files. + if (typeof Module == 'undefined' || !Module.MountedFiles) { + return 1; // "Module.MountedFiles" is not available. 
+ } + let fileName = UTF8ToString($0 >>> 0); + if (fileName.startsWith('./')) { + fileName = fileName.substring(2); + } + const fileData = Module.MountedFiles.get(fileName); + if (!fileData) { + return 2; // File not found in preloaded files. + } + const offset = $1 >>> 0; + const length = $2 >>> 0; + const buffer = $3 >>> 0; + + if (offset + length > fileData.byteLength) { + return 3; // Out of bounds. + } + + try { + // Copy the file data (fileData,offset,length) into WebAssembly memory (HEAPU8,buffer,length). + HEAPU8.set(fileData.subarray(offset, offset + length), buffer); + return 0; + } catch { + return 4; + } + }), + external_data_file_path.c_str(), + static_cast(file_offset), + static_cast(raw_data_safe_len), + ext_data_buf); + const char* err_msg; + switch (err_code) { + case 0: + return Status::OK(); + case 1: + err_msg = "Module.MountedFiles is not available."; + break; + case 2: + err_msg = "File not found in preloaded files."; + break; + case 3: + err_msg = "Out of bounds."; + break; + default: + err_msg = "Unknown error occurred in memory copy."; + } + return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to load external data file \"", external_data_file_path, "\", error: ", err_msg); +#else size_t file_length; // error reporting is inconsistent across platforms. Make sure the full path we attempted to open is included. 
auto status = env.GetFileLength(external_data_file_path.c_str(), file_length); @@ -836,6 +905,7 @@ Status GetExtDataFromTensorProto(const Env& env, const ORTCHAR_T* model_path, ORT_RETURN_IF_ERROR(GetFileContent(env, external_data_file_path.c_str(), file_offset, raw_data_safe_len, ext_data_buf, ext_data_deleter)); ext_data_len = raw_data_safe_len; +#endif } return Status::OK(); diff --git a/onnxruntime/core/optimizer/initializer.cc b/onnxruntime/core/optimizer/initializer.cc index 9e807ddc7be59..3679a40d32eee 100644 --- a/onnxruntime/core/optimizer/initializer.cc +++ b/onnxruntime/core/optimizer/initializer.cc @@ -27,10 +27,14 @@ Initializer::Initializer(ONNX_NAMESPACE::TensorProto_DataType data_type, Initializer::Initializer(const ONNX_NAMESPACE::TensorProto& tensor_proto, const Path& model_path) { ORT_ENFORCE(utils::HasDataType(tensor_proto), "Initializer must have a datatype"); +#if !defined(__wasm__) + // using full filepath is required by utils::TensorProtoToTensor(). One exception is WebAssembly platform, where + // external data is not loaded from real file system. if (utils::HasExternalData(tensor_proto)) { ORT_ENFORCE(!model_path.IsEmpty(), "model_path must not be empty. 
Ensure that a path is provided when the model is created or loaded."); } +#endif auto proto_data_type = tensor_proto.data_type(); if (utils::HasName(tensor_proto)) { diff --git a/onnxruntime/test/framework/test_tensor_loader.cc b/onnxruntime/test/framework/test_tensor_loader.cc index e71830be08b5e..71d70abceb82e 100644 --- a/onnxruntime/test/framework/test_tensor_loader.cc +++ b/onnxruntime/test/framework/test_tensor_loader.cc @@ -95,6 +95,7 @@ TEST(CApiTensorTest, load_simple_float_tensor_allocator) { g_ort->ReleaseStatus(ort_st); } +#if !defined(__wasm__) template static void run_external_data_test() { FILE* fp; @@ -154,6 +155,7 @@ TEST(CApiTensorTest, load_float_tensor_with_external_data) { run_external_data_test(); run_external_data_test(); } +#endif #if defined(__amd64__) || defined(_M_X64) #ifndef __ANDROID__ diff --git a/onnxruntime/test/optimizer/initializer_test.cc b/onnxruntime/test/optimizer/initializer_test.cc index 8da7e6d820746..ee93cfaa67e2a 100644 --- a/onnxruntime/test/optimizer/initializer_test.cc +++ b/onnxruntime/test/optimizer/initializer_test.cc @@ -19,6 +19,7 @@ namespace onnxruntime { namespace test { +#if !defined(__wasm__) namespace { template Status WriteExternalDataFile(gsl::span data, const PathString& path, ScopedFileDeleter& file_deleter) { @@ -106,6 +107,7 @@ TEST(OptimizerInitializerTest, LoadExternalData) { EXPECT_THROW(Initializer i(tensor_proto, tensor_data_dir_path), OnnxRuntimeException); } } +#endif template constexpr ONNX_NAMESPACE::TensorProto_DataType GetTensorProtoDataType(); diff --git a/onnxruntime/wasm/js_internal_api.js b/onnxruntime/wasm/js_internal_api.js index 427ad6f6d14f3..25ece9c700d5d 100644 --- a/onnxruntime/wasm/js_internal_api.js +++ b/onnxruntime/wasm/js_internal_api.js @@ -3,7 +3,27 @@ 'use strict'; -// init JSEP +/** + * Mount an external data file of a model into the in-memory file map (Module.MountedFiles). 
+ * + * @param {string} externalDataFilePath + * @param {Uint8Array} externalDataFileData + */ +Module['mountExternalData'] = (externalDataFilePath, externalDataFileData) => { + const files = Module.MountedFiles || (Module.MountedFiles = new Map()); + files.set(externalDataFilePath, externalDataFileData); +}; + +/** + * Unmount all external data files of a model by discarding the in-memory file map (Module.MountedFiles). + */ +Module['unmountExternalData'] = () => { + delete Module.MountedFiles; +}; + +/** + * init JSEP + */ Module['jsepInit'] = (backend, alloc, free, copy, copyAsync, createKernel, releaseKernel, runKernel) => { Module.jsepBackend = backend; Module.jsepAlloc = alloc;