From 5a9ddea5bc3bee64b6d645d30ca67315f0cdd9db Mon Sep 17 00:00:00 2001
From: Yang Gu
Date: Mon, 25 Dec 2023 17:15:21 +0800
Subject: [PATCH] [js/webgpu] Introduce trace support

This leverages console.timeStamp to add a single marker to the browser's
performance tool (only Chromium and Firefox support it). With this support,
we can dump both CPU and GPU timestamps, and use a post-processing tool to
clearly understand the calibrated timeline. A demo tool can be found at
https://github.com/webatintel/ort-test, and more detailed info can be found at
https://docs.google.com/document/d/1TuVxjE8jnELBXdhI4QGFgMnUqQn6Q53QA9y4a_dH688/edit.
---
 js/common/lib/env.ts                         |  7 +++
 js/common/lib/index.ts                       |  1 +
 js/common/lib/inference-session-impl.ts      |  5 +++
 js/common/lib/trace.ts                       | 44 +++++++++++++++++++
 js/web/lib/backend-wasm.ts                   |  4 ++
 js/web/lib/wasm/jsep/backend-webgpu.ts       |  4 +-
 .../lib/wasm/jsep/webgpu/program-manager.ts  |  6 +++
 js/web/lib/wasm/session-handler-inference.ts |  6 ++-
 8 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 js/common/lib/trace.ts
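Note: a minimal usage sketch (not part of the patch). It assumes onnxruntime-web
with the webgpu execution provider; the model path, input name, and shape are
placeholders:

  import * as ort from 'onnxruntime-web';

  // Opt in before creating the session; initializeFlags() in backend-wasm.ts
  // otherwise defaults the flag to false.
  ort.env.wasm.trace = true;

  const session = await ort.InferenceSession.create('model.onnx', {executionProviders: ['webgpu']});
  const feeds = {input: new ort.Tensor('float32', new Float32Array(4), [1, 4])};
  const results = await session.run(feeds);
  // session.run() now emits console.timeStamp markers such as
  // CPU::ORT::FUNC_BEGIN::run, visible on the Performance panel timeline
  // in Chromium and Firefox.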
diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts
index 0cded7e5edbcb..b007b5e164bf3 100644
--- a/js/common/lib/env.ts
+++ b/js/common/lib/env.ts
@@ -33,6 +33,13 @@ export declare namespace Env {
    */
   simd?: boolean;
 
+  /**
+   * set or get a boolean value indicating whether to enable trace.
+   *
+   * @defaultValue `false`
+   */
+  trace?: boolean;
+
   /**
    * Set or get a number specifying the timeout for initialization of WebAssembly backend, in milliseconds. A zero
    * value indicates no timeout is set.
diff --git a/js/common/lib/index.ts b/js/common/lib/index.ts
index 9cbfcc4e8bcdc..d7c98380f3fa4 100644
--- a/js/common/lib/index.ts
+++ b/js/common/lib/index.ts
@@ -21,5 +21,6 @@ export * from './backend.js';
 export * from './env.js';
 export * from './inference-session.js';
 export * from './tensor.js';
+export * from './trace.js';
 export * from './onnx-value.js';
 export * from './training-session.js';
diff --git a/js/common/lib/inference-session-impl.ts b/js/common/lib/inference-session-impl.ts
index 9bc2088f2088a..55f40c8907a89 100644
--- a/js/common/lib/inference-session-impl.ts
+++ b/js/common/lib/inference-session-impl.ts
@@ -6,6 +6,7 @@ import {InferenceSessionHandler} from './backend.js';
 import {InferenceSession as InferenceSessionInterface} from './inference-session.js';
 import {OnnxValue} from './onnx-value.js';
 import {Tensor} from './tensor.js';
+import {TRACE_FUNC_BEGIN, TRACE_FUNC_END} from './trace.js';
 
 type SessionOptions = InferenceSessionInterface.SessionOptions;
 type RunOptions = InferenceSessionInterface.RunOptions;
@@ -20,6 +21,7 @@ export class InferenceSession implements InferenceSessionInterface {
   run(feeds: FeedsType, options?: RunOptions): Promise<ReturnType>;
   run(feeds: FeedsType, fetches: FetchesType, options?: RunOptions): Promise<ReturnType>;
   async run(feeds: FeedsType, arg1?: FetchesType|RunOptions, arg2?: RunOptions): Promise<ReturnType> {
+    TRACE_FUNC_BEGIN();
     const fetches: {[name: string]: OnnxValue|null} = {};
     let options: RunOptions = {};
     // check inputs
@@ -117,6 +119,7 @@ export class InferenceSession implements InferenceSessionInterface {
         }
       }
     }
+    TRACE_FUNC_END();
     return returnValue;
   }
 
@@ -132,6 +135,7 @@ export class InferenceSession implements InferenceSessionInterface {
   static async create(
       arg0: string|ArrayBufferLike|Uint8Array, arg1?: SessionOptions|number, arg2?: number,
       arg3?: SessionOptions): Promise<InferenceSessionInterface> {
+    TRACE_FUNC_BEGIN();
     // either load from a file or buffer
     let filePathOrUint8Array: string|Uint8Array;
     let options: SessionOptions = {};
@@ -196,6 +200,7 @@ export class InferenceSession implements InferenceSessionInterface {
     const backendHints = eps.map(i => typeof i === 'string' ? i : i.name);
     const backend = await resolveBackend(backendHints);
     const handler = await backend.createInferenceSessionHandler(filePathOrUint8Array, options);
+    TRACE_FUNC_END();
     return new InferenceSession(handler);
   }
 
diff --git a/js/common/lib/trace.ts b/js/common/lib/trace.ts
new file mode 100644
index 0000000000000..404f7ef8089af
--- /dev/null
+++ b/js/common/lib/trace.ts
@@ -0,0 +1,44 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+import {env} from './env-impl.js';
+
+export const TRACE = (deviceType: string, label: string) => {
+  if (!env.wasm.trace) {
+    return;
+  }
+  // eslint-disable-next-line no-console
+  console.timeStamp(`${deviceType}::ORT::${label}`);
+};
+
+const TRACE_FUNC = (msg: string, extraMsg?: string) => {
+  const stack = new Error().stack?.split(/\r\n|\r|\n/g) || [];
+  let hasTraceFunc = false;
+  for (let i = 0; i < stack.length; i++) {
+    if (hasTraceFunc && !stack[i].includes('TRACE_FUNC')) {
+      let label = `FUNC_${msg}::${stack[i].trim().split(' ')[1]}`;
+      if (extraMsg) {
+        label += `::${extraMsg}`;
+      }
+      TRACE('CPU', label);
+      return;
+    }
+    if (stack[i].includes('TRACE_FUNC')) {
+      hasTraceFunc = true;
+    }
+  }
+};
+
+export const TRACE_FUNC_BEGIN = (extraMsg?: string) => {
+  if (!env.wasm.trace) {
+    return;
+  }
+  TRACE_FUNC('BEGIN', extraMsg);
+};
+
+export const TRACE_FUNC_END = (extraMsg?: string) => {
+  if (!env.wasm.trace) {
+    return;
+  }
+  TRACE_FUNC('END', extraMsg);
+};
diff --git a/js/web/lib/backend-wasm.ts b/js/web/lib/backend-wasm.ts
index 2d123cdb71290..d9f63fec9c492 100644
--- a/js/web/lib/backend-wasm.ts
+++ b/js/web/lib/backend-wasm.ts
@@ -26,6 +26,10 @@ export const initializeFlags = (): void => {
     env.wasm.proxy = false;
   }
 
+  if (typeof env.wasm.trace !== 'boolean') {
+    env.wasm.trace = false;
+  }
+
   if (typeof env.wasm.numThreads !== 'number' || !Number.isInteger(env.wasm.numThreads) || env.wasm.numThreads <= 0) {
     const numCpuLogicalCores = typeof navigator === 'undefined' ? cpus().length : navigator.hardwareConcurrency;
     env.wasm.numThreads = Math.min(4, Math.ceil((numCpuLogicalCores || 1) / 2));
diff --git a/js/web/lib/wasm/jsep/backend-webgpu.ts b/js/web/lib/wasm/jsep/backend-webgpu.ts
index 6c3d22352772e..0148f32cdd91b 100644
--- a/js/web/lib/wasm/jsep/backend-webgpu.ts
+++ b/js/web/lib/wasm/jsep/backend-webgpu.ts
@@ -1,7 +1,7 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
-import {Env, Tensor} from 'onnxruntime-common';
+import {Env, Tensor, TRACE_FUNC_BEGIN, TRACE_FUNC_END} from 'onnxruntime-common';
 
 import {configureLogger, LOG_DEBUG} from './log';
 import {createView, TensorView} from './tensor-view';
@@ -263,6 +263,7 @@ export class WebGpuBackend {
   run(program: ProgramInfo, inputTensorViews: readonly TensorView[], outputIndices: readonly number[],
       createKernelOutput: (index: number, dataType: number, dims: readonly number[]) => TensorView,
       createIntermediateOutput: (dataType: number, dims: readonly number[]) => TensorView): TensorView[] {
+    TRACE_FUNC_BEGIN(program.name);
     // create info for inputs
     const inputDatas: GpuData[] = [];
     for (let i = 0; i < inputTensorViews.length; ++i) {
@@ -387,6 +388,7 @@ export class WebGpuBackend {
         artifact, inputTensorViews, outputTensorViews, inputDatas, outputDatas, normalizedDispatchGroup,
         uniformBufferBinding);
 
+    TRACE_FUNC_END(program.name);
     return outputTensorViews;
   }
 
diff --git a/js/web/lib/wasm/jsep/webgpu/program-manager.ts b/js/web/lib/wasm/jsep/webgpu/program-manager.ts
index ae5bf68483b46..0d699326366b3 100644
--- a/js/web/lib/wasm/jsep/webgpu/program-manager.ts
+++ b/js/web/lib/wasm/jsep/webgpu/program-manager.ts
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+import {TRACE_FUNC_BEGIN, TRACE_FUNC_END} from 'onnxruntime-common';
+
 import {tensorDataTypeEnumToString} from '../../wasm-common';
 import {WebGpuBackend} from '../backend-webgpu';
 import {LOG_DEBUG} from '../log';
@@ -35,6 +37,7 @@ export class ProgramManager {
 
   run(buildArtifact: Artifact, inputTensorViews: readonly TensorView[], outputTensorViews: readonly TensorView[],
       inputs: GpuData[], outputs: GpuData[], dispatchGroup: [number, number, number],
      uniformBufferBinding: GPUBindingResource|undefined): void {
+    TRACE_FUNC_BEGIN(buildArtifact.programInfo.name);
     const device = this.backend.device;
     const computePassEncoder = this.backend.getComputePassEncoder();
@@ -128,11 +131,13 @@ export class ProgramManager {
     if (this.backend.pendingDispatchNumber >= 16) {
       this.backend.flush();
     }
+    TRACE_FUNC_END(buildArtifact.programInfo.name);
   }
   dispose(): void {
     // this.repo.forEach(a => this.glContext.deleteProgram(a.program));
   }
   build(programInfo: ProgramInfo, normalizedDispatchGroupSize: [number, number, number]): Artifact {
+    TRACE_FUNC_BEGIN(programInfo.name);
     const device = this.backend.device;
     const extensions: string[] = [];
     if (device.features.has('shader-f16')) {
@@ -147,6 +152,7 @@ export class ProgramManager {
 
     const computePipeline = device.createComputePipeline(
         {compute: {module: shaderModule, entryPoint: 'main'}, layout: 'auto', label: programInfo.name});
+    TRACE_FUNC_END(programInfo.name);
     return {programInfo, computePipeline};
   }
 
diff --git a/js/web/lib/wasm/session-handler-inference.ts b/js/web/lib/wasm/session-handler-inference.ts
index b62287483208a..e17ec37e3e612 100644
--- a/js/web/lib/wasm/session-handler-inference.ts
+++ b/js/web/lib/wasm/session-handler-inference.ts
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 import {readFile} from 'node:fs/promises';
-import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor} from 'onnxruntime-common';
+import {InferenceSession, InferenceSessionHandler, SessionHandler, Tensor, TRACE_FUNC_BEGIN, TRACE_FUNC_END} from 'onnxruntime-common';
 
 import {SerializableInternalBuffer, TensorMetadata} from './proxy-messages';
 import {copyFromExternalBuffer, createSession, endProfiling, releaseSession, run} from './proxy-wrapper';
@@ -54,6 +54,7 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHandler {
   }
 
   async loadModel(pathOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions): Promise<void> {
+    TRACE_FUNC_BEGIN();
     let model: Parameters<typeof createSession>[0];
 
     if (typeof pathOrBuffer === 'string') {
@@ -70,6 +71,7 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHandler {
     }
 
     [this.sessionId, this.inputNames, this.outputNames] = await createSession(model, options);
+    TRACE_FUNC_END();
   }
 
   async dispose(): Promise<void> {
@@ -78,6 +80,7 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHandler {
 
   async run(feeds: SessionHandler.FeedsType, fetches: SessionHandler.FetchesType,
       options: InferenceSession.RunOptions): Promise<SessionHandler.ReturnType> {
+    TRACE_FUNC_BEGIN();
     const inputArray: Tensor[] = [];
     const inputIndices: number[] = [];
     Object.entries(feeds).forEach(kvp => {
@@ -115,6 +118,7 @@ export class OnnxruntimeWebAssemblySessionHandler implements InferenceSessionHandler {
     for (let i = 0; i < results.length; i++) {
       resultMap[this.outputNames[outputIndices[i]]] = outputArray[i] ?? decodeTensorMetadata(results[i]);
     }
+    TRACE_FUNC_END();
     return resultMap;
   }
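Note: because index.ts now re-exports trace.js, TRACE, TRACE_FUNC_BEGIN, and
TRACE_FUNC_END become part of the public onnxruntime-common surface, so an
application can interleave its own markers with ORT's. A minimal sketch (not
part of the patch); the function name and labels are placeholders:

  import {env, TRACE, TRACE_FUNC_BEGIN, TRACE_FUNC_END} from 'onnxruntime-common';

  env.wasm.trace = true;

  function preprocess(data: Float32Array): Float32Array {
    // Emits CPU::ORT::FUNC_BEGIN::preprocess::resize; the caller's name
    // is recovered from the stack by TRACE_FUNC.
    TRACE_FUNC_BEGIN('resize');
    const scaled = data.map(v => v / 255);
    TRACE_FUNC_END('resize');
    return scaled;
  }

  // A bare marker with an explicit device type: GPU::ORT::upload.
  TRACE('GPU', 'upload');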