Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[js/webgpu] Refactor timestamp-query and introduce timestamp-query-inside-passes #18894

Merged
merged 11 commits into from
Jan 13, 2024
208 changes: 170 additions & 38 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,24 @@

import {Env, Tensor} from 'onnxruntime-common';

import {tensorDataTypeEnumToString} from '../wasm-common';

import {configureLogger, LOG_DEBUG} from './log';
import {createView, TensorView} from './tensor-view';
import {createGpuDataManager, downloadGpuData, GpuDataManager} from './webgpu/gpu-data-manager';
import {RunFunction, WEBGPU_OP_RESOLVE_RULES} from './webgpu/op-resolve-rules';
import {ProgramManager} from './webgpu/program-manager';
import {ComputeContext, GpuData, ProgramInfo, ProgramInputTensorInfoDependency} from './webgpu/types';
import {ComputeContext, GpuData, ProgramInfo, ProgramInputTensorInfoDependency, QueryType} from './webgpu/types';

/**
 * Per-kernel record stored in the backend's `kernels` map, keyed by kernel ID.
 *
 * The first four fields are filled in when the kernel is created; the last three start out
 * empty and are overwritten each time a program is run for the kernel, so that profiling
 * output can report what was actually executed.
 */
interface KernelInfo {
gyagp marked this conversation as resolved.
Show resolved Hide resolved
/** Operator type the kernel was created for. */
opType: string;
/** Node name supplied at kernel creation. */
nodeName: string;
/** Run function for the operator (first element of the op resolve rule). */
kernelEntry: RunFunction;
/**
 * Pair of [optional attribute pre-processing function from the op resolve rule,
 * attribute value passed at kernel creation].
 */
attributes: [((attribute: unknown) => unknown)|undefined, unknown];
/** Name of the most recently run program for this kernel; empty string until the first run. */
programName: string;
/** Input tensor views of the most recent run; empty until the first run. */
inputTensorViews: readonly TensorView[];
/** Output tensor views of the most recent run; empty until the first run. */
outputTensorViews: readonly TensorView[];
gyagp marked this conversation as resolved.
Show resolved Hide resolved
}
gyagp marked this conversation as resolved.
Show resolved Hide resolved

const getProgramInputTensorInfoDependencyKey =
(inputTensors: readonly TensorView[], inputDependencies: readonly ProgramInputTensorInfoDependency[]): string => {
Expand Down Expand Up @@ -122,20 +134,21 @@ export class WebGpuBackend {
return data;
}

/**
* a KernelID -> kernel info mapping. value is
* [ op_type, name, run function, [optional] preprocess_attribute_once function ]
*/
kernels: Map<number, [string, string, RunFunction, [((attribute: unknown) => unknown) | undefined, unknown]]>;

kernels: Map<number, KernelInfo>;
gyagp marked this conversation as resolved.
Show resolved Hide resolved
private commandEncoder: GPUCommandEncoder|null = null;
private computePassEncoder: GPUComputePassEncoder|null = null;
maxDispatchNumber = 16;
pendingDispatchNumber = 0;

queryData?: GpuData;
querySet?: GPUQuerySet;
querySetCount = 2;
queryTimeBase?: bigint;

// IDs of the kernels pending submission in the current batch
private pendingKernels: number[] = [];
// queryReadData.id -> pendingKernels mapping, one entry per submitted batch whose timestamps are not yet read back
private pendingQueries: Map<number, number[]> = new Map();
private queryResolveData?: GpuData;
private querySet?: GPUQuerySet;
private queryTimeBase?: bigint;
queryType: QueryType;

env: Env;

Expand All @@ -161,7 +174,9 @@ export class WebGpuBackend {
requiredFeatures,
};

if (adapter.features.has('timestamp-query')) {
if (adapter.features.has('chromium-experimental-timestamp-query-inside-passes')) {
requiredFeatures.push('chromium-experimental-timestamp-query-inside-passes' as GPUFeatureName);
} else if (adapter.features.has('timestamp-query')) {
requiredFeatures.push('timestamp-query');
}
if (adapter.features.has('shader-f16')) {
Expand All @@ -188,36 +203,47 @@ export class WebGpuBackend {
};

Object.defineProperty(this.env.webgpu, 'device', {value: this.device});

// init queryType, which is necessary for createKernel
this.setQueryType();
}

/**
 * Tears down the backend's GPU resources.
 *
 * If a timestamp query set was created, it is destroyed and the buffer used to resolve
 * its results is released first; the GPU data manager is then disposed.
 */
dispose(): void {
  const timestampQuerySet = this.querySet;
  if (timestampQuerySet !== undefined) {
    timestampQuerySet.destroy();
    // the resolve buffer is allocated together with the query set, so it exists here
    this.gpuDataManager.release(this.queryResolveData!.id);
  }
  this.gpuDataManager.dispose();
}

/**
 * Returns the command encoder for the current batch, creating it on first use.
 *
 * When a new encoder is created the query type is re-evaluated, because profiling may be
 * switched on for a specific run only. If querying is enabled and no query set exists yet,
 * a timestamp query set with two entries per dispatch (`maxDispatchNumber * 2`) is created
 * together with a resolve buffer holding 8 bytes per entry.
 *
 * @returns the active GPUCommandEncoder.
 */
getCommandEncoder(): GPUCommandEncoder {
  if (this.commandEncoder) {
    return this.commandEncoder;
  }

  const encoder = this.device.createCommandEncoder();
  this.commandEncoder = encoder;

  // re-read the profiling settings for this batch
  this.setQueryType();
  const queryingEnabled = this.queryType !== QueryType.none;
  if (queryingEnabled && this.querySet === undefined) {
    const timestampSlots = this.maxDispatchNumber * 2;
    this.querySet = this.device.createQuerySet({type: 'timestamp', count: timestampSlots});
    // eslint-disable-next-line no-bitwise
    const resolveUsage = GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE;
    this.queryResolveData = this.gpuDataManager.create(timestampSlots * 8, resolveUsage);
  }

  return encoder;
}

getComputePassEncoder(): GPUComputePassEncoder {
if (!this.computePassEncoder) {
const computePassDescriptor: GPUComputePassDescriptor = {};
if (this.isQueryEnabled()) {
if (typeof this.querySet === 'undefined') {
this.querySet = this.device.createQuerySet({
type: 'timestamp',
count: this.querySetCount,
});
}

if (this.queryType === QueryType.atPasses) {
computePassDescriptor.timestampWrites = {
querySet: this.querySet,
beginningOfPassWriteIndex: 0,
endOfPassWriteIndex: 1,
querySet: this.querySet!,
beginningOfPassWriteIndex: this.pendingDispatchNumber * 2,
endOfPassWriteIndex: this.pendingDispatchNumber * 2 + 1,
};
}

Expand All @@ -234,19 +260,87 @@ export class WebGpuBackend {
}

flush(): void {
if (this.commandEncoder) {
this.endComputePass();
this.device.queue.submit([this.getCommandEncoder().finish()]);
this.gpuDataManager.refreshPendingBuffers();
this.commandEncoder = null;
this.pendingDispatchNumber = 0;
if (!this.commandEncoder) {
return;
}
}

isQueryEnabled(): boolean {
return this.device.features.has('timestamp-query') &&
(this.env.webgpu.profiling?.mode === 'default' ||
(!this.env.webgpu.profiling?.mode && this.env.webgpu.profilingMode === 'default'));
let queryReadData: GpuData;
if (this.queryType !== QueryType.none) {
this.commandEncoder.resolveQuerySet(
this.querySet!, 0, this.pendingDispatchNumber * 2, this.queryResolveData!.buffer, 0);
queryReadData = this.gpuDataManager.create(
gyagp marked this conversation as resolved.
Show resolved Hide resolved
// eslint-disable-next-line no-bitwise
this.pendingDispatchNumber * 2 * 8, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST);
this.pendingQueries.set(queryReadData.id, this.pendingKernels);
this.pendingKernels = [];
this.commandEncoder.copyBufferToBuffer(
this.queryResolveData!.buffer, 0, queryReadData.buffer, 0, this.pendingDispatchNumber * 2 * 8);
}

this.device.queue.submit([this.commandEncoder.finish()]);
this.gpuDataManager.refreshPendingBuffers();
this.commandEncoder = null;
this.pendingDispatchNumber = 0;

if (this.queryType !== QueryType.none) {
void queryReadData!.buffer.mapAsync(GPUMapMode.READ).then(() => {
const mappedData = new BigUint64Array(queryReadData.buffer.getMappedRange());
const pendingKernels = this.pendingQueries.get(queryReadData.id);
for (let i = 0; i < mappedData.length / 2; i++) {
const kernelId = pendingKernels![i];
const kernelInfo = this.kernels.get(kernelId)!;
const opType = kernelInfo.opType;
const nodeName = kernelInfo.nodeName;
const programName = kernelInfo.programName;
const inputTensorViews = kernelInfo.inputTensorViews;
const outputTensorViews = kernelInfo.outputTensorViews;
const startTimeU64 = mappedData[i * 2];
const endTimeU64 = mappedData[i * 2 + 1];

if (typeof this.queryTimeBase === 'undefined') {
this.queryTimeBase = startTimeU64;
}

const startTime = Number(startTimeU64 - this.queryTimeBase);
const endTime = Number(endTimeU64 - this.queryTimeBase);

if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) {
throw new RangeError('incorrect timestamp range');
}

if (this.env.webgpu.profiling?.ondata) {
this.env.webgpu.profiling.ondata({
version: 1,
inputsMetadata: inputTensorViews.map(
value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
outputsMetadata: outputTensorViews.map(
value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
kernelId,
kernelType: opType,
kernelName: nodeName,
gyagp marked this conversation as resolved.
Show resolved Hide resolved
startTime,
endTime,
});
} else {
// if no callback is provided, print the profiling message to console
let inputShapes = '';
inputTensorViews.forEach((value, i) => {
inputShapes += `input[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
});
let outputShapes = '';
outputTensorViews.forEach((value, i) => {
outputShapes += `output[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
});
// eslint-disable-next-line no-console
console.log(`[profiling] kernel "${kernelId}|${opType}|${nodeName}|${programName}" ${inputShapes}${
outputShapes}execution time: ${endTime - startTime} ns`);
}
}
queryReadData.buffer.unmap();
this.gpuDataManager.release(queryReadData.id);
gyagp marked this conversation as resolved.
Show resolved Hide resolved
this.pendingQueries.delete(queryReadData.id);
});
}
}

/**
Expand Down Expand Up @@ -378,14 +472,22 @@ export class WebGpuBackend {
this.programManager.setArtifact(key, artifact);
LOG_DEBUG('info', () => `[artifact] key: ${key}, programName: ${program.name}`);
}
// update kernels
const kernelInfo = this.kernels.get(this.currentKernelId!)!;
gyagp marked this conversation as resolved.
Show resolved Hide resolved
kernelInfo.programName = artifact.programInfo.name;
kernelInfo.inputTensorViews = inputTensorViews;
kernelInfo.outputTensorViews = outputTensorViews;

LOG_DEBUG(
'info',
() => `[ProgramManager] run "${program.name}" (key=${key}) with ${normalizedDispatchGroup[0]}x${
normalizedDispatchGroup[1]}x${normalizedDispatchGroup[2]}`);
this.programManager.run(
artifact, inputTensorViews, outputTensorViews, inputDatas, outputDatas, normalizedDispatchGroup,
uniformBufferBinding);

if (this.queryType !== QueryType.none) {
this.pendingKernels.push(this.currentKernelId!);
}

this.programManager.run(artifact, inputDatas, outputDatas, normalizedDispatchGroup, uniformBufferBinding);

return outputTensorViews;
}
Expand Down Expand Up @@ -418,7 +520,16 @@ export class WebGpuBackend {
throw new Error(`kernel not implemented: ${opType}`);
}

this.kernels.set(kernelId, [opType, nodeName, op[0], [op[1], attribute]]);
const kernelInfo: KernelInfo = {
opType,
nodeName,
kernelEntry: op[0],
attributes: [op[1], attribute],
programName: '',
inputTensorViews: [],
outputTensorViews: [],
};
this.kernels.set(kernelId, kernelInfo);
}

releaseKernel(kernelId: number): void {
Expand All @@ -439,7 +550,10 @@ export class WebGpuBackend {
if (!kernel) {
throw new Error(`kernel not created: ${kernelId}`);
}
const [opType, nodeName, kernelEntry, attributes] = kernel;
const opType = kernel.opType;
const nodeName = kernel.nodeName;
const kernelEntry = kernel.kernelEntry;
const attributes = kernel.attributes;
if (this.currentKernelId !== null) {
throw new Error(`kernel "[${opType}] ${nodeName}" is not allowed to be called recursively`);
}
Expand Down Expand Up @@ -514,5 +628,23 @@ export class WebGpuBackend {
return createView(data.buffer, type);
};
}
/**
 * Writes a timestamp into entry `index` of the query set via the current compute pass encoder.
 *
 * Does nothing unless the query type is `QueryType.insidePasses`.
 *
 * @param index - entry of the query set to write the timestamp to.
 */
writeTimeStamp(index: number): void {
  if (this.queryType === QueryType.insidePasses) {
    // NOTE(review): the `any` cast is presumably needed because `writeTimestamp` is missing
    // from the GPUComputePassEncoder typings - confirm.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const passEncoder = this.computePassEncoder as any;
    passEncoder.writeTimestamp(this.querySet, index);
  }
}
/**
 * Recomputes `queryType` from the current profiling setting and the device's features.
 *
 * The result is `QueryType.none` unless `env.webgpu.profiling.mode` is `'default'`. In that
 * case `insidePasses` is chosen when the device has
 * 'chromium-experimental-timestamp-query-inside-passes', otherwise `atPasses` when it has
 * 'timestamp-query'; with neither feature the type stays `none`.
 */
setQueryType(): void {
  this.queryType = QueryType.none;
  if (this.env.webgpu.profiling?.mode !== 'default') {
    return;
  }

  const deviceFeatures = this.device.features;
  if (deviceFeatures.has('chromium-experimental-timestamp-query-inside-passes')) {
    this.queryType = QueryType.insidePasses;
  } else if (deviceFeatures.has('timestamp-query')) {
    this.queryType = QueryType.atPasses;
  }
}
// #endregion
}
9 changes: 5 additions & 4 deletions js/web/lib/wasm/jsep/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {WebGpuBackend} from './backend-webgpu';
import {LOG_DEBUG} from './log';
import {TensorView} from './tensor-view';
import {ShapeUtil} from './util';
import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo} from './webgpu/types';
import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo, QueryType} from './webgpu/types';

/* eslint-disable no-bitwise */

Expand Down Expand Up @@ -186,9 +186,10 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
},

// jsepCreateKernel
(name: string, kernel: number, attribute: unknown) => backend.createKernel(
name, kernel, attribute,
env.debug || backend.isQueryEnabled() ? module.UTF8ToString(module._JsepGetNodeName(kernel)) : `${kernel}`),
(opType: string, kernelId: number, attribute: unknown) => backend.createKernel(
opType, kernelId, attribute,
env.debug || backend.queryType !== QueryType.none ? module.UTF8ToString(module._JsepGetNodeName(kernelId)) :
`${opType}`),
gyagp marked this conversation as resolved.
Show resolved Hide resolved

// jsepReleaseKernel
(kernel: number) => backend.releaseKernel(kernel),
Expand Down
Loading