Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[js/webgpu] Refactor timestamp-query and introduce timestamp-query-inside-passes #18894

Merged
merged 11 commits into from
Jan 13, 2024
155 changes: 128 additions & 27 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

import {Env, Tensor} from 'onnxruntime-common';

import {tensorDataTypeEnumToString} from '../wasm-common';

import {configureLogger, LOG_DEBUG} from './log';
import {createView, TensorView} from './tensor-view';
import {createGpuDataManager, downloadGpuData, GpuDataManager} from './webgpu/gpu-data-manager';
import {RunFunction, WEBGPU_OP_RESOLVE_RULES} from './webgpu/op-resolve-rules';
import {ProgramManager} from './webgpu/program-manager';
import {ComputeContext, GpuData, ProgramInfo, ProgramInputTensorInfoDependency} from './webgpu/types';
import {ComputeContext, GpuData, PendingKernelInfo, ProgramInfo, ProgramInputTensorInfoDependency, QueryType} from './webgpu/types';

const getProgramInputTensorInfoDependencyKey =
(inputTensors: readonly TensorView[], inputDependencies: readonly ProgramInputTensorInfoDependency[]): string => {
Expand Down Expand Up @@ -130,12 +132,18 @@ export class WebGpuBackend {

private commandEncoder: GPUCommandEncoder|null = null;
private computePassEncoder: GPUComputePassEncoder|null = null;
maxDispatchNumber = 16;
pendingDispatchNumber = 0;

queryData?: GpuData;
querySet?: GPUQuerySet;
querySetCount = 2;
queryTimeBase?: bigint;

// info of kernels pending submission for a single batch
pendingKernels: PendingKernelInfo[] = [];
// queryReadData -> pendingKernels mapping for all the batches
private pendingQueries: Map<number, PendingKernelInfo[]> = new Map();
private queryResolveData?: GpuData;
private querySet?: GPUQuerySet;
private queryTimeBase?: bigint;
queryType: QueryType;

env: Env;

Expand All @@ -161,7 +169,9 @@ export class WebGpuBackend {
requiredFeatures,
};

if (adapter.features.has('timestamp-query')) {
if (adapter.features.has('chromium-experimental-timestamp-query-inside-passes')) {
requiredFeatures.push('chromium-experimental-timestamp-query-inside-passes' as GPUFeatureName);
} else if (adapter.features.has('timestamp-query')) {
requiredFeatures.push('timestamp-query');
}
if (adapter.features.has('shader-f16')) {
Expand Down Expand Up @@ -200,24 +210,30 @@ export class WebGpuBackend {
/**
 * Returns the command encoder of the current batch, creating one on first use.
 *
 * Creating a new encoder also re-evaluates the query type (via `setQueryType()`), and - the first time
 * profiling is active - allocates the timestamp query set together with the buffer that query results
 * are resolved into. Both are sized for `maxDispatchNumber` dispatches, with two timestamps (begin/end)
 * per dispatch and 8 bytes per timestamp.
 *
 * @returns the command encoder that pending commands should be recorded into.
 */
getCommandEncoder(): GPUCommandEncoder {
  if (this.commandEncoder) {
    return this.commandEncoder;
  }

  this.commandEncoder = this.device.createCommandEncoder();

  // the profiling mode may have been changed since the previous batch
  this.setQueryType();

  const profilingActive = this.queryType !== QueryType.none;
  if (profilingActive && typeof this.querySet === 'undefined') {
    // one begin and one end timestamp for every dispatch of a batch
    const timestampCount = this.maxDispatchNumber * 2;
    this.querySet = this.device.createQuerySet({
      type: 'timestamp',
      count: timestampCount,
    });
    this.queryResolveData = this.gpuDataManager.create(
        // eslint-disable-next-line no-bitwise
        timestampCount * 8, GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE);
  }

  return this.commandEncoder;
}

getComputePassEncoder(): GPUComputePassEncoder {
if (!this.computePassEncoder) {
const computePassDescriptor: GPUComputePassDescriptor = {};
if (this.isQueryEnabled()) {
if (typeof this.querySet === 'undefined') {
this.querySet = this.device.createQuerySet({
type: 'timestamp',
count: this.querySetCount,
});
}

if (this.queryType === QueryType.atPasses) {
computePassDescriptor.timestampWrites = {
querySet: this.querySet,
beginningOfPassWriteIndex: 0,
endOfPassWriteIndex: 1,
querySet: this.querySet!,
beginningOfPassWriteIndex: this.pendingDispatchNumber * 2,
endOfPassWriteIndex: this.pendingDispatchNumber * 2 + 1,
};
}

Expand All @@ -234,19 +250,85 @@ export class WebGpuBackend {
}

/**
 * Submits all commands recorded in the current batch to the GPU queue and resets the batch state.
 *
 * Does nothing when no command encoder exists. When profiling is active (`queryType` is not
 * `QueryType.none`), the timestamps written for the batch are resolved and copied into a mappable
 * read-back buffer as part of the same submission. Once that buffer is mapped, one profiling record
 * per kernel is delivered to `env.webgpu.profiling.ondata`, or printed to the console when no callback
 * is set.
 *
 * @throws RangeError (asynchronously, inside the map callback) when a timestamp relative to
 *     `queryTimeBase` is not a safe integer.
 */
flush(): void {
  if (!this.commandEncoder) {
    return;
  }

  // A command encoder must not be finished (and query sets must not be resolved) while a compute pass
  // is still open. Callers already invoke endComputePass() right before flush(), so a repeated call
  // here is expected to be harmless.
  this.endComputePass();

  let queryReadData: GpuData|undefined;
  if (this.queryType !== QueryType.none) {
    // two timestamps (begin/end) per dispatch
    const timestampCount = this.pendingDispatchNumber * 2;
    // each timestamp is a 64-bit unsigned integer
    const timestampBytes = timestampCount * 8;

    this.commandEncoder.resolveQuerySet(this.querySet!, 0, timestampCount, this.queryResolveData!.buffer, 0);
    queryReadData = this.gpuDataManager.create(
        // eslint-disable-next-line no-bitwise
        timestampBytes, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST);
    // remember which kernels belong to this read-back buffer, then start a fresh batch
    this.pendingQueries.set(queryReadData.id, this.pendingKernels);
    this.pendingKernels = [];
    this.commandEncoder.copyBufferToBuffer(
        this.queryResolveData!.buffer, 0, queryReadData.buffer, 0, timestampBytes);
  }

  this.device.queue.submit([this.commandEncoder.finish()]);
  this.gpuDataManager.refreshPendingBuffers();
  this.commandEncoder = null;
  this.pendingDispatchNumber = 0;

  if (!queryReadData) {
    return;
  }

  const readData = queryReadData;
  void readData.buffer.mapAsync(GPUMapMode.READ).then(() => {
    try {
      const mappedData = new BigUint64Array(readData.buffer.getMappedRange());
      const pendingKernels = this.pendingQueries.get(readData.id) ?? [];
      // never read past the kernels that were actually recorded for this batch
      const kernelCount = Math.min(pendingKernels.length, Math.floor(mappedData.length / 2));

      for (let i = 0; i < kernelCount; i++) {
        const {id: kernelId, name: kernelName, inputTensorViews, outputTensorViews} = pendingKernels[i];
        const kernelInfo = this.kernels.get(kernelId)!;
        const kernelType = kernelInfo[0];
        const startTimeU64 = mappedData[i * 2];
        const endTimeU64 = mappedData[i * 2 + 1];

        // the first timestamp ever observed becomes the origin for all reported times
        if (typeof this.queryTimeBase === 'undefined') {
          this.queryTimeBase = startTimeU64;
        }

        const startTime = Number(startTimeU64 - this.queryTimeBase);
        const endTime = Number(endTimeU64 - this.queryTimeBase);

        if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) {
          throw new RangeError('incorrect timestamp range');
        }

        if (this.env.webgpu.profiling?.ondata) {
          this.env.webgpu.profiling.ondata({
            version: 1,
            inputsMetadata: inputTensorViews.map(
                value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
            outputsMetadata: outputTensorViews.map(
                value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
            kernelId,
            kernelType,
            kernelName,
            startTime,
            endTime,
          });
        } else {
          // if no callback is provided, print the profiling message to console
          let inputShapes = '';
          inputTensorViews.forEach((value, index) => {
            inputShapes += `input[${index}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
          });
          let outputShapes = '';
          outputTensorViews.forEach((value, index) => {
            outputShapes += `output[${index}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
          });
          // eslint-disable-next-line no-console
          console.log(`[profiling] kernel "${kernelId}|${kernelName}" ${inputShapes}${outputShapes}execution time: ${
              endTime - startTime} ns`);
        }
      }
    } finally {
      // always give the read-back buffer back and forget the batch, even when reporting failed;
      // otherwise the buffer stays mapped and `pendingQueries` grows with every flush
      readData.buffer.unmap();
      this.gpuDataManager.release(readData.id);
      this.pendingQueries.delete(readData.id);
    }
  });
}

/**
Expand Down Expand Up @@ -514,5 +596,24 @@ export class WebGpuBackend {
return createView(data.buffer, type);
};
}
/**
 * Records a timestamp into slot `index` of the query set from inside the current compute pass.
 *
 * Only has an effect when `queryType` is `QueryType.insidePasses`; for every other query type the call
 * is a no-op.
 *
 * @param index slot of the query set that receives the timestamp.
 */
writeTimeStamp(index: number): void {
  if (this.queryType === QueryType.insidePasses) {
    // `writeTimestamp` is not part of the typed GPUComputePassEncoder interface used here, hence the cast
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const passEncoder = this.computePassEncoder as any;
    passEncoder.writeTimestamp(this.querySet, index);
  }
}

/**
 * Re-evaluates which timestamp query strategy applies and stores it in `queryType`.
 *
 * The result is `QueryType.none` unless `env.webgpu.profiling.mode` is `'default'`. With profiling on,
 * the device feature 'chromium-experimental-timestamp-query-inside-passes' takes precedence over
 * 'timestamp-query'; when the device has neither, the result stays `QueryType.none`.
 */
setQueryType(): void {
  if (this.env.webgpu.profiling?.mode !== 'default') {
    this.queryType = QueryType.none;
    return;
  }

  const deviceFeatures = this.device.features;
  if (deviceFeatures.has('chromium-experimental-timestamp-query-inside-passes')) {
    this.queryType = QueryType.insidePasses;
    return;
  }

  this.queryType = deviceFeatures.has('timestamp-query') ? QueryType.atPasses : QueryType.none;
}
// #endregion
}
5 changes: 3 additions & 2 deletions js/web/lib/wasm/jsep/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import {WebGpuBackend} from './backend-webgpu';
import {LOG_DEBUG} from './log';
import {TensorView} from './tensor-view';
import {ShapeUtil} from './util';
import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo} from './webgpu/types';
import {ComputeContext, ComputeContextInputsOutputsMapping, ProgramInfo, QueryType} from './webgpu/types';

/* eslint-disable no-bitwise */

Expand Down Expand Up @@ -188,7 +188,8 @@ export const init = async(module: OrtWasmModule, env: Env, gpuAdapter: GPUAdapte
// jsepCreateKernel
(name: string, kernel: number, attribute: unknown) => backend.createKernel(
name, kernel, attribute,
env.debug || backend.isQueryEnabled() ? module.UTF8ToString(module._JsepGetNodeName(kernel)) : `${kernel}`),
env.debug || backend.queryType !== QueryType.none ? module.UTF8ToString(module._JsepGetNodeName(kernel)) :
`${kernel}`),

// jsepReleaseKernel
(kernel: number) => backend.releaseKernel(kernel),
Expand Down
91 changes: 21 additions & 70 deletions js/web/lib/wasm/jsep/webgpu/program-manager.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

import {tensorDataTypeEnumToString} from '../../wasm-common';
import {WebGpuBackend} from '../backend-webgpu';
import {LOG_DEBUG} from '../log';
import {TensorView} from '../tensor-view';

import {createShaderHelper} from './ops/common';
import {Artifact, GpuData, ProgramInfo} from './types';
import {Artifact, GpuData, PendingKernelInfo, ProgramInfo, QueryType} from './types';

/**
* ProgramManager is the main class behind running computations
Expand Down Expand Up @@ -36,8 +35,8 @@ export class ProgramManager {
inputs: GpuData[], outputs: GpuData[], dispatchGroup: [number, number, number],
uniformBufferBinding: GPUBindingResource|undefined): void {
const device = this.backend.device;

const computePassEncoder = this.backend.getComputePassEncoder();
this.backend.writeTimeStamp(this.backend.pendingDispatchNumber * 2);
computePassEncoder.setPipeline(buildArtifact.computePipeline);
const entries = [];
for (const input of inputs) {
Expand All @@ -55,77 +54,29 @@ export class ProgramManager {

computePassEncoder.dispatchWorkgroups(...dispatchGroup);

this.backend.pendingDispatchNumber++;

if (this.backend.isQueryEnabled()) {
if (typeof this.backend.queryData === 'undefined') {
this.backend.queryData = this.backend.gpuDataManager.create(
// eslint-disable-next-line no-bitwise
this.backend.querySetCount * 8, GPUBufferUsage.COPY_SRC | GPUBufferUsage.QUERY_RESOLVE);
}
const syncData = this.backend.gpuDataManager.create(
// eslint-disable-next-line no-bitwise
this.backend.querySetCount * 8, GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST);

this.backend.endComputePass();
this.backend.getCommandEncoder().resolveQuerySet(this.backend.querySet!, 0, 2, this.backend.queryData.buffer, 0);
this.backend.getCommandEncoder().copyBufferToBuffer(
this.backend.queryData.buffer, 0, syncData.buffer, 0, this.backend.querySetCount * 8);
this.backend.flush();

if (this.backend.queryType !== QueryType.none) {
const kernelId = this.backend.currentKernelId!;
const kernelInfo = this.backend.kernels.get(kernelId)!;

void syncData.buffer.mapAsync(GPUMapMode.READ).then(() => {
const mappedData = new BigUint64Array(syncData.buffer.getMappedRange());
const [startTimeU64, endTimeU64] = mappedData;
const [kernelType, kernelName] = kernelInfo;

syncData.buffer.unmap();

if (typeof this.backend.queryTimeBase === 'undefined') {
this.backend.queryTimeBase = startTimeU64;
}

const startTime = Number(startTimeU64 - this.backend.queryTimeBase);
const endTime = Number(endTimeU64 - this.backend.queryTimeBase);

if (!Number.isSafeInteger(startTime) || !Number.isSafeInteger(endTime)) {
throw new RangeError('incorrect timestamp range');
}

this.backend.gpuDataManager.release(syncData.id);
if (this.backend.env.webgpu.profiling?.ondata) {
this.backend.env.webgpu.profiling.ondata({
version: 1,
inputsMetadata: inputTensorViews.map(
value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
outputsMetadata: outputTensorViews.map(
value => ({dims: value.dims, dataType: tensorDataTypeEnumToString(value.dataType)})),
kernelId,
kernelType,
kernelName,
startTime,
endTime,
});
} else {
// if no callback is provided, print the profiling message to console
let inputShapes = '';
inputTensorViews.forEach((value, i) => {
inputShapes += `input[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
});
let outputShapes = '';
outputTensorViews.forEach((value, i) => {
outputShapes += `output[${i}]: [${value.dims}] | ${tensorDataTypeEnumToString(value.dataType)}, `;
});
// eslint-disable-next-line no-console
console.log(`[profiling] kernel "${kernelId}|${kernelName}|${buildArtifact.programInfo.name}" ${inputShapes}${
outputShapes}execution time: ${endTime - startTime} ns`);
}
});
let kernelName = kernelInfo[0];
if (buildArtifact.programInfo.name !== kernelName) {
kernelName = `${kernelName}/${buildArtifact.programInfo.name}`;
gyagp marked this conversation as resolved.
Show resolved Hide resolved
}
const pendingKernelInfo: PendingKernelInfo = {
id: kernelId,
name: kernelName,
inputTensorViews,
outputTensorViews,
};
this.backend.pendingKernels.push(pendingKernelInfo);
gyagp marked this conversation as resolved.
Show resolved Hide resolved
this.backend.writeTimeStamp(this.backend.pendingDispatchNumber * 2 + 1);
}

if (this.backend.pendingDispatchNumber >= 16) {
this.backend.pendingDispatchNumber++;
if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber ||
this.backend.queryType === QueryType.atPasses) {
this.backend.endComputePass();
}
if (this.backend.pendingDispatchNumber >= this.backend.maxDispatchNumber) {
this.backend.flush();
}
}
Expand Down
14 changes: 13 additions & 1 deletion js/web/lib/wasm/jsep/webgpu/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ export enum GpuDataType {
}
export type GpuDataId = number;

/**
 * Strategy used for collecting GPU timestamps while profiling.
 */
export enum QueryType {
  /** no timestamps are collected */
  none = 0,
  /** timestamps are written explicitly from inside a compute pass */
  insidePasses = 1,
  /** timestamps are written at the beginning and the end of a compute pass */
  atPasses = 2,
}
gyagp marked this conversation as resolved.
Show resolved Hide resolved

export interface GpuData {
type: GpuDataType;
id: GpuDataId;
Expand All @@ -23,12 +29,18 @@ export interface TensorInfo {
dataType: number;
}


/**
 * Describes one uniform value: a type tag plus the numeric payload.
 *
 * NOTE(review): presumably consumed when a program's uniform buffer is filled - the consumer is not
 * visible here, confirm before relying on layout details.
 */
export interface ProgramUniform {
// element type tag; limited to these three string literals
type: 'int32'|'float32'|'uint32';
// either a single number or a read-only list of numbers
data: number|readonly number[];
}

/**
 * Information about one dispatched kernel that is kept until the timestamps of its batch have been
 * read back, so that a profiling record can be produced for it.
 */
export interface PendingKernelInfo {
// id of the kernel; used to look the kernel up again when the profiling record is produced
id: number;
// display name used in the profiling record
name: string;
// input tensors of the dispatch; their dims and data types are reported as inputs metadata
inputTensorViews: readonly TensorView[];
// output tensors of the dispatch; their dims and data types are reported as outputs metadata
outputTensorViews: readonly TensorView[];
}

/**
* Represent the dependency of a program on a specific input tensor.
*
Expand Down