From 0ca84549abac23aa9c9347df1a3ab68cee9c02b1 Mon Sep 17 00:00:00 2001 From: satyajandhyala Date: Tue, 12 Dec 2023 11:12:23 -0800 Subject: [PATCH] [JS/Web] Added uniforms to Reduce, Resize and Split Ops. (#18727) ### Description Added uniforms to Reduce op ### Motivation and Context Improve perforamnce. --- .../lib/wasm/jsep/webgpu/op-resolve-rules.ts | 22 +-- js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts | 32 ++-- js/web/lib/wasm/jsep/webgpu/ops/cumsum.ts | 4 +- js/web/lib/wasm/jsep/webgpu/ops/reduce.ts | 114 ++++++------ js/web/lib/wasm/jsep/webgpu/ops/resize.ts | 173 ++++++++++-------- js/web/lib/wasm/jsep/webgpu/ops/slice.ts | 28 +-- js/web/lib/wasm/jsep/webgpu/ops/split.ts | 50 ++--- 7 files changed, 219 insertions(+), 204 deletions(-) diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts index 201c9d4b209db..8e1ec782079be 100644 --- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts +++ b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts @@ -23,7 +23,7 @@ import {multiHeadAttention, parseMultiHeadAttentionAttributes} from './ops/multi import {pad, parsePadAttributes} from './ops/pad'; import * as pool from './ops/pool'; import {range} from './ops/range'; -import {parseReduceAttributes, reduceL1, reduceL2, reduceLogSum, reduceLogSumExp, reduceMax, reduceMean, reduceMin, reduceProd, reduceSum, reduceSumSquare} from './ops/reduce'; +import {reduceL1, reduceL2, reduceLogSum, reduceLogSumExp, reduceMax, reduceMean, reduceMin, reduceProd, reduceSum, reduceSumSquare} from './ops/reduce'; import {parseResizeAttributes, resize} from './ops/resize'; import {parseSkipLayerNormAttributes, skipLayerNorm} from './ops/skip-layer-norm'; import {parseSliceAttributes, slice} from './ops/slice'; @@ -99,16 +99,16 @@ export const WEBGPU_OP_RESOLVE_RULES: Map = new ['Pow', [binaryOps.pow]], ['Range', [range]], ['Reciprocal', [unaryOps.reciprocal]], - ['ReduceMin', [reduceMin, parseReduceAttributes]], - ['ReduceMean', [reduceMean, parseReduceAttributes]], - ['ReduceMax', [reduceMax, parseReduceAttributes]], - ['ReduceSum', [reduceSum, parseReduceAttributes]], - ['ReduceProd', [reduceProd, parseReduceAttributes]], - ['ReduceL1', [reduceL1, parseReduceAttributes]], - ['ReduceL2', [reduceL2, parseReduceAttributes]], - ['ReduceLogSum', [reduceLogSum, parseReduceAttributes]], - ['ReduceLogSumExp', [reduceLogSumExp, parseReduceAttributes]], - ['ReduceSumSquare', [reduceSumSquare, parseReduceAttributes]], + ['ReduceMin', [reduceMin]], + ['ReduceMean', [reduceMean]], + ['ReduceMax', [reduceMax]], + ['ReduceSum', [reduceSum]], + ['ReduceProd', [reduceProd]], + ['ReduceL1', [reduceL1]], + ['ReduceL2', [reduceL2]], + ['ReduceLogSum', [reduceLogSum]], + ['ReduceLogSumExp', [reduceLogSumExp]], + ['ReduceSumSquare', [reduceSumSquare]], ['Relu', [unaryOps.relu]], ['Resize', [resize, parseResizeAttributes]], ['Sigmoid', [unaryOps.sigmoid]], diff --git a/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts b/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts index b6c6853c8f222..1f27525f370f3 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/argminmax.ts @@ -33,23 +33,23 @@ export const argMin = (context: ComputeContext, attributes: ArgMinMaxAttributes) const idxZero = []; for (let k = 0; k < input.rank; k++) { if (axes.indexOf(k) >= 0 || axes.length === 0) { - idxZero.push(`inputIndices[${k}] = 0;`); // first element + idxZero.push(`input_indices[${k}] = 0;`); // first element } } return [ - `${idxZero.join('\n')}`, `var value = ${input.getByOffset('inputOffset')};\nvar bestIndex : i32 = 0;`, - `if (${input.getByOffset('inputOffset')} ${attributes.selectLastIndex > 0 ? '<=' : '<'} value) { - value = ${input.getByOffset('inputOffset')}; - bestIndex = i32(lastIndex); + `${idxZero.join('\n')}`, `var value = ${input.getByIndices('input_indices')};\nvar best_index : i32 = 0;`, + `if (${input.getByIndices('input_indices')} ${attributes.selectLastIndex > 0 ? '<=' : '<'} value) { + value = ${input.getByIndices('input_indices')}; + best_index = i32(last_index); }`, - '', output.setByOffset('global_idx', 'bestIndex') + '', output.setByOffset('global_idx', 'best_index') ]; }; context.compute( createReduceProgramInfo( - 'ArgMin', {hint: attributes.cacheKey}, [context.inputs[0]], argMinMaxOp, [attributes.axis], DataType.int64, - attributes.keepDims), + 'ArgMin', {hint: attributes.cacheKey, inputDependencies: ['rank']}, [context.inputs[0]], argMinMaxOp, + [attributes.axis], DataType.int64, attributes.keepDims), {inputs: [0]}); }; @@ -59,23 +59,23 @@ export const argMax = (context: ComputeContext, attributes: ArgMinMaxAttributes) const idxZero = []; for (let k = 0; k < input.rank; k++) { if (axes.indexOf(k) >= 0 || axes.length === 0) { - idxZero.push(`inputIndices[${k}] = 0;`); // first element + idxZero.push(`input_indices[${k}] = 0;`); // first element } } return [ - `${idxZero.join('\n')}`, `var value = ${input.getByOffset('inputOffset')};\nvar bestIndex : i32 = 0;`, - `if (${input.getByOffset('inputOffset')} ${attributes.selectLastIndex > 0 ? '>=' : '>'} value) { - value = ${input.getByOffset('inputOffset')}; - bestIndex = i32(lastIndex); + `${idxZero.join('\n')}`, `var value = ${input.getByIndices('input_indices')};\nvar best_index : i32 = 0;`, + `if (${input.getByIndices('input_indices')} ${attributes.selectLastIndex > 0 ? '>=' : '>'} value) { + value = ${input.getByIndices('input_indices')}; + best_index = i32(last_index); }`, - '', output.setByOffset('global_idx', 'bestIndex') + '', output.setByOffset('global_idx', 'best_index') ]; }; context.compute( createReduceProgramInfo( - 'argMax', {hint: attributes.cacheKey}, [context.inputs[0]], argMinMaxOp, [attributes.axis], DataType.int64, - attributes.keepDims), + 'argMax', {hint: attributes.cacheKey, inputDependencies: ['rank']}, [context.inputs[0]], argMinMaxOp, + [attributes.axis], DataType.int64, attributes.keepDims), {inputs: [0]}); }; diff --git a/js/web/lib/wasm/jsep/webgpu/ops/cumsum.ts b/js/web/lib/wasm/jsep/webgpu/ops/cumsum.ts index 85682f0b47220..2ff909c30e62e 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/cumsum.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/cumsum.ts @@ -7,7 +7,7 @@ import {ShapeUtil} from '../../util'; import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; import {ComputeContext, ProgramInfo} from '../types'; -import {createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper} from './common'; +import {createTensorShapeVariables, getElementAt, inputVariable, outputVariable, ShaderHelper} from './common'; export interface CumSumAttributes extends AttributeWithCacheKey { @@ -26,7 +26,7 @@ const createCumsumProgramInfo = const axis = ShapeUtil.normalizeAxis(axisValue, rank); const getShaderSource = (shaderHelper: ShaderHelper) => { const index = ` i32(${input.indicesGet('inputIndices', 'uniforms.axis')}) `; - const max = rank === 1 ? 'i32(uniforms.input_shape)' : 'i32(uniforms.input_shape[uniforms.axis])'; + const max = getElementAt('uniforms.input_shape', 'uniforms.axis', rank); const lowerLimit = attributes.reverse ? index + (attributes.exclusive ? ' + 1' : '') : '0'; const upperLimit = attributes.reverse ? max : index + (attributes.exclusive ? '' : ' + 1'); return ` diff --git a/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts b/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts index b5c956e57a9b1..e8851ac546942 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/reduce.ts @@ -7,7 +7,7 @@ import {ShapeUtil} from '../../util'; import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; import {ComputeContext, ProgramInfo, ProgramShaderCacheInfo} from '../types'; -import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; +import {createTensorShapeVariables, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; import {reduceL1Shared, reduceL2Shared, reduceLogSumExpShared, reduceLogSumShared, reduceMaxShared, reduceMeanShared, reduceMinShared, reduceProdShared, reduceSumShared, reduceSumSquareShared} from './reduce-shared'; const validateInputs = (inputs: readonly TensorView[]): void => { @@ -30,14 +30,14 @@ export type ReduceOp = (input: IndicesHelper, output: IndicesHelper, axes: readonly number[]) => [string, string, string, string, ...string[]]; -const noOp: ReduceOp = (input) => ['', '', `var value = ${input.getByOffset('inputOffset')};`, '']; +const noOp: ReduceOp = (input) => ['', '', `var value = ${input.getByIndices('input_indices')};`, '']; export const createReduceProgramInfo = (name: string, shaderCache: ProgramShaderCacheInfo, inputs: readonly TensorView[], reduceOp: ReduceOp, axesInput: number[], outputDataType: DataType, keepDims = false, noopWithEmptyAxes = false): ProgramInfo => { const outputShape: number[] = []; const inputShape = inputs[0].dims; - - const axes = ShapeUtil.normalizeAxes(axesInput, inputs[0].dims.length); + const inputRank = inputShape.length; + const axes = ShapeUtil.normalizeAxes(axesInput, inputRank); const reduceOnAllAxes = !noopWithEmptyAxes && axes.length === 0; inputShape.forEach((d, i) => { if (reduceOnAllAxes || axes.indexOf(i) >= 0) { @@ -48,53 +48,50 @@ export const createReduceProgramInfo = outputShape.push(d); } }); - - const idxCopy: string[] = []; // copy output indexes to input indexes - - const input = inputVariable('_A', inputs[0].dataType, inputShape); - const output = outputVariable('output', outputDataType, outputShape); - const ops = reduceOp(input, output, axes); - const inputOffsetAssignment = `inputOffset = ${input.indicesToOffset('inputIndices')};`; - const initinputOffsetLet = `let ${inputOffsetAssignment};`; - const initinputOffsetVar = `var ${inputOffsetAssignment};`; - const initinputOffset = (ops[1] === '') ? '' : initinputOffsetVar; - let reduceOps = ((ops[1] === '') ? initinputOffsetLet : inputOffsetAssignment) + '\n' + ops[2]; - - for (let k = 0, l = 0; k < inputs[0].dims.length; k++) { - // if this axis is reduced - if (reduceOnAllAxes || axes.indexOf(k) >= 0) { - if (keepDims) { + const outputRank = outputShape.length; + const outputSize = ShapeUtil.size(outputShape); + const getShaderSource = (shaderHelper: ShaderHelper) => { + const idxCopy: string[] = []; // copy output indexes to input indexes + + const input = inputVariable('_A', inputs[0].dataType, inputRank); + const output = outputVariable('output', outputDataType, outputRank); + const ops = reduceOp(input, output, axes); + let reduceOps = ops[2]; + + for (let k = 0, l = 0; k < inputRank; k++) { + // if this axis is reduced + if (reduceOnAllAxes || axes.indexOf(k) >= 0) { + if (keepDims) { + l++; + } + // loop over the d-th axis + reduceOps = `for(var j${k}: u32 = 0; j${k} < ${inputShape[k]}; j${k}++) { + ${ops[2].includes('last_index') ? `let last_index = j${k};` : ''} + ${input.indicesSet('input_indices', k, `j${k}`)} + ${reduceOps} + }`; + } else { + idxCopy.push(`${input.indicesSet('input_indices', k, output.indicesGet('output_indices', l))};`); l++; } - // loop over the d-th axis - reduceOps = `for(var j${k}: u32 = 0; j${k} < ${inputs[0].dims[k]}; j${k}++) { - ${ops[2].includes('lastIndex') ? `let lastIndex = j${k};` : ''} - ${input.indicesSet('inputIndices', k, `j${k}`)} - ${reduceOps} - }`; - } else { - idxCopy.push(`${input.indicesSet('inputIndices', k, output.indicesGet('outputIndices', l))};`); - l++; } - } + return ` - const outputSize = ShapeUtil.size(outputShape); - const getShaderSource = (shaderHelper: ShaderHelper) => ` - ${shaderHelper.declareVariables(input, output)} + ${shaderHelper.registerUniform('output_size', 'u32').declareVariables(input, output)} ${shaderHelper.mainStart()} - ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)} - var inputIndices: ${input.type.indices}; - let outputIndices = ${output.offsetToIndices('global_idx')}; + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')} + var input_indices: ${input.type.indices}; + let output_indices = ${output.offsetToIndices('global_idx')}; ${idxCopy.join('\n')} ${ops[0]} // init ops for reduce max/min - ${initinputOffset} ${ops[1]} ${reduceOps} ${ops[3]} ${ops.length === 4 ? output.setByOffset('global_idx', 'value') : ops.slice(4).join('\n')} }`; + }; return { name, @@ -102,7 +99,11 @@ export const createReduceProgramInfo = getShaderSource, getRunData: () => ({ outputs: [{dims: outputShape, dataType: outputDataType}], - dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)} + dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}, + programUniforms: [ + {type: 'uint32', data: outputSize}, ...createTensorShapeVariables(inputShape), + ...createTensorShapeVariables(outputShape) + ] }), }; }; @@ -125,7 +126,7 @@ const runReduceProgram = context.compute( createReduceProgramInfo( - name, {hint: updatedAttributes.cacheKey}, [inputs[0]], + name, {hint: updatedAttributes.cacheKey, inputDependencies: ['rank']}, [inputs[0]], updatedAttributes.noopWithEmptyAxes && updatedAttributes.axes.length === 0 ? noOp : reduceOp, updatedAttributes.axes, inputs[0].dataType, updatedAttributes.keepDims, updatedAttributes.noopWithEmptyAxes), @@ -137,7 +138,7 @@ const reduceLogSumNaive = (context: ComputeContext, attributes: ReduceAttributes const reduceOp: ReduceOp = (input, output) => [`var value = ${output.type.storage}(0);`, '', - `value += ${input.getByOffset('inputOffset')};`, + `value += ${input.getByIndices('input_indices')};`, 'value = log(value);', ]; runReduceProgram(context, 'ReduceLogSum', attributes, reduceOp); @@ -148,7 +149,7 @@ const reduceL1Naive = (context: ComputeContext, attributes: ReduceAttributes): v const reduceOp: ReduceOp = (input, output) => [`var value = ${output.type.storage}(0);`, '', - `value += abs(${input.getByOffset('inputOffset')});`, + `value += abs(${input.getByIndices('input_indices')});`, '', ]; runReduceProgram(context, 'ReduceL1', attributes, reduceOp); @@ -159,7 +160,7 @@ const reduceL2Naive = (context: ComputeContext, attributes: ReduceAttributes): v const reduceOp: ReduceOp = (input, output) => [`var t = ${output.type.value}(0); var value = ${output.type.value}(0);`, '', - `t = ${input.getByOffset('inputOffset')}; value += (t * t);`, + `t = ${input.getByIndices('input_indices')}; value += (t * t);`, 'value = sqrt(value);', ]; runReduceProgram(context, 'ReduceL2', attributes, reduceOp); @@ -170,7 +171,7 @@ const reduceLogSumExpNaive = (context: ComputeContext, attributes: ReduceAttribu const reduceOp: ReduceOp = (input, output) => [`var value = ${output.type.storage}(0);`, '', - `value += exp(${input.getByOffset('inputOffset')});`, + `value += exp(${input.getByIndices('input_indices')});`, 'value = log(value);', ]; runReduceProgram(context, 'ReduceLogSumExp', attributes, reduceOp); @@ -182,14 +183,14 @@ const reduceMaxNaive = (context: ComputeContext, attributes: ReduceAttributes): const idxZero = []; for (let k = 0; k < input.rank; k++) { if (axes.indexOf(k) >= 0 || axes.length === 0) { - idxZero.push(input.indicesSet('inputIndices', k, 0)); + idxZero.push(input.indicesSet('input_indices', k, 0)); } } return [ `${idxZero.join('\n')}`, - `var value = ${input.getByOffset('inputOffset')};`, - `value = max(value, ${input.getByOffset('inputOffset')});`, + `var value = ${input.getByIndices('input_indices')};`, + `value = max(value, ${input.getByIndices('input_indices')});`, '', ]; }; @@ -210,7 +211,7 @@ const reduceMeanNaive = (context: ComputeContext, attributes: ReduceAttributes): return [ 'var sum = f32(0);', '', - `sum += f32(${input.getByOffset('inputOffset')});`, + `sum += f32(${input.getByIndices('input_indices')});`, `let value = ${output.type.value}(sum / ${size});`, ]; }; @@ -223,14 +224,14 @@ const reduceMinNaive = (context: ComputeContext, attributes: ReduceAttributes): const idxZero = []; for (let k = 0; k < input.rank; k++) { if (axes.indexOf(k) >= 0 || axes.length === 0) { - idxZero.push(`inputIndices[${k}] = 0;`); // first element + idxZero.push(`input_indices[${k}] = 0;`); // first element } } return [ `${idxZero.join('\n')}`, - `var value = ${input.getByOffset('inputOffset')};`, - `value = min(value, ${input.getByOffset('inputOffset')});`, + `var value = ${input.getByIndices('input_indices')};`, + `value = min(value, ${input.getByIndices('input_indices')});`, '', ]; }; @@ -242,7 +243,7 @@ const reduceProdNaive = (context: ComputeContext, attributes: ReduceAttributes): const reduceOp: ReduceOp = (input, output) => [`var value = ${output.type.storage}(1);`, '', - `value *= ${input.getByOffset('inputOffset')};`, + `value *= ${input.getByIndices('input_indices')};`, '', ]; runReduceProgram(context, 'ReduceProd', attributes, reduceOp); @@ -253,7 +254,7 @@ const reduceSumNaive = (context: ComputeContext, attributes: ReduceAttributes): const reduceOp: ReduceOp = (input, output) => [`var value = ${output.type.storage}(0);`, '', - `value += ${input.getByOffset('inputOffset')};`, + `value += ${input.getByIndices('input_indices')};`, '', ]; runReduceProgram(context, 'ReduceSum', attributes, reduceOp); @@ -264,7 +265,7 @@ const reduceSumSquareNaive = (context: ComputeContext, attributes: ReduceAttribu const reduceOp: ReduceOp = (input, output) => [`var t = ${output.type.value}(0); var value = ${output.type.value}(0);`, '', - `t = ${input.getByOffset('inputOffset')}; value += t * t;`, + `t = ${input.getByIndices('input_indices')}; value += t * t;`, '', ]; runReduceProgram(context, 'ReduceSumSquare', attributes, reduceOp); @@ -273,7 +274,7 @@ const reduceSumSquareNaive = (context: ComputeContext, attributes: ReduceAttribu const useNaiveReduceMethod = (shape: readonly number[], axes: readonly number[], noopWithEmptyAxes: boolean): boolean => { if (axes.length === 0) { - return noopWithEmptyAxes ? true : false; + return noopWithEmptyAxes; } let outputSize = 1; @@ -289,7 +290,7 @@ const useNaiveReduceMethod = // The condition data is very rough, although considering the count of Execution Unit (EU), the potential // work groups in a EU and the counts of loops in the naive and shared methods, also doing experiments // on some machines. - return reduceSize < 32 && outputSize > 1024 ? true : false; + return reduceSize < 32 && outputSize > 1024; }; export const reduceMean = (context: ComputeContext, attributes: ReduceAttributes): void => { @@ -371,6 +372,3 @@ export const reduceLogSum = (context: ComputeContext, attributes: ReduceAttribut reduceLogSumShared(context, attributes); } }; - -export const parseReduceAttributes = (attributes: Record): ReduceAttributes => - createAttributeWithCacheKey(attributes as Omit); diff --git a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts index 973a607f9377e..e1369c2c2b43b 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/resize.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/resize.ts @@ -7,7 +7,7 @@ import {ShapeUtil} from '../../util'; import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; import {ComputeContext, ProgramInfo} from '../types'; -import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; +import {createTensorShapeVariables, getElementAt, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; type CoordinateTransformMode = 'half_pixel'|'asymmetric'|'pytorch_half_pixel'|'tf_half_pixel_for_nn'|'align_corners'| 'tf_crop_and_resize'|'half_pixel_symmetric'; @@ -245,69 +245,67 @@ const adjustOutputShape = (inputShape: readonly number[], scales: number[], attr }; const calculateOriginalIndicesFromOutputIndices = - (output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[], scales: readonly number[], - roi: readonly number[]): string => ` - fn calculateOriginalIndicesFromOutputIndices(outputIndices: ${output.type.indices}) -> array<${ + (output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[], scalesLength: number, + roiLength: number): string => ` + fn calculateOriginalIndicesFromOutputIndices(output_indices: ${output.type.indices}) -> array<${ output.type.value}, ${outputShape.length}> { - const inputShape = array(${inputShape.map(i => `${i}u`).join(',')}); - const outputShape = array(${outputShape.map(i => `${i}u`).join(',')}); - const scales = array<${output.type.value}, ${scales.length}>(${scales.map(i => `${i}f`).join(',')}); - const roi = array<${output.type.value}, ${roi.length}>(${roi.map(i => `${i}f`).join(',')}); - var originalIndices: array<${output.type.value}, ${outputShape.length}>; + var original_indices: array<${output.type.value}, ${outputShape.length}>; for (var i:u32 = 0; i < ${outputShape.length}; i++) { - var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'}; - if (scales[i] == 1.0) { - originalIndices[i] = ${output.type.value}(outputIndex); + var output_index = ${output.type.value}(${output.indicesGet('output_indices', 'i')}); + var scale = ${getElementAt('uniforms.scales', 'i', scalesLength)}; + var roi_low = ${getElementAt('uniforms.roi', 'i', roiLength)}; + var roi_hi = ${getElementAt('uniforms.roi', `i + ${inputShape.length}`, roiLength)}; + if (scale == 1.0) { + original_indices[i] = output_index; } else { - originalIndices[i] = getOriginalCoordinateFromResizedCoordinate(${output.type.value}(outputIndex), scales[i], - ${output.type.value}(outputShape[i]), ${output.type.value}(inputShape[i]), roi[i], roi[i + ${ - inputShape.length}]); + var input_shape_i = ${output.type.value}(${getElementAt('uniforms.input_shape', 'i', inputShape.length)}); + var output_shape_i = ${output.type.value}(${getElementAt('uniforms.output_shape', 'i', outputShape.length)}); + original_indices[i] = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); } } - return originalIndices; + return original_indices; }`; const calculateInputIndicesFromOutputIndices = (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[], - scales: readonly number[], roi: readonly number[], useExtrapolation: boolean): string => ` - fn calculateInputIndicesFromOutputIndices(outputIndices: ${output.type.indices}) -> ${input.type.indices} { - const inputShape = array(${inputShape.map(i => `${i}u`).join(',')}); - const outputShape = array(${outputShape.map(i => `${i}u`).join(',')}); - const scales = array<${input.type.value}, ${scales.length}>(${scales.map(i => `${i}`).join(',')}); - const roi = array<${input.type.value}, ${roi.length}>(${roi.map(i => `${i}`).join(',')}); - var inputIndices: ${input.type.indices}; - for (var i:u32 = 0; i < ${outputShape.length}; i++) { - var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'}; - var inputIndex: u32; - if (scales[i] == 1.0) { - inputIndex = outputIndex; - } else { - var original_idx = getOriginalCoordinateFromResizedCoordinate(${input.type.value}(outputIndex), scales[i], - ${input.type.value}(outputShape[i]), ${input.type.value}(inputShape[i]), roi[i], roi[i + ${ - inputShape.length}]); - if (!${useExtrapolation} || (original_idx >= 0 && original_idx < ${input.type.value}(inputShape[i]))) { - if (original_idx < 0) { - inputIndex = 0; - } else if (original_idx > (${input.type.value}(inputShape[i]) - 1)) { - inputIndex = inputShape[i] - 1; - } else { - inputIndex = u32(getNearestPixelFromOriginal(original_idx, scales[i] < 1)); - } + scalesLength: number, roiLength: number, useExtrapolation: boolean): string => ` + fn calculateInputIndicesFromOutputIndices(output_indices: ${output.type.indices}) -> ${input.type.indices} { + var input_indices: ${input.type.indices}; + for (var i:u32 = 0; i < ${outputShape.length}; i++) { + var output_index = ${output.type.value}(${output.indicesGet('output_indices', 'i')}); + var input_index: u32; + var scale = ${getElementAt('uniforms.scales', 'i', scalesLength)}; + if (scale == 1.0) { + input_index = u32(output_index); + } else { + var roi_low = ${getElementAt('uniforms.roi', 'i', roiLength)}; + var roi_hi = ${getElementAt('uniforms.roi', `i + ${inputShape.length}`, roiLength)}; + var input_shape_i = ${output.type.value}(${getElementAt('uniforms.input_shape', 'i', inputShape.length)}); + var output_shape_i = ${output.type.value}(${getElementAt('uniforms.output_shape', 'i', outputShape.length)}); + var original_idx = getOriginalCoordinateFromResizedCoordinate(output_index, scale, output_shape_i, + input_shape_i, roi_low, roi_hi); + if (!${useExtrapolation} || (original_idx >= 0 && original_idx < input_shape_i)) { + if (original_idx < 0) { + input_index = 0; + } else if (original_idx > (input_shape_i - 1)) { + input_index = u32(input_shape_i) - 1; } else { - inputIndex = u32(original_idx); + input_index = u32(getNearestPixelFromOriginal(original_idx, scale < 1)); } + } else { + input_index = u32(original_idx); } - ${input.indicesSet('inputIndices', 'i', 'inputIndex')} } - return inputIndices; + ${input.indicesSet('input_indices', 'i', ' input_index')} + } + return input_indices; }`; - const checkInputIndices = (input: IndicesHelper, inputShape: readonly number[]): string => ` - fn checkInputIndices(inputIndices: ${input.type.indices}) -> bool { - const inputShape = array(${inputShape.map(i => `${i}u`).join(',')}); + fn checkInputIndices(input_indices: ${input.type.indices}) -> bool { for (var i:u32 = 0; i < ${inputShape.length}; i++) { - var inputIndex = ${inputShape.length === 1 ? 'inputIndices' : 'inputIndices[i]'}; - if (inputIndex < 0 || inputIndex >= inputShape[i]) { + var input_index = ${input.indicesGet('input_indices', 'i')}; + if (input_index < 0 || input_index >= ${getElementAt('uniforms.input_shape', 'i', inputShape.length)}) { return false; } } @@ -322,18 +320,18 @@ const bilinearInterpolation = const dType = input.type.value; return ` fn getInputValue(batch: u32, channel: u32, row: u32, col: u32) -> ${dType} { - var inputIndices: ${input.type.indices}; - inputIndices[${heightIdx}] = max(0, min(row, ${inputShape[heightIdx]} - 1)); - inputIndices[${widthIdx}] = max(0, min(col, ${inputShape[widthIdx]} - 1)); + var input_indices: ${input.type.indices}; + ${input.indicesSet('input_indices', heightIdx, `max(0, min(row, ${inputShape[heightIdx]} - 1))`)}; + ${input.indicesSet('input_indices', widthIdx, `max(0, min(col, ${inputShape[widthIdx]} - 1))`)}; if (${inputShape.length} > 2) { - inputIndices[${channelIdx}] = channel; - inputIndices[${batchIdx}] = batch; + ${input.indicesSet('input_indices', channelIdx, 'channel')}; + ${input.indicesSet('input_indices', batchIdx, 'batch')}; }; - return input[${input.indicesToOffset('inputIndices')}]; + return ${input.getByIndices('input_indices')}; } - fn bilinearInterpolation(outputIndices: ${output.type.indices}) -> ${dType} { - var originalIndices = calculateOriginalIndicesFromOutputIndices(outputIndices); + fn bilinearInterpolation(output_indices: ${output.type.indices}) -> ${dType} { + var originalIndices = calculateOriginalIndicesFromOutputIndices(output_indices); var row:${dType} = originalIndices[${heightIdx}]; var col:${dType} = originalIndices[${widthIdx}]; if (${useExtrapolation} && (row < 0 || row > (${inputShape[heightIdx]} - 1) || col < 0 || col > ${ @@ -373,10 +371,10 @@ const bicubicInterpolation = const createCubicInterpolationFunction = (idx: number): string => { const direction = idx === heightIdx ? 'row' : 'col'; return ` - fn ${direction}CubicInterpolation(inputIndices: ${input.type.indices}, outputIndices: ${ + fn ${direction}CubicInterpolation(input_indices: ${input.type.indices}, output_indices: ${ output.type.indices}) -> ${dType} { - var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : `outputIndices[${idx}]`}; - var originalIdx: ${dType} = getOriginalCoordinateFromResizedCoordinate(${dType}(outputIndex), ${scales[idx]}, + var output_index = ${output.indicesGet('output_indices', idx)}; + var originalIdx: ${dType} = getOriginalCoordinateFromResizedCoordinate(${dType}(output_index), ${scales[idx]}, ${dType}(${outputShape[idx]}), ${dType}(${inputShape[idx]}), ${roi[idx]}, ${roi[idx]} + ${inputShape.length}); var fractOriginalIdx: ${dType} = originalIdx - floor(originalIdx); var coefs = getCubicInterpolationCoefs(fractOriginalIdx); @@ -397,10 +395,11 @@ const bicubicInterpolation = ${direction} = max(0, min(${direction}, ${inputShape[idx]} - 1)); } } - var inputIndicesCopy: ${input.type.indices} = inputIndices; - inputIndicesCopy[${idx}] = u32(${direction}); - data[i + 1] = ${idx === heightIdx ? `input[${input.indicesToOffset('inputIndicesCopy')}];` : ` - rowCubicInterpolation(inputIndicesCopy, outputIndices);`} + var input_indices_copy: ${input.type.indices} = input_indices; + ${input.indicesSet('input_indices_copy', idx, `u32(${direction})`)}; + data[i + 1] = ${ + idx === heightIdx ? input.getByIndices('input_indices_copy') : + 'rowCubicInterpolation(input_indices_copy, output_indices)'}; } return cubicInterpolation1D(data, coefs); }`; @@ -429,9 +428,9 @@ const bicubicInterpolation = return (x[0] * coefs[0] + x[1] * coefs[1]+ x[2] * coefs[2]+ x[3] * coefs[3]) / coefsSum; } - fn bicubicInterpolation(outputIndices: ${output.type.indices}) -> ${dType} { - var inputIndices: ${input.type.indices} = outputIndices; - return colCubicInterpolation(inputIndices, outputIndices); + fn bicubicInterpolation(output_indices: ${output.type.indices}) -> ${dType} { + var input_indices: ${input.type.indices} = output_indices; + return colCubicInterpolation(input_indices, output_indices); } `; }; @@ -450,8 +449,8 @@ const createResizeProgramInfo = outputShape = adjustOutputShape(inputShape, scales, attributes); } } - const output = outputVariable('output', inputTensor.dataType, outputShape); - const input = inputVariable('input', inputTensor.dataType, inputShape); + const output = outputVariable('output', inputTensor.dataType, outputShape.length); + const input = inputVariable('input', inputTensor.dataType, inputShape.length); const outputSize = ShapeUtil.size(outputShape); const noScale = inputShape.length === outputShape.length && inputShape.every((d, i) => d === outputShape[i]); const useExtrapolation = attributes.coordinateTransformMode === 'tf_crop_and_resize'; @@ -467,11 +466,11 @@ const createResizeProgramInfo = ${getNearestPixelFromOriginal(attributes.nearestMode, opsetVersion, dataType)}; ${ calculateInputIndicesFromOutputIndices( - input, output, inputShape, outputShape, scales, roi, useExtrapolation)}; + input, output, inputShape, outputShape, scales.length, roi.length, useExtrapolation)}; `; case 'linear': return ` - ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales, roi)}; + ${calculateOriginalIndicesFromOutputIndices(output, inputShape, outputShape, scales.length, roi.length)}; ${ bilinearInterpolation( input, output, inputShape, scales, useExtrapolation, attributes.extrapolationValue)}; @@ -488,25 +487,29 @@ const createResizeProgramInfo = } })()}; `} - ${shaderHelper.declareVariables(input, output)} + ${ + shaderHelper.registerUniform('output_size', 'u32') + .registerUniform('scales', 'f32', scales.length) + .registerUniform('roi', 'f32', roi.length) + .declareVariables(input, output)} ${shaderHelper.mainStart()} - ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(outputSize)} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')} ${noScale ? 'output[global_idx] = input[global_idx];' : ` - let outputIndices = ${output.offsetToIndices('global_idx')}; - var inputIndices: ${input.type.indices}; + let output_indices = ${output.offsetToIndices('global_idx')}; + var input_indices: ${input.type.indices}; ${(() => { switch (attributes.mode) { case 'nearest': - return `inputIndices = calculateInputIndicesFromOutputIndices(outputIndices); - if (checkInputIndices(inputIndices)) { - output[global_idx] = input[${input.indicesToOffset('inputIndices')}]; + return `input_indices = calculateInputIndicesFromOutputIndices(output_indices); + if (checkInputIndices(input_indices)) { + output[global_idx] = ${input.getByIndices('input_indices')}; } else { output[global_idx] = ${attributes.extrapolationValue}; }`; case 'linear': - return 'output[global_idx] = bilinearInterpolation(outputIndices);'; + return 'output[global_idx] = bilinearInterpolation(output_indices);'; case 'cubic': - return 'output[global_idx] = bicubicInterpolation(outputIndices);'; + return 'output[global_idx] = bicubicInterpolation(output_indices);'; default: throw Error(`Unsupported resize mode: ${attributes.mode}`); } @@ -518,12 +521,20 @@ const createResizeProgramInfo = name: 'Resize', shaderCache: { hint: `${attributes.cacheKey}|${opsetVersion}|${scales.length > 0 ? scales : ''}|${ - sizes.length > 0 ? sizes : ''}|${noScale}` + sizes.length > 0 ? sizes : ''}|${roi.length > 0 ? roi : ''}|${noScale}`, + inputDependencies: ['rank'] }, getShaderSource, getRunData: () => ({ outputs: [{dims: outputShape, dataType: inputTensor.dataType}], - dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)} + dispatchGroup: {x: Math.ceil(outputSize / 64 /* workgroup size */)}, + programUniforms: [ + {type: 'uint32', data: outputSize}, + {type: 'float32', data: scales}, + {type: 'float32', data: roi}, + ...createTensorShapeVariables(inputShape), + ...createTensorShapeVariables(outputShape), + ] }) }; }; diff --git a/js/web/lib/wasm/jsep/webgpu/ops/slice.ts b/js/web/lib/wasm/jsep/webgpu/ops/slice.ts index 43d4e5356d1d9..5212c6475dce0 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/slice.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/slice.ts @@ -77,25 +77,25 @@ const fixStartEndValues = }; const calculateInputIndicesImpl = - (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[], outputShape: readonly number[]): - string => `fn calculateInputIndices(outputIndices: ${output.type.indices}) -> ${input.type.indices} { - var inputIndices: ${input.type.indices}; + (input: IndicesHelper, output: IndicesHelper, inputShape: readonly number[]): string => + `fn calculateInputIndices(output_indices: ${output.type.indices}) -> ${input.type.indices} { + var input_indices: ${input.type.indices}; var carry = 0u; for (var i = ${inputShape.length}; i >= 0; i--) { let input_shape_i = ${getElementAt('uniforms.input_shape', 'i', inputShape.length)}; let steps_i = ${getElementAt('uniforms.steps', 'i', inputShape.length)}; let signs_i = ${getElementAt('uniforms.signs', 'i', inputShape.length)}; let starts_i = ${getElementAt('uniforms.starts', 'i', inputShape.length)}; - var outputIndex = ${outputShape.length === 1 ? 'outputIndices' : 'outputIndices[i]'}; - var inputIndex = outputIndex * steps_i + starts_i + carry; - carry = inputIndex / input_shape_i; - inputIndex = inputIndex % input_shape_i; + var output_index = ${output.indicesGet('output_indices', 'i')}; + var input_index = output_index * steps_i + starts_i + carry; + carry = input_index / input_shape_i; + input_index = input_index % input_shape_i; if (signs_i < 0) { - inputIndex = input_shape_i - inputIndex - 1u + starts_i; + input_index = input_shape_i - input_index - 1u + starts_i; } - ${inputShape.length === 1 ? 'inputIndices' : 'inputIndices[i]'} = inputIndex; + ${input.indicesSet('input_indices', 'i', 'input_index')}; } - return inputIndices; + return input_indices; }`; const createSliceProgramInfo = (inputs: readonly TensorView[], attributes: SliceAttributes): ProgramInfo => { @@ -162,12 +162,12 @@ const createSliceProgramInfo = (inputs: readonly TensorView[], attributes: Slice const getShaderSource = (shaderHelper: ShaderHelper) => ` ${shaderHelper.registerUniforms(uniforms).declareVariables(input, output)} - ${calculateInputIndicesImpl(input, output, inputShape, outputShape)} + ${calculateInputIndicesImpl(input, output, inputShape)} ${shaderHelper.mainStart()} ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.outputSize')} - let outputIndices = ${output.offsetToIndices('global_idx')}; - let inputIndices = calculateInputIndices(outputIndices); - ${output.setByOffset('global_idx', input.getByIndices('inputIndices'))} + let output_indices = ${output.offsetToIndices('global_idx')}; + let input_indices = calculateInputIndices(output_indices); + ${output.setByOffset('global_idx', input.getByIndices('input_indices'))} }`; return { name: 'Slice', diff --git a/js/web/lib/wasm/jsep/webgpu/ops/split.ts b/js/web/lib/wasm/jsep/webgpu/ops/split.ts index fd60d81b87ae1..b8582614fa214 100644 --- a/js/web/lib/wasm/jsep/webgpu/ops/split.ts +++ b/js/web/lib/wasm/jsep/webgpu/ops/split.ts @@ -4,9 +4,9 @@ import {TensorView} from '../../tensor-view'; import {ShapeUtil} from '../../util'; import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; -import {ComputeContext, ProgramInfo, TensorInfo} from '../types'; +import {ComputeContext, ProgramInfo, ProgramUniform, TensorInfo} from '../types'; -import {IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; +import {createTensorShapeVariables, getElementAt, IndicesHelper, inputVariable, outputVariable, ShaderHelper} from './common'; export interface SplitAttributes extends AttributeWithCacheKey { readonly axis: number; @@ -34,7 +34,7 @@ const createSplitAttributesFromInputs = const calculateOutputIndexImpl = (numberOfTensors: number): string => ` fn calculateOutputIndex(index: u32) -> u32 { for (var i: u32 = 0u; i < ${numberOfTensors}u; i += 1u ) { - if (index < sizeInConcatAxis[i]) { + if (index < ${getElementAt('uniforms.size_in_split_axis', 'i', numberOfTensors)}) { return i; } } @@ -48,15 +48,15 @@ const writeBufferDataImpl = (outputs: readonly IndicesHelper[]) => { if (numberOfTensors === 1) { codeLines.push(returnSnippet); } else if (i === 0) { - codeLines.push(`if (outputNumber == ${i}u) { ${returnSnippet} }`); + codeLines.push(`if (output_number == ${i}u) { ${returnSnippet} }`); } else if (i === numberOfTensors - 1) { codeLines.push(`else { ${returnSnippet} }`); } else { - codeLines.push(`else if (outputNumber == ${i}) { ${returnSnippet} }`); + codeLines.push(`else if (output_number == ${i}) { ${returnSnippet} }`); } } return ` - fn writeBufferData(outputNumber: u32, indices: ${outputs[0].type.indices}, global_idx: u32) { + fn writeBufferData(output_number: u32, indices: ${outputs[0].type.indices}, global_idx: u32) { ${codeLines.join('\n')} }`; }; @@ -65,48 +65,54 @@ const createSplitProgramInfo = (inputs: readonly TensorView[], attributes: Split const inputShape = inputs[0].dims; const inputSize = ShapeUtil.size(inputShape); const dataType = inputs[0].dataType; - const rank = inputShape.length; - const axis = attributes.axis; - const adjustedAxis = (axis < 0) ? inputShape.length + axis : axis; + const axis = ShapeUtil.normalizeAxis(attributes.axis, inputShape.length); const outputs = new Array(attributes.numOutputs); const input = inputVariable('input', dataType, inputShape); - const sizeInConcatAxis = new Array(attributes.numOutputs); + const sizeInSplitAxis = new Array(attributes.numOutputs); const outputsTensorInfo: TensorInfo[] = []; const outputShapes: number[][] = []; let previousSum = 0; + const programUniforms: ProgramUniform[] = [{type: 'uint32', data: inputSize}]; for (let i = 0; i < attributes.numOutputs; i++) { previousSum += attributes.splitSizes[i]; - sizeInConcatAxis[i] = previousSum; + sizeInSplitAxis[i] = previousSum; const outputShape = inputShape.slice(); outputShape[attributes.axis] = attributes.splitSizes[i]; outputShapes.push(outputShape); - outputs[i] = outputVariable(`output${i}`, dataType, outputShapes[i]); + outputs[i] = outputVariable(`output${i}`, dataType, outputShape); outputsTensorInfo.push({dims: outputShapes[i], dataType: inputs[0].dataType}); } - const indicesAxis = rank < 2 ? 'indices' : `indices[${adjustedAxis}]`; + programUniforms.push({type: 'uint32', data: sizeInSplitAxis}); + programUniforms.push(...createTensorShapeVariables(inputShape)); + outputShapes.forEach((outputShape) => programUniforms.push(...createTensorShapeVariables(outputShape))); const getShaderSource = (shaderHelper: ShaderHelper) => ` - ${shaderHelper.declareVariables(input, ...outputs)} - const sizeInConcatAxis = array(${sizeInConcatAxis.map(i => `${i}u`).join(',')}); - ${calculateOutputIndexImpl(sizeInConcatAxis.length)} + ${ + shaderHelper.registerUniform('input_size', 'u32') + .registerUniform('size_in_split_axis', 'u32', sizeInSplitAxis.length) + .declareVariables(input, ...outputs)} + ${calculateOutputIndexImpl(sizeInSplitAxis.length)} ${writeBufferDataImpl(outputs)} ${shaderHelper.mainStart()} - ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes(inputSize)} + ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.input_size')} var indices = ${input.offsetToIndices('global_idx')}; - let outputNumber = calculateOutputIndex(${indicesAxis}); - if (outputNumber != 0) { - ${indicesAxis} -= sizeInConcatAxis[outputNumber - 1u]; + var index = ${input.indicesGet('indices', axis)}; + let output_number = calculateOutputIndex(index); + if (output_number != 0) { + index -= ${getElementAt('uniforms.size_in_split_axis', 'output_number - 1u', sizeInSplitAxis.length)}; + ${input.indicesSet('indices', axis, 'index')}; } - writeBufferData(outputNumber, indices, global_idx); + writeBufferData(output_number, indices, global_idx); }`; return { name: 'Split', - shaderCache: {hint: attributes.cacheKey}, + shaderCache: {hint: attributes.cacheKey, inputDependencies: ['rank']}, getShaderSource, getRunData: () => ({ outputs: outputsTensorInfo, dispatchGroup: {x: Math.ceil(inputSize / 64 /* workgroup size */)}, + programUniforms }) }; };