Skip to content

Commit

Permalink
[js/webgpu] Add hardSigmoid activation for fusedConv (microsoft#19233)
Browse files Browse the repository at this point in the history
### Description
Add HardSigmoid activation for fusedConv. It will be used by the
mobilenetv3-small-100 model.
  • Loading branch information
qjia7 authored Jan 31, 2024
1 parent e5cf465 commit c602260
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 56 deletions.
11 changes: 3 additions & 8 deletions web/lib/wasm/jsep/webgpu/ops/3rd-party/conv2d_mm_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {TensorView} from '../../../tensor-view';
import {ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../../types';
import {createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType, UniformsArrayType} from '../common';
import {ConvAttributes} from '../conv';
import {getActivationSnippet} from '../fuse-utils';
import {appendActivationUniforms, appendActivationUniformsData, getActivationSnippet} from '../fuse-utils';

import {biasSnippet, typeSnippet} from './activation_util';
import {utilFunctions} from './conv_util';
Expand Down Expand Up @@ -193,10 +193,7 @@ export const createConv2DMatMulProgramInfo =
{type: 'int32', data: [attributes.pads[0], attributes.pads[1]]}, {type: 'int32', data: attributes.strides},
{type: 'int32', data: attributes.dilations}
];
if (attributes.activation === 'Clip') {
programUniforms.push(
{type: 'float32', data: attributes.clipMax!}, {type: 'float32', data: attributes.clipMin!});
}
appendActivationUniformsData(attributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(inputs[0].dims), ...createTensorShapeVariables(inputs[1].dims));
const inputDependencies: ProgramInputTensorInfoDependency[] = ['rank', 'rank'];
Expand All @@ -212,9 +209,7 @@ export const createConv2DMatMulProgramInfo =
{name: 'pad', type: 'i32', length: 2}, {name: 'stride', type: 'i32', length: 2},
{name: 'dilation', type: 'i32', length: 2}
];
if (attributes.activation === 'Clip') {
uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
}
appendActivationUniforms(attributes, uniforms);

// TODO: support component 2, 3.
const components = isVec4 ? 4 : 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import {TensorView} from '../../../tensor-view';
import {ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../../types';
import {createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from '../common';
import {ConvTransposeAttributes} from '../conv-transpose';
import {getActivationSnippet} from '../fuse-utils';
import {appendActivationUniforms, appendActivationUniformsData, getActivationSnippet} from '../fuse-utils';

import {biasSnippet, typeSnippet} from './activation_util';
import {utilFunctions} from './conv_util';
Expand Down Expand Up @@ -201,10 +201,7 @@ export const createConv2DTransposeMatMulProgramInfo =
{type: 'int32', data: attributes.strides}, {type: 'int32', data: attributes.dilations},
{type: 'int32', data: filterDims}, {type: 'int32', data: pads}
];
if (attributes.activation === 'Clip') {
programUniforms.push(
{type: 'float32', data: attributes.clipMax!}, {type: 'float32', data: attributes.clipMin!});
}
appendActivationUniformsData(attributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(inputs[0].dims), ...createTensorShapeVariables(inputs[1].dims));

Expand Down Expand Up @@ -237,9 +234,7 @@ export const createConv2DTransposeMatMulProgramInfo =
{name: 'filter_dims', type: 'i32', length: filterDims.length},
{name: 'pads', type: 'i32', length: pads.length}
];
if (attributes.activation === 'Clip') {
uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
}
appendActivationUniforms(attributes, uniforms);
return `
${utilFunctions('uniforms.result_strides')}
${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)};
Expand Down
12 changes: 3 additions & 9 deletions web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import {TensorView} from '../../../tensor-view';
import {ShapeUtil} from '../../../util';
import {ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../../types';
import {createTensorShapeVariables, getBroadcastDims, IndicesHelper, inputVariable, internalVariable, outputVariable, ShaderHelper, tensorTypeToWsglStorageType, UniformsArrayType} from '../common';
import {getActivationSnippet, InternalActivationAttributes} from '../fuse-utils';
import {appendActivationUniforms, appendActivationUniformsData, getActivationSnippet, InternalActivationAttributes} from '../fuse-utils';

import {typeSnippet} from './activation_util';

Expand Down Expand Up @@ -449,11 +449,7 @@ export const createMatmulProgramInfo =
const outputShapeTemp = [batchSize, dimAOuter, dimBOuter / components];
const programUniforms: ProgramUniform[] =
[{type: 'int32', data: dimAOuter}, {type: 'int32', data: dimBOuter}, {type: 'int32', data: dimInner}];
if (activationAttributes.activation === 'Clip') {
programUniforms.push(
{type: 'float32', data: activationAttributes.clipMax!},
{type: 'float32', data: activationAttributes.clipMin!});
}
appendActivationUniformsData(activationAttributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(outerDims), ...createTensorShapeVariables(aShapeTemp),
...createTensorShapeVariables(bShapeTemp));
Expand Down Expand Up @@ -481,9 +477,7 @@ export const createMatmulProgramInfo =
}
const uniforms: UniformsArrayType =
[{name: 'dim_a_outer', type: 'i32'}, {name: 'dim_b_outer', type: 'i32'}, {name: 'dim_inner', type: 'i32'}];
if (activationAttributes.activation === 'Clip') {
uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
}
appendActivationUniforms(activationAttributes, uniforms);
const applyActivation = getActivationSnippet(activationAttributes, output.type.value);
const declareFunctions = matMulReadWriteFnSource(
components, hasBias, applyActivation, [batchDims, A, B, output], [outerDimsA, outerDimsB, outerDims],
Expand Down
37 changes: 18 additions & 19 deletions web/lib/wasm/jsep/webgpu/ops/conv-grouped.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {ProgramInfo, ProgramInputTensorInfoDependency, ProgramUniform} from '../

import {createTensorShapeVariables, getMaxComponents, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from './common';
import {calculateOutputShape, ConvAttributes} from './conv';
import {getActivationSnippet} from './fuse-utils';
import {appendActivationUniforms, appendActivationUniformsData, getActivationSnippet} from './fuse-utils';

/**
* naive grouped conv implementation, supports 1d/2d conv
Expand All @@ -32,10 +32,7 @@ export const createGroupedConvProgramInfo =
{type: 'uint32', data: [attributes.strides[0], attributes.strides[1]]},
{type: 'uint32', data: [attributes.pads[0], attributes.pads[1]]}, {type: 'uint32', data: outputChannelsPerGroup}
];
if (attributes.activation === 'Clip') {
programUniforms.push(
{type: 'float32', data: attributes.clipMax!}, {type: 'float32', data: attributes.clipMin!});
}
appendActivationUniformsData(attributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(xShape), ...createTensorShapeVariables(wShape),
...createTensorShapeVariables(outputShape));
Expand All @@ -61,9 +58,7 @@ export const createGroupedConvProgramInfo =
{name: 'strides', type: 'u32', length: 2}, {name: 'pads', type: 'u32', length: 2},
{name: 'output_channels_per_group', type: 'u32'}
];
if (attributes.activation === 'Clip') {
uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
}
appendActivationUniforms(attributes, uniforms);
return `
${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}
Expand Down Expand Up @@ -132,10 +127,13 @@ export const createGroupedConvVectorizeProgramInfo =
const outputShapeInShader = [outputShape[0], outputShape[1], outputShape[2], outputShape[3] / components];

const programUniforms: ProgramUniform[] = [
{type: 'uint32', data: outputSize}, {type: 'int32', data: attributes.strides},
{type: 'int32', data: attributes.pads}, ...createTensorShapeVariables(xShape),
...createTensorShapeVariables(wShape), ...createTensorShapeVariables(outputShapeInShader)
{type: 'uint32', data: outputSize}, {type: 'int32', data: [attributes.strides[0], attributes.strides[1]]},
{type: 'int32', data: [attributes.pads[0], attributes.pads[1]]}
];
appendActivationUniformsData(attributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(xShape), ...createTensorShapeVariables(wShape),
...createTensorShapeVariables(outputShapeInShader));
const xNumber = (outputNumber - 1) * attributes.strides[1] + wShape[1];
const getShaderSource = (shaderHelper: ShaderHelper) => {
const output = outputVariable('output', inputs[0].dataType, outputShapeInShader.length, components);
Expand All @@ -147,13 +145,14 @@ export const createGroupedConvVectorizeProgramInfo =
inputVars.push(inputVariable('b', inputs[2].dataType, inputs[2].dims, components));
}
const processBias = hasBias ? 'value += b[output_channel];' : '';

const uniforms: UniformsArrayType = [
{name: 'output_size', type: 'u32'},
{name: 'strides', type: 'i32', length: 2},
{name: 'pads', type: 'i32', length: 2},
];
appendActivationUniforms(attributes, uniforms);
return `
${
shaderHelper.registerUniform('output_size', 'u32')
.registerUniform('strides', 'i32', 2)
.registerUniform('pads', 'i32', 2)
.declareVariables(...inputVars, output)}
${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVars, output)}
${shaderHelper.mainStart()}
${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
let width0 = uniforms.output_shape[3];
Expand All @@ -173,7 +172,7 @@ export const createGroupedConvVectorizeProgramInfo =
// Use constant instead of uniform can give better performance for w's height/width.
for (var w_height: u32 = 0u; w_height < ${wShape[0]}; w_height++) {
let x_height = x_corner.x + i32(w_height);
if (x_height >= 0 || u32(x_height) < uniforms.x_shape[1]) {
if (x_height >= 0 && u32(x_height) < uniforms.x_shape[1]) {
for (var i = 0; i < ${xNumber}; i++) {
let x_width = x_corner.y + i;
if (x_width >= 0 && u32(x_width) < uniforms.x_shape[2]) {
Expand All @@ -185,7 +184,7 @@ export const createGroupedConvVectorizeProgramInfo =
for (var w_width: u32 = 0u; w_width < ${wShape[1]}; w_width++) {
let w_val = ${w.get('w_height', 'w_width', '0', 'output_channel')};
for (var i = 0u; i < ${outputNumber}u; i++) {
values[i] = fma(x_vals[i * ${attributes.strides[1]}u + w_width], w_val, values[i]);
values[i] = fma(x_vals[i * u32(uniforms.strides[1]) + w_width], w_val, values[i]);
}
}
}
Expand Down
35 changes: 32 additions & 3 deletions web/lib/wasm/jsep/webgpu/ops/fuse-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
// Licensed under the MIT License.

import {MAX_CLIP, MIN_CLIP} from '../../util';
import {ProgramUniform} from '../types';

import {UniformsArrayType} from './common';

/**
 * Attributes describing an activation that is fused into another operator's generated shader code.
 *
 * Only `activation` is mandatory. The helpers in this file read `clipMin`/`clipMax` only when
 * `activation` is 'Clip', and `alpha`/`beta` only when it is 'HardSigmoid'.
 */
export interface InternalActivationAttributes {
// Name of the fused activation (e.g. 'Clip', 'HardSigmoid'); an empty string yields no activation snippet.
readonly activation: string;
// Lower bound handed to clamp() for 'Clip'; parseInternalActivationAttributes falls back to MIN_CLIP.
readonly clipMin?: number;
// Upper bound handed to clamp() for 'Clip'; parseInternalActivationAttributes falls back to MAX_CLIP.
readonly clipMax?: number;
// Slope for 'HardSigmoid', i.e. max(0, min(1, alpha * value + beta)); parsed default is 0.2.
readonly alpha?: number;
// Offset for 'HardSigmoid', i.e. max(0, min(1, alpha * value + beta)); parsed default is 0.5.
readonly beta?: number;
}

export const getActivationSnippet = (attributes: InternalActivationAttributes, valueType: string): string => {
Expand All @@ -17,17 +22,41 @@ export const getActivationSnippet = (attributes: InternalActivationAttributes, v
return `value = (${valueType}(1.0) / (${valueType}(1.0) + exp(-value)));`;
case 'Clip':
return `value = clamp(value, ${valueType}(uniforms.clip_min), ${valueType}(uniforms.clip_max));`;
case 'HardSigmoid':
return `value = max(${valueType}(0.0), min(${valueType}(1.0), ${valueType}(uniforms.alpha) * value + ${
valueType}(uniforms.beta)));`;
case '':
return '';
// TODO: adding other activations that can be fused.
default:
return '';
throw new Error(`Unsupported activation ${attributes.activation}`);
}
};

/**
 * Appends the uniform values required by the fused activation to `programUniform`.
 *
 * - 'Clip' pushes `clipMax` and then `clipMin`, both as 'float32'.
 * - 'HardSigmoid' pushes `alpha` and then `beta`, both as 'float32'.
 * - Any other activation leaves the array untouched.
 *
 * The values are pushed in the same order in which appendActivationUniforms declares the matching
 * uniform names.
 *
 * @param attributes activation attributes; the optional fields are read with a non-null assertion,
 *     so the caller is expected to have populated the ones belonging to the selected activation.
 * @param programUniform array that is mutated in place.
 */
export const appendActivationUniformsData =
    (attributes: InternalActivationAttributes, programUniform: ProgramUniform[]) => {
      switch (attributes.activation) {
        case 'Clip':
          programUniform.push({type: 'float32', data: attributes.clipMax!});
          programUniform.push({type: 'float32', data: attributes.clipMin!});
          break;
        case 'HardSigmoid':
          programUniform.push({type: 'float32', data: attributes.alpha!});
          programUniform.push({type: 'float32', data: attributes.beta!});
          break;
        default:
          // Activations without parameters contribute no uniform data.
          break;
      }
    };

/**
 * Appends the uniform declarations required by the fused activation to `uniforms`.
 *
 * - 'Clip' declares `clip_max` and then `clip_min`, both of type 'f32'.
 * - 'HardSigmoid' declares `alpha` and then `beta`, both of type 'f32'.
 * - Any other activation leaves the array untouched.
 *
 * @param attributes activation attributes; only `activation` is inspected.
 * @param uniforms array of uniform declarations that is mutated in place.
 */
export const appendActivationUniforms = (attributes: InternalActivationAttributes, uniforms: UniformsArrayType) => {
  const {activation} = attributes;
  if (activation === 'Clip') {
    uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
    return;
  }
  if (activation === 'HardSigmoid') {
    uniforms.push({name: 'alpha', type: 'f32'}, {name: 'beta', type: 'f32'});
  }
};

export const parseInternalActivationAttributes =
(attributes: Record<string, unknown>|undefined): InternalActivationAttributes => {
const activation = attributes?.activation as string || '';

if (activation === 'Clip') {
if (activation === 'HardSigmoid') {
const [alpha, beta] = attributes?.activation_params as [number, number] || [0.2, 0.5];
return {activation, alpha, beta};
} else if (activation === 'Clip') {
const [clipMin, clipMax] = attributes?.activation_params as [number, number] || [MIN_CLIP, MAX_CLIP];
return {activation, clipMax, clipMin};
}
Expand Down
12 changes: 3 additions & 9 deletions web/lib/wasm/jsep/webgpu/ops/matmul.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {ComputeContext, ProgramInfo, ProgramUniform} from '../types';

import {createMatmulProgramInfo} from './3rd-party/matmul_packed_webgpu';
import {createTensorShapeVariables, getBroadcastDims, getMaxComponents, IndicesHelper, inputVariable, internalVariable, outputVariable, ShaderHelper, UniformsArrayType,} from './common';
import {getActivationSnippet, InternalActivationAttributes} from './fuse-utils';
import {appendActivationUniforms, appendActivationUniformsData, getActivationSnippet, InternalActivationAttributes} from './fuse-utils';

export const createNaiveMatmulProgramInfo =
(inputs: readonly TensorView[], activationAttributes: InternalActivationAttributes, outputShape: readonly number[],
Expand All @@ -32,11 +32,7 @@ export const createNaiveMatmulProgramInfo =
{type: 'uint32', data: outputSize}, {type: 'uint32', data: M}, {type: 'uint32', data: N},
{type: 'uint32', data: K}
];
if (activationAttributes.activation === 'Clip') {
programUniforms.push(
{type: 'float32', data: activationAttributes.clipMax!},
{type: 'float32', data: activationAttributes.clipMin!});
}
appendActivationUniformsData(activationAttributes, programUniforms);
programUniforms.push(
...createTensorShapeVariables(outerDims), ...createTensorShapeVariables(aShape),
...createTensorShapeVariables(bShape));
Expand Down Expand Up @@ -69,9 +65,7 @@ export const createNaiveMatmulProgramInfo =
{name: 'output_size', type: 'u32'}, {name: 'M', type: 'u32'}, {name: 'N', type: 'u32'},
{name: 'K', type: 'u32'}
];
if (activationAttributes.activation === 'Clip') {
uniforms.push({name: 'clip_max', type: 'f32'}, {name: 'clip_min', type: 'f32'});
}
appendActivationUniforms(activationAttributes, uniforms);

const getIndices = (variable: IndicesHelper, broadCastDims: number[]) => {
const rank = variable.rank;
Expand Down
Loading

0 comments on commit c602260

Please sign in to comment.