diff --git a/lib/backends/webgl/ops/conv-pack.ts b/lib/backends/webgl/ops/conv-pack.ts index 414bf64d..1976494b 100644 --- a/lib/backends/webgl/ops/conv-pack.ts +++ b/lib/backends/webgl/ops/conv-pack.ts @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT license. +import {Attribute} from '../../../attribute'; import {Logger} from '../../../instrument'; import {Conv} from '../../../ops/conv'; import {Tensor} from '../../../tensor'; @@ -36,6 +37,11 @@ export class WebGLConvPacked extends Conv { const outputShape = WebGLConv.calcOutputShape(xshape, kshape, this.dilations, this.pads, this.strides); const im2col = new WebGLIm2ColPacked(outputShape, kshape, this.dilations, this.pads, this.strides); const matmul = new WebGLMatMulPacked(); + if (!!this.activation) { + const attributes = new Attribute(undefined); + attributes.set('__internal_activation', 'string', (this.activation)); + matmul.initialize(attributes); + } const reshape = new WebGLReshapePacked(); // shape for kernel reshape const shape = diff --git a/lib/backends/webgl/ops/conv.ts b/lib/backends/webgl/ops/conv.ts index 0a99a62d..5600b7c9 100644 --- a/lib/backends/webgl/ops/conv.ts +++ b/lib/backends/webgl/ops/conv.ts @@ -10,7 +10,9 @@ import {getGlsl} from '../glsl-source'; import {WebGLInferenceHandler} from '../inference-handler'; import {Artifact, ProgramInfo, RunData, TextureLayout, WebGLOperator} from '../types'; import {WebGLContext} from '../webgl-context'; + import {WebGLConvPacked} from './conv-pack'; +import {getActicationSnippet} from './fuse_utils'; export class WebGLConv extends Conv { unpackedGroupedConvImpl: WebGLUnpackedGroupedConv; @@ -66,7 +68,7 @@ export class WebGLUnpackedGroupedConv extends Conv implements WebGLOperator { createProgramInfo(handler: WebGLInferenceHandler, inputs: Tensor[]): ProgramInfo { const hasBias = inputs.length > 2; - const processBias = hasBias ? `dotProd += getBias(output_channel);` : ``; + const processBias = hasBias ? `value += getBias(output_channel);` : ``; const xShape = inputs[0].dims.slice(); const wShape = inputs[1].dims.slice(); const outputChannelsPerGroup = wShape[0] / this.group; @@ -85,10 +87,12 @@ export class WebGLUnpackedGroupedConv extends Conv implements WebGLOperator { const outputShape = WebGLConv.calcOutputShape(xShape, wShape, this.dilations, this.pads, this.strides); const glsl = getGlsl(handler.session.backend.glContext.version); + const {activationFunction, applyActivation} = getActicationSnippet(this.activation); + const shaderSource = ` const ivec2 strides = ivec2(${this.strides[0]}, ${this.strides[1]}); const ivec2 pads = ivec2(${this.pads[0]}, ${this.pads[1]}); - + ${activationFunction} void main() { ivec4 coords = getOutputCoords(); int batch = coords.x; @@ -96,7 +100,7 @@ export class WebGLUnpackedGroupedConv extends Conv implements WebGLOperator { ivec2 xRCCorner = coords.zw * strides - pads; int group_id = output_channel / ${outputChannelsPerGroup}; - float dotProd = 0.0; + float value = 0.0; for (int wInChannel = 0; wInChannel < ${wShape[1]}; wInChannel++) { int input_channel = group_id * ${wShape[1]} + wInChannel; for (int wHeight = 0; wHeight < ${wShape[2]}; wHeight++) { @@ -114,12 +118,13 @@ export class WebGLUnpackedGroupedConv extends Conv implements WebGLOperator { float xVal = getX(batch, input_channel, xWidth, xHeight); float wVal = getW(output_channel, wInChannel, wWidth, wHeight); - dotProd += xVal*wVal; + value += xVal*wVal; } } } ${processBias} - ${glsl.output} = vec4(dotProd, .0, .0, .0); + ${applyActivation} + ${glsl.output} = vec4(value, .0, .0, .0); } `; return { @@ -215,7 +220,6 @@ export class WebGLUnpackedConv extends Conv { let blend = false; for (let k = 0; k < sharedDim; k += sharedDimReadSize) { Logger.verbose('MatMul2D', `k = ${k}, sharedDim: ${sharedDim}, readSize = ${sharedDimReadSize}`); - if (k === sharedDimReadSize) { blend = true; gl.enable(gl.BLEND); @@ -248,6 +252,7 @@ export class WebGLUnpackedConv extends Conv { const im2colDims = WebGLUnpackedConv.calcIm2ColDims(xshape, kshape, outputShape, 4); const outputLayout = inferenceHandler.createTextureLayoutFromShape( im2colDims, 4, [im2colDims[0], im2colDims[1], im2colDims[2], im2colDims[3] * 4], {breakAxis: 3}); + const shaderSource = ` const int XC = ${xshape[1]}; const int XH = ${xshape[2]}; @@ -263,13 +268,12 @@ export class WebGLUnpackedConv extends Conv { const int KHKW = KH*KW; const int XCKHKW = XC * KHKW; const int outputChannels = 4; - vec4 process(int indices[${rank}]) { int b = indices[0]; // batch size int oh = indices[1] * strideH - padH; //output height int ow = indices[2] * strideW - padW; //output width int p = indices[3] * outputChannels; //patch - vec4 v = vec4(0.0); + vec4 value = vec4(0.0); for(int i=0; i < outputChannels; ++i) { if(p < XCKHKW) { int patchC = p / KHKW; @@ -286,12 +290,12 @@ export class WebGLUnpackedConv extends Conv { xh2 < XH && xw2 >= 0 && xw2 < XW) { - v[i] = _X(x); + value[i] = _X(x); } } ++p; } - return v; + return value; } `; return { @@ -321,7 +325,7 @@ export class WebGLUnpackedConv extends Conv { const outputLayout = inferenceHandler.createTextureLayoutFromShape(outputShape); const initValue = (inputs.length < 3) ? '0.0' : '_B(b)'; const sharedDim = im2colLayout.shape[3]; - const blendEnabled = inferenceHandler.session.backend.glContext.isBlendSupported; + const blendEnabled = inferenceHandler.session.backend.glContext.isBlendSupported && !this.activation; const sharedDimReadSize = blendEnabled && inferenceHandler.session.backend.matmulMaxBatchSize ? this.calcSharedDimReadSize(inferenceHandler.session.backend.matmulMaxBatchSize, sharedDim) : sharedDim; @@ -329,8 +333,12 @@ export class WebGLUnpackedConv extends Conv { if (inputs.length === 3) { samplers.push('B'); } + + const {activationFunction, applyActivation} = getActicationSnippet(this.activation); + const glsl = getGlsl(inferenceHandler.session.backend.glContext.version); const shaderSource = ` + ${activationFunction} float process(int indices[${rank}]) { int b[1]; b[0] = indices[1]; @@ -341,15 +349,16 @@ export class WebGLUnpackedConv extends Conv { int im2colOffset = im2col[0] * ${im2colLayout.strides[0]} + im2col[1] * ${ im2colLayout.strides[1]} + im2col[2] * ${im2colLayout.strides[2]} + sharedDimOffset; int kernelOffset = indices[1] * ${kLayout.strides[0]} + sharedDimOffset; - float sum = sharedDimOffset == 0 ? ${initValue} : 0.0; + float value = sharedDimOffset == 0 ? ${initValue} : 0.0; for (int i = 0; i < ${sharedDimReadSize}; ++i) { vec2 im2colCoords = offsetToCoords(im2colOffset, ${im2colLayout.width}, ${im2colLayout.height}); vec2 kernelCoords = offsetToCoords(kernelOffset, ${kLayout.width}, ${kLayout.height}); - sum += dot(${glsl.texture2D}(Im2Col, im2colCoords), ${glsl.texture2D}(K, kernelCoords)); + value += dot(${glsl.texture2D}(Im2Col, im2colCoords), ${glsl.texture2D}(K, kernelCoords)); ++im2colOffset; ++kernelOffset; } - return sum; + ${applyActivation} + return value; }`; return { inputLayouts: inputs.length === 3 ? [im2colLayout, kLayout, bLayout!] : [im2colLayout, kLayout], diff --git a/lib/backends/webgl/ops/fuse_utils.ts b/lib/backends/webgl/ops/fuse_utils.ts new file mode 100644 index 00000000..40eb6273 --- /dev/null +++ b/lib/backends/webgl/ops/fuse_utils.ts @@ -0,0 +1,23 @@ +import {glslRelu, glslSigmoid} from './unary-op'; + +export function getActicationSnippet(activation: string) { + let activationFunction = ''; + let activationName = ''; + switch (activation) { + case 'Relu': + activationName = glslRelu().name; + activationFunction = glslRelu().body; + break; + case 'Sigmoid': + activationName = glslSigmoid().name; + activationFunction = glslSigmoid().body; + break; + default: + activationName = ''; + activationFunction = ''; + } + const applyActivation = activation ? ` + value = ${activationName}(value);` : + ''; + return {activationFunction, applyActivation}; +} diff --git a/lib/backends/webgl/ops/matmul-pack.ts b/lib/backends/webgl/ops/matmul-pack.ts index 2f9e714e..280b2364 100644 --- a/lib/backends/webgl/ops/matmul-pack.ts +++ b/lib/backends/webgl/ops/matmul-pack.ts @@ -6,6 +6,7 @@ import {Tensor} from '../../../tensor'; import {BroadcastUtil} from '../../../util'; import {WebGLInferenceHandler} from '../inference-handler'; import {ProgramInfo, RunData, WebGLOperator} from '../types'; +import {getActicationSnippet} from './fuse_utils'; export class WebGLMatMulPacked extends MatMul implements WebGLOperator { run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] { @@ -25,8 +26,11 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator { const aRank = aShape.length; const bRank = bShape.length; const sharedDim = aShape[aShape.length - 1]; + + const {activationFunction, applyActivation} = getActicationSnippet(this.activation); // TODO:fix broadcasting const shaderSource = ` + ${activationFunction} vec4 process(int indices[${rank}]) { int a[${aRank}]; int b[${bRank}]; @@ -41,6 +45,7 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator { value += ${getA(aRank)}.ggaa * ${getB(bRank)}.baba; } ${processBias} + ${applyActivation} return value; }`; return { diff --git a/lib/graph.ts b/lib/graph.ts index 738ba032..6476e4e6 100644 --- a/lib/graph.ts +++ b/lib/graph.ts @@ -53,6 +53,8 @@ export declare namespace Graph { export interface Transformer { removeAllIdentityNodes(): void; removeAllDropoutNodes(): void; + + fuseConvActivationNodes(): void; // TODO: add generic functions to manipulate the graph } @@ -559,6 +561,7 @@ class GraphImpl implements Graph, Graph.Transformer { // apply common transform this.removeAllIdentityNodes(); this.removeAllDropoutNodes(); + this.fuseConvActivationNodes(); // apply initializer specific transform if (graphInitializer) { @@ -736,4 +739,27 @@ class GraphImpl implements Graph, Graph.Transformer { nodeIndex++; } } + + isActivation(n: Node): boolean { + switch (n.opType) { + // TODO: add other activation methods + case 'Relu': + case 'Sigmoid': + return true; + default: + return false; + } + } + + fuseConvActivationNodes() { + for (const node of this._nodes) { + if (node.opType === 'Conv') { + const next = this._allData[node.outputs[0]]._to; + if (next.length === 1 && this.isActivation(this._nodes[next[0]])) { + node.attributes.set('__internal_activation', 'string', (this._nodes[next[0]].opType)); + this.deleteNode(next[0]); + } + } + } + } } diff --git a/lib/ops/conv.ts b/lib/ops/conv.ts index 666eafa5..f0ea235b 100644 --- a/lib/ops/conv.ts +++ b/lib/ops/conv.ts @@ -17,6 +17,7 @@ export abstract class Conv implements Operator { this.kernelShape = attributes.getInts('kernel_shape', []); this.pads = attributes.getInts('pads', [0, 0, 0, 0]); this.strides = attributes.getInts('strides', [1, 1]); + this.activation = attributes.getString('__internal_activation', ''); } checkInputs(inputs: Tensor[]): boolean { @@ -88,4 +89,5 @@ export abstract class Conv implements Operator { protected kernelShape: number[]; protected pads: number[]; protected strides: number[]; + protected activation: string; } diff --git a/lib/ops/matmul.ts b/lib/ops/matmul.ts index eae672f9..91ab027a 100644 --- a/lib/ops/matmul.ts +++ b/lib/ops/matmul.ts @@ -9,7 +9,9 @@ import {Tensor} from '../tensor'; export abstract class MatMul implements Operator { abstract run(inferenceHandler: InferenceHandler, inputs: Tensor[]): Tensor[]|Promise; - initialize(attributes: Attribute): void {} + initialize(attributes: Attribute): void { + this.activation = attributes.getString('__internal_activation', ''); + } checkInputs(inputs: Tensor[]): boolean { if (!inputs || inputs.length !== 2) { @@ -38,4 +40,5 @@ export abstract class MatMul implements Operator { return true; } + protected activation: string; } diff --git a/test/data/super_resolution/super-resolution-10.onnx b/test/data/super_resolution/super-resolution-10.onnx new file mode 100644 index 00000000..53283c05 Binary files /dev/null and b/test/data/super_resolution/super-resolution-10.onnx differ diff --git a/test/data/super_resolution/test_data_set_0/input_0.pb b/test/data/super_resolution/test_data_set_0/input_0.pb new file mode 100644 index 00000000..8d3724a2 Binary files /dev/null and b/test/data/super_resolution/test_data_set_0/input_0.pb differ diff --git a/test/data/teams_model/msra_190729.onnx b/test/data/teams_model/msra_190729.onnx new file mode 100644 index 00000000..2b5ccc97 Binary files /dev/null and b/test/data/teams_model/msra_190729.onnx differ diff --git a/test/data/teams_model/test_data_set_0/input_0.pb b/test/data/teams_model/test_data_set_0/input_0.pb new file mode 100644 index 00000000..b14ff347 Binary files /dev/null and b/test/data/teams_model/test_data_set_0/input_0.pb differ diff --git a/test/data/teams_model/test_data_set_0/output_0.pb b/test/data/teams_model/test_data_set_0/output_0.pb new file mode 100644 index 00000000..019ff65a Binary files /dev/null and b/test/data/teams_model/test_data_set_0/output_0.pb differ