diff --git a/benchmark/super_resolution_model_zoo/karma.conf.js b/benchmark/super_resolution_model_zoo/karma.conf.js index 92c2c1fc..1daabad6 100644 --- a/benchmark/super_resolution_model_zoo/karma.conf.js +++ b/benchmark/super_resolution_model_zoo/karma.conf.js @@ -59,6 +59,7 @@ module.exports = function(config) { printMatches: false, // To enable pack, run 'PACK=1 npm run test' usePackedGlTexture: config.usePackedGlTexture==1 ? true : false, + runIteration: config.runIteration ? config.runIteration : 10, profile: config.profile }, browsers: ['ChromeTest', 'ChromeDebug', 'Edge', 'Safari'], diff --git a/benchmark/super_resolution_model_zoo/package.json b/benchmark/super_resolution_model_zoo/package.json index 76867407..f5a9b000 100644 --- a/benchmark/super_resolution_model_zoo/package.json +++ b/benchmark/super_resolution_model_zoo/package.json @@ -6,8 +6,8 @@ "scripts": { "build": "webpack --config ./webpack.conf.js --mode production", "build-debug": "webpack --config ./webpack.conf.js --mode development", - "test": "karma start --browsers ChromeTest --single-run --usePackedGlTexture=$PACK", - "profile": "karma start --browsers ChromeTest --single-run --profile --usePackedGlTexture=$PACK", + "test": "karma start --browsers ChromeTest --single-run --usePackedGlTexture=$PACK --runIteration=$RUNCOUNT", + "profile": "karma start --browsers ChromeTest --single-run --profile --usePackedGlTexture=$PACK --runIteration=$RUNCOUNT", "test-debug": "karma start --browsers ChromeDebug", "test-edge": "karma start --browsers Edge --single-run", "test-safari": "karma start --browsers Safari --single-run" diff --git a/benchmark/super_resolution_model_zoo/src/index.js b/benchmark/super_resolution_model_zoo/src/index.js index 776cd7e1..725ed246 100644 --- a/benchmark/super_resolution_model_zoo/src/index.js +++ b/benchmark/super_resolution_model_zoo/src/index.js @@ -99,9 +99,9 @@ async function runBenchmark(benchmarkData, backend, imageSize) { const imageLoader = new 
ImageLoader(imageSize, imageSize); const durations = []; for(const input of benchmarkData.inputs) { - console.log(`Running ${input.name}`) + console.log(`Running ${input.name} for ${runIteration} iterations.`) const imageData = await imageLoader.getImageData(input.url); - for(let i = 0 ; i < 10; i++) { + for(let i = 0 ; i < runIteration; i++) { const outputData = await impl.runModel(imageData.data); durations.push(impl.duration); } @@ -125,6 +125,7 @@ class TensorFlowResnetBenchmark { this.imageSize = imageSize; tf.disposeVariables(); tf.env().set('WEBGL_PACK', pack_texture); + console.log(`Pack mode enabled: ${tf.env().getBool('WEBGL_PACK')}`); if(backend) { console.log(`Setting the backend to ${backend}`); @@ -261,6 +262,7 @@ const results = []; const browser = __karma__.config.browser[0]; const profile = __karma__.config.profile; const pack_texture = __karma__.config.usePackedGlTexture; +const runIteration = __karma__.config.runIteration; console.log(`browser: ${browser}`) describe('super resolution Tests', ()=> { diff --git a/lib/backends/webgl/ops/conv-pack.ts b/lib/backends/webgl/ops/conv-pack.ts index 414bf64d..cb130279 100644 --- a/lib/backends/webgl/ops/conv-pack.ts +++ b/lib/backends/webgl/ops/conv-pack.ts @@ -15,7 +15,7 @@ import {WebGLReshapePacked} from './reshape-packed'; export class WebGLConvPacked extends Conv { protected artifacts: Artifact[]; protected programInfo: ProgramInfo[]; - + protected outputShape: number[]; run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] { const programManager = inferenceHandler.session.programManager; const xshape = inputs[0].dims.slice(); @@ -33,8 +33,8 @@ export class WebGLConvPacked extends Conv { `autpPad:${this.autoPad}, dilations:${this.dilations}, group:${this.group}, kernelShape:${ this.kernelShape}, pads:${this.pads}, strides:${this.strides}`); - const outputShape = WebGLConv.calcOutputShape(xshape, kshape, this.dilations, this.pads, this.strides); - const im2col = new 
WebGLIm2ColPacked(outputShape, kshape, this.dilations, this.pads, this.strides); + this.outputShape = WebGLConv.calcOutputShape(xshape, kshape, this.dilations, this.pads, this.strides); + const im2col = new WebGLIm2ColPacked(this.outputShape, kshape, this.dilations, this.pads, this.strides); const matmul = new WebGLMatMulPacked(); const reshape = new WebGLReshapePacked(); // shape for kernel reshape @@ -76,11 +76,10 @@ export class WebGLConvPacked extends Conv { inferenceHandler.checkAndUpdateTextureForm(this.artifacts[2], runDataMatmul); programManager.run(this.artifacts[2], runDataMatmul); const matmulOutput = runDataMatmul.outputTextureData.tensor; - // reshape output const outputShapeTensor = new Tensor( - [outputShape.length], 'int32', undefined, undefined, - new Int32Array([outputShape[0], outputShape[1], outputShape[2], outputShape[3]])); + [this.outputShape.length], 'int32', undefined, undefined, + new Int32Array([this.outputShape[0], this.outputShape[1], this.outputShape[2], this.outputShape[3]])); assert(this.artifacts.length > 2, () => 'expect at least 3 artifacts created'); if (this.artifacts.length === 3) { diff --git a/lib/backends/webgl/ops/matmul-pack.ts b/lib/backends/webgl/ops/matmul-pack.ts index 2f9e714e..eeefa90b 100644 --- a/lib/backends/webgl/ops/matmul-pack.ts +++ b/lib/backends/webgl/ops/matmul-pack.ts @@ -4,8 +4,10 @@ import {MatMul} from '../../../ops/matmul'; import {Tensor} from '../../../tensor'; import {BroadcastUtil} from '../../../util'; +import {getGlsl} from '../glsl-source'; import {WebGLInferenceHandler} from '../inference-handler'; import {ProgramInfo, RunData, WebGLOperator} from '../types'; +import {getCoordsDataType} from '../utils'; export class WebGLMatMulPacked extends MatMul implements WebGLOperator { run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] { @@ -13,7 +15,7 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator { } createProgramInfo(handler: WebGLInferenceHandler, 
inputs: Tensor[]): ProgramInfo { const hasBias = inputs.length > 2; - const processBias = hasBias ? `value += vec4(getBias(a[0]*2).xx, getBias(a[0]*2).yy);` : ``; + const processBias = hasBias ? 'result += getBiasAtOutCoords();' : ''; const aShape = inputs[0].dims; const bShape = inputs[1].dims; const outputShape = BroadcastUtil.calcShape(aShape, bShape, true); @@ -21,34 +23,43 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator { if (!outputShape) { throw new Error('Can\'t use matmul on the given tensors'); } - const rank = outputShape.length; + + const sharedDim = aShape[aShape.length - 1]; + const sharedDimIndex = Math.ceil(sharedDim / 2); + const aRank = aShape.length; const bRank = bShape.length; - const sharedDim = aShape[aShape.length - 1]; - // TODO:fix broadcasting + + const glsl = getGlsl(handler.session.backend.glContext.version); + const coordsDataType = getCoordsDataType(outputShape.length); + const allGlChannels = ['x', 'y', 'z', 'w', 'u', 'v']; + const shaderSource = ` - vec4 process(int indices[${rank}]) { - int a[${aRank}]; - int b[${bRank}]; - bcastMatmulIndices_A(indices, a); - bcastMatmulIndices_B(indices, b); + void main() { + ${coordsDataType} rc = getOutputCoords(); + + vec4 result = vec4(0); + + for (int i = 0; i < ${sharedDimIndex}; i++) { + vec4 a = getA(${getA(allGlChannels, aRank)}); + vec4 b = getB(${getB(allGlChannels, bRank)}); + + result += (a.rrbb * b.rgrg); + result += (a.ggaa * b.baba); + } + + ${processBias} + + ${glsl.output} = result; + }`; - vec4 value; - for (int k=0; k<((${sharedDim}+1)/2); ++k) { - a[${aRank - 1}] = k; - b[${bRank - 2}] = k; - value += ${getA(aRank)}.rrbb * ${getB(bRank)}.rgrg; - value += ${getA(aRank)}.ggaa * ${getB(bRank)}.baba; - } - ${processBias} - return value; - }`; return { inputLayouts: inputs.map((t, i) => handler.getOrCreateTextureLayout(t, 4, true, inputs[i].dims, true)), outputLayout: handler.createTextureLayoutFromShape(outputShape, 4, outputShape, {isPacked: true, 
reverseWH: true}), samplers: hasBias ? ['A', 'B', 'Bias'] : ['A', 'B'], shaderSource, + hasMain: true, expectPackedInputs: true, expectPackedOutputs: true, }; @@ -64,22 +75,22 @@ export class WebGLMatMulPacked extends MatMul implements WebGLOperator { } } -function getA(outputRank: number): string { - let res = 'getA('; - for (let i = 0; i < outputRank - 2; i++) { - res += `a[${i}], `; +function getA(allGlChannels: string[], rank: number): string { + let res = ''; + for (let i = 0; i < rank - 2; i++) { + res += `rc.${allGlChannels[i]}, `; } - res += `a[${outputRank - 2}]*2, ` + - 'k*2)'; + res += `rc.${allGlChannels[rank - 2]}, ` + + 'i<<1'; return res; } -function getB(outputRank: number): string { - let res = 'getB('; - for (let i = 0; i < outputRank - 2; i++) { - res += `b[${i}], `; +function getB(allGlChannels: string[], rank: number): string { + let res = ''; + for (let i = 0; i < rank - 2; i++) { + res += `rc.${allGlChannels[i]}, `; } - res += 'k*2, ' + - `b[${outputRank - 1}]*2)`; + res += 'i<<1, ' + + `rc.${allGlChannels[rank - 1]}`; return res; } diff --git a/test/unittests/backends/webgl/test_matmul_packed.ts b/test/unittests/backends/webgl/test_matmul_packed.ts new file mode 100644 index 00000000..f4eeab9b --- /dev/null +++ b/test/unittests/backends/webgl/test_matmul_packed.ts @@ -0,0 +1,247 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
+import {expect} from 'chai'; + +import {Backend, InferenceHandler, SessionHandler} from '../../../../lib/backend'; +import {WebGLBackend} from '../../../../lib/backends/backend-webgl'; +import {WebGLInferenceHandler} from '../../../../lib/backends/webgl/inference-handler'; +import {WebGLMatMulPacked} from '../../../../lib/backends/webgl/ops/matmul-pack'; +import {Profiler} from '../../../../lib/instrument'; +import {Tensor} from '../../../../lib/tensor'; +import {ShapeUtil} from '../../../../lib/util'; + +import {createAscendingArray} from './test_utils'; +import {createTextureFromArray} from './test_utils'; + +let backend: Backend|undefined; +let sessionhandler: SessionHandler|undefined; +let inferenceHandler: InferenceHandler|undefined; + +describe('#UnitTest# - packed matmul - Tensor matmul', () => { + before('Initialize Context', async () => { + const profiler = Profiler.create(); + backend = await Backend('webgl'); + // Explicitly set to true to trigger packed version + (backend as WebGLBackend).pack = true; + sessionhandler = backend.createSessionHandler({profiler}); + inferenceHandler = sessionhandler.createInferenceHandler(); + }); + + // Set it back to false, apparently this state is sticky throughout all the tests running in same browser session.. + after('Resetting Context', () => { + (backend as WebGLBackend).pack = false; + }); + + const testDataSet = getTestData(); + for (let k = 0; k < testDataSet.length; ++k) { + const testData = testDataSet[k]; + describe(`Test matmul ${JSON.stringify(testData)}`, () => {}); + it(`Test packed matmul kernel `, () => { + const webglInferenceHandler = inferenceHandler as WebGLInferenceHandler; + + // TODO support WebGl 1.0 + if (webglInferenceHandler.session.textureManager.glContext.version === 1) { + console.log('Running packed matmul with webgl1 is not supported. 
Skipping.'); + return; + } + + const op = new WebGLMatMulPacked(); + + const elementCountA = testData.elementCountA; + const elementCountB = testData.elementCountB; + + const inputTensorShapeA = testData.inputShapeA; + const inputTextureShapeA = testData.inputTextureShapeA; + + const inputTensorShapeB = testData.inputShapeB; + const inputTextureShapeB = testData.inputTextureShapeB; + + // create input data and tensor. The input data will be used to verify if the output tensor contains the + // same value but possibly different order depending on our packing algorithm. + const inputDataA = createAscendingArray(elementCountA); + const inputDataB = createAscendingArray(elementCountB); + const inputTensorA = new Tensor(inputTensorShapeA, 'float32', undefined, undefined, inputDataA); + const inputTensorB = new Tensor(inputTensorShapeB, 'float32', undefined, undefined, inputDataB); + + // manually create packed texture from inputTensor, and insert in cache + const gl = webglInferenceHandler.session.textureManager.glContext.gl; + + webglInferenceHandler.session.textureManager.glContext.checkError(); + const webglTextureA = createTextureFromArray( + webglInferenceHandler.session.textureManager.glContext, testData.rawInputA ? testData.rawInputA : inputDataA, + gl.RGBA, inputTextureShapeA[0], inputTextureShapeA[1]); + const webglTextureB = createTextureFromArray( + webglInferenceHandler.session.textureManager.glContext, testData.rawInputB ? testData.rawInputB : inputDataB, + gl.RGBA, inputTextureShapeB[0], inputTextureShapeB[1]); + + webglInferenceHandler.session.textureManager.glContext.checkError(); + const packedShapeA = inputTextureShapeA; + const textureDataA = { + width: inputTextureShapeA[0], + height: inputTextureShapeA[1], + channels: 4 as const, + isPacked: true, + shape: packedShapeA, + strides: ShapeUtil.computeStrides(packedShapeA), + unpackedShape: inputTensorShapeA, + tensor: inputTensorA, + texture: webglTextureA!
+ }; + + const packedShapeB = inputTextureShapeB; + const textureDataB = { + width: inputTextureShapeB[0], + height: inputTextureShapeB[1], + channels: 4 as const, + isPacked: true, + shape: packedShapeB, + strides: ShapeUtil.computeStrides(packedShapeB), + unpackedShape: inputTensorShapeB, + tensor: inputTensorB, + texture: webglTextureB! + }; + + webglInferenceHandler.setTextureData(inputTensorA.dataId, textureDataA, true); + webglInferenceHandler.setTextureData(inputTensorB.dataId, textureDataB, true); + + const inputList = testData.biasValue ? + [ + inputTensorA, inputTensorB, + new Tensor([1], 'float32', undefined, undefined, new Float32Array([testData.biasValue])) + ] : + [inputTensorA, inputTensorB]; + + // compile shader code + const programInfo = op.createProgramInfo(inferenceHandler! as WebGLInferenceHandler, inputList); + + const artifact = webglInferenceHandler.session.programManager.build(programInfo); + webglInferenceHandler.session.programManager.setArtifact(op, artifact); + + // run kernel and get output + const runData = op.createRunData(webglInferenceHandler, artifact.programInfo, inputList); + webglInferenceHandler.session.programManager.run(artifact, runData); + const result = runData.outputTextureData.tensor.data; + + webglInferenceHandler.session.textureManager.glContext.checkError(); + // verify result.
+ const expectedOutput = testData.expectedOutput; + expect(result).to.not.equal(null); + let batchMultiplier = 1; + if (testData.inputShapeA.length > 2) { + batchMultiplier = testData.inputShapeA[0]; + } + if (testData.inputShapeB.length > 2) { + batchMultiplier = Math.max(batchMultiplier, testData.inputShapeB[0]); + } + + expect(result).to.have.lengthOf( + batchMultiplier * testData.inputShapeA[testData.inputShapeA.length - 2] * + testData.inputShapeB[testData.inputShapeB.length - 1]); + expect(result).to.deep.equal(expectedOutput); + }); + } +}); +interface TestData { + elementCountA: number; + elementCountB: number; + inputShapeA: number[]; + inputShapeB: number[]; + outputShape: number[]; + inputTextureShapeA: number[]; + inputTextureShapeB: number[]; + outputTextureShape: number[]; + expectedOutput: Float32Array; + // The value of bias matrix that will be broadcasted to the corresponding shape in matmul. + // i.e. If biasValue = 1, then bias matrix is [1], when being added to 2x2 matmul result, it will be bcasted to + // [1, 1] + // [1, 1] + biasValue?: number; + // If empty, the test will use auto-generated data. + rawInputA?: Float32Array; + // If empty, the test will use auto-generated data. 
+ rawInputB?: Float32Array; +} +function getTestData(): TestData[] { + return [ + // test 2D tensor + { + elementCountA: 4, + elementCountB: 4, + inputShapeA: [2, 2], + inputShapeB: [2, 2], + outputShape: [2, 2], + inputTextureShapeA: [1, 1], + inputTextureShapeB: [1, 1], + outputTextureShape: [1, 1], + expectedOutput: new Float32Array([7, 10, 15, 22]), + }, + { + elementCountA: 4, + elementCountB: 4, + inputShapeA: [2, 2], + inputShapeB: [2, 2], + outputShape: [2, 2], + inputTextureShapeA: [1, 1], + inputTextureShapeB: [1, 1], + outputTextureShape: [1, 1], + biasValue: 1, + expectedOutput: new Float32Array([8, 11, 16, 23]), + }, + { + elementCountA: 6, + elementCountB: 6, + inputShapeA: [2, 3], + inputShapeB: [3, 2], + outputShape: [2, 2], + inputTextureShapeA: [2, 1], + inputTextureShapeB: [1, 2], + outputTextureShape: [1, 1], + expectedOutput: new Float32Array([22, 28, 49, 64]), + rawInputA: new Float32Array([1, 2, 4, 5, 3, 0, 6, 0]), + rawInputB: new Float32Array([1, 2, 3, 4, 5, 6, 0, 0]), + }, + { + elementCountA: 6, + elementCountB: 6, + inputShapeA: [2, 3], + inputShapeB: [3, 2], + outputShape: [2, 2], + inputTextureShapeA: [2, 1], + inputTextureShapeB: [1, 2], + outputTextureShape: [1, 1], + expectedOutput: new Float32Array([23, 29, 50, 65]), + biasValue: 1, + rawInputA: new Float32Array([1, 2, 4, 5, 3, 0, 6, 0]), + rawInputB: new Float32Array([1, 2, 3, 4, 5, 6, 0, 0]), + }, + { + elementCountA: 12, + elementCountB: 12, + inputShapeA: [2, 2, 3], + inputShapeB: [2, 3, 2], + outputShape: [2, 2, 2], + inputTextureShapeA: [2, 2], + inputTextureShapeB: [1, 4], + outputTextureShape: [2, 1], + expectedOutput: new Float32Array([23, 29, 50, 65, 23, 29, 50, 65]), + biasValue: 1, + rawInputA: new Float32Array([1, 2, 4, 5, 3, 0, 6, 0, 1, 2, 4, 5, 3, 0, 6, 0]), + rawInputB: new Float32Array([1, 2, 3, 4, 5, 6, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0]), + }, + // test bcast + { + elementCountA: 12, + elementCountB: 6, + inputShapeA: [2, 2, 3], + inputShapeB: [3, 2], + outputShape: 
[2, 2, 2], + inputTextureShapeA: [2, 2], + inputTextureShapeB: [1, 2], + outputTextureShape: [2, 1], + expectedOutput: new Float32Array([23, 29, 50, 65, 23, 29, 50, 65]), + biasValue: 1, + rawInputA: new Float32Array([1, 2, 4, 5, 3, 0, 6, 0, 1, 2, 4, 5, 3, 0, 6, 0]), + rawInputB: new Float32Array([1, 2, 3, 4, 5, 6, 0, 0]), + }, + ]; +} diff --git a/test/unittests/index.ts b/test/unittests/index.ts index 13a5575c..3c26b870 100644 --- a/test/unittests/index.ts +++ b/test/unittests/index.ts @@ -10,6 +10,7 @@ if (typeof window !== 'undefined' && !onnx.backend.webgl.disabled) { require('./backends/webgl/test_concat_packed'); require('./backends/webgl/test_depth_to_space'); require('./backends/webgl/test_reshape_packed'); + require('./backends/webgl/test_matmul_packed'); } // require('./api/onnx');