Skip to content

Commit

Permalink
[js/webgpu] workaround the matmul vec4 issue
Browse files Browse the repository at this point in the history
This PR uses `indicesToOffset` instead of `getByIndices`/`setByIndices` to
read/write buffer data in matmul, because the vec4 matmul path does not produce
correct results when the shape size is large (tested with mobilenetv2).
This appears to be an Intel driver bug related to FXC support. Everything works
correctly after switching to DXC, even without this PR.
  • Loading branch information
qjia7 committed Sep 1, 2023
1 parent c961f67 commit 52dd45e
Showing 1 changed file with 12 additions and 11 deletions.
23 changes: 12 additions & 11 deletions js/web/lib/wasm/jsep/webgpu/ops/3rd-party/matmul_packed_webgpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ const matMulReadWriteFnSource =
resStr += `\naIndices[${i}] = 0;`;
});
resStr += `\naIndices[${aRank - 2}] = u32(row);
aIndices[${aRank - 1}] = u32(colIn);`;
aIndices[${aRank - 1}] = u32(col);`;
return resStr;
};
const getBIndices = () => {
Expand All @@ -372,7 +372,7 @@ const matMulReadWriteFnSource =
resStr += `\nbIndices[${i}] = 0;`;
});
resStr += `\nbIndices[${bRank - 2}] = u32(row);
bIndices[${bRank - 1}] = u32(colIn);`;
bIndices[${bRank - 1}] = u32(col);`;
return resStr;
};
const source = `
Expand All @@ -383,7 +383,8 @@ const matMulReadWriteFnSource =
if(row < dimAOuter && col < dimInner)
{
${getAIndices()}
value = ${aVariable.getByIndices('aIndices')};
// TODO: recover to use getByIndices/setByIndices when dxc is supported.
value = a[${aVariable.indicesToOffset('aIndices')}${component === 1 ? '' : `/ ${component}`}];
}
return value;
}
Expand All @@ -395,7 +396,7 @@ const matMulReadWriteFnSource =
if(row < dimInner && col < dimBOuter)
{
${getBIndices()}
value = ${bVariable.getByIndices('bIndices')};
value = b[${bVariable.indicesToOffset('bIndices')}${component === 1 ? '' : `/ ${component}`}];
}
return value;
}
Expand All @@ -404,10 +405,11 @@ const matMulReadWriteFnSource =
let col = colIn * ${component};
if (row < dimAOuter && col < dimBOuter) {
var value = valueIn;
let coords = vec3<i32>(batch, row, colIn);
let coords = vec3<i32>(batch, row, col);
${hasBias ? 'value = value + bias[colIn];' : ''}
${applyActivation}
${outputVariable.setByIndices('vec3<u32>(coords)', 'value')}
result[${outputVariable.indicesToOffset('vec3<u32>(coords)')}${
component === 1 ? '' : `/ ${component}`}] = value;
}
}
`;
Expand Down Expand Up @@ -446,18 +448,17 @@ export const createMatmulProgramInfo =
];

const components = isVec4 ? 4 : 1;
const A = inputVariable('a', inputs[0].dataType, [...outerDimsA, dimAOuter, dimInner / components], components);
const B = inputVariable('b', inputs[1].dataType, [...outerDimsB, dimInner, dimBOuter / components], components);
const output =
outputVariable('result', inputs[0].dataType, [batchSize, dimAOuter, dimBOuter / components], components);
const A = inputVariable('a', inputs[0].dataType, [...outerDimsA, dimAOuter, dimInner], components);
const B = inputVariable('b', inputs[1].dataType, [...outerDimsB, dimInner, dimBOuter], components);
const output = outputVariable('result', inputs[0].dataType, [batchSize, dimAOuter, dimBOuter], components);
variables.push(A);
variables.push(B);
variables.push(output);
const inputVariables = [A, B];
const hasBias = inputs.length > 2;
const declareFunctions = matMulReadWriteFnSource(component, hasBias, applyActivation, variables);
if (hasBias) {
inputVariables.push(inputVariable('bias', inputs[2].dataType, [dimBOuter / components], components));
inputVariables.push(inputVariable('bias', inputs[2].dataType, [dimBOuter], components));
}
const getShaderSource = (shaderHelper: ShaderHelper) => `
const dimAOuter: i32 = ${dimAOuter};
Expand Down

0 comments on commit 52dd45e

Please sign in to comment.