-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into adrianl/qnn-support-gatherelems-gathernd
- Loading branch information
Showing
28 changed files
with
878 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// Licensed under the MIT License. | ||
|
||
import {DataType} from '../../../wasm-common'; | ||
import {TensorView} from '../../tensor-view'; | ||
import {ShapeUtil} from '../../util'; | ||
import {AttributeWithCacheKey, createAttributeWithCacheKey} from '../attribute-with-cache-key'; | ||
import {ComputeContext, ProgramInfo, ProgramUniform} from '../types'; | ||
|
||
import {createTensorShapeVariables, getMaxComponents, inputVariable, outputVariable, ShaderHelper, UniformsArrayType} from './common'; | ||
|
||
export interface DequantizeLinerAttributes extends AttributeWithCacheKey { | ||
axis: number; | ||
blockSize: number; | ||
} | ||
|
||
const validateInputs = (inputs: readonly TensorView[], attributes: DequantizeLinerAttributes): void => { | ||
if (inputs.length < 2 || inputs.length > 3) { | ||
throw new Error('DequantizeLinear requires 2 or 3 inputs.'); | ||
} | ||
if (inputs.length === 3 && inputs[1].dims === inputs[2].dims) { | ||
throw new Error('x-scale and x-zero-point must have the same shape.'); | ||
} | ||
if (inputs.length === 3 && inputs[0].dataType !== inputs[2].dataType) { | ||
throw new Error('x and x-zero-point must have the same data type.'); | ||
} | ||
if (inputs[0].dataType === DataType.int32 && inputs.length > 2) { | ||
throw new Error('In the case of dequantizing int32 there is no zero point.'); | ||
} | ||
if (inputs[1].dims.length !== 0 && inputs[1].dims.length !== 1 && inputs[1].dims.length !== inputs[0].dims.length) { | ||
throw new Error('scale input must be a scalar, a 1D tensor, or have the same rank as the input tensor.'); | ||
} | ||
// validate scale and zero-point input shapes | ||
if (inputs.length > 2) { | ||
// zero-point input type should be the same as input data type. | ||
if (inputs[0].dataType !== inputs[2].dataType) { | ||
throw new Error('x and x-zero-point must have the same data type.'); | ||
} | ||
// Scale and zero-point inputs must have the same shape | ||
if (inputs[1].dims.length !== inputs[2].dims.length) { | ||
throw new Error('scale and zero-point inputs must have the same rank.'); | ||
} | ||
if (!inputs[1].dims.map((d, i) => d === inputs[2].dims[i]).reduce((a, b) => a && b, true)) { | ||
throw new Error('scale and zero-point inputs must have the same shape.'); | ||
} | ||
} | ||
// Validate blockSize | ||
if (attributes.blockSize > 0) { | ||
// Block qunatization | ||
if (inputs[1].dims.length === 0 || (inputs[1].dims.length === 1 && inputs[1].dims[0] === 1)) { | ||
throw new Error('blockSize must be set only for block quantization.'); | ||
} | ||
if (!inputs[1] | ||
.dims.map((d, i) => i === attributes.axis || d === inputs[0].dims[i]) | ||
.reduce((a, b) => a && b, true)) { | ||
throw new Error('For block qunatization, scale input shape to match the input shape except for the axis'); | ||
} | ||
// Scale input rank should be same as the input rank | ||
if (inputs[1].dims.length !== inputs[0].dims.length) { | ||
throw new Error('For block qunatization the scale input rank must be the same as the x rank.'); | ||
} | ||
const dI = inputs[0].dims[attributes.axis]; | ||
const si = inputs[1].dims[attributes.axis]; | ||
if (attributes.blockSize < Math.ceil(dI / si) || attributes.blockSize > Math.ceil(dI / (si - 1) - 1)) { | ||
throw new Error('blockSize must be with in the range [ceil(dI / Si), ceil(dI / (Si - 1) - 1)].'); | ||
} | ||
} | ||
}; | ||
|
||
const createDequantizeLinearProgramInfo = | ||
(inputs: readonly TensorView[], attributes: DequantizeLinerAttributes): ProgramInfo => { | ||
const axis = ShapeUtil.normalizeAxis(attributes.axis, inputs[0].dims.length); | ||
const inputType = inputs[0].dataType; | ||
const isSigned = inputType === DataType.int8; | ||
const outputShape = inputs[0].dims; // output shape is same as the input shape | ||
const dataType = inputs[1].dataType; // output type is same as the the scale input type | ||
const outputSize = ShapeUtil.size(outputShape); | ||
const isPacked = inputType === DataType.int8 || inputType === DataType.uint8; | ||
const inputShape = isPacked ? [Math.ceil(ShapeUtil.size(inputs[0].dims) / 4)] : inputs[0].dims; | ||
const scaleShape = inputs[1].dims; | ||
const zeroPointInput = inputs.length > 2 ? inputs[2] : undefined; | ||
const zeroPointShape = zeroPointInput ? | ||
(isPacked ? [Math.ceil(ShapeUtil.size(zeroPointInput.dims) / 4)] : zeroPointInput.dims) : | ||
undefined; | ||
// Scales input is a scaler for per-tensor/per-layer quantization, 1-D tensor for per-axis quantization | ||
// or tensor with same rank as input for blocked quantization. | ||
const perLayerQuantization = scaleShape.length === 0 || (scaleShape.length === 1 && scaleShape[0] === 1); | ||
const perAxisQuantization = perLayerQuantization === false && scaleShape.length === 1; | ||
// Left unnecessary commented-out assignment for documentation | ||
// const blockQuantization = perLayerQuantization === false && perAxisQuantization === false; | ||
const maxComponents = getMaxComponents(outputSize); | ||
const useComponents = perLayerQuantization && (!isPacked || maxComponents === 4); | ||
const components = useComponents ? maxComponents : 1; | ||
const inputComponent = (useComponents && !isPacked) ? maxComponents : 1; | ||
const input = inputVariable('input', isPacked ? DataType.uint32 : inputType, inputShape.length, inputComponent); | ||
const scale = inputVariable('scale', dataType, scaleShape.length); | ||
const zeroPoint = zeroPointInput ? | ||
inputVariable('zero_point', isPacked ? DataType.uint32 : inputType, zeroPointShape!.length) : | ||
undefined; | ||
const output = outputVariable('output', dataType, outputShape.length, components); | ||
const inputVariables = [input, scale]; | ||
if (zeroPoint) { | ||
inputVariables.push(zeroPoint); | ||
} | ||
const inputShapes = [inputShape, scaleShape]; | ||
if (zeroPointInput) { | ||
inputShapes.push(zeroPointShape!); | ||
} | ||
const programUniforms: ProgramUniform[] = [ | ||
{type: DataType.uint32, data: outputSize / components}, {type: DataType.uint32, data: axis}, | ||
{type: DataType.uint32, data: attributes.blockSize}, ...createTensorShapeVariables(...inputShapes, outputShape) | ||
]; | ||
const getShaderSource = (shaderHelper: ShaderHelper) => { | ||
const uniforms: UniformsArrayType = | ||
[{name: 'output_size', type: 'u32'}, {name: 'axis', type: 'u32'}, {name: 'block_size', type: 'u32'}]; | ||
return ` | ||
${shaderHelper.registerUniforms(uniforms).declareVariables(...inputVariables, output)} | ||
${shaderHelper.mainStart()} | ||
${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')} | ||
let output_indices = ${output.offsetToIndices('global_idx')}; | ||
// Set input x | ||
${(() => { | ||
if (isPacked) { | ||
return ` | ||
let input = ${input.getByOffset('global_idx / 4')}; | ||
let x_vec = ${isSigned ? 'unpack4xI8(input)' : 'unpack4xU8(input)'}; | ||
let x_value = ${components === 1 ? 'x_vec[global_idx % 4]' : 'x_vec'};`; | ||
} else { | ||
return `let x_value = ${input.getByOffset('global_idx')};`; | ||
} | ||
})()}; | ||
// Set scale input | ||
${(() => { | ||
if (perLayerQuantization) { | ||
// scale input is a scalar () | ||
return `let scale_value= ${scale.getByOffset('0')}`; | ||
} else if (perAxisQuantization) { | ||
// scale input is a 1D tensor | ||
return ` | ||
let scale_index = ${output.indicesGet('output_indices', 'uniforms.axis')}; | ||
let scale_value= ${scale.getByOffset('scale_index')};`; | ||
} else { | ||
// Block quantization. Scale input rank is same as input/output rank. | ||
return ` | ||
var scale_indices: ${scale.type.indices} = output_indices; | ||
let index = ${scale.indicesGet('scale_indices', 'uniforms.axis')} / uniforms.block_size; | ||
${scale.indicesSet('scale_indices', 'uniforms.axis', 'index')}; | ||
let scale_value= ${scale.getByIndices('scale_indices')};`; | ||
} | ||
})()}; | ||
// Set zero-point input | ||
${(() => { | ||
if (zeroPoint) { | ||
if (perLayerQuantization) { | ||
// zero-point input is a scalar | ||
if (isPacked) { | ||
return ` | ||
let zero_point_input = ${zeroPoint.getByOffset('0')}; | ||
let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'}; | ||
let zero_point_value= zero_point_vec[0]`; | ||
} else { | ||
return `let zero_point_value = ${zeroPoint.getByOffset('0')}`; | ||
} | ||
} else if (perAxisQuantization) { | ||
// zero-point input is a 1D tensor | ||
if (isPacked) { | ||
return ` | ||
let zero_point_index = ${output.indicesGet('output_indices', 'uniforms.axis')}; | ||
let zero_point_input = ${zeroPoint.getByOffset('zero_point_index / 4')}; | ||
let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'}; | ||
let zero_point_value = zero_point_vec[zero_point_index % 4]`; | ||
} else { | ||
return ` | ||
let zero_point_index = ${output.indicesGet('output_indices', 'uniforms.axis')}; | ||
let zero_point_value = ${zeroPoint.getByOffset('zero_point_index')};`; | ||
} | ||
} else { | ||
// BlockedQuantization. The zero-point input shape is same as the input shape except along axis. | ||
if (isPacked) { | ||
return ` | ||
let zero_point_offset = ${scale.indicesToOffset('scale_indices')}; | ||
let zero_point_input = ${zeroPoint.getByOffset('zero_point_offset / 4')}; | ||
let zero_point_vec = ${isSigned ? 'unpack4xI8(zero_point_input)' : 'unpack4xU8(zero_point_input)'}; | ||
let zero_point_value = zero_point_vec[zero_point_offset % 4];`; | ||
} else { | ||
return `let zero_point_value = ${zeroPoint.getByIndices('scale_indices')};`; | ||
} | ||
} | ||
} else { | ||
return `let zero_point_value = ${isPacked ? (isSigned ? 'i32' : 'u32') : input.type.value}(0);`; | ||
} | ||
})()}; | ||
// Compute and write output | ||
${output.setByOffset('global_idx', `${output.type.value}(x_value - zero_point_value) * scale_value`)}; | ||
}`; | ||
}; | ||
return { | ||
name: 'DequantizeLinear', | ||
shaderCache: | ||
{hint: attributes.cacheKey, inputDependencies: zeroPoint ? ['rank', 'rank', 'rank'] : ['rank', 'rank']}, | ||
getShaderSource, | ||
getRunData: () => ({ | ||
outputs: [{dims: outputShape, dataType}], | ||
dispatchGroup: {x: Math.ceil(outputSize / components / 64), y: 1, z: 1}, | ||
programUniforms | ||
}) | ||
}; | ||
}; | ||
|
||
export const dequantizeLinear = (context: ComputeContext, attributes: DequantizeLinerAttributes): void => { | ||
validateInputs(context.inputs, attributes); | ||
context.compute(createDequantizeLinearProgramInfo(context.inputs, attributes)); | ||
}; | ||
|
||
export const parseDequantizeLinearAttributes = (attributes: Record<string, unknown>): DequantizeLinerAttributes => | ||
createAttributeWithCacheKey({axis: attributes.axis as number, blockSize: attributes.blockSize as number}); |
Oops, something went wrong.