diff --git a/common/utils.js b/common/utils.js index a588fd8e..20dbab84 100644 --- a/common/utils.js +++ b/common/utils.js @@ -34,7 +34,57 @@ export async function getBufferFromUrl(url) { return arrayBuffer; } -export async function buildConstantByNpy(builder, url) { +// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript +export const toHalf = (function() { + const floatView = new Float32Array(1); + const int32View = new Int32Array(floatView.buffer); + + /* This method is faster than the OpenEXR implementation (very often + * used, eg. in Ogre), with the additional benefit of rounding, inspired + * by James Tursa's half-precision code. */ + return function toHalf(val) { + floatView[0] = val; + const x = int32View[0]; + + let bits = (x >> 16) & 0x8000; /* Get the sign */ + let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ + const e = (x >> 23) & 0xff; /* Using int is faster here */ + + /* If zero, or denormal, or exponent underflows too much for a denormal + * half, return signed zero. */ + if (e < 103) { + return bits; + } + + /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ + if (e > 142) { + bits |= 0x7c00; + /* If exponent was 0xff and one mantissa bit was set, it means NaN, + * not Inf, so make sure we set one mantissa bit too. */ + bits |= ((e == 255) && (x & 0x007fffff)) ? 0x0200 : 0; + return bits; + } + + /* If exponent underflows but not too much, return a denormal */ + if (e < 113) { + m |= 0x0800; + /* Extra rounding may overflow and set mantissa to 0 and exponent + * to 1, which is OK. */ + bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); + return bits; + } + + bits |= ((e - 112) << 10) | (m >> 1); + /* Extra rounding. An overflow will set mantissa to 0 and increment + * the exponent, which is OK. */ + bits += m & 1; + return bits; + }; +})(); + +// Convert npy data in original data type to `targetType`, only support +// 'float32' to 'float16' conversion currently.
+export async function buildConstantByNpy(builder, url, targetType) { const dataTypeMap = new Map([ ['f2', {type: 'float16', array: Uint16Array}], ['f4', {type: 'float32', array: Float32Array}], @@ -55,11 +105,22 @@ export async function buildConstantByNpy(builder, url) { throw new Error(`Data type ${npArray.dataType} is not supported.`); } const dimensions = npArray.shape; - const type = dataTypeMap.get(npArray.dataType).type; + let type = dataTypeMap.get(npArray.dataType).type; const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array; const dataView = new Uint8Array(npArray.data.buffer); const dataView2 = dataView.slice(); - const typedArray = new TypedArrayConstructor(dataView2.buffer); + let typedArray = new TypedArrayConstructor(dataView2.buffer); + if (type === 'float32' && targetType === 'float16') { + const uint16Array = new Uint16Array(typedArray.length); + for (let i = 0; i < typedArray.length; ++i) { + uint16Array[i] = toHalf(typedArray[i]); + } + typedArray = uint16Array; + type = targetType; + } else if (targetType !== undefined && type !== targetType) { + throw new Error(`Conversion from ${npArray.dataType} ` + + `to ${targetType} is not supported.`); + } return builder.constant({dataType: type, type, dimensions}, typedArray); } @@ -494,7 +555,8 @@ export function getDefaultLayout(deviceType) { // Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) { return 'nhwc'; - } else if (deviceType.indexOf('gpu') != -1) { + } else if (deviceType.indexOf('gpu') != -1 || + deviceType.indexOf('npu') != -1) { return 'nchw'; } } diff --git a/object_detection/index.html b/object_detection/index.html index bcc5c3dc..e795f22f 100644 --- a/object_detection/index.html +++ b/object_detection/index.html @@ -43,6 +43,9 @@ + diff --git a/object_detection/ssd_mobilenetv1_nchw.js b/object_detection/ssd_mobilenetv1_nchw.js index 035eaf92..fbbab007 100644 --- a/object_detection/ssd_mobilenetv1_nchw.js +++ b/object_detection/ssd_mobilenetv1_nchw.js @@ -7,6 +7,7 @@ export class SsdMobilenetV1Nchw { constructor() { this.context_ = null; this.deviceType_ = null; + this.targetDataType_ = 'float32'; this.model_ = null; this.builder_ = null; this.graph_ = null; @@ -57,9 +58,11 @@ ${nameArray[1]}_BatchNorm_batchnorm`; } const weightsName = this.weightsUrl_ + prefix + weightSuffix; - const weights = await buildConstantByNpy(this.builder_, weightsName); + const weights = await buildConstantByNpy( + this.builder_, weightsName, this.targetDataType_); const biasName = this.biasUrl_ + prefix + biasSuffix; - const bias = await buildConstantByNpy(this.builder_, biasName); + const bias = await buildConstantByNpy( + this.builder_, biasName, this.targetDataType_); options.padding = computePadding2DForAutoPad( /* nchw */[input.shape()[2], input.shape()[3]], /* oihw */[weights.shape()[2], weights.shape()[3]], @@ -69,7 +72,7 @@ ${nameArray[1]}_BatchNorm_batchnorm`; // TODO: Set clamp activation to options once it's supported in // WebNN DML backend. 
// Implement `clip` by `clamp` of WebNN API - if (this.deviceType_ == 'gpu') { + if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') { return this.builder_.clamp( this.builder_.conv2d(input, weights, options), {minValue: 0, maxValue: 6}); @@ -83,12 +86,17 @@ ${nameArray[1]}_BatchNorm_batchnorm`; async load(contextOptions) { this.context_ = await navigator.ml.createContext(contextOptions); this.deviceType_ = contextOptions.deviceType; + if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') { + this.targetDataType_ = 'float16'; + } this.builder_ = new MLGraphBuilder(this.context_); - const input = this.builder_.input('input', { - type: 'float32', + let input = this.builder_.input('input', { dataType: 'float32', dimensions: this.inputOptions.inputDimensions, }); + if (this.targetDataType_ === 'float16') { + input = this.builder_.cast(input, 'float16'); + } const strides = [2, 2]; const conv0 = await this.buildConv_( input, ['', '0', '', '165__cf__168'], @@ -249,7 +257,14 @@ ${nameArray[1]}_BatchNorm_batchnorm`; const concat1 = this.builder_.concat( [reshape6, reshape7, reshape8, reshape9, reshape10, reshape11], 1); - return {'boxes': concat0, 'scores': concat1}; + let boxes = concat0; + let scores = concat1; + + if (this.targetDataType_ === 'float16') { + boxes = this.builder_.cast(boxes, 'float32'); + scores = this.builder_.cast(scores, 'float32'); + } + return {boxes, scores}; } async build(outputOperand) {