diff --git a/common/utils.js b/common/utils.js
index a588fd8e..20dbab84 100644
--- a/common/utils.js
+++ b/common/utils.js
@@ -34,7 +34,57 @@ export async function getBufferFromUrl(url) {
return arrayBuffer;
}
-export async function buildConstantByNpy(builder, url) {
+// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript
+/**
+ * Converts a number to the bit pattern of its IEEE 754 half-precision
+ * (float16) value. The input is first rounded to float32 by storing it in a
+ * Float32Array, then narrowed to 16 bits.
+ * @param {number} val - The value to convert.
+ * @return {number} The half-float bits as an integer in [0, 0xffff].
+ */
+export const toHalf = (function() {
+  // Two views over one 4-byte buffer: write a float32, read its raw bits.
+  const floatView = new Float32Array(1);
+  const int32View = new Int32Array(floatView.buffer);
+
+  /* This method is faster than the OpenEXR implementation (very often
+   * used, eg. in Ogre), with the additional benefit of rounding, inspired
+   * by James Tursa's half-precision code. */
+  return function toHalf(val) {
+    floatView[0] = val;
+    const x = int32View[0];
+
+    let bits = (x >> 16) & 0x8000; /* Get the sign */
+    let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
+    const e = (x >> 23) & 0xff; /* Using int is faster here */
+
+    /* If zero, or denormal, or exponent underflows too much for a denormal
+     * half, return signed zero. */
+    if (e < 103) {
+      return bits;
+    }
+
+    /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
+    if (e > 142) {
+      bits |= 0x7c00;
+      /* If exponent was 0xff and one mantissa bit was set, it means NaN,
+       * not Inf, so make sure we set one mantissa bit too. Only a single
+       * bit is set: OR-ing in the raw float32 mantissa would spill outside
+       * the 10-bit half mantissa and turn finite overflows into garbage. */
+      if (e === 255 && (x & 0x007fffff) !== 0) {
+        bits |= 0x0200;
+      }
+      return bits;
+    }
+
+    /* If exponent underflows but not too much, return a denormal */
+    if (e < 113) {
+      m |= 0x0800;
+      /* Extra rounding may overflow and set mantissa to 0 and exponent
+       * to 1, which is OK. */
+      bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+      return bits;
+    }
+
+    bits |= ((e - 112) << 10) | (m >> 1);
+    /* Extra rounding. An overflow will set mantissa to 0 and increment
+     * the exponent, which is OK. */
+    bits += m & 1;
+    return bits;
+  };
+})();
+
+// Convert npy data from its original data type to `targetType`. Only the
+// 'float32' to 'float16' conversion is currently supported.
+export async function buildConstantByNpy(builder, url, targetType) {
const dataTypeMap = new Map([
['f2', {type: 'float16', array: Uint16Array}],
['f4', {type: 'float32', array: Float32Array}],
@@ -55,11 +105,22 @@ export async function buildConstantByNpy(builder, url) {
throw new Error(`Data type ${npArray.dataType} is not supported.`);
}
const dimensions = npArray.shape;
- const type = dataTypeMap.get(npArray.dataType).type;
+ let type = dataTypeMap.get(npArray.dataType).type;
const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array;
const dataView = new Uint8Array(npArray.data.buffer);
const dataView2 = dataView.slice();
- const typedArray = new TypedArrayConstructor(dataView2.buffer);
+ let typedArray = new TypedArrayConstructor(dataView2.buffer);
+  // An omitted `targetType` keeps the data type stored in the npy file, so
+  // callers using the original two-argument form keep working.
+  if (type === 'float32' && targetType === 'float16') {
+    // float16 constants are passed as raw half-float bits in a Uint16Array.
+    const uint16Array = new Uint16Array(typedArray.length);
+    for (let i = 0; i < typedArray.length; ++i) {
+      uint16Array[i] = toHalf(typedArray[i]);
+    }
+    typedArray = uint16Array;
+    type = targetType;
+  } else if (targetType !== undefined && type !== targetType) {
+    throw new Error(`Conversion from ${npArray.dataType} ` +
+        `to ${targetType} is not supported.`);
+  }
return builder.constant({dataType: type, type, dimensions}, typedArray);
}
@@ -494,7 +555,8 @@ export function getDefaultLayout(deviceType) {
// Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) {
return 'nhwc';
- } else if (deviceType.indexOf('gpu') != -1) {
+ } else if (deviceType.indexOf('gpu') != -1 ||
+ deviceType.indexOf('npu') != -1) {
return 'nchw';
}
}
diff --git a/object_detection/index.html b/object_detection/index.html
index bcc5c3dc..e795f22f 100644
--- a/object_detection/index.html
+++ b/object_detection/index.html
@@ -43,6 +43,9 @@
+
diff --git a/object_detection/ssd_mobilenetv1_nchw.js b/object_detection/ssd_mobilenetv1_nchw.js
index 035eaf92..fbbab007 100644
--- a/object_detection/ssd_mobilenetv1_nchw.js
+++ b/object_detection/ssd_mobilenetv1_nchw.js
@@ -7,6 +7,7 @@ export class SsdMobilenetV1Nchw {
constructor() {
this.context_ = null;
this.deviceType_ = null;
+ this.targetDataType_ = 'float32';
this.model_ = null;
this.builder_ = null;
this.graph_ = null;
@@ -57,9 +58,11 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
}
const weightsName = this.weightsUrl_ + prefix + weightSuffix;
- const weights = await buildConstantByNpy(this.builder_, weightsName);
+ const weights = await buildConstantByNpy(
+ this.builder_, weightsName, this.targetDataType_);
const biasName = this.biasUrl_ + prefix + biasSuffix;
- const bias = await buildConstantByNpy(this.builder_, biasName);
+ const bias = await buildConstantByNpy(
+ this.builder_, biasName, this.targetDataType_);
options.padding = computePadding2DForAutoPad(
/* nchw */[input.shape()[2], input.shape()[3]],
/* oihw */[weights.shape()[2], weights.shape()[3]],
@@ -69,7 +72,7 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
// TODO: Set clamp activation to options once it's supported in
// WebNN DML backend.
// Implement `clip` by `clamp` of WebNN API
- if (this.deviceType_ == 'gpu') {
+ if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') {
return this.builder_.clamp(
this.builder_.conv2d(input, weights, options),
{minValue: 0, maxValue: 6});
@@ -83,12 +86,17 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
async load(contextOptions) {
this.context_ = await navigator.ml.createContext(contextOptions);
this.deviceType_ = contextOptions.deviceType;
+ if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') {
+ this.targetDataType_ = 'float16';
+ }
this.builder_ = new MLGraphBuilder(this.context_);
- const input = this.builder_.input('input', {
- type: 'float32',
+ let input = this.builder_.input('input', {
dataType: 'float32',
dimensions: this.inputOptions.inputDimensions,
});
+ if (this.targetDataType_ === 'float16') {
+ input = this.builder_.cast(input, 'float16');
+ }
const strides = [2, 2];
const conv0 = await this.buildConv_(
input, ['', '0', '', '165__cf__168'],
@@ -249,7 +257,14 @@ ${nameArray[1]}_BatchNorm_batchnorm`;
const concat1 = this.builder_.concat(
[reshape6, reshape7, reshape8, reshape9, reshape10, reshape11], 1);
- return {'boxes': concat0, 'scores': concat1};
+    let boxes = concat0;
+    let scores = concat1;
+    // When the graph was built in float16, cast each output back to float32.
+    // Each output is cast from its own operand.
+    if (this.targetDataType_ === 'float16') {
+      boxes = this.builder_.cast(boxes, 'float32');
+      scores = this.builder_.cast(scores, 'float32');
+    }
+    return {boxes, scores};
}
async build(outputOperand) {