diff --git a/common/utils.js b/common/utils.js index a588fd8e..20dbab84 100644 --- a/common/utils.js +++ b/common/utils.js @@ -34,7 +34,57 @@ export async function getBufferFromUrl(url) { return arrayBuffer; } -export async function buildConstantByNpy(builder, url) { +// ref: http://stackoverflow.com/questions/32633585/how-do-you-convert-to-half-floats-in-javascript +export const toHalf = (function() { + const floatView = new Float32Array(1); + const int32View = new Int32Array(floatView.buffer); + + /* This method is faster than the OpenEXR implementation (very often + * used, eg. in Ogre), with the additional benefit of rounding, inspired + * by James Tursa's half-precision code. */ + return function toHalf(val) { + floatView[0] = val; + const x = int32View[0]; + + let bits = (x >> 16) & 0x8000; /* Get the sign */ + let m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */ + const e = (x >> 23) & 0xff; /* Using int is faster here */ + + /* If zero, or denormal, or exponent underflows too much for a denormal + * half, return signed zero. */ + if (e < 103) { + return bits; + } + + /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */ + if (e > 142) { + bits |= 0x7c00; + /* If exponent was 0xff and one mantissa bit was set, it means NaN, + * not Inf, so make sure we set one mantissa bit too. */ + bits |= ((e == 255) && (x & 0x007fffff)) ? 0x0200 : 0; + return bits; + } + + /* If exponent underflows but not too much, return a denormal */ + if (e < 113) { + m |= 0x0800; + /* Extra rounding may overflow and set mantissa to 0 and exponent + * to 1, which is OK. */ + bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1); + return bits; + } + + bits |= ((e - 112) << 10) | (m >> 1); + /* Extra rounding. An overflow will set mantissa to 0 and increment + * the exponent, which is OK. */ + bits += m & 1; + return bits; + }; +})(); + +// Convert npy data in original data type to `targetType`, only support +// 'float32' to 'float16' conversion currently.
+export async function buildConstantByNpy(builder, url, targetType) { const dataTypeMap = new Map([ ['f2', {type: 'float16', array: Uint16Array}], ['f4', {type: 'float32', array: Float32Array}], @@ -55,11 +105,22 @@ export async function buildConstantByNpy(builder, url) { throw new Error(`Data type ${npArray.dataType} is not supported.`); } const dimensions = npArray.shape; - const type = dataTypeMap.get(npArray.dataType).type; + let type = dataTypeMap.get(npArray.dataType).type; const TypedArrayConstructor = dataTypeMap.get(npArray.dataType).array; const dataView = new Uint8Array(npArray.data.buffer); const dataView2 = dataView.slice(); - const typedArray = new TypedArrayConstructor(dataView2.buffer); + let typedArray = new TypedArrayConstructor(dataView2.buffer); + if (type === 'float32' && targetType === 'float16') { + const uint16Array = new Uint16Array(typedArray.length); + for (let i = 0; i < typedArray.length; ++i) { + uint16Array[i] = toHalf(typedArray[i]); + } + typedArray = uint16Array; + type = targetType; + } else if (targetType !== undefined && type !== targetType) { + throw new Error(`Conversion from ${npArray.dataType} ` + + `to ${targetType} is not supported.`); + } return builder.constant({dataType: type, type, dimensions}, typedArray); } @@ -494,7 +555,8 @@ export function getDefaultLayout(deviceType) { // Windows or Mac platform.
if (deviceType.indexOf('cpu') != -1) { return 'nhwc'; - } else if (deviceType.indexOf('gpu') != -1) { + } else if (deviceType.indexOf('gpu') != -1 || + deviceType.indexOf('npu') != -1) { return 'nchw'; } } diff --git a/object_detection/index.html b/object_detection/index.html index bcc5c3dc..e795f22f 100644 --- a/object_detection/index.html +++ b/object_detection/index.html @@ -43,6 +43,9 @@ + diff --git a/object_detection/ssd_mobilenetv1_nchw.js b/object_detection/ssd_mobilenetv1_nchw.js index 035eaf92..fbbab007 100644 --- a/object_detection/ssd_mobilenetv1_nchw.js +++ b/object_detection/ssd_mobilenetv1_nchw.js @@ -7,6 +7,7 @@ export class SsdMobilenetV1Nchw { constructor() { this.context_ = null; this.deviceType_ = null; + this.targetDataType_ = 'float32'; this.model_ = null; this.builder_ = null; this.graph_ = null; @@ -57,9 +58,11 @@ ${nameArray[1]}_BatchNorm_batchnorm`; } const weightsName = this.weightsUrl_ + prefix + weightSuffix; - const weights = await buildConstantByNpy(this.builder_, weightsName); + const weights = await buildConstantByNpy( + this.builder_, weightsName, this.targetDataType_); const biasName = this.biasUrl_ + prefix + biasSuffix; - const bias = await buildConstantByNpy(this.builder_, biasName); + const bias = await buildConstantByNpy( + this.builder_, biasName, this.targetDataType_); options.padding = computePadding2DForAutoPad( /* nchw */[input.shape()[2], input.shape()[3]], /* oihw */[weights.shape()[2], weights.shape()[3]], @@ -69,7 +72,7 @@ ${nameArray[1]}_BatchNorm_batchnorm`; // TODO: Set clamp activation to options once it's supported in // WebNN DML backend. 
// Implement `clip` by `clamp` of WebNN API - if (this.deviceType_ == 'gpu') { + if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') { return this.builder_.clamp( this.builder_.conv2d(input, weights, options), {minValue: 0, maxValue: 6}); @@ -83,12 +86,17 @@ ${nameArray[1]}_BatchNorm_batchnorm`; async load(contextOptions) { this.context_ = await navigator.ml.createContext(contextOptions); this.deviceType_ = contextOptions.deviceType; + if (this.deviceType_ == 'gpu' || this.deviceType_ == 'npu') { + this.targetDataType_ = 'float16'; + } this.builder_ = new MLGraphBuilder(this.context_); - const input = this.builder_.input('input', { - type: 'float32', + let input = this.builder_.input('input', { dataType: 'float32', dimensions: this.inputOptions.inputDimensions, }); + if (this.targetDataType_ === 'float16') { + input = this.builder_.cast(input, 'float16'); + } const strides = [2, 2]; const conv0 = await this.buildConv_( input, ['', '0', '', '165__cf__168'], @@ -249,7 +257,14 @@ ${nameArray[1]}_BatchNorm_batchnorm`; const concat1 = this.builder_.concat( [reshape6, reshape7, reshape8, reshape9, reshape10, reshape11], 1); - return {'boxes': concat0, 'scores': concat1}; + let boxes = concat0; + let scores = concat1; + + if (this.targetDataType_ === 'float16') { + boxes = this.builder_.cast(boxes, 'float32'); + scores = this.builder_.cast(scores, 'float32'); + } + return {boxes, scores}; } async build(outputOperand) {