Bug 1930286 [wpt PR 49083] - webnn: Support block-wise quantization for DirectML backend, a=testonly

Automatic update from web-platform-tests

webnn: Support block-wise quantization for DirectML backend

Block-wise quantization divides input tensors into smaller blocks that are independently quantized, resulting in faster optimization and high-precision quantization [1]. It is used for popular language models, such as phi-3 mini int4 quantized model [2]. Related WG issue [3] has been opened for discussion.

Firstly, this CL validates scale and zero point tensors for block-wise quantization. Besides, this CL also implements the block-wise quantization in the DirectML backend by using DML_OPERATOR_QUANTIZE and DML_OPERATOR_DEQUANTIZE which are available in FL >= 6.3.

More validation and conformance tests are added to verify the implementation.

[1]: https://arxiv.org/abs/2110.02861
[2]: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct
[3]: webmachinelearning/webnn#779

Bug: 40206287
Change-Id: I977b0be57deebd7afcae216edc3ddc3818b8c09f
Cq-Include-Trybots: luci.chromium.try:mac14.arm64-blink-rel, mac14-blink-rel, mac15.arm64-blink-rel, mac15-blink-rel, linux-blink-rel
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5964816
Reviewed-by: Rafael Cintron <[email protected]>
Reviewed-by: ningxin hu <[email protected]>
Commit-Queue: ningxin hu <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1380767}

--

wpt-commits: 8686b7a6d288d3b2c22b5ddb5a21773619b22b85
wpt-pr: 49083
jamienicol · Nov 9, 2024 · a017f32 · a017f32
1 parent efa4619
commit a017f32
Show file tree

Hide file tree

Showing 4 changed files with 373 additions and 40 deletions.
diff --git a/testing/web-platform/tests/webnn/conformance_tests/dequantizeLinear.https.any.js b/testing/web-platform/tests/webnn/conformance_tests/dequantizeLinear.https.any.js
@@ -119,8 +119,8 @@ const dequantizeLinearTests = [
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
-          'data': [128],
-          'descriptor': {shape: [], dataType: 'uint8'},
+          'data': [128, 128, 128, 128],
+          'descriptor': {shape: [4], dataType: 'uint8'},
           'constant': true
         }
       },
@@ -144,6 +144,50 @@ const dequantizeLinearTests = [
       }
     }
   },
+  {
+    'name':
+        'dequantizeLinear uint8 1D constant tensor with implicit block_size = 2.',
+    'graph': {
+      'inputs': {
+        'dequantizeLinearInput': {
+          'data': [12, 24, 35, 123],
+          'descriptor': {shape: [4], dataType: 'uint8'},
+          'constant': true
+        },
+        'dequantizeLinearScale': {
+          'data': [
+            9.343092918395996,
+            -4.617084980010986,
+          ],
+          'descriptor': {shape: [2], dataType: 'float32'},
+          'constant': true
+        },
+        'dequantizeLinearZeroPoint': {
+          'data': [128, 110],
+          'descriptor': {shape: [2], dataType: 'uint8'},
+          'constant': true
+        }
+      },
+      'operators': [{
+        'name': 'dequantizeLinear',
+        'arguments': [
+          {'input': 'dequantizeLinearInput'},
+          {'scale': 'dequantizeLinearScale'},
+          {'zeroPoint': 'dequantizeLinearZeroPoint'}
+        ],
+        'outputs': 'dequantizeLinearOutput'
+      }],
+      'expectedOutputs': {
+        'dequantizeLinearOutput': {
+          'data': [
+            -1083.798828125, -971.681640625, 346.2813720703125,
+            -60.0221061706543
+          ],
+          'descriptor': {shape: [4], dataType: 'float32'}
+        }
+      }
+    }
+  },
   {
     'name':
         'dequantizeLinear int8 4D constant tensor broadcasting scale and zeroPoint',
@@ -160,8 +204,8 @@ const dequantizeLinearTests = [
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
-          'data': [12],
-          'descriptor': {shape: [], dataType: 'int8'},
+          'data': [12, 12],
+          'descriptor': {shape: [2, 1], dataType: 'int8'},
           'constant': true
         }
       },
@@ -185,6 +229,74 @@ const dequantizeLinearTests = [
       }
     }
   },
+  {
+    'name': 'dequantizeLinear int8 4D constant tensor with block_size = [3, 2]',
+    'graph': {
+      'inputs': {
+        'dequantizeLinearInput': {
+          'data': [
+            -124, 0,   23,  122, 12, 23, 45, 36, 67, 78, -22, 0,
+            -34,  -45, -56, -67, 89, 30, 12, 23, 56, 67, 56,  -12
+          ],
+          'descriptor': {shape: [6, 4], dataType: 'int8'},
+          'constant': true
+        },
+        'dequantizeLinearScale': {
+          'data': [
+            0.2800687253475189, -4.617084980010986, 1.2800687253475189,
+            -3.617084980010986
+          ],
+          'descriptor': {shape: [2, 2], dataType: 'float32'},
+          'constant': true
+        },
+        'dequantizeLinearZeroPoint': {
+          'data': [1, 3, 5, 12],
+          'descriptor': {shape: [2, 2], dataType: 'int8'},
+          'constant': true
+        }
+      },
+      'operators': [{
+        'name': 'dequantizeLinear',
+        'arguments': [
+          {'input': 'dequantizeLinearInput'},
+          {'scale': 'dequantizeLinearScale'},
+          {'zeroPoint': 'dequantizeLinearZeroPoint'}
+        ],
+        'outputs': 'dequantizeLinearOutput'
+      }],
+      'expectedOutputs': {
+        'dequantizeLinearOutput': {
+          'data': [
+            -35.00859069824219,
+            -0.2800687253475189,
+            -92.3416976928711,
+            -549.43310546875,
+            3.0807559490203857,
+            6.1615118980407715,
+            -193.91757202148438,
+            -152.36380004882812,
+            18.484535217285156,
+            21.565292358398438,
+            115.4271240234375,
+            13.851255416870117,
+            -49.92267990112305,
+            -64.0034408569336,
+            245.96177673339844,
+            285.7497253417969,
+            107.52577209472656,
+            32.0017204284668,
+            0,
+            -39.787933349609375,
+            65.28350830078125,
+            79.36426544189453,
+            -159.1517333984375,
+            86.81004333496094
+          ],
+          'descriptor': {shape: [6, 4], dataType: 'float32'}
+        }
+      }
+    }
+  },
   {
     'name': 'dequantizeLinear uint4 1D tensor with even input size',
     'graph': {
@@ -200,8 +312,8 @@ const dequantizeLinearTests = [
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
-          'data': [0],
-          'descriptor': {shape: [], dataType: 'uint4'},
+          'data': [0, 1],
+          'descriptor': {shape: [2], dataType: 'uint4'},
           'constant': true
         }
       },
@@ -216,7 +328,7 @@ const dequantizeLinearTests = [
       }],
       'expectedOutputs': {
         'dequantizeLinearOutput': {
-          'data': [16.804121017456055, 0],
+          'data': [16.804121017456055, -1.1202747821807861],
           'descriptor': {shape: [2], dataType: 'float32'}
         }
       }
@@ -237,8 +349,8 @@ const dequantizeLinearTests = [
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
-          'data': [2, 1, 4],
-          'descriptor': {shape: [3], dataType: 'uint4'},
+          'data': [2],
+          'descriptor': {shape: [1], dataType: 'uint4'},
           'constant': true
         }
       },
@@ -253,7 +365,7 @@ const dequantizeLinearTests = [
       }],
       'expectedOutputs': {
         'dequantizeLinearOutput': {
-          'data': [8.962198257446289, 12.323022842407227, 11.202747344970703],
+          'data': [8.962198257446289, 11.202747344970703, 13.443297386169434],
           'descriptor': {shape: [3], dataType: 'float32'}
         }
       }
@@ -278,7 +390,7 @@ const dequantizeLinearTests = [
         },
         'dequantizeLinearZeroPoint': {
           'data': [2, 3],
-          'descriptor': {shape: [2], dataType: 'uint4'},
+          'descriptor': {shape: [2, 1], dataType: 'uint4'},
           'constant': true
         }
       },
@@ -294,14 +406,57 @@ const dequantizeLinearTests = [
       'expectedOutputs': {
         'dequantizeLinearOutput': {
           'data': [
-            -18.686185836791992, -18.686185836791992, -36.93667984008789,
+            -18.686185836791992, -9.343092918395996, -32.31959533691406,
             -55.40502166748047
           ],
           'descriptor': {shape: [1, 1, 2, 2], dataType: 'float32'}
         }
       }
     }
   },
+  {
+    'name': 'dequantizeLinear uint4 3D input with block_size = [1, 1, 2]',
+    'graph': {
+      'inputs': {
+        'dequantizeLinearInput': {
+          'data': [0, 1, 10, 15],
+          'descriptor': {shape: [1, 1, 4], dataType: 'uint4'},
+          'constant': true
+        },
+        'dequantizeLinearScale': {
+          'data': [
+            9.343092918395996,
+            -4.617084980010986,
+          ],
+          'descriptor': {shape: [1, 2], dataType: 'float32'},
+          'constant': true
+        },
+        'dequantizeLinearZeroPoint': {
+          'data': [2, 3],
+          'descriptor': {shape: [1, 2], dataType: 'uint4'},
+          'constant': true
+        }
+      },
+      'operators': [{
+        'name': 'dequantizeLinear',
+        'arguments': [
+          {'input': 'dequantizeLinearInput'},
+          {'scale': 'dequantizeLinearScale'},
+          {'zeroPoint': 'dequantizeLinearZeroPoint'}
+        ],
+        'outputs': 'dequantizeLinearOutput'
+      }],
+      'expectedOutputs': {
+        'dequantizeLinearOutput': {
+          'data': [
+            -18.686185836791992, -9.343092918395996, -32.31959533691406,
+            -55.40502166748047
+          ],
+          'descriptor': {shape: [1, 1, 4], dataType: 'float32'}
+        }
+      }
+    }
+  },
   {
     'name': 'dequantizeLinear int4 1D tensor with even size',
     'graph': {
@@ -312,8 +467,8 @@ const dequantizeLinearTests = [
           'constant': true
         },
         'dequantizeLinearScale': {
-          'data': [1.1202747821807861],
-          'descriptor': {shape: [], dataType: 'float32'},
+          'data': [1.1202747821807861, 1.1202747821807861],
+          'descriptor': {shape: [2], dataType: 'float32'},
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
@@ -350,12 +505,12 @@ const dequantizeLinearTests = [
         },
         'dequantizeLinearScale': {
           'data': [1.1202747821807861],
-          'descriptor': {shape: [], dataType: 'float32'},
+          'descriptor': {shape: [1], dataType: 'float32'},
           'constant': true
         },
         'dequantizeLinearZeroPoint': {
-          'data': [-3, 0, 0],
-          'descriptor': {shape: [3], dataType: 'int4'},
+          'data': [-3],
+          'descriptor': {shape: [1], dataType: 'int4'},
           'constant': true
         }
       },
@@ -370,7 +525,7 @@ const dequantizeLinearTests = [
       }],
       'expectedOutputs': {
         'dequantizeLinearOutput': {
-          'data': [2.2405495643615723, 7.841923713684082, 0],
+          'data': [2.2405495643615723, 11.202747344970703, 3.3608243465423584],
           'descriptor': {shape: [3], dataType: 'float32'}
         }
       }