Commit

Improve ONNX quantization doc (#1451)
better doc
fxmarty authored Oct 16, 2023
1 parent 185bc08 commit 8f33e0e
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions optimum/onnxruntime/configuration.py
@@ -252,11 +252,11 @@ class QuantizationConfig:
reduce_range (`bool`, defaults to `False`):
Whether to use reduce-range 7-bit integers instead of 8-bit integers.
nodes_to_quantize (`List[str]`, defaults to `[]`):
List of the node names to quantize.
List of the node names to quantize. When unspecified (or left empty, the default), all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`List[str]`, defaults to `[]`):
List of the node names to exclude when applying quantization.
List of the node names to exclude when applying quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`List[str]`):
List of the operator types to quantize. Defaults to all quantizable operators for the given quantization mode and format.
List of the operator types to quantize. Defaults to all quantizable operators for the given quantization mode and format. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
qdq_add_pair_to_weight (`bool`, defaults to `False`):
By default, floating-point weights are quantized and fed to a solely inserted DeQuantizeLinear node.
If set to True, the floating-point weights will remain and both QuantizeLinear / DeQuantizeLinear nodes
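
The updated docstring points to `onnx.load` and netron for discovering node names. A minimal inspection sketch, not part of this diff, assuming an already exported model at the placeholder path model.onnx:

import onnx
from collections import Counter

# Load the exported model; graph.node holds every node with its name and op_type.
model = onnx.load("model.onnx")

# Node names can be passed to nodes_to_quantize / nodes_to_exclude.
for node in model.graph.node[:20]:
    print(node.name, node.op_type)

# Operator types (e.g. MatMul, Gemm) can be passed to operators_to_quantize.
print(Counter(node.op_type for node in model.graph.node))
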
@@ -404,9 +404,9 @@ def arm64(
nodes_to_quantize (`Optional[List[str]]`, defaults to `None`):
Specific nodes to quantize. If `None`, all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`Optional[List[str]]`, defaults to `None`):
Specific nodes to exclude from quantization.
Specific nodes to exclude from quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`Optional[List[str]]`, defaults to `None`):
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized.
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
"""
format, mode, operators_to_quantize = default_quantization_parameters(
is_static, operators_to_quantize=operators_to_quantize
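
For instance, a dynamic int8 configuration for ARM64 that skips a couple of nodes could look as follows; a sketch assuming these are the `AutoQuantizationConfig` factory methods defined in this file and that the keyword arguments shown exist, with hypothetical node names:

from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Dynamic quantization (activation ranges computed at runtime) targeting ARM64,
# excluding two hypothetical nodes identified beforehand with onnx.load or netron.
qconfig = AutoQuantizationConfig.arm64(
    is_static=False,
    per_channel=True,
    nodes_to_exclude=["/model/layers.0/attn/MatMul", "/lm_head/MatMul"],
)
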
@@ -462,9 +462,9 @@ def avx2(
nodes_to_quantize (`Optional[List[str]]`, defaults to `None`):
Specific nodes to quantize. If `None`, all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`Optional[List[str]]`, defaults to `None`):
Specific nodes to exclude from quantization.
Specific nodes to exclude from quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`Optional[List[str]]`, defaults to `None`):
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized.
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
"""
format, mode, operators_to_quantize = default_quantization_parameters(
is_static, operators_to_quantize=operators_to_quantize
@@ -518,9 +518,9 @@ def avx512(
nodes_to_quantize (`Optional[List[str]]`, defaults to `None`):
Specific nodes to quantize. If `None`, all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`Optional[List[str]]`, defaults to `None`):
Specific nodes to exclude from quantization.
Specific nodes to exclude from quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`Optional[List[str]]`, defaults to `None`):
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized.
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
"""
format, mode, operators_to_quantize = default_quantization_parameters(
is_static, operators_to_quantize=operators_to_quantize
@@ -575,9 +575,9 @@ def avx512_vnni(
nodes_to_quantize (`Optional[List[str]]`, defaults to `None`):
Specific nodes to quantize. If `None`, all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`Optional[List[str]]`, defaults to `None`):
Specific nodes to exclude from quantization.
Specific nodes to exclude from quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`Optional[List[str]]`, defaults to `None`):
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized.
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
"""
format, mode, operators_to_quantize = default_quantization_parameters(
is_static, operators_to_quantize=operators_to_quantize
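
The `operators_to_quantize` argument narrows quantization to specific ONNX op types (those listed in onnxruntime's registry.py linked above). A sketch under the same assumptions as before:

from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Quantize only MatMul nodes with an AVX512-VNNI dynamic configuration;
# "MatMul" must be one of the quantizable op types from registry.py.
qconfig = AutoQuantizationConfig.avx512_vnni(
    is_static=False,
    operators_to_quantize=["MatMul"],
)
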
@@ -615,9 +615,9 @@ def tensorrt(
nodes_to_quantize (`Optional[List[str]]`, defaults to `None`):
Specific nodes to quantize. If `None`, all nodes that are operators from `operators_to_quantize` will be quantized.
nodes_to_exclude (`Optional[List[str]]`, defaults to `None`):
Specific nodes to exclude from quantization.
Specific nodes to exclude from quantization. The list of nodes in a model can be found by loading the ONNX model with `onnx.load`, or through visual inspection with [netron](https://github.com/lutzroeder/netron).
operators_to_quantize (`Optional[List[str]]`, defaults to `None`):
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized.
Type of nodes to perform quantization on. By default, all the quantizable operators will be quantized. Quantizable operators can be found at https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py.
"""
format, mode, operators_to_quantize = default_quantization_parameters(
is_static=True, operators_to_quantize=operators_to_quantize
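
Note that the `tensorrt` method passes `is_static=True`, since TensorRT consumes statically quantized QDQ models. A configuration sketch under the same assumptions (the excluded node name is hypothetical, and a calibration step is still required before quantizing):

from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Static QDQ quantization config for TensorRT; calibration data must be
# provided later when running the quantizer.
qconfig = AutoQuantizationConfig.tensorrt(
    per_channel=False,
    nodes_to_exclude=["/model/embed_tokens/Gather"],
)
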
