Skip to content

Commit

Permalink
GNNE-1714:Feature/add quant strict mode to config (#1116)
Browse files Browse the repository at this point in the history
* add quant scheme strict mode for algo json

* add bias name for fullyconnect

* Apply code-format changes

---------

Co-authored-by: guodongliang <[email protected]>
Co-authored-by: uranus0515 <[email protected]>
  • Loading branch information
3 people authored Oct 31, 2023
1 parent ef3d74f commit 03fd1a3
Show file tree
Hide file tree
Showing 11 changed files with 47 additions and 1 deletion.
4 changes: 4 additions & 0 deletions docs/MixQuant.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ compiler.use_ptq(ptq_options)

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False
```

* **quant_scheme:导入量化参数配置文件的路径**
* **quant_scheme_strict_mode:是否严格按照quant_scheme执行量化**
* **export_quant_scheme:是否导出量化参数配置文件**
* **export_weight_range_by_channel:是否导出** `bychannel`形式的weights量化参数,为了保证量化效果,该参数建议设置为 `True`

Expand All @@ -36,6 +38,7 @@ compile_options.dump_ir = True

```python
ptq_options.quant_scheme = ""
ptq_options.quant_scheme_strict_mode = False
ptq_options.export_quant_scheme = True
ptq_options.export_weight_range_by_channel = True
```
Expand Down Expand Up @@ -108,6 +111,7 @@ ptq_options.export_weight_range_by_channel = True

```python
ptq_options.quant_scheme = "./QuantScheme.json" # path to your 'QuantScheme.json'
ptq_options.quant_scheme_strict_mode = False # Whether to strictly follow quant_scheme for quantization
ptq_options.export_quant_scheme = False
ptq_options.export_weight_range_by_channel = False # whatever
```
Expand Down
1 change: 1 addition & 0 deletions docs/USAGE_v2.md
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ PTQTensorOptions类, 用于配置nncase PTQ选项,各属性说明如下
| dump_quant_error | bool || 是否生成量化损失,默认为False。在 `dump_ir=True`时生效 |
| dump_quant_error_symmetric_for_signed | bool || 是否生成使用范围对称的量化损失,默认为True。在 `dump_ir=True`时生效 |
| quant_scheme | string || 量化配置文件路径,默认为“ ”。在 `dump_ir=True`时生效 |
| quant_scheme_strict_mode | bool || 是否严格按照quant_scheme执行量化,默认为False。在 `quant_scheme`不为空时生效 |
| export_quant_scheme | bool || 是否导出量化配置文件,默认为False。在 `dump_ir=True`时生效 |
| export_weight_range_by_channel | bool || 导出量化配置文件时,是否按照channel统计权重的范围,默认为False。在 `dump_ir=True`时生效 |

Expand Down
1 change: 1 addition & 0 deletions docs/USAGE_v2_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ PTQTensorOptions is used to configure PTQ options. The details of all attributes
| dump_quant_error | bool | N | Specify whether to dump the quantization error, False by default. The following parameters take effect when `dump_ir=True`. |
| dump_quant_error_symmetric_for_signed | bool | N | Specify whether to dump the quantization error symmetrically for signed numbers, True by default. |
| quant_scheme | string | N | Specify the path of the quantization scheme file, "" by default. |
| quant_scheme_strict_mode | bool | N | Specify whether to strictly follow quant_scheme for quantization, False by default. |
| export_quant_scheme | bool | N | Specify whether export quantification scheme, False by default. |
| export_weight_range_by_channel | bool | N | Specify whether export weights range by channel, False by default. |

Expand Down
1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-EN.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
Expand Down
1 change: 1 addition & 0 deletions examples/user_guide/k230_simulate-ZH.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@
" # mix quantize options\n",
" # more details in docs/MixQuant.md\n",
" ptq_options.quant_scheme = \"\"\n",
" ptq_options.quant_scheme_strict_mode = False\n",
" ptq_options.export_quant_scheme = False\n",
" ptq_options.export_weight_range_by_channel = False\n",
" ############################################\n",
Expand Down
3 changes: 3 additions & 0 deletions python/nncase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class PTQTensorOptions:
input_mean: float
input_std: float
quant_scheme: str
quant_scheme_strict_mode: bool
samples_count: int
cali_data: List[RuntimeTensor]

Expand All @@ -83,6 +84,7 @@ def __init__(self) -> None:
self.input_mean: float = 0.5
self.input_std: float = 0.5
self.quant_scheme: str = ""
self.quant_scheme_strict_mode: bool = False
self.samples_count: int = 5
self.cali_data: List[RuntimeTensor] = []

Expand Down Expand Up @@ -244,6 +246,7 @@ def use_ptq(self, ptq_dataset_options: PTQTensorOptions) -> None:

self._quantize_options.use_mix_quant = ptq_dataset_options.use_mix_quant
self._quantize_options.quant_scheme = ptq_dataset_options.quant_scheme
self._quantize_options.quant_scheme_strict_mode = ptq_dataset_options.quant_scheme_strict_mode
self._quantize_options.export_quant_scheme = ptq_dataset_options.export_quant_scheme
self._quantize_options.export_weight_range_by_channel = ptq_dataset_options.export_weight_range_by_channel
self._quantize_options.dump_quant_error = ptq_dataset_options.dump_quant_error
Expand Down
5 changes: 5 additions & 0 deletions python/nncase/native/ffi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ PYBIND11_MODULE(_nncase, m) {
py::overload_cast<>(&quantize_options::quant_scheme),
py::overload_cast<std::string_view>(
&quantize_options::quant_scheme))
.def_property(
"quant_scheme_strict_mode",
py::overload_cast<>(&quantize_options::quant_scheme_strict_mode),
py::overload_cast<bool>(
&quantize_options::quant_scheme_strict_mode))
.def_property(
"export_quant_scheme",
py::overload_cast<>(&quantize_options::export_quant_scheme),
Expand Down
8 changes: 8 additions & 0 deletions src/Native/include/nncase/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ typedef struct {
void (*quantize_options_set_quant_scheme)(
clr_object_handle_t quantize_options, const char *quant_scheme,
size_t quant_scheme_length);
void (*quantize_options_set_quant_scheme_strict_mode)(
clr_object_handle_t quantize_options, bool quant_scheme_strict_mode);
void (*quantize_options_set_export_quant_scheme)(
clr_object_handle_t quantize_options, bool export_quant_scheme);
void (*quantize_options_set_export_weight_range_by_channel)(
Expand Down Expand Up @@ -401,6 +403,12 @@ class quantize_options : public clr_object_base {
obj_.get(), value.data(), value.length());
}

// Getter stub: always reports false. This mirrors the sibling properties
// (e.g. export_quant_scheme below), which suggests this class is a
// write-only proxy over the CLR compiler object — the real value lives on
// the managed side and is never read back through this C++ wrapper.
// NOTE(review): presumably intentional per the surrounding convention —
// confirm callers never rely on reading this property.
bool quant_scheme_strict_mode() { return false; }
// Setter: forwards the strict-mode flag to the managed QuantizeOptions
// object via the CLR C API table (see quantize_options_set_quant_scheme_strict_mode
// in the api struct above).
void quant_scheme_strict_mode(bool value) {
    nncase_clr_api()->quantize_options_set_quant_scheme_strict_mode(
        obj_.get(), value);
}

bool export_quant_scheme() { return false; }
void export_quant_scheme(bool value) {
nncase_clr_api()->quantize_options_set_export_quant_scheme(obj_.get(),
Expand Down
18 changes: 18 additions & 0 deletions src/Nncase.Compiler/Interop/CApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ public unsafe struct CApiMT
public delegate* unmanaged<IntPtr, FineTuneWeightsMethod, void> QuantOptionsSetFineTuneWeightsMethodPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetUseMixQuantPtr;
public delegate* unmanaged<IntPtr, byte*, nuint, void> QuantOptionsSetQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetQuantSchemeStrictModePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportQuantSchemePtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetExportWeightRangeByChannelPtr;
public delegate* unmanaged<IntPtr, byte, void> QuantOptionsSetDumpQuantErrorPtr;
Expand Down Expand Up @@ -154,6 +155,7 @@ public static void Initialize(CApiMT* mt)
mt->QuantOptionsSetFineTuneWeightsMethodPtr = &QuantizeOptionsSetFineTuneWeightsMethod;
mt->QuantOptionsSetUseMixQuantPtr = &QuantOptionsSetUseMixQuant;
mt->QuantOptionsSetQuantSchemePtr = &QuantizeOptionsSetQuantScheme;
mt->QuantOptionsSetQuantSchemeStrictModePtr = &QuantizeOptionsSetQuantSchemeStrictMode;
mt->QuantOptionsSetExportQuantSchemePtr = &QuantizeOptionsSetExportQuantScheme;
mt->QuantOptionsSetExportWeightRangeByChannelPtr = &QuantizeOptionsSetExportWeightRangeByChannel;
mt->QuantOptionsSetDumpQuantErrorPtr = &QuantizeOptionsSetDumpQuantError;
Expand Down Expand Up @@ -603,6 +605,22 @@ private static void QuantizeOptionsSetQuantScheme(IntPtr quantizeOptionsHandle,
Get<QuantizeOptions>(quantizeOptionsHandle).QuantScheme = ToString(quantSchemePtr, quantSchemeLength);
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetQuantSchemeStrictMode(IntPtr quantizeOptionsHandle, byte quantSchemeStrictMode)
{
    // The native side passes the bool as a raw byte; only 0 (false) and
    // 1 (true) are valid encodings — reject anything else loudly rather
    // than silently coercing it.
    if (quantSchemeStrictMode > 1)
    {
        throw new ArgumentException("Invalid QuantSchemeStrictMode Flag");
    }

    // Resolve the managed QuantizeOptions behind the handle and store the flag.
    Get<QuantizeOptions>(quantizeOptionsHandle).QuantSchemeStrictMode = quantSchemeStrictMode == 1;
}

[UnmanagedCallersOnly]
private static void QuantizeOptionsSetExportQuantScheme(IntPtr quantizeOptionsHandle, byte exportQuantScheme)
{
Expand Down
5 changes: 4 additions & 1 deletion src/Nncase.Importer/TFLite/MatMul.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,12 @@ private Expr VisitMatMul(in tflite.Operator op, bool isFullyConnected = true)
List<string> outputNames = new() { GetOutputTensor(op, 0).Name + "_matmul" };
matmul.Metadata.OutputNames = outputNames;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
outputNames.Add(GetOutputTensor(op, 0).Name + "_bias");
bias.Metadata.OutputNames = outputNames;
var mm = matmul + bias;
outputNames.Clear();
outputNames.Add(GetOutputTensor(op, 0).Name);
mm.Metadata.OutputNames = outputNames;

return fusedActivationFunction switch
{
Expand Down
1 change: 1 addition & 0 deletions tests/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ finetune_weights_method = 'NoFineTuneWeights'
input_mean = 0.5
input_std = 0.5
quant_scheme = ""
quant_scheme_strict_mode = false

[infer_report_opt]
enabled = false
Expand Down

0 comments on commit 03fd1a3

Please sign in to comment.