diff --git a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb
index 41342c1677e..2cfff738bbe 100644
--- a/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb
+++ b/notebooks/305-tensorflow-quantization-aware-training/305-tensorflow-quantization-aware-training.ipynb
@@ -49,6 +49,8 @@
     "from nncf.tensorflow.helpers.model_creation import create_compressed_model\n",
     "from nncf.tensorflow.initialization import register_default_init_args\n",
     "from nncf.common.logging.logger import set_log_level\n",
+    "from openvino.runtime import serialize\n",
+    "from openvino.tools import mo\n",
     "\n",
     "set_log_level(logging.ERROR)\n",
     "\n",
@@ -60,10 +62,8 @@
     "BASE_MODEL_NAME = \"ResNet-18\"\n",
     "\n",
     "fp32_h5_path = Path(MODEL_DIR / (BASE_MODEL_NAME + \"_fp32\")).with_suffix(\".h5\")\n",
-    "fp32_sm_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + \"_fp32\"))\n",
     "fp32_ir_path = Path(OUTPUT_DIR / \"saved_model\").with_suffix(\".xml\")\n",
     "int8_pb_path = Path(OUTPUT_DIR / (BASE_MODEL_NAME + \"_int8\")).with_suffix(\".pb\")\n",
-    "int8_pb_name = Path(BASE_MODEL_NAME + \"_int8\").with_suffix(\".pb\")\n",
     "int8_ir_path = int8_pb_path.with_suffix(\".xml\")\n",
     "\n",
     "BATCH_SIZE = 128\n",
@@ -222,7 +222,7 @@
    "outputs": [],
    "source": [
     "IMG_SHAPE = IMG_SIZE + (3,)\n",
-    "model = ResNet18(input_shape=IMG_SHAPE)"
+    "fp32_model = ResNet18(input_shape=IMG_SHAPE)"
    ]
   },
   {
@@ -245,37 +245,22 @@
    "outputs": [],
    "source": [
     "# Load the floating-point weights.\n",
-    "model.load_weights(fp32_h5_path)\n",
+    "fp32_model.load_weights(fp32_h5_path)\n",
     "\n",
     "# Compile the floating-point model.\n",
-    "model.compile(loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
-    "              metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])\n",
+    "fp32_model.compile(\n",
+    "    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
+    "    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')]\n",
+    ")\n",
     "\n",
     "# Validate the floating-point model.\n",
-    "test_loss, acc_fp32 = model.evaluate(validation_dataset,\n",
-    "                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n",
+    "test_loss, acc_fp32 = fp32_model.evaluate(\n",
+    "    validation_dataset,\n",
+    "    callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1'])\n",
+    ")\n",
     "print(f\"\\nAccuracy of FP32 model: {acc_fp32:.3f}\")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "b80f67d6",
-   "metadata": {},
-   "source": [
-    "Save the floating-point model to the saved model, which will be later used for conversion to OpenVINO IR and further performance measurement."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "450cbcb2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.save(fp32_sm_path)\n",
-    "print(f'Absolute path where the model is saved:\\n {fp32_sm_path.resolve()}')"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "13b81167",
@@ -346,7 +331,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "compression_ctrl, model = create_compressed_model(model, nncf_config)"
+    "compression_ctrl, int8_model = create_compressed_model(fp32_model, nncf_config)"
    ]
   },
   {
@@ -365,13 +350,17 @@
    "outputs": [],
    "source": [
     "# Compile the INT8 model.\n",
-    "model.compile(optimizer=tf.keras.optimizers.Adam(lr=LR),\n",
-    "              loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
-    "              metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')])\n",
+    "int8_model.compile(\n",
+    "    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),\n",
+    "    loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1),\n",
+    "    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc@1')]\n",
+    ")\n",
     "\n",
     "# Validate the INT8 model.\n",
-    "test_loss, test_acc = model.evaluate(validation_dataset,\n",
-    "                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n",
+    "test_loss, test_acc = int8_model.evaluate(\n",
+    "    validation_dataset,\n",
+    "    callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1'])\n",
+    ")\n",
     "print(f\"\\nAccuracy of INT8 model after initialization: {test_acc:.3f}\")"
    ]
   },
   {
@@ -393,53 +382,38 @@
     "scrolled": true,
     "tags": [],
     "test_replace": {
-     "fit(train_dataset,": "fit(validation_dataset,"
+     "train_dataset,": "validation_dataset,"
     }
    },
    "outputs": [],
    "source": [
     "# Train the INT8 model.\n",
-    "model.fit(train_dataset,\n",
-    "          epochs=2)\n",
+    "int8_model.fit(\n",
+    "    train_dataset,\n",
+    "    epochs=2\n",
+    ")\n",
     "\n",
     "# Validate the INT8 model.\n",
-    "test_loss, acc_int8 = model.evaluate(validation_dataset,\n",
-    "                                     callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1']))\n",
+    "test_loss, acc_int8 = int8_model.evaluate(\n",
+    "    validation_dataset,\n",
+    "    callbacks=tf.keras.callbacks.ProgbarLogger(stateful_metrics=['acc@1'])\n",
+    ")\n",
     "print(f\"\\nAccuracy of INT8 model after fine-tuning: {acc_int8:.3f}\")\n",
     "print(f\"\\nAccuracy drop of tuned INT8 model over pre-trained FP32 model: {acc_fp32 - acc_int8:.3f}\")"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "7af453ef",
-   "metadata": {},
-   "source": [
-    "Save the `INT8` model to the frozen graph (saved model does not work with quantized model for now). Frozen graph will be later used for conversion to OpenVINO IR and further performance measurement."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6b208b6c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "compression_ctrl.export_model(int8_pb_path, 'frozen_graph')\n",
-    "print(f'Absolute path where the int8 model is saved:\\n {int8_pb_path.resolve()}')"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "1248a563",
    "metadata": {},
    "source": [
-    "## Export Frozen Graph Models to OpenVINO Intermediate Representation (IR)\n",
+    "## Export Models to OpenVINO Intermediate Representation (IR)\n",
     "\n",
-    "Use Model Optimizer to convert the Saved Model and Frozen Graph models to OpenVINO IR. The models are saved to the current directory.\n",
+    "Use the Model Optimizer Python API to convert the models to OpenVINO IR.\n",
     "\n",
-    "For more information about Model Optimizer, see the [Model Optimizer Developer Guide](https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html).\n",
+    "For more information about Model Optimizer, see the [Model Optimizer Developer Guide](https://docs.openvino.ai/latest/openvino_docs_MO_DG_Python_API.html).\n",
     "\n",
-    "Executing this command may take a while. There may be some errors or warnings in the output. When Model Optimization successfully exports to OpenVINO IR, the last lines of the output will include: `[ SUCCESS ] Generated IR version 11 model`"
+    "Executing this command may take a while."
    ]
   },
   {
@@ -449,7 +423,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!mo --input_shape=\"[1,64,64,3]\" --input=data --saved_model_dir=$fp32_sm_path --output_dir=$OUTPUT_DIR"
+    "model_ir_fp32 = mo.convert_model(\n",
+    "    fp32_model,\n",
+    "    input_shape=[1, 64, 64, 3],\n",
+    ")"
    ]
   },
   {
@@ -459,7 +436,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!mo --input_shape=\"[1,64,64,3]\" --input=Placeholder --input_model=$int8_pb_path --output_dir=$OUTPUT_DIR"
+    "model_ir_int8 = mo.convert_model(\n",
+    "    int8_model,\n",
+    "    input_shape=[1, 64, 64, 3],\n",
+    ")"
    ]
   },
   {
@@ -483,6 +463,10 @@
    },
    "outputs": [],
    "source": [
+    "serialize(model_ir_fp32, str(fp32_ir_path))\n",
+    "serialize(model_ir_int8, str(int8_ir_path))\n",
+    "\n",
+    "\n",
     "def parse_benchmark_output(benchmark_output):\n",
     "    parsed_output = [line for line in benchmark_output if 'FPS' in line]\n",
     "    print(*parsed_output, sep='\\n')\n",
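Reviewer note on the `create_compressed_model` hunk: the PR only renames the variables (`fp32_model` in, `int8_model` out), which makes it clearer that NNCF returns a new, quantization-wrapped model rather than mutating the original. For context, below is a minimal, self-contained sketch of the QAT flow those names pass through; the toy Keras model, random calibration data, and batch size are illustrative assumptions, while the config keys and function names match the notebook's imports.

```python
# Hedged sketch of the NNCF quantization-aware-training flow (assumes nncf 2.x
# with the TF backend). The tiny model and random dataset are placeholders.
import tensorflow as tf
from nncf import NNCFConfig
from nncf.tensorflow.helpers.model_creation import create_compressed_model
from nncf.tensorflow.initialization import register_default_init_args

fp32_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(8, 3, activation="relu", input_shape=(64, 64, 3)),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Calibration data for quantizer range initialization (random here; the
# notebook uses its real train_dataset).
train_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform((32, 64, 64, 3)),
     tf.one_hot(tf.zeros(32, dtype=tf.int32), 10))
).batch(8)

nncf_config = NNCFConfig.from_dict({
    "input_info": {"sample_size": [1, 64, 64, 3]},
    "compression": {"algorithm": "quantization"},
})
nncf_config = register_default_init_args(nncf_config, train_dataset, batch_size=8)

# Returns a controller (statistics, export) plus a quantization-wrapped model
# that is compiled, evaluated, and fine-tuned like any plain Keras model.
compression_ctrl, int8_model = create_compressed_model(fp32_model, nncf_config)
int8_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                   loss="categorical_crossentropy")
int8_model.fit(train_dataset, epochs=1)
```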
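The export hunks replace the on-disk SavedModel/frozen-graph round-trip with in-memory conversion: `mo.convert_model` accepts a live `tf.keras.Model` and `serialize` writes the `.xml`/`.bin` IR pair only when a file is actually needed. A minimal sketch of that flow, assuming openvino-dev 2022.x; the MobileNetV2 stand-in and the `/tmp` path are placeholders, not taken from the notebook:

```python
# Sketch of the new in-memory conversion flow (assumes openvino-dev 2022.x).
from pathlib import Path

import numpy as np
import tensorflow as tf
from openvino.runtime import Core, serialize
from openvino.tools import mo

# Stand-in Keras model; the notebook converts its own ResNet-18 instances.
keras_model = tf.keras.applications.MobileNetV2(input_shape=(64, 64, 3), weights=None)

# Convert the live Keras object directly; no SavedModel or frozen graph on disk.
ov_model = mo.convert_model(keras_model, input_shape=[1, 64, 64, 3])

# Persist the IR only because downstream tools such as benchmark_app need a file.
ir_xml = Path("/tmp/mobilenet_v2.xml")
serialize(ov_model, str(ir_xml))

# Sanity check: the serialized IR loads and runs.
core = Core()
compiled = core.compile_model(core.read_model(str(ir_xml)), "CPU")
dummy = np.random.rand(1, 64, 64, 3).astype(np.float32)
result = compiled.infer_new_request({0: dummy})
print(next(iter(result.values())).shape)
```

This is also why the removed `fp32_sm_path` and `int8_pb_name` intermediates are no longer needed; `int8_pb_path` survives only to derive `int8_ir_path`.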
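Downstream, the notebook's `parse_benchmark_output` keeps only the FPS lines from `benchmark_app` output. A hedged sketch of equivalent usage outside a notebook; the `benchmark_ir` helper and the flag values are illustrative assumptions, not copied from the cells that follow this hunk:

```python
# Hypothetical offline equivalent of the notebook's benchmarking step.
import subprocess

def benchmark_ir(ir_xml: str, device: str = "CPU") -> None:
    # benchmark_app ships with openvino-dev; -t caps the run at 15 seconds.
    cmd = ["benchmark_app", "-m", ir_xml, "-d", device, "-t", "15", "-api", "async"]
    result = subprocess.run(cmd, capture_output=True, text=True, check=False)
    # Same filtering as parse_benchmark_output: print only lines mentioning FPS.
    print(*[line for line in result.stdout.splitlines() if "FPS" in line], sep="\n")

# Paths as defined by the notebook's fp32_ir_path / int8_ir_path variables.
benchmark_ir(str(fp32_ir_path))
benchmark_ir(str(int8_ir_path))
```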