diff --git a/docs/conversion_process.jpg b/docs/conversion_process.jpg new file mode 100644 index 000000000..966c0b0c7 Binary files /dev/null and b/docs/conversion_process.jpg differ diff --git a/docs/keras_to_xcore.ipynb b/docs/keras_to_xcore.ipynb new file mode 100644 index 000000000..cabb280ca --- /dev/null +++ b/docs/keras_to_xcore.ipynb @@ -0,0 +1,378 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0dcf0b88-e2ba-48c2-8a57-ddc7a7dc520a", + "metadata": {}, + "source": [ + "# Process Overview\n", + "\n", + "We start with a Keras model, which is converted into a TFLite model. The TFLite model is then run through the xformer compiler to produce an XMOS-optimised TFLite file.\n", + "\n", + "We can use the relevant interpreter for each model to verify that, given the same input, they both produce the same output. " + ] + },
+ { + "cell_type": "markdown", + "id": "87c94f93-a1aa-4ac7-8b69-a4bccd22cae6", + "metadata": {}, + "source": [ + "![Diagram of the conversion process](conversion_process.jpg)" + ] + },
+ { + "cell_type": "code", + "execution_count": 10, + "id": "8729a280-96ca-49aa-941e-ed0bf785c086", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [
+ "Requirement already satisfied: xmos_ai_tools in /usr/local/anaconda3/lib/python3.8/site-packages (0.1.4)\n",
+ "Requirement already satisfied: numpy<2.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from xmos_ai_tools) (1.20.1)\n",
+ "Requirement already satisfied: tensorflow in /usr/local/anaconda3/lib/python3.8/site-packages (2.8.0)\n",
+ "Requirement already satisfied: libclang>=9.0.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (13.0.0)\n",
+ "Requirement already satisfied: numpy>=1.20 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.20.1)\n",
+ "Requirement already satisfied: typing-extensions>=3.6.6 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (3.7.4.3)\n",
+ "Requirement already satisfied: tensorboard<2.9,>=2.8 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (2.8.0)\n",
+ "Requirement already satisfied: keras<2.9,>=2.8.0rc0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (2.8.0)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (3.3.0)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.1.0)\n",
+ "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (0.2.0)\n",
+ "Requirement already satisfied: six>=1.12.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.15.0)\n",
+ "Requirement already satisfied: gast>=0.2.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (0.5.3)\n",
+ "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.12.1)\n",
+ "Requirement already satisfied: h5py>=2.9.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (2.10.0)\n",
+ "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (3.19.4)\n",
+ "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.6.3)\n",
+ "Requirement already satisfied: tf-estimator-nightly==2.8.0.dev2021122109 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) 
(2.8.0.dev2021122109)\n", + "Requirement already satisfied: flatbuffers>=1.12 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (2.0)\n", + "Requirement already satisfied: keras-preprocessing>=1.1.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.1.2)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.44.0)\n", + "Requirement already satisfied: setuptools in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (52.0.0.post20210125)\n", + "Requirement already satisfied: absl-py>=0.4.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (1.0.0)\n", + "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorflow) (0.24.0)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from astunparse>=1.6.0->tensorflow) (0.36.2)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (0.4.6)\n", + "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (0.6.1)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (2.25.1)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (3.3.6)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (1.8.1)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (2.6.0)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/anaconda3/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (1.0.1)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (0.2.8)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (5.0.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/anaconda3/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (4.8)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow) (1.3.1)\n", + "Requirement already satisfied: importlib-metadata>=4.4 in /usr/local/anaconda3/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard<2.9,>=2.8->tensorflow) (4.11.2)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/anaconda3/lib/python3.8/site-packages (from importlib-metadata>=4.4->markdown>=2.6.8->tensorboard<2.9,>=2.8->tensorflow) (3.4.1)\n", + "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/anaconda3/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (0.4.8)\n", + "Requirement already satisfied: chardet<5,>=3.0.2 in 
/usr/local/anaconda3/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (4.0.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/anaconda3/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (2020.12.5)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/anaconda3/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (2.10)\n",
+ "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/anaconda3/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (1.26.4)\n",
+ "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/anaconda3/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow) (3.2.0)\n" + ] + } + ],
+ "source": [ + "! pip install xmos_ai_tools\n", + "! pip install tensorflow" + ] + },
+ { + "cell_type": "code", + "execution_count": 11, + "id": "12967287", + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "from xmos_ai_tools import xformer, xcore_tflm_host_interpreter" + ] + },
+ { + "cell_type": "markdown", + "id": "b592ad9d-79e0-4304-a005-57eb03bf26ef", + "metadata": {}, + "source": [ + "# Make a Model to convert\n", + "Use Keras to make a model of arbitrary size and shape." + ] + },
+ { + "cell_type": "code", + "execution_count": 12, + "id": "39cfdc25", + "metadata": {}, + "outputs": [], + "source": [ + "pool_size = (2, 2)\n", + "input_shape = (3, 3, 4)\n", + "model = tf.keras.Sequential([\n", + " tf.keras.layers.AveragePooling2D(pool_size=pool_size, input_shape=input_shape)\n", + "])\n", + "model.compile()" + ] + },
+ { + "cell_type": "markdown", + "id": "50d58472-a59c-45c5-b47c-fc8d571b7677", + "metadata": {}, + "source": [ + "## Convert the Keras model into a TFLite model\n", + "The xcore converter cannot optimise a Keras model directly to run on xcore devices, so the model must first be converted into a TFLite file (a flatbuffer)." + ] + },
+ { + "cell_type": "code", + "execution_count": 13, + "id": "aafe3198", + "metadata": {}, + "outputs": [], + "source": [ + "converter = tf.lite.TFLiteConverter.from_keras_model(model)" + ] + },
+ { + "cell_type": "markdown", + "id": "fa2f5c47", + "metadata": {}, + "source": [ + "### Representative Dataset\n", + "\n", + "TensorFlow can quantise the converted model to int8 if you pass it a representative dataset. This dataset can be a small subset (around 100-500 samples) of the training or validation data.\n", + "\n", + "The function below generates random data as a stand-in, but see [the TensorFlow documentation](https://www.tensorflow.org/lite/performance/post_training_quantization) for how to do this in practice." + ] + },
+ { + "cell_type": "code", + "execution_count": 14, + "id": "fa57eb0a", + "metadata": {}, + "outputs": [], + "source": [ + "# As an example, use a random dataset\n", + "def representative_dataset():\n", + " batch_size = 8\n", + " for _ in range(100):\n", + " data = np.random.uniform(-0.1, 0.001, (batch_size, *input_shape))\n", + " yield [data.astype(np.float32)]" + ] + },
+ { + "cell_type": "markdown", + "id": "52dca7ac-8a6a-4d9c-b4d3-42d0b3fbb622", + "metadata": {}, + "source": [ + "* **tf.lite.Optimize.DEFAULT:** Default optimization strategy that quantizes model weights. Enhanced optimizations are gained by providing a representative dataset that quantizes biases and activations as well. 
The converter will do its best to reduce size and latency while minimizing the loss in accuracy.\n", + "\n", + "* **target_spec.supported_ops:** Restricts the converted model to the listed TFLite op sets; here only the built-in int8 ops are allowed. [TensorFlow docs](https://www.tensorflow.org/lite/guide/ops_select)" + ] + },
+ { + "cell_type": "code", + "execution_count": 15, + "id": "7b093742", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /var/folders/fg/_pf9q2tj3cl9yfjb392zl7rm0000gn/T/tmpd8ewikm8/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: /var/folders/fg/_pf9q2tj3cl9yfjb392zl7rm0000gn/T/tmpd8ewikm8/assets\n", + "/usr/local/anaconda3/lib/python3.8/site-packages/tensorflow/lite/python/convert.py:746: UserWarning: Statistics for quantized inputs were expected, but not specified; continuing anyway.\n", + " warnings.warn(\"Statistics for quantized inputs were expected, but not \"\n", + "WARNING:absl:Buffer deduplication procedure will be skipped when flatbuffer library is not properly loaded\n" + ] + } + ],
+ "source": [ + "# Set up the converter to turn the float model into an int8 quantised model\n", + "# See https://www.tensorflow.org/lite/performance/post_training_quantization\n", + "converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + "converter.representative_dataset = representative_dataset\n", + "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]\n", + "converter.inference_input_type = tf.int8\n", + "converter.inference_output_type = tf.int8\n", + "\n", + "tflite_model = converter.convert()\n", + "\n", + "# Save the model.\n", + "tflite_model_path = 'avgpooling2d.tflite'\n", + "with open(tflite_model_path, 'wb') as f:\n", + " f.write(tflite_model)" + ] + },
+ { + "cell_type": "markdown", + "id": "379a4e6b-7a17-49c8-b289-0714b315f0bc", + "metadata": {}, + "source": [ + "# Optimise model for XCore\n", + "Use `xformer.convert(input_path, output_path, params)` to make an xcore-optimised version of the model; the cell below passes `None` as the third argument." + ] + },
+ { + "cell_type": "code", + "execution_count": 16, + "id": "a26aa710", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xcore_optimised_path = 'xcore_model.tflite'\n", + "xformer.convert(tflite_model_path, xcore_optimised_path, None)" + ] + },
+ { + "cell_type": "markdown", + "id": "73ee3d58-55da-4c5b-9ec5-46c82321b0a8", + "metadata": {}, + "source": [ + "# Check it worked\n", + "To check that the conversion worked, we can run both models through their interpreters and make sure that, given the same input, they produce the same output.\n", + "\n", + "For standard TensorFlow Lite models, use `tf.lite.Interpreter`. For xcore-optimised models, the `XCOREInterpreter` from the `xcore_tflm_host_interpreter` module must be used."
+ ] + },
+ { + "cell_type": "code", + "execution_count": 17, + "id": "5ada7955", + "metadata": {}, + "outputs": [], + "source": [ + "tf_interpreter = tf.lite.Interpreter(model_path=tflite_model_path)\n", + "tf_interpreter.allocate_tensors()\n", + "\n", + "tf_input_details = tf_interpreter.get_input_details()\n", + "tf_output_details = tf_interpreter.get_output_details()\n", + "\n", + "tf_input_shape = tf_input_details[0]['shape']\n", + "# Fill with a constant (126) so that the xcore model can be given the same input\n", + "tf_input_data = np.array(np.random.randint(126, 127, tf_input_shape), dtype=np.int8)\n", + "\n", + "tf_interpreter.set_tensor(tf_input_details[0]['index'], tf_input_data)\n", + "\n", + "tf_interpreter.invoke()\n", + "tf_output_data = tf_interpreter.get_tensor(tf_output_details[0]['index'])" + ] + },
+ { + "cell_type": "code", + "execution_count": 18, + "id": "62d94234", + "metadata": {}, + "outputs": [], + "source": [ + "# XCOREInterpreter is the interpreter class inside the xcore_tflm_host_interpreter module\n", + "xcore_interpreter = xcore_tflm_host_interpreter.XCOREInterpreter(model_path=xcore_optimised_path)\n", + "xcore_interpreter.allocate_tensors()\n", + "\n", + "xcore_input_details = xcore_interpreter.get_input_details()\n", + "xcore_output_details = xcore_interpreter.get_output_details()\n", + "\n", + "xcore_input_shape = xcore_input_details[0]['shape']\n", + "# Fill with 126 so that the xcore model gets the same input as the tflite model\n", + "xcore_input_data = np.array(np.random.randint(126, 127, xcore_input_shape), dtype=np.int8)\n", + "\n", + "xcore_interpreter.set_tensor(xcore_input_details[0]['index'], xcore_input_data)\n", + "\n", + "xcore_interpreter.invoke()\n", + "xcore_output_data = xcore_interpreter.get_tensor(xcore_output_details[0]['index'])" + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "3f8e306c-f6f1-42d3-b1b3-384142733fbe", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Do both models produce the same output?\")\n", + "print(\"yes\" if np.array_equal(xcore_output_data[0], tf_output_data[0]) else \"no\")\n" + ] + },
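+ { + "cell_type": "markdown", + "id": "9a1b2c3d-4e5f-4a6b-8c7d-0e1f2a3b4c5d", + "metadata": {}, + "source": [ + "### Optional: compare against the float Keras model\n", + "\n", + "As a further sanity check, the int8 output can be dequantised with the scale and zero point reported by the interpreter and compared against the original float Keras model. This is only a sketch: it reuses the names defined in the cells above, and small differences (up to roughly one quantisation step) are expected." + ] + },
+ { + "cell_type": "code", + "execution_count": null, + "id": "7f6e5d4c-3b2a-4190-8877-665544332211", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the quantisation parameters from the TFLite interpreter details\n", + "in_scale, in_zero = tf_input_details[0]['quantization']\n", + "out_scale, out_zero = tf_output_details[0]['quantization']\n", + "\n", + "# Map the int8 input back to float and run it through the original Keras model\n", + "float_input = (tf_input_data.astype(np.float32) - in_zero) * in_scale\n", + "keras_output = model.predict(float_input)\n", + "\n", + "# Dequantise the TFLite output and report the largest difference\n", + "dequantised_output = (tf_output_data.astype(np.float32) - out_zero) * out_scale\n", + "print(\"Max abs difference vs Keras:\", np.max(np.abs(dequantised_output - keras_output)))" + ] + },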
+ { + "cell_type": "code", + "execution_count": null, + "id": "5ee7356b-80a7-40fe-a0a7-376444114c13", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbb4f40b-0f94-45e7-b398-dda5d002294c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ca3b7e5-703a-4818-98f1-f349057d0a26", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "0436a0dea52299ed28644175e220c962eae431d92561f4f402c0c00186dcb06f" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}