From 71c600d6b24ac109040f8c0e7f6d703634c7242b Mon Sep 17 00:00:00 2001
From: Vyacheslav Bazhenov
Date: Mon, 15 Jul 2024 11:57:53 +0300
Subject: [PATCH] [luci] Introduce Compress weights pass

This commit introduces CompressWeightsPass for Conv2D

ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov
---
 Makefile.template | 4 +-
 compiler/circle-inspect/CMakeLists.txt | 8 +-
 compiler/circle-inspect/requires.cmake | 2 +-
 compiler/circle-operator/CMakeLists.txt | 8 +-
 compiler/circle-operator/requires.cmake | 2 +-
 compiler/circle-tensordump/CMakeLists.txt | 8 +-
 compiler/circle-tensordump/requires.cmake | 2 +-
 compiler/circle-verify/CMakeLists.txt | 8 +-
 compiler/circle-verify/requires.cmake | 2 +-
 compiler/circle2circle/src/Circle2Circle.cpp | 3 +
 compiler/circlechef/CMakeLists.txt | 6 +-
 compiler/circlechef/circle/CMakeLists.txt | 4 +-
 compiler/circlechef/core/CMakeLists.txt | 2 +-
 compiler/circlechef/requires.cmake | 2 +-
 compiler/circledump/CMakeLists.txt | 10 +-
 compiler/circledump/README.md | 2 +-
 compiler/circledump/requires.cmake | 2 +-
 .../include/luci_interpreter/core/Tensor.h | 15 +
 .../pal/linux/HuffmanDecoder.h | 357 ++++
 .../luci-interpreter/pal/linux/PALConv2d.h | 135 +-
 .../src/SimpleMemoryManager.cpp | 17 +-
 compiler/luci-interpreter/src/core/Tensor.cpp | 26 +-
 .../luci-interpreter/src/kernels/Conv2D.cpp | 147 +-
 .../luci-interpreter/src/kernels/Conv2D.h | 2 +
 compiler/luci-interpreter/src/kernels/Utils.h | 3 +-
 .../src/loader/GraphLoader.cpp | 3 +
 compiler/luci-pass-value-py-test/test.lst | 1 +
 compiler/luci/export/CMakeLists.txt | 4 +-
 .../luci/export/src/CircleExporterUtils.cpp | 15 +
 .../luci/export/src/CircleExporterUtils.h | 1 +
 .../luci/export/src/CircleTensorExporter.cpp | 28 +-
 compiler/luci/import/CMakeLists.txt | 4 +-
 .../include/luci/Import/CircleImporterUtils.h | 33 +
 .../luci/import/src/CircleImporterUtils.cpp | 36 +
 compiler/luci/import/src/CircleReader.cpp | 6 +
 .../luci/import/src/Nodes/CircleConst.cpp | 2 +-
 .../luci/import/src/Nodes/CircleConv2D.cpp | 1 +
 .../include/luci/IR/AttrWeightCompression.h | 33 +
 .../lang/include/luci/IR/Nodes/CircleConst.h | 5 +
 compiler/luci/lang/src/Nodes/CircleConst.cpp | 3 +
 compiler/luci/partition/CMakeLists.txt | 2 +-
 .../luci/pass/include/luci/CircleOptimizer.h | 1 +
 .../include/luci/Pass/CompressWeightsPass.h | 39 +
 compiler/luci/pass/src/CircleOptimizer.cpp | 8 +-
 .../luci/pass/src/CompressWeightsPass.cpp | 109 ++
 .../luci/pass/src/helpers/HuffmanDecoder.h | 356 ++++
 .../luci/pass/src/helpers/HuffmanEncoder.h | 207 ++
 compiler/luci/requires.cmake | 2 +-
 compiler/mio-circle09/CMakeLists.txt | 52 +
 compiler/mio-circle09/README.md | 3 +
 compiler/mio-circle09/example.cpp | 41 +
 .../mio-circle09/include/mio_circle/Helper.h | 54 +
 .../mio-circle09/include/mio_circle/Reader.h | 103 +
 compiler/mio-circle09/src/Helper.cpp | 110 ++
 compiler/mio-circle09/src/Helper.test.cpp | 153 ++
 compiler/mio-circle09/src/Reader.cpp | 222 +++
 compiler/mio-circle09/src/Reader.test.cpp | 60 +
 compiler/pics/CMakeLists.txt | 2 +-
 compiler/pics/requires.cmake | 2 +-
 compiler/tflite2circle/CMakeLists.txt | 4 +-
 compiler/tflite2circle/requires.cmake | 2 +-
 infra/nncc/Makefile.arm32 | 4 +-
 infra/packaging/preset/20230907 | 2 +-
 infra/packaging/preset/20230907_windows | 2 +-
 infra/scripts/compiler_modules.sh | 4 +-
 packaging/nnfw.spec | 4 +-
 res/CircleSchema/0.9/circle_schema.fbs | 1705 +++++++++++++++++
 67 files changed, 4126 insertions(+), 79 deletions(-)
 create mode 100644 compiler/luci-interpreter/pal/linux/HuffmanDecoder.h
 create mode 100644
compiler/luci/import/include/luci/Import/CircleImporterUtils.h create mode 100644 compiler/luci/import/src/CircleImporterUtils.cpp create mode 100644 compiler/luci/lang/include/luci/IR/AttrWeightCompression.h create mode 100644 compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h create mode 100644 compiler/luci/pass/src/CompressWeightsPass.cpp create mode 100644 compiler/luci/pass/src/helpers/HuffmanDecoder.h create mode 100644 compiler/luci/pass/src/helpers/HuffmanEncoder.h create mode 100644 compiler/mio-circle09/CMakeLists.txt create mode 100644 compiler/mio-circle09/README.md create mode 100644 compiler/mio-circle09/example.cpp create mode 100644 compiler/mio-circle09/include/mio_circle/Helper.h create mode 100644 compiler/mio-circle09/include/mio_circle/Reader.h create mode 100644 compiler/mio-circle09/src/Helper.cpp create mode 100644 compiler/mio-circle09/src/Helper.test.cpp create mode 100644 compiler/mio-circle09/src/Reader.cpp create mode 100644 compiler/mio-circle09/src/Reader.test.cpp create mode 100644 res/CircleSchema/0.9/circle_schema.fbs diff --git a/Makefile.template b/Makefile.template index 6e0c29590c7..d16aa3d274b 100644 --- a/Makefile.template +++ b/Makefile.template @@ -159,7 +159,7 @@ prepare_nncc_internal: $(WORKSPACE) ifeq (,$(findstring android,$(TARGET_OS))) EXTERNAL_FLATC=$(BUILDTOOL_PATH)/bin/flatc ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=$(NPROCS) -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCMAKE_INSTALL_PREFIX=$(OVERLAY_FOLDER) \ - -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \ + -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle09;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \ $(OPTIONS_NNCC) ./nncc build -j$(NPROCS) cmake --install $(NNCC_FOLDER) $(INSTALL_OPTIONS) @@ -171,7 +171,7 @@ ifeq (,$(findstring android,$(TARGET_OS))) @cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor @cp compiler/oops/include/oops/InternalExn.h ${OVERLAY_FOLDER}/include/oops @cp compiler/luci/lang/include/luci/IR/CircleNodes.lst ${OVERLAY_FOLDER}/include/luci/IR - @cp ${NNCC_WORKSPACE}/compiler/mio-circle08/gen/mio/circle/schema_generated.h ${OVERLAY_FOLDER}/include/mio/circle + @cp ${NNCC_WORKSPACE}/compiler/mio-circle09/gen/mio/circle/schema_generated.h ${OVERLAY_FOLDER}/include/mio/circle @cp -r ${NNCC_WORKSPACE}/overlay/include/flatbuffers ${OVERLAY_FOLDER}/include @echo "Done prepare-nncc" endif diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt index 76e65ddc6a8..857114d2c72 100644 --- a/compiler/circle-inspect/CMakeLists.txt +++ b/compiler/circle-inspect/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -10,6 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES}) target_include_directories(circle-inspect PRIVATE src) target_link_libraries(circle-inspect arser) target_link_libraries(circle-inspect foder) -target_link_libraries(circle-inspect mio_circle08) -target_link_libraries(circle-inspect mio_circle08_helper) +target_link_libraries(circle-inspect mio_circle09) +target_link_libraries(circle-inspect mio_circle09_helper) target_link_libraries(circle-inspect safemain) diff --git a/compiler/circle-inspect/requires.cmake 
b/compiler/circle-inspect/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-inspect/requires.cmake +++ b/compiler/circle-inspect/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt index a13e76eb86b..492ad2da991 100644 --- a/compiler/circle-operator/CMakeLists.txt +++ b/compiler/circle-operator/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -10,8 +10,8 @@ add_executable(circle-operator ${DRIVER} ${SOURCES}) target_include_directories(circle-operator PRIVATE src) target_link_libraries(circle-operator arser) target_link_libraries(circle-operator foder) -target_link_libraries(circle-operator mio_circle08) -target_link_libraries(circle-operator mio_circle08_helper) +target_link_libraries(circle-operator mio_circle09) +target_link_libraries(circle-operator mio_circle09_helper) target_link_libraries(circle-operator safemain) install(TARGETS circle-operator DESTINATION bin) diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-operator/requires.cmake +++ b/compiler/circle-operator/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-tensordump/CMakeLists.txt b/compiler/circle-tensordump/CMakeLists.txt index c65f634e8d2..04d0a9f2495 100644 --- a/compiler/circle-tensordump/CMakeLists.txt +++ b/compiler/circle-tensordump/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) nnas_find_package(HDF5 COMPONENTS STATIC QUIET) @@ -19,8 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS}) target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES}) target_link_libraries(circle-tensordump PRIVATE arser) target_link_libraries(circle-tensordump PRIVATE foder) -target_link_libraries(circle-tensordump PRIVATE mio_circle08) -target_link_libraries(circle-tensordump PRIVATE mio_circle08_helper) +target_link_libraries(circle-tensordump PRIVATE mio_circle09) +target_link_libraries(circle-tensordump PRIVATE mio_circle09_helper) target_link_libraries(circle-tensordump PRIVATE safemain) install(TARGETS circle-tensordump DESTINATION bin) diff --git a/compiler/circle-tensordump/requires.cmake b/compiler/circle-tensordump/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-tensordump/requires.cmake +++ b/compiler/circle-tensordump/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt index 3ccdd0306aa..12909d65e57 100644 --- a/compiler/circle-verify/CMakeLists.txt +++ b/compiler/circle-verify/CMakeLists.txt @@ -1,14 +1,14 @@ -if(NOT TARGET mio_circle08) - message(STATUS "Skip circle-verify: mio_circle08 not found") +if(NOT TARGET mio_circle09) + message(STATUS "Skip circle-verify: mio_circle09 not found") return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) file(GLOB_RECURSE SOURCES "src/*.cpp") 
 add_executable(circle-verify ${SOURCES})
 target_include_directories(circle-verify PRIVATE src)
 target_link_libraries(circle-verify arser)
-target_link_libraries(circle-verify mio_circle08)
+target_link_libraries(circle-verify mio_circle09)
 target_link_libraries(circle-verify safemain)
 target_link_libraries(circle-verify cwrap)
 target_link_libraries(circle-verify foder)
diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake
index d382ef9761d..4c4f0fca4d1 100644
--- a/compiler/circle-verify/requires.cmake
+++ b/compiler/circle-verify/requires.cmake
@@ -1,5 +1,5 @@
 require("arser")
-require("mio-circle08")
+require("mio-circle09")
 require("safemain")
 require("cwrap")
 require("foder")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index c32060bd8af..33f55f082d6 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -191,6 +191,8 @@ int entry(int argc, char **argv)
              "This will convert single input Transpose to Reshape");
   add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
   add_switch(arser, "--unroll_unidirseqlstm", "Unroll UnidirectionalSequenceLSTM operator.");
+  add_switch(arser, "--compress_weights_huffman",
+             "Lossless weight compression with Huffman encoding.");
   add_switch(arser, "--convert_nchw_to_nhwc",
              "Experimental: This will convert NCHW operators to NHWC under the assumption that "
              "input model is NCHW.");
@@ -340,6 +342,7 @@ int entry(int argc, char **argv)
   option_str_to_enum["decompose_softmax"] = Algorithms::DecomposeSoftmaxPass;
   option_str_to_enum["expand_broadcast_const"] = Algorithms::ExpandBroadcastConst;
   option_str_to_enum["unroll_unidirseqlstm"] = Algorithms::UnrollUnidirSeqLSTM;
+  option_str_to_enum["compress_weights_huffman"] = Algorithms::CompressWeightsHuffman;
   // clang-format on
   if (arser.get("--verbose"))
diff --git a/compiler/circlechef/CMakeLists.txt b/compiler/circlechef/CMakeLists.txt
index 18b58a9c17d..4c11cad756f 100644
--- a/compiler/circlechef/CMakeLists.txt
+++ b/compiler/circlechef/CMakeLists.txt
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
   return()
 endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_circle08)
-  message(STATUS "circlechef: SKIP (missing mio-circle08)")
+if(NOT TARGET mio_circle09)
+  message(STATUS "circlechef: SKIP (missing mio-circle09)")
   return()
-endif(NOT TARGET mio_circle08)
+endif(NOT TARGET mio_circle09)
 # Recipe Parser
 add_subdirectory(proto)
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index e50d4a64ed5..075f2f0346a 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -4,7 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES})
 target_include_directories(circlechef_circle PUBLIC include)
 target_include_directories(circlechef_circle PRIVATE src)
 target_link_libraries(circlechef_circle circlechef_proto)
-target_link_libraries(circlechef_circle mio_circle08)
-target_link_libraries(circlechef_circle mio_circle08_helper)
+target_link_libraries(circlechef_circle mio_circle09)
+target_link_libraries(circlechef_circle mio_circle09_helper)
 target_link_libraries(circlechef_circle cwrap)
 target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/core/CMakeLists.txt b/compiler/circlechef/core/CMakeLists.txt
index 073abbdfc4f..48189fc65aa 100644
--- a/compiler/circlechef/core/CMakeLists.txt
+++ b/compiler/circlechef/core/CMakeLists.txt
@@ -7,7 +7,7 @@ target_include_directories(circlechef_core PUBLIC include) target_include_directories(circlechef_core PRIVATE src) target_link_libraries(circlechef_core PUBLIC circlechef_proto) target_link_libraries(circlechef_core PUBLIC circlechef_log) -target_link_libraries(circlechef_core PUBLIC mio_circle08) +target_link_libraries(circlechef_core PUBLIC mio_circle09) target_link_libraries(circlechef_core PUBLIC souschef) target_link_libraries(circlechef_core PRIVATE nncc_coverage) diff --git a/compiler/circlechef/requires.cmake b/compiler/circlechef/requires.cmake index 77bfddc97ba..8e7f3524fa7 100644 --- a/compiler/circlechef/requires.cmake +++ b/compiler/circlechef/requires.cmake @@ -1,6 +1,6 @@ require("arser") require("cwrap") -require("mio-circle08") +require("mio-circle09") require("safemain") require("hermes") require("hermes-std") diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index 9945ba0f0e0..9a497df00d8 100644 --- a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -1,7 +1,7 @@ -if(NOT TARGET mio_circle08) - message(STATUS "Skip circledump: mio_circle08 not found") +if(NOT TARGET mio_circle09) + message(STATUS "Skip circledump: mio_circle09 not found") return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -11,8 +11,8 @@ add_executable(circledump ${DRIVER} ${SOURCES}) target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) target_link_libraries(circledump foder) -target_link_libraries(circledump mio_circle08) -target_link_libraries(circledump mio_circle08_helper) +target_link_libraries(circledump mio_circle09) +target_link_libraries(circledump mio_circle09_helper) target_link_libraries(circledump safemain) install(TARGETS circledump DESTINATION bin) diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md index 9fa2653006c..972c4432146 100644 --- a/compiler/circledump/README.md +++ b/compiler/circledump/README.md @@ -65,6 +65,6 @@ O T(3) ofm ### Dependency -- mio-circle08 +- mio-circle09 - safemain - FlatBuffers diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circledump/requires.cmake +++ b/compiler/circledump/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h index f118ee22c24..91ca85380c4 100644 --- a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h +++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h @@ -18,6 +18,7 @@ #define LUCI_INTERPRETER_CORE_TENSOR_H #include "luci_interpreter/core/DataType.h" +#include #include #include @@ -146,6 +147,8 @@ class Tensor void resize(const Shape &new_shape); + void resize(const Shape &new_shape, size_t raw_size); + void set_data_buffer(uint8_t *buffer) { if (buffer == nullptr) @@ -173,11 +176,21 @@ class Tensor void set_offset(int32_t offset) { _offset = offset; } + luci::CompressionType get_compression() const { return _compression; } + + void set_compression(luci::CompressionType compression) { _compression = compression; } + + size_t get_raw_size(void) const { return _raw_size; } + void set_raw_size(size_t size) { _raw_size = size; } + private: DataType _element_type; Shape _shape; AffineQuantization 
_quantization; uint8_t *_data = nullptr; + // Used for compressed/sparsed tensors when size != WxHxLxD + size_t _raw_size{0}; + std::string _name; bool _data_allocated = false; // Write of tensor is reported to registered Observers only if this tensor is observable @@ -190,6 +203,8 @@ class Tensor // Used by static memory manager. // Stores the offset from the beginning of the allocated memory buffer. int32_t _offset = -1; + + luci::CompressionType _compression{luci::CompressionType::NONE}; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h b/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h new file mode 100644 index 00000000000..6a8dd712b7c --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ +#define __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace luci_interpreter_pal +{ + +namespace huffman +{ +template struct Node +{ + Node *p_left = nullptr; + Node *p_right = nullptr; + T data; + unsigned int freq; +}; + +template class HuffmanDecoder +{ +private: + Node *root = nullptr; + std::unordered_map huffmanCode; + std::vector encoded_bitset{}; + std::size_t nodes_count = 0; + +private: + Node *allocateNode(T data, unsigned int freq, Node *p_left, Node *p_right) + { + Node *node = new Node; + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + nodes_count++; + return node; + } + + std::string exportHuffmanTreeToString(Node *node) + { + if (node == nullptr) + return ""; + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + Node *importHuffmanTreeFromBoolVec(std::vector &vec, size_t &index) + { + if (vec.empty()) + return nullptr; + if (vec[index]) + { + index++; + Node *p_left = importHuffmanTreeFromBoolVec(vec, index); + Node *p_right = importHuffmanTreeFromBoolVec(vec, index); + return allocateNode(0, 0, p_left, p_right); + } + else if (vec[index] == false) + { + index++; + T tmp = 0; + for (size_t i = 0; i < sizeof(T) * CHAR_BIT; ++i) + { + if (vec[index++]) + tmp |= (1 << (sizeof(T) * CHAR_BIT - 1)) >> i; + } + + return allocateNode(tmp, 0, nullptr, nullptr); + } + return nullptr; + } + + Node *importHuffmanTreeFromString(std::string &str) + { + + if (str.substr(0, 1) == "1") + { + str = str.substr(1); + Node *p_left = importHuffmanTreeFromString(str); + Node *p_right = importHuffmanTreeFromString(str); + return allocateNode(0, 0, p_left, p_right); + } + else if (str.substr(0, 1) == "0") + { + str = str.substr(1); + std::bitset tmp(str.substr(0, sizeof(T) * CHAR_BIT)); + str = 
str.substr(sizeof(T) * CHAR_BIT); + return allocateNode(static_cast(tmp.to_ullong()), 0, nullptr, nullptr); + } + } + + void buildHuffmanTable(Node *node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + huffmanCode[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + void decode(Node *node, std::string &str, std::vector &out_vec, size_t &index) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + out_vec.push_back(node->data); + return; + } + + if (str.size() == index) + return; + if (str[index] == '0') + { + decode(node->p_left, str, out_vec, ++index); + } + else + { + decode(node->p_right, str, out_vec, ++index); + } + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const uint8_t *pack_ptr) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.tree_vec.push_back(true); + else + tree_and_data.tree_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const std::vector &packed_vec) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + +public: + void decode(Node *node, std::vector &vec, T *dst_ptr) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + *dst_ptr = node->data; + return; + } + + if (vec.size() == _decode_idx) + return; + if (vec[_decode_idx] == false) + { + ++_decode_idx; + decode(node->p_left, vec, dst_ptr); + } + else + { + ++_decode_idx; + decode(node->p_right, vec, dst_ptr); + } + } + +private: + size_t _decode_idx = 0; + EncodedTreeAndData _encoded_tree_and_data; + +public: + void init_decoder(const uint8_t *input) + { + size_t index = 0; + _encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + root = importHuffmanTreeFromBoolVec(_encoded_tree_and_data.tree_vec, index); + } + + void reset_decode_idx(void) { _decode_idx = 0; } + + int decode_n(uint8_t *dst_ptr, size_t num) + { + size_t bytes_decoded = 0; + for (size_t i = 0; i < num && _decode_idx < _encoded_tree_and_data.data_vec.size(); ++i) + { + decode(root, _encoded_tree_and_data.data_vec, dst_ptr + bytes_decoded); + bytes_decoded++; + } + return bytes_decoded; + } + + HuffmanDecoder() = default; +}; +} // namespace huffman +} // namespace luci_interpreter_pal +#endif // __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h index 0ce83fc6e35..4d0f3a37774 100644 --- a/compiler/luci-interpreter/pal/linux/PALConv2d.h +++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h @@ -19,6 +19,7 @@ #include #include +#include "HuffmanDecoder.h" namespace luci_interpreter_pal { @@ -84,6 +85,135 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS scratchpad_data, gemmlowp_context.get()); } +template +void ConvPerChannelHuffman(const tflite::ConvParams ¶ms, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + // Get parameters. + const int32_t input_offset = params.input_offset; // r = s(q - Z) + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32_t output_offset = params.output_offset; + const int32_t filter_offset = params.weights_offset; + + // Set min and max value of the output. + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Consistency check. 
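+  // Note on the decompression strategy: "filter_data" here is not a plain
+  // weight array but the Huffman-packed stream produced by the compress
+  // weights pass. For each output channel, decode_n() below expands one
+  // filter block (filter_height * filter_width * filter_input_depth values)
+  // into scratchpad_data, so only a single channel's weights are held
+  // uncompressed at any time.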
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Check dimensions of the tensors. + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + huffman::HuffmanDecoder decoder; + decoder.init_decoder(reinterpret_cast(filter_data)); + decoder.reset_decode_idx(); + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + auto group = out_channel / filters_per_group; + + // extract compressed filter + decoder.decode_n(reinterpret_cast(&scratchpad_data[0]), scratchpad_shape.FlatSize()); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) + { + const int in_x_origin = (out_x * stride_width) - pad_width; + int32_t acc = 0; + + for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) + { + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + + if (!is_point_inside_image) + { + continue; + } + + int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = + scratchpad_data[(filter_y * filter_height + filter_x) * filter_width + + in_channel]; + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. + // accumulator depth is smaller than 2^16. 
+ acc += (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + + if (bias_data) + { + acc += bias_data[out_channel]; + } + acc = tflite::MultiplyByQuantizedMultiplier(acc, mult[out_channel], shifts[out_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast(acc); + } + } + } + } +} + static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, const int32_t *shifts, const tflite::RuntimeShape &input_shape, const int8 *input_data, const tflite::RuntimeShape &filter_shape, @@ -105,7 +235,8 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, const tflite::RuntimeShape &filter_shape, - const tflite::RuntimeShape &output_shape) + const tflite::RuntimeShape &output_shape, + bool is_compressed = false) { const int32_t filter_height = filter_shape.Dims(1); const int32_t filter_width = filter_shape.Dims(2); @@ -117,7 +248,7 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 || filter_height != 1 || filter_width != 1; auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 && - (need_dilated_scratchpad || need_non_dilated_scratchpad); + (need_dilated_scratchpad || need_non_dilated_scratchpad || is_compressed); if (_need_scratchpad) { diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp index a39c34a0ad8..bf13b0cc9a8 100644 --- a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp +++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp @@ -29,12 +29,21 @@ void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) { release_memory(tensor); } - const auto element_size = getDataTypeSize(tensor.element_type()); + size_t bytes_to_allocate = 0; + if (tensor.get_raw_size() > 0) + { + bytes_to_allocate = tensor.get_raw_size(); + } + else + { + const auto element_size = getDataTypeSize(tensor.element_type()); - // Use large_num_elements to avoid overflow - const auto num_elements = tensor.shape().large_num_elements(); + // Use large_num_elements to avoid overflow + const auto num_elements = tensor.shape().large_num_elements(); + bytes_to_allocate = num_elements * element_size; + } - auto *data = new uint8_t[num_elements * element_size]; + auto *data = new uint8_t[bytes_to_allocate]; tensor.set_data_buffer(data); } diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp index 3c3c5ffffe8..b7769174e23 100644 --- a/compiler/luci-interpreter/src/core/Tensor.cpp +++ b/compiler/luci-interpreter/src/core/Tensor.cpp @@ -45,14 +45,34 @@ void Tensor::writeData(const void *data_ptr, size_t data_size) { const size_t element_size = getDataTypeSize(element_type()); const int32_t num_elements = shape().num_elements(); - if (data_size != num_elements * element_size) + if (_raw_size > 0) { - throw std::invalid_argument("Invalid data size."); + if (data_size != _raw_size) + { + throw std::invalid_argument("Invalid data size."); + } + } + else + { + if (data_size != num_elements * element_size) + { + throw std::invalid_argument("Invalid data size."); + } } assert(data_ptr != nullptr); std::memcpy(data(), data_ptr, data_size); } -void 
Tensor::resize(const Shape &new_shape) { _shape = new_shape; } +void Tensor::resize(const Shape &new_shape) +{ + _shape = new_shape; + _raw_size = 0; +} + +void Tensor::resize(const Shape &new_shape, size_t raw_size) +{ + _shape = new_shape; + _raw_size = raw_size; +} } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 9aae9da2644..a5377408adc 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -117,9 +117,10 @@ void Conv2D::configure() params.dilation_height_factor = _params.dilation_height_factor; params.dilation_width_factor = _params.dilation_width_factor; auto scratchpad = getOutputTensors()[1]; + bool is_compressed = filter()->get_compression() != luci::CompressionType::NONE; luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, getTensorShape(input()), getTensorShape(filter()), - getTensorShape(output())); + getTensorShape(output()), is_compressed); switch (_params.activation) { @@ -145,20 +146,34 @@ void Conv2D::execute() const } throw std::runtime_error("luci-intp Conv2D(2) Unsupported type."); case DataType::U8: - if (filter()->scales().size() == 1) + if (filter()->get_compression() == luci::CompressionType::HUFFMAN) { - evalQuantized(); + evalQuantizedU8PerChannelHuffman(); } - else if (filter()->scales().size() > 1) + else { - LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); - LUCI_INTERPRETER_CHECK(filter()->scales().size() == - static_cast(filter()->shape().dim(0))); - evalQuantizedPerChannel(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } } break; case DataType::S8: - evalQuantizedS8PerChannel(); + if (filter()->get_compression() == luci::CompressionType::HUFFMAN) + { + evalQuantizedS8PerChannelHuffman(); + } + else + { + evalQuantizedS8PerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -321,6 +336,120 @@ void Conv2D::evalQuantizedPerChannel() const } } +// TODO: remove code duplication with S8 +void Conv2D::evalQuantizedU8PerChannelHuffman() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = -filter()->zero_point(); // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + uint8_t *scratchpad_data = nullptr; + + // Scratchpad used for decompression + const auto filter_shape = getTensorShape(filter()); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + auto scratchpad_shape = Shape({filter_height, filter_width, filter_input_depth}); + + if (scratchpad->is_allocatable()) + { + scratchpad->resize(scratchpad_shape); + scratchpad_data = scratchpad->data(); + } + luci_interpreter_pal::ConvPerChannelHuffman( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData(input()), getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantizedS8PerChannelHuffman() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + + // Scratchpad used for decompression + const auto filter_shape = getTensorShape(filter()); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + auto scratchpad_shape = Shape({filter_height, filter_width, filter_input_depth}); + + if (scratchpad->is_allocatable()) + { + scratchpad->resize(scratchpad_shape); + scratchpad_data = scratchpad->data(); + } + + luci_interpreter_pal::ConvPerChannelHuffman( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData(input()), getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + void Conv2D::evalQuantizedS8PerChannel() const { int32_t activation_min{}; diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 330bf3a2a69..096bd85f4db 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -47,6 +47,8 @@ class Conv2D : public KernelWithParams void evalQuantizedPerChannel() const; void evalQuantizedS8PerChannel() const; void evalQuantizedS16() const; + void evalQuantizedS8PerChannelHuffman() const; + void evalQuantizedU8PerChannelHuffman() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index e975585cdf3..422c0b4d7d8 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -137,7 +137,8 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_ inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale) { - const double input_product_scale = static_cast(input_scale * filter_scale); + const double input_product_scale = + static_cast(static_cast(input_scale) * static_cast(filter_scale)); LUCI_INTERPRETER_CHECK(input_product_scale >= 0); return input_product_scale / static_cast(output_scale); } diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index cf83713d906..6e1399dd467 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -243,9 +243,11 @@ void GraphLoader::loadTensors() const void *const_data = getNodeData(const_node, &data_size); if (const_data != nullptr) { + tensor->set_raw_size(data_size); 
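+        // With a non-zero raw size recorded on the tensor,
+        // SimpleMemoryManager::allocate_memory() (changed above) allocates
+        // data_size bytes instead of num_elements * element_size, which is
+        // what allows a Huffman-packed constant to occupy less memory than
+        // its nominal shape implies.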
_memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); } + tensor->set_compression(const_node->compression()); } else if (const auto *custom_out_node = dynamic_cast(node)) { @@ -258,6 +260,7 @@ void GraphLoader::loadTensors() const void *const_data = getNodeData(custom_node, &data_size); if (const_data != nullptr) { + tensor->set_raw_size(data_size); _memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); } diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst index ebf84e02660..aeec02dc132 100644 --- a/compiler/luci-pass-value-py-test/test.lst +++ b/compiler/luci-pass-value-py-test/test.lst @@ -7,6 +7,7 @@ # eval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exist # --> https://github.com/Samsung/ONE/issues/5782 +eval(Conv2D_U8_000 compress_weights_huffman) eval(FullyConnected_007 replace_non_const_fc_with_batch_matmul) eval(HardSwish_001 decompose_hardswish) eval(Net_Add_FloorMod_Gather_000 remove_gather_guard) diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt index bc10ad24cba..4c21faa10b3 100644 --- a/compiler/luci/export/CMakeLists.txt +++ b/compiler/luci/export/CMakeLists.txt @@ -12,7 +12,7 @@ target_include_directories(luci_export PUBLIC include) target_link_libraries(luci_export PRIVATE luci_lang) target_link_libraries(luci_export PRIVATE luci_service) target_link_libraries(luci_export PRIVATE luci_pass) -target_link_libraries(luci_export PRIVATE mio_circle08) +target_link_libraries(luci_export PRIVATE mio_circle09) target_link_libraries(luci_export PRIVATE luci_env) target_link_libraries(luci_export PRIVATE luci_log) target_link_libraries(luci_export PRIVATE luci_logex) @@ -36,6 +36,6 @@ target_include_directories(luci_export_test PRIVATE src) target_link_libraries(luci_export_test luci_export) target_link_libraries(luci_export_test luci_plan) target_link_libraries(luci_export_test luci_lang) -target_link_libraries(luci_export_test mio_circle08) +target_link_libraries(luci_export_test mio_circle09) target_link_libraries(luci_export_test luci_env) target_link_libraries(luci_export_test oops) diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp index f6e380d7872..13889f17f89 100644 --- a/compiler/luci/export/src/CircleExporterUtils.cpp +++ b/compiler/luci/export/src/CircleExporterUtils.cpp @@ -25,6 +25,21 @@ namespace luci { +circle::CompressionType to_circle_compressiontype(luci::CompressionType type) +{ + switch (type) + { + case luci::CompressionType::UNDEFINED: + case luci::CompressionType::NONE: + return circle::CompressionType_NONE; + case luci::CompressionType::HUFFMAN: + return circle::CompressionType_HUFFMAN; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightCompression", + oops::to_uint32(type)); + } +} + circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func) { switch (func) diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 83b040753dc..309511ebab9 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -29,6 +29,7 @@ namespace luci { +circle::CompressionType to_circle_compressiontype(luci::CompressionType type); circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func); circle::TensorType to_circle_tensortype(loco::DataType type); circle::MirrorPadMode 
to_circle_mirrorpadmode(luci::MirrorPadMode mode); diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 57ae160bd54..bae3fbbc458 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -556,30 +556,48 @@ bool has_same_values(luci::CircleConst *lhs, luci::CircleConst *rhs) switch (lhs->dtype()) { case loco::DataType::FLOAT32: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S4: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S8: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S16: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S32: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S64: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::U4: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::U8: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::BOOL: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); default: @@ -646,8 +664,14 @@ void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &buil auto is_variable = info.is_variable(); - auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, - quantparam, is_variable, sparsityparam, shape_signature_offset); + luci::CircleConst *content = info.content(); + auto compression_type = circle::CompressionType_NONE; + if (content) + compression_type = to_circle_compressiontype(info.content()->compression()); + + auto tensor_offset = + CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam, + is_variable, sparsityparam, shape_signature_offset, false, 0, compression_type); gd._tensors.push_back(tensor_offset); } diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 8c1da0e7729..85f96ad9702 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -12,7 +12,7 @@ target_include_directories(luci_import PUBLIC include) target_link_libraries(luci_import PUBLIC luci_lang) target_link_libraries(luci_import PUBLIC luci_profile) target_link_libraries(luci_import PUBLIC luci_plan) -target_link_libraries(luci_import PUBLIC mio_circle08) +target_link_libraries(luci_import PUBLIC mio_circle09) target_link_libraries(luci_import PRIVATE luci_env) target_link_libraries(luci_import PRIVATE luci_log) target_link_libraries(luci_import PRIVATE luci_logex) @@ -20,7 +20,7 @@ target_link_libraries(luci_import PRIVATE nncc_common) target_link_libraries(luci_import PRIVATE locop) target_link_libraries(luci_import PRIVATE foder) target_link_libraries(luci_import PRIVATE oops) -target_link_libraries(luci_import PRIVATE mio_circle08_helper) +target_link_libraries(luci_import PRIVATE mio_circle09_helper) install(TARGETS luci_import DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/compiler/luci/import/include/luci/Import/CircleImporterUtils.h b/compiler/luci/import/include/luci/Import/CircleImporterUtils.h new file mode 100644 index 
00000000000..f96ec210747 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/CircleImporterUtils.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_IMPORTER_UTILS_H__ +#define __CIRCLE_IMPORTER_UTILS_H__ + +#include + +#include + +#include + +namespace luci +{ + +luci::CompressionType from_circle_compressiontype(circle::CompressionType type); + +} // namespace luci + +#endif // __CIRCLE_IMPORTER_UTILS_H__ diff --git a/compiler/luci/import/src/CircleImporterUtils.cpp b/compiler/luci/import/src/CircleImporterUtils.cpp new file mode 100644 index 00000000000..7e93799bbd6 --- /dev/null +++ b/compiler/luci/import/src/CircleImporterUtils.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Import/CircleImporterUtils.h" + +#include + +namespace luci +{ +luci::CompressionType from_circle_compressiontype(circle::CompressionType type) +{ + switch (type) + { + case circle::CompressionType_NONE: + return luci::CompressionType::NONE; + case circle::CompressionType_HUFFMAN: + return luci::CompressionType::HUFFMAN; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightCompression", + oops::to_uint32(type)); + } +} +} // namespace luci diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 392f0c2a5b9..ccf3e0f7e5b 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -15,6 +15,7 @@ */ #include "luci/Import/CircleReader.h" +#include #include @@ -289,6 +290,11 @@ void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node) if (sparsityparam) node->sparsityparam(std::move(sparsityparam)); } + auto const_node = dynamic_cast(node); + if (const_node) + { + const_node->compression(luci::from_circle_compressiontype(tensor->compression_type())); + } } std::string fb_string2std_string(const flatbuffers::String *fb_str) diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index 189f4d897f4..77121a8b339 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -52,7 +52,7 @@ void copy_data(const VectorWrapper &raw_data, uint32_t num_elements, using T = typename loco::DataTypeImpl
::Type; // TODO calculate the exact buffer size of sparse tensor - if (const_node->sparsityparam()) + if (const_node->sparsityparam() or const_node->compression() != luci::CompressionType::NONE) { num_elements = raw_data.size() / sizeof(T); } diff --git a/compiler/luci/import/src/Nodes/CircleConv2D.cpp b/compiler/luci/import/src/Nodes/CircleConv2D.cpp index 8cbecdc003b..a4ab6221b74 100644 --- a/compiler/luci/import/src/Nodes/CircleConv2D.cpp +++ b/compiler/luci/import/src/Nodes/CircleConv2D.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "luci/Import/CircleImporterUtils.h" #include "luci/Import/Nodes/CircleConv2D.h" #include diff --git a/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h b/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h new file mode 100644 index 00000000000..e1a83b01908 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ +#define __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ + +namespace luci +{ + +enum class CompressionType +{ + UNDEFINED, // This is not defined by TFLite or Circle. This was added to + // prevent programming error. + NONE, + HUFFMAN +}; + +} // namespace luci + +#endif // __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h index 3e9a274e0cd..bdf7631c886 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h @@ -17,6 +17,7 @@ #ifndef __LUCI_IR_CIRCLECONST_H__ #define __LUCI_IR_CIRCLECONST_H__ +#include "luci/IR/AttrWeightCompression.h" #include "luci/IR/CircleNodeDecl.h" #include "luci/IR/CircleOpcode.h" @@ -34,6 +35,9 @@ namespace luci class CircleConst final : public FixedArityNode<0, CircleNodeImpl> { public: + CompressionType compression(void) const; + void compression(CompressionType c); + template uint32_t size(void) const; template void size(uint32_t size); template const typename loco::DataTypeImpl
::Type &at(uint32_t n) const; @@ -46,6 +50,7 @@ class CircleConst final : public FixedArityNode<0, CircleNodeImpl _data; // TODO use _data for STRING and remove _strings std::vector _strings; // for STRING type + CompressionType _compression{CompressionType::NONE}; }; } // namespace luci diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp index c17a4e2c36d..f2b3f9c96da 100644 --- a/compiler/luci/lang/src/Nodes/CircleConst.cpp +++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp @@ -20,6 +20,9 @@ namespace luci { +CompressionType CircleConst::compression(void) const { return _compression; } + +void CircleConst::compression(luci::CompressionType c) { _compression = c; } template uint32_t CircleConst::size(void) const { diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt index 001194eb6aa..421fb776c06 100644 --- a/compiler/luci/partition/CMakeLists.txt +++ b/compiler/luci/partition/CMakeLists.txt @@ -13,7 +13,7 @@ target_link_libraries(luci_partition PUBLIC luci_lang) target_link_libraries(luci_partition PRIVATE luci_service) target_link_libraries(luci_partition PRIVATE luci_log) target_link_libraries(luci_partition PRIVATE luci_logex) -target_link_libraries(luci_partition PRIVATE mio_circle08) +target_link_libraries(luci_partition PRIVATE mio_circle09) target_link_libraries(luci_partition PRIVATE nncc_common) target_link_libraries(luci_partition PRIVATE pepper_csv2vec) target_link_libraries(luci_partition PRIVATE oops) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 8a1eb6d4f78..9f7447debd4 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -113,6 +113,7 @@ class CircleOptimizer final UnrollUnidirSeqLSTM, XpSepActFromTransposeConv, RemoveGatherGuard, + CompressWeightsHuffman }; enum AlgorithmParameters diff --git a/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h b/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h new file mode 100644 index 00000000000..f9f97791914 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_COMPRESS_WEIGHTS_PASS_H__ +#define __LUCI_COMPRESS_WEIGHTS_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to generate FC/CONV with compressed weights + * + * To see the target Op pattern, please visit implementation. 
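+ *
+ * Illustrative usage sketch (not a definitive API description): this pass is
+ * wired into CircleOptimizer through Options::Algorithm::CompressWeightsHuffman
+ * added in this patch, so enabling that option and calling optimize() runs it:
+ *
+ *   luci::CircleOptimizer optimizer;
+ *   optimizer.options()->enable(
+ *     luci::CircleOptimizer::Options::Algorithm::CompressWeightsHuffman);
+ *   optimizer.optimize(graph); // graph is the loco::Graph * holding Conv2D nodes
+ *
+ * The corresponding circle2circle command-line option is not shown in this sketch.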
+ */ +struct CompressWeightsPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::CompressWeightsPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_COMPRESS_WEIGHTS_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 90060253080..e8b87697cf7 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -97,6 +97,7 @@ #include "luci/Pass/DecomposeSoftmaxPass.h" #include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h" #include "luci/Pass/XpSepActFromTransposeConvPass.h" +#include "luci/Pass/CompressWeightsPass.h" // TODO add more passes #include "luci/Pass/CircleShapeInferencePass.h" @@ -312,7 +313,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } - + if (_options->query(Options::Algorithm::CompressWeightsHuffman)) + { + phase.emplace_back(std::make_unique()); + } // clang-format off std::map (*)(void)> option_to_pass; @@ -387,7 +391,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const option_to_pass[Options::Algorithm::XpSepActFromTransposeConv] = &createPassInstance; option_to_pass[Options::Algorithm::ForwardReshapeToUnaryOp] = &createPassInstance; option_to_pass[Options::Algorithm::ForwardTransposeOp] = &createPassInstance; - // clang-format on + // clang-format on for (auto const &m : option_to_pass) { diff --git a/compiler/luci/pass/src/CompressWeightsPass.cpp b/compiler/luci/pass/src/CompressWeightsPass.cpp new file mode 100644 index 00000000000..77ef8d96adc --- /dev/null +++ b/compiler/luci/pass/src/CompressWeightsPass.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/CompressWeightsPass.h" +#include "helpers/HuffmanEncoder.h" +#include "helpers/NodeFiller.h" + +#include +#include + +#include +#include + +namespace +{ + +template class TypeSelector; + +template <> class TypeSelector +{ +public: + using Type = uint8_t; +}; +template <> class TypeSelector +{ +public: + using Type = int8_t; +}; + +template bool compress_weights_huffman_conv2d(luci::CircleConv2D *conv2d) +{ + using T = typename TypeSelector
<DT>::Type; + assert(conv2d); + + auto weights = loco::must_cast<luci::CircleConst *>(conv2d->filter()); + if (weights->compression() != luci::CompressionType::NONE) + return false; + + luci::huffman::HuffmanEncoder<T> encoder; + auto new_weights = luci::clone(weights); + + std::vector<T> tmp_buf(weights->size<DT>()); + + for (size_t i = 0; i < weights->size<DT>(); ++i) + { + tmp_buf[i] = weights->at<DT>(i); + } + + std::vector<uint8_t> encoded = encoder.encode(tmp_buf); + + new_weights->dtype(DT); + new_weights->size<DT>(encoded.size()); + new_weights->compression(luci::CompressionType::HUFFMAN); + + for (size_t i = 0; i < new_weights->size<DT>(); ++i) + { + new_weights->at<DT>
(i) = encoded[i]; + } + conv2d->filter(new_weights); + + return true; +} + +} // namespace + +namespace luci +{ + +bool CompressWeightsPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto conv2d = dynamic_cast(node); + if (not conv2d) + continue; + + auto filter = loco::must_cast(conv2d->filter()); + + if (filter->dtype() == loco::DataType::S8) + { + if (compress_weights_huffman_conv2d(conv2d)) + changed = true; + } + else if (filter->dtype() == loco::DataType::U8) + { + if (compress_weights_huffman_conv2d(conv2d)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/helpers/HuffmanDecoder.h b/compiler/luci/pass/src/helpers/HuffmanDecoder.h new file mode 100644 index 00000000000..9387d39c1eb --- /dev/null +++ b/compiler/luci/pass/src/helpers/HuffmanDecoder.h @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ +#define __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace luci +{ + +namespace huffman +{ + +template struct Node +{ + Node *p_left = nullptr; + Node *p_right = nullptr; + T data; +}; + +template class HuffmanDecoder +{ +private: + Node *root = nullptr; + std::unordered_map huffmanCode; + std::vector encoded_bitset{}; + std::size_t nodes_count = 0; + +private: + Node *allocateNode(T data, unsigned int freq, Node *p_left, Node *p_right) + { + Node *node = new Node; + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + nodes_count++; + return node; + } + + std::string exportHuffmanTreeToString(Node *node) + { + if (node == nullptr) + return ""; + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + Node *importHuffmanTreeFromBoolVec(std::vector &vec, size_t &index) + { + if (vec.empty()) + return nullptr; + if (vec[index]) + { + index++; + Node *p_left = importHuffmanTreeFromBoolVec(vec, index); + Node *p_right = importHuffmanTreeFromBoolVec(vec, index); + return allocateNode(0, 0, p_left, p_right); + } + else if (vec[index] == false) + { + index++; + T tmp = 0; + for (size_t i = 0; i < sizeof(T) * CHAR_BIT; ++i) + { + if (vec[index++]) + tmp |= (1 << (sizeof(T) * CHAR_BIT - 1)) >> i; + } + + return allocateNode(tmp, 0, nullptr, nullptr); + } + return nullptr; + } + + Node *importHuffmanTreeFromString(std::string &str) + { + + if (str.substr(0, 1) == "1") + { + str = str.substr(1); + Node *p_left = importHuffmanTreeFromString(str); + Node *p_right = importHuffmanTreeFromString(str); + return allocateNode(0, 0, p_left, p_right); + } + else if (str.substr(0, 1) == "0") + { + str = str.substr(1); 
+ std::bitset tmp(str.substr(0, sizeof(T) * CHAR_BIT)); + str = str.substr(sizeof(T) * CHAR_BIT); + return allocateNode(static_cast(tmp.to_ullong()), 0, nullptr, nullptr); + } + } + + void buildHuffmanTable(Node *node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + huffmanCode[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + void decode(Node *node, std::string &str, std::vector &out_vec, size_t &index) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + out_vec.push_back(node->data); + return; + } + + if (str.size() == index) + return; + if (str[index] == '0') + { + decode(node->p_left, str, out_vec, ++index); + } + else + { + decode(node->p_right, str, out_vec, ++index); + } + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const uint8_t *pack_ptr) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.tree_vec.push_back(true); + else + tree_and_data.tree_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const std::vector &packed_vec) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + +public: + void decode(Node *node, std::vector &vec, T *dst_ptr) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + *dst_ptr = node->data; + return; + } + + if (vec.size() == _decode_idx) + return; + if (vec[_decode_idx] == false) + { + ++_decode_idx; + decode(node->p_left, vec, dst_ptr); + } + else + { + ++_decode_idx; + decode(node->p_right, vec, dst_ptr); + } + } + +private: + size_t _decode_idx = 0; + EncodedTreeAndData _encoded_tree_and_data; + +public: + void init_decoder(const uint8_t *input) + { + size_t index = 0; + _encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + root = importHuffmanTreeFromBoolVec(_encoded_tree_and_data.tree_vec, index); + } + + void reset_decode_idx(void) { _decode_idx = 0; } + + int decode_n(uint8_t *dst_ptr, size_t num) + { + size_t bytes_decoded = 0; + for (int i = 0; i < num && _decode_idx < _encoded_tree_and_data.data_vec.size(); ++i) + { + decode(root, _encoded_tree_and_data.data_vec, dst_ptr + bytes_decoded); + bytes_decoded++; + } + return bytes_decoded; + } + + HuffmanDecoder() = default; +}; + +} // namespace huffman +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ diff --git a/compiler/luci/pass/src/helpers/HuffmanEncoder.h b/compiler/luci/pass/src/helpers/HuffmanEncoder.h new file mode 100644 index 00000000000..26e8d3e9c54 --- /dev/null +++ b/compiler/luci/pass/src/helpers/HuffmanEncoder.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ +#define __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ + +#include +#include +#include +#include +#include +#include +#include + +namespace luci +{ +namespace huffman +{ + +// Node of prefix tree +template struct Node +{ + std::shared_ptr> p_left; + std::shared_ptr> p_right; + T data; + unsigned int freq; +}; + +// Compare functor for priority queue +template struct CompareNodes +{ + bool operator()(std::shared_ptr> l, std::shared_ptr> r) + { + return l->freq > r->freq; + } +}; + +template class HuffmanEncoder +{ +private: + std::unordered_map _huffman_table; + +private: + std::shared_ptr> allocateNode(T data, unsigned int freq, std::shared_ptr> p_left, + std::shared_ptr> p_right) + { + std::shared_ptr> node = std::make_unique>(); + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + return node; + } + + std::unordered_map calculateFrequencyMap(const std::vector &input) + { + std::unordered_map out_map; + for (auto &item : input) + out_map[item] = out_map.find(item) != out_map.end() ? 
out_map[item] + 1 : 1; + + return out_map; + } + + std::string exportHuffmanTreeToString(std::shared_ptr> node) + { + if (node == nullptr) + return ""; + + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + void buildHuffmanTable(std::shared_ptr> node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + _huffman_table[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + std::shared_ptr> buildHuffmanTree(const std::vector &input) + { + auto freq_map = calculateFrequencyMap(input); + + std::priority_queue>, std::vector>>, + CompareNodes> + pq; + + for (auto &item : freq_map) + { + pq.push(allocateNode(item.first, item.second, nullptr, nullptr)); + } + + while (pq.size() != 1) + { + std::shared_ptr> left = pq.top(); + pq.pop(); + std::shared_ptr> right = pq.top(); + pq.pop(); + + unsigned int sum = left->freq + right->freq; + pq.push(allocateNode(0, sum, left, right)); + } + + return pq.top(); + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + std::vector packEncodedDataToArray(const std::string &tree_str, + const std::string &encoded_data) + { + std::vector arr; + const size_t kTreeSizeInBits = tree_str.size(); + const size_t kDataSizeInBits = encoded_data.size(); + + for (size_t i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kTreeSizeInBits)) + i)); + } + + for (size_t i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kDataSizeInBits)) + i)); + } + + const auto merged_str = tree_str + encoded_data; + const size_t kMergedSizeInBits = merged_str.size(); + + const auto kMergedSizeInBytes = kMergedSizeInBits % CHAR_BIT ? kMergedSizeInBits / CHAR_BIT + 1 + : kMergedSizeInBits / CHAR_BIT; + for (size_t i = 0; i < kMergedSizeInBytes; ++i) + { + const auto kNumOfBits = + kMergedSizeInBits - i * CHAR_BIT < CHAR_BIT ? 
kMergedSizeInBits - i * CHAR_BIT : CHAR_BIT; + + std::string tmp_str = merged_str.substr(i * CHAR_BIT, kNumOfBits); + + for (size_t i = 0; i < CHAR_BIT - kNumOfBits; ++i) + tmp_str += "0"; + + const std::bitset tmp_bitset(tmp_str); + + arr.push_back(static_cast(tmp_bitset.to_ullong())); + } + return arr; + } + +public: + // Encodes input vector of values of type T and returns encoded vector of uint8_t + std::vector encode(const std::vector &input) + { + std::shared_ptr> root = buildHuffmanTree(input); + buildHuffmanTable(root); + + std::string exported_tree = exportHuffmanTreeToString(root); + std::string str = ""; + + for (auto &item : input) + { + str += _huffman_table[item]; + } + + std::vector raw_arr = packEncodedDataToArray(exported_tree, str); + return raw_arr; + } + +public: + HuffmanEncoder() = default; +}; + +} // namespace huffman +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index 7fd58df1b64..8dbca6575f4 100644 --- a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -4,7 +4,7 @@ require("loco") require("locop") require("logo") require("logo-core") -require("mio-circle08") +require("mio-circle09") require("luci-compute") require("oops") require("hermes") diff --git a/compiler/mio-circle09/CMakeLists.txt b/compiler/mio-circle09/CMakeLists.txt new file mode 100644 index 00000000000..01d3caf24b9 --- /dev/null +++ b/compiler/mio-circle09/CMakeLists.txt @@ -0,0 +1,52 @@ +nnas_find_package(FlatBuffers EXACT 23.5.26 QUIET) + +if(NOT FlatBuffers_FOUND) + message(STATUS "mio-circle09 skip: FlatBuffers 23.5.26 NOT FOUND") + return() +endif(NOT FlatBuffers_FOUND) + +message(STATUS "Build mio-circle09: TRUE") + +# TODO Find a better way +# TODO use nnpackage +# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs") +set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.9/circle_schema.fbs") + +# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs" +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" + COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS "${SCHEMA_FILE}" + ) + +FlatBuffersMuteable_Target(mio_circle09 + OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle" + INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" + SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}" + SCHEMA_FILES "schema.fbs" + ) + +# This example shows how to use "mio-circle09" library +add_executable(mio_circle09_example example.cpp) +target_link_libraries(mio_circle09_example mio_circle09) + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(mio_circle09_helper STATIC ${SOURCES}) +set_target_properties(mio_circle09_helper PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(mio_circle09_helper PRIVATE src) +target_include_directories(mio_circle09_helper PUBLIC include) +target_link_libraries(mio_circle09_helper mio_circle09) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(mio_circle09_helper_test ${TESTS}) +target_include_directories(mio_circle09_helper_test PRIVATE src) +target_link_libraries(mio_circle09_helper_test mio_circle09) +target_link_libraries(mio_circle09_helper_test mio_circle09_helper) diff --git a/compiler/mio-circle09/README.md 
b/compiler/mio-circle09/README.md new file mode 100644 index 00000000000..aed08aa7a5f --- /dev/null +++ b/compiler/mio-circle09/README.md @@ -0,0 +1,3 @@ +# mio-circle09 + +Let's make it easy to read and write Circle models. diff --git a/compiler/mio-circle09/example.cpp b/compiler/mio-circle09/example.cpp new file mode 100644 index 00000000000..ec039adfda4 --- /dev/null +++ b/compiler/mio-circle09/example.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// This example shows how to include and use "mio-circle09" +// +#include + +#include +#include +#include + +int main(int argc, char **argv) +{ + std::ifstream ifs(argv[1], std::ios_base::binary); + std::vector buf(std::istreambuf_iterator{ifs}, std::istreambuf_iterator{}); + + flatbuffers::Verifier verifier{reinterpret_cast(buf.data()), buf.size()}; + + if (!circle::VerifyModelBuffer(verifier)) + { + std::cout << "Fail" << std::endl; + return 255; + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/compiler/mio-circle09/include/mio_circle/Helper.h b/compiler/mio-circle09/include/mio_circle/Helper.h new file mode 100644 index 00000000000..ee8077a5cf4 --- /dev/null +++ b/compiler/mio-circle09/include/mio_circle/Helper.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MIO_CIRCLE09_HELPER_H__ +#define __MIO_CIRCLE09_HELPER_H__ + +#include + +#include + +namespace mio +{ +namespace circle +{ + +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode); +bool is_valid(const ::circle::OperatorCode *opcode); +bool is_custom(const ::circle::OperatorCode *opcode); +std::string opcode_name(const ::circle::OperatorCode *opcode); +const char *tensor_type(const ::circle::Tensor *tensor); +const char *tensor_name(const ::circle::Tensor *tensor); + +template std::vector as_index_vector(const flatbuffers::Vector *flat_array) +{ + if (flat_array == nullptr) + { + throw std::runtime_error("flat array is nullptr"); + } + + std::vector ret(flat_array->size()); + for (uint32_t i = 0; i < flat_array->size(); i++) + { + ret[i] = flat_array->Get(i); + } + return ret; +} + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE09_HELPER_H__ diff --git a/compiler/mio-circle09/include/mio_circle/Reader.h b/compiler/mio-circle09/include/mio_circle/Reader.h new file mode 100644 index 00000000000..561888d201f --- /dev/null +++ b/compiler/mio-circle09/include/mio_circle/Reader.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MIO_CIRCLE09_READER_H__ +#define __MIO_CIRCLE09_READER_H__ + +#include + +#include +#include +#include + +// NOTE Reader class originated from circledump and for circle-tensordump +// where this class has more work to be done for stability +// as the tools are for developers not customores. 
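+
+// A minimal usage sketch (illustrative only; error handling and includes are
+// omitted, and "buf" stands for the raw contents of a .circle file loaded as
+// in example.cpp of this package):
+//
+//   const ::circle::Model *model = ::circle::GetModel(buf.data());
+//   mio::circle::Reader reader(model);
+//   reader.select_subgraph(0);
+//   for (const auto *op : *reader.operators())
+//     std::cout << reader.opcode_name(op) << std::endl;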
+ +namespace mio +{ +namespace circle +{ + +/** + * @brief Loads Circle file and provides helpers to access attributes + */ +class Reader +{ +private: + using CircleSubGraphs_t = flatbuffers::Vector>; + using CircleBuffers_t = flatbuffers::Vector>; + using CircleTensors_t = flatbuffers::Vector>; + using CircleOperators_t = flatbuffers::Vector>; + using CircleMetadata_t = flatbuffers::Vector>; + using CircleSignatureDef_t = flatbuffers::Vector>; + +public: + Reader(const ::circle::Model *model); + Reader(const ::circle::Model *model, const std::vector *rawdata); + + Reader() = delete; + +public: + uint32_t version() const { return _version; } + + const std::vector &opcodes() { return _op_codes; } + const CircleBuffers_t *buffers() { return _buffers; } + const CircleTensors_t *tensors() { return _tensors; } + const CircleOperators_t *operators() { return _operators; } + const std::vector &inputs() const { return _inputs; } + const std::vector &outputs() const { return _outputs; } + const CircleMetadata_t *metadata() const { return _metadata; } + const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } + + uint32_t num_subgraph() const { return _subgraphs->size(); } + + size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); + size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data, bool &ext_offset); + ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const; + std::string opcode_name(const ::circle::Operator *op) const; + std::vector outputs(const ::circle::Operator *op) const; + std::string tensor_name(const ::circle::Tensor *tensor) const; + std::string tensor_dtype(const ::circle::Tensor *tensor) const; + +public: + bool select_subgraph(uint32_t subgraph); + const std::string &subgraph_name(void) const { return _subgraph_name; } + uint32_t subgraph_index(void) const { return _subgraph_index; } + +private: + uint32_t _version; + + const std::vector *_rawdata{nullptr}; + + const CircleSubGraphs_t *_subgraphs{nullptr}; + const CircleBuffers_t *_buffers{nullptr}; + const CircleTensors_t *_tensors{nullptr}; + const CircleOperators_t *_operators{nullptr}; + const CircleMetadata_t *_metadata{nullptr}; + const CircleSignatureDef_t *_signature_defs{nullptr}; + + uint32_t _subgraph_index = 0; + std::string _subgraph_name; + std::vector _op_codes; + std::vector _inputs; + std::vector _outputs; +}; + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE09_READER_H__ diff --git a/compiler/mio-circle09/src/Helper.cpp b/compiler/mio-circle09/src/Helper.cpp new file mode 100644 index 00000000000..a7bbd23eab4 --- /dev/null +++ b/compiler/mio-circle09/src/Helper.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Helper.h" + +#include +#include + +namespace mio +{ +namespace circle +{ + +/** + * This will provide v3/v3a/v3b format neutral BuiltinOperator + * NOTE circle has minus value opcode (252~254 as uint8_t) + * we cannot use std::max() like tflite as deprecated_builtin_code can be + * minus and builtin_code being 0 for v0.3 files. + */ +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + if (opcode->deprecated_builtin_code() == 127) + { + assert(opcode->builtin_code() >= 127); + return opcode->builtin_code(); + } + // There was no 255(-1) value in v0.3 + assert(opcode->deprecated_builtin_code() != -1); + return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code()); +} + +bool is_valid(const ::circle::OperatorCode *opcode) +{ + // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127 + const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code(); + if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN) + return false; + // There was no 255(-1) value in v0.3 + if (deprecated_builtin_code == -1) + return false; + + const ::circle::BuiltinOperator builtin_code = opcode->builtin_code(); + if (!(::circle::BuiltinOperator_MIN <= builtin_code && + builtin_code <= ::circle::BuiltinOperator_MAX)) + return false; + + return true; +} + +bool is_custom(const ::circle::OperatorCode *opcode) +{ + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return (code == ::circle::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::circle::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return ::circle::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::circle::Tensor *tensor) +{ + return ::circle::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::circle::Tensor *tensor) +{ + if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty()) + return "(noname)"; + + return tensor->name()->c_str(); +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle09/src/Helper.test.cpp b/compiler/mio-circle09/src/Helper.test.cpp new file mode 100644 index 00000000000..de9b912d2d6 --- /dev/null +++ b/compiler/mio-circle09/src/Helper.test.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Helper.h" + +#include +#include + +#include + +class mio_circle09_helper_test : public ::testing::Test +{ +protected: + void initialization_finish(void) + { + _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec)); + } + +protected: + void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code, + circle::BuiltinOperator builtin_code) + { + _opcodes_vec.push_back(circle::CreateOperatorCodeDirect( + _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code)); + } + + const circle::OperatorCode *get_operator_code(uint8_t idx) + { + return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + std::vector> _opcodes_vec; +}; + +TEST_F(mio_circle09_helper_test, v09) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom_old) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_NEG) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_under127) +{ + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_under127_NEG) +{ + // BuiltinOperator_CONV_2D = 3 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom) +{ + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_over127) +{ + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 + // BuiltinOperator_CUMSUM = 128 + 
add_operator_code(127, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUMSUM); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_over127_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} diff --git a/compiler/mio-circle09/src/Reader.cpp b/compiler/mio-circle09/src/Reader.cpp new file mode 100644 index 00000000000..c1a5767d0cf --- /dev/null +++ b/compiler/mio-circle09/src/Reader.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Reader.h" +#include "mio_circle/Helper.h" + +#include +#include +#include + +namespace mio +{ +namespace circle +{ + +Reader::Reader(const ::circle::Model *model) +{ + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _version = model->version(); + _subgraphs = model->subgraphs(); + _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); + + auto opcodes = model->operator_codes(); + for (const ::circle::OperatorCode *opcode : *opcodes) + { + _op_codes.push_back(opcode); + } +} + +Reader::Reader(const ::circle::Model *model, const std::vector *rawdata) +{ + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _rawdata = rawdata; + + _version = model->version(); + _subgraphs = model->subgraphs(); + _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); + + auto opcodes = model->operator_codes(); + for (const ::circle::OperatorCode *opcode : *opcodes) + { + _op_codes.push_back(opcode); + } +} + +size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) +{ + if (buff_data != nullptr) + { + *buff_data = nullptr; + } + + if (buf_idx == 0) + return 0; + + if (auto *buffer = (*_buffers)[buf_idx]) + { + assert(buffer->offset() == 0); + + if (auto *array = buffer->data()) + { + if (size_t size = array->size()) + { + if (buff_data != nullptr) + { + *buff_data = reinterpret_cast(array->data()); + } + return size; + } + } + } + + return 0; +} + +size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data, bool &ext_offset) +{ + ext_offset = false; + + if (buff_data != nullptr) + { + *buff_data = nullptr; + } + + if (buf_idx == 0) + return 0; + + if (auto *buffer = (*_buffers)[buf_idx]) + { + auto buffer_offset = buffer->offset(); + if (buffer_offset > 1) + { + assert(_rawdata); // make debug break for invalid case + if (_rawdata == nullptr) + return 0; + + ext_offset = true; + if (buff_data != nullptr) + { + *buff_data = 
reinterpret_cast(&_rawdata->at(buffer_offset)); + } + return buffer->size(); + } + else if (auto *array = buffer->data()) + { + if (size_t size = array->size()) + { + if (buff_data != nullptr) + { + *buff_data = reinterpret_cast(array->data()); + } + return size; + } + } + else + { + if (buffer->offset() == 1 && buffer->size() == 1) + { + std::cerr << "Buffer " << buf_idx << " has invalid offset/size." << std::endl; + } + } + } + + return 0; +} + +::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + return mio::circle::builtin_code_neutral(opcode); +} + +std::string Reader::opcode_name(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + if (!mio::circle::is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid: " << index << ")"; + return oss.str(); + } + + return mio::circle::opcode_name(opcode); +} + +std::vector Reader::outputs(const ::circle::Operator *op) const +{ + return as_index_vector(op->outputs()); +} + +std::string Reader::tensor_name(const ::circle::Tensor *tensor) const +{ + return mio::circle::tensor_name(tensor); +} + +std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const +{ + return mio::circle::tensor_type(tensor); +} + +bool Reader::select_subgraph(uint32_t sgindex) +{ + _subgraph_index = sgindex; + _tensors = nullptr; + _operators = nullptr; + + _inputs.clear(); + _outputs.clear(); + + if (_subgraphs->size() <= sgindex) + { + assert(false); + return false; + } + + const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; + + auto name = subgraph->name(); + _subgraph_name = name ? name->c_str() : "(noname)"; + + _tensors = subgraph->tensors(); + _operators = subgraph->operators(); + + _inputs = as_index_vector(subgraph->inputs()); + _outputs = as_index_vector(subgraph->outputs()); + + return true; +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle09/src/Reader.test.cpp b/compiler/mio-circle09/src/Reader.test.cpp new file mode 100644 index 00000000000..cef74bad89e --- /dev/null +++ b/compiler/mio-circle09/src/Reader.test.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Reader.h" + +#include +#include + +class mio_circle09_reader_test : public ::testing::Test +{ +protected: + void initialization_emty(void) + { + _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec); + circle::FinishModelBuffer(_fbb, _model); + } + + const circle::Model *circleModel(void) + { + auto ptr = _fbb.GetBufferPointer(); + return circle::GetModel(ptr); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + flatbuffers::Offset _model; + std::vector> _opcodes_vec; +}; + +TEST_F(mio_circle09_reader_test, null_Model_NEG) +{ + EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error); +} + +TEST_F(mio_circle09_reader_test, empty_Model) +{ + initialization_emty(); + + const circle::Model *model = circleModel(); + EXPECT_NE(nullptr, model); + + mio::circle::Reader reader(model); + + SUCCEED(); +} + +// TODO add more tests diff --git a/compiler/pics/CMakeLists.txt b/compiler/pics/CMakeLists.txt index a6f955a5a87..a7bbbfbfca3 100644 --- a/compiler/pics/CMakeLists.txt +++ b/compiler/pics/CMakeLists.txt @@ -11,7 +11,7 @@ unset(PICS_DEPS) ### set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle") -get_target_property(SCHEMA_BIN_PATH mio_circle08 BINARY_DIR) +get_target_property(SCHEMA_BIN_PATH mio_circle09 BINARY_DIR) add_custom_command( OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR} diff --git a/compiler/pics/requires.cmake b/compiler/pics/requires.cmake index b1d32605069..e843025039b 100644 --- a/compiler/pics/requires.cmake +++ b/compiler/pics/requires.cmake @@ -1 +1 @@ -require("mio-circle08") +require("mio-circle09") diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt index 35aad329a84..d7678cf2e6e 100644 --- a/compiler/tflite2circle/CMakeLists.txt +++ b/compiler/tflite2circle/CMakeLists.txt @@ -2,7 +2,7 @@ nnas_include(TargetRequire) unset(REQUIRED_TARGETS) list(APPEND REQUIRED_TARGETS mio_tflite2121) -list(APPEND REQUIRED_TARGETS mio_circle08) +list(APPEND REQUIRED_TARGETS mio_circle09) TargetRequire_Return(${REQUIRED_TARGETS}) set(DRIVER "driver/Driver.cpp") @@ -15,7 +15,7 @@ target_link_libraries(tflite2circle foder) target_link_libraries(tflite2circle safemain) target_link_libraries(tflite2circle mio_tflite2121) target_link_libraries(tflite2circle mio_tflite2121_helper) -target_link_libraries(tflite2circle mio_circle08) +target_link_libraries(tflite2circle mio_circle09) target_link_libraries(tflite2circle vconone) target_link_libraries(tflite2circle nncc_coverage) diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake index cf770dfa1f7..052540f17c3 100644 --- a/compiler/tflite2circle/requires.cmake +++ b/compiler/tflite2circle/requires.cmake @@ -1,6 +1,6 @@ require("arser") require("foder") require("mio-tflite2121") -require("mio-circle08") +require("mio-circle09") require("safemain") require("vconone") diff --git a/infra/nncc/Makefile.arm32 b/infra/nncc/Makefile.arm32 index 0d344a048ac..c3bbfa4bd94 100644 --- a/infra/nncc/Makefile.arm32 +++ b/infra/nncc/Makefile.arm32 @@ -26,7 +26,7 @@ ARM32_BUILD_ITEMS+=;pepper-csv2vec;crew ARM32_BUILD_ITEMS+=;oops;pepper-assert ARM32_BUILD_ITEMS+=;hermes;hermes-std ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo -ARM32_BUILD_ITEMS+=;safemain;mio-circle08;mio-tflite2121 +ARM32_BUILD_ITEMS+=;safemain;mio-circle09;mio-tflite2121 ARM32_BUILD_ITEMS+=;dio-hdf5 ARM32_BUILD_ITEMS+=;luci-compute ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone @@ -44,7 +44,7 @@ ARM32_HOST_ITEMS+=;pepper-csv2vec ARM32_HOST_ITEMS+=;oops 
ARM32_HOST_ITEMS+=;hermes;hermes-std ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo -ARM32_HOST_ITEMS+=;safemain;mio-circle08;mio-tflite2121 +ARM32_HOST_ITEMS+=;safemain;mio-circle09;mio-tflite2121 ARM32_HOST_ITEMS+=;luci-compute ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone ARM32_HOST_ITEMS+=;luci diff --git a/infra/packaging/preset/20230907 b/infra/packaging/preset/20230907 index 5834a591b0c..9fa10195a73 100644 --- a/infra/packaging/preset/20230907 +++ b/infra/packaging/preset/20230907 @@ -20,7 +20,7 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite2121" "mio-circle08") + REQUIRED_UNITS+=("mio-tflite2121" "mio-circle09") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Compute diff --git a/infra/packaging/preset/20230907_windows b/infra/packaging/preset/20230907_windows index 037e870ddf3..57fba531765 100644 --- a/infra/packaging/preset/20230907_windows +++ b/infra/packaging/preset/20230907_windows @@ -17,7 +17,7 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite2121" "mio-circle08") + REQUIRED_UNITS+=("mio-tflite2121" "mio-circle09") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Compute diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh index 1f5310a8261..e4c0d8f21e6 100644 --- a/infra/scripts/compiler_modules.sh +++ b/infra/scripts/compiler_modules.sh @@ -12,7 +12,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec" DEBUG_BUILD_ITEMS+=";hermes;hermes-std" DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo" DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone" -DEBUG_BUILD_ITEMS+=";safemain;mio-circle08;mio-tflite2121;dio-hdf5" +DEBUG_BUILD_ITEMS+=";safemain;mio-circle09;mio-tflite2121;dio-hdf5" DEBUG_BUILD_ITEMS+=";luci-compute" DEBUG_BUILD_ITEMS+=";tflite2circle" DEBUG_BUILD_ITEMS+=";luci" @@ -50,7 +50,7 @@ NNPKG_RES_ITEMS+=";luci-compute" # Circle compiler library (.circle -> .circle) NNPKG_RES_ITEMS+=";luci" # Flatbuffer I/O -NNPKG_RES_ITEMS+=";mio-tflite2121;mio-circle08" +NNPKG_RES_ITEMS+=";mio-tflite2121;mio-circle09" # Tools NNPKG_RES_ITEMS+=";tflite2circle;circle2circle;luci-interpreter" NNPKG_RES_ITEMS+=";souschef;tflchef;circlechef;circle-verify" diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 97b6cea9f7a..7f780034a5b 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -193,7 +193,7 @@ tar -xf %{SOURCE3016} -C ./externals %if %{odc_build} == 1 %{nncc_env} ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=%{nproc} -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen \ -DCMAKE_INSTALL_PREFIX=$(pwd)/%{overlay_path} \ - -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" + -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle09;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" %{nncc_env} ./nncc build %{build_jobs} cmake --install %{nncc_workspace} %{strip_options} %endif # odc_build @@ -206,7 +206,7 @@ mkdir -p %{overlay_path}/include/mio/circle cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h %{overlay_path}/include/nncc/core/ADT/tensor cp compiler/oops/include/oops/InternalExn.h %{overlay_path}/include/oops cp 
compiler/luci/lang/include/luci/IR/CircleNodes.lst %{overlay_path}/include/luci/IR -cp %{nncc_workspace}/compiler/mio-circle08/gen/mio/circle/schema_generated.h %{overlay_path}/include/mio/circle +cp %{nncc_workspace}/compiler/mio-circle09/gen/mio/circle/schema_generated.h %{overlay_path}/include/mio/circle cp -r %{nncc_workspace}/overlay/include/flatbuffers %{overlay_path}/include # runtime build diff --git a/res/CircleSchema/0.9/circle_schema.fbs b/res/CircleSchema/0.9/circle_schema.fbs new file mode 100644 index 00000000000..de775255d12 --- /dev/null +++ b/res/CircleSchema/0.9/circle_schema.fbs @@ -0,0 +1,1705 @@ +// Copyright (c) 2019~2023 Samsung Electronics Co., Ltd. All Rights Reserved +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// +// Version Major.Minor +// +// Major version is schema version. +// We keep schema version if it is compatible +// Minor version is for human communication +// It will not be stored in circle model. +// +// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema. +// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`) +// `BATCH_MATMUL` operator, `FLOAT64` tensor type, +// `asymmetric_quantize_inputs` for several operator options +// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added. +// Version 0.3: SHUFFLED16x1FLOAT32 is added. +// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema. +// Version 0.5: Base up to TensorFlow Lite v2.10.1 schema. +// Version 0.6: Base up to TensorFlow Lite v2.13.0 schema. +// Version 0.7: Base up to TensorFlow Lite v2.15.0 schema, deprecate data_format in Subgraph table +// Version 0.8: GRU op is added. UINT4 is added. +// Version 0.9: Weight compression option is added + +namespace circle; + +// This corresponds to the version. +file_identifier "CIR0"; +// File extension of any written files. +file_extension "circle"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. +enum TensorType : byte { + UINT4 = -1, + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, + FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental, that are subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, + UINT16 = 16, + INT4 = 17, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. +table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. 
+table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + // NOTE min/max values are valid if + // 1. length of min/max == 0 or + // 2. length of min/max == length of scale/zero_point + // Otherwise, min/max are not valid (undefined behavior). + min:[float]; + max:[float]; + scale:[float]; // For dequantizing the tensor's values. + zero_point:[long]; + + // If this is not none, the other quantization parameters (i.e. min, max, + // scale, zero_point fields above) are ignored and the value of the + // QuantizationDetails union should be used. + details:QuantizationDetails; + + // Specifies the dimension of the Tensor's shape that the scales and + // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1] + // with quantization params: + // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1 + // will be quantized across the second dimension of t. + // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1 + // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2 + // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3 + quantized_dimension:int; +} + +// Sparse tensors. +// We use a modification of the TACO format. +// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf +// +// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1), +// potentially with a k-dimensional block (0 <= k <= n) with dims +// (dn, ..., dn+k-1), the format needs to specify: +// 1. In what order to traverse these dimensions. For example, to store a 2-D +// matrix in row major order, the traversal order would be (d0, d1), +// whereas to store it in column major order, the traversal order would be +// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order +// could be (d0, d1, d2, d3). +// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original +// tensor dimension in (d0, ..., dn-1). +// 3. In the traversal order defined above, the format (dense vs. sparse) and +// index metadata for each dimension. For a dense dimension, this is just +// the size of that dimension. For a sparse dimension, it's the same as +// the compressed index defined in the Compressed Sparse Row (CSR) format. +// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html) + +// The storage type for a dimension. Currently we support: +// 1. DENSE: each coordinate in this dimension is stored implicitly. +// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The +// compression technique is the same what CSR uses. +// More types like a sparse dimension with a different compression technique +// could be added to the list in the future. +enum DimensionType : byte { + DENSE = 0, + SPARSE_CSR = 1, +} + +table Int32Vector { + values:[int]; +} + +table Uint16Vector { + values:[ushort] (force_align: 4); +} + +table Uint8Vector { + values:[ubyte] (force_align: 4); +} + +// Variable-typed buffer to store the index metadata for a sparse dimension. +// The widest type is Int32 instead of UInt32 because tensor's shape is a int32 +// vector. We don't want the per-dimensional index to overflow that range. +union SparseIndexVector { + Int32Vector, + Uint16Vector, + Uint8Vector +} + +table DimensionMetadata { + // Whether a dimension is dense or sparse. 
+ format:DimensionType; + // Index metadata used for a dimension. + // - If format is DimensionType.DENSE then we use the dense_size field to + // store the size of that dimension. Each index in that dimension is + // stored implicitly. + // - If format is DimensionType.SPARSE_CSR then we use array_segments and + // array_indices to encode that dimension. array_segments represents how + // to segment the indices array, each segment corresponds to one element + // in the previous dimension. array_indices represents the index of the + // non-zero elements within this dimension (as those in the CSR matrix + // format, where the first array is row pointers and the second array is + // column indices). + dense_size:int; + array_segments:SparseIndexVector; + array_indices:SparseIndexVector; +} + +// Parameters to encode a sparse TfLite tensor. +table SparsityParameters { + // The traversal order of the dimensions defined in the `shape` field of the + // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1, + // ..., dn-1), + // - if not block sparse, the traversal_order is just a permutation of (d0, + // ..., dn-1). For example, a 2-D matrix stored in row-major order would + // have traversal_order = (d0, d1). + // - if block sparse with a k-dimensional block (0 <= k <= n), the + // traversal_order has n + k elements. The first n elements are still a + // permutation of (d0, ..., dn-1). The last k elements are a permutation + // of (dn, ..., dn+k-1), defining how to traverse a block internally. For + // example, a 2-D matrix with 2-D blocks, both stored in row-major order + // would have traversal_order = (d0, d1, d2, d3). + traversal_order:[int]; + // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n), + // stores how a block dimension in (dn, ..., dn+k-1) maps to the original + // tensor dimension in (d0, ..., dn). + // It's stored in the order of (dn, ..., dn+k-1). + // If not block-sparse, this field is NULL. + block_map:[int]; + // In the traversal order defined above, the metadata needed for + // each dimension to locate the non-zero values in the original dense tensor. + // The size of the dim_metadata array = the size of the traversal_order array + // = n + k. + dim_metadata:[DimensionMetadata]; +} + +// The nested tensor type for VARIANT type. +table VariantSubType { + // The tensor shape. + shape:[int]; + type:TensorType; + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; +} + +enum CompressionType : byte { + NONE = 0, + // Huffman encoding only + HUFFMAN = 1 +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional.
+ + is_variable:bool = false; + + // Parameters to encode a sparse tensor. See the example in + // tensorflow/lite/testdata/sparse_tensor.json. + sparsity:SparsityParameters; // Optional. + + // Encodes `shape` with unknown dimensions. Unknown dimensions are + // represented with -1. + shape_signature:[int]; // Optional. + + // This field is added to distinguish between scalars and tensors of unknown + // ranks (both of which shape is []). + // For scalars (rank = 0), shape = [] and has_rank = true. + // For tensors with known rank (rank > 0) and shape, shape = [...] and + // has_rank = true. + // For tensors with unknown rank and shape, shape = [] and has_rank = false. + has_rank: bool = false; + + // The nested Tensor types for VARIANT type. This is always empty for + // non-VARIANT types. This is optional because the nested type can be omitted. + // Currently only 1 subtype is supported. The field is defined as an array for + // flexibility of supporting multiple subtypes in the future. + variant_tensors:[VariantSubType]; + + compression_type:CompressionType = NONE; + +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +// LINT.IfChange +enum BuiltinOperator : int32 { + GRU = -5, + BCQ_GATHER = -4, + BCQ_FULLY_CONNECTED = -3, + INSTANCE_NORM = -2, + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. Never + // create another op called RELU1. + RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. 
+ // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, + UNIQUE = 103, + CEIL = 104, + REVERSE_V2 = 105, + ADD_N = 106, + GATHER_ND = 107, + COS = 108, + WHERE = 109, + RANK = 110, + ELU = 111, + REVERSE_SEQUENCE = 112, + MATRIX_DIAG = 113, + QUANTIZE = 114, + MATRIX_SET_DIAG = 115, + ROUND = 116, + HARD_SWISH = 117, + IF = 118, + WHILE = 119, + NON_MAX_SUPPRESSION_V4 = 120, + NON_MAX_SUPPRESSION_V5 = 121, + SCATTER_ND = 122, + SELECT_V2 = 123, + DENSIFY = 124, + SEGMENT_SUM = 125, + BATCH_MATMUL = 126, + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, + BROADCAST_ARGS = 145, + RANDOM_STANDARD_NORMAL = 146, + BUCKETIZE = 147, + RANDOM_UNIFORM = 148, + MULTINOMIAL = 149, + GELU = 150, + DYNAMIC_UPDATE_SLICE = 151, + RELU_0_TO_1 = 152, + UNSORTED_SEGMENT_PROD = 153, + UNSORTED_SEGMENT_MAX = 154, + UNSORTED_SEGMENT_SUM = 155, + ATAN2 = 156, + UNSORTED_SEGMENT_MIN = 157, + SIGN = 158, + BITCAST = 159, + BITWISE_XOR = 160, + RIGHT_SHIFT = 161, + // All Operators start with STABLEHLO_ prefixes are subject to change + // Many of the ops below can not be executed by runtime + STABLEHLO_LOGISTIC = 162, // WARNING: Do not have runtime support + STABLEHLO_ADD = 163, // WARNING: No runtime support yet + STABLEHLO_DIVIDE = 164, // WARNING: No runtime support yet + STABLEHLO_MULTIPLY = 165, // WARNING: No runtime support yet + STABLEHLO_MAXIMUM = 166, // WARNING: No runtime support yet + STABLEHLO_RESHAPE = 167, // WARNING: No runtime support yet + STABLEHLO_CLAMP = 168, // WARNING: No runtime support + STABLEHLO_CONCATENATE = 169, // WARNING: No runtime support + STABLEHLO_BROADCAST_IN_DIM = 170, // WARNING: No runtime support + STABLEHLO_CONVOLUTION = 171, // WARNING: No runtime support + STABLEHLO_SLICE = 172, // WARNING: No runtime support + STABLEHLO_CUSTOM_CALL = 173, // WARNING: No runtime support + STABLEHLO_REDUCE = 174, // WARNING: No runtime support + STABLEHLO_ABS = 175, // WARNING: No runtime support + STABLEHLO_AND = 176, // WARNING: No runtime support + STABLEHLO_COSINE = 177, // WARNING: No runtime support + STABLEHLO_EXPONENTIAL = 178, // WARNING: No runtime support + STABLEHLO_FLOOR = 179, // WARNING: No runtime support + STABLEHLO_LOG = 180, // WARNING: No runtime support + STABLEHLO_MINIMUM = 181, // WARNING: No runtime support + STABLEHLO_NEGATE = 182, // WARNING: No runtime 
support + STABLEHLO_OR = 183, // WARNING: No runtime support + STABLEHLO_POWER = 184, // WARNING: No runtime support + STABLEHLO_REMAINDER = 185, // WARNING: No runtime support + STABLEHLO_RSQRT = 186, // WARNING: No runtime support + STABLEHLO_SELECT = 187, // WARNING: No runtime support + STABLEHLO_SUBTRACT = 188, // WARNING: No runtime support + STABLEHLO_TANH = 189, // WARNING: No runtime support + STABLEHLO_SCATTER = 190, + STABLEHLO_COMPARE = 191, // WARNING: No runtime support + STABLEHLO_CONVERT = 192, // WARNING: No runtime support + STABLEHLO_DYNAMIC_SLICE = 193, // WARNING: No runtime support + STABLEHLO_DYNAMIC_UPDATE_SLICE = 194, // WARNING: No runtime support + STABLEHLO_PAD = 195, // WARNING: No runtime support + STABLEHLO_IOTA = 196, // WARNING: No runtime support + STABLEHLO_DOT_GENERAL = 197, // WARNING: No runtime support + STABLEHLO_REDUCE_WINDOW = 198, // WARNING: No runtime support + STABLEHLO_SORT = 199, // WARNING: No runtime support + STABLEHLO_WHILE = 200, // WARNING: No runtime support + STABLEHLO_GATHER = 201, // WARNING: No runtime support + STABLEHLO_TRANSPOSE = 202, // WARNING: No runtime support + DILATE = 203, + STABLEHLO_RNG_BIT_GENERATOR = 204, + REDUCE_WINDOW = 205, +} +// LINT.ThenChange(nnapi_linter/linter.proto) + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, + UniqueOptions, + ReverseV2Options, + AddNOptions, + GatherNdOptions, + CosOptions, + WhereOptions, + RankOptions, + ReverseSequenceOptions, + MatrixDiagOptions, + QuantizeOptions, + MatrixSetDiagOptions, + HardSwishOptions, + IfOptions, + WhileOptions, + DepthToSpaceOptions, + NonMaxSuppressionV4Options, + NonMaxSuppressionV5Options, + ScatterNdOptions, + SelectV2Options, + DensifyOptions, + SegmentSumOptions, + BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + 
ReadVariableOptions, + AssignVariableOptions, + RandomOptions, + BucketizeOptions, + GeluOptions, + DynamicUpdateSliceOptions, + UnsortedSegmentProdOptions, + UnsortedSegmentMaxOptions, + UnsortedSegmentMinOptions, + UnsortedSegmentSumOptions, + ATan2Options, + SignOptions, + BitcastOptions, + BitwiseXorOptions, + RightShiftOptions, + GRUOptions = 251, + BCQGatherOptions = 252, + BCQFullyConnectedOptions = 253, + InstanceNormOptions = 254, +} + +union BuiltinOptions2{ + StablehloConcatenateOptions, + StablehloBroadcastInDimOptions, + StablehloSliceOptions, + StablehloConvolutionOptions, + StablehloCustomCallOptions, + StablehloReduceOptions, + StablehloScatterOptions, + StablehloCompareOptions, + StablehloDynamicSliceOptions, + StablehloPadOptions, + StablehloIotaOptions, + StablehloDotGeneralOptions, + StablehloReduceWindowOptions, + StablehloSortOptions, + StablehloWhileOptions, + StablehloGatherOptions, + StablehloTransposeOptions, + DilateOptions, + StablehloRngBitGeneratorOptions, + ReduceWindowOptions, +} + +table StablehloGatherOptions{ + offset_dims : [long]; + collapsed_slice_dims : [long]; + start_index_map : [long]; + index_vector_dim : long; + slice_sizes : [long]; + indices_are_sorted : bool; +} + +table StablehloTransposeOptions{ + permutation : [long]; +} + +enum StablehloPrecisionConfig : uint { + DEFAULT, + HIGH, + HIGHEST, +} + +table StablehloDotGeneralOptions{ + lhs_batching_dimensions : [long]; + rhs_batching_dimensions : [long]; + lhs_contracting_dimensions : [long]; + rhs_contracting_dimensions : [long]; + precision_config : [StablehloPrecisionConfig]; +} + +table StablehloReduceWindowOptions{ + window_dimensions : [long]; + window_strides : [long]; + base_dilations : [long]; + window_dilations : [long]; + padding : [long]; + body_subgraph_index : int; +} + +table StablehloWhileOptions{ + cond_subgraph_index : int; + body_subgraph_index : int; +} + +table StablehloSortOptions{ + dimension : long; + is_stable : bool; + comparator_subgraph_index : int; +} + +table StablehloConcatenateOptions { + dimension : long; +} + +table StablehloBroadcastInDimOptions{ + broadcast_dimensions : [long]; +} + +enum StablehloComparisonDirection : uint { + STABLEHLO_COMPARISON_DIRECTION_EQ, + STABLEHLO_COMPARISON_DIRECTION_NE, + STABLEHLO_COMPARISON_DIRECTION_GE, + STABLEHLO_COMPARISON_DIRECTION_GT, + STABLEHLO_COMPARISON_DIRECTION_LE, + STABLEHLO_COMPARISON_DIRECTION_LT, + +} + +enum StablehloComparisonType : uint { + STABLEHLO_COMPARISON_TYPE_NOTYPE, + STABLEHLO_COMPARISON_TYPE_FLOAT, + STABLEHLO_COMPARISON_TYPE_FLOAT_TOTAL_ORDER, + STABLEHLO_COMPARISON_TYPE_SIGNED, + STABLEHLO_COMPARISON_TYPE_UNSIGNED, +} + +table StablehloCompareOptions{ + comparison_direction : StablehloComparisonDirection; + compare_type : StablehloComparisonType; +} + +table StablehloDynamicSliceOptions{ + slice_sizes : [long]; +} + +table StablehloPadOptions{ + edge_padding_low : [long]; + edge_padding_high : [long]; + interior_padding : [long]; +} + +table StablehloIotaOptions{ + iota_dimension : long; +} + +table StablehloCustomCallOptions { + call_target_name : string; + has_side_effect : bool; + backend_config: string; + api_version : int; // will be deprecated + called_computations: [int]; // should point to subgraphs of the computations + custom_attributes : [ubyte]; +} + +table StablehloReduceOptions { + dimensions : [long]; + body_subgraph_index : int; +} + +table StablehloSliceOptions{ + start_indices : [long]; + limit_indices : [long]; + strides : [long]; +} + +table StablehloConvolutionOptions{ +
window_strides : [long]; + padding : [long]; + lhs_dilation : [long]; + rhs_dilation : [long]; + window_reversal : [bool]; + input_batch_dimension : long; + input_feature_dimension : long; + input_spatial_dimensions : [long]; + kernel_input_feature_dimension : long; + kernel_output_feature_dimension : long; + kernel_spatial_dimensions : [long]; + output_batch_dimension : long; + output_feature_dimension : long; + output_spatial_dimensions : [long]; + feature_group_count : long; + batch_group_count : long; + precision_config : [StablehloPrecisionConfig]; +} + +table StablehloScatterOptions { + indices_are_sorted: bool; + update_window_dims: [long]; + inserted_window_dims: [long]; + scatter_dims_to_operand_dims: [long]; + index_vector_dim: long; + unique_indices: bool; + update_computation_subgraph_index: int; +} + +enum RngAlgorithm : byte { + // An algorithm auto-selected by the system according to device type. + DEFAULT = 0, + // The Philox algorithm, as described in paper + // ['Parallel Random Numbers: As Easy as 1, 2, 3'] + // (https://www.thesalmons.org/john/random123/papers/random123sc11.pdf) + PHILOX = 1, + // The ThreeFry algorithm, as described in paper + // ['Parallel Random Numbers: As Easy as 1, 2, 3'] + // (https://www.thesalmons.org/john/random123/papers/random123sc11.pdf) + THREEFRY = 2, +} + +table StablehloRngBitGeneratorOptions { + algorithm:RngAlgorithm; +} + +// LINT.IfChange +enum Padding : byte { SAME, VALID } +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// LINT.IfChange +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; + // Parameters for Conv2D version 8 or above. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +// Options for both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. + // See comments in lite/c/builtin_op_data.h for more details. + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. 
+} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; + // For weights-only quantization, use asymmetric quantization for non + // constant inputs at evaluation time. + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell. +table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; + asymmetric_quantize_inputs:bool; +} + +// LINT.IfChange +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, + SHUFFLED16x1FLOAT32 = 127 +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; + + // Parameters for FullyConnected version 5 or above. + // If set to true, then the number of dimensions is preserved. Furthermore, + // all but the last dimension of the input and output shapes will be equal. + keep_num_dims: bool; + + // Parameters for FullyConnected version 7 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; + + // Parameters for FullyConnected version 11 or above. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// LINT.IfChange +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL; + + // Parameters for LSTM version 4 or above. + asymmetric_quantize_inputs: bool; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; + + // Parameter for Unidirectional Sequence LSTM version 3. + asymmetric_quantize_inputs:bool; + + // Parameter for unidirectional sequence RNN version 4. + diagonal_recurrent_tensors:bool; +} + +table BidirectionalSequenceLSTMOptions { + // Parameters supported by version 1: + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. + merge_outputs: bool; + + // Parameters supported by version 2: + // If true then first dimension is sequence, otherwise batch. + // Version 1 implementations assumed time_major to be true, so this default + // value should never change. + time_major: bool = true; + + // Parameters for version 3 or above. + asymmetric_quantize_inputs:bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; + half_pixel_centers: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; + half_pixel_centers: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table DepthToSpaceOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; + // Parameters for Gather version 5 or above. + batch_dims: int = 0; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table CosOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; + // If true, then the end tensor is an offset of the begin tensor. 
+ offset: bool; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + // Parameters supported by version 1, 2, 3: + padding:Padding; + stride_w:int; + stride_h:int; + + // Parameters supported by version 4: + fused_activation_function:ActivationFunctionType = NONE; + + // Parameters for TransposeConv version 5 or above. + // If set, use this for bias and accumulator. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. + out_type : TensorType; +} + +table RankOptions { +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table HardSwishOptions { +} + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +// LINT.IfChange +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. + SYMMETRIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +table UniqueOptions { + idx_out_type:TensorType = INT32; +} + +table ReverseV2Options { +} + +table AddNOptions { +} + +table GatherNdOptions { +} + +table WhereOptions { +} + +table ReverseSequenceOptions { + seq_dim:int; + batch_dim:int = 0; +} + +table MatrixDiagOptions { +} + +table QuantizeOptions { +} + +table MatrixSetDiagOptions { +} + +table IfOptions { + then_subgraph_index:int; + else_subgraph_index:int; +} + +table CallOnceOptions { + init_subgraph_index:int; +} + +table WhileOptions { + cond_subgraph_index:int; + body_subgraph_index:int; +} + +table NonMaxSuppressionV4Options { +} + +table NonMaxSuppressionV5Options { +} + +table ScatterNdOptions { +} + +table SelectV2Options { +} + +table DensifyOptions { +} + +table SegmentSumOptions { +} + +table BatchMatMulOptions { + adjoint_lhs:bool; + adjoint_rhs:bool; + // Parameters for BatchMatMul version 4 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. 
This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +table RandomOptions { + seed: long; + seed2: long; +} + +table BucketizeOptions { + boundaries: [float]; // The bucket boundaries. +} + +table GeluOptions { + approximate: bool; +} + +table DynamicUpdateSliceOptions { +} + +table UnsortedSegmentProdOptions { +} + +table UnsortedSegmentMaxOptions { +} + +table UnsortedSegmentSumOptions { +} + +table ATan2Options { +} + +table UnsortedSegmentMinOptions{ +} + +table SignOptions { +} + +table BitcastOptions { +} + +table BitwiseXorOptions { +} + +table RightShiftOptions { +} + +table DilateOptions { +} + +enum ReduceWindowFunction : int { + UNSUPPORTED, + ADD, + MUL, + MINIMUM, + MAXIMUM, + ALL, + ANY, +} + +table ReduceWindowOptions{ + reduce_function: ReduceWindowFunction; +} + +table GRUOptions { + fused_activation_function:ActivationFunctionType; + return_sequences : bool; + time_major : bool; +} + +table BCQGatherOptions { + input_hidden_size: int; + axis: int; +} + +table BCQFullyConnectedOptions { + weights_hidden_size: int; + fused_activation_function:ActivationFunctionType; +} + +table InstanceNormOptions { + epsilon:float; + fused_activation_function:ActivationFunctionType; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; + custom_code:string; + + // The version of the operator. The version needs to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). + // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +enum DataFormat : byte { + // For 2D data, NHWC(batch, height, width, channels) + // For 3D data, NDHWC(batch, depth, height, width, channels) + CHANNELS_LAST = 0, + // For 2D data, NCHW(batch, channels, height, width) + // For 3D data, NCDHW(batch, channels, depth, height, width) + CHANNELS_FIRST = 1, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operation are configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicated map lookups. + opcode_index:uint; + + // Optional inputs are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; + + // A list of indices to the subgraph's "tensors" that are internal to an Op. + // Internal tensors are those that do not flow in or out of the operation, + // but instead are part of internal computation. As such, the operation's + // implementation may manage its memory more efficiently. They are needed + // however (i.e. not just an implementation detail) since they are part of the + // computation, which may require relevant metadata such as quantization + // parameters. + intermediates:[int]; + + // When an op is using custom_options in a model that is larger than 2GB, we + // instead use the following attributes to find the buffer location, which + // is stored outside of flatbuffers; the offset is calculated relative to the + // beginning of the file and is only valid if > 1 + large_custom_options_offset: ulong; + large_custom_options_size: ulong; + + // The Flatbuffers union struct has a 128-element limit in Java, so a second + // union is added; in case BuiltinOptions2 runs out, a third + // one can be added + builtin_options_2 : BuiltinOptions2; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. + tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; + + // Data format for input/output of SubGraph, deprecated + deprecated_data_format: DataFormat (deprecated); +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); + + // In a model that is larger than 2GB, buffers instead use the following + // attributes to find stored data, which is outside of flatbuffers; + // the offset is calculated relative to the beginning of the file and is only + // valid if > 1. + offset: ulong; + size: ulong; +} + +table Metadata { + // A human readable string to uniquely identify a Metadata. + name:string; + // An index to the buffers table. + buffer:uint; +} + +// Map from an alias name of tensor to tensor index in the graph. +// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string; + + // Model tag, deprecated. + deprecated_tag:string (deprecated); + + // Index of the subgraph that corresponds to the exported method. + subgraph_index:uint; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existing buffers list. + // Deprecated, prefer to use metadata field. + metadata_buffer:[int]; + + // Metadata about the model. + metadata:[Metadata]; + + // Optional SignatureDefs for the model. + signature_defs:[SignatureDef]; +} + +root_type Model;
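
The QuantizationParameters table above fixes the dequantization rule as f = scale * (q - zero_point), with the scale/zero_point pair selected along quantized_dimension. A minimal C++ sketch of that rule for a tensor assumed to be laid out as [channels, inner_size] with quantized_dimension = 0; the function name and layout are illustrative only and are not taken from the luci sources.

#include <cstddef>
#include <cstdint>
#include <vector>

// Per-channel dequantization: each channel c uses scale[c] and zero_point[c],
// applied to every element of that channel, following f = scale * (q - zero_point).
std::vector<float> dequantize_per_channel(const std::vector<int8_t> &q,
                                          const std::vector<float> &scale,
                                          const std::vector<int64_t> &zero_point,
                                          std::size_t channels, std::size_t inner_size)
{
  std::vector<float> f(q.size());
  for (std::size_t c = 0; c < channels; ++c)
    for (std::size_t i = 0; i < inner_size; ++i)
    {
      const std::size_t idx = c * inner_size + i;
      f[idx] = scale[c] * static_cast<float>(q[idx] - zero_point[c]);
    }
  return f;
}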
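
The Tensor.buffer comment defines the row-major layout contract: for shape [4, 3, 2], index [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. The generic helper below computes the same offset for any rank (Horner's rule over the shape); it is shown only to make the rule concrete and is not part of the schema or of this patch.

#include <cstddef>
#include <cstdint>
#include <vector>

// Row-major flat offset for a multi-dimensional index.
std::size_t flat_offset(const std::vector<int32_t> &shape, const std::vector<int32_t> &index)
{
  std::size_t offset = 0;
  for (std::size_t d = 0; d < shape.size(); ++d)
    offset = offset * static_cast<std::size_t>(shape[d]) + static_cast<std::size_t>(index[d]);
  return offset;
}

// Example: flat_offset({4, 3, 2}, {i, j, k}) == i*3*2 + j*2 + k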
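
The new per-tensor compression_type field defaults to NONE, so existing models are unaffected; only tensors explicitly marked HUFFMAN carry Huffman-encoded buffers. The sketch below shows how a reader of the generated header might detect the flag. The include path and the accessor/enum names follow the usual flatc C++ code generation for this schema and are assumptions here; the actual decode path is the HuffmanDecoder introduced elsewhere in this patch and is not reproduced.

#include <mio/circle/schema_generated.h> // flatc-generated header for this schema (assumed path)

// Returns true if any tensor in any subgraph is marked as Huffman-compressed.
bool has_huffman_compressed_tensor(const void *model_data)
{
  const circle::Model *model = circle::GetModel(model_data);
  if (model == nullptr || model->subgraphs() == nullptr)
    return false;
  for (const circle::SubGraph *graph : *model->subgraphs())
  {
    if (graph->tensors() == nullptr)
      continue;
    for (const circle::Tensor *tensor : *graph->tensors())
      if (tensor->compression_type() == circle::CompressionType_HUFFMAN)
        return true;
  }
  return false;
}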