From 71c600d6b24ac109040f8c0e7f6d703634c7242b Mon Sep 17 00:00:00 2001
From: Vyacheslav Bazhenov
Date: Mon, 15 Jul 2024 11:57:53 +0300
Subject: [PATCH] [luci] Introduce Compress weights pass

This commit introduces CompressWeightsPass for Conv2D

ONE-DCO-1.0-Signed-off-by: Vyacheslav Bazhenov
---
 Makefile.template | 4 +-
 compiler/circle-inspect/CMakeLists.txt | 8 +-
 compiler/circle-inspect/requires.cmake | 2 +-
 compiler/circle-operator/CMakeLists.txt | 8 +-
 compiler/circle-operator/requires.cmake | 2 +-
 compiler/circle-tensordump/CMakeLists.txt | 8 +-
 compiler/circle-tensordump/requires.cmake | 2 +-
 compiler/circle-verify/CMakeLists.txt | 8 +-
 compiler/circle-verify/requires.cmake | 2 +-
 compiler/circle2circle/src/Circle2Circle.cpp | 3 +
 compiler/circlechef/CMakeLists.txt | 6 +-
 compiler/circlechef/circle/CMakeLists.txt | 4 +-
 compiler/circlechef/core/CMakeLists.txt | 2 +-
 compiler/circlechef/requires.cmake | 2 +-
 compiler/circledump/CMakeLists.txt | 10 +-
 compiler/circledump/README.md | 2 +-
 compiler/circledump/requires.cmake | 2 +-
 .../include/luci_interpreter/core/Tensor.h | 15 +
 .../pal/linux/HuffmanDecoder.h | 357 ++++
 .../luci-interpreter/pal/linux/PALConv2d.h | 135 +-
 .../src/SimpleMemoryManager.cpp | 17 +-
 compiler/luci-interpreter/src/core/Tensor.cpp | 26 +-
 .../luci-interpreter/src/kernels/Conv2D.cpp | 147 +-
 .../luci-interpreter/src/kernels/Conv2D.h | 2 +
 compiler/luci-interpreter/src/kernels/Utils.h | 3 +-
 .../src/loader/GraphLoader.cpp | 3 +
 compiler/luci-pass-value-py-test/test.lst | 1 +
 compiler/luci/export/CMakeLists.txt | 4 +-
 .../luci/export/src/CircleExporterUtils.cpp | 15 +
 .../luci/export/src/CircleExporterUtils.h | 1 +
 .../luci/export/src/CircleTensorExporter.cpp | 28 +-
 compiler/luci/import/CMakeLists.txt | 4 +-
 .../include/luci/Import/CircleImporterUtils.h | 33 +
 .../luci/import/src/CircleImporterUtils.cpp | 36 +
 compiler/luci/import/src/CircleReader.cpp | 6 +
 .../luci/import/src/Nodes/CircleConst.cpp | 2 +-
 .../luci/import/src/Nodes/CircleConv2D.cpp | 1 +
 .../include/luci/IR/AttrWeightCompression.h | 33 +
 .../lang/include/luci/IR/Nodes/CircleConst.h | 5 +
 compiler/luci/lang/src/Nodes/CircleConst.cpp | 3 +
 compiler/luci/partition/CMakeLists.txt | 2 +-
 .../luci/pass/include/luci/CircleOptimizer.h | 1 +
 .../include/luci/Pass/CompressWeightsPass.h | 39 +
 compiler/luci/pass/src/CircleOptimizer.cpp | 8 +-
 .../luci/pass/src/CompressWeightsPass.cpp | 109 ++
 .../luci/pass/src/helpers/HuffmanDecoder.h | 356 ++++
 .../luci/pass/src/helpers/HuffmanEncoder.h | 207 ++
 compiler/luci/requires.cmake | 2 +-
 compiler/mio-circle09/CMakeLists.txt | 52 +
 compiler/mio-circle09/README.md | 3 +
 compiler/mio-circle09/example.cpp | 41 +
 .../mio-circle09/include/mio_circle/Helper.h | 54 +
 .../mio-circle09/include/mio_circle/Reader.h | 103 +
 compiler/mio-circle09/src/Helper.cpp | 110 ++
 compiler/mio-circle09/src/Helper.test.cpp | 153 ++
 compiler/mio-circle09/src/Reader.cpp | 222 +++
 compiler/mio-circle09/src/Reader.test.cpp | 60 +
 compiler/pics/CMakeLists.txt | 2 +-
 compiler/pics/requires.cmake | 2 +-
 compiler/tflite2circle/CMakeLists.txt | 4 +-
 compiler/tflite2circle/requires.cmake | 2 +-
 infra/nncc/Makefile.arm32 | 4 +-
 infra/packaging/preset/20230907 | 2 +-
 infra/packaging/preset/20230907_windows | 2 +-
 infra/scripts/compiler_modules.sh | 4 +-
 packaging/nnfw.spec | 4 +-
 res/CircleSchema/0.9/circle_schema.fbs | 1705 +++++++++++++++++
 67 files changed, 4126 insertions(+), 79 deletions(-)
 create mode 100644 compiler/luci-interpreter/pal/linux/HuffmanDecoder.h
 create mode 100644
compiler/luci/import/include/luci/Import/CircleImporterUtils.h create mode 100644 compiler/luci/import/src/CircleImporterUtils.cpp create mode 100644 compiler/luci/lang/include/luci/IR/AttrWeightCompression.h create mode 100644 compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h create mode 100644 compiler/luci/pass/src/CompressWeightsPass.cpp create mode 100644 compiler/luci/pass/src/helpers/HuffmanDecoder.h create mode 100644 compiler/luci/pass/src/helpers/HuffmanEncoder.h create mode 100644 compiler/mio-circle09/CMakeLists.txt create mode 100644 compiler/mio-circle09/README.md create mode 100644 compiler/mio-circle09/example.cpp create mode 100644 compiler/mio-circle09/include/mio_circle/Helper.h create mode 100644 compiler/mio-circle09/include/mio_circle/Reader.h create mode 100644 compiler/mio-circle09/src/Helper.cpp create mode 100644 compiler/mio-circle09/src/Helper.test.cpp create mode 100644 compiler/mio-circle09/src/Reader.cpp create mode 100644 compiler/mio-circle09/src/Reader.test.cpp create mode 100644 res/CircleSchema/0.9/circle_schema.fbs diff --git a/Makefile.template b/Makefile.template index 6e0c29590c7..d16aa3d274b 100644 --- a/Makefile.template +++ b/Makefile.template @@ -159,7 +159,7 @@ prepare_nncc_internal: $(WORKSPACE) ifeq (,$(findstring android,$(TARGET_OS))) EXTERNAL_FLATC=$(BUILDTOOL_PATH)/bin/flatc ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=$(NPROCS) -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCMAKE_INSTALL_PREFIX=$(OVERLAY_FOLDER) \ - -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \ + -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle09;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" \ $(OPTIONS_NNCC) ./nncc build -j$(NPROCS) cmake --install $(NNCC_FOLDER) $(INSTALL_OPTIONS) @@ -171,7 +171,7 @@ ifeq (,$(findstring android,$(TARGET_OS))) @cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h ${OVERLAY_FOLDER}/include/nncc/core/ADT/tensor @cp compiler/oops/include/oops/InternalExn.h ${OVERLAY_FOLDER}/include/oops @cp compiler/luci/lang/include/luci/IR/CircleNodes.lst ${OVERLAY_FOLDER}/include/luci/IR - @cp ${NNCC_WORKSPACE}/compiler/mio-circle08/gen/mio/circle/schema_generated.h ${OVERLAY_FOLDER}/include/mio/circle + @cp ${NNCC_WORKSPACE}/compiler/mio-circle09/gen/mio/circle/schema_generated.h ${OVERLAY_FOLDER}/include/mio/circle @cp -r ${NNCC_WORKSPACE}/overlay/include/flatbuffers ${OVERLAY_FOLDER}/include @echo "Done prepare-nncc" endif diff --git a/compiler/circle-inspect/CMakeLists.txt b/compiler/circle-inspect/CMakeLists.txt index 76e65ddc6a8..857114d2c72 100644 --- a/compiler/circle-inspect/CMakeLists.txt +++ b/compiler/circle-inspect/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -10,6 +10,6 @@ add_executable(circle-inspect ${DRIVER} ${SOURCES}) target_include_directories(circle-inspect PRIVATE src) target_link_libraries(circle-inspect arser) target_link_libraries(circle-inspect foder) -target_link_libraries(circle-inspect mio_circle08) -target_link_libraries(circle-inspect mio_circle08_helper) +target_link_libraries(circle-inspect mio_circle09) +target_link_libraries(circle-inspect mio_circle09_helper) target_link_libraries(circle-inspect safemain) diff --git a/compiler/circle-inspect/requires.cmake 
b/compiler/circle-inspect/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-inspect/requires.cmake +++ b/compiler/circle-inspect/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-operator/CMakeLists.txt b/compiler/circle-operator/CMakeLists.txt index a13e76eb86b..492ad2da991 100644 --- a/compiler/circle-operator/CMakeLists.txt +++ b/compiler/circle-operator/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -10,8 +10,8 @@ add_executable(circle-operator ${DRIVER} ${SOURCES}) target_include_directories(circle-operator PRIVATE src) target_link_libraries(circle-operator arser) target_link_libraries(circle-operator foder) -target_link_libraries(circle-operator mio_circle08) -target_link_libraries(circle-operator mio_circle08_helper) +target_link_libraries(circle-operator mio_circle09) +target_link_libraries(circle-operator mio_circle09_helper) target_link_libraries(circle-operator safemain) install(TARGETS circle-operator DESTINATION bin) diff --git a/compiler/circle-operator/requires.cmake b/compiler/circle-operator/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-operator/requires.cmake +++ b/compiler/circle-operator/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-tensordump/CMakeLists.txt b/compiler/circle-tensordump/CMakeLists.txt index c65f634e8d2..04d0a9f2495 100644 --- a/compiler/circle-tensordump/CMakeLists.txt +++ b/compiler/circle-tensordump/CMakeLists.txt @@ -1,6 +1,6 @@ -if(NOT TARGET mio_circle08) +if(NOT TARGET mio_circle09) return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) nnas_find_package(HDF5 COMPONENTS STATIC QUIET) @@ -19,8 +19,8 @@ target_include_directories(circle-tensordump PRIVATE ${HDF5_INCLUDE_DIRS}) target_link_libraries(circle-tensordump PRIVATE ${HDF5_CXX_LIBRARIES}) target_link_libraries(circle-tensordump PRIVATE arser) target_link_libraries(circle-tensordump PRIVATE foder) -target_link_libraries(circle-tensordump PRIVATE mio_circle08) -target_link_libraries(circle-tensordump PRIVATE mio_circle08_helper) +target_link_libraries(circle-tensordump PRIVATE mio_circle09) +target_link_libraries(circle-tensordump PRIVATE mio_circle09_helper) target_link_libraries(circle-tensordump PRIVATE safemain) install(TARGETS circle-tensordump DESTINATION bin) diff --git a/compiler/circle-tensordump/requires.cmake b/compiler/circle-tensordump/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circle-tensordump/requires.cmake +++ b/compiler/circle-tensordump/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/circle-verify/CMakeLists.txt b/compiler/circle-verify/CMakeLists.txt index 3ccdd0306aa..12909d65e57 100644 --- a/compiler/circle-verify/CMakeLists.txt +++ b/compiler/circle-verify/CMakeLists.txt @@ -1,14 +1,14 @@ -if(NOT TARGET mio_circle08) - message(STATUS "Skip circle-verify: mio_circle08 not found") +if(NOT TARGET mio_circle09) + message(STATUS "Skip circle-verify: mio_circle09 not found") return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) file(GLOB_RECURSE SOURCES "src/*.cpp") 
 add_executable(circle-verify ${SOURCES})
 target_include_directories(circle-verify PRIVATE src)
 target_link_libraries(circle-verify arser)
-target_link_libraries(circle-verify mio_circle08)
+target_link_libraries(circle-verify mio_circle09)
 target_link_libraries(circle-verify safemain)
 target_link_libraries(circle-verify cwrap)
 target_link_libraries(circle-verify foder)
diff --git a/compiler/circle-verify/requires.cmake b/compiler/circle-verify/requires.cmake
index d382ef9761d..4c4f0fca4d1 100644
--- a/compiler/circle-verify/requires.cmake
+++ b/compiler/circle-verify/requires.cmake
@@ -1,5 +1,5 @@
 require("arser")
-require("mio-circle08")
+require("mio-circle09")
 require("safemain")
 require("cwrap")
 require("foder")
diff --git a/compiler/circle2circle/src/Circle2Circle.cpp b/compiler/circle2circle/src/Circle2Circle.cpp
index c32060bd8af..33f55f082d6 100644
--- a/compiler/circle2circle/src/Circle2Circle.cpp
+++ b/compiler/circle2circle/src/Circle2Circle.cpp
@@ -191,6 +191,8 @@ int entry(int argc, char **argv)
              "This will convert single input Transpose to Reshape");
   add_switch(arser, "--expand_broadcast_const", "This will expand broadcastable constant inputs");
   add_switch(arser, "--unroll_unidirseqlstm", "Unroll UnidirectionalSequenceLSTM operator.");
+  add_switch(arser, "--compress_weights_huffman",
+             "Lossless weight compression with Huffman encoding.");
   add_switch(arser, "--convert_nchw_to_nhwc",
              "Experimental: This will convert NCHW operators to NHWC under the assumption that "
              "input model is NCHW.");
@@ -340,6 +342,7 @@ int entry(int argc, char **argv)
   option_str_to_enum["decompose_softmax"] = Algorithms::DecomposeSoftmaxPass;
   option_str_to_enum["expand_broadcast_const"] = Algorithms::ExpandBroadcastConst;
   option_str_to_enum["unroll_unidirseqlstm"] = Algorithms::UnrollUnidirSeqLSTM;
+  option_str_to_enum["compress_weights_huffman"] = Algorithms::CompressWeightsHuffman;
   // clang-format on
   if (arser.get("--verbose"))
diff --git a/compiler/circlechef/CMakeLists.txt b/compiler/circlechef/CMakeLists.txt
index 18b58a9c17d..4c11cad756f 100644
--- a/compiler/circlechef/CMakeLists.txt
+++ b/compiler/circlechef/CMakeLists.txt
@@ -5,10 +5,10 @@ if(NOT Protobuf_FOUND)
   return()
 endif(NOT Protobuf_FOUND)
-if(NOT TARGET mio_circle08)
-  message(STATUS "circlechef: SKIP (missing mio-circle08)")
+if(NOT TARGET mio_circle09)
+  message(STATUS "circlechef: SKIP (missing mio-circle09)")
   return()
-endif(NOT TARGET mio_circle08)
+endif(NOT TARGET mio_circle09)
 # Recipe Parser
 add_subdirectory(proto)
diff --git a/compiler/circlechef/circle/CMakeLists.txt b/compiler/circlechef/circle/CMakeLists.txt
index e50d4a64ed5..075f2f0346a 100644
--- a/compiler/circlechef/circle/CMakeLists.txt
+++ b/compiler/circlechef/circle/CMakeLists.txt
@@ -4,7 +4,7 @@ add_library(circlechef_circle STATIC ${SOURCES})
 target_include_directories(circlechef_circle PUBLIC include)
 target_include_directories(circlechef_circle PRIVATE src)
 target_link_libraries(circlechef_circle circlechef_proto)
-target_link_libraries(circlechef_circle mio_circle08)
-target_link_libraries(circlechef_circle mio_circle08_helper)
+target_link_libraries(circlechef_circle mio_circle09)
+target_link_libraries(circlechef_circle mio_circle09_helper)
 target_link_libraries(circlechef_circle cwrap)
 target_link_libraries(circlechef_circle souschef)
diff --git a/compiler/circlechef/core/CMakeLists.txt b/compiler/circlechef/core/CMakeLists.txt
index 073abbdfc4f..48189fc65aa 100644
--- a/compiler/circlechef/core/CMakeLists.txt
+++ b/compiler/circlechef/core/CMakeLists.txt
@@ -7,7 +7,7 @@ target_include_directories(circlechef_core PUBLIC include) target_include_directories(circlechef_core PRIVATE src) target_link_libraries(circlechef_core PUBLIC circlechef_proto) target_link_libraries(circlechef_core PUBLIC circlechef_log) -target_link_libraries(circlechef_core PUBLIC mio_circle08) +target_link_libraries(circlechef_core PUBLIC mio_circle09) target_link_libraries(circlechef_core PUBLIC souschef) target_link_libraries(circlechef_core PRIVATE nncc_coverage) diff --git a/compiler/circlechef/requires.cmake b/compiler/circlechef/requires.cmake index 77bfddc97ba..8e7f3524fa7 100644 --- a/compiler/circlechef/requires.cmake +++ b/compiler/circlechef/requires.cmake @@ -1,6 +1,6 @@ require("arser") require("cwrap") -require("mio-circle08") +require("mio-circle09") require("safemain") require("hermes") require("hermes-std") diff --git a/compiler/circledump/CMakeLists.txt b/compiler/circledump/CMakeLists.txt index 9945ba0f0e0..9a497df00d8 100644 --- a/compiler/circledump/CMakeLists.txt +++ b/compiler/circledump/CMakeLists.txt @@ -1,7 +1,7 @@ -if(NOT TARGET mio_circle08) - message(STATUS "Skip circledump: mio_circle08 not found") +if(NOT TARGET mio_circle09) + message(STATUS "Skip circledump: mio_circle09 not found") return() -endif(NOT TARGET mio_circle08) +endif(NOT TARGET mio_circle09) set(DRIVER "driver/Driver.cpp") @@ -11,8 +11,8 @@ add_executable(circledump ${DRIVER} ${SOURCES}) target_include_directories(circledump PRIVATE include) target_link_libraries(circledump arser) target_link_libraries(circledump foder) -target_link_libraries(circledump mio_circle08) -target_link_libraries(circledump mio_circle08_helper) +target_link_libraries(circledump mio_circle09) +target_link_libraries(circledump mio_circle09_helper) target_link_libraries(circledump safemain) install(TARGETS circledump DESTINATION bin) diff --git a/compiler/circledump/README.md b/compiler/circledump/README.md index 9fa2653006c..972c4432146 100644 --- a/compiler/circledump/README.md +++ b/compiler/circledump/README.md @@ -65,6 +65,6 @@ O T(3) ofm ### Dependency -- mio-circle08 +- mio-circle09 - safemain - FlatBuffers diff --git a/compiler/circledump/requires.cmake b/compiler/circledump/requires.cmake index 8a57c8f1123..b157872cee0 100644 --- a/compiler/circledump/requires.cmake +++ b/compiler/circledump/requires.cmake @@ -1,4 +1,4 @@ require("arser") require("foder") -require("mio-circle08") +require("mio-circle09") require("safemain") diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h index f118ee22c24..91ca85380c4 100644 --- a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h +++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h @@ -18,6 +18,7 @@ #define LUCI_INTERPRETER_CORE_TENSOR_H #include "luci_interpreter/core/DataType.h" +#include #include #include @@ -146,6 +147,8 @@ class Tensor void resize(const Shape &new_shape); + void resize(const Shape &new_shape, size_t raw_size); + void set_data_buffer(uint8_t *buffer) { if (buffer == nullptr) @@ -173,11 +176,21 @@ class Tensor void set_offset(int32_t offset) { _offset = offset; } + luci::CompressionType get_compression() const { return _compression; } + + void set_compression(luci::CompressionType compression) { _compression = compression; } + + size_t get_raw_size(void) const { return _raw_size; } + void set_raw_size(size_t size) { _raw_size = size; } + private: DataType _element_type; Shape _shape; AffineQuantization 
_quantization; uint8_t *_data = nullptr; + // Used for compressed/sparsed tensors when size != WxHxLxD + size_t _raw_size{0}; + std::string _name; bool _data_allocated = false; // Write of tensor is reported to registered Observers only if this tensor is observable @@ -190,6 +203,8 @@ class Tensor // Used by static memory manager. // Stores the offset from the beginning of the allocated memory buffer. int32_t _offset = -1; + + luci::CompressionType _compression{luci::CompressionType::NONE}; }; } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h b/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h new file mode 100644 index 00000000000..6a8dd712b7c --- /dev/null +++ b/compiler/luci-interpreter/pal/linux/HuffmanDecoder.h @@ -0,0 +1,357 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ +#define __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace luci_interpreter_pal +{ + +namespace huffman +{ +template struct Node +{ + Node *p_left = nullptr; + Node *p_right = nullptr; + T data; + unsigned int freq; +}; + +template class HuffmanDecoder +{ +private: + Node *root = nullptr; + std::unordered_map huffmanCode; + std::vector encoded_bitset{}; + std::size_t nodes_count = 0; + +private: + Node *allocateNode(T data, unsigned int freq, Node *p_left, Node *p_right) + { + Node *node = new Node; + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + nodes_count++; + return node; + } + + std::string exportHuffmanTreeToString(Node *node) + { + if (node == nullptr) + return ""; + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + Node *importHuffmanTreeFromBoolVec(std::vector &vec, size_t &index) + { + if (vec.empty()) + return nullptr; + if (vec[index]) + { + index++; + Node *p_left = importHuffmanTreeFromBoolVec(vec, index); + Node *p_right = importHuffmanTreeFromBoolVec(vec, index); + return allocateNode(0, 0, p_left, p_right); + } + else if (vec[index] == false) + { + index++; + T tmp = 0; + for (size_t i = 0; i < sizeof(T) * CHAR_BIT; ++i) + { + if (vec[index++]) + tmp |= (1 << (sizeof(T) * CHAR_BIT - 1)) >> i; + } + + return allocateNode(tmp, 0, nullptr, nullptr); + } + return nullptr; + } + + Node *importHuffmanTreeFromString(std::string &str) + { + + if (str.substr(0, 1) == "1") + { + str = str.substr(1); + Node *p_left = importHuffmanTreeFromString(str); + Node *p_right = importHuffmanTreeFromString(str); + return allocateNode(0, 0, p_left, p_right); + } + else if (str.substr(0, 1) == "0") + { + str = str.substr(1); + std::bitset tmp(str.substr(0, sizeof(T) * CHAR_BIT)); + str = 
str.substr(sizeof(T) * CHAR_BIT); + return allocateNode(static_cast(tmp.to_ullong()), 0, nullptr, nullptr); + } + } + + void buildHuffmanTable(Node *node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + huffmanCode[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + void decode(Node *node, std::string &str, std::vector &out_vec, size_t &index) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + out_vec.push_back(node->data); + return; + } + + if (str.size() == index) + return; + if (str[index] == '0') + { + decode(node->p_left, str, out_vec, ++index); + } + else + { + decode(node->p_right, str, out_vec, ++index); + } + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const uint8_t *pack_ptr) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.tree_vec.push_back(true); + else + tree_and_data.tree_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const std::vector &packed_vec) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + +public: + void decode(Node *node, std::vector &vec, T *dst_ptr) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + *dst_ptr = node->data; + return; + } + + if (vec.size() == _decode_idx) + return; + if (vec[_decode_idx] == false) + { + ++_decode_idx; + decode(node->p_left, vec, dst_ptr); + } + else + { + ++_decode_idx; + decode(node->p_right, vec, dst_ptr); + } + } + +private: + size_t _decode_idx = 0; + EncodedTreeAndData _encoded_tree_and_data; + +public: + void init_decoder(const uint8_t *input) + { + size_t index = 0; + _encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + root = importHuffmanTreeFromBoolVec(_encoded_tree_and_data.tree_vec, index); + } + + void reset_decode_idx(void) { _decode_idx = 0; } + + int decode_n(uint8_t *dst_ptr, size_t num) + { + size_t bytes_decoded = 0; + for (size_t i = 0; i < num && _decode_idx < _encoded_tree_and_data.data_vec.size(); ++i) + { + decode(root, _encoded_tree_and_data.data_vec, dst_ptr + bytes_decoded); + bytes_decoded++; + } + return bytes_decoded; + } + + HuffmanDecoder() = default; +}; +} // namespace huffman +} // namespace luci_interpreter_pal +#endif // __LUCI_INTERPRETER_PAL_HUFFMAN_DECODER_H__ diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h index 0ce83fc6e35..4d0f3a37774 100644 --- a/compiler/luci-interpreter/pal/linux/PALConv2d.h +++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h @@ -19,6 +19,7 @@ #include #include +#include "HuffmanDecoder.h" namespace luci_interpreter_pal { @@ -84,6 +85,135 @@ static inline void Conv(const tflite::ConvParams ¶ms, const tflite::RuntimeS scratchpad_data, gemmlowp_context.get()); } +template +void ConvPerChannelHuffman(const tflite::ConvParams ¶ms, const int32_t *mult, + const int32_t *shifts, const tflite::RuntimeShape &input_shape, + const T *input_data, const tflite::RuntimeShape &filter_shape, + const T *filter_data, const tflite::RuntimeShape &bias_shape, + const int32 *bias_data, const tflite::RuntimeShape &output_shape, + T *output_data, const tflite::RuntimeShape &scratchpad_shape, + T *scratchpad_data) +{ + (void)scratchpad_shape; + (void)scratchpad_data; + // Get parameters. + const int32_t input_offset = params.input_offset; // r = s(q - Z) + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int32_t output_offset = params.output_offset; + const int32_t filter_offset = params.weights_offset; + + // Set min and max value of the output. + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + + // Consistency check. 
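+  // Note on the decompression strategy: "filter_data" here is not a plain
+  // weight array but the Huffman-packed stream produced by the compress
+  // weights pass. For each output channel, decode_n() below expands one
+  // filter block (filter_height * filter_width * filter_input_depth values)
+  // into scratchpad_data, so only a single channel's weights are held
+  // uncompressed at any time.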
+ TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + if (bias_data) + { + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + } + + // Check dimensions of the tensors. + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + const int groups = input_depth / filter_input_depth; + TFLITE_DCHECK_EQ(input_depth % filter_input_depth, 0); + const int filters_per_group = output_depth / groups; + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + + huffman::HuffmanDecoder decoder; + decoder.init_decoder(reinterpret_cast(filter_data)); + decoder.reset_decode_idx(); + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + auto group = out_channel / filters_per_group; + + // extract compressed filter + decoder.decode_n(reinterpret_cast(&scratchpad_data[0]), scratchpad_shape.FlatSize()); + + for (int batch = 0; batch < batches; ++batch) + { + for (int out_y = 0; out_y < output_height; ++out_y) + { + const int in_y_origin = (out_y * stride_height) - pad_height; + for (int out_x = 0; out_x < output_width; ++out_x) + { + const int in_x_origin = (out_x * stride_width) - pad_width; + int32_t acc = 0; + + for (int in_channel = 0; in_channel < filter_input_depth; ++in_channel) + { + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + const int in_y = in_y_origin + dilation_height_factor * filter_y; + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + const int in_x = in_x_origin + dilation_width_factor * filter_x; + + // Zero padding by omitting the areas outside the image. + const bool is_point_inside_image = + (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height); + + if (!is_point_inside_image) + { + continue; + } + + int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, + in_channel + group * filter_input_depth)]; + int32_t filter_val = + scratchpad_data[(filter_y * filter_height + filter_x) * filter_width + + in_channel]; + // Accumulate with 32 bits accumulator. + // In the nudging process during model quantization, we force + // real value of 0.0 be represented by a quantized value. This + // guarantees that the input_offset is a int8_t, even though + // it is represented using int32_t. int32_t += int8_t * + // (int8_t - int8_t) so the highest value we can get from each + // accumulation is [-127, 127] * ([-128, 127] - + // [-128, 127]), which is [-32512, 32512]. log2(32512) + // = 14.98, which means we can accumulate at least 2^16 + // multiplications without overflow. The accumulator is + // applied to a filter so the accumulation logic will hold as + // long as the filter size (filter_y * filter_x * in_channel) + // does not exceed 2^16, which is the case in all the models + // we have seen so far. + // accumulator depth is smaller than 2^16. 
+ acc += (filter_val + filter_offset) * (input_val + input_offset); + } + } + } + + if (bias_data) + { + acc += bias_data[out_channel]; + } + acc = tflite::MultiplyByQuantizedMultiplier(acc, mult[out_channel], shifts[out_channel]); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] = static_cast(acc); + } + } + } + } +} + static inline void ConvPerChannel(const tflite::ConvParams ¶ms, const int32_t *mult, const int32_t *shifts, const tflite::RuntimeShape &input_shape, const int8 *input_data, const tflite::RuntimeShape &filter_shape, @@ -105,7 +235,8 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, const tflite::ConvParams ¶ms, const tflite::RuntimeShape &input_shape, const tflite::RuntimeShape &filter_shape, - const tflite::RuntimeShape &output_shape) + const tflite::RuntimeShape &output_shape, + bool is_compressed = false) { const int32_t filter_height = filter_shape.Dims(1); const int32_t filter_width = filter_shape.Dims(2); @@ -117,7 +248,7 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad, const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 || filter_height != 1 || filter_width != 1; auto _need_scratchpad = input_data_type != luci_interpreter::DataType::S16 && - (need_dilated_scratchpad || need_non_dilated_scratchpad); + (need_dilated_scratchpad || need_non_dilated_scratchpad || is_compressed); if (_need_scratchpad) { diff --git a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp index a39c34a0ad8..bf13b0cc9a8 100644 --- a/compiler/luci-interpreter/src/SimpleMemoryManager.cpp +++ b/compiler/luci-interpreter/src/SimpleMemoryManager.cpp @@ -29,12 +29,21 @@ void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor) { release_memory(tensor); } - const auto element_size = getDataTypeSize(tensor.element_type()); + size_t bytes_to_allocate = 0; + if (tensor.get_raw_size() > 0) + { + bytes_to_allocate = tensor.get_raw_size(); + } + else + { + const auto element_size = getDataTypeSize(tensor.element_type()); - // Use large_num_elements to avoid overflow - const auto num_elements = tensor.shape().large_num_elements(); + // Use large_num_elements to avoid overflow + const auto num_elements = tensor.shape().large_num_elements(); + bytes_to_allocate = num_elements * element_size; + } - auto *data = new uint8_t[num_elements * element_size]; + auto *data = new uint8_t[bytes_to_allocate]; tensor.set_data_buffer(data); } diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp index 3c3c5ffffe8..b7769174e23 100644 --- a/compiler/luci-interpreter/src/core/Tensor.cpp +++ b/compiler/luci-interpreter/src/core/Tensor.cpp @@ -45,14 +45,34 @@ void Tensor::writeData(const void *data_ptr, size_t data_size) { const size_t element_size = getDataTypeSize(element_type()); const int32_t num_elements = shape().num_elements(); - if (data_size != num_elements * element_size) + if (_raw_size > 0) { - throw std::invalid_argument("Invalid data size."); + if (data_size != _raw_size) + { + throw std::invalid_argument("Invalid data size."); + } + } + else + { + if (data_size != num_elements * element_size) + { + throw std::invalid_argument("Invalid data size."); + } } assert(data_ptr != nullptr); std::memcpy(data(), data_ptr, data_size); } -void 
Tensor::resize(const Shape &new_shape) { _shape = new_shape; } +void Tensor::resize(const Shape &new_shape) +{ + _shape = new_shape; + _raw_size = 0; +} + +void Tensor::resize(const Shape &new_shape, size_t raw_size) +{ + _shape = new_shape; + _raw_size = raw_size; +} } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp index 9aae9da2644..a5377408adc 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp @@ -117,9 +117,10 @@ void Conv2D::configure() params.dilation_height_factor = _params.dilation_height_factor; params.dilation_width_factor = _params.dilation_width_factor; auto scratchpad = getOutputTensors()[1]; + bool is_compressed = filter()->get_compression() != luci::CompressionType::NONE; luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params, getTensorShape(input()), getTensorShape(filter()), - getTensorShape(output())); + getTensorShape(output()), is_compressed); switch (_params.activation) { @@ -145,20 +146,34 @@ void Conv2D::execute() const } throw std::runtime_error("luci-intp Conv2D(2) Unsupported type."); case DataType::U8: - if (filter()->scales().size() == 1) + if (filter()->get_compression() == luci::CompressionType::HUFFMAN) { - evalQuantized(); + evalQuantizedU8PerChannelHuffman(); } - else if (filter()->scales().size() > 1) + else { - LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); - LUCI_INTERPRETER_CHECK(filter()->scales().size() == - static_cast(filter()->shape().dim(0))); - evalQuantizedPerChannel(); + if (filter()->scales().size() == 1) + { + evalQuantized(); + } + else if (filter()->scales().size() > 1) + { + LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(filter()->scales().size() == + static_cast(filter()->shape().dim(0))); + evalQuantizedPerChannel(); + } } break; case DataType::S8: - evalQuantizedS8PerChannel(); + if (filter()->get_compression() == luci::CompressionType::HUFFMAN) + { + evalQuantizedS8PerChannelHuffman(); + } + else + { + evalQuantizedS8PerChannel(); + } break; case DataType::S16: evalQuantizedS16(); @@ -321,6 +336,120 @@ void Conv2D::evalQuantizedPerChannel() const } } +// TODO: remove code duplication with S8 +void Conv2D::evalQuantizedU8PerChannelHuffman() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = -filter()->zero_point(); // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + uint8_t *scratchpad_data = nullptr; + + // Scratchpad used for decompression + const auto filter_shape = getTensorShape(filter()); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + auto scratchpad_shape = Shape({filter_height, filter_width, filter_input_depth}); + + if (scratchpad->is_allocatable()) + { + scratchpad->resize(scratchpad_shape); + scratchpad_data = scratchpad->data(); + } + luci_interpreter_pal::ConvPerChannelHuffman( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData(input()), getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + +void Conv2D::evalQuantizedS8PerChannelHuffman() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ConvParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.dilation_height_factor = _params.dilation_height_factor; + params.dilation_width_factor = _params.dilation_width_factor; + // The kernel expects filter zero points to be negated. + params.input_offset = -input()->zero_point(); // Note the '-'. 
+ params.weights_offset = 0; // Unused in tflite code + params.output_offset = output()->zero_point(); + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const std::vector effective_output_scales = + getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale()); + + std::vector quant_multipliers = + quantizeMultipliers(effective_output_scales); + + std::vector shifts; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts), + [](ChannelQuantMultipliers cm) { return cm.shift; }); + std::vector multipliers; + std::transform(quant_multipliers.begin(), quant_multipliers.end(), + std::back_inserter(multipliers), + [](ChannelQuantMultipliers cm) { return cm.multiplier; }); + + auto scratchpad = getOutputTensors()[1]; + int8_t *scratchpad_data = nullptr; + + // Scratchpad used for decompression + const auto filter_shape = getTensorShape(filter()); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int filter_input_depth = filter_shape.Dims(3); + auto scratchpad_shape = Shape({filter_height, filter_width, filter_input_depth}); + + if (scratchpad->is_allocatable()) + { + scratchpad->resize(scratchpad_shape); + scratchpad_data = scratchpad->data(); + } + + luci_interpreter_pal::ConvPerChannelHuffman( + params, multipliers.data(), shifts.data(), getTensorShape(input()), + getTensorData(input()), getTensorShape(filter()), getTensorData(filter()), + getTensorShape(bias()), getTensorData(bias()), getTensorShape(output()), + getTensorData(output()), getTensorShape(scratchpad), scratchpad_data); +} + void Conv2D::evalQuantizedS8PerChannel() const { int32_t activation_min{}; diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h index 330bf3a2a69..096bd85f4db 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.h +++ b/compiler/luci-interpreter/src/kernels/Conv2D.h @@ -47,6 +47,8 @@ class Conv2D : public KernelWithParams void evalQuantizedPerChannel() const; void evalQuantizedS8PerChannel() const; void evalQuantizedS16() const; + void evalQuantizedS8PerChannelHuffman() const; + void evalQuantizedU8PerChannelHuffman() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index e975585cdf3..422c0b4d7d8 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -137,7 +137,8 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_ inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, float output_scale) { - const double input_product_scale = static_cast(input_scale * filter_scale); + const double input_product_scale = + static_cast(static_cast(input_scale) * static_cast(filter_scale)); LUCI_INTERPRETER_CHECK(input_product_scale >= 0); return input_product_scale / static_cast(output_scale); } diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp index cf83713d906..6e1399dd467 100644 --- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp +++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp @@ -243,9 +243,11 @@ void GraphLoader::loadTensors() const void *const_data = getNodeData(const_node, &data_size); if (const_data != nullptr) { + tensor->set_raw_size(data_size); 
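+        // With a non-zero raw size recorded on the tensor,
+        // SimpleMemoryManager::allocate_memory() (changed above) allocates
+        // data_size bytes instead of num_elements * element_size, which is
+        // what allows a Huffman-packed constant to occupy less memory than
+        // its nominal shape implies.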
_memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); } + tensor->set_compression(const_node->compression()); } else if (const auto *custom_out_node = dynamic_cast(node)) { @@ -258,6 +260,7 @@ void GraphLoader::loadTensors() const void *const_data = getNodeData(custom_node, &data_size); if (const_data != nullptr) { + tensor->set_raw_size(data_size); _memory_manager->allocate_memory(*tensor); tensor->writeData(const_data, data_size); } diff --git a/compiler/luci-pass-value-py-test/test.lst b/compiler/luci-pass-value-py-test/test.lst index ebf84e02660..aeec02dc132 100644 --- a/compiler/luci-pass-value-py-test/test.lst +++ b/compiler/luci-pass-value-py-test/test.lst @@ -7,6 +7,7 @@ # eval(Net_Preactivation_BN_000 fuse_preactivation_batchnorm) : value diff exist # --> https://github.com/Samsung/ONE/issues/5782 +eval(Conv2D_U8_000 compress_weights_huffman) eval(FullyConnected_007 replace_non_const_fc_with_batch_matmul) eval(HardSwish_001 decompose_hardswish) eval(Net_Add_FloorMod_Gather_000 remove_gather_guard) diff --git a/compiler/luci/export/CMakeLists.txt b/compiler/luci/export/CMakeLists.txt index bc10ad24cba..4c21faa10b3 100644 --- a/compiler/luci/export/CMakeLists.txt +++ b/compiler/luci/export/CMakeLists.txt @@ -12,7 +12,7 @@ target_include_directories(luci_export PUBLIC include) target_link_libraries(luci_export PRIVATE luci_lang) target_link_libraries(luci_export PRIVATE luci_service) target_link_libraries(luci_export PRIVATE luci_pass) -target_link_libraries(luci_export PRIVATE mio_circle08) +target_link_libraries(luci_export PRIVATE mio_circle09) target_link_libraries(luci_export PRIVATE luci_env) target_link_libraries(luci_export PRIVATE luci_log) target_link_libraries(luci_export PRIVATE luci_logex) @@ -36,6 +36,6 @@ target_include_directories(luci_export_test PRIVATE src) target_link_libraries(luci_export_test luci_export) target_link_libraries(luci_export_test luci_plan) target_link_libraries(luci_export_test luci_lang) -target_link_libraries(luci_export_test mio_circle08) +target_link_libraries(luci_export_test mio_circle09) target_link_libraries(luci_export_test luci_env) target_link_libraries(luci_export_test oops) diff --git a/compiler/luci/export/src/CircleExporterUtils.cpp b/compiler/luci/export/src/CircleExporterUtils.cpp index f6e380d7872..13889f17f89 100644 --- a/compiler/luci/export/src/CircleExporterUtils.cpp +++ b/compiler/luci/export/src/CircleExporterUtils.cpp @@ -25,6 +25,21 @@ namespace luci { +circle::CompressionType to_circle_compressiontype(luci::CompressionType type) +{ + switch (type) + { + case luci::CompressionType::UNDEFINED: + case luci::CompressionType::NONE: + return circle::CompressionType_NONE; + case luci::CompressionType::HUFFMAN: + return circle::CompressionType_HUFFMAN; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightCompression", + oops::to_uint32(type)); + } +} + circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func) { switch (func) diff --git a/compiler/luci/export/src/CircleExporterUtils.h b/compiler/luci/export/src/CircleExporterUtils.h index 83b040753dc..309511ebab9 100644 --- a/compiler/luci/export/src/CircleExporterUtils.h +++ b/compiler/luci/export/src/CircleExporterUtils.h @@ -29,6 +29,7 @@ namespace luci { +circle::CompressionType to_circle_compressiontype(luci::CompressionType type); circle::ActivationFunctionType to_circle_actfunc(luci::FusedActFunc func); circle::TensorType to_circle_tensortype(loco::DataType type); circle::MirrorPadMode 
to_circle_mirrorpadmode(luci::MirrorPadMode mode); diff --git a/compiler/luci/export/src/CircleTensorExporter.cpp b/compiler/luci/export/src/CircleTensorExporter.cpp index 57ae160bd54..bae3fbbc458 100644 --- a/compiler/luci/export/src/CircleTensorExporter.cpp +++ b/compiler/luci/export/src/CircleTensorExporter.cpp @@ -556,30 +556,48 @@ bool has_same_values(luci::CircleConst *lhs, luci::CircleConst *rhs) switch (lhs->dtype()) { case loco::DataType::FLOAT32: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S4: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S8: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S16: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S32: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::S64: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::U4: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::U8: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); case loco::DataType::BOOL: + if (lhs->size() != rhs->size()) + return false; return has_same_elements(lhs, rhs); default: @@ -646,8 +664,14 @@ void exportOpDefinedTensor(const CircleTensorInfo &info, FlatBufferBuilder &buil auto is_variable = info.is_variable(); - auto tensor_offset = CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, - quantparam, is_variable, sparsityparam, shape_signature_offset); + luci::CircleConst *content = info.content(); + auto compression_type = circle::CompressionType_NONE; + if (content) + compression_type = to_circle_compressiontype(info.content()->compression()); + + auto tensor_offset = + CreateTensor(builder, shape_offset, info.dtype(), buffer_id, name_offset, quantparam, + is_variable, sparsityparam, shape_signature_offset, false, 0, compression_type); gd._tensors.push_back(tensor_offset); } diff --git a/compiler/luci/import/CMakeLists.txt b/compiler/luci/import/CMakeLists.txt index 8c1da0e7729..85f96ad9702 100644 --- a/compiler/luci/import/CMakeLists.txt +++ b/compiler/luci/import/CMakeLists.txt @@ -12,7 +12,7 @@ target_include_directories(luci_import PUBLIC include) target_link_libraries(luci_import PUBLIC luci_lang) target_link_libraries(luci_import PUBLIC luci_profile) target_link_libraries(luci_import PUBLIC luci_plan) -target_link_libraries(luci_import PUBLIC mio_circle08) +target_link_libraries(luci_import PUBLIC mio_circle09) target_link_libraries(luci_import PRIVATE luci_env) target_link_libraries(luci_import PRIVATE luci_log) target_link_libraries(luci_import PRIVATE luci_logex) @@ -20,7 +20,7 @@ target_link_libraries(luci_import PRIVATE nncc_common) target_link_libraries(luci_import PRIVATE locop) target_link_libraries(luci_import PRIVATE foder) target_link_libraries(luci_import PRIVATE oops) -target_link_libraries(luci_import PRIVATE mio_circle08_helper) +target_link_libraries(luci_import PRIVATE mio_circle09_helper) install(TARGETS luci_import DESTINATION lib) install(DIRECTORY include/ DESTINATION include FILES_MATCHING PATTERN "*.h") diff --git a/compiler/luci/import/include/luci/Import/CircleImporterUtils.h b/compiler/luci/import/include/luci/Import/CircleImporterUtils.h new file mode 100644 index 
00000000000..f96ec210747 --- /dev/null +++ b/compiler/luci/import/include/luci/Import/CircleImporterUtils.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CIRCLE_IMPORTER_UTILS_H__ +#define __CIRCLE_IMPORTER_UTILS_H__ + +#include + +#include + +#include + +namespace luci +{ + +luci::CompressionType from_circle_compressiontype(circle::CompressionType type); + +} // namespace luci + +#endif // __CIRCLE_IMPORTER_UTILS_H__ diff --git a/compiler/luci/import/src/CircleImporterUtils.cpp b/compiler/luci/import/src/CircleImporterUtils.cpp new file mode 100644 index 00000000000..7e93799bbd6 --- /dev/null +++ b/compiler/luci/import/src/CircleImporterUtils.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Import/CircleImporterUtils.h" + +#include + +namespace luci +{ +luci::CompressionType from_circle_compressiontype(circle::CompressionType type) +{ + switch (type) + { + case circle::CompressionType_NONE: + return luci::CompressionType::NONE; + case circle::CompressionType_HUFFMAN: + return luci::CompressionType::HUFFMAN; + default: + INTERNAL_EXN_V("trying to convert unsupported luci::WeightCompression", + oops::to_uint32(type)); + } +} +} // namespace luci diff --git a/compiler/luci/import/src/CircleReader.cpp b/compiler/luci/import/src/CircleReader.cpp index 392f0c2a5b9..ccf3e0f7e5b 100644 --- a/compiler/luci/import/src/CircleReader.cpp +++ b/compiler/luci/import/src/CircleReader.cpp @@ -15,6 +15,7 @@ */ #include "luci/Import/CircleReader.h" +#include #include @@ -289,6 +290,11 @@ void copy_tensor_attributes(const circle::Tensor *tensor, CircleNode *node) if (sparsityparam) node->sparsityparam(std::move(sparsityparam)); } + auto const_node = dynamic_cast(node); + if (const_node) + { + const_node->compression(luci::from_circle_compressiontype(tensor->compression_type())); + } } std::string fb_string2std_string(const flatbuffers::String *fb_str) diff --git a/compiler/luci/import/src/Nodes/CircleConst.cpp b/compiler/luci/import/src/Nodes/CircleConst.cpp index 189f4d897f4..77121a8b339 100644 --- a/compiler/luci/import/src/Nodes/CircleConst.cpp +++ b/compiler/luci/import/src/Nodes/CircleConst.cpp @@ -52,7 +52,7 @@ void copy_data(const VectorWrapper &raw_data, uint32_t num_elements, using T = typename loco::DataTypeImpl
::Type; // TODO calculate the exact buffer size of sparse tensor - if (const_node->sparsityparam()) + if (const_node->sparsityparam() or const_node->compression() != luci::CompressionType::NONE) { num_elements = raw_data.size() / sizeof(T); } diff --git a/compiler/luci/import/src/Nodes/CircleConv2D.cpp b/compiler/luci/import/src/Nodes/CircleConv2D.cpp index 8cbecdc003b..a4ab6221b74 100644 --- a/compiler/luci/import/src/Nodes/CircleConv2D.cpp +++ b/compiler/luci/import/src/Nodes/CircleConv2D.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "luci/Import/CircleImporterUtils.h" #include "luci/Import/Nodes/CircleConv2D.h" #include diff --git a/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h b/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h new file mode 100644 index 00000000000..e1a83b01908 --- /dev/null +++ b/compiler/luci/lang/include/luci/IR/AttrWeightCompression.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ +#define __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ + +namespace luci +{ + +enum class CompressionType +{ + UNDEFINED, // This is not defined by TFLite or Circle. This was added to + // prevent programming error. + NONE, + HUFFMAN +}; + +} // namespace luci + +#endif // __LUCI_IR_ATTRWEIGHTCOMPRESSION_H__ diff --git a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h index 3e9a274e0cd..bdf7631c886 100644 --- a/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h +++ b/compiler/luci/lang/include/luci/IR/Nodes/CircleConst.h @@ -17,6 +17,7 @@ #ifndef __LUCI_IR_CIRCLECONST_H__ #define __LUCI_IR_CIRCLECONST_H__ +#include "luci/IR/AttrWeightCompression.h" #include "luci/IR/CircleNodeDecl.h" #include "luci/IR/CircleOpcode.h" @@ -34,6 +35,9 @@ namespace luci class CircleConst final : public FixedArityNode<0, CircleNodeImpl> { public: + CompressionType compression(void) const; + void compression(CompressionType c); + template uint32_t size(void) const; template void size(uint32_t size); template const typename loco::DataTypeImpl
::Type &at(uint32_t n) const; @@ -46,6 +50,7 @@ class CircleConst final : public FixedArityNode<0, CircleNodeImpl _data; // TODO use _data for STRING and remove _strings std::vector _strings; // for STRING type + CompressionType _compression{CompressionType::NONE}; }; } // namespace luci diff --git a/compiler/luci/lang/src/Nodes/CircleConst.cpp b/compiler/luci/lang/src/Nodes/CircleConst.cpp index c17a4e2c36d..f2b3f9c96da 100644 --- a/compiler/luci/lang/src/Nodes/CircleConst.cpp +++ b/compiler/luci/lang/src/Nodes/CircleConst.cpp @@ -20,6 +20,9 @@ namespace luci { +CompressionType CircleConst::compression(void) const { return _compression; } + +void CircleConst::compression(luci::CompressionType c) { _compression = c; } template uint32_t CircleConst::size(void) const { diff --git a/compiler/luci/partition/CMakeLists.txt b/compiler/luci/partition/CMakeLists.txt index 001194eb6aa..421fb776c06 100644 --- a/compiler/luci/partition/CMakeLists.txt +++ b/compiler/luci/partition/CMakeLists.txt @@ -13,7 +13,7 @@ target_link_libraries(luci_partition PUBLIC luci_lang) target_link_libraries(luci_partition PRIVATE luci_service) target_link_libraries(luci_partition PRIVATE luci_log) target_link_libraries(luci_partition PRIVATE luci_logex) -target_link_libraries(luci_partition PRIVATE mio_circle08) +target_link_libraries(luci_partition PRIVATE mio_circle09) target_link_libraries(luci_partition PRIVATE nncc_common) target_link_libraries(luci_partition PRIVATE pepper_csv2vec) target_link_libraries(luci_partition PRIVATE oops) diff --git a/compiler/luci/pass/include/luci/CircleOptimizer.h b/compiler/luci/pass/include/luci/CircleOptimizer.h index 8a1eb6d4f78..9f7447debd4 100644 --- a/compiler/luci/pass/include/luci/CircleOptimizer.h +++ b/compiler/luci/pass/include/luci/CircleOptimizer.h @@ -113,6 +113,7 @@ class CircleOptimizer final UnrollUnidirSeqLSTM, XpSepActFromTransposeConv, RemoveGatherGuard, + CompressWeightsHuffman }; enum AlgorithmParameters diff --git a/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h b/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h new file mode 100644 index 00000000000..f9f97791914 --- /dev/null +++ b/compiler/luci/pass/include/luci/Pass/CompressWeightsPass.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_COMPRESS_WEIGHTS_PASS_H__ +#define __LUCI_COMPRESS_WEIGHTS_PASS_H__ + +#include + +namespace luci +{ + +/** + * @brief Class to generate FC/CONV with compressed weights + * + * To see the target Op pattern, please visit implementation. 
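+ *
+ * Illustrative usage sketch (not a definitive API description): this pass is
+ * wired into CircleOptimizer through Options::Algorithm::CompressWeightsHuffman
+ * added in this patch, so enabling that option and calling optimize() runs it:
+ *
+ *   luci::CircleOptimizer optimizer;
+ *   optimizer.options()->enable(
+ *     luci::CircleOptimizer::Options::Algorithm::CompressWeightsHuffman);
+ *   optimizer.optimize(graph); // graph is the loco::Graph * holding Conv2D nodes
+ *
+ * The corresponding circle2circle command-line option is not shown in this sketch.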
+ */ +struct CompressWeightsPass final : public logo::Pass +{ + const char *name(void) const final { return "luci::CompressWeightsPass"; } + + bool run(loco::Graph *g) final; +}; + +} // namespace luci + +#endif // __LUCI_COMPRESS_WEIGHTS_PASS_H__ diff --git a/compiler/luci/pass/src/CircleOptimizer.cpp b/compiler/luci/pass/src/CircleOptimizer.cpp index 90060253080..e8b87697cf7 100644 --- a/compiler/luci/pass/src/CircleOptimizer.cpp +++ b/compiler/luci/pass/src/CircleOptimizer.cpp @@ -97,6 +97,7 @@ #include "luci/Pass/DecomposeSoftmaxPass.h" #include "luci/Pass/UnrollUnidirectionalSequenceLSTMPass.h" #include "luci/Pass/XpSepActFromTransposeConvPass.h" +#include "luci/Pass/CompressWeightsPass.h" // TODO add more passes #include "luci/Pass/CircleShapeInferencePass.h" @@ -312,7 +313,10 @@ void CircleOptimizer::optimize(loco::Graph *g) const { phase.emplace_back(std::make_unique()); } - + if (_options->query(Options::Algorithm::CompressWeightsHuffman)) + { + phase.emplace_back(std::make_unique()); + } // clang-format off std::map (*)(void)> option_to_pass; @@ -387,7 +391,7 @@ void CircleOptimizer::optimize(loco::Graph *g) const option_to_pass[Options::Algorithm::XpSepActFromTransposeConv] = &createPassInstance; option_to_pass[Options::Algorithm::ForwardReshapeToUnaryOp] = &createPassInstance; option_to_pass[Options::Algorithm::ForwardTransposeOp] = &createPassInstance; - // clang-format on + // clang-format on for (auto const &m : option_to_pass) { diff --git a/compiler/luci/pass/src/CompressWeightsPass.cpp b/compiler/luci/pass/src/CompressWeightsPass.cpp new file mode 100644 index 00000000000..77ef8d96adc --- /dev/null +++ b/compiler/luci/pass/src/CompressWeightsPass.cpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "luci/Pass/CompressWeightsPass.h" +#include "helpers/HuffmanEncoder.h" +#include "helpers/NodeFiller.h" + +#include +#include + +#include +#include + +namespace +{ + +template class TypeSelector; + +template <> class TypeSelector +{ +public: + using Type = uint8_t; +}; +template <> class TypeSelector +{ +public: + using Type = int8_t; +}; + +template bool compress_weights_huffman_conv2d(luci::CircleConv2D *conv2d) +{ + using T = typename TypeSelector
<DT>::Type; + assert(conv2d); + + auto weights = loco::must_cast<luci::CircleConst *>(conv2d->filter()); + if (weights->compression() != luci::CompressionType::NONE) + return false; + + luci::huffman::HuffmanEncoder<T> encoder; + auto new_weights = luci::clone(weights); + + std::vector<T> tmp_buf(weights->size<DT>()); + + for (size_t i = 0; i < weights->size<DT>(); ++i) + { + tmp_buf[i] = weights->at<DT>(i); + } + + std::vector<uint8_t> encoded = encoder.encode(tmp_buf); + + new_weights->dtype(DT); + new_weights->size<DT>(encoded.size()); + new_weights->compression(luci::CompressionType::HUFFMAN); + + for (size_t i = 0; i < new_weights->size<DT>(); ++i) + { + new_weights->at<DT>
(i) = encoded[i]; + } + conv2d->filter(new_weights); + + return true; +} + +} // namespace + +namespace luci +{ + +bool CompressWeightsPass::run(loco::Graph *g) +{ + bool changed = false; + + for (auto node : loco::active_nodes(loco::output_nodes(g))) + { + auto conv2d = dynamic_cast(node); + if (not conv2d) + continue; + + auto filter = loco::must_cast(conv2d->filter()); + + if (filter->dtype() == loco::DataType::S8) + { + if (compress_weights_huffman_conv2d(conv2d)) + changed = true; + } + else if (filter->dtype() == loco::DataType::U8) + { + if (compress_weights_huffman_conv2d(conv2d)) + changed = true; + } + } + + return changed; +} + +} // namespace luci diff --git a/compiler/luci/pass/src/helpers/HuffmanDecoder.h b/compiler/luci/pass/src/helpers/HuffmanDecoder.h new file mode 100644 index 00000000000..9387d39c1eb --- /dev/null +++ b/compiler/luci/pass/src/helpers/HuffmanDecoder.h @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ +#define __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace luci +{ + +namespace huffman +{ + +template struct Node +{ + Node *p_left = nullptr; + Node *p_right = nullptr; + T data; +}; + +template class HuffmanDecoder +{ +private: + Node *root = nullptr; + std::unordered_map huffmanCode; + std::vector encoded_bitset{}; + std::size_t nodes_count = 0; + +private: + Node *allocateNode(T data, unsigned int freq, Node *p_left, Node *p_right) + { + Node *node = new Node; + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + nodes_count++; + return node; + } + + std::string exportHuffmanTreeToString(Node *node) + { + if (node == nullptr) + return ""; + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + Node *importHuffmanTreeFromBoolVec(std::vector &vec, size_t &index) + { + if (vec.empty()) + return nullptr; + if (vec[index]) + { + index++; + Node *p_left = importHuffmanTreeFromBoolVec(vec, index); + Node *p_right = importHuffmanTreeFromBoolVec(vec, index); + return allocateNode(0, 0, p_left, p_right); + } + else if (vec[index] == false) + { + index++; + T tmp = 0; + for (size_t i = 0; i < sizeof(T) * CHAR_BIT; ++i) + { + if (vec[index++]) + tmp |= (1 << (sizeof(T) * CHAR_BIT - 1)) >> i; + } + + return allocateNode(tmp, 0, nullptr, nullptr); + } + return nullptr; + } + + Node *importHuffmanTreeFromString(std::string &str) + { + + if (str.substr(0, 1) == "1") + { + str = str.substr(1); + Node *p_left = importHuffmanTreeFromString(str); + Node *p_right = importHuffmanTreeFromString(str); + return allocateNode(0, 0, p_left, p_right); + } + else if (str.substr(0, 1) == "0") + { + str = str.substr(1); 
+ std::bitset tmp(str.substr(0, sizeof(T) * CHAR_BIT)); + str = str.substr(sizeof(T) * CHAR_BIT); + return allocateNode(static_cast(tmp.to_ullong()), 0, nullptr, nullptr); + } + } + + void buildHuffmanTable(Node *node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + huffmanCode[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + void decode(Node *node, std::string &str, std::vector &out_vec, size_t &index) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + out_vec.push_back(node->data); + return; + } + + if (str.size() == index) + return; + if (str[index] == '0') + { + decode(node->p_left, str, out_vec, ++index); + } + else + { + decode(node->p_right, str, out_vec, ++index); + } + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const uint8_t *pack_ptr) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.tree_vec.push_back(true); + else + tree_and_data.tree_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + + EncodedTreeAndData unpackArrayToEncodedTreeAndData(const std::vector &packed_vec) + { + constexpr auto kTreeSizeBytesN = sizeof(size_t); + constexpr auto kDataSizeBytesN = sizeof(size_t); + const uint8_t *pack_ptr = packed_vec.data(); + const std::bitset tree_size_bitset( + *static_cast(static_cast(pack_ptr))); + const std::bitset data_size_bitset( + *static_cast(static_cast(pack_ptr + kTreeSizeBytesN))); + + const size_t kTreeSizeInBits = static_cast(tree_size_bitset.to_ullong()); + const size_t kDataSizeInBits = static_cast(data_size_bitset.to_ullong()); + + auto start_pos = kTreeSizeBytesN + kDataSizeBytesN; + EncodedTreeAndData tree_and_data; + + const auto kTreeSizeInBytes = + kTreeSizeInBits % CHAR_BIT ? kTreeSizeInBits / CHAR_BIT + 1 : kTreeSizeInBits / CHAR_BIT; + + for (size_t i = 0; i < kTreeSizeInBytes; ++i) + { + const auto kNumOfBits = + kTreeSizeInBits - i * CHAR_BIT < CHAR_BIT ? kTreeSizeInBits - i * CHAR_BIT : CHAR_BIT; + for (size_t j = 0; j < kNumOfBits; ++j) + { + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + const auto kDataSizeInBytes = + kDataSizeInBits % CHAR_BIT ? kDataSizeInBits / CHAR_BIT + 1 : kDataSizeInBits / CHAR_BIT; + const auto kOffsetInBits = kTreeSizeInBits % CHAR_BIT; + start_pos += kOffsetInBits ? kTreeSizeInBytes - 1 : kTreeSizeInBytes; + + for (size_t i = 0; i < kDataSizeInBytes; ++i) + { + const auto kNumOfBits = + kDataSizeInBits - i * CHAR_BIT < CHAR_BIT ? kDataSizeInBits - i * CHAR_BIT : CHAR_BIT; + const auto kBitsInFirstByteToRead = + kNumOfBits < CHAR_BIT - kOffsetInBits ? kNumOfBits : CHAR_BIT - kOffsetInBits; + for (size_t j = kOffsetInBits; j < kOffsetInBits + kBitsInFirstByteToRead; ++j) + { + + if (*(pack_ptr + start_pos + i) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + if (kNumOfBits < CHAR_BIT - kOffsetInBits) + break; + const auto kBitsLeft = kNumOfBits - (CHAR_BIT - kOffsetInBits) < kOffsetInBits + ? 
kNumOfBits - (CHAR_BIT - kOffsetInBits) + : kOffsetInBits; + for (size_t j = 0; j < kBitsLeft; ++j) + { + if (*(pack_ptr + start_pos + i + 1) & ((1 << 7) >> j)) + tree_and_data.data_vec.push_back(true); + else + tree_and_data.data_vec.push_back(false); + } + } + return tree_and_data; + } + +public: + void decode(Node *node, std::vector &vec, T *dst_ptr) + { + if (node == nullptr) + { + return; + } + + if (!node->p_left && !node->p_right) + { + *dst_ptr = node->data; + return; + } + + if (vec.size() == _decode_idx) + return; + if (vec[_decode_idx] == false) + { + ++_decode_idx; + decode(node->p_left, vec, dst_ptr); + } + else + { + ++_decode_idx; + decode(node->p_right, vec, dst_ptr); + } + } + +private: + size_t _decode_idx = 0; + EncodedTreeAndData _encoded_tree_and_data; + +public: + void init_decoder(const uint8_t *input) + { + size_t index = 0; + _encoded_tree_and_data = unpackArrayToEncodedTreeAndData(input); + root = importHuffmanTreeFromBoolVec(_encoded_tree_and_data.tree_vec, index); + } + + void reset_decode_idx(void) { _decode_idx = 0; } + + int decode_n(uint8_t *dst_ptr, size_t num) + { + size_t bytes_decoded = 0; + for (int i = 0; i < num && _decode_idx < _encoded_tree_and_data.data_vec.size(); ++i) + { + decode(root, _encoded_tree_and_data.data_vec, dst_ptr + bytes_decoded); + bytes_decoded++; + } + return bytes_decoded; + } + + HuffmanDecoder() = default; +}; + +} // namespace huffman +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_HUFFMAN_DECODER_H__ diff --git a/compiler/luci/pass/src/helpers/HuffmanEncoder.h b/compiler/luci/pass/src/helpers/HuffmanEncoder.h new file mode 100644 index 00000000000..26e8d3e9c54 --- /dev/null +++ b/compiler/luci/pass/src/helpers/HuffmanEncoder.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ +#define __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ + +#include +#include +#include +#include +#include +#include +#include + +namespace luci +{ +namespace huffman +{ + +// Node of prefix tree +template struct Node +{ + std::shared_ptr> p_left; + std::shared_ptr> p_right; + T data; + unsigned int freq; +}; + +// Compare functor for priority queue +template struct CompareNodes +{ + bool operator()(std::shared_ptr> l, std::shared_ptr> r) + { + return l->freq > r->freq; + } +}; + +template class HuffmanEncoder +{ +private: + std::unordered_map _huffman_table; + +private: + std::shared_ptr> allocateNode(T data, unsigned int freq, std::shared_ptr> p_left, + std::shared_ptr> p_right) + { + std::shared_ptr> node = std::make_unique>(); + node->data = data; + node->freq = freq; + node->p_left = p_left; + node->p_right = p_right; + return node; + } + + std::unordered_map calculateFrequencyMap(const std::vector &input) + { + std::unordered_map out_map; + for (auto &item : input) + out_map[item] = out_map.find(item) != out_map.end() ? 
out_map[item] + 1 : 1; + + return out_map; + } + + std::string exportHuffmanTreeToString(std::shared_ptr> node) + { + if (node == nullptr) + return ""; + + if (!node->p_left && !node->p_right) + { + return "0" + std::bitset(node->data).to_string(); + } + + std::string tmp = "1"; + tmp += exportHuffmanTreeToString(node->p_left); + tmp += exportHuffmanTreeToString(node->p_right); + return tmp; + } + + void buildHuffmanTable(std::shared_ptr> node, const std::string str = "") + { + if (node == nullptr) + return; + + if (!node->p_left && !node->p_right) + { + _huffman_table[node->data] = str; + } + + buildHuffmanTable(node->p_left, str + "0"); + buildHuffmanTable(node->p_right, str + "1"); + } + + std::shared_ptr> buildHuffmanTree(const std::vector &input) + { + auto freq_map = calculateFrequencyMap(input); + + std::priority_queue>, std::vector>>, + CompareNodes> + pq; + + for (auto &item : freq_map) + { + pq.push(allocateNode(item.first, item.second, nullptr, nullptr)); + } + + while (pq.size() != 1) + { + std::shared_ptr> left = pq.top(); + pq.pop(); + std::shared_ptr> right = pq.top(); + pq.pop(); + + unsigned int sum = left->freq + right->freq; + pq.push(allocateNode(0, sum, left, right)); + } + + return pq.top(); + } + + struct EncodedTreeAndData + { + std::vector tree_vec{}; + std::vector data_vec{}; + }; + + std::vector packEncodedDataToArray(const std::string &tree_str, + const std::string &encoded_data) + { + std::vector arr; + const size_t kTreeSizeInBits = tree_str.size(); + const size_t kDataSizeInBits = encoded_data.size(); + + for (size_t i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kTreeSizeInBits)) + i)); + } + + for (size_t i = 0; i < sizeof(size_t); ++i) + { + arr.push_back( + *(static_cast(static_cast(&kDataSizeInBits)) + i)); + } + + const auto merged_str = tree_str + encoded_data; + const size_t kMergedSizeInBits = merged_str.size(); + + const auto kMergedSizeInBytes = kMergedSizeInBits % CHAR_BIT ? kMergedSizeInBits / CHAR_BIT + 1 + : kMergedSizeInBits / CHAR_BIT; + for (size_t i = 0; i < kMergedSizeInBytes; ++i) + { + const auto kNumOfBits = + kMergedSizeInBits - i * CHAR_BIT < CHAR_BIT ? 
kMergedSizeInBits - i * CHAR_BIT : CHAR_BIT; + + std::string tmp_str = merged_str.substr(i * CHAR_BIT, kNumOfBits); + + for (size_t i = 0; i < CHAR_BIT - kNumOfBits; ++i) + tmp_str += "0"; + + const std::bitset tmp_bitset(tmp_str); + + arr.push_back(static_cast(tmp_bitset.to_ullong())); + } + return arr; + } + +public: + // Encodes input vector of values of type T and returns encoded vector of uint8_t + std::vector encode(const std::vector &input) + { + std::shared_ptr> root = buildHuffmanTree(input); + buildHuffmanTable(root); + + std::string exported_tree = exportHuffmanTreeToString(root); + std::string str = ""; + + for (auto &item : input) + { + str += _huffman_table[item]; + } + + std::vector raw_arr = packEncodedDataToArray(exported_tree, str); + return raw_arr; + } + +public: + HuffmanEncoder() = default; +}; + +} // namespace huffman +} // namespace luci + +#endif // __LUCI_PASS_HELPERS_HUFFMAN_ENCODER_H__ diff --git a/compiler/luci/requires.cmake b/compiler/luci/requires.cmake index 7fd58df1b64..8dbca6575f4 100644 --- a/compiler/luci/requires.cmake +++ b/compiler/luci/requires.cmake @@ -4,7 +4,7 @@ require("loco") require("locop") require("logo") require("logo-core") -require("mio-circle08") +require("mio-circle09") require("luci-compute") require("oops") require("hermes") diff --git a/compiler/mio-circle09/CMakeLists.txt b/compiler/mio-circle09/CMakeLists.txt new file mode 100644 index 00000000000..01d3caf24b9 --- /dev/null +++ b/compiler/mio-circle09/CMakeLists.txt @@ -0,0 +1,52 @@ +nnas_find_package(FlatBuffers EXACT 23.5.26 QUIET) + +if(NOT FlatBuffers_FOUND) + message(STATUS "mio-circle09 skip: FlatBuffers 23.5.26 NOT FOUND") + return() +endif(NOT FlatBuffers_FOUND) + +message(STATUS "Build mio-circle09: TRUE") + +# TODO Find a better way +# TODO use nnpackage +# set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/nnpackage/schema/circle_schema.fbs") +set(SCHEMA_FILE "${NNAS_PROJECT_SOURCE_DIR}/res/CircleSchema/0.9/circle_schema.fbs") + +# NOTE Copy circle_schema.fbs as schema.fbs to generate "schema_generated.fbs" instead of "circle_schema_generated.fbs" +add_custom_command(OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/schema.fbs" + COMMAND ${CMAKE_COMMAND} -E copy "${SCHEMA_FILE}" schema.fbs + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + DEPENDS "${SCHEMA_FILE}" + ) + +FlatBuffersMuteable_Target(mio_circle09 + OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen/mio/circle" + INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/gen" + SCHEMA_DIR "${CMAKE_CURRENT_BINARY_DIR}" + SCHEMA_FILES "schema.fbs" + ) + +# This example shows how to use "mio-circle09" library +add_executable(mio_circle09_example example.cpp) +target_link_libraries(mio_circle09_example mio_circle09) + +file(GLOB_RECURSE SOURCES "src/*.cpp") +file(GLOB_RECURSE TESTS "src/*.test.cpp") +list(REMOVE_ITEM SOURCES ${TESTS}) + +add_library(mio_circle09_helper STATIC ${SOURCES}) +set_target_properties(mio_circle09_helper PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_include_directories(mio_circle09_helper PRIVATE src) +target_include_directories(mio_circle09_helper PUBLIC include) +target_link_libraries(mio_circle09_helper mio_circle09) + +if(NOT ENABLE_TEST) + return() +endif(NOT ENABLE_TEST) + +nnas_find_package(GTest REQUIRED) + +GTest_AddTest(mio_circle09_helper_test ${TESTS}) +target_include_directories(mio_circle09_helper_test PRIVATE src) +target_link_libraries(mio_circle09_helper_test mio_circle09) +target_link_libraries(mio_circle09_helper_test mio_circle09_helper) diff --git a/compiler/mio-circle09/README.md 
b/compiler/mio-circle09/README.md new file mode 100644 index 00000000000..aed08aa7a5f --- /dev/null +++ b/compiler/mio-circle09/README.md @@ -0,0 +1,3 @@ +# mio-circle09 + +Let's make it easy to read and write Circle models. diff --git a/compiler/mio-circle09/example.cpp b/compiler/mio-circle09/example.cpp new file mode 100644 index 00000000000..ec039adfda4 --- /dev/null +++ b/compiler/mio-circle09/example.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// This example shows how to include and use "mio-circle09" +// +#include + +#include +#include +#include + +int main(int argc, char **argv) +{ + std::ifstream ifs(argv[1], std::ios_base::binary); + std::vector buf(std::istreambuf_iterator{ifs}, std::istreambuf_iterator{}); + + flatbuffers::Verifier verifier{reinterpret_cast(buf.data()), buf.size()}; + + if (!circle::VerifyModelBuffer(verifier)) + { + std::cout << "Fail" << std::endl; + return 255; + } + + std::cout << "Pass" << std::endl; + return 0; +} diff --git a/compiler/mio-circle09/include/mio_circle/Helper.h b/compiler/mio-circle09/include/mio_circle/Helper.h new file mode 100644 index 00000000000..ee8077a5cf4 --- /dev/null +++ b/compiler/mio-circle09/include/mio_circle/Helper.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __MIO_CIRCLE09_HELPER_H__ +#define __MIO_CIRCLE09_HELPER_H__ + +#include + +#include + +namespace mio +{ +namespace circle +{ + +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode); +bool is_valid(const ::circle::OperatorCode *opcode); +bool is_custom(const ::circle::OperatorCode *opcode); +std::string opcode_name(const ::circle::OperatorCode *opcode); +const char *tensor_type(const ::circle::Tensor *tensor); +const char *tensor_name(const ::circle::Tensor *tensor); + +template std::vector as_index_vector(const flatbuffers::Vector *flat_array) +{ + if (flat_array == nullptr) + { + throw std::runtime_error("flat array is nullptr"); + } + + std::vector ret(flat_array->size()); + for (uint32_t i = 0; i < flat_array->size(); i++) + { + ret[i] = flat_array->Get(i); + } + return ret; +} + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE09_HELPER_H__ diff --git a/compiler/mio-circle09/include/mio_circle/Reader.h b/compiler/mio-circle09/include/mio_circle/Reader.h new file mode 100644 index 00000000000..561888d201f --- /dev/null +++ b/compiler/mio-circle09/include/mio_circle/Reader.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MIO_CIRCLE09_READER_H__ +#define __MIO_CIRCLE09_READER_H__ + +#include + +#include +#include +#include + +// NOTE Reader class originated from circledump and for circle-tensordump +// where this class has more work to be done for stability +// as the tools are for developers not customores. 
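+
+// A minimal usage sketch (illustrative only; error handling and includes are
+// omitted, and "buf" stands for the raw contents of a .circle file loaded as
+// in example.cpp of this package):
+//
+//   const ::circle::Model *model = ::circle::GetModel(buf.data());
+//   mio::circle::Reader reader(model);
+//   reader.select_subgraph(0);
+//   for (const auto *op : *reader.operators())
+//     std::cout << reader.opcode_name(op) << std::endl;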
+ +namespace mio +{ +namespace circle +{ + +/** + * @brief Loads Circle file and provides helpers to access attributes + */ +class Reader +{ +private: + using CircleSubGraphs_t = flatbuffers::Vector>; + using CircleBuffers_t = flatbuffers::Vector>; + using CircleTensors_t = flatbuffers::Vector>; + using CircleOperators_t = flatbuffers::Vector>; + using CircleMetadata_t = flatbuffers::Vector>; + using CircleSignatureDef_t = flatbuffers::Vector>; + +public: + Reader(const ::circle::Model *model); + Reader(const ::circle::Model *model, const std::vector *rawdata); + + Reader() = delete; + +public: + uint32_t version() const { return _version; } + + const std::vector &opcodes() { return _op_codes; } + const CircleBuffers_t *buffers() { return _buffers; } + const CircleTensors_t *tensors() { return _tensors; } + const CircleOperators_t *operators() { return _operators; } + const std::vector &inputs() const { return _inputs; } + const std::vector &outputs() const { return _outputs; } + const CircleMetadata_t *metadata() const { return _metadata; } + const CircleSignatureDef_t *signature_defs() const { return _signature_defs; } + + uint32_t num_subgraph() const { return _subgraphs->size(); } + + size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data); + size_t buffer_info(uint32_t buf_idx, const uint8_t **buff_data, bool &ext_offset); + ::circle::BuiltinOperator builtin_code(const ::circle::Operator *op) const; + std::string opcode_name(const ::circle::Operator *op) const; + std::vector outputs(const ::circle::Operator *op) const; + std::string tensor_name(const ::circle::Tensor *tensor) const; + std::string tensor_dtype(const ::circle::Tensor *tensor) const; + +public: + bool select_subgraph(uint32_t subgraph); + const std::string &subgraph_name(void) const { return _subgraph_name; } + uint32_t subgraph_index(void) const { return _subgraph_index; } + +private: + uint32_t _version; + + const std::vector *_rawdata{nullptr}; + + const CircleSubGraphs_t *_subgraphs{nullptr}; + const CircleBuffers_t *_buffers{nullptr}; + const CircleTensors_t *_tensors{nullptr}; + const CircleOperators_t *_operators{nullptr}; + const CircleMetadata_t *_metadata{nullptr}; + const CircleSignatureDef_t *_signature_defs{nullptr}; + + uint32_t _subgraph_index = 0; + std::string _subgraph_name; + std::vector _op_codes; + std::vector _inputs; + std::vector _outputs; +}; + +} // namespace circle +} // namespace mio + +#endif // __MIO_CIRCLE09_READER_H__ diff --git a/compiler/mio-circle09/src/Helper.cpp b/compiler/mio-circle09/src/Helper.cpp new file mode 100644 index 00000000000..a7bbd23eab4 --- /dev/null +++ b/compiler/mio-circle09/src/Helper.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Helper.h" + +#include +#include + +namespace mio +{ +namespace circle +{ + +/** + * This will provide v3/v3a/v3b format neutral BuiltinOperator + * NOTE circle has minus value opcode (252~254 as uint8_t) + * we cannot use std::max() like tflite as deprecated_builtin_code can be + * minus and builtin_code being 0 for v0.3 files. + */ +::circle::BuiltinOperator builtin_code_neutral(const ::circle::OperatorCode *opcode) +{ + assert(opcode != nullptr); + if (opcode->deprecated_builtin_code() == 127) + { + assert(opcode->builtin_code() >= 127); + return opcode->builtin_code(); + } + // There was no 255(-1) value in v0.3 + assert(opcode->deprecated_builtin_code() != -1); + return static_cast<::circle::BuiltinOperator>(opcode->deprecated_builtin_code()); +} + +bool is_valid(const ::circle::OperatorCode *opcode) +{ + // Valid Range : BuiltinOperator_MIN <= deprecated_builtin_code <= 127 + const int8_t deprecated_builtin_code = opcode->deprecated_builtin_code(); + if (deprecated_builtin_code < ::circle::BuiltinOperator_MIN) + return false; + // There was no 255(-1) value in v0.3 + if (deprecated_builtin_code == -1) + return false; + + const ::circle::BuiltinOperator builtin_code = opcode->builtin_code(); + if (!(::circle::BuiltinOperator_MIN <= builtin_code && + builtin_code <= ::circle::BuiltinOperator_MAX)) + return false; + + return true; +} + +bool is_custom(const ::circle::OperatorCode *opcode) +{ + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return (code == ::circle::BuiltinOperator_CUSTOM); +} + +std::string opcode_name(const ::circle::OperatorCode *opcode) +{ + assert(opcode); + + if (!is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid)"; + return oss.str(); + } + + if (is_custom(opcode)) + { + if (!opcode->custom_code()) + return "(invalid custom)"; + + std::string custom_op = "CUSTOM("; + custom_op += opcode->custom_code()->c_str(); + custom_op += ")"; + return custom_op; + } + + ::circle::BuiltinOperator code = builtin_code_neutral(opcode); + return ::circle::EnumNameBuiltinOperator(code); +} + +const char *tensor_type(const ::circle::Tensor *tensor) +{ + return ::circle::EnumNameTensorType(tensor->type()); +} + +const char *tensor_name(const ::circle::Tensor *tensor) +{ + if (tensor->name() == nullptr || std::string(tensor->name()->c_str()).empty()) + return "(noname)"; + + return tensor->name()->c_str(); +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle09/src/Helper.test.cpp b/compiler/mio-circle09/src/Helper.test.cpp new file mode 100644 index 00000000000..de9b912d2d6 --- /dev/null +++ b/compiler/mio-circle09/src/Helper.test.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Helper.h" + +#include +#include + +#include + +class mio_circle09_helper_test : public ::testing::Test +{ +protected: + void initialization_finish(void) + { + _fbb.Finish(circle::CreateModelDirect(_fbb, 0, &_opcodes_vec)); + } + +protected: + void add_operator_code(int8_t deprecated_builtin_code, const char *custom_code, + circle::BuiltinOperator builtin_code) + { + _opcodes_vec.push_back(circle::CreateOperatorCodeDirect( + _fbb, deprecated_builtin_code, custom_code, 1 /* version */, builtin_code)); + } + + const circle::OperatorCode *get_operator_code(uint8_t idx) + { + return circle::GetModel(_fbb.GetBufferPointer())->operator_codes()->Get(idx); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + std::vector> _opcodes_vec; +}; + +TEST_F(mio_circle09_helper_test, v09) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom_old) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_NEG) +{ + // BuiltinOperator_ADD = 0 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_ADD); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_under127) +{ + // BuiltinOperator_CONV_2D = 3 + add_operator_code(3, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CONV_2D); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_under127_NEG) +{ + // BuiltinOperator_CONV_2D = 3 + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CONV_2D); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom) +{ + // BuiltinOperator_CUSTOM = 32 + add_operator_code(32, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUSTOM); + ASSERT_TRUE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_custom_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "custom", circle::BuiltinOperator_CUSTOM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_over127) +{ + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES = 127 + // BuiltinOperator_CUMSUM = 128 + 
add_operator_code(127, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_TRUE(mio::circle::is_valid(get_operator_code(0))); + ASSERT_EQ(mio::circle::builtin_code_neutral(get_operator_code(0)), + circle::BuiltinOperator_CUMSUM); + ASSERT_FALSE(mio::circle::is_custom(get_operator_code(0))); +} + +TEST_F(mio_circle09_helper_test, v09_over127_NEG) +{ + // BuiltinOperator_CUMSUM = 128 + // deprecated_builtin_code cannot be negative value + add_operator_code(128, "", circle::BuiltinOperator_CUMSUM); + initialization_finish(); + + ASSERT_FALSE(mio::circle::is_valid(get_operator_code(0))); +} diff --git a/compiler/mio-circle09/src/Reader.cpp b/compiler/mio-circle09/src/Reader.cpp new file mode 100644 index 00000000000..c1a5767d0cf --- /dev/null +++ b/compiler/mio-circle09/src/Reader.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mio_circle/Reader.h" +#include "mio_circle/Helper.h" + +#include +#include +#include + +namespace mio +{ +namespace circle +{ + +Reader::Reader(const ::circle::Model *model) +{ + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _version = model->version(); + _subgraphs = model->subgraphs(); + _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); + + auto opcodes = model->operator_codes(); + for (const ::circle::OperatorCode *opcode : *opcodes) + { + _op_codes.push_back(opcode); + } +} + +Reader::Reader(const ::circle::Model *model, const std::vector *rawdata) +{ + if (model == nullptr) + { + throw std::runtime_error("Invalid model"); + } + + _rawdata = rawdata; + + _version = model->version(); + _subgraphs = model->subgraphs(); + _buffers = model->buffers(); + _metadata = model->metadata(); + _signature_defs = model->signature_defs(); + + auto opcodes = model->operator_codes(); + for (const ::circle::OperatorCode *opcode : *opcodes) + { + _op_codes.push_back(opcode); + } +} + +size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data) +{ + if (buff_data != nullptr) + { + *buff_data = nullptr; + } + + if (buf_idx == 0) + return 0; + + if (auto *buffer = (*_buffers)[buf_idx]) + { + assert(buffer->offset() == 0); + + if (auto *array = buffer->data()) + { + if (size_t size = array->size()) + { + if (buff_data != nullptr) + { + *buff_data = reinterpret_cast(array->data()); + } + return size; + } + } + } + + return 0; +} + +size_t Reader::buffer_info(uint32_t buf_idx, const uint8_t **buff_data, bool &ext_offset) +{ + ext_offset = false; + + if (buff_data != nullptr) + { + *buff_data = nullptr; + } + + if (buf_idx == 0) + return 0; + + if (auto *buffer = (*_buffers)[buf_idx]) + { + auto buffer_offset = buffer->offset(); + if (buffer_offset > 1) + { + assert(_rawdata); // make debug break for invalid case + if (_rawdata == nullptr) + return 0; + + ext_offset = true; + if (buff_data != nullptr) + { + *buff_data = 
reinterpret_cast(&_rawdata->at(buffer_offset)); + } + return buffer->size(); + } + else if (auto *array = buffer->data()) + { + if (size_t size = array->size()) + { + if (buff_data != nullptr) + { + *buff_data = reinterpret_cast(array->data()); + } + return size; + } + } + else + { + if (buffer->offset() == 1 && buffer->size() == 1) + { + std::cerr << "Buffer " << buf_idx << " has invalid offset/size." << std::endl; + } + } + } + + return 0; +} + +::circle::BuiltinOperator Reader::builtin_code(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + return mio::circle::builtin_code_neutral(opcode); +} + +std::string Reader::opcode_name(const ::circle::Operator *op) const +{ + uint32_t index = op->opcode_index(); + assert(index < _op_codes.size()); + const ::circle::OperatorCode *opcode = _op_codes.at(index); + + if (!mio::circle::is_valid(opcode)) + { + std::ostringstream oss; + oss << "(invalid: " << index << ")"; + return oss.str(); + } + + return mio::circle::opcode_name(opcode); +} + +std::vector Reader::outputs(const ::circle::Operator *op) const +{ + return as_index_vector(op->outputs()); +} + +std::string Reader::tensor_name(const ::circle::Tensor *tensor) const +{ + return mio::circle::tensor_name(tensor); +} + +std::string Reader::tensor_dtype(const ::circle::Tensor *tensor) const +{ + return mio::circle::tensor_type(tensor); +} + +bool Reader::select_subgraph(uint32_t sgindex) +{ + _subgraph_index = sgindex; + _tensors = nullptr; + _operators = nullptr; + + _inputs.clear(); + _outputs.clear(); + + if (_subgraphs->size() <= sgindex) + { + assert(false); + return false; + } + + const ::circle::SubGraph *subgraph = (*_subgraphs)[sgindex]; + + auto name = subgraph->name(); + _subgraph_name = name ? name->c_str() : "(noname)"; + + _tensors = subgraph->tensors(); + _operators = subgraph->operators(); + + _inputs = as_index_vector(subgraph->inputs()); + _outputs = as_index_vector(subgraph->outputs()); + + return true; +} + +} // namespace circle +} // namespace mio diff --git a/compiler/mio-circle09/src/Reader.test.cpp b/compiler/mio-circle09/src/Reader.test.cpp new file mode 100644 index 00000000000..cef74bad89e --- /dev/null +++ b/compiler/mio-circle09/src/Reader.test.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mio_circle/Reader.h" + +#include +#include + +class mio_circle09_reader_test : public ::testing::Test +{ +protected: + void initialization_emty(void) + { + _model = circle::CreateModelDirect(_fbb, 0, &_opcodes_vec); + circle::FinishModelBuffer(_fbb, _model); + } + + const circle::Model *circleModel(void) + { + auto ptr = _fbb.GetBufferPointer(); + return circle::GetModel(ptr); + } + +private: + flatbuffers::FlatBufferBuilder _fbb; + flatbuffers::Offset _model; + std::vector> _opcodes_vec; +}; + +TEST_F(mio_circle09_reader_test, null_Model_NEG) +{ + EXPECT_THROW(mio::circle::Reader reader(nullptr), std::runtime_error); +} + +TEST_F(mio_circle09_reader_test, empty_Model) +{ + initialization_emty(); + + const circle::Model *model = circleModel(); + EXPECT_NE(nullptr, model); + + mio::circle::Reader reader(model); + + SUCCEED(); +} + +// TODO add more tests diff --git a/compiler/pics/CMakeLists.txt b/compiler/pics/CMakeLists.txt index a6f955a5a87..a7bbbfbfca3 100644 --- a/compiler/pics/CMakeLists.txt +++ b/compiler/pics/CMakeLists.txt @@ -11,7 +11,7 @@ unset(PICS_DEPS) ### set(CIRCLE_SCHEMA_PYTHON_DIR "${CMAKE_CURRENT_BINARY_DIR}/circle") -get_target_property(SCHEMA_BIN_PATH mio_circle08 BINARY_DIR) +get_target_property(SCHEMA_BIN_PATH mio_circle09 BINARY_DIR) add_custom_command( OUTPUT ${CIRCLE_SCHEMA_PYTHON_DIR} diff --git a/compiler/pics/requires.cmake b/compiler/pics/requires.cmake index b1d32605069..e843025039b 100644 --- a/compiler/pics/requires.cmake +++ b/compiler/pics/requires.cmake @@ -1 +1 @@ -require("mio-circle08") +require("mio-circle09") diff --git a/compiler/tflite2circle/CMakeLists.txt b/compiler/tflite2circle/CMakeLists.txt index 35aad329a84..d7678cf2e6e 100644 --- a/compiler/tflite2circle/CMakeLists.txt +++ b/compiler/tflite2circle/CMakeLists.txt @@ -2,7 +2,7 @@ nnas_include(TargetRequire) unset(REQUIRED_TARGETS) list(APPEND REQUIRED_TARGETS mio_tflite2121) -list(APPEND REQUIRED_TARGETS mio_circle08) +list(APPEND REQUIRED_TARGETS mio_circle09) TargetRequire_Return(${REQUIRED_TARGETS}) set(DRIVER "driver/Driver.cpp") @@ -15,7 +15,7 @@ target_link_libraries(tflite2circle foder) target_link_libraries(tflite2circle safemain) target_link_libraries(tflite2circle mio_tflite2121) target_link_libraries(tflite2circle mio_tflite2121_helper) -target_link_libraries(tflite2circle mio_circle08) +target_link_libraries(tflite2circle mio_circle09) target_link_libraries(tflite2circle vconone) target_link_libraries(tflite2circle nncc_coverage) diff --git a/compiler/tflite2circle/requires.cmake b/compiler/tflite2circle/requires.cmake index cf770dfa1f7..052540f17c3 100644 --- a/compiler/tflite2circle/requires.cmake +++ b/compiler/tflite2circle/requires.cmake @@ -1,6 +1,6 @@ require("arser") require("foder") require("mio-tflite2121") -require("mio-circle08") +require("mio-circle09") require("safemain") require("vconone") diff --git a/infra/nncc/Makefile.arm32 b/infra/nncc/Makefile.arm32 index 0d344a048ac..c3bbfa4bd94 100644 --- a/infra/nncc/Makefile.arm32 +++ b/infra/nncc/Makefile.arm32 @@ -26,7 +26,7 @@ ARM32_BUILD_ITEMS+=;pepper-csv2vec;crew ARM32_BUILD_ITEMS+=;oops;pepper-assert ARM32_BUILD_ITEMS+=;hermes;hermes-std ARM32_BUILD_ITEMS+=;loco;locop;logo-core;logo -ARM32_BUILD_ITEMS+=;safemain;mio-circle08;mio-tflite2121 +ARM32_BUILD_ITEMS+=;safemain;mio-circle09;mio-tflite2121 ARM32_BUILD_ITEMS+=;dio-hdf5 ARM32_BUILD_ITEMS+=;luci-compute ARM32_BUILD_ITEMS+=;foder;circle-verify;souschef;arser;vconone @@ -44,7 +44,7 @@ ARM32_HOST_ITEMS+=;pepper-csv2vec ARM32_HOST_ITEMS+=;oops 
ARM32_HOST_ITEMS+=;hermes;hermes-std ARM32_HOST_ITEMS+=;loco;locop;logo-core;logo -ARM32_HOST_ITEMS+=;safemain;mio-circle08;mio-tflite2121 +ARM32_HOST_ITEMS+=;safemain;mio-circle09;mio-tflite2121 ARM32_HOST_ITEMS+=;luci-compute ARM32_HOST_ITEMS+=;foder;circle-verify;souschef;arser;vconone ARM32_HOST_ITEMS+=;luci diff --git a/infra/packaging/preset/20230907 b/infra/packaging/preset/20230907 index 5834a591b0c..9fa10195a73 100644 --- a/infra/packaging/preset/20230907 +++ b/infra/packaging/preset/20230907 @@ -20,7 +20,7 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite2121" "mio-circle08") + REQUIRED_UNITS+=("mio-tflite2121" "mio-circle09") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Compute diff --git a/infra/packaging/preset/20230907_windows b/infra/packaging/preset/20230907_windows index 037e870ddf3..57fba531765 100644 --- a/infra/packaging/preset/20230907_windows +++ b/infra/packaging/preset/20230907_windows @@ -17,7 +17,7 @@ function preset_configure() # loco IR and related utilities REQUIRED_UNITS+=("loco" "locop" "locomotiv" "logo-core" "logo") # Flatbuffer I/O - REQUIRED_UNITS+=("mio-tflite2121" "mio-circle08") + REQUIRED_UNITS+=("mio-tflite2121" "mio-circle09") # Data I/O REQUIRED_UNITS+=("dio-hdf5") # Compute diff --git a/infra/scripts/compiler_modules.sh b/infra/scripts/compiler_modules.sh index 1f5310a8261..e4c0d8f21e6 100644 --- a/infra/scripts/compiler_modules.sh +++ b/infra/scripts/compiler_modules.sh @@ -12,7 +12,7 @@ DEBUG_BUILD_ITEMS+=";oops;pepper-assert;pepper-csv2vec" DEBUG_BUILD_ITEMS+=";hermes;hermes-std" DEBUG_BUILD_ITEMS+=";loco;locop;locomotiv;logo-core;logo" DEBUG_BUILD_ITEMS+=";foder;crew;souschef;arser;vconone" -DEBUG_BUILD_ITEMS+=";safemain;mio-circle08;mio-tflite2121;dio-hdf5" +DEBUG_BUILD_ITEMS+=";safemain;mio-circle09;mio-tflite2121;dio-hdf5" DEBUG_BUILD_ITEMS+=";luci-compute" DEBUG_BUILD_ITEMS+=";tflite2circle" DEBUG_BUILD_ITEMS+=";luci" @@ -50,7 +50,7 @@ NNPKG_RES_ITEMS+=";luci-compute" # Circle compiler library (.circle -> .circle) NNPKG_RES_ITEMS+=";luci" # Flatbuffer I/O -NNPKG_RES_ITEMS+=";mio-tflite2121;mio-circle08" +NNPKG_RES_ITEMS+=";mio-tflite2121;mio-circle09" # Tools NNPKG_RES_ITEMS+=";tflite2circle;circle2circle;luci-interpreter" NNPKG_RES_ITEMS+=";souschef;tflchef;circlechef;circle-verify" diff --git a/packaging/nnfw.spec b/packaging/nnfw.spec index 97b6cea9f7a..7f780034a5b 100644 --- a/packaging/nnfw.spec +++ b/packaging/nnfw.spec @@ -193,7 +193,7 @@ tar -xf %{SOURCE3016} -C ./externals %if %{odc_build} == 1 %{nncc_env} ./nncc configure -DBUILD_GTEST=OFF -DENABLE_TEST=OFF -DEXTERNALS_BUILD_THREADS=%{nproc} -DCMAKE_BUILD_TYPE=%{build_type} -DTARGET_ARCH=%{target_arch} -DTARGET_OS=tizen \ -DCMAKE_INSTALL_PREFIX=$(pwd)/%{overlay_path} \ - -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle08;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" + -DBUILD_WHITELIST="luci;foder;pepper-csv2vec;loco;locop;logo;logo-core;mio-circle09;luci-compute;oops;hermes;hermes-std;angkor;pp;pepper-strcast;pepper-str" %{nncc_env} ./nncc build %{build_jobs} cmake --install %{nncc_workspace} %{strip_options} %endif # odc_build @@ -206,7 +206,7 @@ mkdir -p %{overlay_path}/include/mio/circle cp compiler/angkor/include/nncc/core/ADT/tensor/Index.h %{overlay_path}/include/nncc/core/ADT/tensor cp compiler/oops/include/oops/InternalExn.h %{overlay_path}/include/oops cp 
compiler/luci/lang/include/luci/IR/CircleNodes.lst %{overlay_path}/include/luci/IR -cp %{nncc_workspace}/compiler/mio-circle08/gen/mio/circle/schema_generated.h %{overlay_path}/include/mio/circle +cp %{nncc_workspace}/compiler/mio-circle09/gen/mio/circle/schema_generated.h %{overlay_path}/include/mio/circle cp -r %{nncc_workspace}/overlay/include/flatbuffers %{overlay_path}/include # runtime build diff --git a/res/CircleSchema/0.9/circle_schema.fbs b/res/CircleSchema/0.9/circle_schema.fbs new file mode 100644 index 00000000000..de775255d12 --- /dev/null +++ b/res/CircleSchema/0.9/circle_schema.fbs @@ -0,0 +1,1705 @@ +// Copyright (c) 2019~2023 Samsung Electronics Co., Ltd. All Rights Reserved +// Copyright 2017 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Revision History +// +// Version Major.Minor +// +// Major version is schema version. +// We keep schema version if it is compatible +// Minor version is for human communication +// It will not be stored in circle model. +// +// Version 0.0: Initial version. Based on TensorFlow Lite v1.13.1 schema. +// Version 0.1: Based on TF v2.2-rc2 + more (from TensorFlow `56d281c`) +// `BATCH_MATMUL` operator, `FLOAT64` tensor type, +// `asymmetric_quantize_inputs` for several operator options +// Version 0.2: BCQ_GATHER and BCQ_FULLY_CONNECTED are added. +// Version 0.3: SHUFFLED16x1FLOAT32 is added. +// Version 0.4: Base up to TensorFlow Lite v2.7.0 schema. +// Version 0.5: Base up to TensorFlow Lite v2.10.1 schema. +// Version 0.6: Base up to TensorFlow Lite v2.13.0 schema. +// Version 0.7: Base up to TensorFlow Lite v2.15.0 schema, deprecate data_format in Subgraph table +// Version 0.8: GRU op is added. UINT4 is added. +// Version 0.9: Weight compression option is added + +namespace circle; + +// This corresponds to the version. +file_identifier "CIR0"; +// File extension of any written files. +file_extension "circle"; + +// IMPORTANT: All new members of tables, enums and unions must be added at the +// end to ensure backwards compatibility. + +// The type of data stored in a tensor. +enum TensorType : byte { + UINT4 = -1, + FLOAT32 = 0, + FLOAT16 = 1, + INT32 = 2, + UINT8 = 3, + INT64 = 4, + STRING = 5, + BOOL = 6, + INT16 = 7, + COMPLEX64 = 8, + INT8 = 9, + FLOAT64 = 10, + COMPLEX128 = 11, + UINT64 = 12, + // Experimental: Resource and variant types are experimental, that are subject + // to change. Do not implement custom kernels using resource & variant types + // now. + RESOURCE = 13, + VARIANT = 14, + UINT32 = 15, + UINT16 = 16, + INT4 = 17, +} + +// Custom quantization parameters for experimenting with new quantization +// techniques. +table CustomQuantization { + custom:[ubyte] (force_align: 16); +} + +// Represents a specific quantization technique's parameters. +union QuantizationDetails { + CustomQuantization, +} + +// Parameters for converting a quantized tensor back to float. 
+table QuantizationParameters { + // These four parameters are the asymmetric linear quantization parameters. + // Given a quantized value q, the corresponding float value f should be: + // f = scale * (q - zero_point) + // For other quantization types, the QuantizationDetails below is used. + // NOTE min/max values are valid if + // 1. length of min/max == 0 or + // 2. length of min/max == length of scale/zero_point + // Otherwise, min/max are not valid (undefined behavior). + min:[float]; + max:[float]; + scale:[float]; // For dequantizing the tensor's values. + zero_point:[long]; + + // If this is not none, the other quantization parameters (i.e. min, max, + // scale, zero_point fields above) are ignored and the value of the + // QuantizationDetails union should be used. + details:QuantizationDetails; + + // Specifies the dimension of the Tensor's shape that the scales and + // zero_points correspond to. For example, a tensor t, with dims=[4, 3, 2, 1] + // with quantization params: + // scale=[1.0, 2.0, 3.0], zero_point=[1, 2, 3], quantization_dimension=1 + // will be quantized across the second dimension of t. + // t[:, 0, :, :] will have scale[0]=1.0, zero_point[0]=1 + // t[:, 1, :, :] will have scale[1]=2.0, zero_point[0]=2 + // t[:, 2, :, :] will have scale[2]=3.0, zero_point[0]=3 + quantized_dimension:int; +} + +// Sparse tensors. +// We use a modification of the TACO format. +// Reference: http://tensor-compiler.org/kjolstad-oopsla17-tensor-compiler.pdf +// +// To encode a conceptual n-dimensional dense tensor with dims (d0, ..., dn-1), +// potentially with a k-dimensional block (0 <= k <= n) with dims +// (dn, ..., dn+k-1), the format needs to specify: +// 1. In what order to traverse these dimensions. For example, to store a 2-D +// matrix in row major order, the traversal order would be (d0, d1), +// whereas to store it in column major order, the traversal order would be +// (d1, d0). If the 2-D matrix has a 2-D inner block, the traversal order +// could be (d0, d1, d2, d3). +// 2. How each block dimension in (dn, ..., dn+k-1) maps to the original +// tensor dimension in (d0, ..., dn-1). +// 3. In the traversal order defined above, the format (dense vs. sparse) and +// index metadata for each dimension. For a dense dimension, this is just +// the size of that dimension. For a sparse dimension, it's the same as +// the compressed index defined in the Compressed Sparse Row (CSR) format. +// (http://scipy-lectures.org/advanced/scipy_sparse/csr_matrix.html) + +// The storage type for a dimension. Currently we support: +// 1. DENSE: each coordinate in this dimension is stored implicitly. +// 2. SPARSE_CSR: only the coordinates with non-zero elements are stored. The +// compression technique is the same what CSR uses. +// More types like a sparse dimension with a different compression technique +// could be added to the list in the future. +enum DimensionType : byte { + DENSE = 0, + SPARSE_CSR = 1, +} + +table Int32Vector { + values:[int]; +} + +table Uint16Vector { + values:[ushort] (force_align: 4); +} + +table Uint8Vector { + values:[ubyte] (force_align: 4); +} + +// Variable-typed buffer to store the index metadata for a sparse dimension. +// The widest type is Int32 instead of UInt32 because tensor's shape is a int32 +// vector. We don't want the per-dimensional index to overflow that range. +union SparseIndexVector { + Int32Vector, + Uint16Vector, + Uint8Vector +} + +table DimensionMetadata { + // Whether a dimension is dense or sparse. 
+ format:DimensionType; + // Index metadata used for a dimension. + // - If format is DimensionType.DENSE then we use the dense_size field to + // store the size of that dimension. Each index in that dimension is + // stored implicitly. + // - If format is DimensionType.SPARSE_CSR then we use array_segments and + // array_indices to encode that dimension. array_segments represents how + // to segment the indices array, each segment corresponds to one element + // in the previous dimension. array_indices represents the index of the + // non-zero elements within this dimension (as those in the CSR matrix + // format, where the first array is row pointers and the second array is + // column indices). + dense_size:int; + array_segments:SparseIndexVector; + array_indices:SparseIndexVector; +} + +// Parameters to encode a sparse TfLite tensor. +table SparsityParameters { + // The traversal order of the dimensions defined in the `shape` field of the + // conceptual dense tensor. For an n-dimensional tensor with dims (d0, d1, + // ..., dn-1), + // - if not block sparse, the traversal_order is just a permutation of (d0, + // ..., dn-1). For example, a 2-D matrix stored in row-major order would + // have traversal_order = (d0, d1). + // - if block sparse with a k-dimensional block (0 <= k <= n), the + // traversal_order has n + k elements. The first n elements are still a + // permutation of (d0, ..., dn-1). The last k elements are a permutation + // of (dn, ..., dn+k-1), defining how to traverse a block internally. For + // example, a 2-D matrix with 2-D blocks, both stored in row-major order + // would have traversal_order = (d0, d1, d2, d3). + traversal_order:[int]; + // For an n-dimensional tensor with a k-dimensional block (0 <= k <= n), + // stores how a block dimension in (dn, ..., dn+k-1) maps to the original + // tensor dimension in (d0, ..., dn). + // It's stored in the order of (dn, ..., dn+k-1). + // If not block-sparse, this field is NULL. + block_map:[int]; + // In the traversal order defined above, the metadata needed for + // each dimension to locate the non-zero values in the original dense tensor. + // The size of the dim_metadata array = the size of the traversal_order array + // = n + k. + dim_metadata:[DimensionMetadata]; +} + +// The nested tensor type for VARIANT type. +table VariantSubType { + // The tensor shape. + shape:[int]; + type:TensorType; + // If false, the rank or the number of tensor dimensions is unknown. + // If false, "shape" must be []. + has_rank: bool = false; +} + +enum CompressionType : byte { + NONE = 0, + // Huffman encoding only + HUFFMAN = 1 +} + +table Tensor { + // The tensor shape. The meaning of each entry is operator-specific but + // builtin ops use: [batch size, height, width, number of channels] (That's + // Tensorflow's NHWC). + shape:[int]; + type:TensorType; + // An index that refers to the buffers table at the root of the model. Or, + // if there is no data buffer associated (i.e. intermediate results), then + // this is 0 (which refers to an always existent empty buffer). + // + // The data_buffer itself is an opaque container, with the assumption that the + // target device is little-endian. In addition, all builtin operators assume + // the memory is ordered such that if `shape` is [4, 3, 2], then index + // [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. + buffer:uint; + name:string; // For debugging and importing back into tensorflow. + quantization:QuantizationParameters; // Optional.
+ + is_variable:bool = false; + + // Parameters to encode a sparse tensor. See the example in + // tensorflow/lite/testdata/sparse_tensor.json. + sparsity:SparsityParameters; // Optional. + + // Encodes `shape` with unknown dimensions. Unknown dimensions are + // represented with -1. + shape_signature:[int]; // Optional. + + // This field is added to distinguish between scalars and tensors of unknown + // ranks (both of which shape is []). + // For scalars (rank = 0), shape = [] and has_rank = true. + // For tensors with known rank (rank > 0) and shape, shape = [...] and + // has_rank = true. + // For tensors with unknown rank and shape, shape = [] and has_rank = false. + has_rank: bool = false; + + // The nested Tensor types for VARIANT type. This is always empty for + // non-VARIANT types. This is optional because the nested type can be omitted. + // Currently only 1 subtype is supported. The field is defined as an array for + // flexibility of supporting multiple subtypes in the future. + variant_tensors:[VariantSubType]; + + compression_type:CompressionType = NONE; + +} + +// A list of builtin operators. Builtin operators are slightly faster than custom +// ones, but not by much. Moreover, while custom operators accept an opaque +// object containing configuration parameters, builtins have a predetermined +// set of acceptable options. +// LINT.IfChange +enum BuiltinOperator : int32 { + GRU = -5, + BCQ_GATHER = -4, + BCQ_FULLY_CONNECTED = -3, + INSTANCE_NORM = -2, + ADD = 0, + AVERAGE_POOL_2D = 1, + CONCATENATION = 2, + CONV_2D = 3, + DEPTHWISE_CONV_2D = 4, + DEPTH_TO_SPACE = 5, + DEQUANTIZE = 6, + EMBEDDING_LOOKUP = 7, + FLOOR = 8, + FULLY_CONNECTED = 9, + HASHTABLE_LOOKUP = 10, + L2_NORMALIZATION = 11, + L2_POOL_2D = 12, + LOCAL_RESPONSE_NORMALIZATION = 13, + LOGISTIC = 14, + LSH_PROJECTION = 15, + LSTM = 16, + MAX_POOL_2D = 17, + MUL = 18, + RELU = 19, + // NOTE(aselle): RELU_N1_TO_1 used to be called RELU1, but it was renamed + // since different model developers use RELU1 in different ways. Never + // create another op called RELU1. + RELU_N1_TO_1 = 20, + RELU6 = 21, + RESHAPE = 22, + RESIZE_BILINEAR = 23, + RNN = 24, + SOFTMAX = 25, + SPACE_TO_DEPTH = 26, + SVDF = 27, + TANH = 28, + CONCAT_EMBEDDINGS = 29, + SKIP_GRAM = 30, + CALL = 31, + CUSTOM = 32, + EMBEDDING_LOOKUP_SPARSE = 33, + PAD = 34, + UNIDIRECTIONAL_SEQUENCE_RNN = 35, + GATHER = 36, + BATCH_TO_SPACE_ND = 37, + SPACE_TO_BATCH_ND = 38, + TRANSPOSE = 39, + MEAN = 40, + SUB = 41, + DIV = 42, + SQUEEZE = 43, + UNIDIRECTIONAL_SEQUENCE_LSTM = 44, + STRIDED_SLICE = 45, + BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, + TOPK_V2 = 48, + SPLIT = 49, + LOG_SOFTMAX = 50, + // DELEGATE is a special op type for the operations which are delegated to + // other backends. 
+ // WARNING: Experimental interface, subject to change + DELEGATE = 51, + BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, + PRELU = 54, + MAXIMUM = 55, + ARG_MAX = 56, + MINIMUM = 57, + LESS = 58, + NEG = 59, + PADV2 = 60, + GREATER = 61, + GREATER_EQUAL = 62, + LESS_EQUAL = 63, + SELECT = 64, + SLICE = 65, + SIN = 66, + TRANSPOSE_CONV = 67, + SPARSE_TO_DENSE = 68, + TILE = 69, + EXPAND_DIMS = 70, + EQUAL = 71, + NOT_EQUAL = 72, + LOG = 73, + SUM = 74, + SQRT = 75, + RSQRT = 76, + SHAPE = 77, + POW = 78, + ARG_MIN = 79, + FAKE_QUANT = 80, + REDUCE_PROD = 81, + REDUCE_MAX = 82, + PACK = 83, + LOGICAL_OR = 84, + ONE_HOT = 85, + LOGICAL_AND = 86, + LOGICAL_NOT = 87, + UNPACK = 88, + REDUCE_MIN = 89, + FLOOR_DIV = 90, + REDUCE_ANY = 91, + SQUARE = 92, + ZEROS_LIKE = 93, + FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, + RESIZE_NEAREST_NEIGHBOR = 97, + LEAKY_RELU = 98, + SQUARED_DIFFERENCE = 99, + MIRROR_PAD = 100, + ABS = 101, + SPLIT_V = 102, + UNIQUE = 103, + CEIL = 104, + REVERSE_V2 = 105, + ADD_N = 106, + GATHER_ND = 107, + COS = 108, + WHERE = 109, + RANK = 110, + ELU = 111, + REVERSE_SEQUENCE = 112, + MATRIX_DIAG = 113, + QUANTIZE = 114, + MATRIX_SET_DIAG = 115, + ROUND = 116, + HARD_SWISH = 117, + IF = 118, + WHILE = 119, + NON_MAX_SUPPRESSION_V4 = 120, + NON_MAX_SUPPRESSION_V5 = 121, + SCATTER_ND = 122, + SELECT_V2 = 123, + DENSIFY = 124, + SEGMENT_SUM = 125, + BATCH_MATMUL = 126, + PLACEHOLDER_FOR_GREATER_OP_CODES = 127, + CUMSUM = 128, + CALL_ONCE = 129, + BROADCAST_TO = 130, + RFFT2D = 131, + CONV_3D = 132, + IMAG=133, + REAL=134, + COMPLEX_ABS=135, + HASHTABLE = 136, + HASHTABLE_FIND = 137, + HASHTABLE_IMPORT = 138, + HASHTABLE_SIZE = 139, + REDUCE_ALL = 140, + CONV_3D_TRANSPOSE = 141, + VAR_HANDLE = 142, + READ_VARIABLE = 143, + ASSIGN_VARIABLE = 144, + BROADCAST_ARGS = 145, + RANDOM_STANDARD_NORMAL = 146, + BUCKETIZE = 147, + RANDOM_UNIFORM = 148, + MULTINOMIAL = 149, + GELU = 150, + DYNAMIC_UPDATE_SLICE = 151, + RELU_0_TO_1 = 152, + UNSORTED_SEGMENT_PROD = 153, + UNSORTED_SEGMENT_MAX = 154, + UNSORTED_SEGMENT_SUM = 155, + ATAN2 = 156, + UNSORTED_SEGMENT_MIN = 157, + SIGN = 158, + BITCAST = 159, + BITWISE_XOR = 160, + RIGHT_SHIFT = 161, + // All Operators start with STABLEHLO_ prefixes are subject to change + // Many of the ops below can not be executed by runtime + STABLEHLO_LOGISTIC = 162, // WARNING: Do not have runtime support + STABLEHLO_ADD = 163, // WARNING: No runtime support yet + STABLEHLO_DIVIDE = 164, // WARNING: No runtime support yet + STABLEHLO_MULTIPLY = 165, // WARNING: No runtime support yet + STABLEHLO_MAXIMUM = 166, // WARNING: No runtime support yet + STABLEHLO_RESHAPE = 167, // WARNING: No runtime support yet + STABLEHLO_CLAMP = 168, // WARNING: No runtime support + STABLEHLO_CONCATENATE = 169, // WARNING: No runtime support + STABLEHLO_BROADCAST_IN_DIM = 170, // WARNING: No runtime support + STABLEHLO_CONVOLUTION = 171, // WARNING: No runtime support + STABLEHLO_SLICE = 172, // WARNING: No runtime support + STABLEHLO_CUSTOM_CALL = 173, // WARNING: No runtime support + STABLEHLO_REDUCE = 174, // WARNING: No runtime support + STABLEHLO_ABS = 175, // WARNING: No runtime support + STABLEHLO_AND = 176, // WARNING: No runtime support + STABLEHLO_COSINE = 177, // WARNING: No runtime support + STABLEHLO_EXPONENTIAL = 178, // WARNING: No runtime support + STABLEHLO_FLOOR = 179, // WARNING: No runtime support + STABLEHLO_LOG = 180, // WARNING: No runtime support + STABLEHLO_MINIMUM = 181, // WARNING: No runtime support + STABLEHLO_NEGATE = 182, // WARNING: No runtime 
support + STABLEHLO_OR = 183, // WARNING: No runtime support + STABLEHLO_POWER = 184, // WARNING: No runtime support + STABLEHLO_REMAINDER = 185, // WARNING: No runtime support + STABLEHLO_RSQRT = 186, // WARNING: No runtime support + STABLEHLO_SELECT = 187, // WARNING: No runtime support + STABLEHLO_SUBTRACT = 188, // WARNING: No runtime support + STABLEHLO_TANH = 189, // WARNING: No runtime support + STABLEHLO_SCATTER = 190, + STABLEHLO_COMPARE = 191, // WARNING: No runtime support + STABLEHLO_CONVERT = 192, // WARNING: No runtime support + STABLEHLO_DYNAMIC_SLICE = 193, // WARNING: No runtime support + STABLEHLO_DYNAMIC_UPDATE_SLICE = 194, // WARNING: No runtime support + STABLEHLO_PAD = 195, // WARNING: No runtime support + STABLEHLO_IOTA = 196, // WARNING: No runtime support + STABLEHLO_DOT_GENERAL = 197, // WARNING: No runtime support + STABLEHLO_REDUCE_WINDOW = 198, // WARNING: No runtime support + STABLEHLO_SORT = 199, // WARNING: No runtime support + STABLEHLO_WHILE = 200, // WARNING: No runtime support + STABLEHLO_GATHER = 201, // WARNING: No runtime support + STABLEHLO_TRANSPOSE = 202, // WARNING: No runtime support + DILATE = 203, + STABLEHLO_RNG_BIT_GENERATOR = 204, + REDUCE_WINDOW = 205, +} +// LINT.ThenChange(nnapi_linter/linter.proto) + +// Options for the builtin operators. +union BuiltinOptions { + Conv2DOptions, + DepthwiseConv2DOptions, + ConcatEmbeddingsOptions, + LSHProjectionOptions, + Pool2DOptions, + SVDFOptions, + RNNOptions, + FullyConnectedOptions, + SoftmaxOptions, + ConcatenationOptions, + AddOptions, + L2NormOptions, + LocalResponseNormalizationOptions, + LSTMOptions, + ResizeBilinearOptions, + CallOptions, + ReshapeOptions, + SkipGramOptions, + SpaceToDepthOptions, + EmbeddingLookupSparseOptions, + MulOptions, + PadOptions, + GatherOptions, + BatchToSpaceNDOptions, + SpaceToBatchNDOptions, + TransposeOptions, + ReducerOptions, + SubOptions, + DivOptions, + SqueezeOptions, + SequenceRNNOptions, + StridedSliceOptions, + ExpOptions, + TopKV2Options, + SplitOptions, + LogSoftmaxOptions, + CastOptions, + DequantizeOptions, + MaximumMinimumOptions, + ArgMaxOptions, + LessOptions, + NegOptions, + PadV2Options, + GreaterOptions, + GreaterEqualOptions, + LessEqualOptions, + SelectOptions, + SliceOptions, + TransposeConvOptions, + SparseToDenseOptions, + TileOptions, + ExpandDimsOptions, + EqualOptions, + NotEqualOptions, + ShapeOptions, + PowOptions, + ArgMinOptions, + FakeQuantOptions, + PackOptions, + LogicalOrOptions, + OneHotOptions, + LogicalAndOptions, + LogicalNotOptions, + UnpackOptions, + FloorDivOptions, + SquareOptions, + ZerosLikeOptions, + FillOptions, + BidirectionalSequenceLSTMOptions, + BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, + ResizeNearestNeighborOptions, + LeakyReluOptions, + SquaredDifferenceOptions, + MirrorPadOptions, + AbsOptions, + SplitVOptions, + UniqueOptions, + ReverseV2Options, + AddNOptions, + GatherNdOptions, + CosOptions, + WhereOptions, + RankOptions, + ReverseSequenceOptions, + MatrixDiagOptions, + QuantizeOptions, + MatrixSetDiagOptions, + HardSwishOptions, + IfOptions, + WhileOptions, + DepthToSpaceOptions, + NonMaxSuppressionV4Options, + NonMaxSuppressionV5Options, + ScatterNdOptions, + SelectV2Options, + DensifyOptions, + SegmentSumOptions, + BatchMatMulOptions, + CumsumOptions, + CallOnceOptions, + BroadcastToOptions, + Rfft2dOptions, + Conv3DOptions, + HashtableOptions, + HashtableFindOptions, + HashtableImportOptions, + HashtableSizeOptions, + VarHandleOptions, + 
ReadVariableOptions, + AssignVariableOptions, + RandomOptions, + BucketizeOptions, + GeluOptions, + DynamicUpdateSliceOptions, + UnsortedSegmentProdOptions, + UnsortedSegmentMaxOptions, + UnsortedSegmentMinOptions, + UnsortedSegmentSumOptions, + ATan2Options, + SignOptions, + BitcastOptions, + BitwiseXorOptions, + RightShiftOptions, + GRUOptions = 251, + BCQGatherOptions = 252, + BCQFullyConnectedOptions = 253, + InstanceNormOptions = 254, +} + +union BuiltinOptions2{ + StablehloConcatenateOptions, + StablehloBroadcastInDimOptions, + StablehloSliceOptions, + StablehloConvolutionOptions, + StablehloCustomCallOptions, + StablehloReduceOptions, + StablehloScatterOptions, + StablehloCompareOptions, + StablehloDynamicSliceOptions, + StablehloPadOptions, + StablehloIotaOptions, + StablehloDotGeneralOptions, + StablehloReduceWindowOptions, + StablehloSortOptions, + StablehloWhileOptions, + StablehloGatherOptions, + StablehloTransposeOptions, + DilateOptions, + StablehloRngBitGeneratorOptions, + ReduceWindowOptions, +} + +table StablehloGatherOptions{ + offset_dims : [long]; + collapsed_slice_dims : [long]; + start_index_map : [long]; + index_vector_dim : long; + slice_sizes : [long]; + indices_are_sorted : bool; +} + +table StablehloTransposeOptions{ + permutation : [long]; +} + +enum StablehloPrecisionConfig : uint { + DEFAULT, + HIGH, + HIGHEST, +} + +table StablehloDotGeneralOptions{ + lhs_batching_dimensions : [long]; + rhs_batching_dimensions : [long]; + lhs_contracting_dimensions : [long]; + rhs_contracting_dimensions : [long]; + precision_config : [StablehloPrecisionConfig]; +} + +table StablehloReduceWindowOptions{ + window_dimensions : [long]; + window_strides : [long]; + base_dilations : [long]; + window_dilations : [long]; + padding : [long]; + body_subgraph_index : int; +} + +table StablehloWhileOptions{ + cond_subgraph_index : int; + body_subgraph_index : int; +} + +table StablehloSortOptions{ + dimension : long; + is_stable : bool; + comparator_subgraph_index : int; +} + +table StablehloConcatenateOptions { + dimension : long; +} + +table StablehloBroadcastInDimOptions{ + broadcast_dimensions : [long]; +} + +enum StablehloComparisonDirection : uint { + STABLEHLO_COMPARISON_DIRECTION_EQ, + STABLEHLO_COMPARISON_DIRECTION_NE, + STABLEHLO_COMPARISON_DIRECTION_GE, + STABLEHLO_COMPARISON_DIRECTION_GT, + STABLEHLO_COMPARISON_DIRECTION_LE, + STABLEHLO_COMPARISON_DIRECTION_LT, + +} + +enum StablehloComparisonType : uint { + STABLEHLO_COMPARISON_TYPE_NOTYPE, + STABLEHLO_COMPARISON_TYPE_FLOAT, + STABLEHLO_COMPARISON_TYPE_FLOAT_TOTAL_ORDER, + STABLEHLO_COMPARISON_TYPE_SIGNED, + STABLEHLO_COMPARISON_TYPE_UNSIGNED, +} + +table StablehloCompareOptions{ + comparison_direction : StablehloComparisonDirection; + compare_type : StablehloComparisonType; +} + +table StablehloDynamicSliceOptions{ + slice_sizes : [long]; +} + +table StablehloPadOptions{ + edge_padding_low : [long]; + edge_padding_high : [long]; + interior_padding : [long]; +} + +table StablehloIotaOptions{ + iota_dimension : long; +} + +table StablehloCustomCallOptions { + call_target_name : string; + has_side_effect : bool; + backend_config: string; + api_version : int; // will be deprecated + called_computations: [int]; // should point to subgraphs of the computations + custom_attributes : [ubyte]; +} + +table StablehloReduceOptions { + dimensions : [long]; + body_subgraph_index : int; +} + +table StablehloSliceOptions{ + start_indices : [long]; + limit_indices : [long]; + strides : [long]; +} + +table StablehloConvolutionOptions{ +
window_strides : [long]; + padding : [long]; + lhs_dilation : [long]; + rhs_dilation : [long]; + window_reversal : [bool]; + input_batch_dimension : long; + input_feature_dimension : long; + input_spatial_dimensions : [long]; + kernel_input_feature_dimension : long; + kernel_output_feature_dimension : long; + kernel_spatial_dimensions : [long]; + output_batch_dimension : long; + output_feature_dimension : long; + output_spatial_dimensions : [long]; + feature_group_count : long; + batch_group_count : long; + precision_config : [StablehloPrecisionConfig]; +} + +table StablehloScatterOptions { + indices_are_sorted: bool; + update_window_dims: [long]; + inserted_window_dims: [long]; + scatter_dims_to_operand_dims: [long]; + index_vector_dim: long; + unique_indices: bool; + update_computation_subgraph_index: int; +} + +enum RngAlgorithm : byte { + // An algorithm auto-selected by the system according to device type. + DEFAULT = 0, + // The Philox algorithm, as described in paper + // ['Parallel Random Numbers: As Easy as 1, 2, 3'] + // (https://www.thesalmons.org/john/random123/papers/random123sc11.pdf) + PHILOX = 1, + // The ThreeFry algorithm, as described in paper + // ['Parallel Random Numbers: As Easy as 1, 2, 3'] + // (https://www.thesalmons.org/john/random123/papers/random123sc11.pdf) + THREEFRY = 2, +} + +table StablehloRngBitGeneratorOptions { + algorithm:RngAlgorithm; +} + +// LINT.IfChange +enum Padding : byte { SAME, VALID } +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// LINT.IfChange +enum ActivationFunctionType : byte { + NONE = 0, + RELU = 1, + RELU_N1_TO_1 = 2, + RELU6 = 3, + TANH = 4, + SIGN_BIT = 5, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table Conv2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; + // Parameters for Conv2D version 8 or above. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +// Options for both Conv3D and Conv3DTranspose. +table Conv3DOptions { + padding:Padding; + stride_d:int; + stride_w:int; + stride_h:int; + fused_activation_function:ActivationFunctionType; + dilation_d_factor:int = 1; + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table Pool2DOptions { + padding:Padding; + stride_w:int; + stride_h:int; + filter_width:int; + filter_height:int; + fused_activation_function:ActivationFunctionType; +} + +table DepthwiseConv2DOptions { + // Parameters for DepthwiseConv version 1 or above. + padding:Padding; + stride_w:int; + stride_h:int; + // `depth_multiplier` is redundant. It's used by CPU kernels in + // TensorFlow 2.0 or below, but ignored in versions above. + // See comments in lite/c/builtin_op_data.h for more details. + depth_multiplier:int; + fused_activation_function:ActivationFunctionType; + // Parameters for DepthwiseConv version 2 or above. + dilation_w_factor:int = 1; + dilation_h_factor:int = 1; +} + +table ConcatEmbeddingsOptions { + num_channels:int; + num_columns_per_channel:[int]; + embedding_dim_per_channel:[int]; // This could be inferred from parameters. 
+} + +enum LSHProjectionType: byte { + UNKNOWN = 0, + SPARSE = 1, + DENSE = 2, +} + +table LSHProjectionOptions { + type: LSHProjectionType; +} + +table SVDFOptions { + rank:int; + fused_activation_function:ActivationFunctionType; + // For weights-only quantization, use asymmetric quantization for non + // constant inputs at evaluation time. + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow RNNCell. +table RNNOptions { + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow dynamic_rnn with RNNCell. +table SequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + asymmetric_quantize_inputs:bool; +} + +// An implementation of TensorFlow bidirectional_dynamic_rnn with RNNCell. +table BidirectionalSequenceRNNOptions { + time_major:bool; + fused_activation_function:ActivationFunctionType; + merge_outputs: bool; + asymmetric_quantize_inputs:bool; +} + +// LINT.IfChange +enum FullyConnectedOptionsWeightsFormat: byte { + DEFAULT = 0, + SHUFFLED4x16INT8 = 1, + SHUFFLED16x1FLOAT32 = 127 +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow fully_connected (a.k.a Dense) layer. +table FullyConnectedOptions { + // Parameters for FullyConnected version 1 or above. + fused_activation_function:ActivationFunctionType; + + // Parameters for FullyConnected version 2 or above. + weights_format:FullyConnectedOptionsWeightsFormat = DEFAULT; + + // Parameters for FullyConnected version 5 or above. + // If set to true, then the number of dimensions is preserved. Furthermore, + // all but the last dimension of the input and output shapes will be equal. + keep_num_dims: bool; + + // Parameters for FullyConnected version 7 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; + + // Parameters for FullyConnected version 11 or above. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +table SoftmaxOptions { + beta: float; +} + +// An implementation of TensorFlow concat. +table ConcatenationOptions { + axis:int; + fused_activation_function:ActivationFunctionType; +} + +table AddOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 3. + pot_scale_int16:bool = true; +} + +table MulOptions { + fused_activation_function:ActivationFunctionType; +} + +table L2NormOptions { + // This field is currently ignored in the L2 Norm Op. + fused_activation_function:ActivationFunctionType; +} + +table LocalResponseNormalizationOptions { + radius:int; + bias:float; + alpha:float; + beta:float; +} + +// LINT.IfChange +enum LSTMKernelType : byte { + // Full LSTM kernel which supports peephole and projection. + FULL = 0, + // Basic LSTM kernels. Equivalent to TensorFlow BasicLSTMCell. + BASIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +// An implementation of TensorFlow LSTMCell and CoupledInputForgetGateLSTMCell +table LSTMOptions { + // Parameters for LSTM version 1 or above. + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // Parameters for LSTM version 2 or above. + // Basic kernel is only supported in version 2 or above.
+ kernel_type: LSTMKernelType = FULL; + + // Parameters for LSTM version 4 or above. + asymmetric_quantize_inputs: bool; +} + +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true then first dimension is sequence, otherwise batch. + time_major:bool; + + // Parameter for Unidirectional Sequence LSTM version 3. + asymmetric_quantize_inputs:bool; + + // Parameter for unidirectional sequence RNN version 4. + diagonal_recurrent_tensors:bool; +} + +table BidirectionalSequenceLSTMOptions { + // Parameters supported by version 1: + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping + + // If true, store the outputs of both directions into the first output. + merge_outputs: bool; + + // Parameters supported by version 2: + // If true then first dimension is sequence, otherwise batch. + // Version 1 implementations assumed time_major to be true, so this default + // value should never change. + time_major: bool = true; + + // Parameters for version 3 or above. + asymmetric_quantize_inputs:bool; +} + +table ResizeBilinearOptions { + new_height: int (deprecated); + new_width: int (deprecated); + align_corners: bool; + half_pixel_centers: bool; +} + +table ResizeNearestNeighborOptions { + align_corners: bool; + half_pixel_centers: bool; +} + +// A call operation options +table CallOptions { + // The subgraph index that needs to be called. + subgraph:uint; +} + +table PadOptions { +} + +table PadV2Options { +} + +table ReshapeOptions { + new_shape:[int]; +} + +table SpaceToBatchNDOptions { +} + +table BatchToSpaceNDOptions { +} + +table SkipGramOptions { + ngram_size: int; + max_skip_size: int; + include_all_ngrams: bool; +} + +table SpaceToDepthOptions { + block_size: int; +} + +table DepthToSpaceOptions { + block_size: int; +} + +table SubOptions { + fused_activation_function:ActivationFunctionType; + // Parameters supported by version 5 + pot_scale_int16:bool = true; +} + +table DivOptions { + fused_activation_function:ActivationFunctionType; +} + +table TopKV2Options { +} + +enum CombinerType : byte { + SUM = 0, + MEAN = 1, + SQRTN = 2, +} + +table EmbeddingLookupSparseOptions { + combiner:CombinerType; +} + +table GatherOptions { + axis: int; + // Parameters for Gather version 5 or above. + batch_dims: int = 0; +} + +table TransposeOptions { +} + +table ExpOptions { +} + +table CosOptions { +} + +table ReducerOptions { + keep_dims: bool; +} + +table SqueezeOptions { + squeeze_dims:[int]; +} + +table SplitOptions { + num_splits: int; +} + +table SplitVOptions { + num_splits: int; +} + +table StridedSliceOptions { + begin_mask: int; + end_mask: int; + ellipsis_mask: int; + new_axis_mask: int; + shrink_axis_mask: int; + // If true, then the end tensor is an offset of the begin tensor. 
+ offset: bool; +} + +table LogSoftmaxOptions { +} + +table CastOptions { + in_data_type: TensorType; + out_data_type: TensorType; +} + +table DequantizeOptions { +} + +table MaximumMinimumOptions { +} + +table TileOptions { +} + +table ArgMaxOptions { + output_type : TensorType; +} + +table ArgMinOptions { + output_type : TensorType; +} + +table GreaterOptions { +} + +table GreaterEqualOptions { +} + +table LessOptions { +} + +table LessEqualOptions { +} + +table NegOptions { +} + +table SelectOptions { +} + +table SliceOptions { +} + +table TransposeConvOptions { + // Parameters supported by version 1, 2, 3: + padding:Padding; + stride_w:int; + stride_h:int; + + // Parameters supported by version 4: + fused_activation_function:ActivationFunctionType = NONE; + + // Parameters for TransposeConv version 5 or above. + // If set, use this for bias and accumulator. + // When set, quantized_bias_type defines the dtype for both bias and accumulator. + quantized_bias_type: TensorType; +} + +table ExpandDimsOptions { +} + +table SparseToDenseOptions { + validate_indices:bool; +} + +table EqualOptions { +} + +table NotEqualOptions { +} + +table ShapeOptions { + // Optional output type of the operation (int32 or int64). Defaults to int32. + out_type : TensorType; +} + +table RankOptions { +} + +table PowOptions { +} + +table FakeQuantOptions { + // Parameters supported by version 1: + min:float; + max:float; + num_bits:int; + + // Parameters supported by version 2: + narrow_range:bool; +} + +table PackOptions { + values_count:int; + axis:int; +} + +table LogicalOrOptions { +} + +table OneHotOptions { + axis:int; +} + +table AbsOptions { +} + + +table HardSwishOptions { +} + +table LogicalAndOptions { +} + +table LogicalNotOptions { +} + +table UnpackOptions { + num:int; + axis:int; +} + +table FloorDivOptions { +} + +table SquareOptions { +} + +table ZerosLikeOptions { +} + +table FillOptions { +} + +table FloorModOptions { +} + +table RangeOptions { +} + +table LeakyReluOptions { + alpha:float; +} + +table SquaredDifferenceOptions { +} + +// LINT.IfChange +enum MirrorPadMode : byte { + // Doesn't include borders. + REFLECT = 0, + // Includes borders. + SYMMETRIC = 1, +} +// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_op_enums.td) + +table MirrorPadOptions { + mode:MirrorPadMode; +} + +table UniqueOptions { + idx_out_type:TensorType = INT32; +} + +table ReverseV2Options { +} + +table AddNOptions { +} + +table GatherNdOptions { +} + +table WhereOptions { +} + +table ReverseSequenceOptions { + seq_dim:int; + batch_dim:int = 0; +} + +table MatrixDiagOptions { +} + +table QuantizeOptions { +} + +table MatrixSetDiagOptions { +} + +table IfOptions { + then_subgraph_index:int; + else_subgraph_index:int; +} + +table CallOnceOptions { + init_subgraph_index:int; +} + +table WhileOptions { + cond_subgraph_index:int; + body_subgraph_index:int; +} + +table NonMaxSuppressionV4Options { +} + +table NonMaxSuppressionV5Options { +} + +table ScatterNdOptions { +} + +table SelectV2Options { +} + +table DensifyOptions { +} + +table SegmentSumOptions { +} + +table BatchMatMulOptions { + adjoint_lhs:bool; + adjoint_rhs:bool; + // Parameters for BatchMatMul version 4 or above. + // If set to true, then weights-only op will use asymmetric quantization for + // inputs. + asymmetric_quantize_inputs: bool; +} + +table CumsumOptions { + exclusive:bool; + reverse:bool; +} + +table BroadcastToOptions { +} + +table Rfft2dOptions { +} + +table HashtableOptions { + // The identity of hash tables. 
This identity will be used across different + // subgraphs in the same interpreter instance. + table_id:int; + key_dtype:TensorType; + value_dtype:TensorType; +} + +table HashtableFindOptions { +} + +table HashtableImportOptions { +} + +table HashtableSizeOptions { +} + +table VarHandleOptions { + container:string; + shared_name:string; +} + +table ReadVariableOptions { +} + +table AssignVariableOptions { +} + +table RandomOptions { + seed: long; + seed2: long; +} + +table BucketizeOptions { + boundaries: [float]; // The bucket boundaries. +} + +table GeluOptions { + approximate: bool; +} + +table DynamicUpdateSliceOptions { +} + +table UnsortedSegmentProdOptions { +} + +table UnsortedSegmentMaxOptions { +} + +table UnsortedSegmentSumOptions { +} + +table ATan2Options { +} + +table UnsortedSegmentMinOptions{ +} + +table SignOptions { +} + +table BitcastOptions { +} + +table BitwiseXorOptions { +} + +table RightShiftOptions { +} + +table DilateOptions { +} + +enum ReduceWindowFunction : int { + UNSUPPORTED, + ADD, + MUL, + MINIMUM, + MAXIMUM, + ALL, + ANY, +} + +table ReduceWindowOptions{ + reduce_function: ReduceWindowFunction; +} + +table GRUOptions { + fused_activation_function:ActivationFunctionType; + return_sequences : bool; + time_major : bool; +} + +table BCQGatherOptions { + input_hidden_size: int; + axis: int; +} + +table BCQFullyConnectedOptions { + weights_hidden_size: int; + fused_activation_function:ActivationFunctionType; +} + +table InstanceNormOptions { + epsilon:float; + fused_activation_function:ActivationFunctionType; +} + +// An OperatorCode can be an enum value (BuiltinOperator) if the operator is a +// builtin, or a string if the operator is custom. +table OperatorCode { + // This field is for backward compatibility. This field will be used when + // the value of the extended builtin_code field is less than + // BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + deprecated_builtin_code:byte; + custom_code:string; + + // The version of the operator. The version needs to be bumped whenever new + // parameters are introduced into an op. + version:int = 1; + + // This field is introduced for resolving op builtin code shortage problem + // (the original BuiltinOperator enum field was represented as a byte). + // This field will be used when the value of the extended builtin_code field + // is greater than BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES. + builtin_code:BuiltinOperator; +} + +enum CustomOptionsFormat : byte { + FLEXBUFFERS = 0, +} + +enum DataFormat : byte { + // For 2D data, NHWC(batch, height, width, channels) + // For 3D data, NDHWC(batch, depth, height, width, channels) + CHANNELS_LAST = 0, + // For 2D data, NCHW(batch, channels, height, width) + // For 3D data, NCDHW(batch, channels, depth, height, width) + CHANNELS_FIRST = 1, +} + +// An operator takes tensors as inputs and outputs. The type of operation being +// performed is determined by an index into the list of valid OperatorCodes, +// while the specifics of each operation are configured using builtin_options +// or custom_options. +table Operator { + // Index into the operator_codes array. Using an integer here avoids + // complicated map lookups. + opcode_index:uint; + + // Optional inputs are indicated by -1. + inputs:[int]; + outputs:[int]; + + builtin_options:BuiltinOptions; + custom_options:[ubyte]; + custom_options_format:CustomOptionsFormat; + + // A list of booleans indicating the input tensors which are being mutated by + // this operator (e.g. used by RNN and LSTM).
+ // For example, if the "inputs" array refers to 5 tensors and the second and + // fifth are mutable variables, then this list will contain + // [false, true, false, false, true]. + // + // If the list is empty, no variable is mutated in this operator. + // The list either has the same length as `inputs`, or is empty. + mutating_variable_inputs:[bool]; + + // A list of indices to the subgraph's "tensors" that are internal to an Op. + // Internal tensors are those that do not flow in or out of the operation, + // but instead are part of internal computation. As such, the operation's + // implementation may manage its memory more efficiently. They are needed + // however (i.e. not just an implementation detail) since they are part of the + // computation, which may require relevant metadata such as quantization + // parameters. + intermediates:[int]; + + // When an op is using custom_options in a model that is larger than 2GB, we + // instead use the following attributes to find the buffer location, which + // is stored outside of flatbuffers; the offset is calculated relative to the + // beginning of the file and is only valid if > 1 + large_custom_options_offset: ulong; + large_custom_options_size: ulong; + + // The Flatbuffers union struct has a 128-element limit in Java, so a second + // union is added; in case BuiltinOptions2 runs out, a third + // one can be added + builtin_options_2 : BuiltinOptions2; +} + +// The root type, defining a subgraph, which typically represents an entire +// model. +table SubGraph { + // A list of all tensors used in this subgraph. + tensors:[Tensor]; + + // Indices of the tensors that are inputs into this subgraph. Note this is + // the list of non-static tensors that feed into the subgraph for inference. + inputs:[int]; + + // Indices of the tensors that are outputs out of this subgraph. Note this is + // the list of output tensors that are considered the product of the + // subgraph's inference. + outputs:[int]; + + // All operators, in execution order. + operators:[Operator]; + + // Name of this subgraph (used for debugging). + name:string; + + // Data format for input/output of SubGraph, deprecated + deprecated_data_format: DataFormat (deprecated); +} + +// Table of raw data buffers (used for constant tensors). Referenced by tensors +// by index. The generous alignment accommodates mmap-friendly data structures. +table Buffer { + data:[ubyte] (force_align: 16); + + // In a model that is larger than 2GB, buffers instead use the following + // attributes to find stored data, which is outside of flatbuffers; + // the offset is calculated relative to the beginning of the file and is only + // valid if > 1. + offset: ulong; + size: ulong; +} + +table Metadata { + // A human readable string to uniquely identify a Metadata. + name:string; + // An index to the buffers table. + buffer:uint; +} + +// Map from an alias name of tensor to tensor index in the graph. +// This is used in Signature def. +table TensorMap { + // Represents the alias to use for this tensor. + name:string; + + // The actual tensor index in the primary graph that 'name' corresponds to. + tensor_index:uint; +} + +// This corresponds to SignatureDef in Tensorflow SavedModel. +// The SignatureDef will be part of the SavedModel provided for conversion. +table SignatureDef { + // Named inputs for this signature. + inputs:[TensorMap]; + + // Named outputs for this signature. + outputs:[TensorMap]; + + // Key value which was in the Tensorflow SavedModel SignatureDef map.
+ signature_key:string; + + // Model tag, deprecated. + deprecated_tag:string (deprecated); + + // Index of the subgraph that corresponds to the exported method. + subgraph_index:uint; +} + +table Model { + // Version of the schema. + version:uint; + + // A list of all operator codes used in this model. This is + // kept in order because operators carry an index into this + // vector. + operator_codes:[OperatorCode]; + + // All the subgraphs of the model. The 0th is assumed to be the main + // model. + subgraphs:[SubGraph]; + + // A description of the model. + description:string; + + // Buffers of the model. + // Note the 0th entry of this array must be an empty buffer (sentinel). + // This is a convention so that tensors without a buffer can provide 0 as + // their buffer. + buffers:[Buffer]; + + // Metadata about the model. Indirects into the existing buffers list. + // Deprecated, prefer to use metadata field. + metadata_buffer:[int]; + + // Metadata about the model. + metadata:[Metadata]; + + // Optional SignatureDefs for the model. + signature_defs:[SignatureDef]; +} + +root_type Model;
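
The QuantizationParameters table above fixes the dequantization rule as f = scale * (q - zero_point), with the scale/zero_point pair selected along quantized_dimension. A minimal C++ sketch of that rule for a tensor assumed to be laid out as [channels, inner_size] with quantized_dimension = 0; the function name and layout are illustrative only and are not taken from the luci sources.

#include <cstddef>
#include <cstdint>
#include <vector>

// Per-channel dequantization: each channel c uses scale[c] and zero_point[c],
// applied to every element of that channel, following f = scale * (q - zero_point).
std::vector<float> dequantize_per_channel(const std::vector<int8_t> &q,
                                          const std::vector<float> &scale,
                                          const std::vector<int64_t> &zero_point,
                                          std::size_t channels, std::size_t inner_size)
{
  std::vector<float> f(q.size());
  for (std::size_t c = 0; c < channels; ++c)
    for (std::size_t i = 0; i < inner_size; ++i)
    {
      const std::size_t idx = c * inner_size + i;
      f[idx] = scale[c] * static_cast<float>(q[idx] - zero_point[c]);
    }
  return f;
}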
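
The Tensor.buffer comment defines the row-major layout contract: for shape [4, 3, 2], index [i, j, k] maps to data_buffer[i*3*2 + j*2 + k]. The generic helper below computes the same offset for any rank (Horner's rule over the shape); it is shown only to make the rule concrete and is not part of the schema or of this patch.

#include <cstddef>
#include <cstdint>
#include <vector>

// Row-major flat offset for a multi-dimensional index.
std::size_t flat_offset(const std::vector<int32_t> &shape, const std::vector<int32_t> &index)
{
  std::size_t offset = 0;
  for (std::size_t d = 0; d < shape.size(); ++d)
    offset = offset * static_cast<std::size_t>(shape[d]) + static_cast<std::size_t>(index[d]);
  return offset;
}

// Example: flat_offset({4, 3, 2}, {i, j, k}) == i*3*2 + j*2 + k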
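
The new per-tensor compression_type field defaults to NONE, so existing models are unaffected; only tensors explicitly marked HUFFMAN carry Huffman-encoded buffers. The sketch below shows how a reader of the generated header might detect the flag. The include path and the accessor/enum names follow the usual flatc C++ code generation for this schema and are assumptions here; the actual decode path is the HuffmanDecoder introduced elsewhere in this patch and is not reproduced.

#include <mio/circle/schema_generated.h> // flatc-generated header for this schema (assumed path)

// Returns true if any tensor in any subgraph is marked as Huffman-compressed.
bool has_huffman_compressed_tensor(const void *model_data)
{
  const circle::Model *model = circle::GetModel(model_data);
  if (model == nullptr || model->subgraphs() == nullptr)
    return false;
  for (const circle::SubGraph *graph : *model->subgraphs())
  {
    if (graph->tensors() == nullptr)
      continue;
    for (const circle::Tensor *tensor : *graph->tensors())
      if (tensor->compression_type() == circle::CompressionType_HUFFMAN)
        return true;
  }
  return false;
}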