From c852b17b1a430a71671c2a64875cc9e0b5fa4c63 Mon Sep 17 00:00:00 2001
From: cyLi-Tiger <2017202049@ruc.edu.cn>
Date: Wed, 25 Oct 2023 23:09:59 +0800
Subject: [PATCH 1/3] update prepare.sh

---
 third_party/prepare.sh | 51 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/third_party/prepare.sh b/third_party/prepare.sh
index 276f3f7c..41def411 100755
--- a/third_party/prepare.sh
+++ b/third_party/prepare.sh
@@ -94,3 +94,54 @@ if [ ! -e $SRC_DIR/flatcc/bin/flatcc ];then
     flatcc_build=$SRC_DIR/flatcc/build
     build_flatcc $flatcc_dir $flatcc_build
 fi
+
+function build_protobuf() {
+    cd $1
+    git checkout v3.20.2
+    rm -rf $2
+    mkdir -p $2
+    cd $2
+    cmake $1/cmake \
+        -Dprotobuf_BUILD_SHARED_LIBS=OFF \
+        -DCMAKE_INSTALL_PREFIX=$3 \
+        -DCMAKE_INSTALL_SYSCONFDIR=/etc \
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+        -Dprotobuf_BUILD_TESTS=OFF \
+        -DCMAKE_BUILD_TYPE=Release
+    make -j32
+    make install
+}
+
+echo "begin to build protobuf"
+cd $SRC_DIR
+git submodule update -f --init protobuf
+protobuf_dir=$SRC_DIR/protobuf
+protobuf_build=$SRC_DIR/protobuf/build
+protobuf_install=$SRC_DIR/protobuf/install
+build_protobuf $protobuf_dir $protobuf_build $protobuf_install
+
+export PATH=$SRC_DIR/protobuf/install/bin:$PATH
+export CMAKE_PREFIX_PATH=$SRC_DIR/protobuf/install/lib:$CMAKE_PREFIX_PATH
+python3_path=$(which "python3")
+function build_onnx() {
+    cd $1
+    git checkout 7f0a6331
+    rm -rf $2
+    mkdir -p $2
+    cd $2
+    cmake -DCMAKE_INSTALL_PREFIX=$3 \
+        -DONNX_USE_LITE_PROTO=ON \
+        -DPYTHON_EXECUTABLE=$python3_path \
+        -G Ninja \
+        $1
+
+    ninja install
+}
+
+echo "begin to build onnx"
+cd $SRC_DIR
+git submodule update -f --init onnx
+onnx_dir=$SRC_DIR/onnx
+onnx_build=$SRC_DIR/onnx/build
+onnx_install=$SRC_DIR/onnx/install
+build_onnx $onnx_dir $onnx_build $onnx_install

From f18ba3929ed466bc49f658ab2a02e0bc88155033 Mon Sep 17 00:00:00 2001
From: cyLi-Tiger <2017202049@ruc.edu.cn>
Date: Sat, 28 Oct 2023 17:03:07 +0800
Subject: [PATCH 2/3] add onnx

---
 .gitmodules                                   |   6 +
 compiler/CMakeLists.txt                       |  24 +
 .../include/compiler/Target/onnx/helper.h     |  60 ++
 .../include/compiler/Target/onnx/import.h     |  21 +
 compiler/lib/Target/CMakeLists.txt            |   1 +
 compiler/lib/Target/onnx/CMakeLists.txt       |  22 +
 compiler/lib/Target/onnx/importer.cpp         | 586 ++++++++++++++++++
 compiler/tools/CMakeLists.txt                 |   2 +
 compiler/tools/onnx-importer/CMakeLists.txt   |   7 +
 .../tools/onnx-importer/onnx-importer.cpp     |  42 ++
 compiler/tools/onnx-to-tinynn/CMakeLists.txt  |  20 +
 .../tools/onnx-to-tinynn/onnx-to-tinynn.cpp   | 430 +++++++++++++
 third_party/onnx                              |   1 +
 third_party/protobuf                          |   1 +
 14 files changed, 1223 insertions(+)
 create mode 100644 compiler/include/compiler/Target/onnx/helper.h
 create mode 100644 compiler/include/compiler/Target/onnx/import.h
 create mode 100644 compiler/lib/Target/onnx/CMakeLists.txt
 create mode 100644 compiler/lib/Target/onnx/importer.cpp
 create mode 100644 compiler/tools/onnx-importer/CMakeLists.txt
 create mode 100644 compiler/tools/onnx-importer/onnx-importer.cpp
 create mode 100644 compiler/tools/onnx-to-tinynn/CMakeLists.txt
 create mode 100644 compiler/tools/onnx-to-tinynn/onnx-to-tinynn.cpp
 create mode 160000 third_party/onnx
 create mode 160000 third_party/protobuf

diff --git a/.gitmodules b/.gitmodules
index 1de0b0ae..3e82bc97 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,9 @@
 [submodule "third_party/llvm-project"]
 	path = third_party/llvm-project
 	url = https://github.com/llvm/llvm-project.git
+[submodule "third_party/onnx"]
+	path = third_party/onnx
+	url = https://github.com/onnx/onnx.git
+[submodule "third_party/protobuf"]
+	path = third_party/protobuf
+	url = https://github.com/protocolbuffers/protobuf.git
diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt
index fbaa77fa..b2342d6c 100644
--- a/compiler/CMakeLists.txt
+++ b/compiler/CMakeLists.txt
@@ -109,6 +109,30 @@ add_library(mgb_imported INTERFACE)
 target_link_libraries(mgb_imported INTERFACE ${MGB_INSTALL_LIB})
 target_include_directories(mgb_imported INTERFACE ${MGB_INCLUDE_DIR})
 
+find_library(
+  ONNX_INSTALL_LIB
+  NAMES libonnx.a
+  PATHS ${PROJECT_SOURCE_DIR}/../third_party/onnx/install/lib/ REQUIRED)
+find_library(
+  ONNX_PROTO_INSTALL_LIB
+  NAMES libonnx_proto.a
+  PATHS ${PROJECT_SOURCE_DIR}/../third_party/onnx/install/lib/ REQUIRED)
+
+list(APPEND ONNX_LIBS -Wl,--whole-archive ${ONNX_PROTO_INSTALL_LIB}
+     -Wl,--no-whole-archive)
+list(APPEND ONNX_LIBS ${ONNX_INSTALL_LIB})
+set(ONNX_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/../third_party/onnx/install/include)
+
+list(APPEND PROTOBUF_LIBS
+     ${PROJECT_SOURCE_DIR}/../third_party/protobuf/install/lib/libprotobuf.a
+     ${PROJECT_SOURCE_DIR}/../third_party/protobuf/install/lib/libprotoc.a)
+set(PROTOBUF_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/../third_party/protobuf/install/include)
+
+add_library(onnx_imported INTERFACE)
+# note: the variable populated above is ONNX_LIBS, not ONNX_LIB
+target_link_libraries(onnx_imported INTERFACE ${ONNX_LIBS} ${PROTOBUF_LIBS})
+target_include_directories(onnx_imported INTERFACE ${ONNX_INCLUDE_DIR}
+                                                   ${PROTOBUF_INCLUDE_DIR})
+
 if(APPLE)
   if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES arm64)
     set(TCC_INSTALL_LIB
diff --git a/compiler/include/compiler/Target/onnx/helper.h b/compiler/include/compiler/Target/onnx/helper.h
new file mode 100644
index 00000000..435c3b38
--- /dev/null
+++ b/compiler/include/compiler/Target/onnx/helper.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "compiler/Common/Logger.h"
+#include "compiler/Common/MemoryStatus.h"
+
+#include "megdnn/basic_types.h"
+#include "mlir/IR/AffineMap.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/MLIRContext.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+#include "compiler/Dialect/MGB/IR/MGBDialect.h"
+
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "onnx/common/ir.h"
+
+namespace mlir {
+namespace ONNX {
+static inline mlir::Type elemTypeToType(
+        mlir::MLIRContext* context, const int32_t& elem_type) {
+    switch (elem_type) {
+        case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
+            return mlir::FloatType::getF32(context);
+        case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
+            return mlir::IntegerType::get(context, 8, mlir::IntegerType::Unsigned);
+        case ONNX_NAMESPACE::TensorProto_DataType_INT32:
+            return mlir::IntegerType::get(context, 32, mlir::IntegerType::Signed);
+        default:
+            CC_ABORT << "Unsupported dtype " << elem_type << "\n";
+            break;
+    }
+    return mlir::Type();
+}
+
+static inline mlir::ShapedType valueToShapedType(
+        mlir::MLIRContext* context, ONNX_NAMESPACE::Value* value) {
+    std::vector<int64_t> dims;
+    for (auto dim : value->sizes()) {
+        dims.emplace_back(dim.dim);
+    }
+    LOG_DEBUG << "Create RankedTensorType in Value with shape= " << dims << "\n";
+    mlir::ShapedType res;
+    if (dims.size() > 0) {
+        res = mlir::RankedTensorType::get(
+                dims, elemTypeToType(context, value->elemType()));
+    } else {
+        LOG_WARN << "Shape is unknown, compiler just makes a 1-dim dynamic tensor "
+                    "type\n";
+        res = mlir::RankedTensorType::get(
+                {-1}, elemTypeToType(context, value->elemType()));
+    }
+    return res;
+}
+
+}  // namespace ONNX
+}  // namespace mlir
\ No newline at end of file
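Note: the two helpers above implement a simple fallback rule — a fully known ONNX shape becomes a ranked tensor type, and a missing shape degrades to a single dynamic dimension (encoded as -1, printed as `?` in MLIR). A standalone C++ sketch of that rule, with an illustrative string rendering in place of the real mlir::RankedTensorType (not part of the patch):

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // mirrors valueToShapedType's fallback: empty dims -> one dynamic dim
    std::string to_tensor_type(const std::vector<int64_t>& dims) {
        std::vector<int64_t> d = dims.empty() ? std::vector<int64_t>{-1} : dims;
        std::string s = "tensor<";
        for (auto v : d)
            s += (v < 0 ? std::string("?") : std::to_string(v)) + "x";
        return s + "f32>";
    }

    int main() {
        std::cout << to_tensor_type({1, 3, 224, 224}) << "\n";  // tensor<1x3x224x224xf32>
        std::cout << to_tensor_type({}) << "\n";                // tensor<?xf32>
    }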
diff --git a/compiler/include/compiler/Target/onnx/import.h b/compiler/include/compiler/Target/onnx/import.h
new file mode 100644
index 00000000..587e5bfc
--- /dev/null
+++ b/compiler/include/compiler/Target/onnx/import.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "megdnn/basic_types.h"
+#include "mlir/IR/BuiltinOps.h"
+
+namespace mlir {
+namespace ONNX {
+
+struct ONNXImporterOptions {
+    std::string module_name;
+    std::string model_path;
+    std::string input_shape_str;
+};
+
+mlir::LogicalResult import_onnx(mlir::ModuleOp module, std::string model_path);
+
+}  // namespace ONNX
+}  // namespace mlir
\ No newline at end of file
diff --git a/compiler/lib/Target/CMakeLists.txt b/compiler/lib/Target/CMakeLists.txt
index 915227bc..bf72bb6e 100644
--- a/compiler/lib/Target/CMakeLists.txt
+++ b/compiler/lib/Target/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_subdirectory(MGB)
 add_subdirectory(TinyNN)
 add_subdirectory(Hako)
+add_subdirectory(onnx)
\ No newline at end of file
diff --git a/compiler/lib/Target/onnx/CMakeLists.txt b/compiler/lib/Target/onnx/CMakeLists.txt
new file mode 100644
index 00000000..cffda0d0
--- /dev/null
+++ b/compiler/lib/Target/onnx/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(LLVM_OPTIONAL_SOURCES onnx_importer.cpp)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DONNX_ML=1 -DONNX_NAMESPACE=onnx")
+add_compile_definitions("ONNX_NO_EXCEPTIONS")
+
+add_mlir_translation_library(
+  MLIRONNXImporter
+  importer.cpp
+  DEPENDS
+  MLIRMGBIncGen
+  LINK_LIBS
+  PUBLIC
+  MLIRIR
+  MLIRMGB
+  MLIRStandard)
+# detail obj library created in llvm_add_library
+target_include_directories(
+  obj.MLIRONNXImporter PRIVATE ${MGB_INCLUDE_DIR} ${ONNX_INCLUDE_DIR}
+                               ${PROTOBUF_INCLUDE_DIR})
+# add onnx-imported
+target_link_libraries(MLIRONNXImporter PUBLIC $<BUILD_INTERFACE:onnx_imported>)
+# target_compile_options(MLIRONNXImporter PUBLIC -fexceptions)
\ No newline at end of file
diff --git a/compiler/lib/Target/onnx/importer.cpp b/compiler/lib/Target/onnx/importer.cpp
new file mode 100644
index 00000000..2b6938d6
--- /dev/null
+++ b/compiler/lib/Target/onnx/importer.cpp
@@ -0,0 +1,586 @@
+#include "llvm/ADT/StringExtras.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Verifier.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+
+#include <algorithm>
+#include <cstring>
+#include <unordered_map>
+#include "compiler/Common/Logger.h"
+#include "compiler/Common/MemoryStatus.h"
+#include "compiler/Dialect/MGB/IR/MGBDialect.h"
+#include "compiler/Target/onnx/helper.h"
+#include "compiler/Target/onnx/import.h"
+#include "onnx/common/common.h"
+#include "onnx/common/file_utils.h"
+#include "onnx/common/ir.h"
+#include "onnx/common/ir_pb_converter.h"
+#include "onnx/shape_inference/implementation.h"
+
+namespace mlir {
+namespace ONNX {
+class ONNXImporter {
+public:
+    ONNXImporter(mlir::ModuleOp mod)
+            : m_module(mod), m_context(m_module->getContext()), m_builder(m_context) {
+        m_context->loadDialect<MGB::MGBDialect>();
+        m_context->loadDialect<mlir::StandardOpsDialect>();
+    }
+    ~ONNXImporter() {}
+
+    mlir::LogicalResult import_onnx(std::string model_path) {
+        OpBuilder::InsertionGuard _(m_builder);
+
+        ONNX_NAMESPACE::ModelProto model;
+        ONNX_NAMESPACE::LoadProtoFromPath<ONNX_NAMESPACE::ModelProto>(
+                model_path, model);
+        // the shape of conv pads must be [x1_begin, x2_begin, ..., x1_end,
+        // x2_end], or it can't be inferred by
+        // ONNX_NAMESPACE::shape_inference::InferShapes
+        ONNX_NAMESPACE::shape_inference::InferShapes(model);
+
+        std::unique_ptr<ONNX_NAMESPACE::Graph> g(
+                ONNX_NAMESPACE::ImportModelProto(model));
+
+        m_builder.setInsertionPointToEnd(m_module.getBody());
+        auto func = m_builder.create<mlir::FuncOp>(
+                m_builder.getUnknownLoc(), g->name(),
+                get_func_type(g->inputs(), g->outputs()));
+        mlir::Block* entryBlock = func.addEntryBlock();
+        m_builder.setInsertionPointToStart(entryBlock);
+
+        // deal with inputs
+        for (int i = 0; i < g->inputs().size(); ++i) {
+            std::string name = g->inputs()[i]->uniqueName();
+            m_value2value[g->inputs()[i]] = entryBlock->getArgument(i);
+            func.setArgAttr(i, "mgb.func_arg_name", m_builder.getStringAttr(name));
+        }
+
+        // build a map for initializer to get its Value* by name
+        std::unordered_map<std::string, ONNX_NAMESPACE::Value*> init_map;
+        for (auto node : g->nodes()) {
+            auto inputs = node->inputs();
+            auto outputs = node->outputs();
+            for (ONNX_NAMESPACE::Value* input : inputs)
+                init_map.emplace(input->uniqueName(), input);
+            for (ONNX_NAMESPACE::Value* output : outputs)
+                init_map.emplace(output->uniqueName(), output);
+        }
+
+        std::unordered_map<ONNX_NAMESPACE::Value*, ONNX_NAMESPACE::Tensor> tensor_map;
+        // save initializers as ParamStorage and load by ParamProvider
+        int size = g->initializers().size();
+        for (int i = 0; i < size; ++i) {
+            std::string initializer_name = g->initializer_names()[i];
+            ONNX_NAMESPACE::Tensor initializer = g->initializers()[i];
+            ONNX_NAMESPACE::Value* init_value = init_map.at(initializer_name);
+            tensor_map.emplace(init_value, initializer);
+            auto storage = create_param_storage(initializer, init_value);
+            mlir::Value value = m_builder.create<MGB::ParamProvider>(
+                    m_builder.getUnknownLoc(), storage);
+            m_value2value[init_value] = value;
+        }
+
+        // elemwiseMap maps an elemwise opr in onnx to an elemwise mode in mgb
+        std::unordered_map<ONNX_NAMESPACE::BuiltinSymbol, megdnn::param::Elemwise::Mode>
+                elemwiseMap;
+        elemwiseMap.emplace(
+                ONNX_NAMESPACE::BuiltinSymbol::kAdd,
+                megdnn::param::Elemwise::Mode::ADD);
+        elemwiseMap.emplace(
+                ONNX_NAMESPACE::BuiltinSymbol::kSigmoid,
+                megdnn::param::Elemwise::Mode::SIGMOID);
+        elemwiseMap.emplace(
+                ONNX_NAMESPACE::BuiltinSymbol::kMul,
+                megdnn::param::Elemwise::Mode::MUL);
+
+        // deal with oprs
+        for (auto node : g->nodes()) {
+            LOG_DEBUG << node->kind().toString() << "\n";
+            if (!strcmp(node->kind().toString(), "Add")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()),
+                        elemwiseMap[ONNX_NAMESPACE::BuiltinSymbol::kAdd]);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Mul")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()),
+                        elemwiseMap[ONNX_NAMESPACE::BuiltinSymbol::kMul]);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Sigmoid")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()),
+                        elemwiseMap[ONNX_NAMESPACE::BuiltinSymbol::kSigmoid]);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Conv")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(
+                        ONNX_NAMESPACE::BuiltinSymbol::kkernel_shape));
+                auto kernel_shape =
+                        node->is(ONNX_NAMESPACE::BuiltinSymbol::kkernel_shape);
+                CC_ASSERT(
+                        node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kdilations));
+                auto dilations = node->is(ONNX_NAMESPACE::BuiltinSymbol::kdilations);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kpads));
+                auto pads = node->is(ONNX_NAMESPACE::BuiltinSymbol::kpads);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kstrides));
+                auto strides = node->is(ONNX_NAMESPACE::BuiltinSymbol::kstrides);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kgroup));
+                auto group = node->i(ONNX_NAMESPACE::BuiltinSymbol::kgroup);
+                megdnn::param::ConvBias::Sparse sparse =
+                        megdnn::param::ConvBias::Sparse::DENSE;
+                if (group > 1)
+                    sparse = megdnn::param::ConvBias::Sparse::GROUP;
+                megdnn::param::ConvBias::Mode mode =
+                        megdnn::param::ConvBias::Mode::CROSS_CORRELATION;
+
+                // onnx pads for NCHW are [h_begin, w_begin, h_end, w_end], so
+                // pad_h/pad_w are pads[0]/pads[1] (symmetric padding assumed)
+                mlir::Value value = m_builder.create<MGB::ConvBias>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()),
+                        megdnn::param::ConvBias::NonlineMode::IDENTITY, mode, sparse,
+                        megdnn::param::ConvBias::Format::NCHW, pads[0], pads[1],
+                        strides[0], strides[1], dilations[0], dilations[1],
+                        megdnn::param::ConvBias::ComputeMode::DEFAULT);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Clip")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                // clip(x, min, max) == MIN(MAX(x, min), max)
+                std::vector<mlir::Value> min_node_input;
+                min_node_input.push_back(m_value2value.at(node->input(0)));
+                min_node_input.push_back(m_value2value.at(node->input(1)));
+                mlir::Value min_output_value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        min_node_input, megdnn::param::Elemwise::Mode::MAX);
+                std::vector<mlir::Value> max_node_input;
+                max_node_input.push_back(min_output_value);
+                max_node_input.push_back(m_value2value.at(node->input(2)));
+                mlir::Value max_output_value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        max_node_input, megdnn::param::Elemwise::Mode::MIN);
+                m_value2value.emplace(output, max_output_value);
+            } else if (!strcmp(node->kind().toString(), "Identity")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                m_value2value.emplace(output, m_value2value.at(node->input(0)));
+            } else if (!strcmp(node->kind().toString(), "Flatten")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Reshape>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()));
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Reshape")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Reshape>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input(0)));
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "GlobalAveragePool")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                auto sizes = node->input()->sizes();
+                // NCHW: H is the second-to-last dim, W the last one
+                uint32_t window_h = sizes[sizes.size() - 2].dim;
+                uint32_t window_w = sizes[sizes.size() - 1].dim;
+                mlir::Value value = m_builder.create<MGB::Pooling>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input()),
+                        megdnn::param::PoolingV0::Mode::AVERAGE, 0, 0, 1, 1, window_h,
+                        window_w, megdnn::param::Convolution::Format::NCHW);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "MaxPool")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(
+                        ONNX_NAMESPACE::BuiltinSymbol::kkernel_shape));
+                auto kernel_shapes =
+                        node->is(ONNX_NAMESPACE::BuiltinSymbol::kkernel_shape);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kpads));
+                auto pads = node->is(ONNX_NAMESPACE::BuiltinSymbol::kpads);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kstrides));
+                auto strides = node->is(ONNX_NAMESPACE::BuiltinSymbol::kstrides);
+                mlir::Value value = m_builder.create<MGB::Pooling>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input()),
+                        megdnn::param::PoolingV0::Mode::MAX, pads[0], pads[1],
+                        strides[0], strides[1], kernel_shapes[0], kernel_shapes[1],
+                        megdnn::param::Convolution::Format::NCHW);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Gemm")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                // read the attribute values rather than only testing for
+                // presence: transA/transB default to 0 in onnx
+                bool transA = false;
+                bool transB = false;
+                if (node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::ktransA))
+                    transA = node->i(ONNX_NAMESPACE::BuiltinSymbol::ktransA) != 0;
+                if (node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::ktransB))
+                    transB = node->i(ONNX_NAMESPACE::BuiltinSymbol::ktransB) != 0;
+                mlir::Value value = m_builder.create<MGB::MatrixMul>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input(0)),
+                        m_value2value.at(node->input(1)), transA, transB,
+                        megdnn::param::MatrixMulV1::ComputeMode::DEFAULT,
+                        megdnn::param::MatrixMul::Format::DEFAULT);
+                if (node->inputs().size() == 3) {
+                    std::vector<mlir::Value> addValue;
+                    addValue.emplace_back(value);
+                    addValue.emplace_back(m_value2value.at(node->input(2)));
+                    value = m_builder.create<MGB::Elemwise>(
+                            m_builder.getUnknownLoc(),
+                            valueToShapedType(m_context, output), addValue,
+                            elemwiseMap[ONNX_NAMESPACE::BuiltinSymbol::kAdd]);
+                }
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Relu")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                mlir::Value value = m_builder.create<MGB::Elemwise>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input(0)),
+                        megdnn::param::Elemwise::Mode::RELU);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Concat")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kaxis));
+                auto axis = node->i(ONNX_NAMESPACE::BuiltinSymbol::kaxis);
+                mlir::Value value = m_builder.create<MGB::Concat>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        onnxValueToMLIRValue(node->inputs()), axis,
+                        mgb::CompNode::default_cpu());
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Slice")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                auto desc_flag = get_subtensor_desc_and_flag(node, tensor_map);
+                auto desc = std::get<0>(desc_flag);
+                auto flag = std::get<1>(desc_flag);
+                // only give the first input, otherwise this op would be dealt
+                // with as dynamic shape during MGBToKernel conversion
+                mlir::Value value = m_builder.create<MGB::Subtensor>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input(0)), desc, flag);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Transpose")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kperm));
+                auto perm = node->is(ONNX_NAMESPACE::BuiltinSymbol::kperm);
+                std::vector<int32_t> perm_32(perm.size());
+                std::transform(
+                        perm.begin(), perm.end(), perm_32.begin(),
+                        [](int64_t n) { return static_cast<int32_t>(n); });
+                mlir::Value value = m_builder.create<MGB::Dimshuffle>(
+                        m_builder.getUnknownLoc(), valueToShapedType(m_context, output),
+                        m_value2value.at(node->input()), perm_32);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "ReduceMean")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kaxes));
+                auto axes = node->is(ONNX_NAMESPACE::BuiltinSymbol::kaxes);
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kkeepdims));
+                auto keepdims = node->i(ONNX_NAMESPACE::BuiltinSymbol::kkeepdims);
+                ::megdnn::param::Reduce::Mode mode =
+                        ::megdnn::param::Reduce::Mode::MEAN;
+                ::megdnn::param::Reduce::DataType data_type =
+                        ::megdnn::param::Reduce::DataType::DEFAULT;
+
+                // ReduceMean is simulated by Reduce and AxisAddRemove
+                mlir::Value reduce_input = m_value2value.at(node->input());
+                std::vector<ONNX_NAMESPACE::Dimension> previous_sizes =
+                        node->input()->sizes();
+                mlir::Value final_output;
+
+                for (auto it = axes.rbegin(); it != axes.rend(); it++) {
+                    size_t axis = *it;
+                    // form the shape of reduce's output
+                    std::vector<ONNX_NAMESPACE::Dimension> reduce_output_sizes(
+                            previous_sizes.begin(), previous_sizes.begin() + axis);
+                    reduce_output_sizes.emplace_back(ONNX_NAMESPACE::Dimension(1));
+                    reduce_output_sizes.insert(
+                            reduce_output_sizes.end(),
+                            previous_sizes.begin() + axis + 1, previous_sizes.end());
+
+                    ONNX_NAMESPACE::Value* reduce_output_resized =
+                            output->setSizes(reduce_output_sizes);
+                    LOG_DEBUG << "reduce_output_size: " << reduce_output_sizes.size()
+                              << "\n";
+                    mlir::Value reduce_output = m_builder.create<MGB::Reduce>(
+                            m_builder.getUnknownLoc(),
+                            valueToShapedType(m_context, reduce_output_resized),
+                            reduce_input, mode, axis, data_type);
+                    reduce_input = reduce_output;
+                    final_output = reduce_output;
+                    previous_sizes = reduce_output_sizes;
+
+                    if (keepdims == 0) {
+                        std::vector<ONNX_NAMESPACE::Dimension> reshape_output_sizes(
+                                previous_sizes.begin(), previous_sizes.begin() + axis);
+                        reshape_output_sizes.insert(
+                                reshape_output_sizes.end(),
+                                previous_sizes.begin() + axis + 1,
+                                previous_sizes.end());
+                        ONNX_NAMESPACE::Value* reshape_output_resized =
+                                output->setSizes(reshape_output_sizes);
+                        LOG_DEBUG << "reshape_output_size: "
+                                  << reshape_output_sizes.size() << "\n";
+                        mlir::Value axisremove_output =
+                                m_builder.create<MGB::AxisAddRemove>(
+                                        m_builder.getUnknownLoc(),
+                                        valueToShapedType(
+                                                m_context, reshape_output_resized),
+                                        reduce_output);
+                        reduce_input = axisremove_output;
+                        final_output = axisremove_output;
+                        previous_sizes = reshape_output_sizes;
+                    }
+                }
+                m_value2value.emplace(output, final_output);
+            } else if (!strcmp(node->kind().toString(), "Constant")) {
+                ONNX_NAMESPACE::Value* output = node->output();
+                CC_ASSERT(node->hasAttribute(ONNX_NAMESPACE::BuiltinSymbol::kvalue));
+                ONNX_NAMESPACE::Tensor value_tensor =
+                        node->t(ONNX_NAMESPACE::BuiltinSymbol::kvalue);
+                if (value_tensor.sizes().size() == 0) {
+                    // scalar constant: treat it as a 1-element tensor
+                    std::vector<ONNX_NAMESPACE::Dimension> constant_sizes;
+                    constant_sizes.emplace_back(ONNX_NAMESPACE::Dimension(1));
+                    output->setSizes(constant_sizes);
+                }
+                auto storage = create_param_storage(value_tensor, output);
+                mlir::Value value = m_builder.create<MGB::ParamProvider>(
+                        m_builder.getUnknownLoc(), storage);
+                m_value2value.emplace(output, value);
+            } else if (!strcmp(node->kind().toString(), "Undefined")) {
+                continue;
+            } else {
+                CC_ABORT << "unsupported onnx operator type " << node->kind().toString()
+                         << "\n";
+            }
+        }
+
+        std::vector<mlir::Value> results;
+        // deal with outputs
+        for (size_t i = 0; i < g->outputs().size(); ++i) {
+            std::string name = g->outputs()[i]->uniqueName();
+            func.setResultAttr(
+                    i, "mgb.func_result_name", m_builder.getStringAttr(name));
+            results.push_back(m_value2value.at(g->outputs()[i]));
+        }
+        m_builder.create<mlir::ReturnOp>(m_builder.getUnknownLoc(), results);
+        m_value2value.clear();
+        return mlir::verify(m_module);
+    }
+
+private:
+    std::tuple<
+            std::vector<std::tuple<int32_t, int32_t, int32_t, int32_t, int32_t>>,
+            std::vector<std::tuple<int32_t, int32_t, int32_t, int32_t, int32_t>>>
+    get_subtensor_desc_and_flag(
+            ONNX_NAMESPACE::Node* node,
+            std::unordered_map<ONNX_NAMESPACE::Value*, ONNX_NAMESPACE::Tensor>&
+                    tensor_map) {
+        ONNX_NAMESPACE::Value* output = node->output();
+        auto starts_tensor = tensor_map.at(node->input(1));
+        size_t nr_elems = 1;
+        for (int size : starts_tensor.sizes()) {
+            nr_elems *= size;
+        }
+        std::vector<int64_t> starts;
+        starts.resize(nr_elems);
+        memcpy(starts.data(), starts_tensor.data<int64_t>(),
+               nr_elems * sizeof(int64_t));
+
+        auto ends_tensor = tensor_map.at(node->input(2));
+        nr_elems = 1;
+        for (int size : ends_tensor.sizes()) {
+            nr_elems *= size;
+        }
+        std::vector<int64_t> ends;
+        ends.resize(nr_elems);
+        memcpy(ends.data(), ends_tensor.data<int64_t>(), nr_elems * sizeof(int64_t));
+        std::vector<int64_t> axes;
+        axes.reserve(output->sizes().size());
+        for (int64_t i = 0; i < output->sizes().size(); i++) {
+            axes.emplace_back(i);
+        }
+        std::vector<int64_t> steps;
+        steps.reserve(starts.size());
+        for (size_t i = 0; i < starts.size(); i++) {
+            steps.emplace_back(1);
+        }
+        // onnx Slice takes optional axes (input 3) and steps (input 4); use
+        // >= so axes are still read when steps are also given
+        if (node->inputs().size() >= 4) {
+            ONNX_NAMESPACE::Tensor axes_tensor = tensor_map.at(node->input(3));
+            nr_elems = 1;
+            for (int size : axes_tensor.sizes()) {
+                nr_elems *= size;
+            }
+            axes.resize(nr_elems);
+            memcpy(axes.data(), axes_tensor.data<int64_t>(),
+                   nr_elems * sizeof(int64_t));
+        }
+        if (node->inputs().size() >= 5) {
+            ONNX_NAMESPACE::Tensor steps_tensor = tensor_map.at(node->input(4));
+            nr_elems = 1;
+            for (int size : steps_tensor.sizes()) {
+                nr_elems *= size;
+            }
+            steps.resize(nr_elems);
+            memcpy(steps.data(), steps_tensor.data<int64_t>(),
+                   nr_elems * sizeof(int64_t));
+        }
+        LOG_DEBUG << starts.size() << " " << ends.size() << " " << axes.size() << " "
+                  << steps.size() << "\n";
+
+        CC_ASSERT(starts.size() == ends.size());
+        CC_ASSERT(starts.size() == axes.size());
+        CC_ASSERT(starts.size() == steps.size());
+
+        std::vector<std::tuple<int32_t, int32_t, int32_t, int32_t, int32_t>> desc;
+        std::vector<std::tuple<int32_t, int32_t, int32_t, int32_t, int32_t>> flag;
+
+        for (size_t i = 0; i < axes.size(); i++) {
+            desc.push_back(
+                    {static_cast<int32_t>(axes[i]), static_cast<int32_t>(starts[i]),
+                     static_cast<int32_t>(ends[i]), static_cast<int32_t>(steps[i]),
+                     -1});
+            flag.push_back({0, 0, 0, 0, -1});
+        }
+
+        LOG_DEBUG << "desc[0]: " << std::get<0>(desc[0]) << " " << std::get<1>(desc[0])
+                  << " " << std::get<2>(desc[0]) << " " << std::get<3>(desc[0]) << " "
+                  << std::get<4>(desc[0]) << "\n";
+
+        LOG_DEBUG << "flag[0]: " << std::get<0>(flag[0]) << " " << std::get<1>(flag[0])
+                  << " " << std::get<2>(flag[0]) << " " << std::get<3>(flag[0]) << " "
+                  << std::get<4>(flag[0]) << "\n";
+
+        return {desc, flag};
+    }
+
+    // get FunctionType from inputs and outputs
+    mlir::FunctionType get_func_type(
+            ONNX_NAMESPACE::ArrayRef<ONNX_NAMESPACE::Value*> inputs,
+            ONNX_NAMESPACE::ArrayRef<ONNX_NAMESPACE::Value*> outputs) {
+        auto arg_types = llvm::to_vector<1>(llvm::map_range(
+                inputs, [this](ONNX_NAMESPACE::Value* value) -> mlir::Type {
+                    return valueToShapedType(m_context, value);
+                }));
+        auto result_types = llvm::to_vector<1>(llvm::map_range(
+                outputs, [this](ONNX_NAMESPACE::Value* value) -> mlir::Type {
+                    return valueToShapedType(m_context, value);
+                }));
+        return mlir::FunctionType::get(m_context, arg_types, result_types);
+    }
+
+    // from onnx Tensor to attr
+    mlir::DenseElementsAttr tensor_to_attr(
+            const ONNX_NAMESPACE::Tensor& tensor, ONNX_NAMESPACE::Value* init_value) {
+#define FOR_EACH_TYPE_CTYPE(DTYPE_ENUM, CTYPE)                                       \
+    if (tensor.elem_type() == DTYPE_ENUM) {                                          \
+        LOG_DEBUG << "type: " << tensor.elem_type() << "\n";                         \
+        size_t nr_elems = 1;                                                         \
+        for (int size : tensor.sizes())                                              \
+            nr_elems *= size;                                                        \
+        std::vector<CTYPE> data(nr_elems);                                           \
+        memcpy(data.data(), tensor.data<CTYPE>(), nr_elems * sizeof(CTYPE));         \
+        return mlir::DenseElementsAttr::get(                                         \
+                valueToShapedType(m_context, init_value), llvm::makeArrayRef(data)); \
+    }
+
+        FOR_EACH_TYPE_CTYPE(
+                ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT, float)
+        FOR_EACH_TYPE_CTYPE(
+                ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32, int)
+        FOR_EACH_TYPE_CTYPE(
+                ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_DOUBLE,
+                double)
+        // MegEngine doesn't support int64, convert it to int32
+        if (tensor.elem_type() ==
+            ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64) {
+            LOG_DEBUG << "type: " << tensor.elem_type() << "\n";
+            size_t nr_elems = 1;
+            for (int size : tensor.sizes())
+                nr_elems *= size;
+            std::vector<int64_t> data(nr_elems);
+            memcpy(data.data(), tensor.data<int64_t>(), nr_elems * sizeof(int64_t));
+            std::vector<int32_t> data_32;
+            std::transform(
+                    data.begin(), data.end(), std::back_inserter(data_32),
+                    [](int64_t num) { return static_cast<int32_t>(num); });
+            init_value->setElemType(
+                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32);
+            return mlir::DenseElementsAttr::get(
+                    valueToShapedType(m_context, init_value),
+                    llvm::makeArrayRef(data_32));
+        }
+
+        CC_ABORT << "unsupported data type " << tensor.elem_type() << '\n';
+        return {};
+    }
+
+    MGB::ParamStorage& create_param_storage(
+            const ONNX_NAMESPACE::Tensor& initializer,
+            ONNX_NAMESPACE::Value* init_value) {
+        std::string name = init_value->uniqueName();
+        bool equal_flag = true;
+        auto tensor_attr = tensor_to_attr(initializer, init_value);
+        std::string id_str;
+        llvm::raw_string_ostream id_stream(id_str);
+        id_stream << tensor_attr;
+        if (m_param_storage.find(id_str) == m_param_storage.end()) {
+            equal_flag = false;
+        } else {
+            auto param = m_param_storage[id_str];
+            int32_t user_count = param.user_count() + 1;
+            auto user_count_attr = mlir::IntegerAttr::get(
+                    param.user_countAttr().getType(), user_count);
+            auto value = param.value();
+            if (value != tensor_attr)
+                equal_flag = false;
+            else
+                param.user_countAttr(user_count_attr);
+        }
+        if (!equal_flag) {
+            OpBuilder::InsertionGuard _(m_builder);
+            m_builder.setInsertionPointToStart(m_module.getBody());
+            auto storage = m_builder.create<MGB::ParamStorage>(
+                    m_builder.getUnknownLoc(), name, tensor_attr,
+                    valueToShapedType(m_context, init_value), 1);
+            storage.setPrivate();
+            m_param_storage.emplace(id_str, storage);
+        }
+        return m_param_storage[id_str];
+    }
+
+    std::vector<mlir::Value> onnxValueToMLIRValue(
+            std::vector<ONNX_NAMESPACE::Value*> values) {
+        std::vector<mlir::Value> inputs;
+        inputs.reserve(values.size());
+        for (auto v : values) {
+            // find value by outputs from previous nodes
+            inputs.push_back(m_value2value.at(v));
+        }
+        return inputs;
+    }
+
+    mlir::ModuleOp m_module;
+    mlir::MLIRContext* m_context;
+    mlir::OpBuilder m_builder;
+    std::unordered_map<ONNX_NAMESPACE::Value*, mlir::Value> m_value2value;
+    std::unordered_map<std::string, MGB::ParamStorage> m_param_storage;
+};
+
+mlir::LogicalResult removeUnusedParam(mlir::ModuleOp module) {
+    mlir::PassManager pm(module->getContext());
+    pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
+    pm.addPass(mlir::createSymbolDCEPass());
+    return pm.run(module);
+}
+
+mlir::LogicalResult import_onnx(mlir::ModuleOp module, std::string model_path) {
+    LOG_DEBUG << "\n\t\t\t Begin Import ONNX \t\t\t\n";
+    LOG_DEBUG << "load model from " << model_path << "\n";
+    ONNXImporter imp(module);
+    auto result = imp.import_onnx(model_path);
+    LOG_DEBUG << "\t\t\t End Import ONNX \t\t\t\n\n";
+    if (mlir::failed(result))
+        return result;
+    return removeUnusedParam(module);
+}
+
+}  // namespace ONNX
+}  // namespace mlir
\ No newline at end of file
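Note: the Clip branch in importer.cpp relies on the identity clip(x, lo, hi) = min(max(x, lo), hi), which is why it emits a MAX elemwise (against the min operand) followed by a MIN elemwise (against the max operand). A scalar C++ sketch of that identity, illustrative only and not part of the patch:

    #include <algorithm>
    #include <cassert>

    // clip(x, lo, hi) decomposed exactly as the importer does it
    float clip_via_min_max(float x, float lo, float hi) {
        return std::min(std::max(x, lo), hi);  // MAX with `lo`, then MIN with `hi`
    }

    int main() {
        assert(clip_via_min_max(-2.f, 0.f, 6.f) == 0.f);  // below range -> lo
        assert(clip_via_min_max(3.f, 0.f, 6.f) == 3.f);   // inside range -> x
        assert(clip_via_min_max(9.f, 0.f, 6.f) == 6.f);   // above range -> hi
    }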
diff --git a/compiler/tools/CMakeLists.txt b/compiler/tools/CMakeLists.txt
index 58f2ea21..3927e9eb 100644
--- a/compiler/tools/CMakeLists.txt
+++ b/compiler/tools/CMakeLists.txt
@@ -6,3 +6,5 @@ add_subdirectory(tinynn-exporter)
 add_subdirectory(dump-kernel)
 add_subdirectory(megcc-translate)
 add_subdirectory(kernel_exporter)
+add_subdirectory(onnx-importer)
+add_subdirectory(onnx-to-tinynn)
\ No newline at end of file
diff --git a/compiler/tools/onnx-importer/CMakeLists.txt b/compiler/tools/onnx-importer/CMakeLists.txt
new file mode 100644
index 00000000..d10ed3ba
--- /dev/null
+++ b/compiler/tools/onnx-importer/CMakeLists.txt
@@ -0,0 +1,7 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
+add_llvm_executable(onnx-importer onnx-importer.cpp NO_INSTALL_RPATH)
+llvm_update_compile_flags(onnx-importer)
+target_link_libraries(onnx-importer PRIVATE ${dialect_libs} MLIRONNXImporter Common
+                                            ${ONNX_LIBS} ${PROTOBUF_LIBS})
+mlir_check_all_link_libraries(onnx-importer)
\ No newline at end of file
diff --git a/compiler/tools/onnx-importer/onnx-importer.cpp b/compiler/tools/onnx-importer/onnx-importer.cpp
new file mode 100644
index 00000000..9906301b
--- /dev/null
+++ b/compiler/tools/onnx-importer/onnx-importer.cpp
@@ -0,0 +1,42 @@
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+
+#include "compiler/Common/Logger.h"
+#include "compiler/Common/Version.h"
+#include "compiler/Target/onnx/import.h"
+
+using namespace llvm;
+
+cl::opt<std::string> InputFile(
+        cl::Positional, cl::Required, cl::desc("<input onnx model>"));
+cl::opt<std::string> OutputFile(
+        cl::Positional, cl::Required, cl::desc("<output mlir file>"));
+cl::opt<bool> Verbose(
+        "verbose", cl::desc("log more detailed information when compiling the model"));
+
+int main(int argc, char** argv) {
+    cl::AddExtraVersionPrinter(
+            [](raw_ostream& oss) { oss << megcc::getMegccVersionString(); });
+    cl::ParseCommandLineOptions(argc, argv);
+    if (Verbose) {
+        megcc::SetLogLevel(megcc::LogLevel::DEBUG);
+    }
+    mlir::MLIRContext ctx;
+    llvm::outs() << "Import onnx model from " << InputFile.getValue() << "\n";
+    mlir::OwningOpRef<mlir::ModuleOp> mod =
+            mlir::ModuleOp::create(mlir::UnknownLoc::get(&ctx));
+    mlir::LogicalResult status =
+            mlir::ONNX::import_onnx(mod.get(), InputFile.getValue());
+    if (mlir::failed(status)) {
+        llvm::errs() << "import onnx model failed\n";
+        return -1;
+    }
+    std::error_code EC;
+    llvm::raw_fd_stream FileStream(OutputFile.getValue(), EC);
+    llvm::outs() << "Export mgb dialect to " << OutputFile.getValue() << "\n";
+    mod->print(FileStream);
+    llvm::outs() << "onnx convert to mgb dialect done.\n";
+    return 0;
+}
\ No newline at end of file
diff --git a/compiler/tools/onnx-to-tinynn/CMakeLists.txt b/compiler/tools/onnx-to-tinynn/CMakeLists.txt
new file mode 100644
index 00000000..ea6ee153
--- /dev/null
+++ b/compiler/tools/onnx-to-tinynn/CMakeLists.txt
@@ -0,0 +1,20 @@
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+
+add_llvm_executable(onnx-to-tinynn onnx-to-tinynn.cpp NO_INSTALL_RPATH)
+llvm_update_compile_flags(onnx-to-tinynn)
+target_link_libraries(
+  onnx-to-tinynn
+  PRIVATE ${dialect_libs}
+          MLIRONNXImporter
+          MLIRTinyNNExporter
+          MLIRMGBToKernel
+          MLIRMGB
+          MLIRMGBTransforms
+          MLIRKernel
+          MLIRKernelTransforms
+          KernelGen
+          Common
+          HakoParse
+          ${ONNX_LIBS}
+          ${PROTOBUF_LIBS})
+mlir_check_all_link_libraries(onnx-to-tinynn)
\ No newline at end of file
diff --git a/compiler/tools/onnx-to-tinynn/onnx-to-tinynn.cpp b/compiler/tools/onnx-to-tinynn/onnx-to-tinynn.cpp
new file mode 100644
index 00000000..17e370aa
--- /dev/null
+++ b/compiler/tools/onnx-to-tinynn/onnx-to-tinynn.cpp
@@ -0,0 +1,430 @@
+/**
+ * \file compiler/tools/onnx-to-tinynn/onnx-to-tinynn.cpp
+ *
+ * This file is part of MegCC, a deep learning compiler developed by Megvii.
+ *
+ * \copyright Copyright (c) 2021-2022 Megvii Inc. All rights reserved.
+ */
+
+#include <fstream>
+#include <functional>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Parser.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/Passes.h"
+
+#include "compiler/Common/Logger.h"
+#include "compiler/Common/Version.h"
+#include "compiler/Conversion/MGBToKernel/MGBToKernel.h"
+#include "compiler/Dialect/Kernel/IR/KernelDialect.h"
+#include "compiler/Dialect/Kernel/Transforms/Passes.h"
+#include "compiler/Dialect/MGB/IR/MGBDialect.h"
+#include "compiler/Dialect/MGB/Transforms/Passes.h"
+#include "compiler/KernelGen/KernelGen.h"
+#include "compiler/Target/Hako/hako_parse.h"
+#include "compiler/Target/TinyNN/export.h"
+#include "compiler/Target/onnx/import.h"
+using namespace llvm;
+
+cl::opt<std::string> InputFile(
+        cl::Positional, cl::Optional, cl::desc("<input onnx model>"));
+cl::opt<std::string> OutputDir(
+        cl::Positional, cl::Optional,
+        cl::desc("<output dir for the tinynn model and kernels>"));
+cl::opt<std::string> dumpDir(
+        "dump", cl::Optional,
+        cl::desc("<override the dump_dir given in the json config>"));
+cl::opt<std::string> InputShapes(
+        "input-shapes", cl::Optional, cl::desc("modify input shapes"),
+        cl::value_desc("name0=(xx0,yy0);name1=(xx1,yy1,zz1)"));
+cl::opt<bool> Verbose(
+        "verbose", cl::desc("log more detailed information when compiling the model"));
+cl::opt<bool> EnableNchw44("enable_nchw44", cl::desc("enable nchw44 trans"));
+cl::opt<bool> EnableNchw44Dot("enable_nchw44_dot", cl::desc("enable nchw44-dot trans"));
+cl::opt<bool> MGBFuseKernel("mgb_fuse_kernel", cl::desc("fuse mgb kernels as possible"));
+cl::opt<bool> SaveModel("save-model", cl::desc("save model to c"));
+cl::opt<bool> Add_nhwc2nchw_to_input(
+        "add_nhwc2nchw_to_input", cl::desc("add nhwc2nchw dimshuffle to input"));
+
+cl::opt<std::string> JsonFile(
+        "json", cl::Optional, cl::desc("config app by json"),
+        cl::value_desc("<path of json file>"));
+
+cl::opt<bool> EnableCompressWeightToFp16(
+        "enable_compress_fp16",
+        cl::desc("enable compressing model weights from fp32 to fp16; enabling "
+                 "this may affect model precision."));
+
+cl::opt<bool> Decrypt(
+        "decrypt",
+        cl::desc("only try to convert the input file to the mge format model and "
+                 "save it in ./decryption/"));
+
+extern llvm::cl::opt<megcc::KernelGen::Arch> target_arch;
+struct DumpJson {
+    struct ModelJson {
+        ModelJson() {
+            str_options["model_name"] = "";
+            str_options["model_path"] = "";
+            str_options["input_shape_str"] = "";
+            bool_options["enable_nchw44"] = false;
+            bool_options["enable_nchw44_dot"] = false;
+            bool_options["add_nhwc2nchw_to_input"] = false;
+            bool_options["mgb_fuse_kernel"] = false;
+            bool_options["enable_compress_fp16"] = false;
+        }
+        static ModelJson parse(json::Object& obj) {
+            ModelJson res;
+            for (auto& kv : res.str_options) {
+                auto key = kv.first;
+                auto value = obj.getString(key);
+                CC_ASSERT(value) << "need models/model/" << key << " string value\n";
+                res.str_options[key] = value.getValue().str();
+            }
+            for (auto& kv : res.bool_options) {
+                auto key = kv.first;
+                auto value = obj.getBoolean(key);
+                if (value) {
+                    res.bool_options[key] = value.getValue();
+                }
+            }
+            return res;
+        }
+        std::map<std::string, std::string> str_options;
+        std::map<std::string, bool> bool_options;
+        std::string to_string() const {
+            std::stringstream ss;
+            for (auto& kv : str_options) {
+                ss << kv.first << ": " << kv.second << "\n";
+            }
+            for (auto& kv : bool_options) {
+                ss << kv.first << ": " << kv.second << "\n";
+            }
+            return ss.str();
+        }
+    };
+
+    std::string dump_dir;
+    std::vector<ModelJson> models;
+    std::map<std::string, std::vector<std::string>> cv_impl;
+
+    std::string to_string() const {
+        std::stringstream ss;
+        ss << "dump " << models.size() << " models to dump_dir:" << dump_dir << "\n";
+        for (auto& model : models) {
+            ss << "{\n" << model.to_string() << "}\n";
+        }
+        ss << "cv_impl:\n";
+        ss << cv_impl.size() << "\n";
+        for (auto& kv : cv_impl) {
+            ss << kv.first << ": [";
+            for (auto& dtype : kv.second) {
+                ss << dtype << ", ";
+            }
+            ss << "]\n";
+        }
+        return ss.str();
+    }
+
+    static std::shared_ptr<DumpJson> make(std::string path) {
+        auto res = std::make_shared<DumpJson>();
+        auto buffer = MemoryBuffer::getFile(path, true);
+        CC_ASSERT(buffer) << "can not open json file " << path << "\n";
+        auto json_parse = json::parse((*buffer)->getBuffer());
+        CC_ASSERT(json_parse);
+        auto json_obj = json_parse->getAsObject();
+        auto dump_dir = json_obj->getString("dump_dir");
+        CC_ASSERT(dump_dir) << "need dump_dir key\n";
+        res->dump_dir = dump_dir.getValue().str();
+        auto model_list = json_obj->getArray("models");
+        if (model_list) {
+            for (auto& model : *model_list) {
+                auto model_dict = model.getAsObject();
+                CC_ASSERT(model_dict) << "models/model must be dict\n";
+                ModelJson model_json = ModelJson::parse(*model_dict);
+                res->models.push_back(model_json);
+            }
+        }
+        auto cv_obj = json_obj->getObject("cv");
+        if (cv_obj) {
+            for (auto& cv_kv : *cv_obj) {
+                auto cv_name = cv_kv.getFirst().str();
+                auto cv_dtypes = cv_kv.getSecond().getAsArray();
+                std::vector<std::string> dtype_vec;
+                if (cv_dtypes) {
+                    for (auto& dtype : *cv_dtypes) {
+                        dtype_vec.push_back(dtype.getAsString().getValue().str());
+                    }
+                }
+                if (dtype_vec.size() > 0) {
+                    res->cv_impl[cv_name] = dtype_vec;
+                }
+            }
+        }
+        return res;
+    }
+};
+
+class DumpCVHelper {
+public:
+    using Kerns = std::vector<const megcc::KernelGen::KernelFn*>;
+    using GenKerns = megcc::KernelGen::KernelPack::KernType;
+    struct CVConfig {
+        GenKerns kernel_type;
+        int nr_operands;
+        std::map<std::string, std::string> str_param;
+        std::map<std::string, float> flt_param;
+    };
+
+    DumpCVHelper() {
+        m_name2gen["transpose"] = {GenKerns::CVTransposeKernel, 2};
+        m_name2gen["roicopy"] = {GenKerns::RoiCopyKernel, 2};
+        m_name2gen["rotate"] = {GenKerns::RotateKernel, 2};
+        m_name2gen["resize_linear"] = {
+                GenKerns::ResizeKernel, 2, {{"imode", "LINEAR"}, {"format", "NHWC"}}};
+        m_name2gen["flip"] = {GenKerns::FlipKernel, 2};
+        m_name2gen["warp_affine_replicate_linear"] = {
+                GenKerns::WarpAffineKernel,
+                3,
+                {{"imode", "LINEAR"},
+                 {"format", "NHWC"},
+                 {"border_mode", "REPLICATE"}}};
+        m_name2gen["warp_affine_replicate_linear"].flt_param["border_val"] = 0.f;
+        m_name2gen["warp_affine_constant_linear"] = {
+                GenKerns::WarpAffineKernel,
+                4,
+                {{"imode", "LINEAR"}, {"format", "NHWC"}, {"border_mode", "CONSTANT"}}};
+        m_name2gen["warp_affine_constant_linear"].flt_param["border_val"] = 0.f;
+        m_name2gen["rgb2bgr"] = {GenKerns::CvtColorKernel, 2, {{"mode", "RGB2BGR"}}};
+        m_name2gen["rgb2yuv"] = {GenKerns::CvtColorKernel, 2, {{"mode", "RGB2YUV"}}};
+        m_name2gen["rgb2gray"] = {GenKerns::CvtColorKernel, 2, {{"mode", "RGB2GRAY"}}};
+        m_name2gen["yuv2bgr_nv21"] = {
+                GenKerns::CvtColorKernel, 2, {{"mode", "YUV2BGR_NV21"}}};
+    }
+
+    Kerns get_kerns(const std::string& cv_name, megcc::KernelGen::Arch arch) {
+        CC_ASSERT(m_name2gen.find(cv_name) != m_name2gen.end())
+                << "can not find cv " << cv_name << "\n";
+        auto kernel_type = m_name2gen[cv_name].kernel_type;
+        // use the arch argument instead of the global target_arch, and fall
+        // back to the baremetal kernels as well
+        auto kernels = megcc::KernelGen::KernelPack::GetKernel(kernel_type, arch).first;
+        {
+            auto bare_kernels = megcc::KernelGen::KernelPack::GetKernel(
+                                        kernel_type, megcc::KernelGen::Arch::BAREMETAL)
+                                        .first;
+            for (auto x : bare_kernels) {
+                kernels.push_back(x);
+            }
+        }
+        return kernels;
+    }
+
+    CVConfig get_kern_config(const std::string& cv_name) {
+        CC_ASSERT(m_name2gen.find(cv_name) != m_name2gen.end())
+                << "not support cv " << cv_name << "\n";
+        return m_name2gen[cv_name];
+    }
+
+private:
+    std::unordered_map<std::string, CVConfig> m_name2gen;
+};
+
+static inline std::unordered_map<std::string, megcc::CCAttr> get_attr_map(
+        const DumpCVHelper::CVConfig& config, const std::string& dtype) {
+    std::unordered_map<std::string, megcc::CCAttr> attr_map;
+    attr_map["nr_operands"] = megcc::CCAttr(config.nr_operands);
+    for (int i = 0; i < config.nr_operands; ++i) {
+        megcc::CCOperand operand;
+        operand.dtype = dtype;
+        attr_map["operand:" + std::to_string(i)] = megcc::CCAttr(operand);
+    }
+    for (auto& kv : config.str_param) {
+        attr_map[kv.first] = megcc::CCAttr(kv.second);
+    }
+    for (auto& kv : config.flt_param) {
+        attr_map[kv.first] = megcc::CCAttr(kv.second);
+    }
+    return attr_map;
+}
+
+static void export_cv_one_dtype(
+        mlir::KernelExporter& kernel_exporter, std::string& cv_name,
+        std::string& cv_dtype) {
+    static DumpCVHelper dump_cv_helper;
+    auto kernels = dump_cv_helper.get_kerns(cv_name, target_arch);
+    CC_ASSERT(kernels.size() > 0) << "export " << cv_name << " failed";
+    auto attr_map = get_attr_map(dump_cv_helper.get_kern_config(cv_name), cv_dtype);
+    megcc::CodeGenContext ctx(attr_map);
+    std::function<void(std::vector<megcc::KernelGen::KernelObj>&)> reg_dep =
+            [&](std::vector<megcc::KernelGen::KernelObj>& deps) {
+                for (auto& dep_kern : deps) {
+                    kernel_exporter.addInternalKernel(
+                            dep_kern.kernel_symbol, "", dep_kern.kernel_body, "", "");
+                    reg_dep(dep_kern.kernel_dep);
+                }
+            };
+    for (auto kernel : kernels) {
+        if (kernel->IsCVAvailable(&ctx)) {
+            auto kern_sym = kernel->GetCVKernelSymbol(&ctx);
+            auto sig = kernel->GetCVKernelSignature(&ctx);
+            auto body = kernel->GetCVKernelBody(&ctx);
+            auto deps = kernel->GetDependInternalSymbol(&ctx);
+            reg_dep(deps);
+            kernel_exporter.addCVKernel(kern_sym, sig, body);
+            return;
+        }
+    }
+    CC_ASSERT(0) << "no usable kernel for " << cv_name << "\n";
+}
+
+static void export_cv_opr(
+        mlir::KernelExporter& kernel_exporter,
+        const std::map<std::string, std::vector<std::string>>& cv_impl) {
+    for (auto& kv : cv_impl) {
+        auto cv_name = kv.first;
+        auto dtypes = kv.second;
+        for (auto& dtype : dtypes) {
+            export_cv_one_dtype(kernel_exporter, cv_name, dtype);
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    cl::AddExtraVersionPrinter(
+            [](raw_ostream& oss) { oss << megcc::getMegccVersionString(); });
+    mlir::registerPassManagerCLOptions();
+    cl::ParseCommandLineOptions(argc, argv);
+    if (Verbose) {
+        megcc::SetLogLevel(megcc::LogLevel::DEBUG);
+    }
+
+    std::shared_ptr<DumpJson> dump_info;
+    if (JsonFile.length() > 0) {
+        dump_info = DumpJson::make(JsonFile.getValue());
+        if (dumpDir.length() > 0) {
+            dump_info->dump_dir = dumpDir.getValue();
+        }
+        llvm::outs() << dump_info->to_string();
+    } else {
+        CC_ASSERT(InputFile.length() > 0);
+        if (!Decrypt)
+            CC_ASSERT(OutputDir.length() > 0);
+        dump_info = std::make_shared<DumpJson>();
+        dump_info->dump_dir = OutputDir.getValue();
+        DumpJson::ModelJson model_json;
+        model_json.str_options["model_name"] = "";
+        model_json.str_options["model_path"] = InputFile.getValue();
+        model_json.str_options["input_shape_str"] = InputShapes.getValue();
+        dump_info->models.push_back(model_json);
+    }
+
+    if (Decrypt) {
+        for (auto model : dump_info->models) {
+            std::string model_path = model.str_options.at("model_path");
+            size_t found = model_path.find_last_of('/');
+            std::string file_name =
+                    model_path.substr((found == std::string::npos) ? 0 : found + 1);
+            std::ifstream fin(model_path, std::ios::in | std::ios::binary);
+            std::vector<char> model_buffer(std::istreambuf_iterator<char>(fin), {});
+            fin.close();
+
+            megcc::DecryptedModel&& res = megcc::parse_model(model_buffer);
+            auto& mdl_model_buffer = res.model;
+            megcc::EncryptionType enc_type = res.enc_type;
+            if (enc_type == megcc::EncryptionType::NONE) {
+                if (JsonFile.length() > 0) {
+                    llvm::outs() << "Warning: " << file_name
+                                 << " does NOT need decryption.\n";
+                } else {
+                    CC_ASSERT(0) << file_name << " does NOT need decryption.\n";
+                }
+            }
+            llvm::sys::fs::create_directories("./decryption", true);
+            std::string out_name = "./decryption/" + file_name + ".mge";
+            std::ofstream fout(out_name, std::ios::out | std::ios::binary);
+            fout.write(
+                    reinterpret_cast<const char*>(mdl_model_buffer.data()),
+                    mdl_model_buffer.size());
+            fout.close();
+        }
+        llvm::outs() << "Decrypted model has been saved into ./decryption\n";
+    } else {
+        auto dump_dir = dump_info->dump_dir;
+        mlir::KernelExporter kernel_exporter;
+        for (auto model : dump_info->models) {
+            mlir::MLIRContext ctx;
+            mlir::ONNX::ONNXImporterOptions options;
+            auto model_name = model.str_options.at("model_name");
+            options.model_path = model.str_options.at("model_path");
+            options.input_shape_str = model.str_options.at("input_shape_str");
+            if (model_name.size() > 0) {
+                options.module_name = model_name;
+            } else {
+                llvm::SmallVector<llvm::StringRef> dir_names;
+                llvm::SplitString(InputFile, dir_names, "/");
+                llvm::SmallVector<llvm::StringRef> names;
+                llvm::SplitString(dir_names[dir_names.size() - 1], names, ".");
+                options.module_name = names[0].str();
+            }
+            auto model_input = model.str_options.at("model_path");
+            llvm::outs() << "Import onnx model from " << model_input << "\n";
+            mlir::OwningOpRef<mlir::ModuleOp> mod =
+                    mlir::ModuleOp::create(mlir::UnknownLoc::get(&ctx));
+            auto status = mlir::ONNX::import_onnx(mod.get(), model_input);
+            if (mlir::failed(status)) {
+                llvm::outs() << "import onnx model failed\n";
+                return -1;
+            }
+            mlir::PassManager pm(&ctx);
+
+            pm.addPass(mlir::createMGBToKernelPass());
+            pm.addNestedPass<mlir::FuncOp>(mlir::createMemoryForwardingPass());
+            pm.addPass(mlir::createKernelMaterializationPass());
+            pm.addNestedPass<mlir::FuncOp>(mlir::createStaticMemoryPlanningPass());
+            // pm.addNestedPass<mlir::FuncOp>(mlir::createKernelFinalCleanPass());
+            //! Now all the memory is allocated in runtime, the Deallocation
+            //! instruction is not used.
+            // pm.addNestedPass<mlir::FuncOp>(mlir::createBufferDeallocationPass());
+            pm.addNestedPass<mlir::FuncOp>(
+                    mlir::bufferization::createFinalizingBufferizePass());
+            llvm::outs() << "Apply createMGBToKernelPass and "
+                            "createKernelMaterializationPass to the dialect.\n";
+            if (failed(pm.run(mod.get()))) {
+                return -1;
+            }
+            llvm::outs() << "Export tinynn model and kernel to dir " << dump_dir
+                         << "\n";
+
+            if (!llvm::sys::fs::exists(dump_dir.c_str())) {
+                llvm::sys::fs::create_directories(dump_dir.c_str());
+            } else {
+                CC_ASSERT(llvm::sys::fs::is_directory(dump_dir.c_str()))
+                        << "output: " << dump_dir
+                        << " exists and is not a directory; remove it manually or "
+                           "choose another one";
+            }
+            mlir::export_tinynn_model(
+                    mod.get(), dump_dir + "/" + options.module_name + ".tiny",
+                    SaveModel, kernel_exporter,
+                    model.bool_options.at("enable_compress_fp16"));
+            llvm::outs() << "onnx model convert to tinynn model "
+                         << options.module_name << " done.\n";
+        }
+        export_cv_opr(kernel_exporter, dump_info->cv_impl);
+        kernel_exporter.write(dump_dir);
+        llvm::outs() << "onnx model convert to tinynn kernel done.\n";
+    }
+    return 0;
+}
+
+// vim: syntax=cpp.doxygen
\ No newline at end of file
diff --git a/third_party/onnx b/third_party/onnx
new file mode 160000
index 00000000..60b0a340
--- /dev/null
+++ b/third_party/onnx
@@ -0,0 +1 @@
+Subproject commit 60b0a34037f2980c519793b1bbc3a6ed79b0dfa3
diff --git a/third_party/protobuf b/third_party/protobuf
new file mode 160000
index 00000000..d76a6300
--- /dev/null
+++ b/third_party/protobuf
@@ -0,0 +1 @@
+Subproject commit d76a6300d29d8d9feeeadddc28925ebd610b1f31

From 08f1655ec4e8817df0c403ef73631c53dfa9d1c4 Mon Sep 17 00:00:00 2001
From: cyLi-Tiger <2017202049@ruc.edu.cn>
Date: Sun, 29 Oct 2023 14:06:41 +0800
Subject: [PATCH 3/3] update ConvKernel isAvailable checking

---
 compiler/lib/KernelGen/Common/ConvKernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/lib/KernelGen/Common/ConvKernel.h b/compiler/lib/KernelGen/Common/ConvKernel.h
index f58c25ac..b5c962eb 100644
--- a/compiler/lib/KernelGen/Common/ConvKernel.h
+++ b/compiler/lib/KernelGen/Common/ConvKernel.h
@@ -14,7 +14,7 @@ class ConvImpl : public KernelFunc {
     static bool is_channel_broadcast_bias(TContext* ctx) {
         if (is_bias(ctx)) {
             CCOperand bias = ctx->getAttrOprand("operand:2");
-            return bias.shape[0] == 1 && bias.shape[2] == 1 && bias.shape[3] == 1;
+            return bias.shape.size() == 1 || (bias.shape.size() == 4 && bias.shape[0] == 1 && bias.shape[2] == 1 && bias.shape[3] == 1);
         }
         return false;
     }
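Note: the rank check is placed first so that the 4-D indexing is never evaluated for a 1-D bias (indexing shape[2]/shape[3] on a 1-element vector would be out of range). A standalone C++ sketch of the two bias layouts the updated predicate accepts — the helper name here is hypothetical, not part of the patch:

    #include <cassert>
    #include <vector>

    // a conv bias broadcasts over channels when it is either the 4-D shape
    // {1, C, 1, 1} or, after PATCH 3/3, the plain 1-D shape {C}
    bool is_channel_broadcast_bias_shape(const std::vector<size_t>& shape) {
        if (shape.size() == 1)  // 1-D {C}: checked first, so the 4-D
            return true;        // indices below are never touched
        return shape.size() == 4 && shape[0] == 1 && shape[2] == 1 && shape[3] == 1;
    }

    int main() {
        assert(is_channel_broadcast_bias_shape({16}));            // {C}
        assert(is_channel_broadcast_bias_shape({1, 16, 1, 1}));   // {1, C, 1, 1}
        assert(!is_channel_broadcast_bias_shape({1, 16, 3, 3}));  // full 4-D tensor
    }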