diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs
index 460fa43ee11..255fd94d4a8 100644
--- a/nnpackage/schema/circle_schema.fbs
+++ b/nnpackage/schema/circle_schema.fbs
@@ -45,8 +45,8 @@ file_extension "circle";
 // end to ensure backwards compatibility.
 
 // The type of data stored in a tensor.
+// Q4_0, Q4_1, Q8_0, Q8_1 follow the ggml quantization spec (https://github.com/ggerganov/ggml)
 enum TensorType : byte {
-  UINT4 = -1,
   FLOAT32 = 0,
   FLOAT16 = 1,
   INT32 = 2,
@@ -68,6 +68,12 @@ enum TensorType : byte {
   UINT32 = 15,
   UINT16 = 16,
   INT4 = 17,
+  // The following use negative values to represent TensorTypes that do not exist in the TensorFlow Lite schema
+  UINT4 = -1,
+  Q4_0 = -2,
+  Q4_1 = -3,
+  Q8_0 = -4,
+  Q8_1 = -5,
 }
 
 // Custom quantization parameters for experimenting with new quantization
@@ -78,7 +84,7 @@ table CustomQuantization {
 
 // Represents a specific quantization technique's parameters.
 union QuantizationDetails {
-  CustomQuantization,
+  CustomQuantization
 }
 
 // Parameters for converting a quantized tensor back to float.
diff --git a/runtime/libs/circle-schema/include/circle_schema_generated.h b/runtime/libs/circle-schema/include/circle_schema_generated.h
index 3da596b1ebc..7a563042b81 100644
--- a/runtime/libs/circle-schema/include/circle_schema_generated.h
+++ b/runtime/libs/circle-schema/include/circle_schema_generated.h
@@ -701,6 +701,10 @@ struct ModelT;
 
 enum TensorType : int8_t
 {
+  TensorType_Q8_1 = -5,
+  TensorType_Q8_0 = -4,
+  TensorType_Q4_1 = -3,
+  TensorType_Q4_0 = -2,
   TensorType_UINT4 = -1,
   TensorType_FLOAT32 = 0,
   TensorType_FLOAT16 = 1,
@@ -720,13 +724,14 @@ enum TensorType : int8_t
   TensorType_UINT32 = 15,
   TensorType_UINT16 = 16,
   TensorType_INT4 = 17,
-  TensorType_MIN = TensorType_UINT4,
+  TensorType_MIN = TensorType_Q8_1,
   TensorType_MAX = TensorType_INT4
 };
 
-inline const TensorType (&EnumValuesTensorType())[19]
+inline const TensorType (&EnumValuesTensorType())[23]
 {
   static const TensorType values[] = {
+    TensorType_Q8_1,    TensorType_Q8_0,      TensorType_Q4_1,    TensorType_Q4_0,
     TensorType_UINT4,   TensorType_FLOAT32,   TensorType_FLOAT16, TensorType_INT32,
     TensorType_UINT8,   TensorType_INT64,     TensorType_STRING,  TensorType_BOOL,
     TensorType_INT16,   TensorType_COMPLEX64, TensorType_INT8,    TensorType_FLOAT64,
@@ -737,18 +742,18 @@ inline const TensorType (&EnumValuesTensorType())[19]
 
 inline const char *const *EnumNamesTensorType()
 {
-  static const char *const names[20] = {"UINT4", "FLOAT32", "FLOAT16", "INT32", "UINT8",
-                                        "INT64", "STRING", "BOOL", "INT16", "COMPLEX64",
-                                        "INT8", "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE",
-                                        "VARIANT", "UINT32", "UINT16", "INT4", nullptr};
+  static const char *const names[24] = {
+    "Q8_1",       "Q8_0",   "Q4_1",     "Q4_0",    "UINT4",  "FLOAT32",   "FLOAT16", "INT32",
+    "UINT8",      "INT64",  "STRING",   "BOOL",    "INT16",  "COMPLEX64", "INT8",    "FLOAT64",
+    "COMPLEX128", "UINT64", "RESOURCE", "VARIANT", "UINT32", "UINT16",    "INT4",    nullptr};
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e)
 {
-  if (::flatbuffers::IsOutRange(e, TensorType_UINT4, TensorType_INT4))
+  if (::flatbuffers::IsOutRange(e, TensorType_Q8_1, TensorType_INT4))
     return "";
-  const size_t index = static_cast<size_t>(e) - static_cast<size_t>(TensorType_UINT4);
+  const size_t index = static_cast<size_t>(e) - static_cast<size_t>(TensorType_Q8_1);
   return EnumNamesTensorType()[index];
 }
 
diff --git a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h
index 0ec0e07119e..86b8d2a60a1 100644
--- a/runtime/onert/core/include/ir/DataType.h
+++ b/runtime/onert/core/include/ir/DataType.h
@@ -39,6 +39,8 @@ enum class DataType
   QUANT_INT16_ASYMM = 10,
   QUANT_INT8_SYMM_PER_CHANNEL = 11,
   QUANT_INT16_SYMM = 12,
+  QUANT_GGML_Q4_0 = 13,
+  QUANT_GGML_Q8_0 = 14
 };
 
 size_t sizeOfDataType(DataType data_type);
diff --git a/runtime/onert/core/include/ir/OperandInfo.h b/runtime/onert/core/include/ir/OperandInfo.h
index 2957be23e99..f6739033e24 100644
--- a/runtime/onert/core/include/ir/OperandInfo.h
+++ b/runtime/onert/core/include/ir/OperandInfo.h
@@ -120,7 +120,7 @@ class OperandInfo
    * @brief Return size of tensor (bytes)
    * @return Tensor size
    */
-  size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
+  size_t total_size() const;
 
   MemAllocType memAllocType() const { return _alloc_type; }
   void setAsConstant() { _const = true; }
diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
index 07670c72081..8f9ed3ea29e 100644
--- a/runtime/onert/core/src/ir/DataType.cc
+++ b/runtime/onert/core/src/ir/DataType.cc
@@ -52,6 +52,9 @@ size_t sizeOfDataType(DataType data_type)
       return sizeof(int16_t);
     case DataType::QUANT_INT16_SYMM:
       return sizeof(int16_t);
+    // Chunk-quantized types have no per-element size; handled in OperandInfo::total_size()
+    // case DataType::QUANT_GGML_Q4_0:
+    // case DataType::QUANT_GGML_Q8_0:
     default:
       throw std::runtime_error{"Unsupported type size"};
   }
diff --git a/runtime/onert/core/src/ir/OperandInfo.cc b/runtime/onert/core/src/ir/OperandInfo.cc
new file mode 100644
index 00000000000..71f2091b52e
--- /dev/null
+++ b/runtime/onert/core/src/ir/OperandInfo.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandInfo.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+
+size_t OperandInfo::total_size() const
+{
+  const auto data_type = _typeInfo.type();
+  try
+  {
+    return _shape.num_elements() * sizeOfDataType(data_type);
+  }
+  catch (const std::runtime_error &e)
+  {
+    if (data_type != DataType::QUANT_GGML_Q4_0 && data_type != DataType::QUANT_GGML_Q8_0)
+      throw e;
+
+    // Assume the last dim is a multiple of the chunk size (32)
+    assert(_shape.dim(_shape.rank() - 1) % 32 == 0);
+    const auto num_chunks = _shape.num_elements() / 32;
+    const auto chunk_size = data_type == DataType::QUANT_GGML_Q4_0
+                              ? (sizeof(uint8_t) * 32 / 2 + sizeof(uint16_t))
+                              : (sizeof(uint8_t) * 32 + sizeof(uint16_t));
+    return num_chunks * chunk_size;
+  }
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/loader/BaseLoader.h b/runtime/onert/core/src/loader/BaseLoader.h
index c3a50b0d8c9..e66f90ef4fc 100644
--- a/runtime/onert/core/src/loader/BaseLoader.h
+++ b/runtime/onert/core/src/loader/BaseLoader.h
@@ -94,6 +94,7 @@ template <typename LoaderDomain> class BaseLoader
 
   // Helper functions
   ir::Activation convertActivation(ActivationFunctionType type);
+  virtual ir::DataType getTensorDataType(const Tensor *tensor);
   ir::DataType tensorTypeToDataType(TensorType type);
   ir::OperandIndex tensorIdxToOperandIdx(int32_t tensorIdx);
   flexbuffers::Map getCustomOpAttrMap(const Operator *op);
@@ -295,6 +296,12 @@ BaseLoader<LoaderDomain>::BaseLoader::convertActivation(const ActivationFunction
   }
 }
 
+template <typename LoaderDomain>
+ir::DataType BaseLoader<LoaderDomain>::BaseLoader::getTensorDataType(const Tensor *tensor)
+{
+  return tensorTypeToDataType(tensor->type());
+}
+
 template <typename LoaderDomain>
 ir::DataType BaseLoader<LoaderDomain>::BaseLoader::tensorTypeToDataType(const TensorType type)
 {
@@ -381,7 +388,7 @@ ir::OperandIndex BaseLoader<LoaderDomain>::loadOperand(const Tensor *tensor, ir:
   // be used.
 
   // TypeInfo
-  ir::TypeInfo type_info(tensorTypeToDataType(tensor->type()));
+  ir::TypeInfo type_info(getTensorDataType(tensor));
   loadQuantization(tensor, type_info);
   loadSparsity(tensor, type_info);
 
diff --git a/runtime/onert/core/src/loader/CircleLoader.cc b/runtime/onert/core/src/loader/CircleLoader.cc
index 1d502308bea..20af4148f64 100644
--- a/runtime/onert/core/src/loader/CircleLoader.cc
+++ b/runtime/onert/core/src/loader/CircleLoader.cc
@@ -87,6 +87,17 @@ class CircleLoader final : public loader::BaseLoader<LoaderDomain>
     }
   }
 
+protected:
+  ir::DataType getTensorDataType(const Tensor *tensor) override
+  {
+    auto type = tensor->type();
+    if (type == TensorType::TensorType_Q4_0)
+      return ir::DataType::QUANT_GGML_Q4_0;
+    if (type == TensorType::TensorType_Q8_0)
+      return ir::DataType::QUANT_GGML_Q8_0;
+    return tensorTypeToDataType(type);
+  }
+
 private:
   std::unique_ptr<ir::Graph> loadSubgraph(const circle::SubGraph *circle_subg) override
   {
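
Note on the Q4_0/Q8_0 size math in OperandInfo::total_size() above: ggml stores these types in 32-element chunks, each carrying a 16-bit scale, so a Q4_0 chunk is 32/2 + 2 = 18 bytes and a Q8_0 chunk is 32 + 2 = 34 bytes. The snippet below is a minimal standalone sketch of that arithmetic for sanity-checking expected tensor sizes; the helper name and the example shape are illustrative only and are not part of this patch.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>

// Illustrative helper (not part of this patch): bytes needed to store
// `num_elements` values in ggml Q4_0 or Q8_0 block-quantized form.
//   Q4_0 chunk: 32 x 4-bit quants (16 bytes) + fp16 scale (2 bytes) = 18 bytes
//   Q8_0 chunk: 32 x 8-bit quants (32 bytes) + fp16 scale (2 bytes) = 34 bytes
size_t ggmlBlockQuantSize(size_t num_elements, bool is_q4_0)
{
  constexpr size_t kChunk = 32; // elements per chunk, as assumed in total_size()
  assert(num_elements % kChunk == 0);
  const size_t num_chunks = num_elements / kChunk;
  const size_t chunk_size = is_q4_0 ? (kChunk / 2 + sizeof(uint16_t)) // 18
                                    : (kChunk + sizeof(uint16_t));    // 34
  return num_chunks * chunk_size;
}

int main()
{
  // e.g. a [1, 4096] weight tensor -> 128 chunks of 32 elements
  std::cout << ggmlBlockQuantSize(4096, true) << '\n';  // 2304 bytes (Q4_0)
  std::cout << ggmlBlockQuantSize(4096, false) << '\n'; // 4352 bytes (Q8_0)
}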