diff --git a/nnpackage/schema/circle_schema.fbs b/nnpackage/schema/circle_schema.fbs index 826f0d649dc..bd5aa950c5c 100644 --- a/nnpackage/schema/circle_schema.fbs +++ b/nnpackage/schema/circle_schema.fbs @@ -68,6 +68,11 @@ enum TensorType : byte { UINT32 = 15, UINT16 = 16, INT4 = 17, + // Q4_0, Q4_1, Q8_0, Q8_1 follow the ggml quantization spec (https://github.com/ggerganov/ggml) + Q4_0 = 18, + Q4_1 = 19, + Q8_0 = 20, + Q8_1 = 21, } // Custom quantization parameters for experimenting with new quantization @@ -76,15 +81,9 @@ table CustomQuantization { custom:[ubyte] (force_align: 16); } -// Block quantization: from ggml quantization (https://github.com/ggerganov/ggml) -table CircleBlockQuantization { - name:string; -} - // Represents a specific quantization technique's parameters. union QuantizationDetails { - CustomQuantization, - CircleBlockQuantization + CustomQuantization } // Parameters for converting a quantized tensor back to float. diff --git a/runtime/libs/circle-schema/include/circle_schema_generated.h b/runtime/libs/circle-schema/include/circle_schema_generated.h index ed98085ebaa..bc2ea616e2f 100644 --- a/runtime/libs/circle-schema/include/circle_schema_generated.h +++ b/runtime/libs/circle-schema/include/circle_schema_generated.h @@ -1,3 +1,20 @@ +/* + * Copyright (c) 2019-2024 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +// command: flatc -c --gen-onefile --gen-object-api ../../../../nnpackage/schema/circle_schema.fbs // automatically generated by the FlatBuffers compiler, do not modify #ifndef FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_ @@ -18,10 +35,6 @@ struct CustomQuantization; struct CustomQuantizationBuilder; struct CustomQuantizationT; -struct CircleBlockQuantization; -struct CircleBlockQuantizationBuilder; -struct CircleBlockQuantizationT; - struct QuantizationParameters; struct QuantizationParametersBuilder; struct QuantizationParametersT; @@ -707,33 +720,38 @@ enum TensorType : int8_t TensorType_UINT32 = 15, TensorType_UINT16 = 16, TensorType_INT4 = 17, + TensorType_Q4_0 = 18, + TensorType_Q4_1 = 19, + TensorType_Q8_0 = 20, + TensorType_Q8_1 = 21, TensorType_MIN = TensorType_UINT4, - TensorType_MAX = TensorType_INT4 + TensorType_MAX = TensorType_Q8_1 }; -inline const TensorType (&EnumValuesTensorType())[19] +inline const TensorType (&EnumValuesTensorType())[23] { static const TensorType values[] = { TensorType_UINT4, TensorType_FLOAT32, TensorType_FLOAT16, TensorType_INT32, TensorType_UINT8, TensorType_INT64, TensorType_STRING, TensorType_BOOL, TensorType_INT16, TensorType_COMPLEX64, TensorType_INT8, TensorType_FLOAT64, TensorType_COMPLEX128, TensorType_UINT64, TensorType_RESOURCE, TensorType_VARIANT, - TensorType_UINT32, TensorType_UINT16, TensorType_INT4}; + TensorType_UINT32, TensorType_UINT16, TensorType_INT4, TensorType_Q4_0, + TensorType_Q4_1, TensorType_Q8_0, TensorType_Q8_1}; return values; } inline const char *const *EnumNamesTensorType() { - static const char *const names[20] = {"UINT4", "FLOAT32", "FLOAT16", "INT32", "UINT8", - "INT64", "STRING", "BOOL", "INT16", "COMPLEX64", - "INT8", "FLOAT64", "COMPLEX128", "UINT64", "RESOURCE", - "VARIANT", "UINT32", "UINT16", "INT4", nullptr}; + static const char *const names[24] = { + "UINT4", "FLOAT32", "FLOAT16", "INT32", "UINT8", "INT64", "STRING", "BOOL", + "INT16", "COMPLEX64", "INT8", "FLOAT64", 
"COMPLEX128", "UINT64", "RESOURCE", "VARIANT", + "UINT32", "UINT16", "INT4", "Q4_0", "Q4_1", "Q8_0", "Q8_1", nullptr}; return names; } inline const char *EnumNameTensorType(TensorType e) { - if (::flatbuffers::IsOutRange(e, TensorType_UINT4, TensorType_INT4)) + if (::flatbuffers::IsOutRange(e, TensorType_UINT4, TensorType_Q8_1)) return ""; const size_t index = static_cast(e) - static_cast(TensorType_UINT4); return EnumNamesTensorType()[index]; @@ -743,30 +761,27 @@ enum QuantizationDetails : uint8_t { QuantizationDetails_NONE = 0, QuantizationDetails_CustomQuantization = 1, - QuantizationDetails_CircleBlockQuantization = 2, QuantizationDetails_MIN = QuantizationDetails_NONE, - QuantizationDetails_MAX = QuantizationDetails_CircleBlockQuantization + QuantizationDetails_MAX = QuantizationDetails_CustomQuantization }; -inline const QuantizationDetails (&EnumValuesQuantizationDetails())[3] +inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2] { static const QuantizationDetails values[] = {QuantizationDetails_NONE, - QuantizationDetails_CustomQuantization, - QuantizationDetails_CircleBlockQuantization}; + QuantizationDetails_CustomQuantization}; return values; } inline const char *const *EnumNamesQuantizationDetails() { - static const char *const names[4] = {"NONE", "CustomQuantization", "CircleBlockQuantization", - nullptr}; + static const char *const names[3] = {"NONE", "CustomQuantization", nullptr}; return names; } inline const char *EnumNameQuantizationDetails(QuantizationDetails e) { if (::flatbuffers::IsOutRange(e, QuantizationDetails_NONE, - QuantizationDetails_CircleBlockQuantization)) + QuantizationDetails_CustomQuantization)) return ""; const size_t index = static_cast(e); return EnumNamesQuantizationDetails()[index]; @@ -782,11 +797,6 @@ template <> struct QuantizationDetailsTraits static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; }; -template <> struct QuantizationDetailsTraits -{ - static const 
QuantizationDetails enum_value = QuantizationDetails_CircleBlockQuantization; -}; - template struct QuantizationDetailsUnionTraits { static const QuantizationDetails enum_value = QuantizationDetails_NONE; @@ -797,11 +807,6 @@ template <> struct QuantizationDetailsUnionTraits static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization; }; -template <> struct QuantizationDetailsUnionTraits -{ - static const QuantizationDetails enum_value = QuantizationDetails_CircleBlockQuantization; -}; - struct QuantizationDetailsUnion { QuantizationDetails type; @@ -862,18 +867,6 @@ struct QuantizationDetailsUnion ? reinterpret_cast(value) : nullptr; } - circle::CircleBlockQuantizationT *AsCircleBlockQuantization() - { - return type == QuantizationDetails_CircleBlockQuantization - ? reinterpret_cast(value) - : nullptr; - } - const circle::CircleBlockQuantizationT *AsCircleBlockQuantization() const - { - return type == QuantizationDetails_CircleBlockQuantization - ? reinterpret_cast(value) - : nullptr; - } }; bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const void *obj, @@ -6252,81 +6245,6 @@ ::flatbuffers::Offset CreateCustomQuantization(::flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); -struct CircleBlockQuantizationT : public ::flatbuffers::NativeTable -{ - typedef CircleBlockQuantization TableType; - std::string name{}; -}; - -struct CircleBlockQuantization FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table -{ - typedef CircleBlockQuantizationT NativeTableType; - typedef CircleBlockQuantizationBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE - { - VT_NAME = 4 - }; - const ::flatbuffers::String *name() const - { - return GetPointer(VT_NAME); - } - bool Verify(::flatbuffers::Verifier &verifier) const - { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) && - verifier.VerifyString(name()) && 
verifier.EndTable(); - } - CircleBlockQuantizationT * - UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(CircleBlockQuantizationT *_o, - const ::flatbuffers::resolver_function_t *_resolver = nullptr) const; - static ::flatbuffers::Offset - Pack(::flatbuffers::FlatBufferBuilder &_fbb, const CircleBlockQuantizationT *_o, - const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); -}; - -struct CircleBlockQuantizationBuilder -{ - typedef CircleBlockQuantization Table; - ::flatbuffers::FlatBufferBuilder &fbb_; - ::flatbuffers::uoffset_t start_; - void add_name(::flatbuffers::Offset<::flatbuffers::String> name) - { - fbb_.AddOffset(CircleBlockQuantization::VT_NAME, name); - } - explicit CircleBlockQuantizationBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) - { - start_ = fbb_.StartTable(); - } - ::flatbuffers::Offset Finish() - { - const auto end = fbb_.EndTable(start_); - auto o = ::flatbuffers::Offset(end); - return o; - } -}; - -inline ::flatbuffers::Offset -CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb, - ::flatbuffers::Offset<::flatbuffers::String> name = 0) -{ - CircleBlockQuantizationBuilder builder_(_fbb); - builder_.add_name(name); - return builder_.Finish(); -} - -inline ::flatbuffers::Offset -CreateCircleBlockQuantizationDirect(::flatbuffers::FlatBufferBuilder &_fbb, - const char *name = nullptr) -{ - auto name__ = name ? _fbb.CreateString(name) : 0; - return circle::CreateCircleBlockQuantization(_fbb, name__); -} - -::flatbuffers::Offset -CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb, - const CircleBlockQuantizationT *_o, - const ::flatbuffers::rehasher_function_t *_rehasher = nullptr); - struct QuantizationParametersT : public ::flatbuffers::NativeTable { typedef QuantizationParameters TableType; @@ -6380,12 +6298,6 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::T ? 
static_cast(details()) : nullptr; } - const circle::CircleBlockQuantization *details_as_CircleBlockQuantization() const - { - return details_type() == circle::QuantizationDetails_CircleBlockQuantization - ? static_cast(details()) - : nullptr; - } int32_t quantized_dimension() const { return GetField(VT_QUANTIZED_DIMENSION, 0); } bool Verify(::flatbuffers::Verifier &verifier) const { @@ -6415,13 +6327,6 @@ QuantizationParameters::details_as() const return details_as_CustomQuantization(); } -template <> -inline const circle::CircleBlockQuantization * -QuantizationParameters::details_as() const -{ - return details_as_CircleBlockQuantization(); -} - struct QuantizationParametersBuilder { typedef QuantizationParameters Table; @@ -20834,53 +20739,6 @@ CreateCustomQuantization(::flatbuffers::FlatBufferBuilder &_fbb, const CustomQua return circle::CreateCustomQuantization(_fbb, _custom); } -inline CircleBlockQuantizationT * -CircleBlockQuantization::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const -{ - auto _o = std::unique_ptr(new CircleBlockQuantizationT()); - UnPackTo(_o.get(), _resolver); - return _o.release(); -} - -inline void -CircleBlockQuantization::UnPackTo(CircleBlockQuantizationT *_o, - const ::flatbuffers::resolver_function_t *_resolver) const -{ - (void)_o; - (void)_resolver; - { - auto _e = name(); - if (_e) - _o->name = _e->str(); - } -} - -inline ::flatbuffers::Offset -CircleBlockQuantization::Pack(::flatbuffers::FlatBufferBuilder &_fbb, - const CircleBlockQuantizationT *_o, - const ::flatbuffers::rehasher_function_t *_rehasher) -{ - return CreateCircleBlockQuantization(_fbb, _o, _rehasher); -} - -inline ::flatbuffers::Offset -CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb, - const CircleBlockQuantizationT *_o, - const ::flatbuffers::rehasher_function_t *_rehasher) -{ - (void)_rehasher; - (void)_o; - struct _VectorArgs - { - ::flatbuffers::FlatBufferBuilder *__fbb; - const CircleBlockQuantizationT *__o; - const 
::flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); - return circle::CreateCircleBlockQuantization(_fbb, _name); -} - inline QuantizationParametersT * QuantizationParameters::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const { @@ -30040,11 +29898,6 @@ inline bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const v auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - case QuantizationDetails_CircleBlockQuantization: - { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } default: return true; } @@ -30081,11 +29934,6 @@ inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetai auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - case QuantizationDetails_CircleBlockQuantization: - { - auto ptr = reinterpret_cast(obj); - return ptr->UnPack(resolver); - } default: return nullptr; } @@ -30103,11 +29951,6 @@ QuantizationDetailsUnion::Pack(::flatbuffers::FlatBufferBuilder &_fbb, auto ptr = reinterpret_cast(value); return CreateCustomQuantization(_fbb, ptr, _rehasher).Union(); } - case QuantizationDetails_CircleBlockQuantization: - { - auto ptr = reinterpret_cast(value); - return CreateCircleBlockQuantization(_fbb, ptr, _rehasher).Union(); - } default: return 0; } @@ -30124,12 +29967,6 @@ inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDeta new circle::CustomQuantizationT(*reinterpret_cast(u.value)); break; } - case QuantizationDetails_CircleBlockQuantization: - { - value = new circle::CircleBlockQuantizationT( - *reinterpret_cast(u.value)); - break; - } default: break; } @@ -30145,12 +29982,6 @@ inline void QuantizationDetailsUnion::Reset() delete ptr; break; } - case QuantizationDetails_CircleBlockQuantization: - { - auto ptr = reinterpret_cast(value); - delete ptr; - break; - } default: break; } diff --git 
a/runtime/onert/core/include/ir/DataType.h b/runtime/onert/core/include/ir/DataType.h index 774622bfc4f..86b8d2a60a1 100644 --- a/runtime/onert/core/include/ir/DataType.h +++ b/runtime/onert/core/include/ir/DataType.h @@ -39,8 +39,8 @@ enum class DataType QUANT_INT16_ASYMM = 10, QUANT_INT8_SYMM_PER_CHANNEL = 11, QUANT_INT16_SYMM = 12, - QUANT_UINT4_SYMM_PER_BLOCK = 13, // Symmetric by substracting 8 always - QUANT_INT8_SYMM_PER_BLOCK = 14 + QUANT_GGML_Q4_0 = 13, + QUANT_GGML_Q8_0 = 14 }; size_t sizeOfDataType(DataType data_type); diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc index 657bac2ebeb..8f9ed3ea29e 100644 --- a/runtime/onert/core/src/ir/DataType.cc +++ b/runtime/onert/core/src/ir/DataType.cc @@ -53,8 +53,8 @@ size_t sizeOfDataType(DataType data_type) case DataType::QUANT_INT16_SYMM: return sizeof(int16_t); // Chunk type size is not supported - // case DataType::QUANT_UINT4_SYMM_PER_BLOCK: - // case DataType::QUANT_INT8_SYMM_PER_BLOCK + // case DataType::QUANT_GGML_Q4_0: + // case DataType::QUANT_GGML_Q8_0 default: throw std::runtime_error{"Unsupported type size"}; } diff --git a/runtime/onert/core/src/ir/OperandInfo.cc b/runtime/onert/core/src/ir/OperandInfo.cc index 54f6f3a2b78..71f2091b52e 100644 --- a/runtime/onert/core/src/ir/OperandInfo.cc +++ b/runtime/onert/core/src/ir/OperandInfo.cc @@ -32,14 +32,13 @@ size_t OperandInfo::total_size() const } catch (const std::runtime_error &e) { - if (data_type != DataType::QUANT_UINT4_SYMM_PER_BLOCK && - data_type != DataType::QUANT_INT8_SYMM_PER_BLOCK) + if (data_type != DataType::QUANT_GGML_Q4_0 && data_type != DataType::QUANT_GGML_Q8_0) throw e; // Assume last dim is multiple of chunk size (32) assert(_shape.dim(_shape.rank() - 1) % 32 == 0); const auto num_chunks = _shape.num_elements() / 32; - const auto chunk_size = data_type == DataType::QUANT_UINT4_SYMM_PER_BLOCK + const auto chunk_size = data_type == DataType::QUANT_GGML_Q4_0 ? 
(sizeof(uint8_t) * 32 / 2 + sizeof(uint16_t)) : (sizeof(uint8_t) * 32 + sizeof(uint16_t)); return num_chunks * chunk_size; diff --git a/runtime/onert/core/src/loader/CircleLoader.cc b/runtime/onert/core/src/loader/CircleLoader.cc index 0bcb418058a..20af4148f64 100644 --- a/runtime/onert/core/src/loader/CircleLoader.cc +++ b/runtime/onert/core/src/loader/CircleLoader.cc @@ -91,14 +91,10 @@ class CircleLoader final : public loader::BaseLoader ir::DataType getTensorDataType(const Tensor *tensor) override { auto type = tensor->type(); - if (type == TensorType::TensorType_UINT4 && tensor->quantization() && - tensor->quantization()->details_type() == - circle::QuantizationDetails::QuantizationDetails_CircleBlockQuantization) - return ir::DataType::QUANT_UINT4_SYMM_PER_BLOCK; - if (type == TensorType::TensorType_INT8 && tensor->quantization() && - tensor->quantization()->details_type() == - circle::QuantizationDetails::QuantizationDetails_CircleBlockQuantization) - return ir::DataType::QUANT_INT8_SYMM_PER_BLOCK; + if (type == TensorType::TensorType_Q4_0) + return ir::DataType::QUANT_GGML_Q4_0; + if (type == TensorType::TensorType_Q8_0) + return ir::DataType::QUANT_GGML_Q8_0; return tensorTypeToDataType(type); }