Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: [onert] block quantization #13693

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions nnpackage/schema/circle_schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,15 @@ table CustomQuantization {
custom:[ubyte] (force_align: 16);
}

// Block quantization: from ggml quantization (https://github.com/ggerganov/ggml)
// Listed as a member of the QuantizationDetails union below.
table CircleBlockQuantization {
// NOTE(review): presumably identifies the ggml block format by name — confirm accepted values.
name:string;
}

// Represents a specific quantization technique's parameters.
union QuantizationDetails {
CustomQuantization,
CircleBlockQuantization
Copy link
Contributor

@glistening glistening Aug 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the `Circle` prefix necessary to avoid a name conflict in the flatbuffers-generated files? I think `GGMLBlockQuantization` may be better, as @jinevening suggested offline; that name makes it clear what `CircleBlockQuantization` means.

}

// Parameters for converting a quantized tensor back to float.
Expand Down
218 changes: 196 additions & 22 deletions runtime/libs/circle-schema/include/circle_schema_generated.h
Original file line number Diff line number Diff line change
@@ -1,20 +1,3 @@
/*
* Copyright (c) 2019-2024 Samsung Electronics Co., Ltd. All Rights Reserved
* Copyright 2018 The TensorFlow Authors. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// command: flatc -c --gen-onefile --gen-object-api ../../../../nnpackage/schema/circle_schema.fbs
// automatically generated by the FlatBuffers compiler, do not modify

#ifndef FLATBUFFERS_GENERATED_CIRCLESCHEMA_CIRCLE_H_
Expand All @@ -35,6 +18,10 @@ struct CustomQuantization;
struct CustomQuantizationBuilder;
struct CustomQuantizationT;

struct CircleBlockQuantization;
struct CircleBlockQuantizationBuilder;
struct CircleBlockQuantizationT;

struct QuantizationParameters;
struct QuantizationParametersBuilder;
struct QuantizationParametersT;
Expand Down Expand Up @@ -756,27 +743,30 @@ enum QuantizationDetails : uint8_t
{
QuantizationDetails_NONE = 0,
QuantizationDetails_CustomQuantization = 1,
QuantizationDetails_CircleBlockQuantization = 2,
QuantizationDetails_MIN = QuantizationDetails_NONE,
QuantizationDetails_MAX = QuantizationDetails_CustomQuantization
QuantizationDetails_MAX = QuantizationDetails_CircleBlockQuantization
};

inline const QuantizationDetails (&EnumValuesQuantizationDetails())[2]
inline const QuantizationDetails (&EnumValuesQuantizationDetails())[3]
{
static const QuantizationDetails values[] = {QuantizationDetails_NONE,
QuantizationDetails_CustomQuantization};
QuantizationDetails_CustomQuantization,
QuantizationDetails_CircleBlockQuantization};
return values;
}

inline const char *const *EnumNamesQuantizationDetails()
{
static const char *const names[3] = {"NONE", "CustomQuantization", nullptr};
static const char *const names[4] = {"NONE", "CustomQuantization", "CircleBlockQuantization",
nullptr};
return names;
}

inline const char *EnumNameQuantizationDetails(QuantizationDetails e)
{
if (::flatbuffers::IsOutRange(e, QuantizationDetails_NONE,
QuantizationDetails_CustomQuantization))
QuantizationDetails_CircleBlockQuantization))
return "";
const size_t index = static_cast<size_t>(e);
return EnumNamesQuantizationDetails()[index];
Expand All @@ -792,6 +782,11 @@ template <> struct QuantizationDetailsTraits<circle::CustomQuantization>
static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
};

// Associates the flatbuffer table type circle::CircleBlockQuantization with its
// QuantizationDetails union tag.
template <> struct QuantizationDetailsTraits<circle::CircleBlockQuantization>
{
static const QuantizationDetails enum_value = QuantizationDetails_CircleBlockQuantization;
};

template <typename T> struct QuantizationDetailsUnionTraits
{
static const QuantizationDetails enum_value = QuantizationDetails_NONE;
Expand All @@ -802,6 +797,11 @@ template <> struct QuantizationDetailsUnionTraits<circle::CustomQuantizationT>
static const QuantizationDetails enum_value = QuantizationDetails_CustomQuantization;
};

// Associates the object-API type circle::CircleBlockQuantizationT with its
// QuantizationDetails union tag.
template <> struct QuantizationDetailsUnionTraits<circle::CircleBlockQuantizationT>
{
static const QuantizationDetails enum_value = QuantizationDetails_CircleBlockQuantization;
};

struct QuantizationDetailsUnion
{
QuantizationDetails type;
Expand Down Expand Up @@ -862,6 +862,18 @@ struct QuantizationDetailsUnion
? reinterpret_cast<const circle::CustomQuantizationT *>(value)
: nullptr;
}
// Returns `value` viewed as a CircleBlockQuantizationT when `type` is
// QuantizationDetails_CircleBlockQuantization; returns nullptr for any other type.
circle::CircleBlockQuantizationT *AsCircleBlockQuantization()
{
return type == QuantizationDetails_CircleBlockQuantization
? reinterpret_cast<circle::CircleBlockQuantizationT *>(value)
: nullptr;
}
// Const overload: returns `value` viewed as a const CircleBlockQuantizationT when `type` is
// QuantizationDetails_CircleBlockQuantization; returns nullptr for any other type.
const circle::CircleBlockQuantizationT *AsCircleBlockQuantization() const
{
return type == QuantizationDetails_CircleBlockQuantization
? reinterpret_cast<const circle::CircleBlockQuantizationT *>(value)
: nullptr;
}
};

bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const void *obj,
Expand Down Expand Up @@ -6240,6 +6252,81 @@ ::flatbuffers::Offset<CustomQuantization>
CreateCustomQuantization(::flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o,
const ::flatbuffers::rehasher_function_t *_rehasher = nullptr);

// Object-API (native) representation of the CircleBlockQuantization table.
struct CircleBlockQuantizationT : public ::flatbuffers::NativeTable
{
// The flatbuffer table type this native struct corresponds to.
typedef CircleBlockQuantization TableType;
// Value of the table's `name` field; value-initialized to an empty string.
std::string name{};
};

// Read-only accessor type for a serialized CircleBlockQuantization table.
struct CircleBlockQuantization FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table
{
typedef CircleBlockQuantizationT NativeTableType;
typedef CircleBlockQuantizationBuilder Builder;
// VTable offsets of this table's fields.
enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE
{
VT_NAME = 4
};
// Returns the pointer stored for the `name` field (looked up at VT_NAME).
// UnPackTo() below null-checks this result, so callers should expect it may be null.
const ::flatbuffers::String *name() const
{
return GetPointer<const ::flatbuffers::String *>(VT_NAME);
}
// Verifies this table: table start, the `name` offset, the string it refers to,
// and the table end. Returns false as soon as any of these checks fails.
bool Verify(::flatbuffers::Verifier &verifier) const
{
return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NAME) &&
verifier.VerifyString(name()) && verifier.EndTable();
}
// Object-API conversions; only declared here, definitions are out of line.
CircleBlockQuantizationT *
UnPack(const ::flatbuffers::resolver_function_t *_resolver = nullptr) const;
void UnPackTo(CircleBlockQuantizationT *_o,
const ::flatbuffers::resolver_function_t *_resolver = nullptr) const;
static ::flatbuffers::Offset<CircleBlockQuantization>
Pack(::flatbuffers::FlatBufferBuilder &_fbb, const CircleBlockQuantizationT *_o,
const ::flatbuffers::rehasher_function_t *_rehasher = nullptr);
};

// Incremental builder for a CircleBlockQuantization table: construct it, add fields,
// then call Finish() to obtain the table offset.
struct CircleBlockQuantizationBuilder
{
typedef CircleBlockQuantization Table;
// Builder the table is written into (held by reference).
::flatbuffers::FlatBufferBuilder &fbb_;
// Value returned by StartTable(); handed back to EndTable() in Finish().
::flatbuffers::uoffset_t start_;
// Adds the `name` field from an already-created string offset.
void add_name(::flatbuffers::Offset<::flatbuffers::String> name)
{
fbb_.AddOffset(CircleBlockQuantization::VT_NAME, name);
}
// Starts a new table on `_fbb`.
explicit CircleBlockQuantizationBuilder(::flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb)
{
start_ = fbb_.StartTable();
}
// Ends the table and returns its offset typed as CircleBlockQuantization.
::flatbuffers::Offset<CircleBlockQuantization> Finish()
{
const auto end = fbb_.EndTable(start_);
auto o = ::flatbuffers::Offset<CircleBlockQuantization>(end);
return o;
}
};

// Builds a CircleBlockQuantization table in `_fbb` with the given `name` string offset
// (defaults to 0) and returns the offset of the finished table.
inline ::flatbuffers::Offset<CircleBlockQuantization>
CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb,
::flatbuffers::Offset<::flatbuffers::String> name = 0)
{
CircleBlockQuantizationBuilder builder_(_fbb);
builder_.add_name(name);
return builder_.Finish();
}

// Convenience overload taking a C string: creates the string in `_fbb` when `name` is
// non-null, otherwise passes 0, then builds the table via CreateCircleBlockQuantization.
inline ::flatbuffers::Offset<CircleBlockQuantization>
CreateCircleBlockQuantizationDirect(::flatbuffers::FlatBufferBuilder &_fbb,
const char *name = nullptr)
{
auto name__ = name ? _fbb.CreateString(name) : 0;
return circle::CreateCircleBlockQuantization(_fbb, name__);
}

// Declaration of the object-API overload: builds a table from a CircleBlockQuantizationT.
// The definition is out of line.
::flatbuffers::Offset<CircleBlockQuantization>
CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb,
const CircleBlockQuantizationT *_o,
const ::flatbuffers::rehasher_function_t *_rehasher = nullptr);

struct QuantizationParametersT : public ::flatbuffers::NativeTable
{
typedef QuantizationParameters TableType;
Expand Down Expand Up @@ -6293,6 +6380,12 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::T
? static_cast<const circle::CustomQuantization *>(details())
: nullptr;
}
// Returns details() as a CircleBlockQuantization when details_type() is
// QuantizationDetails_CircleBlockQuantization; returns nullptr for any other type.
const circle::CircleBlockQuantization *details_as_CircleBlockQuantization() const
{
return details_type() == circle::QuantizationDetails_CircleBlockQuantization
? static_cast<const circle::CircleBlockQuantization *>(details())
: nullptr;
}
int32_t quantized_dimension() const { return GetField<int32_t>(VT_QUANTIZED_DIMENSION, 0); }
bool Verify(::flatbuffers::Verifier &verifier) const
{
Expand Down Expand Up @@ -6322,6 +6415,13 @@ QuantizationParameters::details_as<circle::CustomQuantization>() const
return details_as_CustomQuantization();
}

// Specialization of details_as<T>() for CircleBlockQuantization; forwards to
// details_as_CircleBlockQuantization().
template <>
inline const circle::CircleBlockQuantization *
QuantizationParameters::details_as<circle::CircleBlockQuantization>() const
{
return details_as_CircleBlockQuantization();
}

struct QuantizationParametersBuilder
{
typedef QuantizationParameters Table;
Expand Down Expand Up @@ -20734,6 +20834,53 @@ CreateCustomQuantization(::flatbuffers::FlatBufferBuilder &_fbb, const CustomQua
return circle::CreateCustomQuantization(_fbb, _custom);
}

// Allocates a new CircleBlockQuantizationT, fills it via UnPackTo(), and returns it as a
// raw pointer. Ownership is released to the caller; the unique_ptr only guards the
// allocation while UnPackTo() runs.
inline CircleBlockQuantizationT *
CircleBlockQuantization::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const
{
auto _o = std::unique_ptr<CircleBlockQuantizationT>(new CircleBlockQuantizationT());
UnPackTo(_o.get(), _resolver);
return _o.release();
}

// Copies this table's fields into `_o`. `_o->name` is assigned only when the `name`
// field is present; otherwise it is left untouched. `_resolver` is not used here.
inline void
CircleBlockQuantization::UnPackTo(CircleBlockQuantizationT *_o,
const ::flatbuffers::resolver_function_t *_resolver) const
{
// Void casts mark the parameters as used.
(void)_o;
(void)_resolver;
{
auto _e = name();
if (_e)
_o->name = _e->str();
}
}

// Serializes the object-API struct `_o` into `_fbb` by forwarding to the
// CreateCircleBlockQuantization overload that takes a CircleBlockQuantizationT.
inline ::flatbuffers::Offset<CircleBlockQuantization>
CircleBlockQuantization::Pack(::flatbuffers::FlatBufferBuilder &_fbb,
const CircleBlockQuantizationT *_o,
const ::flatbuffers::rehasher_function_t *_rehasher)
{
return CreateCircleBlockQuantization(_fbb, _o, _rehasher);
}

// Builds a CircleBlockQuantization table in `_fbb` from the object-API struct `_o`.
// `_o` is dereferenced unconditionally, so it must not be null.
inline ::flatbuffers::Offset<CircleBlockQuantization>
CreateCircleBlockQuantization(::flatbuffers::FlatBufferBuilder &_fbb,
const CircleBlockQuantizationT *_o,
const ::flatbuffers::rehasher_function_t *_rehasher)
{
(void)_rehasher;
(void)_o;
// Argument bundle emitted by the generator; unused in this function (see the void cast).
struct _VectorArgs
{
::flatbuffers::FlatBufferBuilder *__fbb;
const CircleBlockQuantizationT *__o;
const ::flatbuffers::rehasher_function_t *__rehasher;
} _va = {&_fbb, _o, _rehasher};
(void)_va;
// An empty name is not written as a string; offset 0 is passed instead.
auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
return circle::CreateCircleBlockQuantization(_fbb, _name);
}

inline QuantizationParametersT *
QuantizationParameters::UnPack(const ::flatbuffers::resolver_function_t *_resolver) const
{
Expand Down Expand Up @@ -29893,6 +30040,11 @@ inline bool VerifyQuantizationDetails(::flatbuffers::Verifier &verifier, const v
auto ptr = reinterpret_cast<const circle::CustomQuantization *>(obj);
return verifier.VerifyTable(ptr);
}
case QuantizationDetails_CircleBlockQuantization:
{
auto ptr = reinterpret_cast<const circle::CircleBlockQuantization *>(obj);
return verifier.VerifyTable(ptr);
}
default:
return true;
}
Expand Down Expand Up @@ -29929,6 +30081,11 @@ inline void *QuantizationDetailsUnion::UnPack(const void *obj, QuantizationDetai
auto ptr = reinterpret_cast<const circle::CustomQuantization *>(obj);
return ptr->UnPack(resolver);
}
case QuantizationDetails_CircleBlockQuantization:
{
auto ptr = reinterpret_cast<const circle::CircleBlockQuantization *>(obj);
return ptr->UnPack(resolver);
}
default:
return nullptr;
}
Expand All @@ -29946,6 +30103,11 @@ QuantizationDetailsUnion::Pack(::flatbuffers::FlatBufferBuilder &_fbb,
auto ptr = reinterpret_cast<const circle::CustomQuantizationT *>(value);
return CreateCustomQuantization(_fbb, ptr, _rehasher).Union();
}
case QuantizationDetails_CircleBlockQuantization:
{
auto ptr = reinterpret_cast<const circle::CircleBlockQuantizationT *>(value);
return CreateCircleBlockQuantization(_fbb, ptr, _rehasher).Union();
}
default:
return 0;
}
Expand All @@ -29962,6 +30124,12 @@ inline QuantizationDetailsUnion::QuantizationDetailsUnion(const QuantizationDeta
new circle::CustomQuantizationT(*reinterpret_cast<circle::CustomQuantizationT *>(u.value));
break;
}
case QuantizationDetails_CircleBlockQuantization:
{
value = new circle::CircleBlockQuantizationT(
*reinterpret_cast<circle::CircleBlockQuantizationT *>(u.value));
break;
}
default:
break;
}
Expand All @@ -29977,6 +30145,12 @@ inline void QuantizationDetailsUnion::Reset()
delete ptr;
break;
}
case QuantizationDetails_CircleBlockQuantization:
{
auto ptr = reinterpret_cast<circle::CircleBlockQuantizationT *>(value);
delete ptr;
break;
}
default:
break;
}
Expand Down
2 changes: 2 additions & 0 deletions runtime/onert/core/include/ir/DataType.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ enum class DataType
QUANT_INT16_ASYMM = 10,
QUANT_INT8_SYMM_PER_CHANNEL = 11,
QUANT_INT16_SYMM = 12,
QUANT_UINT4_SYMM_PER_BLOCK = 13, // Symmetric by always subtracting 8
QUANT_INT8_SYMM_PER_BLOCK = 14
};

size_t sizeOfDataType(DataType data_type);
Expand Down
2 changes: 1 addition & 1 deletion runtime/onert/core/include/ir/OperandInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class OperandInfo
* @brief Return size of tensor (bytes)
* @return Tensor size
*/
size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
size_t total_size() const;

MemAllocType memAllocType() const { return _alloc_type; }
void setAsConstant() { _const = true; }
Expand Down
3 changes: 3 additions & 0 deletions runtime/onert/core/src/ir/DataType.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ size_t sizeOfDataType(DataType data_type)
return sizeof(int16_t);
case DataType::QUANT_INT16_SYMM:
return sizeof(int16_t);
// Per-block quantized types have no supported element size here; they fall through to
// the default case below:
// case DataType::QUANT_UINT4_SYMM_PER_BLOCK:
// case DataType::QUANT_INT8_SYMM_PER_BLOCK:
default:
throw std::runtime_error{"Unsupported type size"};
}
Expand Down
Loading
Loading