[onert] Support block quantization operand size calculation

This commit updates the total_size() method to calculate the operand size for block quantization types. ONE-DCO-1.0-Signed-off-by: Hyeongseok Oh <[email protected]>
Samsung · Sep 2, 2024 · 3fd091e · 3fd091e
1 parent dc0a540
commit 3fd091e
Show file tree

Hide file tree

Showing 4 changed files with 110 additions and 1 deletion.
diff --git a/runtime/onert/core/include/ir/OperandInfo.h b/runtime/onert/core/include/ir/OperandInfo.h
@@ -120,7 +120,7 @@ class OperandInfo
    * @brief   Return size of tensor (bytes)
    * @return  Tensor size
    */
-  size_t total_size() const { return _shape.num_elements() * sizeOfDataType(_typeInfo.type()); }
+  size_t total_size() const;
 
   MemAllocType memAllocType() const { return _alloc_type; }
   void setAsConstant() { _const = true; }

diff --git a/runtime/onert/core/src/ir/DataType.cc b/runtime/onert/core/src/ir/DataType.cc
@@ -53,6 +53,7 @@ size_t sizeOfDataType(DataType data_type)
     case DataType::QUANT_INT16_SYMM:
       return sizeof(int16_t);
     default:
+      // ggml block quantize type data size is not supported
       throw std::runtime_error{"Unsupported type size"};
   }
 }

diff --git a/runtime/onert/core/src/ir/OperandInfo.cc b/runtime/onert/core/src/ir/OperandInfo.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandInfo.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace ir
+{
+
+// Returns the operand's data size in bytes.
+//
+// Types accepted by sizeOfDataType() are sized as element count * element size.
+// QUANT_GGML_Q4_0 / QUANT_GGML_Q8_0 are sized per block of 32 elements instead.
+// Throws std::runtime_error for a block type whose last dimension is not a
+// multiple of 32. For any other type, the exception raised by the size
+// computation is rethrown unchanged.
+size_t OperandInfo::total_size() const
+{
+  const auto data_type = _typeInfo.type();
+  try
+  {
+    return _shape.num_elements() * sizeOfDataType(data_type);
+  }
+  catch (const std::runtime_error &)
+  {
+    // Calculate the total size for ggml block quantization types in the exception handler:
+    // they are the rare case, and the non-block path should stay as cheap as possible.
+    if (data_type != DataType::QUANT_GGML_Q4_0 && data_type != DataType::QUANT_GGML_Q8_0)
+      throw; // rethrow the active exception; `throw e;` would copy it and slice derived types
+
+    constexpr int block_elements = 32;
+
+    // A rank-0 shape has no last dimension, so it cannot hold whole blocks either.
+    const auto rank = _shape.rank();
+    if (rank == 0 || _shape.dim(rank - 1) % block_elements != 0)
+      throw std::runtime_error{
+        "Block quantization requires the last dimension to be a multiple of 32"};
+
+    // Per block: 32 packed values (half a byte each for Q4_0, one byte each for Q8_0)
+    // plus sizeof(uint16_t) extra bytes.
+    const auto num_blocks = _shape.num_elements() / block_elements;
+    const auto block_size = data_type == DataType::QUANT_GGML_Q4_0
+                              ? (sizeof(uint8_t) * block_elements / 2 + sizeof(uint16_t))
+                              : (sizeof(uint8_t) * block_elements + sizeof(uint16_t));
+    return num_blocks * block_size;
+  }
+}
+
+} // namespace ir
+} // namespace onert
diff --git a/runtime/onert/core/src/ir/OperandInfo.test.cc b/runtime/onert/core/src/ir/OperandInfo.test.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ir/OperandInfo.h"
+
+#include <gtest/gtest.h>
+
+using namespace onert::ir;
+
+TEST(ir_OperandInfo, total_size)
+{
+  // Builds a static operand of the given shape and type and reports its byte size
+  const auto byte_size = [](const Shape &shape, DataType type) {
+    return OperandInfo::createStaticInfo(shape, TypeInfo{type}).total_size();
+  };
+
+  // Element-wise types: 6 elements times the element size
+  EXPECT_EQ(byte_size(Shape{1, 2, 3}, DataType::FLOAT32), 24);
+  EXPECT_EQ(byte_size(Shape{1, 2, 3}, DataType::QUANT_INT8_SYMM), 6);
+
+  // Block quantization type operand: 128 elements form 4 blocks of 18 bytes
+  EXPECT_EQ(byte_size(Shape{1, 4, 32}, DataType::QUANT_GGML_Q4_0), 18 * 4);
+}
+
+// Negative case: total_size() is asked about a data type value with no known size
+TEST(ir_OperandInfo, neg_total_size_type)
+{
+  const TypeInfo unknown_type{DataType{-1}};
+  const auto bad_type_info = OperandInfo::createStaticInfo(Shape{1, 2, 3}, unknown_type);
+  EXPECT_THROW(bad_type_info.total_size(), std::runtime_error);
+}
+
+// Negative case: shapes for which total_size() cannot produce a byte size
+TEST(ir_OperandInfo, neg_total_size_dimension)
+{
+  // Shape containing a -1 (unspecified) dimension
+  const auto unspecified_info =
+    OperandInfo::createStaticInfo(Shape{1, -1, 3}, TypeInfo{DataType::FLOAT32});
+  EXPECT_THROW(unspecified_info.total_size(), std::runtime_error);
+
+  // Block quantization operands whose last dimension is not a multiple of 32
+  const auto q4_info =
+    OperandInfo::createStaticInfo(Shape{1, 2, 3}, TypeInfo{DataType::QUANT_GGML_Q4_0});
+  EXPECT_THROW(q4_info.total_size(), std::runtime_error);
+
+  const auto q8_info =
+    OperandInfo::createStaticInfo(Shape{1, 2, 5}, TypeInfo{DataType::QUANT_GGML_Q8_0});
+  EXPECT_THROW(q8_info.total_size(), std::runtime_error);
+}