Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into data_codec_cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
shaoting-huang committed Jun 17, 2024
2 parents 08fde0a + f993b29 commit 0e5fe3a
Show file tree
Hide file tree
Showing 47 changed files with 1,135 additions and 484 deletions.
28 changes: 28 additions & 0 deletions internal/core/src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,34 @@ IsBinaryDataType(DataType data_type) {
return IsJsonDataType(data_type) || IsArrayDataType(data_type);
}

// Reports whether `type` is one of the scalar schema types enumerated here:
// Bool, Int8, Int16, Int32, Int64, Float, Double, String or VarChar.
// Any other schema type yields false.
inline bool
IsPrimitiveType(proto::schema::DataType type) {
    using SchemaType = proto::schema::DataType;

    const bool is_integer =
        type == SchemaType::Int8 || type == SchemaType::Int16 ||
        type == SchemaType::Int32 || type == SchemaType::Int64;
    const bool is_floating_point =
        type == SchemaType::Float || type == SchemaType::Double;
    const bool is_string =
        type == SchemaType::String || type == SchemaType::VarChar;

    return type == SchemaType::Bool || is_integer || is_floating_point ||
           is_string;
}

// True only when `type` is the JSON schema type.
inline bool
IsJsonType(proto::schema::DataType type) {
    const bool is_json = (proto::schema::DataType::JSON == type);
    return is_json;
}

// True only when `type` is the Array schema type.
inline bool
IsArrayType(proto::schema::DataType type) {
    switch (type) {
        case proto::schema::DataType::Array:
            return true;
        default:
            return false;
    }
}

inline bool
IsBinaryVectorDataType(DataType data_type) {
return data_type == DataType::VECTOR_BINARY;
Expand Down
83 changes: 55 additions & 28 deletions internal/core/src/index/BitmapIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ namespace index {
template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context)
: is_built_(false) {
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema) {
if (file_manager_context.Valid()) {
file_manager_ =
std::make_shared<storage::MemFileManagerImpl>(file_manager_context);
Expand All @@ -45,7 +46,9 @@ template <typename T>
BitmapIndex<T>::BitmapIndex(
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space)
: is_built_(false), data_(), space_(space) {
: is_built_(false),
schema_(file_manager_context.fieldDataMeta.field_schema),
space_(space) {
if (file_manager_context.Valid()) {
file_manager_ = std::make_shared<storage::MemFileManagerImpl>(
file_manager_context, space);
Expand All @@ -67,27 +70,7 @@ BitmapIndex<T>::Build(const Config& config) {
auto field_datas =
file_manager_->CacheRawDataToMemory(insert_files.value());

int total_num_rows = 0;
for (const auto& field_data : field_datas) {
total_num_rows += field_data->get_num_rows();
}
if (total_num_rows == 0) {
throw SegcoreError(DataIsEmpty,
"scalar bitmap index can not build null values");
}

total_num_rows_ = total_num_rows;

int64_t offset = 0;
for (const auto& data : field_datas) {
auto slice_row_num = data->get_num_rows();
for (size_t i = 0; i < slice_row_num; ++i) {
auto val = reinterpret_cast<const T*>(data->RawValue(i));
data_[*val].add(offset);
offset++;
}
}
is_built_ = true;
BuildWithFieldData(field_datas);
}

template <typename T>
Expand Down Expand Up @@ -144,6 +127,21 @@ BitmapIndex<T>::BuildV2(const Config& config) {
BuildWithFieldData(field_datas);
}

// Fills data_ from scalar field chunks: each row's value (read as a T through
// RawValue) has the row's running offset added to the entry stored under that
// value. Offsets start at 0 and keep counting across all chunks in order.
template <typename T>
void
BitmapIndex<T>::BuildPrimitiveField(
    const std::vector<FieldDataPtr>& field_datas) {
    int64_t row_offset = 0;
    for (const auto& chunk : field_datas) {
        const auto rows_in_chunk = chunk->get_num_rows();
        for (size_t row = 0; row < rows_in_chunk; ++row, ++row_offset) {
            const T& value =
                *reinterpret_cast<const T*>(chunk->RawValue(row));
            data_[value].add(row_offset);
        }
    }
}

template <typename T>
void
BitmapIndex<T>::BuildWithFieldData(
Expand All @@ -158,17 +156,46 @@ BitmapIndex<T>::BuildWithFieldData(
}
total_num_rows_ = total_num_rows;

switch (schema_.data_type()) {
case proto::schema::DataType::Bool:
case proto::schema::DataType::Int8:
case proto::schema::DataType::Int16:
case proto::schema::DataType::Int32:
case proto::schema::DataType::Int64:
case proto::schema::DataType::Float:
case proto::schema::DataType::Double:
case proto::schema::DataType::String:
case proto::schema::DataType::VarChar:
BuildPrimitiveField(field_datas);
break;
case proto::schema::DataType::Array:
BuildArrayField(field_datas);
break;
default:
PanicInfo(
DataTypeInvalid,
fmt::format("Invalid data type: {} for build bitmap index",
proto::schema::DataType_Name(schema_.data_type())));
}
is_built_ = true;
}

// Builds the index entries for an array-typed field.
//
// Every row is read as a milvus::Array; each element of that array (fetched
// as a T) gets the row's offset added to its entry in data_, so a row is
// recorded under every value it contains. Offsets start at 0 and keep
// counting across all chunks of `field_datas` in order.
//
// Marks the index as built on completion.
template <typename T>
void
BitmapIndex<T>::BuildArrayField(const std::vector<FieldDataPtr>& field_datas) {
    int64_t offset = 0;
    for (const auto& data : field_datas) {
        auto slice_row_num = data->get_num_rows();
        for (size_t i = 0; i < slice_row_num; ++i) {
            // The row payload is a milvus::Array, so it must not be
            // dereferenced as a scalar T; only its elements are indexed.
            auto array =
                reinterpret_cast<const milvus::Array*>(data->RawValue(i));
            for (size_t j = 0; j < array->length(); ++j) {
                auto val = array->template get_data<T>(j);
                data_[val].add(offset);
            }
            offset++;
        }
    }

    is_built_ = true;
}

template <typename T>
Expand Down Expand Up @@ -877,4 +904,4 @@ template class BitmapIndex<double>;
template class BitmapIndex<std::string>;

} // namespace index
} // namespace milvus
} // namespace milvus
26 changes: 12 additions & 14 deletions internal/core/src/index/BitmapIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,6 @@ class BitmapIndex : public ScalarIndex<T> {
const storage::FileManagerContext& file_manager_context,
std::shared_ptr<milvus_storage::Space> space);

explicit BitmapIndex(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager)
: file_manager_(file_manager) {
}

explicit BitmapIndex(
const std::shared_ptr<storage::MemFileManagerImpl>& file_manager,
std::shared_ptr<milvus_storage::Space> space)
: file_manager_(file_manager), space_(space) {
}

~BitmapIndex() override = default;

BinarySet
Expand Down Expand Up @@ -117,6 +106,7 @@ class BitmapIndex : public ScalarIndex<T> {

BinarySet
Upload(const Config& config = {}) override;

BinarySet
UploadV2(const Config& config = {}) override;

Expand All @@ -125,6 +115,11 @@ class BitmapIndex : public ScalarIndex<T> {
return true;
}

void
LoadWithoutAssemble(const BinarySet& binary_set,
const Config& config) override;

public:
int64_t
Cardinality() {
if (build_mode_ == BitmapIndexBuildMode::ROARING) {
Expand All @@ -134,11 +129,13 @@ class BitmapIndex : public ScalarIndex<T> {
}
}

private:
void
LoadWithoutAssemble(const BinarySet& binary_set,
const Config& config) override;
BuildPrimitiveField(const std::vector<FieldDataPtr>& datas);

void
BuildArrayField(const std::vector<FieldDataPtr>& datas);

private:
size_t
GetIndexDataSize();

Expand Down Expand Up @@ -188,6 +185,7 @@ class BitmapIndex : public ScalarIndex<T> {
std::map<T, roaring::Roaring> data_;
std::map<T, TargetBitmap> bitsets_;
size_t total_num_rows_{0};
proto::schema::FieldSchema schema_;
std::shared_ptr<storage::MemFileManagerImpl> file_manager_;
std::shared_ptr<milvus_storage::Space> space_;
};
Expand Down
Loading

0 comments on commit 0e5fe3a

Please sign in to comment.