Skip to content

Commit

Permalink
Create full text index on non-varchar column, no error raised (#704)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Create full text index on non-varchar column will trigger error
response.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Signed-off-by: jinhai <[email protected]>
  • Loading branch information
JinHai-CN authored Mar 5, 2024
1 parent ebba3d5 commit 81da91e
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 21 deletions.
32 changes: 21 additions & 11 deletions python/test/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def test_create_drop_different_fulltext_index_invalid_options(self, column_name,
"c1": types}, None)

index_info = [index.IndexInfo(column_name[0], index_type[0], params[0])]
if not column_name[1] or not index_type[1] or not params[1]:
if types != "varchar" or not column_name[1] or not index_type[1] or not params[1]:
with pytest.raises(Exception):
table_obj.create_index("my_index", index_info, None)
else:
Expand Down Expand Up @@ -283,8 +283,9 @@ def test_drop_index_show_index(self):
# create index on different type of column and show index
@pytest.mark.parametrize("types", ["vector, 3, float"])
@pytest.mark.parametrize("index_type", [
(index.IndexType.Hnsw, False), (index.IndexType.IVFFlat,
True), (index.IndexType.FullText, True)
(index.IndexType.Hnsw, False, "ERROR:3061*"),
(index.IndexType.IVFFlat, True),
(index.IndexType.FullText, False, "ERROR:3009*")
])
def test_create_index_on_different_type_of_column(self, types, index_type):
# connect
Expand All @@ -296,7 +297,7 @@ def test_create_index_on_different_type_of_column(self, types, index_type):
"c1": types}, None)
# create created index
if not index_type[1]:
with pytest.raises(Exception, match="ERROR:3061*"):
with pytest.raises(Exception, match=index_type[2]):
table_obj.create_index("my_index",
[index.IndexInfo("c1",
index_type[0],
Expand Down Expand Up @@ -340,7 +341,8 @@ def test_insert_data_create_index(self, index_type):
assert res.error_code == ErrorCode.OK

@pytest.mark.parametrize("index_type", [
index.IndexType.IVFFlat, index.IndexType.FullText
(index.IndexType.IVFFlat, True),
(index.IndexType.FullText, False, "ERROR:3009*")
])
@pytest.mark.parametrize("file_format", ["csv"])
def test_import_data_create_index(self, index_type, file_format):
Expand All @@ -354,12 +356,20 @@ def test_import_data_create_index(self, index_type, file_format):

table_obj.import_data(os.getcwd() + TEST_DATA_DIR +
file_format + "/pysdk_test." + file_format)
res = table_obj.create_index("my_index",
[index.IndexInfo("c2",
index_type,
[index.InitParameter("centroids_count", "128"),
index.InitParameter("metric", "l2")])], None)
assert res.error_code == ErrorCode.OK
if (index_type[1]):
res = table_obj.create_index("my_index",
[index.IndexInfo("c2",
index_type[0],
[index.InitParameter("centroids_count", "128"),
index.InitParameter("metric", "l2")])], None)
assert res.error_code == ErrorCode.OK
else:
with pytest.raises(Exception, match=index_type[2]):
table_obj.create_index("my_index",
[index.IndexInfo("c2",
index_type[0],
[index.InitParameter("centroids_count", "128"),
index.InitParameter("metric", "l2")])], None)

# disconnect
res = infinity_obj.disconnect()
Expand Down
1 change: 1 addition & 0 deletions src/planner/logical_planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ Status LogicalPlanner::BuildCreateIndex(const CreateStatement *statement, Shared
SharedPtr<IndexBase> base_index_ptr{nullptr};
switch (index_info->index_type_) {
case IndexType::kFullText: {
IndexFullText::ValidateColumnDataType(base_table_ref, index_info->column_name_);
base_index_ptr = IndexFullText::Make(index_name,
fmt::format("{}_{}", create_index_info->table_name_, *index_name),
{index_info->column_name_},
Expand Down
2 changes: 1 addition & 1 deletion src/storage/bg_task/periodic_trigger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace infinity {
void CleanupPeriodicTrigger::Trigger() {
TxnTimeStamp visible_ts = txn_mgr_->GetMinUncommitTs();
if (visible_ts == last_visible_ts_) {
LOG_TRACE(fmt::format("No need to cleanup visible timestamp: {}", visible_ts));
// LOG_TRACE(fmt::format("No need to cleanup visible timestamp: {}", visible_ts));
return;
} else if (visible_ts < last_visible_ts_) {
UnrecoverableException("The visible timestamp is not monotonic.");
Expand Down
24 changes: 19 additions & 5 deletions src/storage/definition/index_full_text.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ module;
#include <string>
#include <vector>

module index_full_text;

import stl;
import index_base;
import third_party;

import status;
import serialize;
import infinity_exception;
import statement_common;

module index_full_text;
import base_table_ref;
import logical_type;

namespace infinity {

Expand Down Expand Up @@ -78,15 +80,15 @@ void IndexFullText::WriteAdv(char *&ptr) const {
WriteBufAdv(ptr, is_homebrewed);
}

SharedPtr<IndexBase> IndexFullText::ReadAdv(char *&, int32_t ) {
SharedPtr<IndexBase> IndexFullText::ReadAdv(char *&, int32_t) {
UnrecoverableError("Not implemented");
return nullptr;
}

String IndexFullText::ToString() const {
std::stringstream ss;
String output_str = IndexBase::ToString();
if(!analyzer_.empty()) {
if (!analyzer_.empty()) {
output_str += ", " + analyzer_;
}
return output_str;
Expand All @@ -104,4 +106,16 @@ SharedPtr<IndexFullText> IndexFullText::Deserialize(const nlohmann::json &) {
return nullptr;
}

void IndexFullText::ValidateColumnDataType(const SharedPtr<BaseTableRef> &base_table_ref, const String &column_name) {
auto &column_names_vector = *(base_table_ref->column_names_);
auto &column_types_vector = *(base_table_ref->column_types_);
SizeT column_id = std::find(column_names_vector.begin(), column_names_vector.end(), column_name) - column_names_vector.begin();
if (column_id == column_names_vector.size()) {
RecoverableError(Status::ColumnNotExist(column_name));
} else if (auto &data_type = column_types_vector[column_id]; data_type->type() != LogicalType::kVarchar) {
RecoverableError(Status::InvalidIndexDefinition(
fmt::format("Attempt to create full-text index on column: {}, data type: {}.", column_name, data_type->ToString())));
}
}

} // namespace infinity
6 changes: 5 additions & 1 deletion src/storage/definition/index_full_text.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import third_party;
import index_base;
import create_index_info;
import statement_common;
import base_table_ref;

namespace infinity {

Expand All @@ -32,7 +33,7 @@ public:
Make(SharedPtr<String> index_name, const String &file_name, Vector<String> column_names, const Vector<InitParameter *> &index_param_list);

IndexFullText(SharedPtr<String> index_name, const String &file_name, Vector<String> column_names, const String &analyzer, bool homebrewed)
: IndexBase(IndexType::kFullText, index_name, file_name, std::move(column_names)), analyzer_(analyzer), homebrewed_(homebrewed) {}
: IndexBase(IndexType::kFullText, index_name, file_name, std::move(column_names)), analyzer_(analyzer), homebrewed_(homebrewed) {};

~IndexFullText() final = default;

Expand All @@ -53,6 +54,9 @@ public:

static SharedPtr<IndexFullText> Deserialize(const nlohmann::json &index_def_json);

public:
static void ValidateColumnDataType(const SharedPtr<BaseTableRef> &base_table_ref, const String &column_name);

public:
String analyzer_{};
bool homebrewed_{false};
Expand Down
2 changes: 1 addition & 1 deletion src/storage/definition/index_hnsw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ void IndexHnsw::ValidateColumnDataType(const SharedPtr<BaseTableRef> &base_table
RecoverableError(Status::ColumnNotExist(column_name));
} else if (auto &data_type = column_types_vector[column_id]; data_type->type() != LogicalType::kEmbedding) {
RecoverableError(Status::InvalidIndexDefinition(
fmt::format("Invalid parameter for Hnsw index: column name: {}, data type not supported: {}.", column_name, data_type->ToString())));
fmt::format("Attempt to create HNSW index on column: {}, data type: {}.", column_name, data_type->ToString())));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/storage/definition/index_ivfflat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ void IndexIVFFlat::ValidateColumnDataType(const SharedPtr<BaseTableRef> &base_ta
RecoverableError(Status::ColumnNotExist(column_name));
} else if (auto &data_type = column_types_vector[column_id]; data_type->type() != LogicalType::kEmbedding) {
RecoverableError(Status::InvalidIndexDefinition(
fmt::format("Invalid parameter for IVFFlat index: column name: {}, data type not supported: {}.", column_name, data_type->ToString())));
fmt::format("Attempt to create IVFFLAT index on column: {}, data type: {}.", column_name, data_type->ToString())));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/storage/definition/index_secondary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ void IndexSecondary::ValidateColumnDataType(const SharedPtr<BaseTableRef> &base_
RecoverableError(Status::ColumnNotExist(column_name));
} else if (auto &data_type = column_types_vector[column_id]; !(data_type->CanBuildSecondaryIndex())) {
RecoverableError(Status::InvalidIndexDefinition(
fmt::format("Invalid parameter for secondary index: column name: {}, data type not supported: {}.", column_name, data_type->ToString())));
fmt::format("Attempt to create index on column: {}, data type: {}.", column_name, data_type->ToString())));
}
}

Expand Down

0 comments on commit 81da91e

Please sign in to comment.