Skip to content

Commit

Permalink
Merge pull request #45 from myscale/sync/myscaledb-oss
Browse files Browse the repository at this point in the history
MyscaleDB 1.8.0 Release
  • Loading branch information
feixue1121 authored Oct 21, 2024
2 parents a4b3aa9 + 1097f33 commit 5b7f4f8
Show file tree
Hide file tree
Showing 55 changed files with 3,176 additions and 1,001 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ The simplest way to use MyScaleDB is to create an instance on MyScale Cloud serv
To quickly get a MyScaleDB instance up and running, simply pull and run the latest Docker image:

```bash
docker run --name myscaledb --net=host myscale/myscaledb:1.7.1
docker run --name myscaledb --net=host myscale/myscaledb:1.8.0
```

>Note: MyScaleDB's default configuration only allows access from localhost. When starting it with `docker run`, you need to specify `--net=host` to access services deployed in Docker on the current node.
Expand Down Expand Up @@ -114,7 +114,7 @@ version: '3.7'

services:
myscaledb:
image: myscale/myscaledb:1.7.1
image: myscale/myscaledb:1.8.0
tty: true
ports:
- '8123:8123'
Expand Down
8 changes: 4 additions & 4 deletions cmake/autogenerated_myscale_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(MYSCALE_VERSION_MAJOR 1)
SET(MYSCALE_VERSION_MINOR 7)
SET(MYSCALE_VERSION_PATCH 1)
SET(MYSCALE_VERSION_DESCRIBE myscale-v1.7.1)
SET(MYSCALE_VERSION_STRING 1.7.1)
SET(MYSCALE_VERSION_MINOR 8)
SET(MYSCALE_VERSION_PATCH 0)
SET(MYSCALE_VERSION_DESCRIBE myscale-v1.8.0)
SET(MYSCALE_VERSION_STRING 1.8.0)
# end of autochange

4 changes: 4 additions & 0 deletions rust/supercrate/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ target_include_directories(supercrate_cxxbridge
${cxx_include}
)

# Disable the dollar-in-identifier-extension and unused-macros warnings (-Wno-dollar-in-identifier-extension -Wno-unused-macros)
target_compile_options(supercrate_cxxbridge PUBLIC -Wno-dollar-in-identifier-extension)
target_compile_options(supercrate_cxxbridge PUBLIC -Wno-unused-macros)

# Create total target with alias with given namespace
add_library(supercrate-total INTERFACE)
target_link_libraries(supercrate-total
Expand Down
2 changes: 2 additions & 0 deletions src/Core/Settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,8 @@ class IColumn;
M(Bool, optimize_prefilter_in_search, true, "Enable prewhere optimization for vector or text search if some partition columns in prewhere condition.", 0) \
M(UInt64, max_search_result_window, 10000, "The maximum value of n + m in limit clause for pagination in vector/text/hybrid search", 0) \
M(Bool, dfs_query_then_fetch, false, "Enable Distributed Frequency Search (DFS) query to gather global statistical info for accurate BM25 calculation.", 0) \
M(UInt64, distances_top_k_multiply_factor, 3, "Multiply k in limit by this factor for the top_k in multiple distance functions", 0) \
M(UInt64, parallel_reading_prefilter_option, 1, "Control parallel reading prefilter options for vector/text/hybrid search in SELECT queries with where clause. 0 - disable, 1 - adaptive enable depending on mark ranges and row count. 2 - always enable.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

Expand Down
12 changes: 6 additions & 6 deletions src/Interpreters/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4151,19 +4151,19 @@ ReadSettings Context::getReadSettings() const
return res;
}

std::optional<VSDescription> Context::getVecScanDescription() const
MutableVSDescriptionsPtr Context::getVecScanDescriptions() const
{
return vector_scan_description;
return right_vector_scan_descs;
}

void Context::setVecScanDescription(VSDescription & vec_scan_desc) const
void Context::setVecScanDescriptions(MutableVSDescriptionsPtr vec_scan_descs) const
{
vector_scan_description = vec_scan_desc;
right_vector_scan_descs = vec_scan_descs;
}

void Context::resetVecScanDescription() const
void Context::resetVecScanDescriptions() const
{
vector_scan_description.reset();
right_vector_scan_descs.reset();
}

TextSearchInfoPtr Context::getTextSearchInfo() const
Expand Down
8 changes: 4 additions & 4 deletions src/Interpreters/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ class Context: public std::enable_shared_from_this<Context>

/// TODO: will be enhanced similar as scalars.
/// Used when vector scan func exists in right joined table
mutable std::optional<VSDescription> vector_scan_description;
mutable MutableVSDescriptionsPtr right_vector_scan_descs;

mutable TextSearchInfoPtr right_text_search_info;
mutable HybridSearchInfoPtr right_hybrid_search_info;
Expand Down Expand Up @@ -1160,9 +1160,9 @@ class Context: public std::enable_shared_from_this<Context>
ParallelReplicasMode getParallelReplicasMode() const;

/// Used for vector scan functions
std::optional<VSDescription> getVecScanDescription() const;
void setVecScanDescription(VSDescription & vec_scan_desc) const;
void resetVecScanDescription() const;
MutableVSDescriptionsPtr getVecScanDescriptions() const;
void setVecScanDescriptions(MutableVSDescriptionsPtr vec_scan_descs) const;
void resetVecScanDescriptions() const;

/// Used for text search functions
TextSearchInfoPtr getTextSearchInfo() const;
Expand Down
76 changes: 49 additions & 27 deletions src/Interpreters/ExpressionAnalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
#include <VectorIndex/Interpreters/GetHybridSearchVisitor.h>
#include <VectorIndex/Interpreters/parseVSParameters.h>
#include <VectorIndex/Utils/VIUtils.h>
#include <VectorIndex/Utils/HybridSearchUtils.h>
#include <AggregateFunctions/parseAggregateFunctionParameters.h>

namespace DB
Expand Down Expand Up @@ -143,10 +144,15 @@ inline void checkTantivyIndex([[maybe_unused]]const StorageSnapshotPtr & storage
for (const auto & index_desc : metadata_snapshot->getSecondaryIndices())
{
/// Find tantivy inverted index on the search column
if (index_desc.type == TANTIVY_INDEX_NAME && index_desc.column_names.size() == 1 && index_desc.column_names[0] == text_column_name)
if (index_desc.type == TANTIVY_INDEX_NAME)
{
find_tantivy_index = true;
break;
auto & column_names = index_desc.column_names;
/// Support search on a column in a multi-columns index
if (std::find(column_names.begin(), column_names.end(), text_column_name) != column_names.end())
{
find_tantivy_index = true;
break;
}
}
}
}
Expand Down Expand Up @@ -561,20 +567,22 @@ void ExpressionAnalyzer::analyzeVectorScan(ActionsDAGPtr & temp_actions)
{
if (syntax->search_func_type == HybridSearchFuncType::VECTOR_SCAN && !syntax->hybrid_search_funcs.empty())
has_vector_scan = makeVectorScanDescriptions(temp_actions);
else if (auto vec_scan_desc = getContext()->getVecScanDescription())
else if (auto vec_scan_descs = getContext()->getVecScanDescriptions())
{
if (syntax->storage_snapshot)
{
LOG_DEBUG(getLogger(), "[analyzeVectorScan] Get vector scan function from right table");
/// vector search column exists in right joined table
vector_scan_descriptions.emplace_back(*vec_scan_desc);
vector_scan_descriptions = *vec_scan_descs;
has_vector_scan = true;
}
}
/// Fill in dim and recognize VectorSearchType from metadata
if (has_vector_scan)
{
getAndCheckVectorScanInfoFromMetadata(syntax->storage_snapshot, vector_scan_descriptions[0], getContext());
/// Support multiple distance functions
for (auto & vector_scan_desc : vector_scan_descriptions)
getAndCheckVectorScanInfoFromMetadata(syntax->storage_snapshot, vector_scan_desc, getContext());
}
}

Expand Down Expand Up @@ -620,7 +628,7 @@ void ExpressionAnalyzer::analyzeHybridSearch(ActionsDAGPtr & temp_actions)
if (!syntax->is_remote_storage && hybrid_search_info->text_search_info)
checkTantivyIndex(syntax->storage_snapshot, hybrid_search_info->text_search_info->text_column_name);

/// Get vector search type and dim from metadata, check paramaters in vector scan and add to vector_paramters
/// Get vector search type and dim from metadata, check parameters in vector scan and add to vector_parameters
VSDescription & vec_scan_desc =
const_cast<VSDescription &>(hybrid_search_info->vector_scan_info->vector_scan_descs[0]);

Expand Down Expand Up @@ -877,7 +885,9 @@ VSDescription ExpressionAnalyzer::commonMakeVectorScanDescription(
const String & function_col_name,
ASTPtr query_column,
ASTPtr query_vector,
int topk)
int topk,
String vector_scan_metric_type,
Search::DataType vector_search_type)
{
VSDescription vector_scan_desc;
vector_scan_desc.column_name = function_col_name;
Expand Down Expand Up @@ -941,45 +951,52 @@ VSDescription ExpressionAnalyzer::commonMakeVectorScanDescription(
vector_scan_desc.query_column_name);

/// vector search type from syntax result
vector_scan_desc.vector_search_type = syntax->vector_search_type;
vector_scan_desc.vector_search_type = vector_search_type;

/// top_k is taken from LIMIT N
vector_scan_desc.topk = topk;

/// Pass the correct direction to vector_scan_desc according to metric_type
vector_scan_desc.direction = Poco::toUpper(syntax->vector_scan_metric_type) == "IP" ? -1 : 1;
vector_scan_desc.direction = Poco::toUpper(vector_scan_metric_type) == "IP" ? -1 : 1;

return vector_scan_desc;
}

/// create vector scan descriptions, mainly record the column name and parameters
bool ExpressionAnalyzer::makeVectorScanDescriptions(ActionsDAGPtr & actions)
{
for (const ASTFunction * node : hybrid_search_funcs())
for (size_t i = 0; i < hybrid_search_funcs().size(); ++i)
{
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, actions);
const ASTFunction * node = hybrid_search_funcs()[i];

const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();

// Argument 0 indicates the vector name; argument 1 indicates the specific vector content.
if (arguments.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"wrong argument number in distance function");
throw Exception(ErrorCodes::BAD_ARGUMENTS, "wrong argument number in distance function");
}

auto vector_scan_desc = commonMakeVectorScanDescription(actions, node->getColumnName(), arguments[0], arguments[1], static_cast<int>(syntax->limit_length));
getRootActionsNoMakeSet(node->arguments, actions);

auto vector_scan_desc = commonMakeVectorScanDescription(actions, node->getColumnName(), arguments[0], arguments[1],
static_cast<int>(syntax->limit_length), syntax->vector_scan_metric_types[i], syntax->vector_search_types[i]);

/// Save parameters; parsing and checking of the parameters will be done in analyzeVectorScan()
vector_scan_desc.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array();
LOG_DEBUG(getLogger(), "[makeVectorScanDescriptions] create vector scan function: {}", node->name);
LOG_DEBUG(getLogger(), "[makeVectorScanDescriptions] create vector scan function: {}, column name:{}", node->name, node->getColumnName());

if (syntax->hybrid_search_from_right_table)
{
analyzedJoin().setVecScanDescription(vector_scan_desc);
}
else
vector_scan_descriptions.push_back(vector_scan_desc);
vector_scan_descriptions.push_back(vector_scan_desc);
}

/// Support multiple distance functions
/// If vector columns are from right table, save the vector scan descriptions to analyzedJoin().
if (syntax->hybrid_search_from_right_table)
{
auto vector_scan_descs_ptr = std::make_shared<VSDescriptions>(vector_scan_descriptions);
analyzedJoin().setVecScanDescriptions(vector_scan_descs_ptr);

vector_scan_descriptions.clear();
}

return !vector_scan_descriptions.empty();
Expand Down Expand Up @@ -1209,7 +1226,8 @@ bool ExpressionAnalyzer::makeHybridSearchInfo(ActionsDAGPtr & actions)
/// make VSDescription for HybridSearchInfo
{
getRootActionsNoMakeSet(arguments[2], actions);
auto vector_scan_desc = commonMakeVectorScanDescription(actions, "distance_func", arguments[0], arguments[2], num_candidates);
auto vector_scan_desc = commonMakeVectorScanDescription(actions, "distance_func", arguments[0], arguments[2],
num_candidates, syntax->vector_scan_metric_types[0], syntax->vector_search_types[0]);

/// Save vector_scan_parameter to vector_scan_desc's parameters
if (!vector_scan_parameter.empty())
Expand Down Expand Up @@ -1738,10 +1756,10 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
{
/// Add vector scan description to Context for subquery of joined table
bool has_vector_scan = false;
if (auto vec_scan_desc = analyzed_join.getVecScanDescription())
if (auto vec_scan_descs = analyzed_join.getVecScanDescriptions())
{
has_vector_scan = true;
context->setVecScanDescription(*vec_scan_desc);
context->setVecScanDescriptions(vec_scan_descs);
}

/// Add text search info to Context for subquery of joined table
Expand Down Expand Up @@ -1806,7 +1824,7 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(

/// Reset vector scan description
if (has_vector_scan)
context->resetVecScanDescription();
context->resetVecScanDescriptions();
else if (has_text_search)
context->resetTextSearchInfo();
else if (has_hybrid_search)
Expand Down Expand Up @@ -2351,6 +2369,10 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
{
String result_name = ast->getAliasOrColumnName();

/// Skip score_type column used for distributed hybrid search fusion
if (hasHybridSearch() && (result_name == SCORE_TYPE_COLUMN.name))
continue;

if (required_result_columns_set.empty() || required_result_columns_set.contains(result_name))
{
std::string source_name = ast->getColumnName();
Expand Down
4 changes: 3 additions & 1 deletion src/Interpreters/ExpressionAnalyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,9 @@ class ExpressionAnalyzer : protected ExpressionAnalyzerData, private boost::nonc
const String & function_col_name,
ASTPtr query_column,
ASTPtr query_vector,
int topk);
int topk,
String vector_scan_metric_type,
Search::DataType vector_search_type);

void analyzeHybridSearch(ActionsDAGPtr & temp_actions);
bool makeHybridSearchInfo(ActionsDAGPtr & actions);
Expand Down
Loading

0 comments on commit 5b7f4f8

Please sign in to comment.