diff --git a/.github/workflows/slow_test.yml b/.github/workflows/slow_test.yml
index 6439887a4b..de45fa258d 100644
--- a/.github/workflows/slow_test.yml
+++ b/.github/workflows/slow_test.yml
@@ -126,7 +126,7 @@ jobs:
# Fix sanitizer: https://github.com/ClickHouse/ClickHouse/issues/64086
old_value=$(sudo sysctl -n vm.mmap_rnd_bits)
sudo sysctl -w vm.mmap_rnd_bits=28
- sudo docker exec ${TESTER_CONTAINER} bash -c "cd /infinity/ && rm -fr /var/infinity && export INF_DIRECTORY=`cat .tester_env` && echo INF_DIRECTORY=${INF_DIRECTORY} && python3 tools/run_cluster_test.py --infinity_path=cmake-build-debug/src/infinity --infinity_dir=${INF_DIRECTORY}"
+ sudo docker exec ${TESTER_CONTAINER} bash -c "cd /infinity/ && rm -fr /var/infinity && export INF_DIRECTORY=`cat .tester_env` && echo INF_DIRECTORY=${INF_DIRECTORY} && python3 tools/run_cluster_test.py --infinity_path=cmake-build-debug/src/infinity --infinity_dir=${INF_DIRECTORY} --minio_port=9005 --minio_console_port=9006"
sudo sysctl -w vm.mmap_rnd_bits=$old_value
- name: Collect thread sanitizer output in cluster test
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5f839f347a..ebdade27b6 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -84,7 +84,7 @@ jobs:
id: run_cluster_test
run: |
sudo docker exec ${TESTER_CONTAINER} bash -c "rm -rf /root/.config/pip/pip.conf && cd /infinity/ && pip3 uninstall -y infinity-sdk infinity-embedded-sdk && cd python/infinity_sdk/ && pip3 install . -i https://pypi.tuna.tsinghua.edu.cn/simple --trusted-host tuna.tsinghua.edu.cn && cd ../.."
- sudo docker exec ${TESTER_CONTAINER} bash -c "cd /infinity/ && rm -fr /var/infinity && export INF_DIRECTORY=`cat .tester_env` && echo INF_DIRECTORY=${INF_DIRECTORY} && python3 tools/run_cluster_test.py --infinity_path=cmake-build-debug/src/infinity --infinity_dir=${INF_DIRECTORY}"
+ sudo docker exec ${TESTER_CONTAINER} bash -c "cd /infinity/ && rm -fr /var/infinity && export INF_DIRECTORY=`cat .tester_env` && echo INF_DIRECTORY=${INF_DIRECTORY} && python3 tools/run_cluster_test.py --infinity_path=cmake-build-debug/src/infinity --infinity_dir=${INF_DIRECTORY} --minio_port=9005 --minio_console_port=9006"
- name: Collect cluster test output
if: ${{ !cancelled() }}
@@ -123,7 +123,7 @@ jobs:
MINIO_DIR=~/minio_data_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ')
echo "MINIO_CONTAINER=${MINIO_CONTAINER}" >> $GITHUB_ENV
echo "MINIO_DIR=${MINIO_DIR}" >> $GITHUB_ENV
- sudo docker rm -f -v ${MINIO_CONTAINER} && sudo rm -fr ${MINIO_DIR} && sudo mkdir ${MINIO_DIR} && sudo docker run -d --net=container:${BUILDER_CONTAINER} --name ${MINIO_CONTAINER} -e "MINIO_ROOT_PASSWORD=minioadmin" -e "MINIO_ROOT_USER=minioadmin" -v ${MINIO_DIR}:/data quay.io/minio/minio server /data --console-address ":9001" && sleep 5s
+ sudo docker rm -f -v ${MINIO_CONTAINER} && sudo rm -fr ${MINIO_DIR} && sudo mkdir ${MINIO_DIR} && sudo docker run -d --net=container:${BUILDER_CONTAINER} --name ${MINIO_CONTAINER} -e "MINIO_ROOT_PASSWORD=minioadmin" -e "MINIO_ROOT_USER=minioadmin" -v ${MINIO_DIR}:/data quay.io/minio/minio server /data --console-address ":9006" --address ":9005" && sleep 5s
- name: Start infinity debug version with minio
if: ${{ !cancelled() && !failure() }}
@@ -290,7 +290,7 @@ jobs:
MINIO_DIR=~/minio_data_$(od -An -N4 -tx4 /dev/urandom | tr -d ' ')
echo "MINIO_CONTAINER=${MINIO_CONTAINER}" >> $GITHUB_ENV
echo "MINIO_DIR=${MINIO_DIR}" >> $GITHUB_ENV
- sudo docker rm -f -v ${MINIO_CONTAINER} && sudo rm -fr ${MINIO_DIR} && sudo mkdir ${MINIO_DIR} && sudo docker run -d --net=container:${BUILDER_CONTAINER} --name ${MINIO_CONTAINER} -e "MINIO_ROOT_PASSWORD=minioadmin" -e "MINIO_ROOT_USER=minioadmin" -v ${MINIO_DIR}:/data quay.io/minio/minio server /data --console-address ":9001" && sleep 5s
+ sudo docker rm -f -v ${MINIO_CONTAINER} && sudo rm -fr ${MINIO_DIR} && sudo mkdir ${MINIO_DIR} && sudo docker run -d --net=container:${BUILDER_CONTAINER} --name ${MINIO_CONTAINER} -e "MINIO_ROOT_PASSWORD=minioadmin" -e "MINIO_ROOT_USER=minioadmin" -v ${MINIO_DIR}:/data quay.io/minio/minio server /data --console-address ":9006" --address ":9005" && sleep 5s
- name: Start infinity release version with minio
if: ${{ !cancelled() && !failure() }}
diff --git a/README.md b/README.md
index 9c85b75a2e..8ad93c72ea 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-
+
@@ -26,7 +26,7 @@ Infinity is a cutting-edge AI-native database that provides a wide range of sear
## ⚡️ Performance
-
+
## 🌟 Key Features
@@ -60,9 +60,9 @@ Supports a wide range of data types including strings, numerics, vectors, and mo
Infinity supports two working modes, embedded mode and client-server mode. Infinity's embedded mode enables you to quickly embed Infinity into your Python applications, without the need to connect to a separate backend server. The following shows how to operate in embedded mode:
```bash
- pip install infinity-embedded-sdk==0.5.0.dev5
+ pip install infinity-embedded-sdk==0.5.0.dev6
```
-1. Use Infinity to conduct a dense vector search:
+ Use Infinity to conduct a dense vector search:
```python
import infinity_embedded
diff --git a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp
index 00a5a6b49d..a2ed13d291 100644
--- a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp
+++ b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp
@@ -254,7 +254,7 @@ void BenchmarkQuery(SharedPtr infinity, const String &db_name, const S
output_columns->emplace_back(select_rowid_expr);
output_columns->emplace_back(select_score_expr);
}
- infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr);
+ infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr, false);
/*
auto result = infinity->Search(db_name, table_name, search_expr, nullptr, output_columns);
{
diff --git a/benchmark/local_infinity/infinity_benchmark.cpp b/benchmark/local_infinity/infinity_benchmark.cpp
index 528ca8387f..4b1a9e4831 100644
--- a/benchmark/local_infinity/infinity_benchmark.cpp
+++ b/benchmark/local_infinity/infinity_benchmark.cpp
@@ -226,7 +226,8 @@ int main() {
output_columns,
nullptr,
nullptr,
- nullptr);
+ nullptr,
+ false);
});
results.push_back(fmt::format("-> Select QPS: {}", total_times / tims_costing_second));
}
diff --git a/benchmark/local_infinity/knn/knn_query_benchmark.cpp b/benchmark/local_infinity/knn/knn_query_benchmark.cpp
index a17d516b9e..38e2099de0 100644
--- a/benchmark/local_infinity/knn/knn_query_benchmark.cpp
+++ b/benchmark/local_infinity/knn/knn_query_benchmark.cpp
@@ -220,7 +220,7 @@ int main(int argc, char *argv[]) {
auto select_rowid_expr = new FunctionExpr();
select_rowid_expr->func_name_ = "row_id";
output_columns->emplace_back(select_rowid_expr);
- auto result = infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr);
+ auto result = infinity->Search(db_name, table_name, search_expr, nullptr, nullptr, nullptr, output_columns, nullptr, nullptr, nullptr, false);
{
auto &cv = result.result_table_->GetDataBlockById(0)->column_vectors;
auto &column = *cv[0];
diff --git a/benchmark/remote_infinity/remote_query_benchmark.cpp b/benchmark/remote_infinity/remote_query_benchmark.cpp
index e07c4bdc22..14ad9fcf44 100644
--- a/benchmark/remote_infinity/remote_query_benchmark.cpp
+++ b/benchmark/remote_infinity/remote_query_benchmark.cpp
@@ -51,7 +51,7 @@ struct InfinityClient {
transport->open();
CommonResponse response;
ConnectRequest request;
- request.__set_client_version(26); // 0.5.0.dev5
+ request.__set_client_version(27); // 0.5.0.dev6
client->Connect(response, request);
session_id = response.session_id;
}
diff --git a/client/cpp/infinity_client.cpp b/client/cpp/infinity_client.cpp
index d229b2af82..f5e5fdefd7 100644
--- a/client/cpp/infinity_client.cpp
+++ b/client/cpp/infinity_client.cpp
@@ -25,7 +25,7 @@ Client Client::Connect(const std::string &ip_address, uint16_t port) {
transport->open();
CommonResponse response;
ConnectRequest request;
- request.__set_client_version(26); // 0.5.0.dev5
+ request.__set_client_version(27); // 0.5.0.dev6
client->Connect(response, request);
return {socket, transport, protocol, std::move(client), response.session_id};
}
diff --git a/conf/follower.toml b/conf/follower.toml
index 3bbc4ac63f..517a745dcf 100644
--- a/conf/follower.toml
+++ b/conf/follower.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -46,7 +46,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/conf/infinity_conf.toml b/conf/infinity_conf.toml
index b0739ac24f..a9344e4ad3 100644
--- a/conf/infinity_conf.toml
+++ b/conf/infinity_conf.toml
@@ -37,7 +37,7 @@ mem_index_capacity = 1048576
# S3 storage config example:
# [storage.object_storage]
-# url = "127.0.0.1:9000"
+# url = "127.0.0.1:9005"
# bucket_name = "infinity"
# access_key = "minioadmin"
# secret_key = "minioadmin"
diff --git a/conf/infinity_minio_conf.toml b/conf/infinity_minio_conf.toml
new file mode 100644
index 0000000000..c8eca3498c
--- /dev/null
+++ b/conf/infinity_minio_conf.toml
@@ -0,0 +1,72 @@
+[general]
+version = "0.5.0"
+time_zone = "utc-8"
+
+[network]
+server_address = "0.0.0.0"
+postgres_port = 5432
+http_port = 23820
+client_port = 23817
+connection_pool_size = 128
+
+[log]
+log_filename = "infinity.log"
+log_dir = "/var/infinity/log"
+log_to_stdout = false
+log_file_max_size = "10GB"
+log_file_rotate_count = 10
+
+# trace/debug/info/warning/error/critical 6 log levels, default: info
+log_level = "info"
+
+[storage]
+persistence_dir = "/var/infinity/persistence"
+data_dir = "/var/infinity/data"
+# periodically activates garbage collection:
+# 0 means real-time,
+# s means seconds, for example "60s", 60 seconds
+# m means minutes, for example "60m", 60 minutes
+# h means hours, for example "1h", 1 hour
+optimize_interval = "10s"
+cleanup_interval = "60s"
+compact_interval = "120s"
+storage_type = "minio"
+# dump memory index entry when it reaches the capacity
+mem_index_capacity = 1048576
+
+[storage.object_storage]
+url = "127.0.0.1:9005"
+bucket_name = "infinity"
+access_key = "minioadmin"
+secret_key = "minioadmin"
+enable_https = false
+
+# S3 storage config example:
+# [storage.object_storage]
+# url = "127.0.0.1:9005"
+# bucket_name = "infinity"
+# access_key = "minioadmin"
+# secret_key = "minioadmin"
+# enable_https = false
+
+[buffer]
+buffer_manager_size = "4GB"
+lru_num = 7
+temp_dir = "/var/infinity/tmp"
+result_cache = "off"
+memindex_memory_quota = "1GB"
+
+[wal]
+wal_dir = "/var/infinity/wal"
+full_checkpoint_interval = "86400s"
+delta_checkpoint_interval = "60s"
+# delta_checkpoint_threshold = 1000000000
+wal_compact_threshold = "1GB"
+
+# flush_at_once: write and flush the log on each commit
+# only_write: write the log and let the OS decide when to flush it (default)
+# flush_per_second: write the log on each commit and flush it to disk once per second
+wal_flush = "only_write"
+
+[resource]
+resource_dir = "/var/infinity/resource"
diff --git a/conf/leader.toml b/conf/leader.toml
index 68a9c56b27..42d24eaf0d 100644
--- a/conf/leader.toml
+++ b/conf/leader.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -46,7 +46,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/conf/learner.toml b/conf/learner.toml
index a432bffe90..27152a5485 100644
--- a/conf/learner.toml
+++ b/conf/learner.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -46,7 +46,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/conf/learner2.toml b/conf/learner2.toml
index 79073f90f0..9de53f1d6d 100644
--- a/conf/learner2.toml
+++ b/conf/learner2.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -46,7 +46,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/conf/pytest_parallel_infinity_conf.toml b/conf/pytest_parallel_infinity_conf.toml
index bc5c9822e2..d4d6fd1a23 100644
--- a/conf/pytest_parallel_infinity_conf.toml
+++ b/conf/pytest_parallel_infinity_conf.toml
@@ -16,6 +16,8 @@ log_level = "trace"
[storage]
persistence_dir = "/var/infinity/persistence"
+compact_interval = "10s"
+cleanup_interval = "0s"
[buffer]
buffer_manager_size = "8GB"
diff --git a/conf/pytest_parallel_infinity_follower.toml b/conf/pytest_parallel_infinity_follower.toml
index fbfc4745ac..5db2a512ea 100644
--- a/conf/pytest_parallel_infinity_follower.toml
+++ b/conf/pytest_parallel_infinity_follower.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -40,7 +40,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "pk9s2oJFX1qXLYObwIcz"
secret_key = "ho1G9xh2iKup4Xj9Ja3eRgg8bfwMyDv4fvkQGcZl"
diff --git a/conf/pytest_parallel_infinity_leader.toml b/conf/pytest_parallel_infinity_leader.toml
index 7e975215cf..4aeda8a4cb 100644
--- a/conf/pytest_parallel_infinity_leader.toml
+++ b/conf/pytest_parallel_infinity_leader.toml
@@ -1,7 +1,7 @@
[general]
version = "0.5.0"
time_zone = "utc-8"
-server_mode = "cluster"
+server_mode = "admin" # "standalone"
[network]
server_address = "0.0.0.0"
@@ -40,7 +40,7 @@ mem_index_capacity = 1048576
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/conf/pytest_parallel_infinity_minio.toml b/conf/pytest_parallel_infinity_minio.toml
index 400fbe3627..2dd0fcc91e 100644
--- a/conf/pytest_parallel_infinity_minio.toml
+++ b/conf/pytest_parallel_infinity_minio.toml
@@ -18,7 +18,7 @@ persistence_dir = "/var/infinity/persistence"
storage_type = "minio"
[storage.object_storage]
-url = "127.0.0.1:9000"
+url = "127.0.0.1:9005"
bucket_name = "infinity"
access_key = "minioadmin"
secret_key = "minioadmin"
diff --git a/docs/getstarted/build_from_source.mdx b/docs/getstarted/build_from_source.mdx
index 93954b3655..3a4df25aa5 100644
--- a/docs/getstarted/build_from_source.mdx
+++ b/docs/getstarted/build_from_source.mdx
@@ -7,6 +7,10 @@ slug: /build_from_source
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+Build Infinity from source, build and run unit/functional tests.
+
+---
+
This document provides instructions for building Infinity from source, as well as building and running unit and functional tests.
:::tip NOTE
@@ -260,7 +264,7 @@ cmake --build . -t test_main
2. Install Python sdk of infinity:
```bash
- pip install infinity-sdk==0.5.0.dev5
+ pip install infinity-sdk==0.5.0.dev6
```
3. Run the functional tests:
@@ -282,7 +286,7 @@ cmake --build . -t test_main
2. Install Python sdk of infinity:
```bash
- pip install infinity-sdk==0.5.0.dev5
+ pip install infinity-sdk==0.5.0.dev6
```
3. Run the functional tests:
@@ -305,7 +309,7 @@ cmake --build . -t test_main
2. Install Python sdk of infinity:
```bash
- pip install infinity-sdk==0.5.0.dev5
+ pip install infinity-sdk==0.5.0.dev6
```
3. Run the functional tests:
diff --git a/docs/getstarted/deploy_infinity_server.mdx b/docs/getstarted/deploy_infinity_server.mdx
index 28825d478d..9d5ee6962a 100644
--- a/docs/getstarted/deploy_infinity_server.mdx
+++ b/docs/getstarted/deploy_infinity_server.mdx
@@ -7,6 +7,10 @@ slug: /deploy_infinity_server
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+Three ways to deploy Infinity.
+
+---
+
This document provides guidance on deploying the Infinity database. In general, you can deploy Infinity in the following three ways:
- [Import Infinity as a Python module](#import-infinity-as-a-python-module): To run Infinity locally as a Python module.
@@ -30,7 +34,7 @@ This approach allows you to call Infinity as a Python module. To deploy Infinity
### Install Infinity as a module
```
-pip install infinity-embedded-sdk==0.5.0.dev5
+pip install infinity-embedded-sdk==0.5.0.dev6
```
### Create an Infinity object
@@ -97,10 +101,10 @@ If you are on Windows 10+, you must enable WSL or WSL2 to deploy Infinity using
### Install Infinity client
```
-pip install infinity-sdk==0.5.0.dev5
+pip install infinity-sdk==0.5.0.dev6
```
-### Connect to Infinity Server and run a dense vector search
+### Run a vector search
```python
import infinity
@@ -147,7 +151,7 @@ This section provides instructions on deploying Infinity using binary package on
Fedora/RHEL/CentOS/OpenSUSE
```bash
-sudo rpm -i infinity-0.5.0.dev5-x86_64.rpm
+sudo rpm -i infinity-0.5.0.dev6-x86_64.rpm
```
```bash
@@ -158,7 +162,7 @@ sudo systemctl start infinity
```bash
-sudo dpkg -i infinity-0.5.0.dev5-x86_64.deb
+sudo dpkg -i infinity-0.5.0.dev6-x86_64.deb
```
```bash
@@ -171,10 +175,10 @@ sudo systemctl start infinity
### Install Infinity client
```
-pip install infinity-sdk==0.5.0.dev5
+pip install infinity-sdk==0.5.0.dev6
```
-### Connect to Infinity Server and run a dense vector search
+### Run a vector search
```python
import infinity
diff --git a/docs/getstarted/quickstart.md b/docs/getstarted/quickstart.md
index 2a271cc211..61a7235f99 100644
--- a/docs/getstarted/quickstart.md
+++ b/docs/getstarted/quickstart.md
@@ -5,6 +5,8 @@ slug: /
# Quickstart
+A quickstart guide.
+
## Prerequisites
- CPU: x86_64 with AVX2 support.
@@ -19,7 +21,7 @@ If you wish to embed Infinity into your Python application without the need for
1. Install the Infinity-embedded SDK:
```bash
- pip install infinity-embedded-sdk==0.5.0.dev5
+ pip install infinity-embedded-sdk==0.5.0.dev6
```
2. Use Infinity to conduct a dense vector search:
```python
diff --git a/docs/guides/search_guide.md b/docs/guides/search_guide.md
index 44e6b9912f..1e342d8445 100644
--- a/docs/guides/search_guide.md
+++ b/docs/guides/search_guide.md
@@ -2,57 +2,146 @@
sidebar_position: 1
slug: /search_guide
---
-# Search usage guide
+# Conduct a search
+
+Full-text, vector, sparse vector, tensor, hybrid search.
+
+---
## Overview
-Infinity offers powerful search capabilities. This page covers the search usage.
+This document offers guidance on conducting a search within Infinity.
## Full-text search
-Full text search will work if full text index is created. There are two kinds of work modes for full text indexing:
+### Work modes for full-text index
-- Real-time mode - If the full text index is created immediately after the table is created, then the full-text index will work in real time mode if data is ingested at this time. Real time index will accumulate posting data within memory and flush to disk if it reaches up the quota.
-- Offline mode - If the full-text index is created after the data is inserted, then it will work in offline mode, where the full-text index is constructed through external sorting.
+A full-text index must be built to perform a full-text search, and this index operates in two modes:
+
+- **Real-time mode** - If created immediately after a table is created, a full-text index will be built on ingested data in real time, accumulating posting data within memory and flushing it to disk when a specified quota is reached.
+- **Offline mode** - For data inserted before the creation of a full-text index, the index will be built in offline mode using external sorting.
### Tokenizer
-There are several built-in tokenizers within Infinity. With the exception of the default standard analyzer and ngram analyzer, everything else requires the resource file to be in the right place. Make sure to download [resource package](https://github.com/infiniflow/resource) and put it to correct directory according to `[resource]` configuration:
+When creating a full-text index, you must specify a tokenizer/analyzer, which will be used for future full-text searches on the same column(s). Infinity has many built-in tokenizers. Except for the Ngram analyzer and the default standard analyzer, all other analyzers require dedicated resource files. Download the appropriate files for your chosen analyzer from [this link](https://github.com/infiniflow/resource) and save them to the directory specified by `resource_dir` in the configuration file.
```yaml
[resource]
-# Directory for Infinity's resource files, including the dictionary files used by the analyzer
+# Directory for Infinity's resource files, including dictionaries to be used by analyzers
resource_dir = "/var/infinity/resource"
```
-You must specify a tokenizer when creating a full text index, but you don't need to specify one when querying, because the query will select the same tokenizer in the same columns.
+The following are Infinity's built-in analyzers/tokenizers.
+
+#### Standard analyzer
+
+The standard analyzer is the default tokenizer and works best with Latin characters. It applies a stemmer before outputting tokens segmented by whitespace; `English` is the default stemmer. To specify a stemmer for a different language, use `"standard-xxx"`, where `xxx` is the language to use.
+
+Supported language stemmers include: `Danish`, `Dutch`, `English`, `Finnish`, `French`, `German`, `Hungarian`, `Italian`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, `Russian`, `Spanish`, `Swedish`, and `Turkish`.
+
+#### Ngram analyzer
+
+A definition of N-gram can be found on [wikipedia](https://en.wikipedia.org/wiki/N-gram). Use `"ngram-x"` to select the Ngram analyzer, where `x` represents the value of `N`. For example, a common choice for full-text searches in code is `"ngram-3"`.
+
+#### Simplified Chinese analyzer
+
+Use `"chinese"` to select the simplified Chinese analyzer, which is a wrapper of [Jieba](https://github.com/yanyiwu/cppjieba) analyzer. Use `"chinese-fine"` to output fine-grained analyzer results.
+
+#### Traditional Chinese analyzer
+
+Use `"traditional"` to select the traditional Chinese analyzer, which essentially converts simplified Chinese into traditional Chinese based on the outputs of the [Jieba](https://github.com/yanyiwu/cppjieba) analyzer.
+
+#### Japanese analyzer
+
+Use `"japanese"` to select the Japanese analyzer, which is a wrapper of [mecab](http://taku910.github.io/mecab/).
+
+#### Korean analyzer
+
+Use `"korean"` to select the Korean tokenizer, which is a wrapper of [mecab](http://taku910.github.io/mecab/) but uses a different Korean dictionary.
+
+#### RAG analyzer
+
+The RAG analyzer is a bilingual tokenizer that supports Chinese (simplified and traditional) and English. It is a C++ adaptation of [RAGFlow's tokenizer](https://github.com/infiniflow/ragflow/blob/main/rag/nlp/rag_tokenizer.py), and its tokenization of Latin characters derives from [NLTK](https://www.nltk.org/api/nltk.tokenize.punkt.html).
+
+This analyzer offers better recall for Chinese than the [Jieba](https://github.com/yanyiwu/cppjieba) analyzer, but lower tokenization throughput due to higher computational costs. Its English language processing involves an additional lemmatization step before stemming, different from that of the standard analyzer.
+
+Use `"rag"` to select the RAG analyzer or `"rag-fine"` for fine-grained mode, which outputs tokenization results with the second highest score.
+
+:::note
+Both RAG tokenization and fine-grained RAG tokenization are used in RAGFlow to ensure high recall.
+:::
+
+#### IK analyzer
+
+The IK analyzer is a bilingual tokenizer that supports Chinese (simplified and traditional) and English. It is a C++ adaptation of the [IK Analyzer](https://github.com/infinilabs/analysis-ik), which is widely used as a tokenizer by Chinese Elasticsearch users.
+
+Use `"ik"` to select this analyzer, which is equivalent to the `ik_smart` option in the [IK Analyzer](https://github.com/infinilabs/analysis-ik), or `"ik-fine"` for fine-grained mode, which is equivalent to the `ik_max_word` option in the [IK Analyzer](https://github.com/infinilabs/analysis-ik).
+
+#### Keyword analyzer
+
+The keyword analyzer is a "noop" analyzer used for columns containing keywords only, where traditional scoring methods like `BM25` do not apply. It scores `0` or `1`, depending on whether any keywords are matched.
+
+Use `"keyword"` to select this analyzer.
+
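+Below is a minimal Python sketch of selecting an analyzer when creating a full-text index. It uses the `create_index()`/`IndexInfo` API described in the Python API Reference; the connection address, database, table, and column names are illustrative, and the exact `create_index()` signature may differ across SDK versions.
+
+```python
+import infinity
+from infinity import index
+
+# Connect to a locally running Infinity server (address assumed for this sketch).
+infinity_object = infinity.connect(infinity.NetworkAddress("127.0.0.1", 23817))
+table_object = infinity_object.get_database("default_db").get_table("my_table")
+
+# Build a full-text index on the "body" column and choose the RAG analyzer.
+# Replace "rag" with "rag-fine", "ik", "chinese", "keyword", etc. to select another analyzer.
+table_object.create_index(
+    "body_ft_index",
+    index.IndexInfo("body", index.IndexType.FullText, {"ANALYZER": "rag"}),
+)
+```
+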
+### Search and ranking syntax
+
+Infinity supports the following full-text search expressions:
+
+- Single term
+- AND multiple terms
+- OR multiple terms
+- Phrase search
+- CARAT operator
+- Sloppy phrase search
+- Field-specific search
+- Escape character
+
+#### Single term
+
+Example: `"blooms"`
+
+#### AND multiple terms
+
+- `"space AND efficient"`
+
+#### OR multiple terms
+
+- `"Bloom OR filter"`
+- `"Bloom filter"`
+
+:::tip NOTE
+`OR` is the default semantic in a multi-term full-text search unless explicitly specified otherwise.
+:::
+
+#### Phrase search
+
+- `"Bloom filter"`
+- `'Bloom filter'`
+
+#### CARAT operator
+
+Use `^` to boost the importance of a specific term. For example: `quick^2 brown` boosts the importance of `quick` by a factor of 2, making it twice as important as `brown`.
+
+#### Sloppy phrase search
+
+Example: `'"harmful chemical"~10'`
+
+#### Field-specific search
-- Standard analyzer: It's the default tokenizer, and is suitable for latin characters. Standard analyzer will just output tokens segmented by white spaces. It will also use stemmer before outputing, and `English` is the default stemmer. If you want to specify stemmers for other languages, use `standard-xxx` and `xxx` is the language you want to use. Currently, supported language stemmer includes: `Danish`, `Dutch`, `English`, `Finnish`, `French`, `German`, `Hungarian`, `Italian`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, `Russian`, `Spanish`, `Swedish`, `Turkish`.
-- Ngram analyzer: The definition of Ngram could be referred to through [wikipedia](https://en.wikipedia.org/wiki/N-gram). You must specify the number of `N` when creating full text index if you want to use Ngram analyzer through `ngram-x` where `x` equals to the definition of `N`. For example, for code search, a typical choice is `ngram-3` .
-- Chinese analyzer: Use `chinese` to specify tokenizer for simplified Chinese. It's a wrapper of [Jieba](https://github.com/yanyiwu/cppjieba) analyzer. Use `chinese-fine` to output the fine-grained analyzer results.
-- Traditional Chinese analyzer: Use `traditional` to specify tokenizer for traditional Chinese, which is actually a conversion between simplified Chinese and traditional Chinese based on the outputs of [Jieba](https://github.com/yanyiwu/cppjieba) analyzer.
-- Japanese analyzer: Use `japanese` to specify tokenizer for Japanese. It's a wrapper of [mecab](http://taku910.github.io/mecab/).
-- Korean analyzer: Use `korean` to specify tokenizer for Korean. It's also a wrapper of [mecab](http://taku910.github.io/mecab/) but has different Korean dictionary.
-- RAG analyzer: It's a C++ migration of tokenizer imported from [RAGFlow](https://github.com/infiniflow/ragflow/blob/main/rag/nlp/rag_tokenizer.py). It's a multilingual tokenizer and currently, both Chinese and English are well supported. RAG analyzer has better recall compared to [Jieba](https://github.com/yanyiwu/cppjieba) analyzer, but lower tokenization throughput due to much more expensive computation. The English processing within RAG analyzer is also different from Standard analyzer, because it has an extra step of lemmatization before stemming, additionally, the tokenization of latin characters is a c++ migration of [NLTK](https://www.nltk.org/api/nltk.tokenize.punkt.html). RAG analyzer also supports fined grained mode through `rag-fine`, which will output tokenization results with the second highest score. In RAGFlow, both RAG tokenization and fine-grained RAG tokenization are used to guarantee the recall.
-- Keyword analyzer: It's a noop analyzer. This is used if you have columns containing keywords only, and you don't want such traditional scoring approaches as `BM25`to take into effects, the score will return 0 or 1 according to whether any keywords are hit.
+Example: `"title:(quick OR brown) AND body:foobar"`
-### Search and ranking
+#### Escape character
-Infinity offers following syntax for full text search:
+Use `\` to escape reserved characters like ` ` `(` `)` `^` `"` `'` `~` `*` `?` `:` `\`. For example: `"space\:efficient"`.
-- Single term: `"blooms"`
-- AND multiple terms: `"space AND efficient"`, `"space && efficient"` or `"space + efficient"`
-- OR multiple terms: `"Bloom OR filter"`, `"Bloom || filter"` or just `"Bloom filter"` .
-- Phrase search: `"Bloom filter" or 'Bloom filter'`
-- CARAT operator: `^`: Used to boost the importance of a term, e.g., `quick^2 brown` boosts the importance of `quick` by a factor of 2, making it twice as important as `brown`.
-- Sloppy phrase search: `'"harmful chemical"~10'`
-- Field-specific search: `"title:(quick OR brown) AND body:foobar"`
-- Escaping reserved characters: `"space\-efficient"` . `:` `~` `()` `""` `+` `-` `=` `&` `|` `[]` `{}` `*` `?` `\` `/` are reserved characters for search syntax.
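+
+The following Python sketch illustrates these expressions with the Python SDK's `match_text()` method (its exact signature - fields, matching text, top n, extra options - is assumed here); column names are illustrative:
+
+```python
+# OR is the default semantic between terms.
+table_object.output(["body", "_score"]).match_text("body", "Bloom filter", 10).to_pl()
+# Explicit AND.
+table_object.output(["body", "_score"]).match_text("body", "space AND efficient", 10).to_pl()
+# CARAT boost: "quick" counts twice as much as "brown".
+table_object.output(["body", "_score"]).match_text("body", "quick^2 brown", 10).to_pl()
+# Sloppy phrase search within a tolerable distance of 10 words.
+table_object.output(["body", "_score"]).match_text("body", '"harmful chemical"~10', 10).to_pl()
+```
+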
+### Scoring
-`OR` is the default semantic among multiple terms if user does not specify in search syntax. Infinity offers `BM25` scoring and block-max `WAND` for dynamic pruning to accelerate the multiple terms search processing. There are two approaches to bypass `BM25` scoring:
+Infinity offers `BM25` scoring and block-max `WAND` for dynamic pruning to accelerate multi-term searches. To bypass `BM25` scoring, do either of the following:
-* Using `keyword` analyzer when creating index, then `BM25` will not be used and it will return the score based on whether keywords are hit.
-* Specifying `similarity=boolean` during searching. Then the scoring is decided by the number of keywords hits.
+- Set `"analyzer"` to `"keyword"` when creating index (to select the keyword analyzer).
+ *The returned score will then be based on whether keywords are matched.*
+- Add `{"similarity": "boolean"}` as a search option.
+ *The scoring will then depend on the number of matched keywords.*
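+
+For example, a hedged Python sketch of the second approach (the `match_text()` signature is assumed; the option name comes from this guide):
+
+```python
+# Score by the number of matched keywords instead of BM25.
+table_object.output(["tags", "_score"]).match_text(
+    "tags", "python cpp rust", 10, {"similarity": "boolean"}
+).to_pl()
+```
+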
## Dense vector search
@@ -105,7 +194,7 @@ Infinity offers three types of rerankers for fusion:
## Conditional filters
-Conditional filters in Infinity must work through an index to facilitate search. There are two types of indexes in Infinity that support conditional filters:
+Conditional filters in Infinity must work through an index to facilitate search. The following two types of indexes in Infinity support conditional filters:
- **Secondary index**: Built on numeric or string columns. This index does not apply any tokenization to a string column when using conditional filters.
- **Full-text index**: Built on full-text columns. This index applies tokenization to the full-text column but does not trigger any relevance scoring procedure.
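+
+A brief Python sketch of a filtered query (the `filter()` syntax follows the Python API Reference; column names are illustrative, and a matching secondary or full-text index is assumed to exist):
+
+```python
+# Range filter backed by a secondary index on the "year" column.
+table_object.output(["*"]).filter("year >= 2000 and year <= 2010").to_pl()
+
+# Full-text filter backed by a full-text index on the "doc" column.
+table_object.output(["*"]).filter("filter_fulltext('doc', 'first second', 'minimum_should_match=2')").to_pl()
+```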
diff --git a/docs/guides/set_up_cluster.md b/docs/guides/set_up_cluster.md
new file mode 100644
index 0000000000..abfeb8a1a9
--- /dev/null
+++ b/docs/guides/set_up_cluster.md
@@ -0,0 +1,192 @@
+---
+sidebar_position: 2
+slug: /set_up_cluster
+---
+# Set up an Infinity cluster
+
+Architecture overview and user guide for Infinity clusters.
+
+---
+
+## Overview
+
+An Infinity cluster consists of one leader node, up to four follower nodes, and several learner nodes:
+
+- **Leader node**: A read node and the only write node.
+- **Follower node**: A read node.
+- **Learner node**: A read node.
+
+As of v0.5.0, the supported shared storage is MinIO.
+
+![infinity_cluster](https://github.com/user-attachments/assets/3e9abeed-1698-4741-8bdb-ba3b05c1d7a3)
+
+### Architecture
+
+Infinity employs a distributed architecture comprising one leader node, *N* follower nodes (0 ≤ *N* ≤ 4), and a number of learner nodes. As illustrated in the diagram above, all nodes in the cluster use MinIO for persistent storage.
+
+- **Leader node**: The node responsible for processing transactions and managing the connection status of the other nodes in the cluster. When a transaction occurs, the leader node transmits the logs to both follower and learner nodes. The leader node confirms the completion of the transaction only upon receiving messages confirming completion of log persistence from *all* follower nodes.
+- **Follower node**: Receives log/WAL from the leader synchronously. It acts as a backup for the leader node, maintaining strong consistency with the leader's data state.
+- **Learner node**: Receives log/WAL from the leader *asynchronously*. A learner also serves as a backup for the leader node. However, its state may be behind that of the leader, because it is not required to maintain strong consistency with the leader, nor does the leader need to confirm whether all learner nodes have completed log persistence.
+
+From the user's perspective, the leader is the only write node, and all write operations must go through the leader node; all nodes in the cluster serve as read nodes, allowing you to send read operations to any of the leader, follower, or learner nodes, thereby alleviating the write burden on the leader.
+
+### Startup and communication processes
+
+When started up as a cluster node (see [Customize configuration files for cluster](#customize-configuration-files-for-cluster)), a node enters `ADMIN` mode, but is not automatically assigned a role like leader, follower, or learner. You must call `ADMIN SET NODE ROLE` to assign it a role. Once a leader node starts, it reads logs from the local disk to determine the metadata and data to read from shared storage.
+
+Once you set a node to follower or learner using `ADMIN SET NODE ROLE`, it registers with the leader node. Upon receiving the registration request, the leader node sends back its current log for the followers and learners to construct their data state from shared storage.
+
+### Keep-alive mechanism
+
+Once successfully registered with the leader node, a follower or learner starts sending periodic heartbeats to it. The leader node relies on these heartbeats to manage the connection status of each node. For example, if it does not receive heartbeats from a particular node for a specified time period, it sets that node's connection status to `timeout`.
+
+### Log synchronization
+
+When a transaction occurs, the leader node sends its log to both follower and learner nodes. The leader confirms the transaction's completion only after receiving confirmation that all its follower nodes have successfully persisted the log. While the leader also sends logs to learner nodes, it does not require confirmation from them.
+
+### Mode and role transition
+
+![mode_transition](https://github.com/user-attachments/assets/932072a3-9ffb-4aad-89f1-7eef0fff931c)
+
+## Set up an Infinity cluster
+
+### Customize configuration files for cluster
+
+For *each* cluster node, you are required to prepare a customized configuration file to start it. Ensure that you properly set `server_mode`, `peer_ip`, `peer_port`, `storage_type`, and other related parameters.
+
+1. Set `server_mode` to `"admin"`.
+2. Set `storage_type` to `"minio"`.
+3. Set `peer_ip` and `peer_port`.
+4. Update object storage-specific settings.
+5. Save your changes and start up Infinity using the customized configuration file.
+ *When a cluster node starts, it automatically operates in `ADMIN` mode.*
+
+For further instructions on specifying a configuration file or setting parameters, see [Configurations](https://infiniflow.org/docs/dev/configurations).
+
+### Set the leader node
+
+A cluster can have only one leader node. If your cluster does not yet have a leader node, call `ADMIN SET NODE ROLE` to promote the node you just started, which currently operates in `ADMIN` mode, to leader. Below is an example:
+
+```shell
+curl --request POST \
+ --url http://localhost:23821/admin/node/current \
+ --header 'accept: application/json' \
+ --header 'content-type: application/json' \
+ --data ' {
+ "role" : "leader",
+ "name" : "Harry",
+ } '
+```
+
+*When the method call succeeds, the node switches to leader and operates in `CLUSTER` mode.*
+
+:::tip NOTE
+
+A node in `ADMIN` mode with `storage_type = "minio"` or in `CLUSTER` mode (as a follower or learner node) can be promoted to leader.
+
+:::
+
+You can also use `ADMIN SHOW CURRENT NODE` to verify the node's role and connection status:
+
+```shell
+curl --request GET \
+ --url http://localhost:23821/admin/node/current \
+ --header 'accept: application/json'
+```
+
+### Set a follower node
+
+If the current node operates in `ADMIN` mode and the number of follower nodes in your cluster is less than four, call `ADMIN SET NODE ROLE` to promote this node to a follower node:
+
+```shell
+curl --request POST \
+ --url http://localhost:23822/admin/node/current \
+ --header 'accept: application/json' \
+ --header 'content-type: application/json' \
+ --data ' {
+ "role" : "follower",
+ "name" : "Hermione",
+ "address" : "0.0.0.0:23851"
+ } '
+```
+
+*When the method call succeeds, the node is promoted to follower and registered with the leader node, which listens on `0.0.0.0:23851`.*
+
+:::tip NOTE
+
+A node in `ADMIN` mode with `storage_type = "minio"` can be promoted to a follower node.
+
+:::
+
+### Set a learner node
+
+If the current node operates in `ADMIN` mode, call `ADMIN SET NODE ROLE` to promote it to a learner node:
+
+```shell
+curl --request POST \
+ --url http://localhost:23823/admin/node/current \
+ --header 'accept: application/json' \
+ --header 'content-type: application/json' \
+ --data ' {
+ "role" : "learner",
+ "name" : "Ron",
+ "address" : "0.0.0.0:23851"
+ } '
+```
+
+*When the method call succeeds, the node is promoted to learner and registered with the leader node, which listens on `0.0.0.0:23851`.*
+
+:::tip NOTE
+
+Only a node in `ADMIN` mode with `storage_type = "minio"` can be promoted to learner node.
+
+:::
+
+### Check cluster health status
+
+You can send an `ADMIN LIST NODES` HTTP request to any node in the cluster to display the health status of all nodes. In the following code example, a follower node is called:
+
+```shell
+curl --request GET \
+ --url http://localhost:23822/admin/nodes \
+ --header 'accept: application/json'
+```
+
+*When the method call succeeds, you get the following information about each node:*
+
+- *The HTTP address of the node.*
+- *The number of heartbeats received from the leader node.*
+- *The name of the node.*
+- *The role of the node: leader, follower, or learner.*
+- *The connection status of the node.*
+- *The last time that the node was updated.*
+
+:::tip NOTE
+
+See `ADMIN LIST NODES` for further details.
+
+:::
+
+### Remove a node from the cluster
+
+Call `ADMIN REMOVE NODE` to remove a node from the cluster. Note that you must send your HTTP request to the leader node for this action. In the following code example, learner Ron will be removed:
+
+```shell
+curl --request DELETE \
+ --url http://localhost:23821/admin/node/ron \
+ --header 'accept: application/json' \
+ --header 'content-type: application/json'
+```
+
+*When the method call succeeds, the node operates in `ADMIN` mode and is unregistered.*
+
+## Distributed APIs
+
+- [ADMIN SET NODE ROLE](https://infiniflow.org/docs/dev/http_api_reference#admin-set-node-role)
+- [ADMIN SHOW NODE VARIABLES](https://infiniflow.org/docs/dev/http_api_reference#admin-show-node-variables)
+- [ADMIN SHOW NODE CONFIGS](https://infiniflow.org/docs/dev/http_api_reference#admin-show-node-configs)
+- [ADMIN SHOW NODE VARIABLE](https://infiniflow.org/docs/dev/http_api_reference#admin-show-node-variable)
+- [ADMIN SHOW CURRENT NODE](https://infiniflow.org/docs/dev/http_api_reference#admin-show-current-node)
+- [ADMIN SHOW NODE](https://infiniflow.org/docs/dev/http_api_reference#admin-show-node)
+- [ADMIN LIST NODES](https://infiniflow.org/docs/dev/http_api_reference#admin-list-nodes)
+- [ADMIN REMOVE NODE](https://infiniflow.org/docs/dev/http_api_reference#admin-remove-node)
\ No newline at end of file
diff --git a/docs/references/benchmark.md b/docs/references/benchmark.md
index f056a974b7..b66c36bd76 100644
--- a/docs/references/benchmark.md
+++ b/docs/references/benchmark.md
@@ -1,8 +1,9 @@
---
-sidebar_position: 1
+sidebar_position: 3
slug: /benchmark
---
# Benchmark
+
This document compares the following key specifications of Elasticsearch, Qdrant, Quickwit and Infinity:
- Time to insert & build index
diff --git a/docs/references/configurations.mdx b/docs/references/configurations.mdx
index ad83630f99..38be8dfc3c 100644
--- a/docs/references/configurations.mdx
+++ b/docs/references/configurations.mdx
@@ -1,5 +1,5 @@
---
-sidebar_position: 5
+sidebar_position: 0
slug: /configurations
---
@@ -7,6 +7,10 @@ slug: /configurations
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+How to set and load a configuration file when starting Infinity.
+
+---
+
This document provides instructions for loading configuration file for Infinity and descriptions of each configuration entry.
@@ -59,6 +63,12 @@ time_zone = "utc-8"
# The number of worker threads. Defaults to the number of CPU cores.
# Range: [1, 16384]
cpu_limit = 8
+# The mode in which the server starts. Available options:
+# - `"standalone"`: Start Infinity as a standalone server.
+# - `"admin"`:
+# - Start Infinity either as a standalone server in `ADMIN` mode (when `storage_type` is set to `"local"`)
+# - Start Infinity as a cluster node in `ADMIN` mode (when `storage_type` is set to `"minio"`)
+server_mode = "standalone"
# Network configuration
[network]
@@ -74,9 +84,9 @@ client_port = 23817
# The maximum number of connections. Defaults to 256.
# Range: [1, 65536]
connection_pool_size = 128
-# The IP address of the Infinity peer server to be accessed by a peer node
+# The IP address on which the current node listens. Used for registration and inter-node communication
peer_ip = "0.0.0.0"
-# The port of the Infinity peer server to be accessed by a peer node
+# The port number on which the current node listens. Used for registration and inter-node communication
peer_port = 23850
# The delay time for reconnecting to the Infinity peer server after a failed connection
@@ -137,10 +147,24 @@ compact_interval = "120s"
# the system performs a flush operation on that index.
# Range: [8192, 8388608]
mem_index_capacity = 1048576
-# Storage type. Defaults to "local".
+# The type of storage to use. Available options:
+# - `"local"`: (default)
+# - `"minio"`: If you set `server_mode` to `"admin"` and `storage_type` to `"minio"`, the node will start as a cluster node in `ADMIN` mode.
# Range: {"local"|"minio"}
storage_type = "local"
+# The number of dense vector index building worker threads. Defaults to half the number of CPU cores.
+# Range: [1, number of CPU cores]
+dense_index_building_worker = 2
+
+# The number of sparse vector index building worker threads. Defaults to half the number of CPU cores.
+# Range: [1, number of CPU cores]
+sparse_index_building_worker = 2
+
+# The number of fulltext index building worker threads. Defaults to half the number of CPU cores.
+# Range: [1, number of CPU cores]
+fulltext_index_building_worker = 2
+
# Object storage configuration
[storage.object_storage]
# URL of the object storage server
diff --git a/docs/references/faq.md b/docs/references/faq.md
index 72febbd874..4fff1765a7 100644
--- a/docs/references/faq.md
+++ b/docs/references/faq.md
@@ -1,10 +1,12 @@
---
-sidebar_position: 2
+sidebar_position: 4
slug: /FAQ
---
# Frequently asked questions
+FAQs to be developed.
+
## What is Retrieval-Augmented Generation?
Retrieval Augmented Generation (RAG) is a technique used to improve the accuracy and reliability of responses from foundation models, specifically Large Language Models (LLMs). It works by supplementing the existing LLMs with external sources of knowledge.
@@ -21,4 +23,4 @@ In addition to basic vector search, an AI-vector database also offers advanced c
## Where can I find a benchmark report of your database?
-You can find a benchmark report on Infinity, the AI-native database, [here](../references/benchmark.md).
\ No newline at end of file
+You can find a benchmark report on Infinity, the AI-native database, [here](../references/benchmark.md).
diff --git a/docs/references/http_api_reference.mdx b/docs/references/http_api_reference.mdx
index 1259237108..86273816dc 100644
--- a/docs/references/http_api_reference.mdx
+++ b/docs/references/http_api_reference.mdx
@@ -1,5 +1,5 @@
---
-sidebar_position: 3
+sidebar_position: 1
slug: /http_api_reference
---
@@ -976,10 +976,12 @@ curl --request POST \
- `"standard"`: (Default) Standard analyzer, segmented by tokens, lowercase processing, provides stemming outputs. Use `-` to specify stemmer for languages, `English` is the default stemmer: `"standard-english"` and `"standard"` have the same stemmer setting. Supported language stemmer includes: `Danish`, `Dutch`, `English`, `Finnish`, `French`, `German`, `Hungarian`, `Italian`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`,`Russian`,`Spanish`,`Swedish`,`Turkish`.
- `"rag"`: Multilingual RAG analyzer imported from [RAGFlow](https://github.com/infiniflow/ragflow/blob/main/rag/nlp/rag_tokenizer.py), supporting `Chinese` and `English`. Use `-fine` to output the fine-grained analyzer results.
- `"chinese"`: Simplified Chinese. Use `-fine` to output the fine-grained analyzer results.
- - `"traditional"`: Traditional Chinese
- - `"japanese"`: Japanese
- - `"korean"`: Korean
- - `"ngram"`: [N-gram](https://en.wikipedia.org/wiki/N-gram)
+ - `"ik"`: Bilingual analyzer imported from [ik-analyzer](https://github.com/infinilabs/analysis-ik), supporting `Chinese` and `English`. Use `-fine` to output the fine-grained analyzer results.
+ - `"traditional"`: Traditional Chinese.
+ - `"japanese"`: Japanese.
+ - `"korean"`: Korean.
+ - `"ngram"`: [N-gram](https://en.wikipedia.org/wiki/N-gram).
+ - `"keyword"`: "noop" analyzer used for columns containing keywords only.
- Parameter settings for a secondary index:
- `"type"`: `"secondary"`
- Parameter settings for a BMP index:
@@ -1773,6 +1775,10 @@ Searches for data in a specified table. The search can range from a simple vecto
- `"highlight"`: `string[]`
- `"filter"`: `string`
- `"fusion"`: `object`
+ - `"sort"` : `object[]`
+ - `"limit"` : `string`
+ - `"offset"` : `string`
+ - `"option"` : `object`
##### Request example
@@ -1910,10 +1916,10 @@ curl --request GET \
A non-empty text string to search for. Used *only* when `"match_method"` is set to `"text"`.
You can use various search options within the matching text, including:
- Single terms: `"blooms"`
- - OR multiple terms: `"Bloom OR filter"`, `"Bloom || filter"` or just `"Bloom filter"`
+ - OR multiple terms: `"Bloom OR filter"` or just `"Bloom filter"`
- Phrase search: `'"Bloom filter"'`
- - AND multiple terms: `"space AND efficient"`, `"space && efficient"` or `"space + efficient"`
- - Escaping reserved characters: `"space\-efficient"`
+ - AND multiple terms: `"space AND efficient"`
+ - Escaping reserved characters: `"space\:efficient"`
- Sloppy phrase search: `'"harmful chemical"~10'`
- Field-specific search: `"title:(quick OR brown) AND body:foobar"`
- `element_type`: `str`, *Required*
@@ -1977,17 +1983,17 @@ curl --request GET \
- If `"fields"` is an empty string, this parameter specifies the default field to search on.
- **"operator"**: `str`, *Optional*
- If not specified, the search follows Infinity's full-text search syntax, meaning that logical and arithmetic operators, quotation marks and escape characters will function as full-text search operators, such as:
- - AND operator: `AND`, `&&`, `+`
- - OR operator: `OR`, `||`
- - NOT operator: `NOT`, `!`, `-`
+ - AND operator: `AND`
+ - OR operator: `OR`
+ - NOT operator: `NOT`
- PAREN operator: `(`, `)`, need to appear in pairs, and can be nested.
- COLON operator: `:`: Used to specify field-specific search, e.g., `body:foobar` searches for `foobar` in the `body` field.
- CARAT operator: `^`: Used to boost the importance of a term, e.g., `quick^2 brown` boosts the importance of `quick` by a factor of 2, making it twice as important as `brown`.
- TILDE operator: `~`: Used for sloppy phrase search, e.g., `"harmful chemical"~10` searches for the phrase `"harmful chemical"` within a tolerable distance of 10 words.
- SINGLE_QUOTED_STRING: Used to search for a phrase, e.g., `'Bloom filter'`.
- DOUBLE_QUOTED_STRING: Used to search for a phrase, e.g., `"Bloom filter"`.
- - Escape characters: Used to escape reserved characters, e.g., `space\-efficient`. Starting with a backslash `\` will escape the following characters:
- `' '`, `'+'`, `'-'`, `'='`, `'&'`, `'|'`, `'!'`, `'('`, `')'`, `'{'`, `'}'`, `'['`, `']'`, `'^'`, `'"'`, `'~'`, `'*'`, `'?'`, `':'`, `'\'`, `'/'`
+ - Escape characters: Used to escape reserved characters, e.g., `space\:efficient`. Starting with a backslash `\` will escape the following characters:
+ `' '`, `'('`, `')'`, `'^'`, `'"'`, `'\''`, `'~'`, `'*'`, `'?'`, `':'`, `'\\'`
- If specified, Infinity's full-text search syntax will not take effect, and the specified operator will be interpolated into `matching_text`.
Useful for searching text including code numbers like `"A01-233:BC"`.
- `{"operator": "or"}`: Interpolates the `OR` operator between words in `matching_text` to create a new search text.
@@ -2004,7 +2010,17 @@ curl --request GET \
- `"query_tensor"`: The tensor data to compare against. This should be provided as a list of lists of numerical values.
- `"element_type"`: The element data type of the query tensor. Usually `"float"`.
+- `"sort"` : `object[]`
+ Defines how to sort the results.
+- `"limit"` : `string`
+ Indicates the limit row count.
+
+- `"offset"` : `string`
+ Indicates the offset position of the limit expression. You must use this parameter together with `limit`.
+
+- `"option"` : `object`
+ Indicates some search options. This parameter must be used in conjunction with `limit`.
#### Response
@@ -2027,11 +2043,14 @@ The response includes a JSON object like the following:
"age": 16
}
-    ],
+    ],
+    "total_hits_count": 3
}
```
- `"error_code"`: `integer`
`0`: The operation succeeds.
+- `"total_hits_count"`: `integer`, Optional
+ Available if you set a search option with `"total_hits_count": "true"`
@@ -3485,6 +3504,73 @@ A `500` HTTP status code indicates an error condition. The response includes a J
---
+
+### Admin remove node
+
+**DELETE** `/admin/node/{node_name}`
+
+Removes a node from the cluster. This command can only be executed by the leader node.
+
+#### Request
+
+- Method: DELETE
+- URL: `/admin/node/{node_name}`
+- Headers:
+ - `accept: application/json`
+ - `content-type: application/json`
+
+##### Request example
+
+```shell
+curl --request DELETE \
+ --url http://localhost:23821/admin/node/follower1 \
+ --header 'accept: application/json' \
+ --header 'content-type: application/json'
+```
+
+#### Response
+
+
+
+
+The response includes a JSON object like the following:
+
+```shell
+{
+ "error_code": 0
+}
+```
+
+- `"error_code"`: `integer`
+ `0`: The operation succeeds.
+
+
+
+
+A `500` HTTP status code indicates an error condition. The response includes a JSON object like the following:
+
+```shell
+{
+ "error_code":7020
+ "error_message" : "Duplicate node: following"
+}
+```
+
+- `"error_code"`: `integer`
+ A non-zero value indicates a specific error condition.
+- `"error_message"`: `string`
+ When `error_code` is non-zero, `"error_message"` provides additional details about the error.
+
+
+
+
+---
+
### Admin show node variables
**GET** `/admin/variables`
@@ -3673,7 +3759,7 @@ Gets information about the currently connected node.
#### Request
- Method: GET
-- URL: `/admin/node/{node_name}`
+- URL: `/admin/node/current`
- Headers: `accept: application/json`
##### Request example
@@ -3781,7 +3867,7 @@ A `500` HTTP status code indicates an error condition. The response includes a J
**GET** `/admin/nodes`
Lists all nodes in the cluster.
#### Request
@@ -3809,19 +3895,37 @@ The response includes a JSON object like the following:
```shell
{
- "error_code":0,
- "nodes":[
- ["following","follower"],
- ["boss","leader"],
- ["learning","learner"]
- ]
+ "error_code": 0,
+ "nodes": [
+ {
+ "address": "0.0.0.0:23852",
+ "heartbeat": "91",
+ "last_update": "Mon Dec 2 14:48:34 2024\n",
+ "name": "follower",
+ "role": "follower",
+ "status": "alive"
+ },
+ {
+ "address": "0.0.0.0:23851",
+ "heartbeat": "0",
+ "last_update": "Mon Dec 2 14:48:34 2024\n",
+ "name": "boss",
+ "role": "leader",
+ "status": "alive"
+ }
+ ]
}
```
-- `"error_code"`: `integer`
+- `"error_code"`: `integer`
`0`: The operation succeeds.
-- `"nodes" : array` :
- Each element is in `[nodename, noderole]` format.
+- `"nodes" : array` :
+ Each element is an object with following information:
+ - `"address"` : the address of the node.
+ - `"heartbeat"` : only valid for non-leader role, this is the number of heartbeat message received from the leader.
+ - `"name"` : the node's name.
+ - `"role"` : the node's role.
+ - `"status"` : the current status of the node.
---
diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md
index 87cd1306fe..5c09eabefb 100644
--- a/docs/references/pysdk_api_reference.md
+++ b/docs/references/pysdk_api_reference.md
@@ -1,5 +1,5 @@
---
-sidebar_position: 4
+sidebar_position: 2
slug: /pysdk_api_reference
---
# Python API Reference
@@ -901,11 +901,13 @@ An `IndexInfo` structure contains three fields,`column_name`, `index_type`, and
- `"ANALYZER"`: *Optional*
- `"standard"`: (Default) The standard analyzer, segmented by token, lowercase processing, and provides stemming output. Use `-` to specify the languages stemmer. `English` is the default stemmer: `"standard-english"` and `"standard"` are the same stemmer setting. Supported language stemmers include: `Danish`, `Dutch`, `English`, `Finnish`, `French`, `German`, `Hungarian`, `Italian`, `Norwegian`, `Porter`, `Portuguese`, `Romanian`, `Russian`, `Spanish`, `Swedish`, and `Turkish`.
- `"rag"`: Multilingual RAG analyzer imported from [RAGFlow](https://github.com/infiniflow/ragflow/blob/main/rag/nlp/rag_tokenizer.py), supporting `Chinese` and `English`. Use `-fine` to output the fine-grained analyzer results.
+ - `"ik"`: Bilingual analyzer imported from [ik-analyzer](https://github.com/infinilabs/analysis-ik), supporting `Chinese` and `English`. Use `-fine` to output the fine-grained analyzer results.
- `"chinese"`: Simplified Chinese. Use `-fine` to output the fine-grained analyzer results.
- - `"traditional"`: Traditional Chinese
- - `"japanese"`: Japanese
- - `"korean"`: Korean
- - `"ngram"`: [N-gram](https://en.wikipedia.org/wiki/N-gram)
+ - `"traditional"`: Traditional Chinese.
+ - `"japanese"`: Japanese.
+ - `"korean"`: Korean.
+ - `"ngram"`: [N-gram](https://en.wikipedia.org/wiki/N-gram).
+ - `"keyword"`: "noop" analyzer used for columns containing keywords only.
- Parameter settings for a secondary index:
No parameters are required. For now, use an empty list `[]`.
- Parameter settings for a BMP index:
@@ -1569,7 +1571,7 @@ table_object.delete("c1 >= 70 and c1 <= 90")
---
-### update data
+### update
```python
table_object.update(cond, data)
@@ -1786,13 +1788,134 @@ table_object.output(["*"]).filter("filter_fulltext('doc', 'first second', 'minim
---
+### sort
+
+```python
+table_object.sort(sort_expression_list)
+```
+
+Creates a sort expression using `sort_expression_list`.
+
+#### Parameters
+
+##### sort_expression_list: `list`, *Required*
+
+An expression list defining how to sort the results.
+
+#### Returns
+
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
+- Failure: `InfinityException`
+ - `error_code`: `int` A non-zero value indicating a specific error condition.
+ - `error_msg`: `str` A message providing additional details about the error.
+
+#### Examples
+
+```python
+# Output results sorted by the `c2` expression in ascending order and the `c1` expression in descending order
+table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Desc]]).to_df()
+```
+
+---
+
+### limit
+
+```python
+table_object.limit(limit_num)
+```
+
+Creates an expression that limits the number of output rows to a maximum of `limit_num`.
+
+#### Parameters
+
+##### limit_num: `int`, *Required*
+
+An integer specifying the maximum number of output rows.
+
+#### Returns
+
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
+- Failure: `InfinityException`
+ - `error_code`: `int` A non-zero value indicating a specific error condition.
+ - `error_msg`: `str` A message providing additional details about the error.
+
+#### Examples
+
+```python
+# Limit the output row count to a maximum of two
+table_instance.output(["num", "vec"]).limit(2).to_pl()
+```
+
+---
+
+### offset
+
+```python
+table_object.limit(limit_num).offset(offset_value)
+```
+
+Creates a limit expression with an offset value, setting the output to start from `offset_value` and limiting the row count to a maximum of `limit_num`. This method must be used in conjunction with `limit()`.
+
+#### Parameters
+
+##### offset_value: `int`, *Required*
+
+An integer specifying the offset of the limit expression, i.e. the number of rows to skip before results are returned.
+
+#### Returns
+
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
+- Failure: `InfinityException`
+ - `error_code`: `int` A non-zero value indicating a specific error condition.
+ - `error_msg`: `str` A message providing additional details about the error.
+
+#### Examples
+
+```python
+# Limit the output to a maximum of two rows, starting from position 1
+table_instance.output(["num", "vec"]).offset(1).limit(2).to_pl()
+```
+
+---
+
+### option
+
+```python
+table_object.option(option_dict)
+```
+
+Specifies search options for the query.
+
+#### Parameters
+
+##### option_dict: `dict`, *Required*
+
+A dictionary specifying the following search options:
+
+- **"total_hits_count"**: `bool`, *Optional*
+ - Must be used together with a limit expression. If `"total_hits_count"` is `True`, the query returns an extra result containing the total number of rows matching the query.
+
+#### Returns
+
+- Success: An `infinity.local_infinity.table.LocalTable` object in embedded mode or an `infinity.remote_thrift.table.RemoteTable` object in client-server mode.
+- Failure: `InfinityException`
+ - `error_code`: `int` A non-zero value indicating a specific error condition.
+ - `error_msg`: `str` A message providing additional details about the error.
+
+#### Examples
+
+```python
+# Limit the output to a maximum of two rows, starting from position 1, and return an extra result with the total hit count
+table_instance.output(["num", "vec"]).limit(2).offset(1).option({"total_hits_count": True}).to_pl()
+```
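+
+The extra result is returned as the second element of the tuple produced by `to_pl()`, `to_df()`, or `to_arrow()`. Below is a minimal sketch of reading it; the exact shape of the returned dict (a `"total_hits_count"` key) is an assumption mirroring the HTTP API option of the same name:
+
+```python
+# Request the total hit count along with a limited result set
+result, extra_result = table_instance.output(["num", "vec"]).limit(2).offset(1).option({"total_hits_count": True}).to_pl()
+print(result)
+if extra_result is not None:
+    # Assumed to carry the total number of rows matching the query, e.g. {"total_hits_count": 9}
+    print(extra_result)
+```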
+
+---
+
### match_dense
```python
table_object.match_dense(vector_column_name, embedding_data, embedding_data_type, distance_type, topn, knn_params = None)
```
-Creates a dense vector search expression to identify the top n closest rows to the given dense vector. Suitable for working with dense vectors (dense embeddings) or multi-vectors (multiple dense embeddings in one row).
+Creates a dense vector search expression to identify the top n rows closest to the given dense vector. Suitable for working with dense vectors (dense embeddings) or multi-vectors (multiple dense embeddings in one row).
:::tip NOTE
To display your query results, you must chain this method with `output(columns)`, which specifies the columns to output, and a method such as `to_pl()`, `to_df()`, or `to_arrow()` to format the query results.
@@ -2031,10 +2154,10 @@ To display your query results, you must chain this method with `output(columns)`
A non-empty text string to search for. You can use various search options within the matching text, including:
- Single terms: `"blooms"`
-- OR multiple terms: `"Bloom OR filter"`, `"Bloom || filter"` or just `"Bloom filter"`
+- OR multiple terms: `"Bloom OR filter"` or just `"Bloom filter"`
- Phrase search: `'"Bloom filter"'`
-- AND multiple terms: `"space AND efficient"`, `"space && efficient"` or `"space + efficient"`
-- Escaping reserved characters: `"space\-efficient"`
+- AND multiple terms: `"space AND efficient"`
+- Escaping reserved characters: `"space\:efficient"`
- Sloppy phrase search: `'"harmful chemical"~10'`
- Field-specific search: `"title:(quick OR brown) AND body:foobar"`
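+
+For instance, a minimal sketch exercising several of these syntaxes (it assumes a table object whose `body` column already has a full-text index):
+
+```python
+for question in ["blooms", "space AND efficient", '"Bloom filter"', '"harmful chemical"~10']:
+    # Each query returns the top 10 matches together with their relevance scores
+    result, extra_result = table_object.output(["body", "_score"]).match_text("body", question, 10).to_pl()
+    print(result)
+```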
@@ -2050,17 +2173,17 @@ An optional dictionary specifying the following search options:
- If `"fields"` is an empty string, this parameter specifies the default field to search on.
- **"operator"**: `str`, *Optional*
- If not specified, the search follows Infinity's full-text search syntax, meaning that logical and arithmetic operators, quotation marks and escape characters will function as full-text search operators, such as:
- - AND operator: `AND`, `&&`, `+`
- - OR operator: `OR`, `||`
- - NOT operator: `NOT`, `!`, `-`
+ - AND operator: `AND`
+ - OR operator: `OR`
+ - NOT operator: `NOT`
- PAREN operator: `(`, `)`, need to appear in pairs, and can be nested.
- COLON operator: `:`: Used to specify field-specific search, e.g., `body:foobar` searches for `foobar` in the `body` field.
- CARAT operator: `^`: Used to boost the importance of a term, e.g., `quick^2 brown` boosts the importance of `quick` by a factor of 2, making it twice as important as `brown`.
- TILDE operator: `~`: Used for sloppy phrase search, e.g., `"harmful chemical"~10` searches for the phrase `"harmful chemical"` within a tolerable distance of 10 words.
- SINGLE_QUOTED_STRING: Used to search for a phrase, e.g., `'Bloom filter'`.
- DOUBLE_QUOTED_STRING: Used to search for a phrase, e.g., `"Bloom filter"`.
- - Escape characters: Used to escape reserved characters, e.g., `space\-efficient`. Starting with a backslash `\` will escape the following characters:
- `' '`, `'+'`, `'-'`, `'='`, `'&'`, `'|'`, `'!'`, `'('`, `')'`, `'{'`, `'}'`, `'['`, `']'`, `'^'`, `'"'`, `'~'`, `'*'`, `'?'`, `':'`, `'\'`, `'/'`
+ - Escape characters: Used to escape reserved characters, e.g., `space\:efficient`. Starting with a backslash `\` will escape the following characters:
+ `' '`, `'('`, `')'`, `'^'`, `'"'`, `'\''`, `'~'`, `'*'`, `'?'`, `':'`, `'\\'`
- If specified, Infinity's full-text search syntax will not take effect, and the specified operator will be interpolated into `matching_text`.
Useful for searching text including code numbers like `"A01-233:BC"`.
- `{"operator": "or"}`: Interpolates the `OR` operator between words in `matching_text` to create a new search text.
@@ -2283,7 +2406,7 @@ We recommend calling `to_df()`, `to_pl()`, or `to_arrow()` to format your result
#### Returns
-`tuple[dict[str, list[Any]], dict[str, Any]]`
+A `tuple[dict[str, list[Any]], dict[str, Any], {}]` object.
### to_df
@@ -2291,7 +2414,7 @@ We recommend calling `to_df()`, `to_pl()`, or `to_arrow()` to format your result
table_object.to_df()
```
-Returns the query result in pandas DataFrame format.
+Returns the query result as a tuple consisting of a pandas DataFrame and a dict.
:::tip NOTE
Call `to_df()` in a chain after (not necessarily "immediately after") `output(columns)` on the same table object.
@@ -2299,13 +2422,13 @@ Call `to_df()` in a chain after (not necessarily "immediately after") `output(co
#### Returns
-A `pandas.DataFrame` object.
+A `tuple[pandas.DataFrame, {}]` object.
#### Examples
```python
# Format columns "c1" and C2" of the current table into a pandas DataFrame
-res = table_object.output(["c1", "c2"]).to_df()
+res, extra_res = table_object.output(["c1", "c2"]).to_df()
```
### to_pl
@@ -2314,7 +2437,7 @@ res = table_object.output(["c1", "c2"]).to_df()
table_object.to_pl()
```
-Returns the query result in Polas DataFrame format.
+Returns the query result as a tuple consisting of a Polars DataFrame and a dict.
:::tip NOTE
Call `to_pl()` in a chain after (not necessarily "immediately after") `output(columns)` on the same table object.
@@ -2322,13 +2445,13 @@ Call `to_pl()` in a chain after (not necessarily "immediately after") `output(co
#### Returns
-A `polas.DataFrame` object.
+A `tuple[polars.DataFrame, {}]` object.
#### Examples
```python
-# Format a vector search result into a Polas DataFrame.
-res = table_object.output(["*"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float", "ip", 10).to_pl()
+# Format a vector search result into a Polars DataFrame.
+res, extra_res = table_object.output(["*"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float", "ip", 10).to_pl()
```
### to_arrow
@@ -2337,7 +2460,7 @@ res = table_object.output(["*"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float
table_object.to_arrow()
```
-Returns the query result in Apache Arrow Table format.
+Returns the query result as a tuple consisting of an Apache Arrow Table and a dict.
:::tip NOTE
Call `to_arrow()` in a chain after (not necessarily "immediately after") `output(columns)` on the same table object.
@@ -2345,13 +2468,13 @@ Call `to_arrow()` in a chain after (not necessarily "immediately after") `output
#### Returns
-A `pyarrow.Table` object.
+A `tuple[pyarrow.Table, {}]` object.
#### Examples
```python
# Format the current table object into an Apache Arrow Table.
-res = table_object.output(["*"]).filter("score >= 90").to_arrow()
+res, extra_result = table_object.output(["*"]).filter("score >= 90").to_arrow()
```
---
diff --git a/example/delete_update_data.py b/example/delete_update_data.py
index 2f1ac7ac9f..398c6edfe3 100644
--- a/example/delete_update_data.py
+++ b/example/delete_update_data.py
@@ -87,8 +87,10 @@
print('about to update data')
table_instance.update("num = 2", {"body": "unnecessary and harmful", "vec": [14.0, 7.2, 0.8, 10.9]})
- result = table_instance.output(["*"]).to_pl()
+ result, extra_result = table_instance.output(["*"]).to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
print('test done')
diff --git a/example/export_data.py b/example/export_data.py
index d39ce8a5b8..d3d655487e 100644
--- a/example/export_data.py
+++ b/example/export_data.py
@@ -86,7 +86,7 @@
},
{
"num": 7,
- "body": "Chris",
+ "name": "Chris",
"age": 21,
"score": 88.0,
},
diff --git a/example/filter_data.py b/example/filter_data.py
index abb067511d..a13eecfeab 100644
--- a/example/filter_data.py
+++ b/example/filter_data.py
@@ -72,7 +72,7 @@
},
{
"num": 7,
- "body": "Chris",
+ "name": "Chris",
"score": 88.0,
},
{
@@ -99,8 +99,10 @@
# result = table_instance.output(["num", "name", "score"]).filter("not (score > 80.0)").to_pl()
# print(result)
- result = table_instance.output(["num", "name", "score"]).filter("num <> 9").to_pl()
+ result, extra_result = table_instance.output(["num", "name", "score"]).filter("num <> 9").to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
print('test done')
diff --git a/example/filter_fulltext_keyword.py b/example/filter_fulltext_keyword.py
index 5809da1333..daf6e691a9 100644
--- a/example/filter_fulltext_keyword.py
+++ b/example/filter_fulltext_keyword.py
@@ -101,16 +101,22 @@
)
# output 7, 8, 9, 10
- result = table_instance.output(["*"]).filter("(score > 80.0) and (score <= 90.0)").to_pl()
+ result, extra_result = table_instance.output(["*"]).filter("(score > 80.0) and (score <= 90.0)").to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
# output 6, 8
- result = table_instance.output(["*"]).filter("filter_fulltext('uuid', 'UUID-2-1 UUID-2-3')").to_pl()
+ result, extra_result = table_instance.output(["*"]).filter("filter_fulltext('uuid', 'UUID-2-1 UUID-2-3')").to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
# output 8
- result = table_instance.output(["*"]).filter("(score > 80.0) and (score <= 90.0) and filter_fulltext('uuid', 'UUID-2-1 UUID-2-3')").to_pl()
+ result, extra_result = table_instance.output(["*"]).filter("(score > 80.0) and (score <= 90.0) and filter_fulltext('uuid', 'UUID-2-1 UUID-2-3')").to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
# drop table
db_instance.drop_table("my_table")
diff --git a/example/fulltext_search.py b/example/fulltext_search.py
index f3e6102187..75ad5c962d 100644
--- a/example/fulltext_search.py
+++ b/example/fulltext_search.py
@@ -86,13 +86,15 @@
r'"harmful chemical"~10', # sloppy phrase, refers to https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html
]
for question in questions:
- qb_result = (
+ qb_result, extra_result = (
table_instance.output(["num", "body", "_score"]).highlight(["body"])
.match_text("body", question, 10)
.to_pl()
)
print(f"question: {question}")
print(qb_result)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
diff --git a/example/fulltext_search_zh.py b/example/fulltext_search_zh.py
index 58f7deaf22..d658058c77 100644
--- a/example/fulltext_search_zh.py
+++ b/example/fulltext_search_zh.py
@@ -112,9 +112,11 @@
r'"Bloom filter"', # phrase: adjacent multiple terms
]
for question in questions:
- qb_result = table_instance.output(["num", "body", "_score"]).highlight(["body"]).match_text("body", question, 10).to_pl()
+ qb_result, extra_result = table_instance.output(["num", "body", "_score"]).highlight(["body"]).match_text("body", question, 10).to_pl()
print(f"question: {question}")
print(qb_result)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
diff --git a/example/functions.py b/example/functions.py
index 440216155b..a7821643b2 100644
--- a/example/functions.py
+++ b/example/functions.py
@@ -26,55 +26,85 @@
# varchar functions
#function char_length
-res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 1").to_df()
+res, extra_result = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 1").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 3").to_df()
+res, extra_result = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 3").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 4").to_df()
+res, extra_result = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = 4").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = char_length(c2)").to_df()
+res, extra_result = table_obj.output(["*", "char_length(c1)"]).filter("char_length(c1) = char_length(c2)").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function regex
-res = table_obj.output(["*", "regex(c1, 'bc')"]).filter("regex(c1, 'bc')").to_df()
+res, extra_result = table_obj.output(["*", "regex(c1, 'bc')"]).filter("regex(c1, 'bc')").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*"]).filter("regex(c1, '(\w+([-+.]\w+)*)@(\w+([-.]\w+)*)\.(\w+([-.]\w+)*)')").to_df()
+res, extra_result = table_obj.output(["*"]).filter("regex(c1, '(\w+([-+.]\w+)*)@(\w+([-.]\w+)*)\.(\w+([-.]\w+)*)')").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function substring
-res = table_obj.output(["*", "substring(c1, 0, 2)"]).filter("substring(c1, 0, 2) = 'ab'").to_df()
+res, extra_result = table_obj.output(["*", "substring(c1, 0, 2)"]).filter("substring(c1, 0, 2) = 'ab'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "substring(c1, 0, 4)"]).filter("substring(c1, 0, 4) = 'test'").to_df()
+res, extra_result = table_obj.output(["*", "substring(c1, 0, 4)"]).filter("substring(c1, 0, 4) = 'test'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function upper and lower
-res = table_obj.output(["*", "upper(c1)"]).filter("upper(c1) = 'TEST@GMAIL.COM'").to_df()
+res, extra_result = table_obj.output(["*", "upper(c1)"]).filter("upper(c1) = 'TEST@GMAIL.COM'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*"]).filter("lower('ABC') = c1").to_df()
+res, extra_result = table_obj.output(["*"]).filter("lower('ABC') = c1").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function ltrim, rtrim, trim
-res = table_obj.output(["*", "ltrim(c1)"]).filter("ltrim(c1) = 'abc'").to_df()
+res, extra_result = table_obj.output(["*", "ltrim(c1)"]).filter("ltrim(c1) = 'abc'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "rtrim(c1)"]).filter("rtrim(c1) = 'abc'").to_df()
+res, extra_result = table_obj.output(["*", "rtrim(c1)"]).filter("rtrim(c1) = 'abc'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "trim(c1)"]).filter("trim(c1) = 'abc'").to_df()
+res, extra_result = table_obj.output(["*", "trim(c1)"]).filter("trim(c1) = 'abc'").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*"]).filter("trim(' abc ') = rtrim(ltrim(' abc '))").to_df()
+res, extra_result = table_obj.output(["*"]).filter("trim(' abc ') = rtrim(ltrim(' abc '))").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function char_position
-res = table_obj.output(["*", "char_position(c1, 'bc')"]).filter("char_position(c1, c1) <> 0").to_df()
+res, extra_result = table_obj.output(["*", "char_position(c1, 'bc')"]).filter("char_position(c1, c1) <> 0").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
# math functions
db_obj.drop_table("function_example", ConflictType.Ignore)
@@ -87,27 +117,39 @@
{"c1": 9, "c2": 10}, {"c1": 11, "c2": 12}, {"c1": 13, "c2": 14}, {"c1": 15, "c2": 16},])
#function sqrt
-res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
+res, extra_result = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
-res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).filter("sqrt(c1) = 3").to_df()
+res, extra_result = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).filter("sqrt(c1) = 3").to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function round
-res = table_obj.output(["*", "round(c1)", "round(c2)"]).to_df()
+res, extra_result = table_obj.output(["*", "round(c1)", "round(c2)"]).to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function ceiling
-res = table_obj.output(["*", "ceil(c1)", "ceil(c2)"]).to_df()
+res, extra_result = table_obj.output(["*", "ceil(c1)", "ceil(c2)"]).to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function floor
-res = table_obj.output(["*", "floor(c1)", "floor(c2)"]).to_df()
+res, extra_result = table_obj.output(["*", "floor(c1)", "floor(c2)"]).to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
#function ln
-res = table_obj.output(["*", "ln(c1)", "ln(c2)"]).to_df()
+res, extra_result = table_obj.output(["*", "ln(c1)", "ln(c2)"]).to_df()
print(res)
+if extra_result is not None:
+ print(extra_result)
res = db_obj.drop_table("function_example")
diff --git a/example/http/insert_search_data.sh b/example/http/insert_search_data.sh
index ffd62a01b9..0a5d4af466 100755
--- a/example/http/insert_search_data.sh
+++ b/example/http/insert_search_data.sh
@@ -144,7 +144,11 @@ curl --request GET \
],
"filter": "num > 1 and year < 2024",
"offset": "1",
- "limit": "1"
+ "limit": "1",
+ "option":
+ {
+ "total_hits_count": "true"
+ }
} '
echo -e '\n\n-- search with dense vector'
diff --git a/example/hybrid_search.py b/example/hybrid_search.py
index 762d6f47f6..6692d84cc6 100644
--- a/example/hybrid_search.py
+++ b/example/hybrid_search.py
@@ -90,9 +90,9 @@
infinity.common.ConflictType.Error,
)
- result = (
+ result, extra_result = (
table_instance.output(
- ["num", "body", "vec", "sparse", "year", "tensor", "_score"]
+ ["num", "body", "vec", "sparse", "year", "tensor", "score()"]
)
.match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float", "cosine", 3)
.match_sparse(
@@ -108,6 +108,8 @@
.to_pl()
# .explain(explain_type=infinity.table.ExplainType.UnOpt)
)
+ if extra_result is not None:
+ print(extra_result)
print(result)
infinity_instance.disconnect()
diff --git a/example/import_data.py b/example/import_data.py
index 38027df883..a355fdba3e 100644
--- a/example/import_data.py
+++ b/example/import_data.py
@@ -48,8 +48,10 @@
table_instance.import_data(project_directory + "/../test/data/csv/fulltext_delete.csv",
{"header": True, "file_type": "csv", "delimiter": "\t"})
- result = table_instance.output(["num", "doc"]).to_pl()
+ result, extra_result = table_instance.output(["num", "doc"]).to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
diff --git a/example/search_with_limit_offset.py b/example/search_with_limit_offset.py
index ede57f24f1..7495275f8a 100644
--- a/example/search_with_limit_offset.py
+++ b/example/search_with_limit_offset.py
@@ -61,9 +61,11 @@
]
)
- result = table_instance.output(["num", "vec", "_similarity"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float",
- "cosine", 3).limit(2).offset(1).to_pl()
+ result, extra_result = table_instance.output(["num", "vec", "_similarity"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float", "cosine", 3).limit(2).offset(1).option({"total_hits_count": True}).to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
+
infinity_instance.disconnect()
print('test done')
diff --git a/example/secondary_index.py b/example/secondary_index.py
index 48d1eefbaa..3c4a7aac5a 100644
--- a/example/secondary_index.py
+++ b/example/secondary_index.py
@@ -55,8 +55,10 @@
)
table_instance.create_index("index1", infinity.index.IndexInfo("id", infinity.index.IndexType.Secondary))
- res = table_instance.filter("id='ID_1'").output(["*"]).to_pl()
+ res, extra_result = table_instance.filter("id='ID_1'").output(["*"]).to_pl()
print(res)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
diff --git a/example/simple_example.py b/example/simple_example.py
index d4117f200f..73799d4c5c 100644
--- a/example/simple_example.py
+++ b/example/simple_example.py
@@ -61,8 +61,10 @@
]
)
- res = table_instance.output(["num", "body", "vec"]).to_pl()
+ res, extra_result = table_instance.output(["num", "body", "vec"]).to_pl()
print(res)
+ if extra_result is not None:
+ print(extra_result)
infinity_instance.disconnect()
diff --git a/example/sparse_vector_search.py b/example/sparse_vector_search.py
index 0a66bdc1dc..979a841502 100644
--- a/example/sparse_vector_search.py
+++ b/example/sparse_vector_search.py
@@ -61,8 +61,11 @@
]
)
- result = table_instance.output(["num", "vec", "_similarity"]).match_sparse("vec", infinity.common.SparseVector([0, 20, 80], [1.0, 2.0, 3.0]), "ip", 3).to_pl()
+ result, extra_result = table_instance.output(["num", "vec", "_similarity"]).match_sparse("vec", infinity.common.SparseVector([0, 20, 80], [1.0, 2.0, 3.0]), "ip", 3).to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
+
infinity_instance.disconnect()
print('test done')
diff --git a/example/tensor_search.py b/example/tensor_search.py
index f9822adcc6..59072f41fc 100644
--- a/example/tensor_search.py
+++ b/example/tensor_search.py
@@ -62,10 +62,14 @@
},
]
)
- result = table_instance.output(["num", "vec", "_score"]).match_tensor("vec",
- [[0.9, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]],
- 'float', 2).to_pl()
+ result, extra_result = table_instance.output(["num", "vec", "_score"]).match_tensor("vec",
+ [[0.9, 0.0, 0.0, 0.0],
+ [1.1, 0.0, 0.0, 0.0]],
+ 'float', 2).to_pl()
print(result)
+ if extra_result is not None:
+ print(extra_result)
+
infinity_instance.disconnect()
print('test done')
diff --git a/example/vector_search.py b/example/vector_search.py
index 443d4d7e6b..ab377359f5 100644
--- a/example/vector_search.py
+++ b/example/vector_search.py
@@ -61,9 +61,16 @@
]
)
- result = table_instance.output(["num", "vec", "_similarity"]).match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float",
- "cosine", 3).to_pl()
+ result, extra_result = (table_instance.
+ output(["num", "vec", "_similarity"]).
+ match_dense("vec", [3.0, 2.8, 2.7, 3.1], "float", "cosine", 3).
+ option({"total_hits_count": False}).
+ to_pl())
+
print(result)
+ if extra_result is not None:
+ print(extra_result)
+
infinity_instance.disconnect()
print('test done')
diff --git a/pyproject.toml b/pyproject.toml
index 723c9970dc..d75de1c4a1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ build-backend = "scikit_build_core.build"
[project]
name = "infinity_embedded_sdk"
-version = "0.5.0.dev5"
+version = "0.5.0.dev6"
requires-python = ">=3.10"
dependencies = [
"sqlglot~=11.7.0",
diff --git a/python/README.md b/python/README.md
index 9e201caa2d..1ea77fa2ff 100644
--- a/python/README.md
+++ b/python/README.md
@@ -51,11 +51,11 @@ LD_PRELOAD=$(ldconfig -p | grep 'libjemalloc.so ' | awk '{print $4}') python3 ex
```
Note: If you run with the debug version, you must set the **libasan** environment variable, for example
```shell
-LD_PRELOAD=$(find $(clang-18 -print-resource-dir) -name "libclang_rt.asan.so") python3 example/simple_example.py
+LD_PRELOAD=$(find $(clang-18 -print-resource-dir) -name "libclang_rt.asan-x86_64.so") python3 example/simple_example.py
```
-Note: When running with the debug version infinity-sdk, you may find some memory leaks caused by arrow. You can use `ASAN_OPTIONS=detect_leaks=0` to disable memory leak detection, for example
+Note: When running with the debug version of infinity_embedded_sdk, you may find some memory leaks caused by Arrow. You can use `ASAN_OPTIONS=detect_leaks=0` to disable memory leak detection, for example
```shell
-LD_PRELOAD=$(find $(clang-18 -print-resource-dir) -name "libclang_rt.asan.so") ASAN_OPTIONS=detect_leaks=0 python3 example/simple_example.py
+LD_PRELOAD=$(find $(clang-18 -print-resource-dir) -name "libclang_rt.asan-x86_64.so") ASAN_OPTIONS=detect_leaks=0 python3 example/simple_example.py
```
# run pysdk test
diff --git a/python/infinity_embedded/__init__.py b/python/infinity_embedded/__init__.py
index 4b039c4926..764f6a567f 100644
--- a/python/infinity_embedded/__init__.py
+++ b/python/infinity_embedded/__init__.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -20,12 +20,14 @@
# import pkg_resources
# __version__ = pkg_resources.get_distribution("infinity_sdk").version
-from infinity_embedded.common import URI, NetworkAddress, LOCAL_HOST, LOCAL_INFINITY_PATH, InfinityException, LOCAL_INFINITY_CONFIG_PATH
+from infinity_embedded.common import URI, NetworkAddress, LOCAL_HOST, LOCAL_INFINITY_PATH, InfinityException, \
+ LOCAL_INFINITY_CONFIG_PATH
from infinity_embedded.infinity import InfinityConnection
from infinity_embedded.local_infinity.infinity import LocalInfinityConnection
from infinity_embedded.errors import ErrorCode
-def connect(uri = LOCAL_INFINITY_PATH, config_path = LOCAL_INFINITY_CONFIG_PATH) -> InfinityConnection:
+
+def connect(uri=LOCAL_INFINITY_PATH, config_path=LOCAL_INFINITY_CONFIG_PATH) -> InfinityConnection:
if isinstance(uri, str) and len(uri) != 0:
return LocalInfinityConnection(uri, config_path)
else:
diff --git a/python/infinity_embedded/common.py b/python/infinity_embedded/common.py
index d956f5c28d..a0d2e5fb87 100644
--- a/python/infinity_embedded/common.py
+++ b/python/infinity_embedded/common.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
from pathlib import Path
from typing import Union
from dataclasses import dataclass
@@ -75,10 +76,12 @@ class ConflictType(object):
Error = 1
Replace = 2
+
class SortType(object):
Asc = 0
Desc = 1
+
class InfinityException(Exception):
def __init__(self, error_code=0, error_message=None):
self.error_code = error_code
diff --git a/python/infinity_embedded/db.py b/python/infinity_embedded/db.py
index 1e1693c890..930924397e 100644
--- a/python/infinity_embedded/db.py
+++ b/python/infinity_embedded/db.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
from abc import ABC, abstractmethod
+
class Database(ABC):
@abstractmethod
diff --git a/python/infinity_embedded/errors.py b/python/infinity_embedded/errors.py
index 1e482d3eb8..a40d15f381 100644
--- a/python/infinity_embedded/errors.py
+++ b/python/infinity_embedded/errors.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -117,6 +117,15 @@ class ErrorCode(IntEnum):
INVALID_EXPLAIN_TYPE = 3081,
CHUNK_NOT_EXIST = 3082,
NAME_MISMATCHED = 3083,
+ TRANSACTION_NOT_FOUND = 3084,
+ INVALID_DATABASE_INDEX = 3085,
+ INVALID_TABLE_INDEX = 3086,
+ FUNCTION_IS_DISABLE = 3087,
+ NOT_FOUND = 3088,
+ ERROR_INIT = 3089,
+ FILE_IS_OPEN = 3090,
+ UNKNOWN = 3091,
+ INVALID_QUERY_OPTION = 3092,
TXN_ROLLBACK = 4001,
TXN_CONFLICT = 4002,
@@ -126,6 +135,7 @@ class ErrorCode(IntEnum):
TOO_MANY_CONNECTIONS = 5003,
CONFIGURATION_LIMIT_EXCEED = 5004,
QUERY_IS_TOO_COMPLEX = 5005,
+ FAIL_TO_GET_SYS_INFO = 5006,
QUERY_CANCELLED = 6001,
QUERY_NOT_SUPPORTED = 6002,
@@ -147,7 +157,26 @@ class ErrorCode(IntEnum):
MUNMAP_FILE_ERROR = 7014,
INVALID_FILE_FLAG = 7015,
INVALID_SERVER_ADDRESS = 7016,
+ FAIL_TO_FUN_PYTHON = 7017,
+ CANT_CONNECT_SERVER = 7018,
+ NOT_EXIST_NODE = 7019,
+ DUPLICATE_NODE = 7020,
+ CANT_CONNECT_LEADER = 7021,
+ MINIO_INVALID_ACCESS_KEY = 7022,
+ MINIO_BUCKET_NOT_EXISTS = 7023,
+ INVALID_STORAGE_TYPE = 7024,
+ NOT_REGISTERED = 7025,
+ CANT_SWITCH_ROLE = 7026,
+ TOO_MANY_FOLLOWER = 7027,
+ TOO_MANY_LEARNER = 7028,
INVALID_ENTRY = 8001,
- NOT_FOUND_ENTRY = 8002,
- EMPTY_ENTRY_LIST = 8003,
+ DUPLICATE_ENTRY = 8002,
+ NOT_FOUND_ENTRY = 8003,
+ EMPTY_ENTRY_LIST = 8004,
+ NO_WAL_ENTRY_FOUND = 8005,
+ WRONG_CHECKPOINT_TYPE = 8006,
+ INVALID_NODE_ROLE = 8007,
+ INVALID_NODE_STATUS = 8008,
+ NODE_INFO_UPDATED = 8009,
+ NODE_NAME_MISMATCH = 8010
diff --git a/python/infinity_embedded/index.py b/python/infinity_embedded/index.py
index 1620ceb644..392185c275 100644
--- a/python/infinity_embedded/index.py
+++ b/python/infinity_embedded/index.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,7 +18,6 @@
from infinity_embedded.embedded_infinity_ext import IndexType as LocalIndexType, WrapIndexInfo
from infinity_embedded.embedded_infinity_ext import InitParameter as LocalInitParameter
-from infinity_embedded.embedded_infinity_ext import WrapIndexInfo as LocalIndexInfo
from infinity_embedded.errors import ErrorCode
diff --git a/python/infinity_embedded/infinity.py b/python/infinity_embedded/infinity.py
index 1251925629..8b40e08186 100644
--- a/python/infinity_embedded/infinity.py
+++ b/python/infinity_embedded/infinity.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,8 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
from abc import ABC, abstractmethod
+
# abstract class
class InfinityConnection(ABC):
def __init__(self, uri):
diff --git a/python/infinity_embedded/local_infinity/client.py b/python/infinity_embedded/local_infinity/client.py
index 501867ad57..82f322ab27 100644
--- a/python/infinity_embedded/local_infinity/client.py
+++ b/python/infinity_embedded/local_infinity/client.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,7 +22,8 @@ class LocalQueryResult:
def __init__(self, error_code: PyErrorCode, error_msg: str, db_names=None, table_names=None, index_names=None,
column_defs=None, column_fields=None, database_name=None, store_dir=None, table_count=None,
comment=None,
- table_name=None, index_name=None, index_type=None, index_comment=None, deleted_rows=0):
+ table_name=None, index_name=None, index_type=None, index_comment=None, deleted_rows=0,
+ extra_result=None):
self.error_code = error_code
self.error_msg = error_msg
self.db_names = db_names
@@ -40,10 +41,11 @@ def __init__(self, error_code: PyErrorCode, error_msg: str, db_names=None, table
self.index_type = index_type
self.index_comment = index_comment
self.deleted_rows = deleted_rows
+ self.extra_result = extra_result
class LocalInfinityClient:
- def __init__(self, path: str = LOCAL_INFINITY_PATH, config_path = LOCAL_INFINITY_CONFIG_PATH):
+ def __init__(self, path: str = LOCAL_INFINITY_PATH, config_path=LOCAL_INFINITY_CONFIG_PATH):
self.path = path
Infinity.LocalInit(path, config_path)
self.client = Infinity.LocalConnect()
@@ -69,7 +71,7 @@ def convert_res(self, res, has_db_names=False, has_table_names=False, has_result
return LocalQueryResult(PyErrorCode(res.error_code.value), res.error_msg, table_names=res.names)
if has_result_data:
return LocalQueryResult(PyErrorCode(res.error_code.value), res.error_msg, column_defs=res.column_defs,
- column_fields=res.column_fields)
+ column_fields=res.column_fields, extra_result=res.extra_result)
if has_db_name:
return LocalQueryResult(PyErrorCode(res.error_code.value), res.error_msg, database_name=res.database_name,
store_dir=res.store_dir, table_count=res.table_count, comment=res.comment)
@@ -205,6 +207,7 @@ def search(self,
highlight_list: list[WrapParsedExpr] = [],
order_by_list: list[WrapOrderByExpr] = [],
group_by_list: list[WrapParsedExpr] = [],
+ total_hits_count_flag: bool = False,
search_expr: WrapSearchExpr = None,
where_expr: WrapParsedExpr = None,
limit_expr: WrapParsedExpr = None,
@@ -217,6 +220,7 @@ def search(self,
highlight_list,
order_by_list,
group_by_list,
+ total_hits_count_flag,
search_expr,
where_expr,
limit_expr,
diff --git a/python/infinity_embedded/local_infinity/db.py b/python/infinity_embedded/local_infinity/db.py
index d2835e3b11..81e0308120 100644
--- a/python/infinity_embedded/local_infinity/db.py
+++ b/python/infinity_embedded/local_infinity/db.py
@@ -1,3 +1,17 @@
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from abc import ABC
from infinity_embedded.db import Database
diff --git a/python/infinity_embedded/local_infinity/infinity.py b/python/infinity_embedded/local_infinity/infinity.py
index 669a007579..9201def38f 100644
--- a/python/infinity_embedded/local_infinity/infinity.py
+++ b/python/infinity_embedded/local_infinity/infinity.py
@@ -1,3 +1,17 @@
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import os
from infinity_embedded import InfinityConnection
from abc import ABC
@@ -90,7 +104,6 @@ def show_current_node(self):
else:
raise InfinityException(res.error_code, res.error_msg)
-
def search(self, db_name, table_name):
self.check_connect()
res = self._client.search(db_name, table_name, [])
diff --git a/python/infinity_embedded/local_infinity/query_builder.py b/python/infinity_embedded/local_infinity/query_builder.py
index a11935d869..3e1967e232 100644
--- a/python/infinity_embedded/local_infinity/query_builder.py
+++ b/python/infinity_embedded/local_infinity/query_builder.py
@@ -1,3 +1,17 @@
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from __future__ import annotations
from abc import ABC
@@ -18,17 +32,19 @@
from infinity_embedded.table import ExplainType as BaseExplainType
from infinity_embedded.errors import ErrorCode
+
class Query(ABC):
def __init__(
- self,
- columns: Optional[List[WrapParsedExpr]],
- highlight: Optional[List[WrapParsedExpr]],
- search: Optional[WrapSearchExpr],
- filter: Optional[WrapParsedExpr],
- group_by: Optional[List[WrapParsedExpr]],
- limit: Optional[WrapParsedExpr],
- offset: Optional[WrapParsedExpr],
- sort: Optional[List[WrapOrderByExpr]]
+ self,
+ columns: Optional[List[WrapParsedExpr]],
+ highlight: Optional[List[WrapParsedExpr]],
+ search: Optional[WrapSearchExpr],
+ filter: Optional[WrapParsedExpr],
+ group_by: Optional[List[WrapParsedExpr]],
+ limit: Optional[WrapParsedExpr],
+ offset: Optional[WrapParsedExpr],
+ sort: Optional[List[WrapOrderByExpr]],
+ total_hits_count: Optional[bool]
):
self.columns = columns
self.highlight = highlight
@@ -38,22 +54,23 @@ def __init__(
self.limit = limit
self.offset = offset
self.sort = sort
+ self.total_hits_count = total_hits_count
class ExplainQuery(Query):
def __init__(
- self,
- columns: Optional[List[WrapParsedExpr]],
- highlight: Optional[List[WrapParsedExpr]],
- search: Optional[WrapSearchExpr],
- filter: Optional[WrapParsedExpr],
- group_by: Optional[List[WrapParsedExpr]],
- limit: Optional[WrapParsedExpr],
- offset: Optional[WrapParsedExpr],
- sort: Optional[List[WrapOrderByExpr]],
- explain_type: Optional[BaseExplainType],
+ self,
+ columns: Optional[List[WrapParsedExpr]],
+ highlight: Optional[List[WrapParsedExpr]],
+ search: Optional[WrapSearchExpr],
+ filter: Optional[WrapParsedExpr],
+ group_by: Optional[List[WrapParsedExpr]],
+ limit: Optional[WrapParsedExpr],
+ offset: Optional[WrapParsedExpr],
+ sort: Optional[List[WrapOrderByExpr]],
+ explain_type: Optional[BaseExplainType],
):
- super().__init__(columns, highlight, search, filter, group_by, limit, offset, sort)
+ super().__init__(columns, highlight, search, filter, group_by, limit, offset, sort, None)
self.explain_type = explain_type
@@ -68,6 +85,7 @@ def __init__(self, table):
self._limit = None
self._offset = None
self._sort = None
+ self._total_hits_count = None
def reset(self):
self._columns = None
@@ -78,15 +96,16 @@ def reset(self):
self._limit = None
self._offset = None
self._sort = None
+ self._total_hits_count = None
def match_dense(
- self,
- vector_column_name: str,
- embedding_data: VEC,
- embedding_data_type: str,
- distance_type: str,
- topn: int,
- knn_params: {} = None,
+ self,
+ vector_column_name: str,
+ embedding_data: VEC,
+ embedding_data_type: str,
+ distance_type: str,
+ topn: int,
+ knn_params: {} = None,
) -> InfinityLocalQueryBuilder:
if self._search is None:
self._search = WrapSearchExpr()
@@ -104,7 +123,8 @@ def match_dense(
if embedding_data_type == "bit":
if len(embedding_data) % 8 != 0:
raise InfinityException(
- ErrorCode.INVALID_EMBEDDING_DATA_TYPE, f"Embeddings with data bit must have dimension of times of 8!"
+ ErrorCode.INVALID_EMBEDDING_DATA_TYPE,
+ f"Embeddings with data bit must have dimension of times of 8!"
)
else:
new_embedding_data = []
@@ -170,7 +190,8 @@ def match_dense(
elem_type = EmbeddingDataType.kElemBFloat16
data.bf16_array_value = embedding_data
else:
- raise InfinityException(ErrorCode.INVALID_EMBEDDING_DATA_TYPE, f"Invalid embedding {embedding_data[0]} type")
+ raise InfinityException(ErrorCode.INVALID_EMBEDDING_DATA_TYPE,
+ f"Invalid embedding {embedding_data[0]} type")
dist_type = KnnDistanceType.kInvalid
if distance_type == "l2":
@@ -214,12 +235,12 @@ def match_dense(
return self
def match_sparse(
- self,
- vector_column_name: str,
- sparse_data: SparseVector | dict,
- metric_type: str,
- topn: int,
- opt_params: {} = None,
+ self,
+ vector_column_name: str,
+ sparse_data: SparseVector | dict,
+ metric_type: str,
+ topn: int,
+ opt_params: {} = None,
) -> InfinityLocalQueryBuilder:
if self._search is None:
self._search = WrapSearchExpr()
@@ -294,7 +315,7 @@ def match_sparse(
return self
def match_text(
- self, fields: str, matching_text: str, topn: int, extra_options: Optional[dict]
+ self, fields: str, matching_text: str, topn: int, extra_options: Optional[dict]
) -> InfinityLocalQueryBuilder:
if self._search is None:
self._search = WrapSearchExpr()
@@ -320,12 +341,12 @@ def match_text(
return self
def match_tensor(
- self,
- column_name: str,
- query_data: VEC,
- query_data_type: str,
- topn: int,
- extra_option: Optional[dict] = None,
+ self,
+ column_name: str,
+ query_data: VEC,
+ query_data_type: str,
+ topn: int,
+ extra_option: Optional[dict] = None,
) -> InfinityLocalQueryBuilder:
if self._search is None:
self._search = WrapSearchExpr()
@@ -425,6 +446,26 @@ def output(self, columns: Optional[list]) -> InfinityLocalQueryBuilder:
parsed_expr = WrapParsedExpr(expr_type)
parsed_expr.function_expr = func_expr
+ select_list.append(parsed_expr)
+ case "_create_timestamp":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "create_timestamp"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ select_list.append(parsed_expr)
+ case "_delete_timestamp":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "delete_timestamp"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
select_list.append(parsed_expr)
case "_score":
func_expr = WrapFunctionExpr()
@@ -456,6 +497,37 @@ def output(self, columns: Optional[list]) -> InfinityLocalQueryBuilder:
parsed_expr = WrapParsedExpr(expr_type)
parsed_expr.function_expr = func_expr
+ select_list.append(parsed_expr)
+ case "_score_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "score_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ select_list.append(parsed_expr)
+
+ case "_similarity_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "similarity_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ select_list.append(parsed_expr)
+ case "_distance_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "distance_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
select_list.append(parsed_expr)
case _:
@@ -477,6 +549,12 @@ def highlight(self, columns: Optional[list]) -> InfinityLocalQueryBuilder:
self._highlight = highlight_list
return self
+ def option(self, option_kv: {}):
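+        # Only a boolean "total_hits_count" value is recognized here; any other key or non-boolean value is silently ignored.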
+ if 'total_hits_count' in option_kv:
+ if isinstance(option_kv['total_hits_count'], bool):
+ self._total_hits_count = option_kv['total_hits_count']
+ return self
+
def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> InfinityLocalQueryBuilder:
sort_list: List[WrapOrderByExpr] = []
@@ -504,6 +582,28 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
parsed_expr = WrapParsedExpr(expr_type)
parsed_expr.function_expr = func_expr
+ order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
+ sort_list.append(order_by_expr)
+ case "_create_timestamp":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "create_timestamp"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
+ sort_list.append(order_by_expr)
+ case "_delete_timestamp":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "delete_timestamp"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
sort_list.append(order_by_expr)
case "_score":
@@ -539,6 +639,40 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
sort_list.append(order_by_expr)
+ case "_score_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "score_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
+ sort_list.append(order_by_expr)
+ case "_similarity_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "similarity_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
+ sort_list.append(order_by_expr)
+ case "_distance_factors":
+ func_expr = WrapFunctionExpr()
+ func_expr.func_name = "distance_factors"
+ func_expr.arguments = []
+
+ expr_type = ParsedExprType(ParsedExprType.kFunction)
+ parsed_expr = WrapParsedExpr(expr_type)
+ parsed_expr.function_expr = func_expr
+
+ order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
+ sort_list.append(order_by_expr)
+
case _:
parsed_expr = parse_expr(maybe_parse(order_by_expr_str))
order_by_expr = WrapOrderByExpr(parsed_expr, order_by_expr[1] == SortType.Asc)
@@ -547,7 +681,7 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
self._sort = sort_list
return self
- def to_result(self):
+ def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
query = Query(
columns=self._columns,
highlight=self._highlight,
@@ -557,23 +691,26 @@ def to_result(self):
limit=self._limit,
offset=self._offset,
sort=self._sort,
+ total_hits_count=self._total_hits_count,
)
self.reset()
return self._table._execute_query(query)
- def to_df(self) -> pd.DataFrame:
+ def to_df(self) -> (pd.DataFrame, {}):
df_dict = {}
- data_dict, data_type_dict = self.to_result()
+ data_dict, data_type_dict, extra_result = self.to_result()
for k, v in data_dict.items():
data_series = pd.Series(v, dtype=logic_type_to_dtype(data_type_dict[k]))
df_dict[k] = data_series
- return pd.DataFrame(df_dict)
+ return pd.DataFrame(df_dict), extra_result
- def to_pl(self) -> pl.DataFrame:
- return pl.from_pandas(self.to_df())
+ def to_pl(self) -> (pl.DataFrame, {}):
+ dataframe, extra_result = self.to_df()
+ return pl.from_pandas(dataframe), extra_result
- def to_arrow(self) -> Table:
- return pa.Table.from_pandas(self.to_df())
+ def to_arrow(self) -> (Table, {}):
+ dataframe, extra_result = self.to_df()
+ return pa.Table.from_pandas(dataframe), extra_result
def explain(self, explain_type=ExplainType.kPhysical) -> Any:
query = ExplainQuery(
diff --git a/python/infinity_embedded/local_infinity/table.py b/python/infinity_embedded/local_infinity/table.py
index b6a1ac35ea..7b0137de3e 100644
--- a/python/infinity_embedded/local_infinity/table.py
+++ b/python/infinity_embedded/local_infinity/table.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
import functools
import inspect
from typing import Optional, Union, List, Any
@@ -59,7 +60,7 @@ def wrapper(*args, **kwargs):
@name_validity_check("index_name", "Index")
def create_index(self, index_name: str, index_info: IndexInfo,
- conflict_type: ConflictType = ConflictType.Error, index_comment : str = ""):
+ conflict_type: ConflictType = ConflictType.Error, index_comment: str = ""):
index_name = index_name.strip()
create_index_conflict: LocalConflictType
@@ -166,8 +167,8 @@ def insert(self, data: Union[INSERT_DATA, list[INSERT_DATA]]):
constant_expression = get_local_constant_expr_from_python_value(value)
parse_exprs.append(constant_expression)
insert_row = WrapInsertRowExpr()
- insert_row.columns=column_names
- insert_row.values=parse_exprs
+ insert_row.columns = column_names
+ insert_row.values = parse_exprs
fields.append(insert_row)
res = self._conn.insert(db_name=db_name, table_name=table_name, fields=fields)
@@ -384,10 +385,15 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE,
"order_by_expr_list must be a list of [column_name, sort_type]")
if order_by_expr[1] not in [SortType.Asc, SortType.Desc]:
- raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE, "sort_type must be SortType.Asc or SortType.Desc")
+ raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE,
+ "sort_type must be SortType.Asc or SortType.Desc")
self.query_builder.sort(order_by_expr_list)
return self
+ def option(self, option_kv: {}):
+ self.query_builder.option(option_kv)
+ return self
+
def to_df(self):
return self.query_builder.to_df()
@@ -432,12 +438,16 @@ def _execute_query(self, query: Query):
if query.group_by is not None:
group_by_list = query.group_by
+ total_hits_count_flag = False
+ if query.total_hits_count:
+ total_hits_count_flag = True
res = self._conn.search(db_name=self._db_name,
table_name=self._table_name,
select_list=query.columns,
highlight_list=highlight,
order_by_list=order_by_list,
group_by_list=group_by_list,
+ total_hits_count_flag=total_hits_count_flag,
search_expr=query.search,
where_expr=query.filter,
limit_expr=query.limit,
diff --git a/python/infinity_embedded/local_infinity/types.py b/python/infinity_embedded/local_infinity/types.py
index 85f31d601b..80f3a8f22a 100644
--- a/python/infinity_embedded/local_infinity/types.py
+++ b/python/infinity_embedded/local_infinity/types.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,12 +13,12 @@
# limitations under the License.
import struct
+import json
from collections import defaultdict
-from typing import Any, Tuple, Dict, List
-import polars as pl
+from typing import Any
import numpy as np
from numpy import dtype
-from infinity_embedded.common import VEC, SparseVector, InfinityException, DEFAULT_MATCH_VECTOR_TOPN
+from infinity_embedded.common import VEC, SparseVector, InfinityException
from infinity_embedded.embedded_infinity_ext import *
from infinity_embedded.errors import ErrorCode
from datetime import date, time, datetime, timedelta
@@ -407,7 +407,7 @@ def make_match_tensor_expr(vector_column_name: str, embedding_data: VEC, embeddi
match_tensor_expr.embedding_data = data
return match_tensor_expr
-def build_result(res: WrapQueryResult) -> tuple[dict[str | Any, list[Any, Any]], dict[str | Any, Any]]:
+def build_result(res: WrapQueryResult) -> tuple[dict[str | Any, list[Any, Any]], dict[str | Any, Any], Any]:
data_dict = {}
data_type_dict = {}
column_counter = defaultdict(int)
@@ -426,4 +426,11 @@ def build_result(res: WrapQueryResult) -> tuple[dict[str | Any, list[Any, Any]],
data_dict[column_name] = data_list
data_type_dict[column_name] = column_data_type
- return data_dict, data_type_dict
+ extra_result = None
+ if res.extra_result is not None:
+ try:
+ extra_result = json.loads(res.extra_result)
+ except json.JSONDecodeError:
+ pass
+
+ return data_dict, data_type_dict, extra_result
diff --git a/python/infinity_embedded/local_infinity/utils.py b/python/infinity_embedded/local_infinity/utils.py
index 9dd1ac8175..c5a55bf4b5 100644
--- a/python/infinity_embedded/local_infinity/utils.py
+++ b/python/infinity_embedded/local_infinity/utils.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
import re
import functools
import inspect
+from typing import Any
import pandas as pd
import polars as pl
from sqlglot import condition
@@ -24,11 +25,10 @@
from infinity_embedded.common import InfinityException, SparseVector
from infinity_embedded.local_infinity.types import build_result, logic_type_to_dtype
from infinity_embedded.utils import binary_exp_to_paser_exp
-from infinity_embedded.embedded_infinity_ext import WrapInExpr, WrapParsedExpr, WrapOrderByExpr, WrapFunctionExpr, \
+from infinity_embedded.embedded_infinity_ext import WrapInExpr, WrapParsedExpr, WrapFunctionExpr, \
WrapColumnExpr, WrapConstantExpr, ParsedExprType, LiteralType
from infinity_embedded.embedded_infinity_ext import WrapEmbeddingType, WrapColumnDef, WrapDataType, LogicalType, \
EmbeddingDataType, WrapSparseType, ConstraintType
-from datetime import date, time, datetime, timedelta
def traverse_conditions(cons, fn=None):
@@ -365,9 +365,9 @@ def wrapper(*args, **kwargs):
return decorator
-def select_res_to_polars(res) -> pl.DataFrame:
+def select_res_to_polars(res) -> (pl.DataFrame, Any):
df_dict = {}
- data_dict, data_type_dict = build_result(res)
+ data_dict, data_type_dict, extra_result = build_result(res)
for k, v in data_dict.items():
data_series = pd.Series(v, dtype=logic_type_to_dtype(data_type_dict[k]))
df_dict[k] = data_series
diff --git a/python/infinity_embedded/table.py b/python/infinity_embedded/table.py
index 164d9b72da..2f7b78bc54 100644
--- a/python/infinity_embedded/table.py
+++ b/python/infinity_embedded/table.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from abc import ABC, abstractmethod
from enum import Enum
-from typing import Optional, Union, Any
-from infinity_embedded.index import IndexInfo
from infinity_embedded.common import InfinityException, INSERT_DATA
from infinity_embedded.embedded_infinity_ext import ExplainType as LocalExplainType
from infinity_embedded.errors import ErrorCode
+
class ExplainType(Enum):
Analyze = 1
Ast = 2
@@ -30,7 +28,6 @@ class ExplainType(Enum):
Pipeline = 6
Fragment = 7
-
def to_local_ttype(self):
if self is ExplainType.Ast:
return LocalExplainType.kAst
diff --git a/python/infinity_embedded/utils.py b/python/infinity_embedded/utils.py
index 8f3c2d7bb5..6857fded8e 100644
--- a/python/infinity_embedded/utils.py
+++ b/python/infinity_embedded/utils.py
@@ -1,4 +1,4 @@
-# Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -47,5 +47,6 @@ def binary_exp_to_paser_exp(binary_expr_key) -> str:
else:
raise InfinityException(ErrorCode.INVALID_EXPRESSION, f"unknown binary expression: {binary_expr_key}")
+
def deprecated_api(message):
warnings.warn(message, DeprecationWarning, stacklevel=2)
diff --git a/python/infinity_http.py b/python/infinity_http.py
index d1bb4cba3a..e2f7be7d9a 100644
--- a/python/infinity_http.py
+++ b/python/infinity_http.py
@@ -3,6 +3,7 @@
import requests
import logging
+import json
from test_pysdk.common.common_data import *
from infinity.common import ConflictType, InfinityException, SparseVector, SortType
from typing import Optional, Any
@@ -711,6 +712,9 @@ def __init__(self, output: list, table_http: table_http):
self._match_sparse = []
self._search_exprs = []
self._sort = []
+ self._limit = None
+ self._offset = None
+ self._option = None
def select(self):
url = f"databases/{self.table_http.database_name}/tables/{self.table_http.table_name}/docs"
@@ -726,15 +730,28 @@ def select(self):
tmp["highlight"] = self._highlight
if len(self._sort):
tmp["sort"] = self._sort
+ if self._limit is not None:
+ tmp["limit"] = str(self._limit)
+ if self._offset is not None:
+ tmp["offset"] = str(self._offset)
+ if self._option is not None:
+ tmp["option"] = self._option
# print(tmp)
d = self.table_http.net.set_up_data([], tmp)
r = self.table_http.net.request(url, "get", h, d)
self.table_http.net.raise_exception(r)
# print(r.json())
- if "output" in r.json():
- self.output_res = r.json()["output"]
+ result_json = r.json()
+ if "output" in result_json:
+ self.output_res = result_json["output"]
else:
self.output_res = []
+
+ if "total_hits_count" in result_json:
+ self.total_hits_count = result_json["total_hits_count"]
+ else:
+ self.total_hits_count = None
+
return self
def explain(self, ExplainType=ExplainType.Physical):
@@ -757,6 +774,13 @@ def explain(self, ExplainType=ExplainType.Physical):
tmp["output"] = self._output
if len(self._highlight):
tmp["highlight"] = self._highlight
+ if self._limit is not None:
+ tmp["limit"] = self._limit
+ if self._offset is not None:
+ tmp["offset"] = self._offset
+ if self._option is not None:
+ tmp["option"] = self._option
+
tmp["explain_type"] = ExplainType_transfrom(ExplainType)
# print(tmp)
d = self.table_http.net.set_up_data([], tmp)
@@ -796,6 +820,23 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
self._sort.append(tmp)
return self
+ def limit(self, limit_num):
+ self._limit = limit_num
+ return self
+
+ def offset(self, offset):
+ self._offset = offset
+ return self
+
+ def option(self, option: {}):
+ self._option = option
+ return self
+
def match_text(self, fields: str, query: str, topn: int, opt_params: Optional[dict] = None):
tmp_match_expr = {"match_method": "text", "fields": fields, "matching_text": query, "topn": topn}
if opt_params is not None:
@@ -902,6 +943,10 @@ def to_result(self):
df_dict[k] = new_tup
# print(self.output_res)
# print(df_dict)
+ extra_result = None
+ if self.total_hits_count is not None:
+ extra_result = {}
+ extra_result["total_hits_count"] = self.total_hits_count
df_type = {}
for k in df_dict:
@@ -936,17 +981,19 @@ def to_result(self):
if (function_name in bool_functions):
df_type[k] = dtype('bool')
break
- return df_dict, df_type
+ return df_dict, df_type, extra_result
def to_pl(self):
- return pl.from_pandas(self.to_df())
+ dataframe, extra_result = self.to_df()
+ return pl.from_pandas(dataframe), extra_result
def to_df(self):
- df_dict, df_type = self.to_result()
- return pd.DataFrame(df_dict).astype(df_type)
+ df_dict, df_type, extra_result = self.to_result()
+ return pd.DataFrame(df_dict).astype(df_type), extra_result
def to_arrow(self):
- return pa.Table.from_pandas(self.to_df())
+ dataframe, extra_result = self.to_df()
+ return pa.Table.from_pandas(dataframe), extra_result
@dataclass
class database_result():
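
The infinity_http changes above add limit/offset/option fields to the request body and surface total_hits_count from the response. Roughly, select() now assembles a payload like the sketch below (values are illustrative; limit and offset are stringified exactly as in the code above, and the response JSON may carry "total_hits_count" alongside "output"):

    # Hedged sketch of the request payload built by select() with the new fields.
    tmp = {
        "output": ["*"],
        "limit": "5",                           # str(self._limit)
        "offset": "2",                          # str(self._offset)
        "option": {"total_hits_count": True},   # passed through unchanged
    }
    # to_df()/to_pl()/to_arrow() now return (dataframe, extra_result), where
    # extra_result is {"total_hits_count": ...} when the server reported it, else None.
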
diff --git a/python/infinity_sdk/infinity/__init__.py b/python/infinity_sdk/infinity/__init__.py
index 95bfba7981..4dff794bcc 100644
--- a/python/infinity_sdk/infinity/__init__.py
+++ b/python/infinity_sdk/infinity/__init__.py
@@ -26,7 +26,8 @@
from infinity.remote_thrift.infinity import RemoteThriftInfinityConnection
from infinity.errors import ErrorCode
-def connect(uri = LOCAL_HOST, logger: logging.Logger = None) -> InfinityConnection:
+
+def connect(uri=LOCAL_HOST, logger: logging.Logger = None) -> InfinityConnection:
if isinstance(uri, NetworkAddress):
return RemoteThriftInfinityConnection(uri, logger)
else:
diff --git a/python/infinity_sdk/infinity/common.py b/python/infinity_sdk/infinity/common.py
index 2300818b9c..09b4fdeaa0 100644
--- a/python/infinity_sdk/infinity/common.py
+++ b/python/infinity_sdk/infinity/common.py
@@ -74,10 +74,12 @@ class ConflictType(object):
Error = 1
Replace = 2
+
class SortType(object):
Asc = 0
Desc = 1
+
class InfinityException(Exception):
def __init__(self, error_code=0, error_message=None):
self.error_code = error_code
diff --git a/python/infinity_sdk/infinity/connection_pool.py b/python/infinity_sdk/infinity/connection_pool.py
index 6f4893e07e..e2f74490ad 100644
--- a/python/infinity_sdk/infinity/connection_pool.py
+++ b/python/infinity_sdk/infinity/connection_pool.py
@@ -1,3 +1,17 @@
+# Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from threading import Lock
import infinity
from infinity.common import NetworkAddress
@@ -5,7 +19,7 @@
class ConnectionPool(object):
- def __init__(self, uri = NetworkAddress("127.0.0.1", 23817), max_size=16):
+ def __init__(self, uri=NetworkAddress("127.0.0.1", 23817), max_size=16):
self.uri_ = uri
self.max_size_ = max_size
self.free_pool_ = []
@@ -13,7 +27,6 @@ def __init__(self, uri = NetworkAddress("127.0.0.1", 23817), max_size=16):
for i in range(max_size):
self._create_conn()
-
def _del__(self):
self.destroy()
@@ -21,7 +34,6 @@ def _create_conn(self):
infinity_coon = infinity.connect(self.uri_)
self.free_pool_.append(infinity_coon)
-
def get_conn(self):
with self.lock_:
if (len(self.free_pool_) == 0):
@@ -30,20 +42,18 @@ def get_conn(self):
logging.debug("get_conn")
return conn
-
def release_conn(self, conn):
"""
Note: The caller may release a connection that was not created by this ConnectionPool, and may also fail to release a connection that was created by it (for example, due to an exception).
"""
with self.lock_:
- if(self.free_pool_.count(conn)):
+ if (self.free_pool_.count(conn)):
raise Exception("the connection has been released")
if (len(self.free_pool_) < self.max_size_):
self.free_pool_.append(conn)
logging.debug("release_conn")
-
def destroy(self):
for conn in iter(self.free_pool_):
conn.disconnect()
- self.free_pool_.clear()
\ No newline at end of file
+ self.free_pool_.clear()
diff --git a/python/infinity_sdk/infinity/db.py b/python/infinity_sdk/infinity/db.py
index 1e1693c890..635ee74581 100644
--- a/python/infinity_sdk/infinity/db.py
+++ b/python/infinity_sdk/infinity/db.py
@@ -14,6 +14,7 @@
from abc import ABC, abstractmethod
+
class Database(ABC):
@abstractmethod
diff --git a/python/infinity_sdk/infinity/errors.py b/python/infinity_sdk/infinity/errors.py
index 1e482d3eb8..df959070f4 100644
--- a/python/infinity_sdk/infinity/errors.py
+++ b/python/infinity_sdk/infinity/errors.py
@@ -117,6 +117,15 @@ class ErrorCode(IntEnum):
INVALID_EXPLAIN_TYPE = 3081,
CHUNK_NOT_EXIST = 3082,
NAME_MISMATCHED = 3083,
+ TRANSACTION_NOT_FOUND = 3084,
+ INVALID_DATABASE_INDEX = 3085,
+ INVALID_TABLE_INDEX = 3086,
+ FUNCTION_IS_DISABLE = 3087,
+ NOT_FOUND = 3088,
+ ERROR_INIT = 3089,
+ FILE_IS_OPEN = 3090,
+ UNKNOWN = 3091,
+ INVALID_QUERY_OPTION = 3092,
TXN_ROLLBACK = 4001,
TXN_CONFLICT = 4002,
@@ -126,6 +135,7 @@ class ErrorCode(IntEnum):
TOO_MANY_CONNECTIONS = 5003,
CONFIGURATION_LIMIT_EXCEED = 5004,
QUERY_IS_TOO_COMPLEX = 5005,
+ FAIL_TO_GET_SYS_INFO = 5006,
QUERY_CANCELLED = 6001,
QUERY_NOT_SUPPORTED = 6002,
@@ -147,7 +157,26 @@ class ErrorCode(IntEnum):
MUNMAP_FILE_ERROR = 7014,
INVALID_FILE_FLAG = 7015,
INVALID_SERVER_ADDRESS = 7016,
+ FAIL_TO_FUN_PYTHON = 7017,
+ CANT_CONNECT_SERVER = 7018,
+ NOT_EXIST_NODE = 7019,
+ DUPLICATE_NODE = 7020,
+ CANT_CONNECT_LEADER = 7021,
+ MINIO_INVALID_ACCESS_KEY = 7022,
+ MINIO_BUCKET_NOT_EXISTS = 7023,
+ INVALID_STORAGE_TYPE = 7024,
+ NOT_REGISTERED = 7025,
+ CANT_SWITCH_ROLE = 7026,
+ TOO_MANY_FOLLOWER = 7027,
+ TOO_MANY_LEARNER = 7028,
INVALID_ENTRY = 8001,
- NOT_FOUND_ENTRY = 8002,
- EMPTY_ENTRY_LIST = 8003,
+ DUPLICATE_ENTRY = 8002,
+ NOT_FOUND_ENTRY = 8003,
+ EMPTY_ENTRY_LIST = 8004,
+ NO_WAL_ENTRY_FOUND = 8005,
+ WRONG_CHECKPOINT_TYPE = 8006,
+ INVALID_NODE_ROLE = 8007,
+ INVALID_NODE_STATUS = 8008,
+ NODE_INFO_UPDATED = 8009,
+ NODE_NAME_MISMATCH = 8010
\ No newline at end of file
diff --git a/python/infinity_sdk/infinity/remote_thrift/client.py b/python/infinity_sdk/infinity/remote_thrift/client.py
index 27e3718447..82800c6c73 100644
--- a/python/infinity_sdk/infinity/remote_thrift/client.py
+++ b/python/infinity_sdk/infinity/remote_thrift/client.py
@@ -28,6 +28,7 @@
TRY_TIMES = 10
+
class ThriftInfinityClient:
def __init__(self, uri: URI, *, try_times: int = TRY_TIMES, logger: logging.Logger = None):
self.lock = rwlock.RWLockRead()
@@ -96,7 +97,8 @@ def _reconnect(self):
# version: 0.5.0.dev2, client_version: 24
# version: 0.5.0.dev3, client_version: 25
# version: 0.5.0.dev4 and 0.5.0.dev5, client_version: 26
- res = self.client.Connect(ConnectRequest(client_version=26)) # 0.5.0.dev5
+ # version: 0.5.0.dev6, client_version: 27
+ res = self.client.Connect(ConnectRequest(client_version=27)) # 0.5.0.dev6
if res.error_code != 0:
raise InfinityException(res.error_code, res.error_msg)
self.session_id = res.session_id
@@ -115,12 +117,14 @@ def wrapper(self, *args, **kwargs):
if old_session_i == self.session_i:
self._reconnect()
self.session_i += 1
- self.logger.debug(f"Tried {i} times, session_id: {self.session_id}, session_i: {self.session_i}, exception: {str(e)}")
+ self.logger.debug(
+ f"Tried {i} times, session_id: {self.session_id}, session_i: {self.session_i}, exception: {str(e)}")
except Exception as e:
raise
else:
return CommonResponse(ErrorCode.TOO_MANY_CONNECTIONS, f"Try {self.try_times} times, but still failed")
return ret
+
return wrapper
@retry_wrapper
@@ -259,7 +263,7 @@ def export_data(self, db_name: str, table_name: str, file_name: str, export_opti
@retry_wrapper
def select(self, db_name: str, table_name: str, select_list, highlight_list, search_expr,
- where_expr, group_by_list, limit_expr, offset_expr, order_by_list):
+ where_expr, group_by_list, limit_expr, offset_expr, order_by_list, total_hits_count):
return self.client.Select(SelectRequest(session_id=self.session_id,
db_name=db_name,
table_name=table_name,
@@ -270,7 +274,8 @@ def select(self, db_name: str, table_name: str, select_list, highlight_list, sea
group_by_list=group_by_list,
limit_expr=limit_expr,
offset_expr=offset_expr,
- order_by_list=order_by_list
+ order_by_list=order_by_list,
+ total_hits_count=total_hits_count
))
@retry_wrapper
@@ -384,3 +389,7 @@ def command(self, command: ttypes.CommandRequest):
def flush(self, flush_request: ttypes.FlushRequest):
flush_request.session_id = self.session_id
return self.client.Flush(flush_request)
+
+ @retry_wrapper
+ def compact(self, db_name: str, table_name: str):
+ return self.client.Compact(CompactRequest(session_id=self.session_id, db_name=db_name, table_name=table_name))
diff --git a/python/infinity_sdk/infinity/remote_thrift/db.py b/python/infinity_sdk/infinity/remote_thrift/db.py
index 60e2b8553e..e5da17858e 100644
--- a/python/infinity_sdk/infinity/remote_thrift/db.py
+++ b/python/infinity_sdk/infinity/remote_thrift/db.py
@@ -28,6 +28,7 @@
from infinity.common import ConflictType
from infinity.common import InfinityException
+
class RemoteDatabase(Database, ABC):
def __init__(self, conn, name: str):
self._conn = conn
diff --git a/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py b/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py
index 7da04b05a6..c05a3817cf 100644
--- a/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py
+++ b/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/InfinityService.py
@@ -315,6 +315,14 @@ def Flush(self, request):
"""
pass
+ def Compact(self, request):
+ """
+ Parameters:
+ - request
+
+ """
+ pass
+
class Client(Iface):
def __init__(self, iprot, oprot=None):
@@ -1507,6 +1515,38 @@ def recv_Flush(self):
return result.success
raise TApplicationException(TApplicationException.MISSING_RESULT, "Flush failed: unknown result")
+ def Compact(self, request):
+ """
+ Parameters:
+ - request
+
+ """
+ self.send_Compact(request)
+ return self.recv_Compact()
+
+ def send_Compact(self, request):
+ self._oprot.writeMessageBegin('Compact', TMessageType.CALL, self._seqid)
+ args = Compact_args()
+ args.request = request
+ args.write(self._oprot)
+ self._oprot.writeMessageEnd()
+ self._oprot.trans.flush()
+
+ def recv_Compact(self):
+ iprot = self._iprot
+ (fname, mtype, rseqid) = iprot.readMessageBegin()
+ if mtype == TMessageType.EXCEPTION:
+ x = TApplicationException()
+ x.read(iprot)
+ iprot.readMessageEnd()
+ raise x
+ result = Compact_result()
+ result.read(iprot)
+ iprot.readMessageEnd()
+ if result.success is not None:
+ return result.success
+ raise TApplicationException(TApplicationException.MISSING_RESULT, "Compact failed: unknown result")
+
class Processor(Iface, TProcessor):
def __init__(self, handler):
@@ -1549,6 +1589,7 @@ def __init__(self, handler):
self._processMap["Cleanup"] = Processor.process_Cleanup
self._processMap["Command"] = Processor.process_Command
self._processMap["Flush"] = Processor.process_Flush
+ self._processMap["Compact"] = Processor.process_Compact
self._on_message_begin = None
def on_message_begin(self, func):
@@ -2422,6 +2463,29 @@ def process_Flush(self, seqid, iprot, oprot):
oprot.writeMessageEnd()
oprot.trans.flush()
+ def process_Compact(self, seqid, iprot, oprot):
+ args = Compact_args()
+ args.read(iprot)
+ iprot.readMessageEnd()
+ result = Compact_result()
+ try:
+ result.success = self._handler.Compact(args.request)
+ msg_type = TMessageType.REPLY
+ except TTransport.TTransportException:
+ raise
+ except TApplicationException as ex:
+ logging.exception('TApplication exception in handler')
+ msg_type = TMessageType.EXCEPTION
+ result = ex
+ except Exception:
+ logging.exception('Unexpected exception in handler')
+ msg_type = TMessageType.EXCEPTION
+ result = TApplicationException(TApplicationException.INTERNAL_ERROR, 'Internal error')
+ oprot.writeMessageBegin("Compact", msg_type, seqid)
+ result.write(oprot)
+ oprot.writeMessageEnd()
+ oprot.trans.flush()
+
# HELPER FUNCTIONS AND STRUCTURES
@@ -7048,5 +7112,130 @@ def __ne__(self, other):
Flush_result.thrift_spec = (
(0, TType.STRUCT, 'success', [CommonResponse, None], None, ), # 0
)
+
+
+class Compact_args(object):
+ """
+ Attributes:
+ - request
+
+ """
+
+
+ def __init__(self, request=None,):
+ self.request = request
+
+ def read(self, iprot):
+ if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
+ iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
+ return
+ iprot.readStructBegin()
+ while True:
+ (fname, ftype, fid) = iprot.readFieldBegin()
+ if ftype == TType.STOP:
+ break
+ if fid == 1:
+ if ftype == TType.STRUCT:
+ self.request = CompactRequest()
+ self.request.read(iprot)
+ else:
+ iprot.skip(ftype)
+ else:
+ iprot.skip(ftype)
+ iprot.readFieldEnd()
+ iprot.readStructEnd()
+
+ def write(self, oprot):
+ if oprot._fast_encode is not None and self.thrift_spec is not None:
+ oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
+ return
+ oprot.writeStructBegin('Compact_args')
+ if self.request is not None:
+ oprot.writeFieldBegin('request', TType.STRUCT, 1)
+ self.request.write(oprot)
+ oprot.writeFieldEnd()
+ oprot.writeFieldStop()
+ oprot.writeStructEnd()
+
+ def validate(self):
+ return
+
+ def __repr__(self):
+ L = ['%s=%r' % (key, value)
+ for key, value in self.__dict__.items()]
+ return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
+
+ def __eq__(self, other):
+ return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+ def __ne__(self, other):
+ return not (self == other)
+all_structs.append(Compact_args)
+Compact_args.thrift_spec = (
+ None, # 0
+ (1, TType.STRUCT, 'request', [CompactRequest, None], None, ), # 1
+)
+
+
+class Compact_result(object):
+ """
+ Attributes:
+ - success
+
+ """
+
+
+ def __init__(self, success=None,):
+ self.success = success
+
+ def read(self, iprot):
+ if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
+ iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
+ return
+ iprot.readStructBegin()
+ while True:
+ (fname, ftype, fid) = iprot.readFieldBegin()
+ if ftype == TType.STOP:
+ break
+ if fid == 0:
+ if ftype == TType.STRUCT:
+ self.success = CommonResponse()
+ self.success.read(iprot)
+ else:
+ iprot.skip(ftype)
+ else:
+ iprot.skip(ftype)
+ iprot.readFieldEnd()
+ iprot.readStructEnd()
+
+ def write(self, oprot):
+ if oprot._fast_encode is not None and self.thrift_spec is not None:
+ oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
+ return
+ oprot.writeStructBegin('Compact_result')
+ if self.success is not None:
+ oprot.writeFieldBegin('success', TType.STRUCT, 0)
+ self.success.write(oprot)
+ oprot.writeFieldEnd()
+ oprot.writeFieldStop()
+ oprot.writeStructEnd()
+
+ def validate(self):
+ return
+
+ def __repr__(self):
+ L = ['%s=%r' % (key, value)
+ for key, value in self.__dict__.items()]
+ return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
+
+ def __eq__(self, other):
+ return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+ def __ne__(self, other):
+ return not (self == other)
+all_structs.append(Compact_result)
+Compact_result.thrift_spec = (
+ (0, TType.STRUCT, 'success', [CommonResponse, None], None, ), # 0
+)
fix_spec(all_structs)
del all_structs
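
The generated Compact plumbing above is driven by ThriftInfinityClient.compact() shown earlier. A minimal sketch of the call through the generated stub, assuming `client` is an InfinityService.Client bound to an open transport and `session_id` comes from the Connect handshake:

    # Hedged sketch: issuing the new Compact RPC directly via the generated client.
    req = CompactRequest(session_id=session_id, db_name="default_db", table_name="t1")
    resp = client.Compact(req)   # returns a CommonResponse
    assert resp.error_code == 0
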
diff --git a/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/ttypes.py b/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/ttypes.py
index 62bb260d07..506b5f7711 100644
--- a/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/ttypes.py
+++ b/python/infinity_sdk/infinity/remote_thrift/infinity_thrift_rpc/ttypes.py
@@ -6747,6 +6747,7 @@ class SelectRequest(object):
- limit_expr
- offset_expr
- order_by_list
+ - total_hits_count
"""
@@ -6755,7 +6756,7 @@ def __init__(self, session_id=None, db_name=None, table_name=None, select_list=[
], highlight_list=[
], search_expr=None, where_expr=None, group_by_list=[
], having_expr=None, limit_expr=None, offset_expr=None, order_by_list=[
- ],):
+ ], total_hits_count=None,):
self.session_id = session_id
self.db_name = db_name
self.table_name = table_name
@@ -6780,6 +6781,7 @@ def __init__(self, session_id=None, db_name=None, table_name=None, select_list=[
order_by_list = [
]
self.order_by_list = order_by_list
+ self.total_hits_count = total_hits_count
def read(self, iprot):
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
@@ -6879,6 +6881,11 @@ def read(self, iprot):
iprot.readListEnd()
else:
iprot.skip(ftype)
+ elif fid == 13:
+ if ftype == TType.BOOL:
+ self.total_hits_count = iprot.readBool()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -6949,6 +6956,10 @@ def write(self, oprot):
iter384.write(oprot)
oprot.writeListEnd()
oprot.writeFieldEnd()
+ if self.total_hits_count is not None:
+ oprot.writeFieldBegin('total_hits_count', TType.BOOL, 13)
+ oprot.writeBool(self.total_hits_count)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -6974,13 +6985,14 @@ class SelectResponse(object):
- error_msg
- column_defs
- column_fields
+ - extra_result
"""
def __init__(self, error_code=None, error_msg=None, column_defs=[
], column_fields=[
- ],):
+ ], extra_result=None,):
self.error_code = error_code
self.error_msg = error_msg
if column_defs is self.thrift_spec[3][4]:
@@ -6991,6 +7003,7 @@ def __init__(self, error_code=None, error_msg=None, column_defs=[
column_fields = [
]
self.column_fields = column_fields
+ self.extra_result = extra_result
def read(self, iprot):
if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
@@ -7033,6 +7046,11 @@ def read(self, iprot):
iprot.readListEnd()
else:
iprot.skip(ftype)
+ elif fid == 5:
+ if ftype == TType.STRING:
+ self.extra_result = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
+ else:
+ iprot.skip(ftype)
else:
iprot.skip(ftype)
iprot.readFieldEnd()
@@ -7065,6 +7083,10 @@ def write(self, oprot):
iter398.write(oprot)
oprot.writeListEnd()
oprot.writeFieldEnd()
+ if self.extra_result is not None:
+ oprot.writeFieldBegin('extra_result', TType.STRING, 5)
+ oprot.writeString(self.extra_result.encode('utf-8') if sys.version_info[0] == 2 else self.extra_result)
+ oprot.writeFieldEnd()
oprot.writeFieldStop()
oprot.writeStructEnd()
@@ -8840,6 +8862,85 @@ def __eq__(self, other):
def __ne__(self, other):
return not (self == other)
+
+
+class CompactRequest(object):
+ """
+ Attributes:
+ - session_id
+ - db_name
+ - table_name
+
+ """
+
+
+ def __init__(self, session_id=None, db_name=None, table_name=None,):
+ self.session_id = session_id
+ self.db_name = db_name
+ self.table_name = table_name
+
+ def read(self, iprot):
+ if iprot._fast_decode is not None and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None:
+ iprot._fast_decode(self, iprot, [self.__class__, self.thrift_spec])
+ return
+ iprot.readStructBegin()
+ while True:
+ (fname, ftype, fid) = iprot.readFieldBegin()
+ if ftype == TType.STOP:
+ break
+ if fid == 1:
+ if ftype == TType.I64:
+ self.session_id = iprot.readI64()
+ else:
+ iprot.skip(ftype)
+ elif fid == 2:
+ if ftype == TType.STRING:
+ self.db_name = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
+ else:
+ iprot.skip(ftype)
+ elif fid == 3:
+ if ftype == TType.STRING:
+ self.table_name = iprot.readString().decode('utf-8', errors='replace') if sys.version_info[0] == 2 else iprot.readString()
+ else:
+ iprot.skip(ftype)
+ else:
+ iprot.skip(ftype)
+ iprot.readFieldEnd()
+ iprot.readStructEnd()
+
+ def write(self, oprot):
+ if oprot._fast_encode is not None and self.thrift_spec is not None:
+ oprot.trans.write(oprot._fast_encode(self, [self.__class__, self.thrift_spec]))
+ return
+ oprot.writeStructBegin('CompactRequest')
+ if self.session_id is not None:
+ oprot.writeFieldBegin('session_id', TType.I64, 1)
+ oprot.writeI64(self.session_id)
+ oprot.writeFieldEnd()
+ if self.db_name is not None:
+ oprot.writeFieldBegin('db_name', TType.STRING, 2)
+ oprot.writeString(self.db_name.encode('utf-8') if sys.version_info[0] == 2 else self.db_name)
+ oprot.writeFieldEnd()
+ if self.table_name is not None:
+ oprot.writeFieldBegin('table_name', TType.STRING, 3)
+ oprot.writeString(self.table_name.encode('utf-8') if sys.version_info[0] == 2 else self.table_name)
+ oprot.writeFieldEnd()
+ oprot.writeFieldStop()
+ oprot.writeStructEnd()
+
+ def validate(self):
+ return
+
+ def __repr__(self):
+ L = ['%s=%r' % (key, value)
+ for key, value in self.__dict__.items()]
+ return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
+
+ def __eq__(self, other):
+ return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+ def __ne__(self, other):
+ return not (self == other)
all_structs.append(Property)
Property.thrift_spec = (
None, # 0
@@ -9392,6 +9493,7 @@ def __ne__(self, other):
(11, TType.STRUCT, 'offset_expr', [ParsedExpr, None], None, ), # 11
(12, TType.LIST, 'order_by_list', (TType.STRUCT, [OrderByExpr, None], False), [
], ), # 12
+ (13, TType.BOOL, 'total_hits_count', None, None, ), # 13
)
all_structs.append(SelectResponse)
SelectResponse.thrift_spec = (
@@ -9402,6 +9504,7 @@ def __ne__(self, other):
], ), # 3
(4, TType.LIST, 'column_fields', (TType.STRUCT, [ColumnField, None], False), [
], ), # 4
+ (5, TType.STRING, 'extra_result', 'UTF8', None, ), # 5
)
all_structs.append(DeleteRequest)
DeleteRequest.thrift_spec = (
@@ -9559,5 +9662,12 @@ def __ne__(self, other):
(1, TType.I64, 'session_id', None, None, ), # 1
(2, TType.STRING, 'flush_type', 'UTF8', None, ), # 2
)
+all_structs.append(CompactRequest)
+CompactRequest.thrift_spec = (
+ None, # 0
+ (1, TType.I64, 'session_id', None, None, ), # 1
+ (2, TType.STRING, 'db_name', 'UTF8', None, ), # 2
+ (3, TType.STRING, 'table_name', 'UTF8', None, ), # 3
+)
fix_spec(all_structs)
del all_structs
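
Both new wire fields are optional: SelectRequest gains field 13 (bool total_hits_count) and SelectResponse gains field 5 (string extra_result), so peers that do not know them simply skip the fields. Constructing a request with the new flag looks roughly like this (illustrative values; session_id and select_list are assumed to come from the active session and query):

    # Hedged sketch of a SelectRequest carrying the new optional flag.
    req = SelectRequest(
        session_id=session_id,
        db_name="default_db",
        table_name="t1",
        select_list=select_list,
        total_hits_count=True,   # optional field 13; omitted when None
    )
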
diff --git a/python/infinity_sdk/infinity/remote_thrift/query_builder.py b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
index 4beb15e62f..c6ac98289d 100644
--- a/python/infinity_sdk/infinity/remote_thrift/query_builder.py
+++ b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
@@ -48,6 +48,7 @@ def __init__(
limit: Optional[ParsedExpr],
offset: Optional[ParsedExpr],
sort: Optional[List[OrderByExpr]],
+ total_hits_count: Optional[bool]
):
self.columns = columns
self.highlight = highlight
@@ -57,6 +58,7 @@ def __init__(
self.limit = limit
self.offset = offset
self.sort = sort
+ self.total_hits_count = total_hits_count
class ExplainQuery(Query):
@@ -72,7 +74,7 @@ def __init__(
sort: Optional[List[OrderByExpr]],
explain_type: Optional[ExplainType],
):
- super().__init__(columns, highlight, search, filter, groupby, limit, offset, sort)
+ super().__init__(columns, highlight, search, filter, groupby, limit, offset, sort, False)
self.explain_type = explain_type
@@ -87,6 +89,7 @@ def __init__(self, table):
self._limit = None
self._offset = None
self._sort = None
+ self._total_hits_count = None
def reset(self):
self._columns = None
@@ -97,6 +100,7 @@ def reset(self):
self._limit = None
self._offset = None
self._sort = None
+ self._total_hits_count = None
def match_dense(
self,
@@ -353,6 +357,16 @@ def output(self, columns: Optional[list]) -> InfinityThriftQueryBuilder:
expr_type = ParsedExprType(function_expr=func_expr)
parsed_expr = ParsedExpr(type=expr_type)
select_list.append(parsed_expr)
+ case "_create_timestamp":
+ func_expr = FunctionExpr(function_name="create_timestamp", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ select_list.append(parsed_expr)
+ case "_delete_timestamp":
+ func_expr = FunctionExpr(function_name="delete_timestamp", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ select_list.append(parsed_expr)
case "_score":
func_expr = FunctionExpr(function_name="score", arguments=[])
expr_type = ParsedExprType(function_expr=func_expr)
@@ -368,6 +382,21 @@ def output(self, columns: Optional[list]) -> InfinityThriftQueryBuilder:
expr_type = ParsedExprType(function_expr=func_expr)
parsed_expr = ParsedExpr(type=expr_type)
select_list.append(parsed_expr)
+ case "_score_factors":
+ func_expr = FunctionExpr(function_name="score_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ select_list.append(parsed_expr)
+ case "_similarity_factors":
+ func_expr = FunctionExpr(function_name="similarity_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ select_list.append(parsed_expr)
+ case "_distance_factors":
+ func_expr = FunctionExpr(function_name="distance_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ select_list.append(parsed_expr)
case _:
select_list.append(parse_expr(maybe_parse(column)))
@@ -384,6 +413,12 @@ def highlight(self, columns: Optional[list]) -> InfinityThriftQueryBuilder:
self._highlight = highlight_list
return self
+ def option(self, option_kv: {}):
+ if 'total_hits_count' in option_kv:
+ if isinstance(option_kv['total_hits_count'], bool):
+ self._total_hits_count = option_kv['total_hits_count']
+ return self
+
def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> InfinityThriftQueryBuilder:
sort_list: List[OrderByExpr] = []
for order_by_expr in order_by_expr_list:
@@ -407,6 +442,20 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
order_by_flag: bool = order_by_expr[1] == SortType.Asc
order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
sort_list.append(order_by_expr)
+ case "_create_timestamp":
+ func_expr = FunctionExpr(function_name="create_timestamp", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ order_by_flag: bool = order_by_expr[1] == SortType.Asc
+ order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
+ sort_list.append(order_by_expr)
+ case "_delete_timestamp":
+ func_expr = FunctionExpr(function_name="delete_timestamp", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ order_by_flag: bool = order_by_expr[1] == SortType.Asc
+ order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
+ sort_list.append(order_by_expr)
case "_score":
func_expr = FunctionExpr(function_name="score", arguments=[])
expr_type = ParsedExprType(function_expr=func_expr)
@@ -428,6 +477,27 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
order_by_flag: bool = order_by_expr[1] == SortType.Asc
order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
sort_list.append(order_by_expr)
+ case "_score_factors":
+ func_expr = FunctionExpr(function_name="score_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ order_by_flag: bool = order_by_expr[1] == SortType.Asc
+ order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
+ sort_list.append(order_by_expr)
+ case "_similarity_factors":
+ func_expr = FunctionExpr(function_name="similarity_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ order_by_flag: bool = order_by_expr[1] == SortType.Asc
+ order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
+ sort_list.append(order_by_expr)
+ case "_distance_factors":
+ func_expr = FunctionExpr(function_name="distance_factors", arguments=[])
+ expr_type = ParsedExprType(function_expr=func_expr)
+ parsed_expr = ParsedExpr(type=expr_type)
+ order_by_flag: bool = order_by_expr[1] == SortType.Asc
+ order_by_expr = OrderByExpr(expr=parsed_expr, asc=order_by_flag)
+ sort_list.append(order_by_expr)
case _:
parsed_expr = parse_expr(maybe_parse(order_by_expr_str))
order_by_flag: bool = order_by_expr[1] == SortType.Asc
@@ -436,7 +506,7 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
self._sort = sort_list
return self
- def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any]]:
+ def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
query = Query(
columns=self._columns,
highlight=self._highlight,
@@ -446,23 +516,26 @@ def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any]]:
limit=self._limit,
offset=self._offset,
sort=self._sort,
+ total_hits_count=self._total_hits_count,
)
self.reset()
return self._table._execute_query(query)
- def to_df(self) -> pd.DataFrame:
+ def to_df(self) -> (pd.DataFrame, {}):
df_dict = {}
- data_dict, data_type_dict = self.to_result()
+ data_dict, data_type_dict, extra_result = self.to_result()
for k, v in data_dict.items():
data_series = pd.Series(v, dtype=logic_type_to_dtype(data_type_dict[k]))
df_dict[k] = data_series
- return pd.DataFrame(df_dict)
+ return pd.DataFrame(df_dict), extra_result
- def to_pl(self) -> pl.DataFrame:
- return pl.from_pandas(self.to_df())
+ def to_pl(self) -> (pl.DataFrame, {}):
+ dataframe, extra_result = self.to_df()
+ return pl.from_pandas(dataframe), extra_result
- def to_arrow(self) -> Table:
- return pa.Table.from_pandas(self.to_df())
+ def to_arrow(self) -> (Table, {}):
+ dataframe, extra_result = self.to_df()
+ return pa.Table.from_pandas(dataframe), extra_result
def explain(self, explain_type=ExplainType.Physical) -> Any:
query = ExplainQuery(
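
With the builder's new option() and three-element to_result(), a typical SDK query now unpacks the extra result next to the frame, as the parallel tests below do. A hedged usage sketch (table_obj is assumed to be a connected RemoteTable; the "total_hits_count" key in extra_result mirrors the request option and is not guaranteed by the client code alone):

    # Hedged sketch of the new result shape returned by to_pl()/to_df()/to_arrow().
    df, extra_result = (
        table_obj.output(["num", "body", "_score"])
                 .match_text("body", "harmful chemical", 3)
                 .option({"total_hits_count": True})
                 .to_pl()
    )
    if extra_result is not None:
        print(extra_result.get("total_hits_count"))
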
diff --git a/python/infinity_sdk/infinity/remote_thrift/table.py b/python/infinity_sdk/infinity/remote_thrift/table.py
index 284341daf6..3ff29cc695 100644
--- a/python/infinity_sdk/infinity/remote_thrift/table.py
+++ b/python/infinity_sdk/infinity/remote_thrift/table.py
@@ -62,7 +62,7 @@ def wrapper(*args, **kwargs):
@name_validity_check("index_name", "Index")
def create_index(self, index_name: str, index_info: IndexInfo,
- conflict_type: ConflictType = ConflictType.Error, index_comment = ""):
+ conflict_type: ConflictType = ConflictType.Error, index_comment=""):
index_name = index_name.strip()
index_info_to_use = index_info.to_ttype()
@@ -400,10 +400,15 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]):
raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE,
"order_by_expr_list must be a list of [column_name, sort_type]")
if order_by_expr[1] not in [SortType.Asc, SortType.Desc]:
- raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE, "sort_type must be SortType.Asc or SortType.Desc")
+ raise InfinityException(ErrorCode.INVALID_PARAMETER_VALUE,
+ "sort_type must be SortType.Asc or SortType.Desc")
self.query_builder.sort(order_by_expr_list)
return self
+ def option(self, option_kv: {}):
+ self.query_builder.option(option_kv)
+ return self
+
def to_result(self):
return self.query_builder.to_result()
@@ -438,6 +443,9 @@ def drop_columns(self, column_names: list[str] | str):
return self._conn.drop_columns(db_name=self._db_name, table_name=self._table_name, column_names=column_names)
+ def compact(self):
+ return self._conn.compact(db_name=self._db_name, table_name=self._table_name)
+
def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str, Any]]:
# execute the query
@@ -450,7 +458,8 @@ def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str,
group_by_list=None,
limit_expr=query.limit,
offset_expr=query.offset,
- order_by_list=query.sort)
+ order_by_list=query.sort,
+ total_hits_count=query.total_hits_count)
# process the results
if res.error_code == ErrorCode.OK:
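
RemoteTable also gains compact(), which forwards to the new client RPC. A minimal sketch, assuming `infinity_obj` is an open connection and that the response surfaces error_code the same way other commands do:

    # Hedged sketch of the new table-level compact() call.
    table_obj = infinity_obj.get_database("default_db").get_table("t1")
    res = table_obj.compact()            # sends CompactRequest over Thrift
    assert res.error_code == ErrorCode.OK
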
diff --git a/python/infinity_sdk/infinity/remote_thrift/types.py b/python/infinity_sdk/infinity/remote_thrift/types.py
index b535c943e0..08f7b866a5 100644
--- a/python/infinity_sdk/infinity/remote_thrift/types.py
+++ b/python/infinity_sdk/infinity/remote_thrift/types.py
@@ -13,11 +13,12 @@
# limitations under the License.
import struct
+import json
import numpy as np
from infinity.common import VEC, SparseVector, InfinityException
from infinity.remote_thrift.infinity_thrift_rpc.ttypes import *
from collections import defaultdict
-from typing import Any, Tuple, Dict, List, Optional
+from typing import Any, Optional
from datetime import date, time, datetime, timedelta
import polars as pl
@@ -173,14 +174,16 @@ def column_vector_to_list(column_type: ttypes.ColumnType, column_data_type: ttyp
case _:
raise NotImplementedError(f"Unsupported type {column_type}")
+
def parse_date_bytes(column_vector):
parsed_list = list(struct.unpack('<{}i'.format(len(column_vector) // 4), column_vector))
date_list = []
epoch = date(1970, 1, 1)
- for value in parsed_list:
- date_list.append((epoch + timedelta(days = value)).strftime('%Y-%m-%d'))
+ for value in parsed_list:
+ date_list.append((epoch + timedelta(days=value)).strftime('%Y-%m-%d'))
return date_list
+
def parse_time_bytes(column_vector):
parsed_list = list(struct.unpack('<{}i'.format(len(column_vector) // 4), column_vector))
time_list = []
@@ -191,15 +194,18 @@ def parse_time_bytes(column_vector):
time_list.append(time(hour=hours, minute=minutes, second=seconds).strftime('%H:%M:%S'))
return time_list
+
def parse_datetime_bytes(column_vector):
parsed_list = list(struct.unpack('<{}i'.format(len(column_vector) // 4), column_vector))
datetime_list = []
epoch = datetime(1970, 1, 1)
for i in range(0, len(parsed_list), 2):
if i + 1 < len(parsed_list):
- datetime_list.append((epoch + timedelta(days = parsed_list[i], seconds = parsed_list[i + 1])).strftime('%Y-%m-%d %H:%M:%S'));
+ datetime_list.append(
+ (epoch + timedelta(days=parsed_list[i], seconds=parsed_list[i + 1])).strftime('%Y-%m-%d %H:%M:%S'));
return datetime_list
+
def parse_interval_bytes(column_vector):
parsed_list = list(struct.unpack('<{}i'.format(len(column_vector) // 4), column_vector))
interval_list = []
@@ -207,6 +213,7 @@ def parse_interval_bytes(column_vector):
interval_list.append(str(timedelta(seconds=value).total_seconds()) + 's')
return interval_list
+
def parse_bytes(bytes_data):
results = []
offset = 0
@@ -298,6 +305,7 @@ def tensor_to_list(column_data_type: ttypes.DataType, binary_data) -> list[list[
raise NotImplementedError(
f"Unsupported type {column_data_type.physical_type.embedding_type.element_type}")
+
def parse_sparse_bytes(column_data_type: ttypes.DataType, column_vector):
dimension = column_data_type.physical_type.sparse_type.dimension
element_type = column_data_type.physical_type.sparse_type.element_type
@@ -374,7 +382,7 @@ def find_data_type(column_name: str, column_defs: list[ttypes.ColumnDef]) -> tty
raise KeyError(f"column name {column_name} not found in column defs")
-def build_result(res: ttypes.SelectResponse) -> tuple[dict[str | Any, list[Any, Any]], dict[str | Any, Any]]:
+def build_result(res: ttypes.SelectResponse) -> tuple[dict[str | Any, list[Any, Any]], dict[str | Any, Any], {}]:
data_dict = {}
data_type_dict = {}
column_counter = defaultdict(int)
@@ -394,7 +402,14 @@ def build_result(res: ttypes.SelectResponse) -> tuple[dict[str | Any, list[Any,
data_dict[column_name] = data_list
data_type_dict[column_name] = column_data_type
- return data_dict, data_type_dict
+ extra_result = None
+ if res.extra_result is not None:
+ try:
+ extra_result = json.loads(res.extra_result)
+ except json.JSONDecodeError:
+ pass
+
+ return data_dict, data_type_dict, extra_result
def make_match_tensor_expr(vector_column_name: str, embedding_data: VEC, embedding_data_type: str, method_type: str,
@@ -483,6 +498,7 @@ def make_match_sparse_expr(vector_column_name: str, sparse_data: SparseVector |
for k, v in opt_params.items():
match_sparse_options.append(InitParameter(param_name=k, param_value=v))
- match_sparse_expr = MatchSparseExpr(column_expr=column_expr, query_sparse_expr=query_sparse_expr, metric_type=metric_type,
+ match_sparse_expr = MatchSparseExpr(column_expr=column_expr, query_sparse_expr=query_sparse_expr,
+ metric_type=metric_type,
topn=topn, opt_params=match_sparse_options, filter_expr=filter_expr)
return match_sparse_expr
diff --git a/python/infinity_sdk/infinity/remote_thrift/utils.py b/python/infinity_sdk/infinity/remote_thrift/utils.py
index 11baf3576b..5e7bed11ff 100644
--- a/python/infinity_sdk/infinity/remote_thrift/utils.py
+++ b/python/infinity_sdk/infinity/remote_thrift/utils.py
@@ -15,6 +15,7 @@
import re
import functools
import inspect
+from typing import Any
import pandas as pd
import polars as pl
from sqlglot import condition
@@ -25,7 +26,6 @@
from infinity.utils import binary_exp_to_paser_exp
from infinity.common import InfinityException, SparseVector
from infinity.errors import ErrorCode
-from datetime import date, time, datetime, timedelta
def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
@@ -74,6 +74,16 @@ def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
expr_type = ttypes.ParsedExprType(function_expr=func_expr)
parsed_expr = ttypes.ParsedExpr(type=expr_type)
return parsed_expr
+ case "_create_timestamp":
+ func_expr = ttypes.FunctionExpr(function_name="create_timestamp", arguments=[])
+ expr_type = ttypes.ParsedExprType(function_expr=func_expr)
+ parsed_expr = ttypes.ParsedExpr(type=expr_type)
+ return parsed_expr
+ case "_delete_timestamp":
+ func_expr = ttypes.FunctionExpr(function_name="delete_timestamp", arguments=[])
+ expr_type = ttypes.ParsedExprType(function_expr=func_expr)
+ parsed_expr = ttypes.ParsedExpr(type=expr_type)
+ return parsed_expr
case "_score":
func_expr = ttypes.FunctionExpr(function_name="score", arguments=[])
expr_type = ttypes.ParsedExprType(function_expr=func_expr)
@@ -89,6 +99,21 @@ def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
expr_type = ttypes.ParsedExprType(function_expr=func_expr)
parsed_expr = ttypes.ParsedExpr(type=expr_type)
return parsed_expr
+ case "_score_factors":
+ func_expr = ttypes.FunctionExpr(function_name="score_factors", arguments=[])
+ expr_type = ttypes.ParsedExprType(function_expr=func_expr)
+ parsed_expr = ttypes.ParsedExpr(type=expr_type)
+ return parsed_expr
+ case "_similarity_factors":
+ func_expr = ttypes.FunctionExpr(function_name="similarity_factors", arguments=[])
+ expr_type = ttypes.ParsedExprType(function_expr=func_expr)
+ parsed_expr = ttypes.ParsedExpr(type=expr_type)
+ return parsed_expr
+ case "_distance_factors":
+ func_expr = ttypes.FunctionExpr(function_name="distance_factors", arguments=[])
+ expr_type = ttypes.ParsedExprType(function_expr=func_expr)
+ parsed_expr = ttypes.ParsedExpr(type=expr_type)
+ return parsed_expr
case _:
parsed_expr = ttypes.ParsedExpr()
column_expr = ttypes.ColumnExpr()
@@ -384,9 +409,9 @@ def wrapper(*args, **kwargs):
return decorator
-def select_res_to_polars(res) -> pl.DataFrame:
+def select_res_to_polars(res) -> (pl.DataFrame, Any):
df_dict = {}
- data_dict, data_type_dict = build_result(res)
+ data_dict, data_type_dict, extra_result = build_result(res)
for k, v in data_dict.items():
data_series = pd.Series(v, dtype=logic_type_to_dtype(data_type_dict[k]))
df_dict[k] = data_series
diff --git a/python/infinity_sdk/infinity/table.py b/python/infinity_sdk/infinity/table.py
index 10393991ce..a2205ad629 100644
--- a/python/infinity_sdk/infinity/table.py
+++ b/python/infinity_sdk/infinity/table.py
@@ -21,6 +21,7 @@
from infinity.common import InfinityException, INSERT_DATA
from infinity.errors import ErrorCode
+
class ExplainType(Enum):
Analyze = 1
Ast = 2
@@ -46,4 +47,4 @@ def to_ttype(self):
elif self is ExplainType.Fragment:
return ttypes.ExplainType.Fragment
else:
- raise InfinityException(ErrorCode.INVALID_EXPLAIN_TYPE, "Unknown explain type")
\ No newline at end of file
+ raise InfinityException(ErrorCode.INVALID_EXPLAIN_TYPE, "Unknown explain type")
diff --git a/python/infinity_sdk/infinity/utils.py b/python/infinity_sdk/infinity/utils.py
index 2514dfe445..64d7b82101 100644
--- a/python/infinity_sdk/infinity/utils.py
+++ b/python/infinity_sdk/infinity/utils.py
@@ -47,5 +47,6 @@ def binary_exp_to_paser_exp(binary_expr_key) -> str:
else:
raise InfinityException(ErrorCode.INVALID_EXPRESSION, f"unknown binary expression: {binary_expr_key}")
+
def deprecated_api(message):
warnings.warn(message, DeprecationWarning, stacklevel=2)
diff --git a/python/infinity_sdk/pyproject.toml b/python/infinity_sdk/pyproject.toml
index 92f7574c5e..50386f3aaf 100644
--- a/python/infinity_sdk/pyproject.toml
+++ b/python/infinity_sdk/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "infinity-sdk"
-version = "0.5.0.dev5"
+version = "0.5.0.dev6"
requires-python = ">=3.10"
dependencies = [
"sqlglot~=11.7.0",
diff --git a/python/parallel_test/test_chaos.py b/python/parallel_test/test_chaos.py
index e39de5ea5c..44e0715fea 100644
--- a/python/parallel_test/test_chaos.py
+++ b/python/parallel_test/test_chaos.py
@@ -82,13 +82,13 @@ def read_out_data():
def search_fulltext(table_obj):
- res = table_obj.output(["index", "body", "other_vector", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["index", "body", "other_vector", "_row_id", "_score"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
print(res)
def search_vector(table_obj):
- res = table_obj.output(["*"]).match_dense("other_vector", [2] * 4, "float", "l2", 3).to_pl()
+ res, extra_result = table_obj.output(["*"]).match_dense("other_vector", [2] * 4, "float", "l2", 3).to_pl()
print(res)
diff --git a/python/parallel_test/test_ddl_and_insert_delete.py b/python/parallel_test/test_ddl_and_insert_delete.py
index 5c32e3d892..22f6418adf 100644
--- a/python/parallel_test/test_ddl_and_insert_delete.py
+++ b/python/parallel_test/test_ddl_and_insert_delete.py
@@ -77,7 +77,7 @@ def insert(db_obj: Database):
value.append({"tag": random.randint(0, 9),
"c1": [random.random(), random.random(), random.random(), random.random()]})
table_obj.insert(value)
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
except Exception as e:
return
diff --git a/python/parallel_test/test_index_parallel.py b/python/parallel_test/test_index_parallel.py
index b9d23e3750..732c867dec 100644
--- a/python/parallel_test/test_index_parallel.py
+++ b/python/parallel_test/test_index_parallel.py
@@ -52,7 +52,7 @@ def read_worker(connection_pool: ConnectionPool, end_time):
table_obj = db_obj.get_table("test_fulltext_index_parallel")
while time.time() < end_time:
- res = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
print(res)
time.sleep(0.1)
@@ -149,7 +149,7 @@ def test_vector_index_single_thread(self, get_infinity_connection_pool, index_ty
print("begin import")
table_obj.import_data(file_path)
print("import complete")
- res = table_obj.output(["variant_id"]).match_dense(
+ res, extra_result = table_obj.output(["variant_id"]).match_dense(
knn_column_name, [1] * 4, "float", knn_distance_type, 5).to_pl()
print(res)
@@ -199,7 +199,7 @@ def read_worker(connection_pool: ConnectionPool, end_time, knn_column_name, knn_
table_obj = db_obj.get_table("test_vector_index_parallel")
while time.time() < end_time:
- res = table_obj.output(["variant_id"]).match_dense(
+ res, extra_result = table_obj.output(["variant_id"]).match_dense(
knn_column_name, [1] * 4, "float", knn_distance_type, 5).to_pl()
print(res)
time.sleep(0.1)
@@ -313,7 +313,7 @@ def query_worker(connection_pool: ConnectionPool, table_name, end_time, thread_i
while time.time() < end_time:
try:
- res = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
# print(f"thread {thread_id}: check result:\n{res}")
self.logger.info(f"thread {thread_id}: check result:\n{res}")
diff --git a/python/parallel_test/test_insert_delete_parallel.py b/python/parallel_test/test_insert_delete_parallel.py
index 2e8a39fbd0..64db2968bf 100644
--- a/python/parallel_test/test_insert_delete_parallel.py
+++ b/python/parallel_test/test_insert_delete_parallel.py
@@ -46,7 +46,7 @@ def test_insert_and_delete_parallel(self, get_infinity_connection_pool):
infinity_obj = connection_pool.get_conn()
db_obj = infinity_obj.get_database(db_name)
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
assert len(res) == 0
res = db_obj.drop_table(table_name, ConflictType.Error)
diff --git a/python/parallel_test/test_insert_delete_parallel_simple.py b/python/parallel_test/test_insert_delete_parallel_simple.py
index 33ef407fa3..eef92b64de 100644
--- a/python/parallel_test/test_insert_delete_parallel_simple.py
+++ b/python/parallel_test/test_insert_delete_parallel_simple.py
@@ -40,7 +40,7 @@ def test_insert_and_delete_parallel_simple(self, get_infinity_connection_pool):
infinity_obj = connection_pool.get_conn()
db_obj = infinity_obj.get_database(db_name)
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
assert len(res) == 0
res = db_obj.drop_table(table_name, ConflictType.Error)
diff --git a/python/parallel_test/test_insert_delete_update.py b/python/parallel_test/test_insert_delete_update.py
index e606261dbc..da3304c295 100644
--- a/python/parallel_test/test_insert_delete_update.py
+++ b/python/parallel_test/test_insert_delete_update.py
@@ -35,7 +35,7 @@ def test_insert_delete_update_parallel_vec(self, get_infinity_connection_pool):
infinity_obj = connection_pool.get_conn()
db_obj = infinity_obj.get_database(db_name)
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
res = db_obj.drop_table(table_name, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@@ -67,7 +67,7 @@ def updata(table_obj):
def search(table_obj):
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
diff --git a/python/parallel_test/test_insert_parallel.py b/python/parallel_test/test_insert_parallel.py
index c430ebbd9f..5f8b81888f 100644
--- a/python/parallel_test/test_insert_parallel.py
+++ b/python/parallel_test/test_insert_parallel.py
@@ -47,7 +47,7 @@ def test_insert_parallel(self, get_infinity_connection_pool):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("parallel_insert_test")
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
assert len(res) == total_row_count
@@ -91,7 +91,7 @@ def test_insert_one_thread(self, get_infinity_connection_pool):
table_obj.insert(value)
value.clear()
print(f"test_insert_one_thread: cost {time.time() - start_ts} s")
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
assert len(res) == total_row_count
@@ -141,7 +141,7 @@ def test_insert_and_count_star_parallel(self, get_infinity_connection_pool):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("parallel_insert_test")
- res = table_obj.output(['*']).to_df()
+ res, extra_result = table_obj.output(['*']).to_df()
print(res)
assert len(res) == total_row_count
diff --git a/python/restart_test/restart_util.py b/python/restart_test/restart_util.py
index d9afd7e585..38cb9ea812 100644
--- a/python/restart_test/restart_util.py
+++ b/python/restart_test/restart_util.py
@@ -191,6 +191,8 @@ def index():
def import_file() -> str:
base_filepath = "test/data/jsonl/test_table.jsonl"
filepath = "test/data/jsonl/test_table_gen.jsonl"
+ if os.path.exists(filepath):
+ return filepath
if not os.path.exists("test/data/jsonl"):
os.makedirs("test/data/jsonl")
with open(base_filepath, "r") as base_file:
diff --git a/python/restart_test/test_alter.py b/python/restart_test/test_alter.py
index 9ae5a68722..507456d698 100644
--- a/python/restart_test/test_alter.py
+++ b/python/restart_test/test_alter.py
@@ -55,7 +55,7 @@ def part2(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame(
@@ -136,7 +136,7 @@ def part1(infinity_obj):
def part2(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame(
@@ -163,7 +163,7 @@ def part2(infinity_obj):
res = table_obj.list_indexes()
assert len(res.index_names) == 1
- data_dict, _ = (
+ data_dict, _, _ = (
table_obj.output(["c1"])
.match_text(fields="c3", matching_text="test", topn=1)
.to_result()
@@ -229,3 +229,48 @@ def part2(infinity_obj):
db_obj.drop_table(table_name)
part2()
+
+ def test_restart_after_alter_and_checkpoint(self, infinity_runner: InfinityRunner):
+ table_name = "test_alter4"
+ config = "test/data/config/restart_test/test_alter/1.toml"
+
+ infinity_runner.clear()
+ uri = common_values.TEST_LOCAL_HOST
+ data_dir = "/var/infinity/data"
+
+ decorator = infinity_runner_decorator_factory(config, uri, infinity_runner)
+
+ @decorator
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ db_obj.drop_table(table_name, ConflictType.Ignore)
+ table_obj = db_obj.create_table(
+ table_name,
+ {
+ "c1": {"type": "int"},
+ "c2": {"type": "int"},
+ "c3": {"type": "varchar"},
+ },
+ )
+ table_obj.insert([{"c1": 1, "c2": 2, "c3": "test"}])
+
+ table_obj.add_columns({"c4": {"type": "varchar", "default": "tttt"}})
+ table_obj.drop_columns(["c2"])
+
+ infinity_obj.flush_data()
+
+ table_obj.drop_columns(["c3"])
+
+ infinity_obj.flush_delta()
+
+ part1()
+
+ @decorator
+ def part2(infinity_obj):
+ dropped_column_dirs = pathlib.Path(data_dir).rglob("1.col")
+ assert len(list(dropped_column_dirs)) == 0
+
+ dropped_column_dirs = pathlib.Path(data_dir).rglob("2.col")
+ assert len(list(dropped_column_dirs)) == 0
+
+ part2()
diff --git a/python/restart_test/test_cleanup.py b/python/restart_test/test_cleanup.py
index 218eb3a88e..3b5c8d9659 100644
--- a/python/restart_test/test_cleanup.py
+++ b/python/restart_test/test_cleanup.py
@@ -161,7 +161,7 @@ def part2(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table(table_name)
- data_dict, _ = table_obj.output(["count(*)"]).to_result()
+ data_dict, _, _ = table_obj.output(["count(*)"]).to_result()
count_star = data_dict["count(star)"][0]
assert count_star == insert_n
diff --git a/python/restart_test/test_compact.py b/python/restart_test/test_compact.py
new file mode 100644
index 0000000000..186fbae3a8
--- /dev/null
+++ b/python/restart_test/test_compact.py
@@ -0,0 +1,91 @@
+from infinity_runner import InfinityRunner, infinity_runner_decorator_factory
+from common import common_values
+from restart_util import *
+from infinity.common import ConflictType
+
+
+class TestCompact:
+ def test_restart_after_compact_and_cleanup(self, infinity_runner: InfinityRunner):
+ config = "test/data/config/restart_test/test_compact/1.toml"
+ uri = common_values.TEST_LOCAL_HOST
+ infinity_runner.clear()
+
+ decorator = infinity_runner_decorator_factory(config, uri, infinity_runner)
+
+ columns = LChYDataGenerato.columns()
+ indexes = LChYDataGenerato.index()
+ import_file = LChYDataGenerato.import_file()
+
+ @decorator
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ db_obj.drop_table("test_compact", ConflictType.Ignore)
+ table_obj = db_obj.create_table("test_compact", columns)
+ table_obj.import_data(import_file, {"file_type": "jsonl"})
+ table_obj.import_data(import_file, {"file_type": "jsonl"})
+ infinity_obj.flush_delta()
+ table_obj.compact()
+ for index_info in indexes:
+ table_obj.create_index(f"idx_{index_info.column_name}", index_info)
+
+ infinity_obj.cleanup()
+
+ part1()
+
+ @decorator
+ def part2(infinity_obj):
+ pass
+
+ part2()
+
+ def test_restart_compact_index(self, infinity_runner: InfinityRunner):
+ config = "test/data/config/restart_test/test_compact/1.toml"
+ uri = common_values.TEST_LOCAL_HOST
+ infinity_runner.clear()
+
+ decorator = infinity_runner_decorator_factory(config, uri, infinity_runner)
+
+ table_name = "test_compact1"
+ dataset_path = "test/data/csv/enwiki_9.csv"
+ import_options = {"delimiter": "\t", "file_type": "csv"}
+
+ @decorator
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ db_obj.drop_table(table_name, ConflictType.Ignore)
+ table_obj = db_obj.create_table(
+ table_name,
+ {
+ "doctitle": {"type": "varchar"},
+ "docdate": {"type": "varchar"},
+ "body": {"type": "varchar"},
+ },
+ )
+ table_obj.create_index(
+ "ft_index", index.IndexInfo("body", index.IndexType.FullText)
+ )
+ table_obj.import_data(dataset_path, import_options)
+ table_obj.import_data(dataset_path, import_options)
+ table_obj.compact()
+
+ infinity_obj.flush_data()
+
+ table_obj.import_data(dataset_path, import_options)
+ table_obj.compact()
+ infinity_obj.flush_delta()
+
+ table_obj.import_data(dataset_path, import_options)
+ table_obj.compact()
+
+ part1()
+ import_time = 4
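+ # the 9-row enwiki csv was imported 4 times in part1, so recovery should see 36 rows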
+
+ @decorator
+ def part2(infinity_obj):
+ table_obj = infinity_obj.get_database("default_db").get_table(table_name)
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ count_star = data_dict["count(star)"][0]
+ assert count_star == 9 * import_time
+
+ part2()
diff --git a/python/restart_test/test_fulltext.py b/python/restart_test/test_fulltext.py
index 0b8b696836..59908082dc 100644
--- a/python/restart_test/test_fulltext.py
+++ b/python/restart_test/test_fulltext.py
@@ -145,8 +145,8 @@ def t1():
.to_result()
)
- data_dict, _ = res
- gt_data_dict, _ = gt_res
+ data_dict, _, _ = res
+ gt_data_dict, _, _ = gt_res
if data_dict != gt_data_dict:
print(f"diff: {data_dict} {gt_data_dict}")
else:
diff --git a/python/restart_test/test_insert.py b/python/restart_test/test_insert.py
index eeb0e89222..a2cb836a0b 100644
--- a/python/restart_test/test_insert.py
+++ b/python/restart_test/test_insert.py
@@ -83,7 +83,7 @@ def part1(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("test_insert")
- data_dict, _ = table_obj.output(["count(*)"]).to_result()
+ data_dict, _, _ = table_obj.output(["count(*)"]).to_result()
count_star = data_dict["count(star)"][0]
assert count_star == cur_insert_n
print(f"cur_insert_n: {cur_insert_n}")
@@ -243,7 +243,7 @@ def part1(infinity_obj, test_i: int):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("test_insert_checkpoint")
- data_dict, _ = table_obj.output(["count(*)"]).to_result()
+ data_dict, _, _ = table_obj.output(["count(*)"]).to_result()
count_star = data_dict["count(star)"][0]
assert count_star == line_num
diff --git a/python/restart_test/test_insert_import.py b/python/restart_test/test_insert_import.py
index a45d722247..8ddd5a5f01 100644
--- a/python/restart_test/test_insert_import.py
+++ b/python/restart_test/test_insert_import.py
@@ -111,7 +111,7 @@ def part1(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("test_insert")
- data_dict, _ = table_obj.output(["count(*)"]).to_result()
+ data_dict, _, _ = table_obj.output(["count(*)"]).to_result()
count_star = data_dict["count(star)"][0]
assert count_star == cur_n
logger.debug(f"cur_n: {cur_n}")
diff --git a/python/restart_test/test_memidx.py b/python/restart_test/test_memidx.py
index facae0546a..6678ca8f5f 100644
--- a/python/restart_test/test_memidx.py
+++ b/python/restart_test/test_memidx.py
@@ -5,6 +5,7 @@
import time
import pathlib
from infinity.common import ConflictType, SparseVector
+import pytest
class TestMemIdx:
@@ -58,7 +59,7 @@ def part2(infinity_obj):
time.sleep(5)
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("test_memidx1")
- data_dict, data_type_dict = (
+ data_dict, data_type_dict, _ = (
table_obj.output(["c1"])
.match_dense("c2", [0.3, 0.3, 0.2, 0.2], "float", "l2", 6)
.to_result()
@@ -66,7 +67,7 @@ def part2(infinity_obj):
# print(data_dict["c1"])
assert data_dict["c1"] == [4, 4, 4, 4, 4, 2]
- data_dict, data_type_dict = table_obj.output(["count(*)"]).to_result()
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
# print(data_dict)
assert data_dict["count(star)"] == [10]
@@ -87,14 +88,14 @@ def part3(infinity_obj):
table_obj = db_obj.get_table("test_memidx1")
def check():
- data_dict, data_type_dict = (
+ data_dict, data_type_dict, _ = (
table_obj.output(["c1"])
.match_dense("c2", [0.3, 0.3, 0.2, 0.2], "float", "l2", 6)
.to_result()
)
assert data_dict["c1"] == [8, 6, 6, 4, 4, 4]
- data_dict, data_type_dict = table_obj.output(["count(*)"]).to_result()
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
assert data_dict["count(star)"] == [13]
check()
@@ -130,6 +131,322 @@ def check():
# select count(*) from test_memidx1;
# # result: 13
+ def test_mem_ivf(self, infinity_runner: InfinityRunner):
+ config1 = "test/data/config/restart_test/test_memidx/1.toml"
+ config2 = "test/data/config/restart_test/test_memidx/2.toml"
+ config3 = "test/data/config/restart_test/test_memidx/3.toml"
+ uri = common_values.TEST_LOCAL_HOST
+ infinity_runner.clear()
+
+ decorator1 = infinity_runner_decorator_factory(config1, uri, infinity_runner)
+
+ @decorator1
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.create_table(
+ "test_mem_ivf",
+ {"c1": {"type": "int"}, "c2": {"type": "vector,4,float"}},
+ )
+ res = table_obj.create_index(
+ "idx1",
+ index.IndexInfo(
+ "c2",
+ index.IndexType.IVF,
+ {
+ "metric": "l2",
+ },
+ ),
+ )
+ assert res.error_code == infinity.ErrorCode.OK
+
+ table_obj.insert([{"c1": 2, "c2": [0.1, 0.2, 0.3, -0.2]} for i in range(51)])
+ # trigger the dump with the 52nd record
+ table_obj.insert([{"c1": 4, "c2": [0.2, 0.1, 0.3, 0.4]}])
+ # table_obj.insert([{"c1": 2, "c2": [0.1, 0.2, 0.3, -0.2]} for i in range(2)])
+ time.sleep(5)
+ table_obj.insert([{"c1": 4, "c2": [0.2, 0.1, 0.3, 0.4]} for i in range(4)])
+
+ part1()
+
+ # config1 can hold 51 rows of ivf mem index before dump
+ # 1. recover by dumpindex wal & memindex recovery
+ decorator2 = infinity_runner_decorator_factory(config2, uri, infinity_runner)
+
+ @decorator2
+ def part2(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_ivf")
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
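+ # part1 inserted 51 + 1 + 4 = 56 rows in total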
+ assert data_dict["count(star)"] == [56]
+
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_dense("c2", [0.3, 0.3, 0.2, 0.2], "float", "l2", 6, {"nprobe" : "100"})
+ .to_result()
+ )
+ # print(data_dict["c1"])
+ assert data_dict["c1"] == [4, 4, 4, 4, 4, 2]
+
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
+ assert data_dict["count(star)"] == [56]
+
+ table_obj.insert([{"c1": 6, "c2": [0.3, 0.2, 0.1, 0.4]} for i in range(2)])
+ # wait for memindex dump & delta checkpoint to dump
+ time.sleep(5)
+ table_obj.insert([{"c1": 8, "c2": [0.4, 0.3, 0.2, 0.1]}])
+
+ part2()
+
+ # 2. recover by delta ckp & dumpindex wal & memindex recovery
+ decorator3 = infinity_runner_decorator_factory(config3, uri, infinity_runner)
+
+ @decorator3
+ def part3(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_ivf")
+
+ def check():
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_dense("c2", [0.3, 0.3, 0.2, 0.2], "float", "l2", 6)
+ .to_result()
+ )
+ assert data_dict["c1"] == [8, 6, 6, 4, 4, 4]
+
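+ # 56 rows from part1 plus the 2 + 1 rows inserted in part2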
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ assert data_dict["count(star)"] == [59]
+
+ check()
+ infinity_obj.optimize("default_db", "test_mem_ivf", optimize_opt=None)
+ check()
+
+ db_obj.drop_table("test_mem_ivf")
+
+ part3()
+
+ def test_mem_indexer(self, infinity_runner : InfinityRunner):
+ config1 = "test/data/config/restart_test/test_memidx/1.toml"
+ config2 = "test/data/config/restart_test/test_memidx/2.toml"
+ config3 = "test/data/config/restart_test/test_memidx/3.toml"
+ uri = common_values.TEST_LOCAL_HOST
+ infinity_runner.clear()
+
+ decorator1 = infinity_runner_decorator_factory(config1, uri, infinity_runner)
+
+ @decorator1
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.create_table(
+ "test_mem_indexer",
+ {"c1" : {"type" : "int"}, "c2": {"type": "varchar"}},
+ )
+ res = table_obj.create_index(
+ "idx1",
+ index.IndexInfo(
+ "c2",
+ index.IndexType.FullText,
+ ),
+ )
+ assert res.error_code == infinity.ErrorCode.OK
+
+ table_obj.insert([
+ {"c1" : 1, "c2" : "this is a test text"},
+ {"c1" : 2, "c2" : "this is not a test text"},
+ ])
+ # trigger the dump with the 3rd record
+ table_obj.insert([
+ {"c1" : 3, "c2" : "this is indeed a test text"},
+ ])
+ table_obj.insert([
+ {"c1" : 4, "c2" : "this is definitely not a test text"},
+ {"c1" : 5, "c2" : "this is nothing but a test text"},
+ ])
+
+ part1()
+
+ # config1 can hold 2 rows of identical fulltext mem index before dump
+ # 1. recover by dumpindex wal & memindex recovery
+ decorator2 = infinity_runner_decorator_factory(config2, uri, infinity_runner)
+
+ @decorator2
+ def part2(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_indexer")
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
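+ # part1 inserted 2 + 1 + 2 = 5 rows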
+ assert data_dict["count(star)"] == [5]
+
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_text('c2', 'test text', 3)
+ .to_result()
+ )
+ # print(data_dict["c1"])
+ assert data_dict["c1"] == [1, 2, 3]
+
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
+ assert data_dict["count(star)"] == [5]
+
+ # the 2nd dump
+ table_obj.insert([
+ {"c1" : 6, "c2" : "this is the exact opposite of a test text"},
+ ])
+ time.sleep(5)
+ table_obj.insert([
+ {"c1" : 7, "c2" : "what is this?"},
+ {"c1" : 8, "c2" : "this is what?"},
+ {"c1" : 9, "c2" : "not a test text!"},
+ {"c1" : 10, "c2" : "what a this?"},
+ {"c1" : 11, "c2" : "this is you!"},
+ ])
+
+ part2()
+
+ # 2. recover by delta ckp & dumpindex wal & memindex recovery
+ decorator3 = infinity_runner_decorator_factory(config3, uri, infinity_runner)
+
+ @decorator3
+ def part3(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_indexer")
+
+ def check(rows):
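+ # only rows 7, 8 and 10 contain both "this" and "what", so they rank first for this query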
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_text('c2', 'this what', 3)
+ .to_result()
+ )
+ # print(data_dict["c1"])
+ assert data_dict["c1"] == [7, 8, 10]
+
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ assert data_dict["count(star)"] == [rows]
+
+ check(11)
+ table_obj.insert([
+ {"c1" : 12, "c2" : "this is a text!"},
+ ])
+ check(12)
+
+ # the 3rd dump
+ db_obj.drop_table("test_mem_indexer")
+
+ part3()
+
+ @pytest.mark.skip(reason="bug")
+ def test_mem_bmp(self, infinity_runner: InfinityRunner):
+ config1 = "test/data/config/restart_test/test_memidx/1.toml"
+ config2 = "test/data/config/restart_test/test_memidx/2.toml"
+ config3 = "test/data/config/restart_test/test_memidx/3.toml"
+ uri = common_values.TEST_LOCAL_HOST
+ infinity_runner.clear()
+
+ test_data = [
+ {"c1" : 1, "c2" : SparseVector(indices=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90], values=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])},
+ {"c1" : 2, "c2" : SparseVector(indices=[0, 20, 40, 60, 80], values=[2.0, 2.0, 2.0, 2.0, 2.0])},
+ {"c1" : 3, "c2" : SparseVector(indices=[0, 30, 60, 90], values=[3.0, 3.0, 3.0, 3.0])},
+ {"c1" : 4, "c2" : SparseVector(indices=[0, 40, 80], values=[4.0, 4.0, 4.0])},
+ {"c1" : 5, "c2" : SparseVector(indices=[0], values=[0.0])},
+ ]
+ query_vector = SparseVector(indices=[0, 20, 80], values=[1.0, 2.0, 3.0])
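+ # inner products against the query: c1=4 scores 16, c1=2 scores 12, c1=1 scores 6, c1=3 scores 3, c1=5 scores 0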
+
+ decorator1 = infinity_runner_decorator_factory(config1, uri, infinity_runner)
+
+ @decorator1
+ def part1(infinity_obj):
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.create_table(
+ "test_mem_bmp",
+ {"c1": {"type": "int"}, "c2": {"type": "sparse,100,float,int"}},
+ )
+ res = table_obj.create_index(
+ "idx1",
+ index.IndexInfo(
+ "c2",
+ index.IndexType.BMP,
+ {"BLOCK_SIZE": "8", "COMPRESS_TYPE": "compress"},
+ ),
+ )
+ assert res.error_code == infinity.ErrorCode.OK
+
+ # trigger dump
+ for i in range(7):
+ table_obj.insert(test_data)
+
+ part1()
+
+ # config1 limits the bmp mem index size, so the inserts above trigger a dump
+ # 1. recover by dumpindex wal & memindex recovery
+ decorator2 = infinity_runner_decorator_factory(config2, uri, infinity_runner)
+
+ @decorator2
+ def part2(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_bmp")
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
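+ # 7 batches of 5 rows were inserted in part1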
+ assert data_dict["count(star)"] == [35]
+
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_sparse("c2", query_vector, "ip", 8)
+ .to_result()
+ )
+ assert data_dict["c1"] == [4, 4, 4, 4, 4, 4, 4, 2]
+
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ # print(data_dict)
+ assert data_dict["count(star)"] == [35]
+
+ for i in range(3):
+ table_obj.insert(test_data)
+ time.sleep(5)
+
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_sparse("c2", query_vector, "ip", 11)
+ .to_result()
+ )
+ assert data_dict["c1"] == [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2]
+
+ part2()
+
+ # 2. recover by delta ckp & dumpindex wal & memindex recovery
+ decorator3 = infinity_runner_decorator_factory(config3, uri, infinity_runner)
+
+ @decorator3
+ def part3(infinity_obj):
+ time.sleep(5)
+ db_obj = infinity_obj.get_database("default_db")
+ table_obj = db_obj.get_table("test_mem_bmp")
+
+ def check():
+ data_dict, data_type_dict, _ = (
+ table_obj.output(["c1"])
+ .match_sparse("c2", query_vector, "ip", 11)
+ .to_result()
+ )
+ assert data_dict["c1"] == [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2]
+
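+ # 35 rows from part1 plus 3 more batches of 5 inserted in part2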
+ data_dict, data_type_dict, _ = table_obj.output(["count(*)"]).to_result()
+ assert data_dict["count(star)"] == [50]
+
+ check()
+ infinity_obj.optimize("default_db", "test_mem_bmp", optimize_opt=None)
+ check()
+
+ db_obj.drop_table("test_mem_bmp")
+
+ part3()
+
def test_optimize_from_different_database(self, infinity_runner: InfinityRunner):
infinity_runner.clear()
@@ -358,12 +675,12 @@ def part1(infinity_obj):
def part2(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table(table_name)
- data_dict, data_type_dict = (
+ data_dict, data_type_dict, _ = (
table_obj.output(["c1"]).filter("c2 >= 8192").to_result()
)
assert data_dict["c1"] == [8192 + i for i in range(100)]
- data_dict, data_type_dict = (
+ data_dict, data_type_dict, _ = (
table_obj.output(["c1"])
.match_sparse("c3", SparseVector(indices=[1], values=[1.0]), "ip", 100)
.to_result()
@@ -406,7 +723,7 @@ def part1(infinity_obj):
def part2(infinity_obj):
db_obj = infinity_obj.get_database("default_db")
table_obj = db_obj.get_table("test_memidx5")
- data_dict, data_type_dict = (
+ data_dict, data_type_dict, _ = (
table_obj.output(["c1"]).match_text("c2", "hello", 2).to_result()
)
assert data_dict["c1"] == [1, 2]
diff --git a/python/test_cluster/conftest.py b/python/test_cluster/conftest.py
index 10b601a83e..e2dd953503 100644
--- a/python/test_cluster/conftest.py
+++ b/python/test_cluster/conftest.py
@@ -18,6 +18,11 @@ def pytest_addoption(parser):
parser.addoption(
"--minio_port",
action="store",
+ default=9000,
+ )
+ parser.addoption(
+ "--minio_console_port",
+ action="store",
default=9001,
)
parser.addoption(
@@ -55,7 +60,8 @@ def pytest_generate_tests(metafunc):
infinity_path = metafunc.config.getoption("infinity_path")
minio_dir = metafunc.config.getoption("minio_dir")
minio_port = metafunc.config.getoption("minio_port")
- minio_params = MinioParams(minio_dir, minio_port)
+ minio_console_port = metafunc.config.getoption("minio_console_port")
+ minio_params = MinioParams(minio_dir, minio_port, minio_console_port)
infinity_dir = metafunc.config.getoption("infinity_dir")
use_sudo = metafunc.config.getoption("use_sudo")
diff --git a/python/test_cluster/database_operations.py b/python/test_cluster/database_operations.py
index 4dcea24df1..80b98b284c 100644
--- a/python/test_cluster/database_operations.py
+++ b/python/test_cluster/database_operations.py
@@ -8,11 +8,12 @@
from dataclasses import dataclass
from typing import Dict, Set, Tuple
-class instance_state:
- def __init__(self, client : infinity_http.infinity_http = None):
- self.db2tables : Dict[str, Set[str]] = {"default_db" : set()}
- self.dbtable2index : Dict[Tuple[str, str], Set[str]] = {}
- self.dbtable2df : Dict[Tuple[str, str], pd.DataFrame] = {}
+
+class instance_state:
+ def __init__(self, client: infinity_http.infinity_http = None):
+ self.db2tables: Dict[str, Set[str]] = {"default_db": set()}
+ self.dbtable2index: Dict[Tuple[str, str], Set[str]] = {}
+ self.dbtable2df: Dict[Tuple[str, str], pd.DataFrame] = {}
if client is not None:
databases = client.list_databases().db_names
@@ -22,8 +23,8 @@ def __init__(self, client : infinity_http.infinity_http = None):
tables = db_object.get_all_tables()
for table_name in tables:
table_object = db_object.get_table(table_name)
- df = table_object.output(["*"]).to_df()
- res = table_object.output(["*"]).to_result()
+ df, extra_result = table_object.output(["*"]).to_df()
+ res, _, _ = table_object.output(["*"]).to_result()
# print(f"instance_state initializing, table {db_name}.{table_name}")
# print(res)
self.add_table(db_name, table_name, ConflictType.Ignore)
@@ -32,38 +33,40 @@ def __init__(self, client : infinity_http.infinity_http = None):
for index in indexes:
self.add_index(db_name, table_name, index["index_name"], ConflictType.Ignore)
- def check_db_exist(self, db_name : str):
+ def check_db_exist(self, db_name: str):
if db_name not in self.db2tables:
raise InfinityException(ErrorCode.DB_NOT_EXIST, f"database {db_name} does not exist!")
- def check_db_not_exist(self, db_name : str):
+ def check_db_not_exist(self, db_name: str):
if db_name in self.db2tables:
raise InfinityException(ErrorCode.DUPLICATE_DATABASE_NAME, f"database {db_name} already exists!")
- def check_table_exist(self, db_name : str, table_name : str):
+ def check_table_exist(self, db_name: str, table_name: str):
self.check_db_exist(db_name)
if table_name not in self.db2tables[db_name]:
raise InfinityException(ErrorCode.TABLE_NOT_EXIST, f"table {db_name}.{table_name} does not exist!")
- def check_table_not_exist(self, db_name : str, table_name : str):
+ def check_table_not_exist(self, db_name: str, table_name: str):
self.check_db_exist(db_name)
if table_name in self.db2tables[db_name]:
raise InfinityException(ErrorCode.DUPLICATE_TABLE_NAME, f"table {db_name}.{table_name} already exists!")
- def check_index_exist(self, db_name : str, table_name : str, index_name : str):
+ def check_index_exist(self, db_name: str, table_name: str, index_name: str):
self.check_table_exist(db_name, table_name)
if index_name not in self.dbtable2index:
- raise InfinityException(ErrorCode.INDEX_NOT_EXIST, f"table {db_name}.{table_name}.{index_name} does not exist!")
+ raise InfinityException(ErrorCode.INDEX_NOT_EXIST,
+ f"table {db_name}.{table_name}.{index_name} does not exist!")
- def check_index_not_exist(self, db_name : str, table_name : str, index_name : str):
+ def check_index_not_exist(self, db_name: str, table_name: str, index_name: str):
self.check_table_exist(db_name, table_name)
if index_name in self.dbtable2index:
- raise InfinityException(ErrorCode.DUPLICATE_INDEX_NAME, f"table {db_name}.{table_name}.{index_name} already exists!")
+ raise InfinityException(ErrorCode.DUPLICATE_INDEX_NAME,
+ f"table {db_name}.{table_name}.{index_name} already exists!")
# operations to a instance_state()
# add drop : database, table, index
- def add_database(self, db_name : str, conflict_type : ConflictType):
+ def add_database(self, db_name: str, conflict_type: ConflictType):
if conflict_type == ConflictType.Ignore:
if db_name in self.db2tables:
return
@@ -72,7 +75,7 @@ def add_database(self, db_name : str, conflict_type : ConflictType):
self.db2tables[db_name] = set()
- def drop_database(self, db_name : str, conflict_type : ConflictType):
+ def drop_database(self, db_name: str, conflict_type: ConflictType):
if conflict_type == ConflictType.Ignore:
if db_name not in self.db2tables:
return
@@ -84,7 +87,7 @@ def drop_database(self, db_name : str, conflict_type : ConflictType):
self.dbtable2index.pop((db_name, table_name))
self.db2tables.pop(db_name)
- def add_table(self, db_name : str, table_name : str, conflict_type : ConflictType):
+ def add_table(self, db_name: str, table_name: str, conflict_type: ConflictType):
if conflict_type == ConflictType.Ignore:
self.check_db_exist(db_name)
if table_name in self.db2tables[db_name]:
@@ -95,7 +98,7 @@ def add_table(self, db_name : str, table_name : str, conflict_type : ConflictTyp
self.db2tables[db_name].add(table_name)
self.dbtable2index[(db_name, table_name)] = set()
- def drop_table(self, db_name : str, table_name : str, conflit_type : ConflictType):
+ def drop_table(self, db_name: str, table_name: str, conflit_type: ConflictType):
if conflit_type == ConflictType.Ignore:
self.check_database_exist(db_name)
if table_name not in self.db2tables:
@@ -107,7 +110,7 @@ def drop_table(self, db_name : str, table_name : str, conflit_type : ConflictTyp
self.dbtable2index.pop((db_name, table_name))
self.dbtable2df.pop((db_name, table_name))
- def add_index(self, db_name : str, table_name : str, index_name : str, conflict_type : ConflictType):
+ def add_index(self, db_name: str, table_name: str, index_name: str, conflict_type: ConflictType):
if conflict_type == ConflictType.Ignore:
self.check_table_exist(db_name, table_name)
if index_name in self.dbtable2index[(db_name, table_name)]:
@@ -117,7 +120,7 @@ def add_index(self, db_name : str, table_name : str, index_name : str, conflict_
self.dbtable2index[(db_name, table_name)].add(index_name)
- def drop_index(self, db_name : str, table_name : str, index_name : str, conflit_type : ConflictType):
+ def drop_index(self, db_name: str, table_name: str, index_name: str, conflit_type: ConflictType):
if conflit_type == ConflictType.Ignore:
self.check_table_exist(db_name, table_name)
if index_name not in self.dbtable2index[(db_name, table_name)]:
@@ -127,23 +130,24 @@ def drop_index(self, db_name : str, table_name : str, index_name : str, conflit_
self.dbtable2index[(db_name, table_name)].remove(index_name)
- def get_table_df(self, db_name : str, table_name :str) -> pd.DataFrame | None:
+ def get_table_df(self, db_name: str, table_name: str) -> pd.DataFrame | None:
self.check_table_exist(db_name, table_name)
if (db_name, table_name) in self.dbtable2df:
return self.dbtable2df[(db_name, table_name)]
- else :
+ else:
return None
- def set_table_df(self, db_name : str, table_name :str, df : pd.DataFrame) :
+ def set_table_df(self, db_name: str, table_name: str, df: pd.DataFrame):
df = df.reset_index(drop=True)
# print(f"setting {db_name}.{table_name} = ")
# print(df)
self.check_table_exist(db_name, table_name)
self.dbtable2df[(db_name, table_name)] = df
+
# this will clear a instance to its initial state:
# only a default_db is remained
-def clear_instance(state : instance_state, client : infinity_http.infinity_http):
+def clear_instance(state: instance_state, client: infinity_http.infinity_http):
for db_name, tables in state.db2tables.items():
if db_name == "default_db":
db_obj = client.get_database(db_name)
@@ -152,10 +156,12 @@ def clear_instance(state : instance_state, client : infinity_http.infinity_http)
else:
client.drop_database(db_name)
-def check_instance_table_equal(state : instance_state, client : infinity_http.infinity_http, db_name : str, table_name : str):
+
+def check_instance_table_equal(state: instance_state, client: infinity_http.infinity_http, db_name: str,
+ table_name: str):
db_obj = client.get_database(db_name)
table_obj = db_obj.get_table(table_name)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
expected = state.get_table_df(db_name, table_name)
# print("res = ")
# print(res)
@@ -163,30 +169,37 @@ def check_instance_table_equal(state : instance_state, client : infinity_http.in
# print(expected)
pd.testing.assert_frame_equal(res, expected)
-def check_instance_equal(state : instance_state, client : infinity_http.infinity_http):
+
+def check_instance_equal(state: instance_state, client: infinity_http.infinity_http):
client_state = instance_state(client)
assert state.db2tables == client_state.db2tables
assert state.dbtable2index == client_state.dbtable2index
for db_name, tables in state.db2tables.items():
for table_name in tables:
- pd.testing.assert_frame_equal(state.dbtable2df[(db_name, table_name)], client_state.dbtable2df[(db_name, table_name)])
+ pd.testing.assert_frame_equal(state.dbtable2df[(db_name, table_name)],
+ client_state.dbtable2df[(db_name, table_name)])
+
# do operations on a single node
-def do_some_operations(client : infinity_http.infinity_http, state : instance_state):
+def do_some_operations(client: infinity_http.infinity_http, state: instance_state):
table_create_insert_delete_modify(client, state)
+
# do operations on a cluster of nodes
-def do_some_operations_cluster(leader_client : infinity_http.infinity_http, other_clients : [infinity_http.infinity_http], leader_state : instance_state):
+def do_some_operations_cluster(leader_client: infinity_http.infinity_http, other_clients: [infinity_http.infinity_http],
+ leader_state: instance_state):
table_create_insert_delete_modify(leader_client, leader_state)
time.sleep(1)
for client in other_clients:
table_create_insert_delete_modify_verify(client, leader_state)
return
-def table_create_insert_delete_modify_verify(client : infinity_http.infinity_http, leader_state : instance_state):
+
+def table_create_insert_delete_modify_verify(client: infinity_http.infinity_http, leader_state: instance_state):
check_instance_equal(leader_state, client)
-def table_create_insert_delete_modify(client : infinity_http.infinity_http, leader_state : instance_state):
+
+def table_create_insert_delete_modify(client: infinity_http.infinity_http, leader_state: instance_state):
db = client.get_database("default_db")
table = db.create_table("test_data", {"c1": {"type": "int"}, "c2": {"type": "vector,4,float"}}, ConflictType.Ignore)
leader_state.add_table("default_db", "test_data", ConflictType.Ignore)
@@ -198,10 +211,10 @@ def table_create_insert_delete_modify(client : infinity_http.infinity_http, lead
table.insert([{"c1": i, "c2": [1.0, 2.0, 3.0, 4.0]}])
df_to_insert = pd.DataFrame(
{
- "c1" : [i],
- "c2" : [[1.0, 2.0, 3.0, 4.0]]
+ "c1": [i],
+ "c2": [[1.0, 2.0, 3.0, 4.0]]
}
- ).astype({"c1" : dtype("int32"), "c2" : dtype("object")})
+ ).astype({"c1": dtype("int32"), "c2": dtype("object")})
table_df = pd.concat([table_df, df_to_insert])
for i in range(0, 10, 2):
diff --git a/python/test_cluster/infinity_cluster.py b/python/test_cluster/infinity_cluster.py
index 2217321f9a..97cce44b69 100644
--- a/python/test_cluster/infinity_cluster.py
+++ b/python/test_cluster/infinity_cluster.py
@@ -34,9 +34,10 @@ def is_port_in_use(port: int) -> bool:
class MinioParams:
- def __init__(self, minio_dir: str, minio_port: int):
+ def __init__(self, minio_dir: str, minio_port: int, minio_console_port: int):
self.minio_dir = minio_dir
self.minio_port = minio_port
+ self.minio_console_port = minio_console_port
class BaseInfinityRunner:
@@ -244,7 +245,7 @@ def add_node(self, node_name: str, config_path: str, init=True):
def add_minio(self, minio_params: MinioParams):
minio_image_name = "quay.io/minio/minio"
- minio_cmd = f'server /data --console-address ":{minio_params.minio_port}"'
+ minio_cmd = f'server /data --address ":{minio_params.minio_port}" --console-address ":{minio_params.minio_console_port}"'
docker_client = docker.from_env()
container_name = "minio_host"
diff --git a/python/test_cluster/test_admin.py b/python/test_cluster/test_admin.py
index 267af8eeca..87f67b0e62 100644
--- a/python/test_cluster/test_admin.py
+++ b/python/test_cluster/test_admin.py
@@ -13,7 +13,7 @@ def test_admin(cluster: InfinityCluster):
res = infinity1.show_current_node()
logger.info(f'{res.node_role}, {res.node_status}')
assert (res.node_role == "admin")
- assert (res.node_status == "starting")
+ assert (res.node_status == "started")
res = infinity1.show_admin_variables()
logger.info(res.data)
diff --git a/python/test_cluster/test_basic.py b/python/test_cluster/test_basic.py
index 90898397a4..3955d9aba2 100644
--- a/python/test_cluster/test_basic.py
+++ b/python/test_cluster/test_basic.py
@@ -42,29 +42,29 @@ def test_0(cluster: InfinityCluster):
cluster.remove_node("node1")
-def test_mock(mock_cluster: MockInfinityCluster):
- cluster = mock_cluster
- with cluster:
- cluster.add_node("node1", "conf/leader.toml")
- cluster.add_node("node2", "conf/follower.toml")
-
- cluster.set_leader("node1")
- cluster.set_follower("node2")
-
- time.sleep(1)
-
- cluster.disconnect("node2")
- time.sleep(0.1)
- cluster.reconnect("node2")
-
- cluster.block_peer_net("node2")
- time.sleep(0.1)
- cluster.restore_peer_net("node2")
-
- time.sleep(1)
-
- cluster.remove_node("node2")
- cluster.remove_node("node1")
+# def test_mock(mock_cluster: MockInfinityCluster):
+# cluster = mock_cluster
+# with cluster:
+# cluster.add_node("node1", "conf/leader.toml")
+# cluster.add_node("node2", "conf/follower.toml")
+#
+# cluster.set_leader("node1")
+# cluster.set_follower("node2")
+#
+# time.sleep(1)
+#
+# cluster.disconnect("node2")
+# time.sleep(0.1)
+# cluster.reconnect("node2")
+#
+# cluster.block_peer_net("node2")
+# time.sleep(0.1)
+# cluster.restore_peer_net("node2")
+#
+# time.sleep(1)
+#
+# cluster.remove_node("node2")
+# cluster.remove_node("node1")
@pytest.mark.docker
diff --git a/python/test_cluster/test_delete.py b/python/test_cluster/test_delete.py
index 703a21e15e..67db17e64b 100644
--- a/python/test_cluster/test_delete.py
+++ b/python/test_cluster/test_delete.py
@@ -41,25 +41,25 @@ def test_delete(self, cluster: InfinityCluster):
res = table_obj.delete("c1 = 1")
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (2, 3, 4), 'c2': (20, 30, 40), 'c3': (200, 300, 400)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
time.sleep(1)
db_obj_2 = infinity2.get_database("default_db")
table_obj_2 = db_obj_2.get_table("test_delete")
- res = table_obj_2.output(["*"]).to_df()
+ res, extra_result = table_obj_2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (2, 3, 4), 'c2': (20, 30, 40), 'c3': (200, 300, 400)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
res = table_obj.delete()
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c2': (), 'c3': ()})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
- res = table_obj_2.output(["*"]).to_df()
+ res, extra_result = table_obj_2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c2': (), 'c3': ()})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -104,11 +104,11 @@ def test_delete_on_follower(self, cluster: InfinityCluster):
print(e)
assert(e.error_code == 8007)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 2, 3, 4), 'c2': (10, 20, 30, 40), 'c3': (100, 200, 300, 400)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
- res = table_obj_2.output(["*"]).to_df()
+ res, extra_result = table_obj_2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 2, 3, 4), 'c2': (10, 20, 30, 40), 'c3': (100, 200, 300, 400)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
diff --git a/python/test_cluster/test_import.py b/python/test_cluster/test_import.py
index 2e1c7b732d..c718c02aa8 100644
--- a/python/test_cluster/test_import.py
+++ b/python/test_cluster/test_import.py
@@ -45,7 +45,7 @@ def test1(self, cluster: InfinityCluster):
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res = table_obj1.output(["*"]).to_df()
+ res, extra_result = table_obj1.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
time.sleep(1)
@@ -54,7 +54,7 @@ def test1(self, cluster: InfinityCluster):
infinity2 = cluster.client("node2")
db_obj2 = infinity2.get_database("default_db")
table_obj2 = db_obj2.get_table(table_name)
- res = table_obj2.output(["*"]).to_df()
+ res, extra_result = table_obj2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
db_obj1.drop_table(table_name)
diff --git a/python/test_cluster/test_index.py b/python/test_cluster/test_index.py
index 520523475a..7f0035ae21 100644
--- a/python/test_cluster/test_index.py
+++ b/python/test_cluster/test_index.py
@@ -61,12 +61,12 @@ def test1(self, cluster: InfinityCluster):
"c2": ("text1", "text2"),
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res1 = table_obj1.output(["*"]).filter("c1 < 3").to_df()
+ res1, extra_result = table_obj1.output(["*"]).filter("c1 < 3").to_df()
pd.testing.assert_frame_equal(res1, res_gt)
print("select in node2")
time.sleep(1)
- res2 = table_obj2.output(["*"]).filter("c1 < 3").to_df()
+ res2, extra_result = table_obj2.output(["*"]).filter("c1 < 3").to_df()
# print(res2)
pd.testing.assert_frame_equal(res2, res_gt)
diff --git a/python/test_cluster/test_insert.py b/python/test_cluster/test_insert.py
index 8cde4d111b..1ad7559806 100644
--- a/python/test_cluster/test_insert.py
+++ b/python/test_cluster/test_insert.py
@@ -42,7 +42,7 @@ def __test_inner_1(self, cluster: InfinityCluster):
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res = table1.output(["*"]).to_df()
+ res, extra_result = table1.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
time.sleep(1)
@@ -51,7 +51,7 @@ def __test_inner_1(self, cluster: InfinityCluster):
infinity2 = cluster.client("node2")
db2 = infinity2.get_database("default_db")
table2 = db2.get_table(table_name)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
res = db1.drop_table(table_name)
@@ -60,8 +60,8 @@ def __test_inner_1(self, cluster: InfinityCluster):
def test_insert_11(self, cluster: InfinityCluster):
self.__test_inner_1(cluster)
- def test_insert_12(self, mock_cluster: MockInfinityCluster):
- self.__test_inner_1(mock_cluster)
+ # def test_insert_12(self, mock_cluster: MockInfinityCluster):
+ # self.__test_inner_1(mock_cluster)
@pytest.mark.docker
def test_insert_13(self, docker_cluster: DockerInfinityCluster):
@@ -104,7 +104,7 @@ def test_insert_2(self, docker_cluster: DockerInfinityCluster):
infinity2 = docker_cluster.client("node2")
db2 = infinity2.get_database("default_db")
table2 = db2.get_table(table_name)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
docker_cluster.disconnect("node2")
@@ -121,7 +121,7 @@ def noreturn_request():
docker_cluster.reconnect("node2")
docker_cluster.disconnect("node1")
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
docker_cluster.reconnect("node1")
@@ -169,7 +169,7 @@ def test_insert_3(self, docker_cluster: DockerInfinityCluster):
infinity2 = docker_cluster.client("node2")
db2 = infinity2.get_database("default_db")
table2 = db2.get_table(table_name)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
# reconnect leader
@@ -189,7 +189,7 @@ def test_insert_3(self, docker_cluster: DockerInfinityCluster):
),
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
db1.drop_table(table_name)
diff --git a/python/test_cluster/test_knn.py b/python/test_cluster/test_knn.py
index ea8a9f5a41..c4cbff528f 100644
--- a/python/test_cluster/test_knn.py
+++ b/python/test_cluster/test_knn.py
@@ -47,13 +47,13 @@ def test_knn(self, cluster: InfinityCluster):
res = table_obj.import_data(test_csv_dir, None)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["variant_id", "_row_id"]).match_dense("gender_vector", [1.0] * 4, "float", "ip", 10).to_pl()
+ res, extra_result = table_obj.output(["variant_id", "_row_id"]).match_dense("gender_vector", [1.0] * 4, "float", "ip", 10).to_pl()
print(res)
time.sleep(1)
db_obj_2 = infinity2.get_database("default_db")
table_obj_2 = db_obj_2.get_table("test_knn")
- res = table_obj_2.output(["variant_id", "_row_id"]).match_dense("gender_vector", [1.0] * 4, "float", "ip", 10).to_pl()
+ res, extra_result = table_obj_2.output(["variant_id", "_row_id"]).match_dense("gender_vector", [1.0] * 4, "float", "ip", 10).to_pl()
print(res)
res = db_obj.drop_table("test_knn", ConflictType.Error)
diff --git a/python/test_cluster/test_member_change.py b/python/test_cluster/test_member_change.py
index 99ae559c79..1645e82c65 100644
--- a/python/test_cluster/test_member_change.py
+++ b/python/test_cluster/test_member_change.py
@@ -148,7 +148,7 @@ def verify_data(node_name: str):
nonlocal insert_line
infinity: infinity_http = cluster.client(node_name)
table = infinity.get_database("default_db").get_table(table_name)
- res = table.output(["*"]).to_df()
+ res, extra_result = table.output(["*"]).to_df()
if res.shape[0] == insert_line + 1:
insert_line += 1
logger.debug(f"test_i: {i}, verify data, node_name: {node_name}")
diff --git a/python/test_cluster/test_select.py b/python/test_cluster/test_select.py
index bca4b6e1f5..135bd5542a 100644
--- a/python/test_cluster/test_select.py
+++ b/python/test_cluster/test_select.py
@@ -9,6 +9,7 @@
from infinity.errors import ErrorCode
import common_values
+
class TestSelect:
def test_select(self, cluster: InfinityCluster):
with cluster:
@@ -32,48 +33,49 @@ def test_select(self, cluster: InfinityCluster):
"c2": {"type": "int", "constraints": ["not null"]}}, ConflictType.Error)
res = table_obj.insert(
- [{"c1": -3, "c2": -3}, {"c1": -2, "c2": -2}, {"c1": -1, "c2": -1}, {"c1": 0, "c2": 0}, {"c1": 1, "c2": 1},
- {"c1": 2, "c2": 2}, {"c1": 3, "c2": 3}])
+ [{"c1": -3, "c2": -3}, {"c1": -2, "c2": -2}, {"c1": -1, "c2": -1}, {"c1": 0, "c2": 0},
+ {"c1": 1, "c2": 1},
+ {"c1": 2, "c2": 2}, {"c1": 3, "c2": 3}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert(
- [{"c1": -8, "c2": -8}, {"c1": -7, "c2": -7}, {"c1": -6, "c2": -6}, {"c1": 7, "c2": 7}, {"c1": 8, "c2": 8},
- {"c1": 9, "c2": 9}])
+ [{"c1": -8, "c2": -8}, {"c1": -7, "c2": -7}, {"c1": -6, "c2": -6}, {"c1": 7, "c2": 7},
+ {"c1": 8, "c2": 8},
+ {"c1": 9, "c2": 9}])
assert res.error_code == ErrorCode.OK
-
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
- 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
- .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
+ 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
+ .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
- 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
- .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
+ 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
+ .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["c1 + c2"]).filter("c1 = 3").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'(c1 + c2)': (6,)})
- .astype({'(c1 + c2)': dtype('int32')}))
+ .astype({'(c1 + c2)': dtype('int32')}))
time.sleep(1)
db_obj_2 = infinity2.get_database("default_db")
table_obj_2 = db_obj_2.get_table("test_select")
- res = table_obj_2.output(["*"]).to_df()
+ res, extra_result = table_obj_2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
- 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
- .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
+ 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
+ .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj_2.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj_2.output(["c1", "c2"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
- 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
- .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
+ 'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
+ .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj_2.output(
+ res, extra_result = table_obj_2.output(
["c1 + c2"]).filter("c1 = 3").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'(c1 + c2)': (6,)})
- .astype({'(c1 + c2)': dtype('int32')}))
+ .astype({'(c1 + c2)': dtype('int32')}))
res = db_obj.drop_table("test_select", ConflictType.Error)
assert res.error_code == ErrorCode.OK
diff --git a/python/test_cluster/test_single_node.py b/python/test_cluster/test_single_node.py
index 0f8b6ae43f..7a0af66afb 100644
--- a/python/test_cluster/test_single_node.py
+++ b/python/test_cluster/test_single_node.py
@@ -81,7 +81,7 @@ def test_standalone2admin2leader2admin(cluster: InfinityCluster):
test: (standalone, operations)->admin->(leader, operations)->admin
'''
with cluster:
- cluster.add_node("test", "conf/infinity_conf.toml")
+ cluster.add_node("test", "conf/infinity_minio_conf.toml")
test_client = cluster.client("test")
state = instance_state(test_client)
assert test_client.show_current_node().node_role == "standalone"
diff --git a/python/test_cluster/test_tc.py b/python/test_cluster/test_tc.py
index 633b42c5e0..2800be92db 100644
--- a/python/test_cluster/test_tc.py
+++ b/python/test_cluster/test_tc.py
@@ -78,7 +78,7 @@ def test_tc1(cluster: InfinityCluster):
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res = table1.output(["*"]).to_df()
+ res, extra_result = table1.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
time.sleep(1)
@@ -87,7 +87,7 @@ def test_tc1(cluster: InfinityCluster):
infinity2 = cluster.client("node2")
db2 = infinity2.get_database("default_db")
table2 = db2.get_table(table_name)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
try:
@@ -154,12 +154,12 @@ def test_tc1(cluster: InfinityCluster):
}
).astype({"c1": dtype("int32"), "c2": dtype("object")})
- res = table1.output(["*"]).to_df()
+ res, extra_result = table1.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
db2 = infinity2.get_database("default_db")
table2 = db2.get_table(table_name)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
cluster.set_admin("node2")
@@ -185,9 +185,9 @@ def test_tc1(cluster: InfinityCluster):
assert (res.node_role == "leader")
assert (res.node_status == "alive")
- res = table1.output(["*"]).to_df()
+ res, extra_result = table1.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
- res = table2.output(["*"]).to_df()
+ res, extra_result = table2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
res = db1.drop_table(table_name, ConflictType.Ignore)
@@ -282,7 +282,7 @@ def test_tc2(cluster: InfinityCluster):
for server in [infinity1, infinity2, infinity3, infinity4]:
db = server.get_database("default_db")
table = db.get_table(table_name)
- res = table.output(["*"]).to_df()
+ res, extra_result = table.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, res_gt)
res = db1.drop_table(table_name)
diff --git a/python/test_cluster/test_update.py b/python/test_cluster/test_update.py
index a4f06f242e..4ece414a5c 100644
--- a/python/test_cluster/test_update.py
+++ b/python/test_cluster/test_update.py
@@ -40,7 +40,7 @@ def test_update(self, cluster: InfinityCluster):
res = table_obj.update("c1 = 1", {"c2": 90, "c3": 900})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (2, 3, 4, 1), 'c2': (20, 30, 40, 90), 'c3': (200, 300, 400, 900)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -48,7 +48,7 @@ def test_update(self, cluster: InfinityCluster):
time.sleep(1)
db_obj_2 = infinity2.get_database("default_db")
table_obj_2 = db_obj_2.get_table("test_update")
- res = table_obj_2.output(["*"]).to_df()
+ res, extra_result = table_obj_2.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (2, 3, 4, 1), 'c2': (20, 30, 40, 90), 'c3': (200, 300, 400, 900)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
diff --git a/python/test_pysdk/test_alter.py b/python/test_pysdk/test_alter.py
index be52eb233e..8892313068 100644
--- a/python/test_pysdk/test_alter.py
+++ b/python/test_pysdk/test_alter.py
@@ -54,12 +54,12 @@ def test_simple_add_columns(self):
res = table_obj.insert([{"c1": 1, "c2": 2}])
assert res.error_code == infinity.ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1], "c2": [2]}).astype(
{"c1": dtype("int32"), "c2": dtype("int32")}
- ),
+ )
)
res = table_obj.add_columns({"c2": {"type": "varchar", "default": "default"}})
@@ -71,17 +71,17 @@ def test_simple_add_columns(self):
res = table_obj.add_columns({"c3": {"type": "varchar", "default": "default"}})
assert res.error_code == infinity.ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1], "c2": [2], "c3": ["default"]}).astype(
{"c1": dtype("int32"), "c2": dtype("int32"), "c3": dtype("object")}
- ),
+ )
)
table_obj.insert([{"c1": 2, "c2": 3, "c3": "test"}])
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame(
@@ -110,7 +110,7 @@ def test_simple_drop_columns(self):
res = table_obj.insert([{"c1": 1, "c2": 2, "c3": "test"}])
assert res.error_code == infinity.ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1], "c2": [2], "c3": ["test"]}).astype(
@@ -124,7 +124,7 @@ def test_simple_drop_columns(self):
res = table_obj.drop_columns("c2")
assert res.error_code == infinity.ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1], "c3": ["test"]}).astype(
@@ -134,7 +134,7 @@ def test_simple_drop_columns(self):
table_obj.insert([{"c1": 2, "c3": "test2"}])
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1, 2], "c3": ["test", "test2"]}).astype(
@@ -187,7 +187,7 @@ def test_insert_after_drop_columns(self):
]
)
- result = table_obj.output(["*"]).to_df()
+ result, extra_result = table_obj.output(["*"]).to_df()
print(result)
pd.testing.assert_frame_equal(
result,
@@ -252,7 +252,7 @@ def test_add_drop_column_with_index(self):
res = table_obj.add_columns({"c2": {"type": "varchar", "default": "test"}})
assert res.error_code == infinity.ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame({"c1": [1], "c3": ["test"], "c2": ["test"]}).astype(
@@ -262,7 +262,7 @@ def test_add_drop_column_with_index(self):
table_obj.insert([{"c1": 1, "c2": "t1", "c3": "t2"}])
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(
res,
pd.DataFrame(
diff --git a/python/test_pysdk/test_basic.py b/python/test_pysdk/test_basic.py
index 409fa4415d..143728138e 100644
--- a/python/test_pysdk/test_basic.py
+++ b/python/test_pysdk/test_basic.py
@@ -175,12 +175,12 @@ def test_basic(self, check_data ,suffix):
[{"c1": 1, "c2": 1.1}, {"c1": 2, "c2": 2.2}])
assert res.error_code == ErrorCode.OK
# search
- res = table_obj.output(["c1 + 0.1"]).to_df()
+ res, extra_result = table_obj.output(["c1 + 0.1"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'(c1 + 0.1)': (1.1, 2.1)}).astype(
{'(c1 + 0.1)': dtype('float64')}))
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["*"]).filter("c1 > 1").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (2,), 'c2': (2.2,)}).astype(
@@ -237,7 +237,7 @@ def test_basic(self, check_data ,suffix):
res = export_table_obj.import_data(common_values.TEST_TMP_DIR + suffix + test_export_csv_file)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c1"]).filter("c1 > 1").to_df()
+ res, extra_result = table_obj.output(["c1"]).filter("c1 > 1").to_df()
print(res)
res = db_obj.drop_table("my_table_export"+suffix)
@@ -249,7 +249,7 @@ def test_basic(self, check_data ,suffix):
res = export_table_obj.import_data(common_values.TEST_TMP_DIR + suffix + test_export_jsonl_file,
import_options={"file_type": "jsonl"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c1"]).filter("c1 > 1").to_df()
+ res, extra_result = table_obj.output(["c1"]).filter("c1 > 1").to_df()
print(res)
res = db_obj.drop_table("my_table_export"+suffix)
assert res.error_code == ErrorCode.OK
@@ -261,7 +261,7 @@ def test_basic(self, check_data ,suffix):
res = export_table_obj.import_data(common_values.TEST_TMP_DIR + suffix + test_export_csv_file,
import_options={"file_type": "csv"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c1"]).filter("c1 > 1").to_df()
+ res, extra_result = table_obj.output(["c1"]).filter("c1 > 1").to_df()
print(res)
res = db_obj.drop_table("my_table_export"+suffix)
assert res.error_code == ErrorCode.OK
@@ -272,7 +272,7 @@ def test_basic(self, check_data ,suffix):
os.remove(common_values.TEST_TMP_DIR + suffix + test_export_jsonl_file_part)
# search
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["c1"]).filter("c1 > 1").to_df()
print(res)
res = db_obj.drop_table("my_table4"+suffix)
diff --git a/python/test_pysdk/test_convert.py b/python/test_pysdk/test_convert.py
index 1c4b4803f7..9ed5c7ef35 100644
--- a/python/test_pysdk/test_convert.py
+++ b/python/test_pysdk/test_convert.py
@@ -49,11 +49,11 @@ def test_to_pl(self, suffix):
table_obj = db_obj.get_table("test_to_pl"+suffix)
table_obj.insert([{"c1": 1, "c2": 2}])
print()
- res = table_obj.output(["c1", "c2"]).to_pl()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_pl()
print(res)
- res = table_obj.output(["c1", "c1"]).to_pl()
+ res, extra_result = table_obj.output(["c1", "c1"]).to_pl()
print(res)
- res = table_obj.output(["c1", "c2", "c1"]).to_pl()
+ res, extra_result = table_obj.output(["c1", "c2", "c1"]).to_pl()
print(res)
db_obj.drop_table("test_to_pl"+suffix, ConflictType.Error)
def test_to_pa(self, suffix):
@@ -65,11 +65,11 @@ def test_to_pa(self, suffix):
table_obj = db_obj.get_table("test_to_pa"+suffix)
table_obj.insert([{"c1": 1, "c2": 2.0}])
print()
- res = table_obj.output(["c1", "c2"]).to_arrow()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_arrow()
print(res)
- res = table_obj.output(["c1", "c1"]).to_arrow()
+ res, extra_result = table_obj.output(["c1", "c1"]).to_arrow()
print(res)
- res = table_obj.output(["c1", "c2", "c1"]).to_arrow()
+ res, extra_result = table_obj.output(["c1", "c2", "c1"]).to_arrow()
print(res)
db_obj.drop_table("test_to_pa"+suffix, ConflictType.Error)
def test_to_df(self, suffix):
@@ -81,11 +81,11 @@ def test_to_df(self, suffix):
table_obj = db_obj.get_table("test_to_df"+suffix)
table_obj.insert([{"c1": 1, "c2": 2.0}])
print()
- res = table_obj.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_df()
print(res)
- res = table_obj.output(["c1", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c1"]).to_df()
print(res)
- res = table_obj.output(["c1", "c2", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2", "c1"]).to_df()
print(res)
db_obj.drop_table("test_to_df"+suffix, ConflictType.Error)
@@ -102,8 +102,8 @@ def test_without_output_select_list(self, suffix):
with pytest.raises(InfinityException) as e:
insert_res_df = table_obj.output([]).to_df()
- insert_res_arrow = table_obj.output([]).to_arrow()
- insert_res_pl = table_obj.output([]).to_pl()
+ insert_res_arrow, extra_result = table_obj.output([]).to_arrow()
+ insert_res_pl, extra_result = table_obj.output([]).to_pl()
print(insert_res_df, insert_res_arrow, insert_res_pl)
assert e.value.args[0] == ErrorCode.EMPTY_SELECT_FIELDS
@@ -129,7 +129,7 @@ def test_convert_test_with_valid_select_list_output(self, condition_list, suffix
{"c1": 1000, "c2": 2.0},
{"c1": 10000, "c2": 2.0}])
- insert_res_df = table_obj.output(["c1", condition_list]).to_pl()
+ insert_res_df, extra_result = table_obj.output(["c1", condition_list]).to_pl()
print(insert_res_df)
db_obj.drop_table("test_with_valid_select_list_output"+suffix, ConflictType.Error)
@@ -151,7 +151,7 @@ def test_convert_test_with_invalid_select_list_output(self, condition_list, suff
{"c1": 10000, "c2": 2.0}])
with pytest.raises(Exception):
- insert_res_df = table_obj.output(["c1", condition_list]).to_pl()
+ insert_res_df, extra_result = table_obj.output(["c1", condition_list]).to_pl()
print(insert_res_df)
db_obj.drop_table("test_with_invalid_select_list_output"+suffix, ConflictType.Error)
@@ -180,7 +180,7 @@ def test_convert_test_output_with_valid_filter_function(self, filter_list, suffi
{"c1": 1000, "c2": 2.0},
{"c1": 10000, "c2": 2.0}])
# TODO add more filter function
- insert_res_df = InfinityThriftQueryBuilder(table_obj).output(["*"]).filter(filter_list).to_pl()
+ insert_res_df, extra_result = InfinityThriftQueryBuilder(table_obj).output(["*"]).filter(filter_list).to_pl()
print(str(insert_res_df))
db_obj.drop_table("test_output_with_valid_filter_function"+suffix, ConflictType.Error)
@@ -209,7 +209,7 @@ def test_convert_test_output_with_invalid_filter_function(self, filter_list, suf
{"c1": 10000, "c2": 2.0}])
# TODO add more filter function
with pytest.raises(Exception) as e:
- insert_res_df = InfinityThriftQueryBuilder(table_obj).output(["*"]).filter(filter_list).to_pl()
+ insert_res_df, extra_result = InfinityThriftQueryBuilder(table_obj).output(["*"]).filter(filter_list).to_pl()
print(str(insert_res_df))
print(e.type)
diff --git a/python/test_pysdk/test_delete.py b/python/test_pysdk/test_delete.py
index 45e0e29d2a..ddf99d3829 100644
--- a/python/test_pysdk/test_delete.py
+++ b/python/test_pysdk/test_delete.py
@@ -100,7 +100,7 @@ def test_delete(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 1
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (2, 3, 4), 'c2': (20, 30, 40), 'c3': (200, 300, 400)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -108,7 +108,7 @@ def test_delete(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 3
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c2': (), 'c3': ()})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -192,7 +192,7 @@ def test_delete_table_no_rows_met_condition(self,suffix):
except Exception as e:
print(e)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print("{}:{}".format(common_values.types_array[i], res))
assert tb
@@ -207,7 +207,7 @@ def test_delete_table_with_one_block(self, suffix):
# insert
values = [{"c1": 1} for _ in range(8192)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
@@ -215,7 +215,7 @@ def test_delete_table_with_one_block(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 8192
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_delete_table_with_one_block"+suffix, ConflictType.Error)
@@ -230,13 +230,13 @@ def test_delete_table_with_one_segment(self, suffix):
for i in range(1024):
values = [{"c1": i} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
for i in range(1024):
table_obj.delete("c1 = " + str(i))
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
db_obj.drop_table("test_delete_table_with_one_segment"+suffix, ConflictType.Error)
print(delete_res)
@@ -250,7 +250,7 @@ def test_select_before_after_delete(self, suffix):
for i in range(10):
values = [{"c1": i} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
@@ -258,7 +258,7 @@ def test_select_before_after_delete(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 10
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_select_before_after_delete"+suffix, ConflictType.Error)
@@ -277,7 +277,7 @@ def test_delete_insert_data(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 10
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_delete_insert_data"+suffix, ConflictType.Error)
@@ -301,7 +301,7 @@ def test_delete_inserted_long_before_data(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 5
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_delete_inserted_long_before_data"+suffix, ConflictType.Error)
@@ -346,11 +346,11 @@ def test_various_expression_in_where_clause(self, column_types, column_types_exa
try:
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
table_obj.delete("c1 = " + str(column_types_example))
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
except Exception as e:
print(e)
@@ -368,7 +368,7 @@ def test_delete_one_block_without_expression(self, suffix):
# insert
values = [{"c1": 1} for _ in range(8192)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
@@ -376,7 +376,7 @@ def test_delete_one_block_without_expression(self, suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 8192
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
res = db_obj.drop_table("test_delete_one_block_without_expression"+suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@@ -392,7 +392,7 @@ def test_delete_one_segment_without_expression(self,suffix):
for i in range(1024):
values = [{"c1": i} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
@@ -400,7 +400,7 @@ def test_delete_one_segment_without_expression(self,suffix):
assert res.error_code == ErrorCode.OK
assert res.deleted_rows == 10240
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_delete_one_segment_without_expression"+suffix, ConflictType.Error)
@@ -424,14 +424,14 @@ def test_filter_with_valid_expression(self, filter_list, suffix):
for i in range(10):
values = [{"c1": i, "c2": 3.0} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
res = table_obj.delete(filter_list)
assert res.error_code == ErrorCode.OK
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_filter_expression"+suffix, ConflictType.Error)
@@ -455,13 +455,13 @@ def test_filter_with_invalid_expression(self, filter_list, suffix):
for i in range(10):
values = [{"c1": i, "c2": 3.0} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
# TODO: Detailed error information check
with pytest.raises(Exception):
table_obj.delete(filter_list)
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
db_obj.drop_table("test_filter_expression"+suffix, ConflictType.Error)
\ No newline at end of file
diff --git a/python/test_pysdk/test_export.py b/python/test_pysdk/test_export.py
index d7ec702b97..6eb599b627 100644
--- a/python/test_pysdk/test_export.py
+++ b/python/test_pysdk/test_export.py
@@ -74,7 +74,7 @@ def test_export_csv(self, suffix):
table_obj = db_obj.create_table("test_export_csv"+suffix, {"doctitle": {"type": "varchar"}, "docdate": {"type": "varchar"}, "body": {"type": "varchar"}, "num": {"type": "integer"}, "vec": {"type": "vector, 4, float"}})
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv", "delimiter" : "\t"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
print(res)
test_export_csv_file_path = common_values.TEST_TMP_DIR + suffix +"test_export_csv.csv"
@@ -124,7 +124,7 @@ def test_export_jsonl(self, suffix):
table_obj = db_obj.create_table("test_export_jsonl"+suffix, {"doctitle": {"type": "varchar"}, "docdate": {"type": "varchar"}, "body": {"type": "varchar"}, "num": {"type": "integer"}, "vec": {"type": "vector, 4, float"}})
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv", "delimiter" : "\t"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
print(res)
test_export_jsonl_file_path = common_values.TEST_TMP_DIR + suffix + "test_export_jsonl.jsonl"
@@ -174,7 +174,7 @@ def test_export_fvecs(self, suffix):
table_obj = db_obj.create_table("test_export_fvecs"+suffix, {"doctitle": {"type": "varchar"}, "docdate": {"type": "varchar"}, "body": {"type": "varchar"}, "num": {"type": "integer"}, "vec": {"type": "vector, 4, float"}})
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv", "delimiter" : "\t"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
print(res)
test_export_fvecs_file_path = common_values.TEST_TMP_DIR + suffix + "test_export_fvecs.fvecs"
diff --git a/python/test_pysdk/test_index.py b/python/test_pysdk/test_index.py
index e4fb2eb796..6be7d62e82 100644
--- a/python/test_pysdk/test_index.py
+++ b/python/test_pysdk/test_index.py
@@ -166,7 +166,7 @@ def test_drop_index_fulltext(self, suffix):
res = table_obj.create_index("my_index", index.IndexInfo("body", index.IndexType.FullText), ConflictType.Error)
assert res.error_code == ErrorCode.OK
# fulltext search when index is created: expect success
- res = table_obj.output(["doctitle", "_score"]).match_text("body^5", "harmful chemical", 3).to_pl()
+ res, extra_result = table_obj.output(["doctitle", "_score"]).match_text("body^5", "harmful chemical", 3).to_pl()
print(res)
res = table_obj.drop_index("my_index")
assert res.error_code == ErrorCode.OK
@@ -640,13 +640,13 @@ def test_insert_data_fulltext_index_search(self, file_format, suffix):
"docdate": data["docdate"][i], "body": data["body"][i]})
table_obj.insert(value)
time.sleep(5)
- res = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
assert not res.is_empty()
print(res)
# Check if highlight work
- res = table_obj.output(["doctitle", "docdate", "body", "_row_id", "_score"]).highlight(["body"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "body", "_row_id", "_score"]).highlight(["body"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
assert not res.is_empty()
for body in res["body"].to_list():
@@ -700,12 +700,12 @@ def test_empty_fulltext_index(self, file_format, suffix):
index.IndexType.FullText))
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "_row_id", "_score"]).match_text(
"body^5", "harmful chemical", 3).to_pl()
assert not res.is_empty()
print(res)
- res = table_obj.output(["doctitle", "docdate", "body2", "_row_id", "_score"]).match_text(
+ res, extra_result = table_obj.output(["doctitle", "docdate", "body2", "_row_id", "_score"]).match_text(
"body2^5", "harmful chemical", 3).to_pl()
assert res.is_empty()
print(res)
@@ -750,12 +750,12 @@ def test_create_index_on_deleted_table(self, suffix):
embedding_data = [i for i in range(128)]
value = [{"c1": embedding_data} for _ in range(1024)]
table_obj.insert(value)
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
# delete data
table_obj.delete()
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
# create index
@@ -790,7 +790,7 @@ def test_create_index_on_update_table(self, suffix):
embedding_data = [i for i in range(128)]
value = [{"c1": embedding_data, "c2": i} for i in range(10)]
table_obj.insert(value)
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
# update data
embedding_data = [i + 0.1 * i for i in range(128)]
@@ -799,7 +799,7 @@ def test_create_index_on_update_table(self, suffix):
value = [{"c1": embedding_data} for _ in range(10)]
for i in range(10):
table_obj.update("c2 = " + str(i), {"c1": embedding_data})
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
res = db_obj.drop_table(
"test_create_index_on_update_table" + suffix, ConflictType.Error)
diff --git a/python/test_pysdk/test_insert.py b/python/test_pysdk/test_insert.py
index 143384eb13..ffcf908186 100644
--- a/python/test_pysdk/test_insert.py
+++ b/python/test_pysdk/test_insert.py
@@ -101,7 +101,7 @@ def _test_insert_basic(self, suffix):
res = table_obj.insert([{"c2": 3, "c1": 3}, {"c1": 4, "c2": 4}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (0, 1, 2, 3, 4), 'c2': (0, 1, 2, 3, 4)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
@@ -121,7 +121,7 @@ def _test_insert_bool(self, suffix):
assert table_obj
res = table_obj.insert([{"c1": -1, "c2": True}, {"c1": 2, "c2": False}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-1, 2), 'c2': (True, False)}).astype(
{'c1': dtype('float32'), 'c2': dtype('bool')}))
@@ -145,7 +145,7 @@ def _test_insert_bool(self, suffix):
assert table_instance
res = table_instance.insert({"c1": 1, "c7": "Tom"})
assert res.error_code == ErrorCode.OK
- res = table_instance.output(["*"]).to_df()
+ res, extra_result = table_instance.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (1,), 'c2': (0,), 'c3': (0,), 'c4': (0,), 'c5': (0,), 'c6': (0,), 'c7': ("Tom",), 'c8': (1.0,),
@@ -171,7 +171,7 @@ def _test_insert_float16_bfloat16(self, suffix):
[{"c1": -1, "c2": 1, "c3": -1}, {"c1": 2, "c2": -2, "c3": 2}, {"c1": -3, "c2": 3, "c3": -3},
{"c1": 4, "c2": -4, "c3": 4}, {"c1": -5, "c2": 5, "c3": -5}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (-1, 2, -3, 4, -5), 'c2': (1, -2, 3, -4, 5), 'c3': (-1, 2, -3, 4, -5)}).astype(
@@ -197,7 +197,7 @@ def _test_insert_varchar(self, suffix):
res = table_obj.insert([{"c1": "^789$ test insert varchar"}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ("test_insert_varchar", " test insert varchar ",
"^789$ test insert varchar")}))
res = db_obj.drop_table("test_insert_varchar"+suffix, ConflictType.Error)
@@ -217,7 +217,7 @@ def _test_insert_big_varchar(self, suffix):
res = table_obj.insert([{"c1": "test_insert_big_varchar" * 1000}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': ["test_insert_big_varchar" * 1000] * 100}))
@@ -242,13 +242,13 @@ def _test_insert_embedding(self, suffix):
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [-7, -8, -9]}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': ([1, 2, 3], [4, 5, 6], [7, 8, 9], [-7, -8, -9])}))
res = table_obj.insert([{"c1": [1, 2, 3]}, {"c1": [4, 5, 6]}, {
"c1": [7, 8, 9]}, {"c1": [-7, -8, -9]}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ([1, 2, 3], [4, 5, 6], [7, 8, 9], [-7, -8, -9],
[1, 2, 3], [4, 5, 6], [7, 8, 9], [-7, -8, -9])}))
@@ -269,7 +269,7 @@ def _test_insert_embedding(self, suffix):
res = table_obj.insert([{"c1": embedding_insert_float[3]}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': embedding_insert_float}))
@@ -288,7 +288,7 @@ def _test_insert_embedding(self, suffix):
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": embedding_insert_float[3]}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': [np.array(x).astype(np.float16).tolist() for x in embedding_insert_float]}))
@@ -307,7 +307,7 @@ def _test_insert_embedding(self, suffix):
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": embedding_insert_float[3]}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
            tmp_bf16 = np.array(embedding_insert_float).astype('
-            res = table_obj.output(["c1"]).filter("c1 > 1").to_df()
+ res, extra_result = table_obj.output(["c1"]).filter("c1 > 1").to_df()
print(res)
res = db_obj.drop_table("test_import"+suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@@ -107,14 +107,14 @@ def test_import_different_file_format_data(self, file_format, check_data, suffix
table_obj = db_obj.create_table("test_import_different_file_format_data"+suffix,
{"c1": {"type": "vector,128,float"}}, ConflictType.Error)
table_obj.import_data(common_values.TEST_TMP_DIR + file_name, {"file_type": file_format})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
else:
print(common_values.TEST_DATA_DIR + file_format + "/pysdk_test." + file_format)
table_obj.import_data(
os.getcwd() + common_values.TEST_DATA_DIR + file_format + "/pysdk_test." + file_format,
{"file_type": file_format})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
res = db_obj.drop_table("test_import_different_file_format_data"+suffix, ConflictType.Error)
@@ -128,7 +128,7 @@ def test_import_empty_file_fvecs(self, file_format, suffix):
{"c1": {"type": "vector,128,float"}}, ConflictType.Error)
table_obj.import_data(os.getcwd() + common_values.TEST_DATA_DIR + file_format + "/test_empty." + file_format)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_empty_file_fvecs"+suffix, ConflictType.Error)
@@ -140,7 +140,7 @@ def test_import_empty_file_csv(self, file_format, suffix):
{"c1": {"type": "int"}, "c2": {"type": "vector,3,int"}}, ConflictType.Error)
table_obj.import_data(os.getcwd() + common_values.TEST_DATA_DIR + file_format + "/test_empty." + file_format)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_empty_file_csv"+suffix, ConflictType.Error)
@@ -152,7 +152,7 @@ def test_import_empty_file_jsonl(self, file_format, suffix):
{"c1": {"type": "int"}, "c2": {"type": "vector,3,int"}}, ConflictType.Error)
table_obj.import_data(os.getcwd() + common_values.TEST_DATA_DIR + file_format + "/test_empty." + file_format)
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_empty_file_jsonl"+suffix, ConflictType.Error)
@@ -170,7 +170,7 @@ def test_import_format_unrecognized_data(self, file_format, suffix):
os.getcwd() + common_values.TEST_DATA_DIR + file_format + "/pysdk_test." + file_format,
{"file_type": file_format})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_format_unrecognized_data"+suffix, ConflictType.Error)
@@ -206,7 +206,7 @@ def test_csv_with_different_delimiter(self, check_data, delimiter, types, suffix
import_options={
"delimiter": delimiter[1]
})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_csv_with_different_delimiter"+suffix, ConflictType.Error)
else:
@@ -235,7 +235,7 @@ def test_csv_with_different_delimiter_more_than_one_character(self, check_data,
table_obj.import_data(common_values.TEST_TMP_DIR + "pysdk_test_" + delimiter + ".csv",
import_options={"delimiter": " "})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_csv_with_different_delimiter_more_than_one_character"+suffix, ConflictType.Error)
@@ -251,7 +251,7 @@ def test_import_csv_with_headers(self, check_data, has_header, suffix):
ConflictType.Error)
table_obj.import_data(common_values.TEST_TMP_DIR + "pysdk_test_commas.csv",
import_options={"header": has_header})
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
db_obj.drop_table("test_import_csv_with_headers"+suffix, ConflictType.Error)
@@ -275,7 +275,7 @@ def test_import_fvecs_table_with_more_columns(self, check_data, suffix):
assert e.type == InfinityException
assert e.value.args[0] == ErrorCode.IMPORT_FILE_FORMAT_ERROR
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_fvecs_table_with_more_columns"+suffix, ConflictType.Error)
@@ -298,7 +298,7 @@ def test_import_embedding_with_not_match_definition(self, check_data, types, suf
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_embedding_with_not_match_definition"+suffix, ConflictType.Error)
@@ -321,7 +321,7 @@ def test_import_embedding_with_dimension_unmatch(self, check_data, types, suffix
assert e.type == InfinityException
assert e.value.args[0] == ErrorCode.IMPORT_FILE_FORMAT_ERROR
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_embedding_with_not_match_definition"+suffix, ConflictType.Error)
@@ -341,7 +341,7 @@ def test_import_embedding_with_unmatched_elem_type(self, check_data, types, suff
test_csv_dir = common_values.TEST_TMP_DIR + "embedding_int_dim3.csv"
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_embedding_with_not_match_definition"+suffix, ConflictType.Ignore)
@@ -359,7 +359,7 @@ def test_import_varchar_with_not_match_definition(self, check_data, suffix):
res = table_obj.import_data(test_csv_dir)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
db_obj.drop_table("test_import_varchar_with_not_match_definition"+suffix, ConflictType.Error)
@@ -379,7 +379,7 @@ def test_import_10000_columns(self, check_data, suffix):
res = table_obj.import_data(test_csv_dir)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_import_10000_columns"+suffix, ConflictType.Error)
@@ -403,7 +403,7 @@ def test_table_with_not_matched_columns(self, columns, check_data, suffix):
assert e.type == InfinityException
assert e.value.args[0] == ErrorCode.COLUMN_COUNT_MISMATCH or e.value.args[0] == ErrorCode.IMPORT_FILE_FORMAT_ERROR
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
db_obj.drop_table("test_table_with_not_matched_columns"+suffix, ConflictType.Error)
@@ -423,7 +423,7 @@ def test_import_with_different_size(self, check_data, data_size, suffix):
res = table_obj.import_data(test_csv_dir)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
assert res.height == 1 and res.width == 1 and res.item(0, 0) == data_size
db_obj.drop_table("test_import_with_different_size"+suffix, ConflictType.Ignore)
@@ -443,7 +443,7 @@ def test_import_exceeding_rows(self, check_data, suffix):
res = table_obj.import_data(test_csv_dir)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
assert res.height == 1 and res.width == 1 and res.item(0, 0) == 1024 * 8192
db_obj.drop_table("test_import_exceeding_rows"+suffix, ConflictType.Error)
@@ -499,7 +499,7 @@ def test_import_jsonl_file_with_default(self, check_data, suffix):
test_csv_dir = common_values.TEST_TMP_DIR + "test_default.jsonl"
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "jsonl"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
db_obj.drop_table("test_import_jsonl_file_with_default"+suffix, ConflictType.Error)
@@ -531,7 +531,7 @@ def test_import_csv_file_with_default(self, check_data, suffix):
test_csv_dir = common_values.TEST_TMP_DIR + "pysdk_test_import_default.csv"
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "csv"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
db_obj.drop_table("test_import_csv_file_with_default"+suffix, ConflictType.Error)
@@ -566,6 +566,6 @@ def test_import_json_file_with_default(self, check_data, suffix):
test_csv_dir = common_values.TEST_TMP_DIR + "pysdk_test_default.json"
res = table_obj.import_data(test_csv_dir, import_options={"file_type": "json"})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
print(res)
db_obj.drop_table("test_import_json_file_with_default"+suffix, ConflictType.Error)
diff --git a/python/test_pysdk/test_query.py b/python/test_pysdk/test_query.py
index 1e2789b830..140ffd12cb 100644
--- a/python/test_pysdk/test_query.py
+++ b/python/test_pysdk/test_query.py
@@ -85,7 +85,7 @@ def test_query(self):
query_builder.match_dense('vec', [3.0] * 5, 'float', 'ip', 2)
query_builder.match_text('body', 'harmful', 2, None)
query_builder.fusion(method='rrf', topn=10, fusion_params=None)
- res = query_builder.to_df()
+ res, extra_result = query_builder.to_df()
print(res)
res = table.drop_index("my_index", ConflictType.Error)
assert res.error_code == ErrorCode.OK
diff --git a/python/test_pysdk/test_select.py b/python/test_pysdk/test_select.py
index 850fd842d7..18c41e4ff8 100644
--- a/python/test_pysdk/test_select.py
+++ b/python/test_pysdk/test_select.py
@@ -11,6 +11,7 @@
from numpy import dtype
from infinity.errors import ErrorCode
from infinity.common import ConflictType, SortType
+
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
if parent_dir not in sys.path:
@@ -19,14 +20,17 @@
from common.utils import copy_data
from datetime import date, time, datetime
+
@pytest.fixture(scope="class")
def local_infinity(request):
return request.config.getoption("--local-infinity")
+
@pytest.fixture(scope="class")
def http(request):
return request.config.getoption("--http")
+
@pytest.fixture(scope="class")
def setup_class(request, local_infinity, http):
if local_infinity:
@@ -49,6 +53,7 @@ def setup_class(request, local_infinity, http):
yield
request.cls.infinity_obj.disconnect()
+
@pytest.mark.usefixtures("setup_class")
@pytest.mark.usefixtures("suffix")
class TestInfinity:
@@ -116,9 +121,9 @@ def test_select(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
# infinity
- db_obj.drop_table("test_select"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select" + suffix, ConflictType.Ignore)
table_obj = db_obj.create_table(
- "test_select"+suffix, {
+ "test_select" + suffix, {
"c1": {"type": "int", "constraints": ["primary key", "not null"]},
"c2": {"type": "int", "constraints": ["not null"]}}, ConflictType.Error)
@@ -134,77 +139,75 @@ def test_select(self, suffix):
{"c1": 9, "c2": 9}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9),
'c2': (-3, -2, -1, 0, 1, 2, 3, -8, -7, -6, 7, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["c1 + c2"]).filter("c1 = 3").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'(c1 + c2)': (6,)})
.astype({'(c1 + c2)': dtype('int32')}))
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["c1"]).filter("c1 > 2 and c2 < 4").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (3,)})
.astype({'c1': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
- "(-7 < c1 or 9 <= c1) and (c1 = 3)").to_df()
+ res, extra_result = table_obj.output(["c2"]).filter("(-7 < c1 or 9 <= c1) and (c1 = 3)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (3,)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
- "(-8 < c1 and c1 <= -7) or (c1 >= 1 and 2 > c1)").to_df()
+ res, extra_result = table_obj.output(["c2"]).filter("(-8 < c1 and c1 <= -7) or (c1 >= 1 and 2 > c1)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (1, -7)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
+ res, extra_result = table_obj.output(["c2"]).filter(
"((c1 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c1)) and ((c1 > 0 and c1 <= 1) or (c1 > -8 and c1 < -6))").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (1, -7)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
+ res, extra_result = table_obj.output(["c2"]).filter(
"(-7 < c1 or 9 <= c1) and (c2 = 3)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (3,)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
+ res, extra_result = table_obj.output(["c2"]).filter(
"(-8 < c1 and c2 <= -7) or (c1 >= 1 and 2 > c2)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (1, -7)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
+ res, extra_result = table_obj.output(["c2"]).filter(
"((c2 >= -8 and -4 >= c1) or (c1 >= 0 and 5 > c2)) and ((c2 > 0 and c1 <= 1) or (c1 > -8 and c2 < -6))").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (1, -7)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["c2"]).filter(
+ res, extra_result = table_obj.output(["c2"]).filter(
"(not(c2 < -8 or -4 < c1) or not(c1 < 0 or 5 <= c2)) and not((c2 <= 0 or c1 > 1) and (c1 <= -8 or c2 >= -6))").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c2': (1, -7)})
.astype({'c2': dtype('int32')}))
- res = table_obj.output(["*"]).filter("c1 in (1, 2, 3)").to_df()
+ res, extra_result = table_obj.output(["*"]).filter("c1 in (1, 2, 3)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 2, 3),
'c2': (1, 2, 3)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["*"]).filter("c1 in (1, 2, 3) and c2 in (1, 2, 3)").to_df()
+ res, extra_result = table_obj.output(["*"]).filter("c1 in (1, 2, 3) and c2 in (1, 2, 3)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 2, 3),
'c2': (1, 2, 3)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["*"]).filter("c1 not in (1, 2, 3)").to_df()
+ res, extra_result = table_obj.output(["*"]).filter("c1 not in (1, 2, 3)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (-3, -2, -1, 0, -8, -7, -6, 7, 8, 9),
'c2': (-3, -2, -1, 0, -8, -7, -6, 7, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["*"]).filter("(c2 + 1) in (8, 9, 10)").to_df()
+ res, extra_result = table_obj.output(["*"]).filter("(c2 + 1) in (8, 9, 10)").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (7, 8, 9),
'c2': (7, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
@@ -214,7 +217,7 @@ def test_select(self, suffix):
# 'c2': (-3, -2, -1)})
# .astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = db_obj.drop_table("test_select"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_select" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
def test_select_datetime(self, suffix):
@@ -239,14 +242,14 @@ def test_select_datetime(self, suffix):
"""
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_datetime"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select_datetime" + suffix, ConflictType.Ignore)
table_obj = db_obj.create_table(
- "test_select_datetime"+suffix, {
+ "test_select_datetime" + suffix, {
"c1": {"type": "date"},
"c2": {"type": "time"},
- "c3" : {"type": "datetime"},
- "c4" : {"type" : "timestamp"}},
- ConflictType.Error)
+ "c3": {"type": "datetime"},
+ "c4": {"type": "timestamp"}},
+ ConflictType.Error)
assert table_obj is not None
@@ -260,7 +263,7 @@ def test_select_datetime(self, suffix):
dt_list.append("2024-09-23 20:45:11")
ts_list.append("2024-09-23 20:45:11")
res = table_obj.insert(
- {"c1" : d_list[0], "c2" : t_list[0], "c3" : dt_list[0], "c4" : ts_list[0]}
+ {"c1": d_list[0], "c2": t_list[0], "c3": dt_list[0], "c4": ts_list[0]}
)
assert res.error_code == ErrorCode.OK
@@ -269,7 +272,7 @@ def test_select_datetime(self, suffix):
dt_list.append("2022-05-26 21:44:33")
ts_list.append("2022-05-26 21:44:33")
res = table_obj.insert(
- {"c1" : d_list[1], "c2" : t_list[1], "c3" : dt_list[1], "c4" : ts_list[1]}
+ {"c1": d_list[1], "c2": t_list[1], "c3": dt_list[1], "c4": ts_list[1]}
)
assert res.error_code == ErrorCode.OK
@@ -278,27 +281,31 @@ def test_select_datetime(self, suffix):
dt_list.append("2021-03-04 20:58:59")
ts_list.append("2021-03-04 20:58:59")
res = table_obj.insert(
- {"c1" : d_list[2], "c2" : t_list[2], "c3" : dt_list[2], "c4" : ts_list[2]}
+ {"c1": d_list[2], "c2": t_list[2], "c3": dt_list[2], "c4": ts_list[2]}
)
assert res.error_code == ErrorCode.OK
-
- res = table_obj.output(["*"]).to_pl()
- for i in range(3) :
- assert res.item(i, 0) == d_list[i] and res.item(i, 1) == t_list[i] and res.item(i, 2) == dt_list[i] and res.item(i, 3) == ts_list[i]
- res = table_obj.output(["c1", "c2"]).filter("c1='2024-09-23'").to_pl()
+ res, extra_result = table_obj.output(["*"]).to_pl()
+ for i in range(3):
+ assert res.item(i, 0) == d_list[i] and res.item(i, 1) == t_list[i] and res.item(i, 2) == dt_list[
+ i] and res.item(i, 3) == ts_list[i]
+
+ res, extra_result = table_obj.output(["c1", "c2"]).filter("c1='2024-09-23'").to_pl()
assert res.item(0, 0) == d_list[0] and res.item(0, 1) == t_list[0]
- res = table_obj.output(["*"]).filter("c2='21:44:33'").to_pl()
- assert res.item(0, 0) == d_list[1] and res.item(0, 1) == t_list[1] and res.item(0, 2) == dt_list[1] and res.item(0, 3) == ts_list[1]
+ res, extra_result = table_obj.output(["*"]).filter("c2='21:44:33'").to_pl()
+ assert res.item(0, 0) == d_list[1] and res.item(0, 1) == t_list[1] and res.item(0, 2) == dt_list[
+ 1] and res.item(0, 3) == ts_list[1]
- res = table_obj.output(["*"]).filter("c3='2021-03-04 20:58:59'").to_pl()
- assert res.item(0, 0) == d_list[2] and res.item(0, 1) == t_list[2] and res.item(0, 2) == dt_list[2] and res.item(0, 3) == ts_list[2]
+ res, extra_result = table_obj.output(["*"]).filter("c3='2021-03-04 20:58:59'").to_pl()
+ assert res.item(0, 0) == d_list[2] and res.item(0, 1) == t_list[2] and res.item(0, 2) == dt_list[
+ 2] and res.item(0, 3) == ts_list[2]
- res = table_obj.output(["*"]).filter("c4='2021-03-04 20:58:59'").to_pl()
- assert res.item(0, 0) == d_list[2] and res.item(0, 1) == t_list[2] and res.item(0, 2) == dt_list[2] and res.item(0, 3) == ts_list[2]
+ res, extra_result = table_obj.output(["*"]).filter("c4='2021-03-04 20:58:59'").to_pl()
+ assert res.item(0, 0) == d_list[2] and res.item(0, 1) == t_list[2] and res.item(0, 2) == dt_list[
+ 2] and res.item(0, 3) == ts_list[2]
- res = db_obj.drop_table("test_select_datetime"+suffix, ConflictType.Ignore)
+ res = db_obj.drop_table("test_select_datetime" + suffix, ConflictType.Ignore)
assert res.error_code == ErrorCode.OK
def test_select_aggregate(self, suffix):
@@ -340,9 +347,9 @@ def test_select_aggregate(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
# infinity
- db_obj.drop_table("test_select_aggregate"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select_aggregate" + suffix, ConflictType.Ignore)
table_obj = db_obj.create_table(
- "test_select_aggregate"+suffix, {
+ "test_select_aggregate" + suffix, {
"c1": {"type": "int", "constraints": ["primary key", "not null"]},
"c2": {"type": "float", "constraints": ["not null"]}}, ConflictType.Error)
@@ -359,20 +366,20 @@ def test_select_aggregate(self, suffix):
{"c1": 90, "c2": -19}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["count(*)"]).to_pl()
+ res, extra_result = table_obj.output(["count(*)"]).to_pl()
assert res.height == 1 and res.width == 1 and res.item(0, 0) == 13
- res = table_obj.output(["max(c1)"]).to_pl()
+ res, extra_result = table_obj.output(["max(c1)"]).to_pl()
assert res.height == 1 and res.width == 1 and res.item(0, 0) == 90
- res = table_obj.output(["min(c2)"]).to_pl()
+ res, extra_result = table_obj.output(["min(c2)"]).to_pl()
assert res.height == 1 and res.width == 1 and res.item(0, 0) == -19
- res = table_obj.output(["min(c1) + max(c2)"]).to_pl()
+ res, extra_result = table_obj.output(["min(c1) + max(c2)"]).to_pl()
print(res)
- res = table_obj.output(["sum(c1)"]).to_pl()
+ res, extra_result = table_obj.output(["sum(c1)"]).to_pl()
print(res)
- res = table_obj.output(["avg(c2)"]).to_pl()
+ res, extra_result = table_obj.output(["avg(c2)"]).to_pl()
print(res)
- res = db_obj.drop_table("test_select_aggregate"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_select_aggregate" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
def test_select_varchar(self, suffix):
@@ -425,11 +432,11 @@ def test_select_varchar(self, suffix):
"""
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_varchar"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_varchar"+suffix,
+ db_obj.drop_table("test_select_varchar" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_varchar" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_varchar"+suffix)
+ table_obj = db_obj.get_table("test_select_varchar" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'},
{"c1": 'e', "c2": 'e'}, {"c1": 'f', "c2": 'f'}, {
@@ -437,38 +444,38 @@ def test_select_varchar(self, suffix):
{"c1": 'i', "c2": 'i'}, {"c1": 'j', "c2": 'j'}, {
"c1": 'k', "c2": 'k'}, {"c1": 'l', "c2": 'l'},
{"c1": 'm', "c2": 'm'}])
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
'l', 'm'),
'c2': ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
'l', 'm')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = table_obj.output(
+ res, extra_result = table_obj.output(
["c1", "c2"]).filter("c1 = 'a'").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('a',), 'c2': ('a',)}).astype(
{'c1': dtype('O'), 'c2': dtype('O')}))
# TODO NotImplement Error: Not implement: varchar > varchar
- # res = table_obj.output(["c1"]).filter("c1 > 'a' and c2 < 'c'").to_df()
+ # res, extra_result = table_obj.output(["c1"]).filter("c1 > 'a' and c2 < 'c'").to_df()
# pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('b',)}).astype({'c1': dtype('O')}))
- res = db_obj.drop_table("test_select_varchar"+suffix)
+ res = db_obj.drop_table("test_select_varchar" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_big(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- res = db_obj.drop_table("test_select_big"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_big"+suffix, {
+ res = db_obj.drop_table("test_select_big" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_big" + suffix, {
"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_big"+suffix)
+ table_obj = db_obj.get_table("test_select_big" + suffix)
for i in range(1000):
table_obj.insert(
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'}])
- res = db_obj.drop_table("test_select_big"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_select_big" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.parametrize("check_data", [{"file_name": "embedding_int_dim3.csv",
@@ -491,12 +498,12 @@ def test_select_embedding_int32(self, check_data, suffix):
"""
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_embedding"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select_embedding" + suffix, ConflictType.Ignore)
- res = db_obj.create_table("test_select_embedding"+suffix, {
+ res = db_obj.create_table("test_select_embedding" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "vector,3,int"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_embedding"+suffix)
+ table_obj = db_obj.get_table("test_select_embedding" + suffix)
if not check_data:
copy_data("embedding_int_dim3.csv")
@@ -507,17 +514,17 @@ def test_select_embedding_int32(self, check_data, suffix):
res = table_obj.import_data(test_csv_dir, None)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c2"]).to_df()
+ res, extra_result = table_obj.output(["c2"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c2': ([2, 3, 4], [6, 7, 8], [10, 11, 12])}))
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 5, 9), 'c2': ([2, 3, 4], [6, 7, 8], [10, 11, 12])})
.astype({'c1': dtype('int32'), 'c2': dtype('O')}))
- res = db_obj.drop_table("test_select_embedding"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_select_embedding" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.parametrize("check_data", [{"file_name": "embedding_float_dim4.csv",
@@ -539,12 +546,12 @@ def test_select_embedding_float(self, check_data, suffix):
"""
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_embedding_float"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select_embedding_float" + suffix, ConflictType.Ignore)
- res = db_obj.create_table("test_select_embedding_float"+suffix, {
+ res = db_obj.create_table("test_select_embedding_float" + suffix, {
"c1": {"type": "float"}, "c2": {"type": "vector,4,float"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_embedding_float"+suffix)
+ table_obj = db_obj.get_table("test_select_embedding_float" + suffix)
test_dir = "/var/infinity/test_data/"
test_csv_dir = test_dir + "embedding_float_dim4.csv"
@@ -555,13 +562,13 @@ def test_select_embedding_float(self, check_data, suffix):
res = table_obj.import_data(test_csv_dir, None)
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c2"]).to_df()
+ res, extra_result = table_obj.output(["c2"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c2': ([0.1, 0.2, 0.3, -0.2], [0.2, 0.1, 0.3, 0.4],
[0.3, 0.2, 0.1, 0.4], [0.4, 0.3, 0.2, 0.1])}))
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
print(res)
pd.testing.assert_frame_equal(res,
@@ -572,7 +579,7 @@ def test_select_embedding_float(self, check_data, suffix):
.astype({'c1': dtype('float32'), 'c2': dtype('O')}))
res = db_obj.drop_table(
- "test_select_embedding_float"+suffix, ConflictType.Error)
+ "test_select_embedding_float" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.parametrize("check_data", [{"file_name": "embedding_int_dim3.csv",
@@ -597,12 +604,12 @@ def test_select_big_embedding(self, check_data, suffix):
"""
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_big_embedding"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_select_big_embedding" + suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_big_embedding"+suffix, {
+ db_obj.create_table("test_select_big_embedding" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "vector,3,int"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_big_embedding"+suffix)
+ table_obj = db_obj.get_table("test_select_big_embedding" + suffix)
if not check_data:
copy_data("embedding_int_dim3.csv")
@@ -615,49 +622,49 @@ def test_select_big_embedding(self, check_data, suffix):
assert res.error_code == ErrorCode.OK
res = db_obj.drop_table(
- "test_select_big_embedding"+suffix, ConflictType.Error)
+ "test_select_big_embedding" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.usefixtures("skip_if_http")
def test_select_same_output(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_same_output"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_same_output"+suffix, {
+ db_obj.drop_table("test_select_same_output" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_same_output" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "int"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_same_output"+suffix)
+ table_obj = db_obj.get_table("test_select_same_output" + suffix)
table_obj.insert([{"c1": 1, "c2": 2}])
print()
- res = table_obj.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_df()
print(res)
- res = table_obj.output(["c1", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c1"]).to_df()
print(res)
- res = table_obj.output(["c1", "c2", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2", "c1"]).to_df()
print(res)
- res = db_obj.drop_table("test_select_same_output"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_select_same_output" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.usefixtures("skip_if_http")
def test_empty_table(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_empty_table"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_empty_table"+suffix, {
+ db_obj.drop_table("test_empty_table" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_empty_table" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "int"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_empty_table"+suffix)
+ table_obj = db_obj.get_table("test_empty_table" + suffix)
print()
- res = table_obj.output(["c1", "c2"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c2': ()}).astype(
{'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["c1", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c1"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c1_2': ()}).astype(
{'c1': dtype('int32'), 'c1_2': dtype('int32')}))
- res = table_obj.output(["c1", "c2", "c1"]).to_df()
+ res, extra_result = table_obj.output(["c1", "c2", "c1"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (), 'c2': (), 'c1_2': ()}).astype(
{'c1': dtype('int32'), 'c2': dtype('int32'), 'c1_2': dtype('int32')}))
- res = db_obj.drop_table("test_empty_table"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_empty_table" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.parametrize("filter_list", [
@@ -672,8 +679,8 @@ def test_empty_table(self, suffix):
def test_valid_filter_expression(self, filter_list, suffix):
# connect
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_valid_filter_expression"+suffix, ConflictType.Ignore)
- table_obj = db_obj.create_table("test_valid_filter_expression"+suffix, {
+ db_obj.drop_table("test_valid_filter_expression" + suffix, ConflictType.Ignore)
+ table_obj = db_obj.create_table("test_valid_filter_expression" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "float"}}, ConflictType.Error)
table_obj.insert([{"c1": 1, "c2": 2.0},
{"c1": 10, "c2": 2.0},
@@ -681,11 +688,11 @@ def test_valid_filter_expression(self, filter_list, suffix):
{"c1": 1000, "c2": 2.0},
{"c1": 10000, "c2": 2.0}])
# TODO add more filter function
- select_res_df = table_obj.output(["*"]).filter(filter_list).to_pl()
- print(str(select_res_df))
+ res, extra_result = table_obj.output(["*"]).filter(filter_list).to_pl()
+ print(str(res))
res = db_obj.drop_table(
- "test_valid_filter_expression"+suffix, ConflictType.Error)
+ "test_valid_filter_expression" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@pytest.mark.parametrize("filter_list", [
@@ -700,9 +707,9 @@ def test_valid_filter_expression(self, filter_list, suffix):
def test_invalid_filter_expression(self, filter_list, suffix):
# connect
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_invalid_filter_expression"+suffix,
+ db_obj.drop_table("test_invalid_filter_expression" + suffix,
ConflictType.Ignore)
- table_obj = db_obj.create_table("test_invalid_filter_expression"+suffix, {
+ table_obj = db_obj.create_table("test_invalid_filter_expression" + suffix, {
"c1": {"type": "int"}, "c2": {"type": "float"}}, ConflictType.Error)
table_obj.insert([{"c1": 1, "c2": 2.0},
{"c1": 10, "c2": 2.0},
@@ -711,11 +718,11 @@ def test_invalid_filter_expression(self, filter_list, suffix):
{"c1": 10000, "c2": 2.0}])
# TODO add more filter function
with pytest.raises(Exception):
- select_res_df = table_obj.output(["*"]).filter(filter_list).to_pl()
- print(str(select_res_df))
+ res, extra_result = table_obj.output(["*"]).filter(filter_list).to_pl()
+ print(str(res))
res = db_obj.drop_table(
- "test_invalid_filter_expression"+suffix, ConflictType.Error)
+ "test_invalid_filter_expression" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
def test_filter_fulltext(self, suffix):
@@ -729,25 +736,39 @@ def test_filter_fulltext(self, suffix):
def test_func():
expect_result = pd.DataFrame({'num': (1,), "doc": "first text"}).astype({'num': dtype('int32')})
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "filter_text('doc', 'first text', 'minimum_should_match=100%')").to_df())
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "filter_text('', 'first second', 'default_field=doc;minimum_should_match=99%') and not num = 2").to_df())
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "filter_text('doc', 'first OR second') and (num < 2 or num > 2)").to_df())
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "(filter_text('doc', 'first') or filter_fulltext('doc', 'second')) and (num < 2 or num > 2)").to_df())
+ res, extra_result = table_obj.output(["*"]).filter(
+ "filter_text('doc', 'first text', 'minimum_should_match=100%')").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
+
+ res, extra_result = table_obj.output(["*"]).filter(
+ "filter_text('', 'first second', 'default_field=doc;minimum_should_match=99%') and not num = 2").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
+
+ res, extra_result = table_obj.output(["*"]).filter(
+ "filter_text('doc', 'first OR second') and (num < 2 or num > 2)").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
+
+ res, extra_result = table_obj.output(["*"]).filter(
+ "(filter_text('doc', 'first') or filter_fulltext('doc', 'second')) and (num < 2 or num > 2)").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
expect_result = pd.DataFrame(
{'num': (1, 2, 3), "doc": ("first text", "second text multiple", "third text many words")}).astype(
{'num': dtype('int32')})
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "filter_text('doc', 'first') or num >= 2").to_df())
- pd.testing.assert_frame_equal(expect_result, table_obj.output(["*"]).filter(
- "filter_fulltext('doc', 'second') or (num < 2 or num > 2)").to_df())
+
+ res, extra_result = table_obj.output(["*"]).filter(
+ "filter_text('doc', 'first') or num >= 2").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
+
+ res, extra_result = table_obj.output(["*"]).filter(
+ "filter_fulltext('doc', 'second') or (num < 2 or num > 2)").to_df()
+ pd.testing.assert_frame_equal(expect_result, res)
+
+ res, extra_result = table_obj.output(
+ ["filter_text('doc', 'second') or num > 2", "filter_text('doc', 'second')"]).to_df()
pd.testing.assert_frame_equal(pd.DataFrame({
"(FILTER_FULLTEXT('doc', 'second') OR (num > 2))": (False, True, True),
"FILTER_FULLTEXT('doc', 'second')": (False, True, False)}),
- table_obj.output(["filter_text('doc', 'second') or num > 2", "filter_text('doc', 'second')"]).to_df())
+ res)
test_func()
table_obj.create_index("my_sc_index", index.IndexInfo("num", index.IndexType.Secondary), ConflictType.Error)
@@ -760,7 +781,9 @@ def test_neg_func(self, suffix):
db_obj.drop_table("test_neg_func" + suffix, ConflictType.Ignore)
table_obj = db_obj.create_table("test_neg_func" + suffix, {"num": {"type": "float64"}}, ConflictType.Error)
table_obj.insert([{"num": 1.0}, {"num": 2.0}, {"num": 3.0}])
- pd.testing.assert_frame_equal(table_obj.output(["-abs(num) - 1"]).filter("-abs(num) >= -2").to_df(),
+
+ res, extra_result = table_obj.output(["-abs(num) - 1"]).filter("-abs(num) >= -2").to_df()
+ pd.testing.assert_frame_equal(res,
pd.DataFrame({"(-(ABS(num)) - 1)": (-2.0, -3.0)}))
res = db_obj.drop_table("test_neg_func" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
@@ -769,9 +792,9 @@ def test_sort(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
# infinity
- db_obj.drop_table("test_sort"+suffix, ConflictType.Ignore)
+ db_obj.drop_table("test_sort" + suffix, ConflictType.Ignore)
table_obj = db_obj.create_table(
- "test_sort"+suffix, {
+ "test_sort" + suffix, {
"c1": {"type": "int", "constraints": ["primary key", "not null"]},
"c2": {"type": "int", "constraints": ["not null"]}}, ConflictType.Error)
@@ -787,212 +810,216 @@ def test_sort(self, suffix):
{"c1": 9, "c2": 9}])
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Desc]]).to_df()
+ res, extra_res = table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Desc]]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (0, 1, -1, 2, -2, 3, -3, -6, 7, -7, 8, -8, 9),
'c2': (0, 1, 1, 2, 2, 3, 3, 6, 7, 7, 8, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Asc]]).to_df()
+ res, extra_res = table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc], ["c1", SortType.Asc]]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (0, -1, 1, -2, 2, -3, 3, -6, -7, 7, -8, 8, 9),
'c2': (0, 1, 1, 2, 2, 3, 3, 6, 7, 7, 8, 8, 9)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32')}))
- res = table_obj.output(["_row_id"]).sort([["_row_id", SortType.Desc]]).to_df()
- #pd.testing.assert_frame_equal(res, pd.DataFrame({'ROW_ID': (12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)})
+ res, extra_res = table_obj.output(["_row_id"]).sort([["_row_id", SortType.Desc]]).to_df()
+ # pd.testing.assert_frame_equal(res, pd.DataFrame({'ROW_ID': (12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)})
# .astype({'ROW_ID': dtype('int64')}))
print(res)
- res = db_obj.drop_table("test_sort"+suffix, ConflictType.Error)
+ res = db_obj.drop_table("test_sort" + suffix, ConflictType.Error)
assert res.error_code == ErrorCode.OK
def test_select_varchar_length(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_varchar_length"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_varchar_length"+suffix,
+ db_obj.drop_table("test_select_varchar_length" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_varchar_length" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_varchar_length"+suffix)
+ table_obj = db_obj.get_table("test_select_varchar_length" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'},
- {"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'}])
+ {"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'},
+ {"c1": 'dbc', "c2": 'dbc'}])
- res = table_obj.output(["*"]).filter("char_length(c1) = 1").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("char_length(c1) = 1").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('a', 'b', 'c', 'd'),
'c2': ('a', 'b', 'c', 'd')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = table_obj.output(["*"]).filter("char_length(c1) = 3").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("char_length(c1) = 3").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('abc', 'bbc', 'cbc', 'dbc'),
'c2': ('abc', 'bbc', 'cbc', 'dbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = db_obj.drop_table("test_select_varchar_length"+suffix)
+ res = db_obj.drop_table("test_select_varchar_length" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_regex(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_regex"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_regex"+suffix,
+ db_obj.drop_table("test_select_regex" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_regex" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_regex"+suffix)
+ table_obj = db_obj.get_table("test_select_regex" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'a'}, {"c1": 'b', "c2": 'b'}, {"c1": 'c', "c2": 'c'}, {"c1": 'd', "c2": 'd'},
- {"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'},])
+ {"c1": 'abc', "c2": 'abc'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'},
+ {"c1": 'dbc', "c2": 'dbc'}, ])
- res = table_obj.output(["*"]).filter("regex(c1, 'bc')").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("regex(c1, 'bc')").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('abc', 'bbc', 'cbc', 'dbc'),
'c2': ('abc', 'bbc', 'cbc', 'dbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
-
- res = db_obj.drop_table("test_select_regex"+suffix)
+ res = db_obj.drop_table("test_select_regex" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_upper_lower(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_upper_lower"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_upper_lower"+suffix,
+ db_obj.drop_table("test_select_upper_lower" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_upper_lower" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_upper_lower"+suffix)
+ table_obj = db_obj.get_table("test_select_upper_lower" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'A'}, {"c1": 'b', "c2": 'B'}, {"c1": 'c', "c2": 'C'}, {"c1": 'd', "c2": 'D'},
- {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'}, {"c1": 'dbc', "c2": 'dbc'},])
+ {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbc', "c2": 'bbc'}, {"c1": 'cbc', "c2": 'cbc'},
+ {"c1": 'dbc', "c2": 'dbc'}, ])
- res = table_obj.output(["*"]).filter("upper(c1) = c2").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("upper(c1) = c2").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('a', 'b', 'c', 'd', 'abc'),
'c2': ('A', 'B', 'C', 'D', 'ABC')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
-
- res = db_obj.drop_table("test_select_upper_lower"+suffix)
+ res = db_obj.drop_table("test_select_upper_lower" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_substring(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_substring"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_substring"+suffix,
+ db_obj.drop_table("test_select_substring" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_substring" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_substring"+suffix)
+ table_obj = db_obj.get_table("test_select_substring" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'A'}, {"c1": 'b', "c2": 'B'}, {"c1": 'c', "c2": 'C'}, {"c1": 'd', "c2": 'D'},
- {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbcc', "c2": 'bbc'}, {"c1": 'cbcc', "c2": 'cbc'}, {"c1": 'dbcc', "c2": 'dbc'},])
+ {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbcc', "c2": 'bbc'}, {"c1": 'cbcc', "c2": 'cbc'},
+ {"c1": 'dbcc', "c2": 'dbc'}, ])
- res = table_obj.output(["*"]).filter("substring(c1, 0, 3) = c2").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("substring(c1, 0, 3) = c2").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('bbcc', 'cbcc', 'dbcc'),
'c2': ('bbc', 'cbc', 'dbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = db_obj.drop_table("test_select_substring"+suffix)
+ res = db_obj.drop_table("test_select_substring" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_trim(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_trim"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_trim"+suffix,
+ db_obj.drop_table("test_select_trim" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_trim" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_trim"+suffix)
+ table_obj = db_obj.get_table("test_select_trim" + suffix)
table_obj.insert(
- [{"c1": ' a', "c2": 'a'}, {"c1": ' b', "c2": 'b'}, {"c1": ' c', "c2": 'c'},
- {"c1": 'ab ', "c2": 'ab'}, {"c1": 'bcc ', "c2": 'bcc'}, {"c1": 'cbc ', "c2": 'cbc'}, {"c1": ' dbc ', "c2": 'dbc'},])
+ [{"c1": ' a', "c2": 'a'}, {"c1": ' b', "c2": 'b'}, {"c1": ' c', "c2": 'c'},
+ {"c1": 'ab ', "c2": 'ab'}, {"c1": 'bcc ', "c2": 'bcc'}, {"c1": 'cbc ', "c2": 'cbc'},
+ {"c1": ' dbc ', "c2": 'dbc'}, ])
- res = table_obj.output(["*"]).filter("ltrim(c1) = c2").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("ltrim(c1) = c2").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (' a', ' b', ' c'),
'c2': ('a', 'b', 'c')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
-
- res = table_obj.output(["*"]).filter("rtrim(c1) = c2").to_df()
+
+ res, extra_res = table_obj.output(["*"]).filter("rtrim(c1) = c2").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('ab ', 'bcc ', 'cbc '),
'c2': ('ab', 'bcc', 'cbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = table_obj.output(["*"]).filter("trim(c1) = c2").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("trim(c1) = c2").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (' a', ' b', ' c', 'ab ', 'bcc ', 'cbc ', ' dbc '),
'c2': ('a', 'b', 'c', 'ab', 'bcc', 'cbc', 'dbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = db_obj.drop_table("test_select_trim"+suffix)
+ res = db_obj.drop_table("test_select_trim" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_position(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_position"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_position"+suffix,
+ db_obj.drop_table("test_select_position" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_position" + suffix,
{"c1": {"type": "varchar", "constraints": ["primary key", "not null"]},
"c2": {"type": "varchar", "constraints": ["not null"]}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_position"+suffix)
+ table_obj = db_obj.get_table("test_select_position" + suffix)
table_obj.insert(
[{"c1": 'a', "c2": 'A'}, {"c1": 'b', "c2": 'B'}, {"c1": 'c', "c2": 'C'}, {"c1": 'd', "c2": 'D'},
- {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbcc', "c2": 'bbc'}, {"c1": 'cbcc', "c2": 'cbc'}, {"c1": 'dbcc', "c2": 'dbc'},])
+ {"c1": 'abc', "c2": 'ABC'}, {"c1": 'bbcc', "c2": 'bbc'}, {"c1": 'cbcc', "c2": 'cbc'},
+ {"c1": 'dbcc', "c2": 'dbc'}, ])
- res = table_obj.output(["*"]).filter("char_position(c1, c2) <> 0").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("char_position(c1, c2) <> 0").to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': ('bbcc', 'cbcc', 'dbcc'),
'c2': ('bbc', 'cbc', 'dbc')})
.astype({'c1': dtype('O'), 'c2': dtype('O')}))
- res = db_obj.drop_table("test_select_position"+suffix)
+ res = db_obj.drop_table("test_select_position" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_sqrt(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_sqrt"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_sqrt"+suffix,
+ db_obj.drop_table("test_select_sqrt" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_sqrt" + suffix,
{"c1": {"type": "integer"},
"c2": {"type": "double"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_sqrt"+suffix)
+ table_obj = db_obj.get_table("test_select_sqrt" + suffix)
table_obj.insert(
[{"c1": '1', "c2": '2'}, {"c1": '4', "c2": '5'}, {"c1": '9', "c2": '10'}, {"c1": '16', "c2": '17'}])
- res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
+ res, extra_res = table_obj.output(["*", "sqrt(c1)", "sqrt(c2)"]).to_df()
print(res)
- res = table_obj.output(["*"]).filter("sqrt(c1) = 2").to_df()
+ res, extra_res = table_obj.output(["*"]).filter("sqrt(c1) = 2").to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (4,),
'c2': (5,)})
.astype({'c1': dtype('int32'), 'c2': dtype('double')}))
- res = db_obj.drop_table("test_select_sqrt"+suffix)
+ res = db_obj.drop_table("test_select_sqrt" + suffix)
assert res.error_code == ErrorCode.OK
def test_select_round(self, suffix):
db_obj = self.infinity_obj.get_database("default_db")
- db_obj.drop_table("test_select_round"+suffix, ConflictType.Ignore)
- db_obj.create_table("test_select_round"+suffix,
+ db_obj.drop_table("test_select_round" + suffix, ConflictType.Ignore)
+ db_obj.create_table("test_select_round" + suffix,
{"c1": {"type": "integer"},
"c2": {"type": "double"}}, ConflictType.Error)
- table_obj = db_obj.get_table("test_select_round"+suffix)
+ table_obj = db_obj.get_table("test_select_round" + suffix)
table_obj.insert(
[{"c1": '1', "c2": '2.4'}, {"c1": '4', "c2": '-2.4'}, {"c1": '9', "c2": '2.5'}, {"c1": '16', "c2": '-2.5'}])
- res = table_obj.output(["c1", "round(c2)"]).to_df()
+ res, extra_res = table_obj.output(["c1", "round(c2)"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 4, 9, 16),
'round(c2)': (2, -2, 3, -3)})
.astype({'c1': dtype('int32'), 'round(c2)': dtype('double')}))
- res = table_obj.output(["c1", "ceil(c2)"]).to_df()
+ res, extra_res = table_obj.output(["c1", "ceil(c2)"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 4, 9, 16),
'ceil(c2)': (3, -2, 3, -2)})
.astype({'c1': dtype('int32'), 'ceil(c2)': dtype('double')}))
- res = table_obj.output(["c1", "floor(c2)"]).to_df()
+ res, extra_res = table_obj.output(["c1", "floor(c2)"]).to_df()
print(res)
pd.testing.assert_frame_equal(res, pd.DataFrame({'c1': (1, 4, 9, 16),
'floor(c2)': (2, -3, 2, -3)})
.astype({'c1': dtype('int32'), 'floor(c2)': dtype('double')}))
- res = db_obj.drop_table("test_select_round"+suffix)
- assert res.error_code == ErrorCode.OK
\ No newline at end of file
+ res = db_obj.drop_table("test_select_round" + suffix)
+ assert res.error_code == ErrorCode.OK
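Note: every query hunk in this file makes the same mechanical change — output(...).to_df() (and the to_pl() variant later) now returns a pair instead of a bare frame, so each call site unpacks two values. A minimal usage sketch of the new calling convention, assuming the updated infinity SDK exercised by these tests (the connection object, table name, and suffix are illustrative, taken from the surrounding fixtures):

    # assumes `infinity_obj` and `suffix` exist, as in the test class fixtures
    db_obj = infinity_obj.get_database("default_db")
    table_obj = db_obj.get_table("test_sort" + suffix)  # illustrative table name
    # first element: the pandas DataFrame, exactly as before;
    # second element: the extra result object now returned alongside it
    res, extra_res = table_obj.output(["c1", "c2"]).sort([["c2", SortType.Asc]]).to_df()
    print(res, extra_res)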
diff --git a/python/test_pysdk/test_update.py b/python/test_pysdk/test_update.py
index c2365275cd..7c79b4cb75 100644
--- a/python/test_pysdk/test_update.py
+++ b/python/test_pysdk/test_update.py
@@ -103,7 +103,7 @@ def test_update(self, suffix):
res = table_obj.update("c1 = 1", {"c2": 90, "c3": 900})
assert res.error_code == ErrorCode.OK
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (2, 3, 4, 1), 'c2': (20, 30, 40, 90), 'c3': (200, 300, 400, 900)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -111,7 +111,7 @@ def test_update(self, suffix):
with pytest.raises(Exception):
table_obj.update(None, {"c2": 90, "c3": 900})
- res = table_obj.output(["*"]).to_df()
+ res, extra_result = table_obj.output(["*"]).to_df()
pd.testing.assert_frame_equal(res, pd.DataFrame(
{'c1': (2, 3, 4, 1), 'c2': (20, 30, 40, 90), 'c3': (200, 300, 400, 900)})
.astype({'c1': dtype('int32'), 'c2': dtype('int32'), 'c3': dtype('int32')}))
@@ -187,7 +187,7 @@ def test_update_no_row_is_met_the_condition(self, suffix):
try:
tb_obj.insert([{"c1": common_values.types_example_array[i],
"c2": common_values.types_example_array[i]}])
- res = tb_obj.output(["*"]).to_df()
+ res, extra_result = tb_obj.output(["*"]).to_df()
print(res)
print("insert c1 = " + str(common_values.types_example_array[i]) +
", c2 = " + str(common_values.types_example_array[i]))
@@ -196,7 +196,7 @@ def test_update_no_row_is_met_the_condition(self, suffix):
try:
tb_obj.update("c1 = 2", {"c2": common_values.types_example_array[i]})
- res = tb_obj.output(["*"]).to_df()
+ res, extra_result = tb_obj.output(["*"]).to_df()
print("update type: {} \n {}".format(common_values.types_array[i], res))
except Exception as e:
@@ -224,7 +224,7 @@ def test_update_all_row_is_met_the_condition(self, suffix):
try:
tb_obj.insert([{"c1": common_values.types_example_array[i],
"c2": common_values.types_example_array[i]}])
- res = tb_obj.output(["*"]).to_df()
+ res, extra_result = tb_obj.output(["*"]).to_df()
print(res)
print("insert c1 = " + str(common_values.types_example_array[i]) +
", c2 = " + str(common_values.types_example_array[i]))
@@ -234,7 +234,7 @@ def test_update_all_row_is_met_the_condition(self, suffix):
try:
tb_obj.update("c1 = " + str(common_values.types_example_array[i]),
{"c2": common_values.types_example_array[i]})
- res = tb_obj.output(["*"]).to_df()
+ res, extra_result = tb_obj.output(["*"]).to_df()
print("update type: {} \n {}".format(common_values.types_array[i], res))
except Exception as e:
@@ -255,12 +255,12 @@ def test_update_table_with_one_block(self, suffix):
values = [{"c1": 1, "c2": 2} for _ in range(8192)]
# values = [{"c1": 1, "c2": 2}]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# update
table_obj.update("c1 = 1", {"c2": 20})
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
res = db_obj.drop_table("test_update_table_with_one_block"+suffix, ConflictType.Error)
@@ -278,12 +278,12 @@ def test_update_table_with_one_segment(self, suffix):
for i in range(1024):
values = [{"c1": 1, "c2": 2} for _ in range(8)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# update
table_obj.update("c1 = 1", {"c2": 20})
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
res = db_obj.drop_table("test_update_table_with_one_segment"+suffix, ConflictType.Error)
@@ -299,17 +299,17 @@ def test_update_before_delete(self, suffix):
# insert
values = [{"c1": 1, "c2": 2} for _ in range(8)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
table_obj.delete("c1 = 1")
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
# update
table_obj.update("c1 = 1", {"c2": 20})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_before_delete"+suffix, ConflictType.Error)
@@ -325,12 +325,12 @@ def test_update_inserted_data(self, suffix):
# insert
values = [{"c1": 1, "c2": 2} for _ in range(8)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# update
table_obj.update("c1 = 1", {"c2": 21})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_inserted_data"+suffix, ConflictType.Error)
@@ -349,14 +349,14 @@ def test_update_inserted_long_before(self, suffix):
# insert
values = [{"c1": 1, "c2": 2} for _ in range(8)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
time.sleep(60)
# update
table_obj.update("c1 = 1", {"c2": 21})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_inserted_long_before"+suffix, ConflictType.Error)
@@ -374,7 +374,7 @@ def test_update_dropped_table(self, suffix):
# update
with pytest.raises(InfinityException) as e:
table_obj.update("c1 = 1", {"c2": 21})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
assert e.type == InfinityException
@@ -390,7 +390,7 @@ def test_update_invalid_value_1(self, types, types_example, suffix):
ConflictType.Error)
# update
table_obj.update("c1 = 1", {"c2": types_example})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_invalid_value"+suffix, ConflictType.Error)
@@ -411,7 +411,7 @@ def test_update_new_value(self, types, types_example, suffix):
# update
table_obj.update("c1 = 1", {"c2": types_example})
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_new_value"+suffix, ConflictType.Error)
@@ -435,7 +435,7 @@ def test_update_invalid_value_2(self, types, types_example, suffix):
assert e.type == InfinityException
assert e.value.args[0] == ErrorCode.NOT_SUPPORTED_TYPE_CONVERSION
- update_res = table_obj.output(["*"]).to_df()
+ update_res, extra_result = table_obj.output(["*"]).to_df()
print(update_res)
res = db_obj.drop_table("test_update_invalid_value"+suffix, ConflictType.Error)
@@ -462,12 +462,12 @@ def test_valid_filter_expression(self, filter_list, types_example, suffix):
for i in range(10):
values = [{"c1": i, "c2": 3.0} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
table_obj.update(filter_list, {"c2": types_example})
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
res = db_obj.drop_table("test_filter_expression"+suffix, ConflictType.Error)
@@ -495,14 +495,14 @@ def test_invalid_filter_expression(self, filter_list, types_example, suffix):
for i in range(10):
values = [{"c1": i, "c2": 3.0} for _ in range(10)]
table_obj.insert(values)
- insert_res = table_obj.output(["*"]).to_df()
+ insert_res, extra_result = table_obj.output(["*"]).to_df()
print(insert_res)
# delete
with pytest.raises(Exception):
table_obj.update(filter_list, {"c2": types_example})
- delete_res = table_obj.output(["*"]).to_df()
+ delete_res, extra_result = table_obj.output(["*"]).to_df()
print(delete_res)
res = db_obj.drop_table("test_invalid_filter_expression"+suffix, ConflictType.Error)
@@ -531,12 +531,12 @@ def test_update_sparse_vector(self, suffix):
}
])
- res = table_instance.output(["*"]).to_pl()
+ res, extra_result = table_instance.output(["*"]).to_pl()
print(res)
table_instance.update("id = 1", {"content_demo_sparse":SparseVector([1, 2, 3], [1.1, 1.1, 1.1])})
- res = table_instance.output(["*"]).to_pl()
+ res, extra_result = table_instance.output(["*"]).to_pl()
print(res)
res = db_obj.drop_table("test_update_sparse_vector"+suffix, ConflictType.Error)
diff --git a/src/admin/admin_executor.cpp b/src/admin/admin_executor.cpp
index efb92b1050..d81135ee33 100644
--- a/src/admin/admin_executor.cpp
+++ b/src/admin/admin_executor.cpp
@@ -4111,6 +4111,7 @@ QueryResult AdminExecutor::ShowCurrentNode(QueryContext *query_context, const Ad
}
}
} else {
+ NodeRole server_role = InfinityContext::instance().GetServerRole();
{
SizeT column_id = 0;
{
@@ -4121,7 +4122,7 @@ QueryResult AdminExecutor::ShowCurrentNode(QueryContext *query_context, const Ad
++column_id;
{
- Value value = Value::MakeVarchar(ToString(InfinityContext::instance().GetServerRole()));
+ Value value = Value::MakeVarchar(ToString(server_role));
ValueExpression value_expr(value);
value_expr.AppendToChunk(output_block_ptr->column_vectors[column_id]);
}
@@ -4139,7 +4140,7 @@ QueryResult AdminExecutor::ShowCurrentNode(QueryContext *query_context, const Ad
{
bool infinity_started = InfinityContext::instance().InfinityContextStarted();
String infinity_status("started");
- if (!infinity_started) {
+ if (!infinity_started && server_role != NodeRole::kAdmin) {
infinity_status = "starting";
}
Value value = Value::MakeVarchar(infinity_status);
@@ -4253,10 +4254,12 @@ QueryResult AdminExecutor::SetRole(QueryContext *query_context, const AdminState
status = InfinityContext::instance().ChangeServerRole(NodeRole::kFollower, false, node_name, leader_ip, leader_port);
if (!status.ok()) {
- LOG_INFO("Fail to change to FOLLOWER role");
- Status restore_status = InfinityContext::instance().ChangeServerRole(NodeRole::kAdmin);
- if (!restore_status.ok()) {
- UnrecoverableError(fmt::format("Fail to change node role to FOLLOWER, then fail to restore to ADMIN."));
+ if(status.code() != ErrorCode::kCantSwitchRole) {
+ LOG_INFO("Fail to change to FOLLOWER role");
+ Status restore_status = InfinityContext::instance().ChangeServerRole(NodeRole::kAdmin);
+ if (!restore_status.ok()) {
+ UnrecoverableError(fmt::format("Fail to change node role to FOLLOWER, then fail to restore to ADMIN."));
+ }
}
} else {
LOG_INFO("Start in FOLLOWER role");
@@ -4296,10 +4299,12 @@ QueryResult AdminExecutor::SetRole(QueryContext *query_context, const AdminState
status = InfinityContext::instance().ChangeServerRole(NodeRole::kLearner, false, node_name, leader_ip, leader_port);
if (!status.ok()) {
- LOG_INFO("Fail to change to LEARNER role");
- Status restore_status = InfinityContext::instance().ChangeServerRole(NodeRole::kAdmin);
- if (!restore_status.ok()) {
- UnrecoverableError(fmt::format("Fail to change node role to FOLLOWER, then fail to restore to ADMIN."));
+ if(status.code() != ErrorCode::kCantSwitchRole) {
+ LOG_INFO("Fail to change to LEARNER role");
+ Status restore_status = InfinityContext::instance().ChangeServerRole(NodeRole::kAdmin);
+ if (!restore_status.ok()) {
+ UnrecoverableError(fmt::format("Fail to change node role to LEARNER, then fail to restore to ADMIN."));
+ }
}
} else {
LOG_INFO("Start in LEARNER role");
diff --git a/src/common/analyzer/analyzer.cppm b/src/common/analyzer/analyzer.cppm
index 1ee76ed136..34c8f197de 100644
--- a/src/common/analyzer/analyzer.cppm
+++ b/src/common/analyzer/analyzer.cppm
@@ -34,8 +34,6 @@ public:
virtual ~Analyzer() = default;
- void SetInnerAnalyzer(SharedPtr<Analyzer> &analyzer) { inner_analyzer_ = analyzer; }
-
void SetExtractSpecialChar(bool extract_special_char, bool convert_to_placeholder = true) {
extract_special_char_ = extract_special_char;
convert_to_placeholder_ = convert_to_placeholder;
@@ -43,6 +41,8 @@ public:
void SetCharOffset(bool set) { get_char_offset_ = set; }
+ void SetTokenizerConfig(const TokenizeConfig &conf) { tokenizer_.SetConfig(conf); }
+
int Analyze(const Term &input, TermList &output) {
void *array[2] = {&output, this};
return AnalyzeImpl(input, &array, &Analyzer::AppendTermList);
@@ -84,7 +84,6 @@ protected:
Tokenizer tokenizer_;
- SharedPtr<Analyzer> inner_analyzer_;
/// Whether including special characters (e.g. punctuations) in the result.
bool extract_special_char_;
diff --git a/src/common/analyzer/analyzer_pool.cpp b/src/common/analyzer/analyzer_pool.cpp
index c8b60cc73f..660a1727eb 100644
--- a/src/common/analyzer/analyzer_pool.cpp
+++ b/src/common/analyzer/analyzer_pool.cpp
@@ -23,6 +23,7 @@ import third_party;
import config;
import infinity_context;
import analyzer;
+import tokenizer;
import stemmer;
import chinese_analyzer;
import traditional_chinese_analyzer;
@@ -31,7 +32,8 @@ import korean_analyzer;
import standard_analyzer;
import ngram_analyzer;
import rag_analyzer;
-import keyword_analyzer;
+import whitespace_analyzer;
+import ik_analyzer;
import logger;
namespace infinity {
@@ -75,6 +77,7 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
Config *config = InfinityContext::instance().config();
if (config == nullptr) {
// InfinityContext has not been initialized.
+ // For unit test only
path = "/var/infinity/resource";
} else {
path = config->ResourcePath();
@@ -107,6 +110,7 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
Config *config = InfinityContext::instance().config();
if (config == nullptr) {
// InfinityContext has not been initialized.
+ // For unit test only
path = "/var/infinity/resource";
} else {
path = config->ResourcePath();
@@ -140,6 +144,7 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
Config *config = InfinityContext::instance().config();
if (config == nullptr) {
// InfinityContext has not been initialized.
+ // For unit test only
path = "/var/infinity/resource";
} else {
path = config->ResourcePath();
@@ -164,6 +169,39 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
analyzer->SetFineGrained(fine_grained);
return {std::move(analyzer), Status::OK()};
}
+ case Str2Int(IK.data()): {
+ //
+ Analyzer *prototype = cache_[IK].get();
+ if (prototype == nullptr) {
+ String path;
+ Config *config = InfinityContext::instance().config();
+ if (config == nullptr) {
+ // InfinityContext has not been initialized.
+ // For unit test only
+ path = "/var/infinity/resource";
+ } else {
+ path = config->ResourcePath();
+ }
+ UniquePtr<IKAnalyzer> analyzer = MakeUnique<IKAnalyzer>(std::move(path));
+ Status load_status = analyzer->Load();
+ if (!load_status.ok()) {
+ return {nullptr, load_status};
+ }
+ prototype = analyzer.get();
+ cache_[IK] = std::move(analyzer);
+ }
+ bool fine_grained = false;
+ const char *str = name.data();
+ while (*str != '\0' && *str != '-') {
+ str++;
+ }
+ if (strcmp(str, "-fine") == 0) {
+ fine_grained = true;
+ }
+ UniquePtr<IKAnalyzer> analyzer = MakeUnique<IKAnalyzer>(*reinterpret_cast<IKAnalyzer *>(prototype));
+ analyzer->SetFineGrained(fine_grained);
+ return {std::move(analyzer), Status::OK()};
+ }
case Str2Int(JAPANESE.data()): {
Analyzer *prototype = cache_[JAPANESE].get();
if (prototype == nullptr) {
@@ -171,6 +209,7 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
Config *config = InfinityContext::instance().config();
if (config == nullptr) {
// InfinityContext has not been initialized.
+ // For unit test only
path = "/var/infinity/resource";
} else {
path = config->ResourcePath();
@@ -192,6 +231,7 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
Config *config = InfinityContext::instance().config();
if (config == nullptr) {
// InfinityContext has not been initialized.
+ // For unit test only
path = "/var/infinity/resource";
} else {
path = config->ResourcePath();
@@ -208,6 +248,19 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
}
case Str2Int(STANDARD.data()): {
UniquePtr<StandardAnalyzer> analyzer = MakeUnique<StandardAnalyzer>();
+
+ TokenizeConfig token_config;
+ // String allow_str("-");
+ String divide_str("@#$");
+ String unite_str("/");
+ // Allow("-"): 2012-02-14 => 2012-02-14
+ // Divide: delimiters
+ // Unite: 2012/02/14 => 20120214
+ // token_config.AddAllows(allow_str);
+ token_config.AddDivides(divide_str);
+ token_config.AddUnites(unite_str);
+ analyzer->SetTokenizerConfig(token_config);
+
Language lang = STEM_LANG_ENGLISH;
const char *str = name.data();
while (*str != '\0' && *str != '-') {
@@ -269,7 +322,10 @@ Tuple<UniquePtr<Analyzer>, Status> AnalyzerPool::GetAnalyzer(const std::string_v
return {MakeUnique<NGramAnalyzer>(ngram), Status::OK()};
}
case Str2Int(KEYWORD.data()): {
- return {MakeUnique<KeywordAnalyzer>(), Status::OK()};
+ return {MakeUnique<WhitespaceAnalyzer>(), Status::OK()};
+ }
+ case Str2Int(WHITESPACE.data()): {
+ return {MakeUnique<WhitespaceAnalyzer>(), Status::OK()};
}
default: {
if(std::filesystem::is_regular_file(name)) {
diff --git a/src/common/analyzer/analyzer_pool.cppm b/src/common/analyzer/analyzer_pool.cppm
index 6a7aa61a8f..46a2e0cf08 100644
--- a/src/common/analyzer/analyzer_pool.cppm
+++ b/src/common/analyzer/analyzer_pool.cppm
@@ -40,7 +40,9 @@ public:
static constexpr std::string_view STANDARD = "standard";
static constexpr std::string_view NGRAM = "ngram";
static constexpr std::string_view RAG = "rag";
+ static constexpr std::string_view IK = "ik";
static constexpr std::string_view KEYWORD = "keyword";
+ static constexpr std::string_view WHITESPACE = "whitespace";
private:
CacheType cache_{};
diff --git a/src/common/analyzer/common_analyzer.cpp b/src/common/analyzer/common_analyzer.cpp
index 63b1a4b3de..6d45eda603 100644
--- a/src/common/analyzer/common_analyzer.cpp
+++ b/src/common/analyzer/common_analyzer.cpp
@@ -29,14 +29,7 @@ constexpr int MAX_TUPLE_LENGTH = 1024;
CommonLanguageAnalyzer::CommonLanguageAnalyzer()
: Analyzer(), lowercase_string_buffer_(term_string_buffer_limit_), stemmer_(MakeUnique<Stemmer>()), case_sensitive_(false), contain_lower_(false),
- extract_eng_stem_(true), extract_synonym_(false), cjk_(false), remove_stopwords_(false) {
- TokenizeConfig token_config;
- String divide_str("@#$");
- String unite_str("/");
- token_config.AddDivides(divide_str);
- token_config.AddUnites(unite_str);
- tokenizer_.SetConfig(token_config);
-}
+ extract_eng_stem_(true), extract_synonym_(false), cjk_(false), remove_stopwords_(false) {}
CommonLanguageAnalyzer::~CommonLanguageAnalyzer() {}
diff --git a/src/common/analyzer/ik/analyze_context.cpp b/src/common/analyzer/ik/analyze_context.cpp
index 8c21c3bc51..b98949e4eb 100644
--- a/src/common/analyzer/ik/analyze_context.cpp
+++ b/src/common/analyzer/ik/analyze_context.cpp
@@ -1,11 +1,9 @@
module;
-#include
#include
-#include
-#include
#include
-#include
+
+module analyze_context;
import stl;
import quick_sort_set;
@@ -14,10 +12,8 @@ import lexeme;
import lexeme_path;
import ik_dict;
-module analyze_context;
-
namespace infinity {
-AnalyzeContext::AnalyzeContext(Dictionary *dict) : dict_(dict) {
+AnalyzeContext::AnalyzeContext(Dictionary *dict, bool ik_smart) : dict_(dict), ik_smart_(ik_smart) {
buff_offset_ = 0;
cursor_ = 0;
last_useless_char_num_ = 0;
@@ -52,11 +48,7 @@ bool AnalyzeContext::MoveCursor() {
}
}
-bool AnalyzeContext::NeedRefillBuffer() const {
- return available_ == BUFF_SIZE && cursor_ < available_ - 1 && cursor_ > available_ - BUFF_EXHAUST_CRITICAL && !IsBufferLocked();
-}
-
-void AnalyzeContext::AddLexeme(Lexeme *lexeme) { org_lexemes_.AddLexeme(lexeme); }
+bool AnalyzeContext::AddLexeme(Lexeme *lexeme) { return org_lexemes_->AddLexeme(lexeme); }
void AnalyzeContext::AddLexemePath(LexemePath *path) {
if (path != nullptr) {
@@ -114,14 +106,16 @@ Lexeme *AnalyzeContext::GetNextLexeme() {
result->SetLexemeText(
std::wstring(segment_buff_.begin() + result->GetBegin(), segment_buff_.begin() + result->GetBegin() + result->GetLength()));
break;
+ } else {
+ delete result;
+ result = nullptr;
}
}
return result;
}
void AnalyzeContext::Reset() {
- buff_locker_.clear();
- org_lexemes_ = QuickSortSet();
+ org_lexemes_ = MakeUnique<QuickSortSet>();
available_ = 0;
buff_offset_ = 0;
char_types_.clear();
@@ -132,6 +126,8 @@ void AnalyzeContext::Reset() {
}
void AnalyzeContext::Compound(Lexeme *result) {
+ if (!ik_smart_)
+ return;
if (!results_.empty()) {
if (Lexeme::TYPE_ARABIC == result->GetLexemeType()) {
Lexeme *next_lexeme = results_.front();
@@ -142,7 +138,9 @@ void AnalyzeContext::Compound(Lexeme *result) {
append_ok = result->Append(*next_lexeme, Lexeme::TYPE_CQUAN);
}
if (append_ok) {
+ Lexeme *r = results_.front();
results_.pop_front();
+ delete r;
}
}
if (Lexeme::TYPE_CNUM == result->GetLexemeType() && !results_.empty()) {
@@ -152,7 +150,9 @@ void AnalyzeContext::Compound(Lexeme *result) {
append_ok = result->Append(*next_lexeme, Lexeme::TYPE_CQUAN);
}
if (append_ok) {
+ Lexeme *r = results_.front();
results_.pop_front();
+ delete r;
}
}
}
diff --git a/src/common/analyzer/ik/analyze_context.cppm b/src/common/analyzer/ik/analyze_context.cppm
index f1d3ddb49d..8023cfbf35 100644
--- a/src/common/analyzer/ik/analyze_context.cppm
+++ b/src/common/analyzer/ik/analyze_context.cppm
@@ -31,9 +31,7 @@ public:
int last_useless_char_num_;
- HashSet<std::wstring> buff_locker_;
-
- QuickSortSet org_lexemes_;
+ UniquePtr<QuickSortSet> org_lexemes_;
HashMap<int, UniquePtr<LexemePath>> path_map_;
@@ -41,7 +39,9 @@ public:
Dictionary *dict_{nullptr};
- AnalyzeContext(Dictionary *dict);
+ bool ik_smart_{true};
+
+ AnalyzeContext(Dictionary *dict, bool is_smart = true);
int GetCursor() const { return cursor_; }
@@ -59,23 +59,15 @@ public:
bool MoveCursor();
- void LockBuffer(const std::wstring &segmenter_name) { buff_locker_.insert(segmenter_name); }
-
- void UnlockBuffer(const std::wstring &segmenter_name) { buff_locker_.erase(segmenter_name); }
-
- bool IsBufferLocked() const { return !buff_locker_.empty(); }
-
bool IsBufferConsumed() const { return cursor_ == available_ - 1; }
- bool NeedRefillBuffer() const;
-
void MarkBufferOffset() { buff_offset_ += cursor_; }
- void AddLexeme(Lexeme *lexeme);
+ bool AddLexeme(Lexeme *lexeme);
void AddLexemePath(LexemePath *path);
- QuickSortSet *GetOrgLexemes() { return &(org_lexemes_); }
+ QuickSortSet *GetOrgLexemes() { return org_lexemes_.get(); }
void OutputToResult();
diff --git a/src/common/analyzer/ik/arbitrator.cpp b/src/common/analyzer/ik/arbitrator.cpp
index 608233bbc3..050457e26a 100644
--- a/src/common/analyzer/ik/arbitrator.cpp
+++ b/src/common/analyzer/ik/arbitrator.cpp
@@ -2,13 +2,14 @@ module;
#include
+module arbitrator;
+
import stl;
import analyze_context;
import lexeme;
import lexeme_path;
import quick_sort_set;
-
-module arbitrator;
+import third_party;
namespace infinity {
@@ -16,36 +17,37 @@ void IKArbitrator::Process(AnalyzeContext *context, bool use_smart) {
QuickSortSet *org_lexemes = context->GetOrgLexemes();
Lexeme *org_lexeme = org_lexemes->PollFirst();
- LexemePath *cross_path = new LexemePath();
+ UniquePtr<LexemePath> cross_path = MakeUnique<LexemePath>();
while (org_lexeme != nullptr) {
if (!cross_path->AddCrossLexeme(org_lexeme)) {
if (cross_path->Size() == 1 || !use_smart) {
- context->AddLexemePath(cross_path);
+ context->AddLexemePath(cross_path.release());
} else {
QuickSortSet::Cell *head_cell = cross_path->GetHead();
LexemePath *judge_result = Judge(head_cell, cross_path->GetPathLength());
context->AddLexemePath(judge_result);
- delete cross_path;
}
-
- cross_path = new LexemePath();
+ cross_path = MakeUnique<LexemePath>();
cross_path->AddCrossLexeme(org_lexeme);
}
org_lexeme = org_lexemes->PollFirst();
}
if (cross_path->Size() == 1 || !use_smart) {
- context->AddLexemePath(cross_path);
+ context->AddLexemePath(cross_path.release());
} else {
QuickSortSet::Cell *head_cell = cross_path->GetHead();
LexemePath *judge_result = Judge(head_cell, cross_path->GetPathLength());
context->AddLexemePath(judge_result);
- delete cross_path;
}
}
+struct CompareLexemePath {
+ bool operator()(const UniquePtr<LexemePath> &lhs, const UniquePtr<LexemePath> &rhs) const { return lhs->CompareTo(*rhs); }
+};
+
LexemePath *IKArbitrator::Judge(QuickSortSet::Cell *lexeme_cell, int fulltext_length) {
- Set<UniquePtr<LexemePath>> path_options;
+ std::set<UniquePtr<LexemePath>, CompareLexemePath> path_options;
UniquePtr<LexemePath> option = MakeUnique<LexemePath>();
std::stack<QuickSortSet::Cell *> lexeme_stack = ForwardPath(lexeme_cell, option.get());
@@ -68,7 +70,9 @@ std::stack<QuickSortSet::Cell *> IKArbitrator::ForwardPath(QuickSortSet::Cell *l
std::stack<QuickSortSet::Cell *> conflict_stack;
QuickSortSet::Cell *c = lexeme_cell;
while (c != nullptr && c->GetLexeme() != nullptr) {
- if (!option->AddNotCrossLexeme(c->GetLexeme())) {
+ Lexeme *lexeme = c->GetLexeme()->Copy();
+ if (!option->AddNotCrossLexeme(lexeme)) {
+ delete lexeme;
conflict_stack.push(c);
}
c = c->GetNext();
@@ -78,7 +82,8 @@ std::stack<QuickSortSet::Cell *> IKArbitrator::ForwardPath(QuickSortSet::Cell *l
void IKArbitrator::BackPath(Lexeme *l, LexemePath *option) {
while (option->CheckCross(l)) {
- option->RemoveTail();
+ Lexeme *lexeme = option->RemoveTail();
+ delete lexeme;
}
}
diff --git a/src/common/analyzer/ik/character_util.cppm b/src/common/analyzer/ik/character_util.cppm
index 75be1e5c8d..6b06ad0286 100644
--- a/src/common/analyzer/ik/character_util.cppm
+++ b/src/common/analyzer/ik/character_util.cppm
@@ -1,7 +1,7 @@
module;
-#include <codecvt>
-#include <locale>
+// #include <codecvt>
+// #include <locale>
#include
export module character_util;
@@ -81,9 +81,10 @@ public:
}
static std::wstring UTF8ToUTF16(const std::string &utf8_str) {
+ // std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
+ // return converter.from_bytes(utf8_str);
std::wstring utf16_str;
std::string_view utf8_view(utf8_str);
-
while (!utf8_view.empty()) {
if ((utf8_view[0] & 0x80) == 0) { // 1-byte character
utf16_str.push_back(static_cast<wchar_t>(utf8_view[0])); // 1-byte character
@@ -120,9 +121,10 @@ public:
}
static std::string UTF16ToUTF8(const std::wstring &utf16_str) {
+ // std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
+ // return converter.to_bytes(utf16_str);
std::string utf8_str;
std::wstring_view utf16_view(utf16_str);
-
while (!utf16_view.empty()) {
if (utf16_view[0] < 0xD800 || utf16_view[0] > 0xDFFF) { // Basic Multilingual Plane
uint32_t code_point = utf16_view[0];
diff --git a/src/common/analyzer/ik/cjk_segmenter.cpp b/src/common/analyzer/ik/cjk_segmenter.cpp
index b9b728cd78..2ff6a9dc3e 100644
--- a/src/common/analyzer/ik/cjk_segmenter.cpp
+++ b/src/common/analyzer/ik/cjk_segmenter.cpp
@@ -2,6 +2,8 @@ module;
#include
+module cjk_segmenter;
+
import stl;
import hit;
import segmenter;
@@ -9,47 +11,52 @@ import analyze_context;
import lexeme;
import character_util;
import ik_dict;
-
-module cjk_segmenter;
+import third_party;
namespace infinity {
const std::wstring CJKSegmenter::SEGMENTER_NAME = L"CJK_SEGMENTER";
-CJKSegmenter::CJKSegmenter(Dictionary *dict) : dict_(dict) { tmp_hits_ = List<Hit *>(); }
+CJKSegmenter::CJKSegmenter(Dictionary *dict) : dict_(dict) {}
void CJKSegmenter::Analyze(AnalyzeContext *context) {
if (CharacterUtil::CHAR_USELESS != context->GetCurrentCharType()) {
if (!tmp_hits_.empty()) {
- std::vector<Hit *> tmp_array(tmp_hits_.begin(), tmp_hits_.end());
- for (Hit *hit : tmp_array) {
+ for (auto it = tmp_hits_.begin(); it != tmp_hits_.end();) {
+ Hit *hit = (*it).get();
hit = dict_->MatchWithHit(context->GetSegmentBuff(), context->GetCursor(), hit);
+
if (hit->IsMatch()) {
- Lexeme *newLexeme =
+ Lexeme *new_lexeme =
new Lexeme(context->GetBufferOffset(), hit->GetBegin(), context->GetCursor() - hit->GetBegin() + 1, Lexeme::TYPE_CNWORD);
- context->AddLexeme(newLexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
if (!hit->IsPrefix()) {
- tmp_hits_.remove(hit);
+ it = tmp_hits_.erase(it);
+ } else {
+ ++it;
}
} else if (hit->IsUnmatch()) {
- tmp_hits_.remove(hit);
+ it = tmp_hits_.erase(it);
+ } else {
+ ++it;
}
}
}
- Hit *single_char_hit = dict_->MatchInMainDict(context->GetSegmentBuff(), context->GetCursor(), 1);
+ UniquePtr<Hit> single_char_hit(dict_->MatchInMainDict(context->GetSegmentBuff(), context->GetCursor(), 1));
if (single_char_hit->IsMatch()) {
- Lexeme *newLexeme = new Lexeme(context->GetBufferOffset(), context->GetCursor(), 1, Lexeme::TYPE_CNWORD);
- context->AddLexeme(newLexeme);
+ Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), context->GetCursor(), 1, Lexeme::TYPE_CNWORD);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
if (single_char_hit->IsPrefix()) {
- tmp_hits_.push_back(single_char_hit);
+ tmp_hits_.push_back(std::move(single_char_hit));
}
} else if (single_char_hit->IsPrefix()) {
- tmp_hits_.push_back(single_char_hit);
+ tmp_hits_.push_back(std::move(single_char_hit));
}
-
} else {
tmp_hits_.clear();
}
@@ -57,12 +64,6 @@ void CJKSegmenter::Analyze(AnalyzeContext *context) {
if (context->IsBufferConsumed()) {
tmp_hits_.clear();
}
-
- if (tmp_hits_.empty()) {
- context->UnlockBuffer(SEGMENTER_NAME);
- } else {
- context->LockBuffer(SEGMENTER_NAME);
- }
}
void CJKSegmenter::Reset() { tmp_hits_.clear(); }
diff --git a/src/common/analyzer/ik/cjk_segmenter.cppm b/src/common/analyzer/ik/cjk_segmenter.cppm
index 054c8d5705..08486b37f5 100644
--- a/src/common/analyzer/ik/cjk_segmenter.cppm
+++ b/src/common/analyzer/ik/cjk_segmenter.cppm
@@ -16,7 +16,7 @@ export class CJKSegmenter : public Segmenter {
public:
static const std::wstring SEGMENTER_NAME;
- List<Hit *> tmp_hits_;
+ List<UniquePtr<Hit>> tmp_hits_;
Dictionary *dict_{nullptr};
diff --git a/src/common/analyzer/ik/cn_quantifier_segmenter.cpp b/src/common/analyzer/ik/cn_quantifier_segmenter.cpp
index 319e4de231..ebbf43048c 100644
--- a/src/common/analyzer/ik/cn_quantifier_segmenter.cpp
+++ b/src/common/analyzer/ik/cn_quantifier_segmenter.cpp
@@ -2,6 +2,8 @@ module;
#include
+module cn_quantifier_segmenter;
+
import stl;
import hit;
import segmenter;
@@ -10,8 +12,6 @@ import lexeme;
import character_util;
import ik_dict;
-module cn_quantifier_segmenter;
-
namespace infinity {
const std::wstring CNQuantifierSegmenter::SEGMENTER_NAME = L"QUAN_SEGMENTER";
@@ -27,23 +27,12 @@ void CNQuantifierSegmenter::InitChnNumber() {
CNQuantifierSegmenter::CNQuantifierSegmenter(Dictionary *dict) : dict_(dict) {
nstart_ = -1;
nend_ = -1;
- count_hits_ = List<Hit *>();
InitChnNumber();
}
void CNQuantifierSegmenter::Analyze(AnalyzeContext *context) {
- // 处理中文数词
ProcessCNumber(context);
- // 处理中文量词
ProcessCount(context);
-
- // 判断是否锁定缓冲区
- if (nstart_ == -1 && nend_ == -1 && count_hits_.empty()) {
- // 对缓冲区解锁
- context->UnlockBuffer(SEGMENTER_NAME);
- } else {
- context->LockBuffer(SEGMENTER_NAME);
- }
}
void CNQuantifierSegmenter::Reset() {
@@ -81,34 +70,40 @@ void CNQuantifierSegmenter::ProcessCount(AnalyzeContext *context) {
if (CharacterUtil::CHAR_CHINESE == context->GetCurrentCharType()) {
if (!count_hits_.empty()) {
- std::vector<Hit *> tmp_array(count_hits_.begin(), count_hits_.end());
- for (Hit *hit : tmp_array) {
+ for (auto it = count_hits_.begin(); it != count_hits_.end();) {
+ Hit *hit = (*it).get();
hit = dict_->MatchWithHit(context->GetSegmentBuff(), context->GetCursor(), hit);
if (hit->IsMatch()) {
Lexeme *new_lexeme =
new Lexeme(context->GetBufferOffset(), hit->GetBegin(), context->GetCursor() - hit->GetBegin() + 1, Lexeme::TYPE_COUNT);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
if (!hit->IsPrefix()) {
- count_hits_.remove(hit);
+ it = count_hits_.erase(it);
+ } else {
+ ++it;
}
} else if (hit->IsUnmatch()) {
- count_hits_.remove(hit);
+ it = count_hits_.erase(it);
+ } else {
+ ++it;
}
}
}
- Hit *single_char_hit = dict_->MatchInQuantifierDict(context->GetSegmentBuff(), context->GetCursor(), 1);
+ UniquePtr<Hit> single_char_hit(dict_->MatchInQuantifierDict(context->GetSegmentBuff(), context->GetCursor(), 1));
if (single_char_hit->IsMatch()) {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), context->GetCursor(), 1, Lexeme::TYPE_COUNT);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
if (single_char_hit->IsPrefix()) {
- count_hits_.push_back(single_char_hit);
+ count_hits_.push_back(std::move(single_char_hit));
}
} else if (single_char_hit->IsPrefix()) {
- count_hits_.push_back(single_char_hit);
+ count_hits_.push_back(std::move(single_char_hit));
}
} else {
@@ -138,7 +133,8 @@ bool CNQuantifierSegmenter::NeedCountScan(AnalyzeContext *context) {
void CNQuantifierSegmenter::OutputNumLexeme(AnalyzeContext *context) {
if (nstart_ > -1 && nend_ > -1) {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), nstart_, nend_ - nstart_ + 1, Lexeme::TYPE_CNUM);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
}
}
diff --git a/src/common/analyzer/ik/cn_quantifier_segmenter.cppm b/src/common/analyzer/ik/cn_quantifier_segmenter.cppm
index a44b2712b2..51d03bb684 100644
--- a/src/common/analyzer/ik/cn_quantifier_segmenter.cppm
+++ b/src/common/analyzer/ik/cn_quantifier_segmenter.cppm
@@ -23,7 +23,7 @@ public:
int nstart_;
int nend_;
- List<Hit *> count_hits_;
+ List<UniquePtr<Hit>> count_hits_;
Dictionary *dict_{nullptr};
diff --git a/src/common/analyzer/ik/dict.cpp b/src/common/analyzer/ik/dict.cpp
index 407dd940ae..e2f84ffb33 100644
--- a/src/common/analyzer/ik/dict.cpp
+++ b/src/common/analyzer/ik/dict.cpp
@@ -2,6 +2,7 @@ module;
#include
#include
+#include <regex>
#include
#include
#include
@@ -13,22 +14,46 @@ import hit;
import stl;
import status;
import character_util;
+import third_party;
namespace fs = std::filesystem;
namespace infinity {
-const String PATH_DIC_MAIN = "ik/main.dic";
-const String PATH_DIC_SURNAME = "ik/surname.dic";
-const String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
-const String PATH_DIC_SUFFIX = "ik/suffix.dic";
-const String PATH_DIC_PREP = "ik/preposition.dic";
-const String PATH_DIC_STOP = "ik/stopword.dic";
-const String FILE_NAME = "ik/IKAnalyzer.cfg.xml";
-const String EXT_DICT = "ik/ext_dict";
-const String EXT_STOP = "ik/ext_stopwords";
+const String PATH_DIC_MAIN = "main.dic";
+const String PATH_DIC_SURNAME = "surname.dic";
+const String PATH_DIC_QUANTIFIER = "quantifier.dic";
+const String PATH_DIC_SUFFIX = "suffix.dic";
+const String PATH_DIC_PREP = "preposition.dic";
+const String PATH_DIC_STOP = "stopword.dic";
+const String FILE_NAME = "IKAnalyzer.cfg.xml";
+const String EXT_DICT = "ext_dict";
+const String EXT_STOP = "ext_stopwords";
+
+bool IsSpaceOrNewline(char c) { return std::isspace(static_cast<unsigned char>(c)) || c == '\n' || c == '\r'; }
+
+String Trim(const String &str) {
+ if (str.empty()) {
+ return str;
+ }
+
+ std::size_t start = 0;
+ while (start < str.size() && IsSpaceOrNewline(str[start])) {
+ ++start;
+ }
+
+ std::size_t end = str.size() - 1;
+ while (end > start && IsSpaceOrNewline(str[end])) {
+ --end;
+ }
+ return str.substr(start, end - start + 1);
+}
-Dictionary::Dictionary(const String &dir) : conf_dir_(dir) {}
+Dictionary::Dictionary(const String &dir) {
+ fs::path root(dir);
+ fs::path ik_root = root / "ik";
+ conf_dir_ = ik_root.string();
+}
Status Dictionary::Load() {
Status load_status;
@@ -93,8 +118,9 @@ Status Dictionary::LoadDictFile(DictSegment *dict, const String &file_path, bool
if (!is.is_open()) {
return Status::InvalidAnalyzerFile(file_path);
}
- std::string line;
+ String line;
while (std::getline(is, line)) {
+ line = Trim(line);
std::wstring word = CharacterUtil::UTF8ToUTF16(line);
if (!word.empty() && word[0] == L'\uFEFF') {
word = word.substr(1);
@@ -167,12 +193,13 @@ Hit *Dictionary::MatchInQuantifierDict(const Vector<wchar_t> &char_array, int be
}
Hit *Dictionary::MatchWithHit(const Vector<wchar_t> &char_array, int current_index, Hit *matched_hit) {
- DictSegment *ds = matched_hit->getMatchedDictSegment();
+ DictSegment *ds = matched_hit->GetMatchedDictSegment();
return ds->Match(char_array, current_index, 1, matched_hit);
}
bool Dictionary::IsStopWord(const Vector<wchar_t> &char_array, int begin, int length) {
- return stop_words_->Match(char_array, begin, length)->IsMatch();
+ UniquePtr<Hit> hit(stop_words_->Match(char_array, begin, length));
+ return hit->IsMatch();
}
Status Dictionary::LoadMainDict() {
@@ -194,7 +221,6 @@ Status Dictionary::LoadExtDict() {
Status load_status;
if (!ext_dict_files.empty()) {
for (const String &ext_dict_name : ext_dict_files) {
- std::cout << "[Dict Loading] " << ext_dict_name << std::endl;
String file = fs::path(conf_dir_) / fs::path(ext_dict_name).string();
load_status = LoadDictFile(main_dict_.get(), file, false, "Extra Dict");
if (!load_status.ok()) {
@@ -217,7 +243,6 @@ Status Dictionary::LoadStopWordDict() {
Vector<String> ext_stopword_dict_files = GetExtStopWordDictionarys();
if (!ext_stopword_dict_files.empty()) {
for (const String &ext_stopword_dict_file : ext_stopword_dict_files) {
- std::cout << "[Dict Loading] " << ext_stopword_dict_file << std::endl;
String file = fs::path(conf_dir_) / fs::path(ext_stopword_dict_file).string();
load_status = LoadDictFile(stop_words_.get(), file, false, "Extra Stopwords");
if (!load_status.ok()) {
@@ -272,10 +297,11 @@ void Dictionary::ParseProperties(const String &content) {
std::stringstream ss(content);
String line;
while (std::getline(ss, line)) {
- size_t pos = line.find('=');
- if (pos != String::npos) {
- String key = line.substr(0, pos);
- String value = line.substr(pos + 1);
+ std::regex attribute_regex(R"#(<entry key="([^"]+)">([^<]+)</entry>)#");
+ std::smatch match;
+ if (std::regex_search(line, match, attribute_regex)) {
+ std::string key = match[1].str();
+ std::string value = match[2].str();
props_[key] = value;
}
}
diff --git a/src/common/analyzer/ik/dict_segment.cpp b/src/common/analyzer/ik/dict_segment.cpp
index 52489a391e..969b4e27b3 100644
--- a/src/common/analyzer/ik/dict_segment.cpp
+++ b/src/common/analyzer/ik/dict_segment.cpp
@@ -1,15 +1,18 @@
module;
-import stl;
-import hit;
-
#include
module ik_dict_segment;
+import stl;
+import hit;
+import character_util;
+import third_party;
+
namespace infinity {
HashMap<wchar_t, wchar_t> DictSegment::char_map_;
+
DictSegment::DictSegment(wchar_t node_char) : node_char_(node_char) {}
Hit *DictSegment::Match(const Vector<wchar_t> &char_array, int begin, int length, Hit *search_hit) {
@@ -24,20 +27,19 @@ Hit *DictSegment::Match(const Vector<wchar_t> &char_array, int begin, int length
wchar_t key_char = char_array[begin];
DictSegment *ds = nullptr;
- Vector<UniquePtr<DictSegment>> &segment_array = children_array_;
- HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map = children_map_;
-
- if (!segment_array.empty()) {
+ if (!children_array_.empty()) {
UniquePtr<DictSegment> key_segment = MakeUnique<DictSegment>(key_char);
- auto it = std::lower_bound(segment_array.begin(),
- segment_array.begin() + store_size_,
+ auto it = std::lower_bound(children_array_.begin(),
+ children_array_.begin() + store_size_,
key_segment,
[](const UniquePtr<DictSegment> &a, const UniquePtr<DictSegment> &b) { return a->node_char_ < b->node_char_; });
- if (it != segment_array.begin() + store_size_ && (*it)->node_char_ == key_char) {
+ if (it != children_array_.begin() + store_size_ && (*it)->node_char_ == key_char) {
ds = (*it).get();
}
- } else if (!segment_map.empty()) {
- ds = segment_map[key_char].get();
+ } else if (!children_map_.empty()) {
+ auto it = children_map_.find(key_char);
+ if (it != children_map_.end())
+ ds = it->second.get();
}
if (ds != nullptr) {
@@ -59,12 +61,14 @@ Hit *DictSegment::Match(const Vector<wchar_t> &char_array, int begin, int length
void DictSegment::FillSegment(const Vector<wchar_t> &char_array, int begin, int length, int enabled) {
wchar_t begin_char = char_array[begin];
- wchar_t key_char = char_map_[begin_char];
- if (key_char == L'\0') {
+ wchar_t key_char;
+ HashMap<wchar_t, wchar_t>::iterator it = char_map_.find(begin_char);
+ if (it == char_map_.end()) {
char_map_[begin_char] = begin_char;
key_char = begin_char;
+ } else {
+ key_char = it->second;
}
-
DictSegment *ds = LookforSegment(key_char, enabled);
if (ds != nullptr) {
if (length > 1) {
@@ -79,13 +83,13 @@ DictSegment *DictSegment::LookforSegment(wchar_t key_char, int create) {
DictSegment *ds = nullptr;
if (store_size_ <= ARRAY_LENGTH_LIMIT) {
- Vector<UniquePtr<DictSegment>> &segment_array = GetChildrenArray();
+ Vector<UniquePtr<DictSegment>> &children_array_ = GetChildrenArray();
UniquePtr<DictSegment> key_segment = MakeUnique<DictSegment>(key_char);
- auto it = std::lower_bound(segment_array.begin(),
- segment_array.begin() + store_size_,
+ auto it = std::lower_bound(children_array_.begin(),
+ children_array_.begin() + store_size_,
key_segment,
[](const UniquePtr<DictSegment> &a, const UniquePtr<DictSegment> &b) { return a->node_char_ < b->node_char_; });
- if (it != segment_array.begin() + store_size_ && (*it)->node_char_ == key_char) {
+ if (it != children_array_.begin() + store_size_ && (*it)->node_char_ == key_char) {
ds = (*it).get();
}
@@ -93,26 +97,28 @@ DictSegment *DictSegment::LookforSegment(wchar_t key_char, int create) {
UniquePtr<DictSegment> ds_ptr = MakeUnique<DictSegment>(key_char);
ds = ds_ptr.get();
if (store_size_ < ARRAY_LENGTH_LIMIT) {
- segment_array[store_size_] = std::move(ds_ptr);
+ children_array_[store_size_] = std::move(ds_ptr);
store_size_++;
- std::sort(segment_array.begin(),
- segment_array.begin() + store_size_,
+ std::sort(children_array_.begin(),
+ children_array_.begin() + store_size_,
[](const UniquePtr<DictSegment> &a, const UniquePtr<DictSegment> &b) { return a->node_char_ < b->node_char_; });
} else {
- HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map = GetChildrenMap();
- Migrate(segment_array, segment_map);
- segment_map[key_char] = std::move(ds_ptr);
+ for (auto &segment : children_array_) {
+ if (segment.get() != nullptr) {
+ children_map_[segment->node_char_] = std::move(segment);
+ }
+ }
+ children_map_[key_char] = std::move(ds_ptr);
store_size_++;
children_array_.clear();
}
}
} else {
- HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map = GetChildrenMap();
- ds = segment_map[key_char].get();
+ ds = children_map_[key_char].get();
if (ds == nullptr && create == 1) {
UniquePtr<DictSegment> ds_ptr = MakeUnique<DictSegment>(key_char);
ds = ds_ptr.get();
- segment_map[key_char] = std::move(ds_ptr);
+ children_map_[key_char] = std::move(ds_ptr);
store_size_++;
}
}
@@ -120,12 +126,4 @@ DictSegment *DictSegment::LookforSegment(wchar_t key_char, int create) {
return ds;
}
-void DictSegment::Migrate(Vector<UniquePtr<DictSegment>> &segment_array, HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map) {
- for (auto &segment : segment_array) {
- if (segment.get() != nullptr) {
- segment_map[segment->node_char_] = std::move(segment);
- }
- }
-}
-
} // namespace infinity
diff --git a/src/common/analyzer/ik/dict_segment.cppm b/src/common/analyzer/ik/dict_segment.cppm
index a9b46c12d4..76b4a39f47 100644
--- a/src/common/analyzer/ik/dict_segment.cppm
+++ b/src/common/analyzer/ik/dict_segment.cppm
@@ -54,10 +54,6 @@ private:
}
return children_map_;
}
-
- void Migrate(Vector<UniquePtr<DictSegment>> &segment_array, HashMap<wchar_t, UniquePtr<DictSegment>> &segment_map);
-
- int CompareTo(const DictSegment &o) const { return node_char_ - o.node_char_; }
};
} // namespace infinity
\ No newline at end of file
diff --git a/src/common/analyzer/ik/hit.cpp b/src/common/analyzer/ik/hit.cpp
index cbb6984ca4..50747ece83 100644
--- a/src/common/analyzer/ik/hit.cpp
+++ b/src/common/analyzer/ik/hit.cpp
@@ -1,9 +1,9 @@
module;
-import ik_dict_segment;
-
module hit;
+import ik_dict_segment;
+
namespace infinity {
void Hit::SetMatchedDictSegment(DictSegment *matched_dict_segment) { matched_dict_segment_ = matched_dict_segment; }
diff --git a/src/common/analyzer/ik/hit.cppm b/src/common/analyzer/ik/hit.cppm
index 4ca2a0713a..ed2c793d11 100644
--- a/src/common/analyzer/ik/hit.cppm
+++ b/src/common/analyzer/ik/hit.cppm
@@ -33,7 +33,7 @@ public:
void SetUnmatch() { hit_state_ = UNMATCH; }
- DictSegment *getMatchedDictSegment() const { return matched_dict_segment_; }
+ DictSegment *GetMatchedDictSegment() const { return matched_dict_segment_; }
void SetMatchedDictSegment(DictSegment *matched_dict_segment_);
diff --git a/src/common/analyzer/ik/ik_analyzer.cpp b/src/common/analyzer/ik/ik_analyzer.cpp
index 1e57c8d61b..21dd3ab8ac 100644
--- a/src/common/analyzer/ik/ik_analyzer.cpp
+++ b/src/common/analyzer/ik/ik_analyzer.cpp
@@ -2,6 +2,8 @@ module;
#include
+module ik_analyzer;
+
import stl;
import segmenter;
import cjk_segmenter;
@@ -13,14 +15,13 @@ import arbitrator;
import term;
import status;
import character_util;
-
-module ik_analyzer;
+import third_party;
namespace infinity {
IKAnalyzer::IKAnalyzer(const String &path) : dict_path_(path) {}
-IKAnalyzer::IKAnalyzer(const IKAnalyzer &other) : own_dict_(false), dict_(other.dict_) { Init(); }
+IKAnalyzer::IKAnalyzer(const IKAnalyzer &other) : own_dict_(false), ik_smart_(other.ik_smart_), dict_(other.dict_) { Init(); }
IKAnalyzer::~IKAnalyzer() {
if (own_dict_) {
@@ -29,11 +30,18 @@ IKAnalyzer::~IKAnalyzer() {
}
void IKAnalyzer::Init() {
- context_ = MakeUnique<AnalyzeContext>(dict_);
+ context_ = MakeUnique<AnalyzeContext>(dict_, ik_smart_);
LoadSegmenters();
arbitrator_ = MakeUnique<IKArbitrator>();
}
+void IKAnalyzer::SetFineGrained(bool fine_grained) {
+ ik_smart_ = !fine_grained;
+ if (context_.get()) {
+ context_->ik_smart_ = ik_smart_;
+ }
+}
+
Status IKAnalyzer::Load() {
dict_ = new Dictionary(dict_path_);
Status load_status = dict_->Load();
@@ -52,13 +60,6 @@ void IKAnalyzer::LoadSegmenters() {
segmenters_.push_back(MakeUnique<CJKSegmenter>(dict_));
}
-Lexeme *IKAnalyzer::Next() {
- Lexeme *l = context_->GetNextLexeme();
- while (l == nullptr) {
- }
- return l;
-}
-
void IKAnalyzer::Reset() {
context_->Reset();
for (auto &segmenter : segmenters_) {
@@ -72,20 +73,18 @@ int IKAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) {
unsigned level = 0;
unsigned offset = 0;
std::wstring line = CharacterUtil::UTF8ToUTF16(input.text_);
+ context_->Reset();
context_->FillBuffer(line);
context_->InitCursor();
do {
for (auto &segmenter : segmenters_) {
segmenter->Analyze(context_.get());
}
- if (context_->NeedRefillBuffer()) {
- break;
- }
} while (context_->MoveCursor());
for (auto &segmenter : segmenters_) {
segmenter->Reset();
}
- arbitrator_->Process(context_.get(), true);
+ arbitrator_->Process(context_.get(), ik_smart_);
context_->OutputToResult();
context_->MarkBufferOffset();
Lexeme *lexeme = nullptr;
diff --git a/src/common/analyzer/ik/ik_analyzer.cppm b/src/common/analyzer/ik/ik_analyzer.cppm
index aea48938cf..f944fecc38 100644
--- a/src/common/analyzer/ik/ik_analyzer.cppm
+++ b/src/common/analyzer/ik/ik_analyzer.cppm
@@ -26,6 +26,8 @@ public:
Status Load();
+ void SetFineGrained(bool fine_grained);
+
protected:
int AnalyzeImpl(const Term &input, void *data, HookType func) override;
@@ -34,8 +36,6 @@ private:
void LoadSegmenters();
- Lexeme *Next();
-
void Reset();
int GetLastUselessCharNum();
@@ -45,6 +45,8 @@ private:
bool own_dict_{};
+ bool ik_smart_{true};
+
Dictionary *dict_{nullptr};
UniquePtr context_;
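For orientation while reading the hunks above: the new SetFineGrained hook simply inverts the ik_smart_ flag and pushes it into an already constructed analyze context, so a later AnalyzeImpl call arbitrates with the requested granularity. A minimal sketch of that toggle, using simplified stand-in types rather than the real Infinity analyzer classes:

```cpp
// Illustrative sketch only: a stripped-down stand-in for the ik_smart_ toggle.
// Class and member names mirror the diff, but the bodies are simplified
// assumptions, not the real Infinity implementation.
#include <iostream>
#include <memory>

struct AnalyzeContext {
    bool ik_smart_{true};  // consulted later when merging candidate lexeme paths
    explicit AnalyzeContext(bool smart) : ik_smart_(smart) {}
};

class IKAnalyzer {
public:
    void Init() { context_ = std::make_unique<AnalyzeContext>(ik_smart_); }

    // fine_grained == true keeps every candidate lexeme; fine_grained == false
    // (the default, "smart" mode) lets the arbitrator pick one best path.
    void SetFineGrained(bool fine_grained) {
        ik_smart_ = !fine_grained;
        if (context_) {  // keep an already-created context in sync
            context_->ik_smart_ = ik_smart_;
        }
    }

    bool smart() const { return ik_smart_; }

private:
    bool ik_smart_{true};
    std::unique_ptr<AnalyzeContext> context_;
};

int main() {
    IKAnalyzer analyzer;
    analyzer.Init();
    analyzer.SetFineGrained(true);  // request fine-grained segmentation
    std::cout << std::boolalpha << analyzer.smart() << "\n";  // prints: false
}
```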
diff --git a/src/common/analyzer/ik/letter_segmenter.cpp b/src/common/analyzer/ik/letter_segmenter.cpp
index b50620c035..2179c5c535 100644
--- a/src/common/analyzer/ik/letter_segmenter.cpp
+++ b/src/common/analyzer/ik/letter_segmenter.cpp
@@ -2,14 +2,14 @@ module;
#include
+module letter_segmenter;
+
import stl;
import segmenter;
import analyze_context;
import lexeme;
import character_util;
-module letter_segmenter;
-
namespace infinity {
const std::wstring LetterSegmenter::SEGMENTER_NAME = L"LETTER_SEGMENTER";
Vector LetterSegmenter::Letter_Connector = {L'#', L'&', L'+', L'-', L'.', L'@', L'_'};
@@ -27,16 +27,9 @@ LetterSegmenter::LetterSegmenter() {
}
void LetterSegmenter::Analyze(AnalyzeContext *context) {
- bool buffer_lock_flag = false;
- buffer_lock_flag = ProcessEnglishLetter(context) || buffer_lock_flag;
- buffer_lock_flag = ProcessArabicLetter(context) || buffer_lock_flag;
- buffer_lock_flag = ProcessMixLetter(context) || buffer_lock_flag;
-
- if (buffer_lock_flag) {
- context->LockBuffer(SEGMENTER_NAME);
- } else {
- context->UnlockBuffer(SEGMENTER_NAME);
- }
+ ProcessEnglishLetter(context);
+ ProcessArabicLetter(context);
+ ProcessMixLetter(context);
}
void LetterSegmenter::Reset() {
@@ -63,7 +56,8 @@ bool LetterSegmenter::ProcessMixLetter(AnalyzeContext *context) {
end_ = context->GetCursor();
} else {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), start_, end_ - start_ + 1, Lexeme::TYPE_LETTER);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
start_ = -1;
end_ = -1;
}
@@ -71,7 +65,8 @@ bool LetterSegmenter::ProcessMixLetter(AnalyzeContext *context) {
if (context->IsBufferConsumed() && (start_ != -1 && end_ != -1)) {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), start_, end_ - start_ + 1, Lexeme::TYPE_LETTER);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
start_ = -1;
end_ = -1;
}
@@ -97,7 +92,8 @@ bool LetterSegmenter::ProcessEnglishLetter(AnalyzeContext *context) {
english_end_ = context->GetCursor();
} else {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), english_start_, english_end_ - english_start_ + 1, Lexeme::TYPE_ENGLISH);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
english_start_ = -1;
english_end_ = -1;
}
@@ -105,7 +101,8 @@ bool LetterSegmenter::ProcessEnglishLetter(AnalyzeContext *context) {
if (context->IsBufferConsumed() && (english_start_ != -1 && english_end_ != -1)) {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), english_start_, english_end_ - english_start_ + 1, Lexeme::TYPE_ENGLISH);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
english_start_ = -1;
english_end_ = -1;
}
@@ -132,7 +129,8 @@ bool LetterSegmenter::ProcessArabicLetter(AnalyzeContext *context) {
} else if (CharacterUtil::CHAR_USELESS == context->GetCurrentCharType() && IsNumConnector(context->GetCurrentChar())) {
} else {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), arabic_start_, arabic_end_ - arabic_start_ + 1, Lexeme::TYPE_ARABIC);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
arabic_start_ = -1;
arabic_end_ = -1;
}
@@ -140,7 +138,8 @@ bool LetterSegmenter::ProcessArabicLetter(AnalyzeContext *context) {
if (context->IsBufferConsumed() && (arabic_start_ != -1 && arabic_end_ != -1)) {
Lexeme *new_lexeme = new Lexeme(context->GetBufferOffset(), arabic_start_, arabic_end_ - arabic_start_ + 1, Lexeme::TYPE_ARABIC);
- context->AddLexeme(new_lexeme);
+ if (!context->AddLexeme(new_lexeme))
+ delete new_lexeme;
arabic_start_ = -1;
arabic_end_ = -1;
}
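The recurring change in letter_segmenter.cpp is a leak fix: AddLexeme now reports whether the context took ownership, and the segmenter deletes the freshly allocated lexeme when it did not (typically a duplicate). A self-contained sketch of that calling convention, with simplified types that only illustrate the pattern:

```cpp
// Minimal sketch of the "delete when AddLexeme rejects" pattern.
// Types are simplified assumptions, not the real Infinity classes.
#include <memory>
#include <set>
#include <utility>
#include <vector>

struct Lexeme {
    int begin{};
    int length{};
    Lexeme(int b, int l) : begin(b), length(l) {}
};

struct Context {
    // Returns true only if ownership was transferred into the container.
    bool AddLexeme(Lexeme *l) {
        auto [it, inserted] = keys_.insert({l->begin, l->length});
        if (inserted) {
            owned_.emplace_back(l);
            return true;
        }
        return false;  // duplicate: caller keeps ownership
    }
    std::set<std::pair<int, int>> keys_;
    std::vector<std::unique_ptr<Lexeme>> owned_;
};

void EmitLexeme(Context &ctx, int begin, int length) {
    Lexeme *l = new Lexeme(begin, length);
    if (!ctx.AddLexeme(l)) {
        delete l;  // rejected, so nobody else will free it
    }
}
```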
diff --git a/src/common/analyzer/ik/lexeme.cpp b/src/common/analyzer/ik/lexeme.cpp
index e13386dd4f..5c98194492 100644
--- a/src/common/analyzer/ik/lexeme.cpp
+++ b/src/common/analyzer/ik/lexeme.cpp
@@ -15,6 +15,11 @@ Lexeme::Lexeme(int offset, int begin, int length, int lexeme_type) {
lexeme_type_ = lexeme_type;
}
+Lexeme *Lexeme::Copy() {
+ Lexeme *copy = new Lexeme(offset_, begin_, length_, lexeme_type_);
+ return copy;
+}
+
bool Lexeme::Append(const Lexeme &l, int lexeme_type) {
if (!l.lexeme_text_.empty() && GetEndPosition() == l.GetBeginPosition()) {
length_ += l.length_;
diff --git a/src/common/analyzer/ik/lexeme.cppm b/src/common/analyzer/ik/lexeme.cppm
index 7187e9e536..60549b631f 100644
--- a/src/common/analyzer/ik/lexeme.cppm
+++ b/src/common/analyzer/ik/lexeme.cppm
@@ -32,6 +32,8 @@ public:
Lexeme(int offset, int begin, int length, int lexeme_type);
+ Lexeme *Copy();
+
bool Equals(const Lexeme &other) const { return offset_ == other.offset_ && begin_ == other.begin_ && length_ == other.length_; }
int Hash() const {
@@ -80,7 +82,7 @@ public:
std::wstring GetLexemeText() const { return lexeme_text_; }
void SetLexemeText(const std::wstring &lexeme_text) {
- if (lexeme_text_.empty()) {
+ if (lexeme_text.empty()) {
lexeme_text_ = L"";
length_ = 0;
} else {
diff --git a/src/common/analyzer/ik/lexeme_path.cpp b/src/common/analyzer/ik/lexeme_path.cpp
index ad1237907d..d6ac709f77 100644
--- a/src/common/analyzer/ik/lexeme_path.cpp
+++ b/src/common/analyzer/ik/lexeme_path.cpp
@@ -2,7 +2,6 @@ module;
#include
#include
-#include
module lexeme_path;
@@ -104,7 +103,9 @@ LexemePath *LexemePath::Copy() const {
the_copy->payload_length_ = payload_length_;
Cell *c = GetHead();
while (c != nullptr && c->GetLexeme() != nullptr) {
- the_copy->AddLexeme(c->GetLexeme());
+ Lexeme *lexeme = c->GetLexeme()->Copy();
+ if (!(the_copy->AddLexeme(lexeme)))
+ delete lexeme;
c = c->GetNext();
}
return the_copy;
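LexemePath::Copy now clones each element through the new Lexeme::Copy before adding it, so the copied path owns independent lexemes and frees a clone it rejects; previously the copy shared pointers with the original path, which risks double frees. A small sketch of this deep-copy pattern under simplified, assumed types:

```cpp
// Sketch of a deep copy where each element is cloned, handed over, and freed
// immediately if the receiving container refuses it. Illustrative types only.
#include <memory>
#include <vector>

struct Lexeme {
    int offset, begin, length;
    Lexeme(int o, int b, int l) : offset(o), begin(b), length(l) {}
    Lexeme *Copy() const { return new Lexeme(offset, begin, length); }
};

struct LexemeChain {
    // Takes ownership only when it returns true.
    bool AddLexeme(Lexeme *l) {
        if (!items_.empty() && items_.back()->begin == l->begin) {
            return false;  // rejected (e.g. duplicate position): caller still owns l
        }
        items_.push_back(l);
        return true;
    }

    std::unique_ptr<LexemeChain> DeepCopy() const {
        auto copy = std::make_unique<LexemeChain>();
        for (const Lexeme *l : items_) {
            Lexeme *clone = l->Copy();  // clone instead of sharing the pointer
            if (!copy->AddLexeme(clone)) {
                delete clone;           // copy refused it, free the clone here
            }
        }
        return copy;
    }

    ~LexemeChain() {
        for (Lexeme *l : items_) delete l;
    }

    std::vector<Lexeme *> items_;
};
```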
diff --git a/src/common/analyzer/ik/quick_sort_set.cpp b/src/common/analyzer/ik/quick_sort_set.cpp
index 05a9c4ef4d..47c1cfb1aa 100644
--- a/src/common/analyzer/ik/quick_sort_set.cpp
+++ b/src/common/analyzer/ik/quick_sort_set.cpp
@@ -5,58 +5,59 @@ import lexeme;
module quick_sort_set;
namespace infinity {
+
QuickSortSet::QuickSortSet() {}
QuickSortSet::~QuickSortSet() {
while (size_ > 0) {
Lexeme *tail = PollLast();
delete tail;
- size_--;
}
}
bool QuickSortSet::AddLexeme(Lexeme *lexeme) {
- Cell *new_cell = new Cell(lexeme);
+ UniquePtr new_cell = MakeUnique(lexeme);
if (size_ == 0) {
- head_ = new_cell;
- tail_ = new_cell;
+ Cell *cell_ptr = new_cell.release();
+ head_ = cell_ptr;
+ tail_ = cell_ptr;
size_++;
return true;
} else {
- if (tail_->CompareTo(new_cell) == 0) {
- delete new_cell;
+ if (tail_->CompareTo(new_cell.get()) == 0) {
return false;
- } else if (tail_->CompareTo(new_cell) < 0) {
- tail_->next_ = new_cell;
- new_cell->prev_ = tail_;
- tail_ = new_cell;
+ } else if (tail_->CompareTo(new_cell.get()) < 0) {
+ Cell *cell_ptr = new_cell.release();
+ tail_->next_ = cell_ptr;
+ cell_ptr->prev_ = tail_;
+ tail_ = cell_ptr;
size_++;
return true;
- } else if (head_->CompareTo(new_cell) > 0) {
- head_->prev_ = new_cell;
- new_cell->next_ = head_;
- head_ = new_cell;
+ } else if (head_->CompareTo(new_cell.get()) > 0) {
+ Cell *cell_ptr = new_cell.release();
+ head_->prev_ = cell_ptr;
+ cell_ptr->next_ = head_;
+ head_ = cell_ptr;
size_++;
return true;
} else {
Cell *index = tail_;
- while (index != nullptr && index->CompareTo(new_cell) > 0) {
+ while (index != nullptr && index->CompareTo(new_cell.get()) > 0) {
index = index->prev_;
}
- if (index->CompareTo(new_cell) == 0) {
- delete new_cell;
+ if (index->CompareTo(new_cell.get()) == 0) {
return false;
- } else if (index->CompareTo(new_cell) < 0) {
- new_cell->prev_ = index;
- new_cell->next_ = index->next_;
- index->next_->prev_ = new_cell;
- index->next_ = new_cell;
+ } else if (index->CompareTo(new_cell.get()) < 0) {
+ Cell *cell_ptr = new_cell.release();
+ cell_ptr->prev_ = index;
+ cell_ptr->next_ = index->next_;
+ index->next_->prev_ = cell_ptr;
+ index->next_ = cell_ptr;
size_++;
return true;
}
}
}
- delete new_cell;
return false;
}
} // namespace infinity
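The QuickSortSet rewrite above replaces manual new/delete bookkeeping with a UniquePtr that is release()d only on the branches that actually link the cell into the list; every other path (duplicates, fall-through) frees the cell automatically. A compact sketch of the same idiom on a plain sorted doubly linked list (illustrative names, not the real QuickSortSet):

```cpp
// Ownership-transfer idiom: hold the new node in a unique_ptr, release() only
// where it gets linked in, and let the smart pointer clean up everywhere else.
#include <memory>

struct Node {
    int key;
    Node *prev{nullptr};
    Node *next{nullptr};
    explicit Node(int k) : key(k) {}
};

struct SortedList {
    Node *head{nullptr}, *tail{nullptr};

    // Returns false (and frees the node) on duplicates.
    bool Insert(int key) {
        auto node = std::make_unique<Node>(key);
        if (!head) {
            head = tail = node.release();      // ownership transferred
            return true;
        }
        for (Node *cur = head; cur; cur = cur->next) {
            if (cur->key == key) return false; // duplicate: unique_ptr frees node
            if (cur->key > key) {
                Node *raw = node.release();    // link before cur
                raw->prev = cur->prev;
                raw->next = cur;
                (cur->prev ? cur->prev->next : head) = raw;
                cur->prev = raw;
                return true;
            }
        }
        Node *raw = node.release();            // largest key so far: append
        raw->prev = tail;
        tail->next = raw;
        tail = raw;
        return true;
    }

    ~SortedList() {
        for (Node *cur = head; cur;) {
            Node *next = cur->next;
            delete cur;
            cur = next;
        }
    }
};
```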
diff --git a/src/common/analyzer/keyword_analyzer.cpp b/src/common/analyzer/whitespace_analyzer.cpp
similarity index 89%
rename from src/common/analyzer/keyword_analyzer.cpp
rename to src/common/analyzer/whitespace_analyzer.cpp
index 9e539c44cc..fc76172a30 100644
--- a/src/common/analyzer/keyword_analyzer.cpp
+++ b/src/common/analyzer/whitespace_analyzer.cpp
@@ -16,15 +16,14 @@ module;
#include
#include
-module keyword_analyzer;
-
+module whitespace_analyzer;
import stl;
import term;
import analyzer;
namespace infinity {
-int KeywordAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) {
+int WhitespaceAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) {
std::istringstream is(input.text_);
std::string t;
u32 offset = 0;
diff --git a/src/common/analyzer/keyword_analyzer.cppm b/src/common/analyzer/whitespace_analyzer.cppm
similarity index 76%
rename from src/common/analyzer/keyword_analyzer.cppm
rename to src/common/analyzer/whitespace_analyzer.cppm
index bddf389714..d262b07c7f 100644
--- a/src/common/analyzer/keyword_analyzer.cppm
+++ b/src/common/analyzer/whitespace_analyzer.cppm
@@ -1,4 +1,4 @@
-// Copyright(C) 2023 InfiniFlow, Inc. All rights reserved.
+// Copyright(C) 2024 InfiniFlow, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,16 +14,17 @@
module;
-export module keyword_analyzer;
+export module whitespace_analyzer;
import stl;
import term;
import analyzer;
namespace infinity {
-export class KeywordAnalyzer : public Analyzer {
+
+export class WhitespaceAnalyzer : public Analyzer {
public:
- KeywordAnalyzer() = default;
- ~KeywordAnalyzer() override = default;
+ WhitespaceAnalyzer() = default;
+ ~WhitespaceAnalyzer() override = default;
protected:
int AnalyzeImpl(const Term &input, void *data, HookType func) override;
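The rename from KeywordAnalyzer to WhitespaceAnalyzer better matches what AnalyzeImpl does: it splits the input on whitespace with an istringstream and emits each token with an increasing offset. A free-standing sketch of that tokenization (the offset semantics here are a simplifying assumption):

```cpp
// Whitespace tokenization via istringstream, mirroring the analyzer's approach.
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<std::pair<std::string, unsigned>> WhitespaceTokenize(const std::string &text) {
    std::vector<std::pair<std::string, unsigned>> terms;
    std::istringstream is(text);
    std::string token;
    unsigned offset = 0;
    while (is >> token) {  // operator>> skips runs of whitespace
        terms.emplace_back(token, offset++);
    }
    return terms;
}

int main() {
    for (const auto &[term, off] : WhitespaceTokenize("hello  world foo")) {
        std::cout << off << ": " << term << "\n";
    }
}
```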
diff --git a/src/common/default_values.cppm b/src/common/default_values.cppm
index bbe4b4ac0d..d962ab620f 100644
--- a/src/common/default_values.cppm
+++ b/src/common/default_values.cppm
@@ -278,6 +278,9 @@ export {
constexpr std::string_view MEMINDEX_MEMORY_QUOTA_OPTION_NAME = "memindex_memory_quota";
constexpr std::string_view RESULT_CACHE_OPTION_NAME = "result_cache";
constexpr std::string_view CACHE_RESULT_CAPACITY_OPTION_NAME = "cache_result_capacity";
+ constexpr std::string_view DENSE_INDEX_BUILDING_WORKER_OPTION_NAME = "dense_index_building_worker";
+ constexpr std::string_view SPARSE_INDEX_BUILDING_WORKER_OPTION_NAME = "sparse_index_building_worker";
+ constexpr std::string_view FULLTEXT_INDEX_BUILDING_WORKER_OPTION_NAME = "fulltext_index_building_worker";
constexpr std::string_view WAL_DIR_OPTION_NAME = "wal_dir";
constexpr std::string_view WAL_COMPACT_THRESHOLD_OPTION_NAME = "wal_compact_threshold";
@@ -314,8 +317,10 @@ export {
constexpr std::string_view SYSTEM_MEMORY_USAGE_VAR_NAME = "system_memory_usage"; // global
constexpr std::string_view OPEN_FILE_COUNT_VAR_NAME = "open_file_count"; // global
constexpr std::string_view CPU_USAGE_VAR_NAME = "cpu_usage"; // global
- constexpr std::string_view FOLLOWER_NUMBER = "follower_number"; // global
+ constexpr std::string_view FOLLOWER_NUMBER_VAR_NAME = "follower_number"; // global
constexpr std::string_view CACHE_RESULT_NUM_VAR_NAME = "cache_result_num"; // global
+ constexpr std::string_view MEMORY_CACHE_MISS_VAR_NAME = "memory_cache_miss"; // global
+ constexpr std::string_view DISK_CACHE_MISS_VAR_NAME = "disk_cache_miss"; // global
// IO related
constexpr SizeT DEFAULT_READ_BUFFER_SIZE = 4096;
diff --git a/src/common/status.cpp b/src/common/status.cpp
index 158d1e7735..e5bfb74d7a 100644
--- a/src/common/status.cpp
+++ b/src/common/status.cpp
@@ -448,6 +448,10 @@ Status Status::FileIsOpen(const String &filename) { return Status(ErrorCode::kFi
Status Status::Unknown(const String &name) { return Status(ErrorCode::kUnknown, MakeUnique(fmt::format("Unknown {}", name))); }
+Status Status::InvalidQueryOption(const String &detail) {
+ return Status(ErrorCode::kUnknown, MakeUnique(fmt::format("Invalid query option: {}", detail)));
+}
+
// 4. TXN fail
Status Status::TxnRollback(u64 txn_id, const String &rollback_reason) {
return Status(ErrorCode::kTxnRollback, MakeUnique(fmt::format("Transaction: {} is rollback. {}", txn_id, rollback_reason)));
@@ -599,6 +603,12 @@ Status Status::NotRegistered(const String &node_info) {
Status Status::CantSwitchRole(const String &detailed_info) { return Status(ErrorCode::kCantSwitchRole, MakeUnique(detailed_info)); }
+Status Status::TooManyFollower(infinity::u8 follower_limit) {
+ return Status(ErrorCode::kTooManyFollower, MakeUnique(fmt::format("Too many followers, limit: {}", follower_limit)));
+}
+
+Status Status::TooManyLearner() { return Status(ErrorCode::kTooManyLearner, MakeUnique("Too many learner, limit: 255")); }
+
// meta
Status Status::InvalidEntry() { return Status(ErrorCode::kInvalidEntry, MakeUnique("Invalid entry")); }
diff --git a/src/common/status.cppm b/src/common/status.cppm
index 19bb71b379..b953e7d013 100644
--- a/src/common/status.cppm
+++ b/src/common/status.cppm
@@ -138,6 +138,7 @@ export enum class ErrorCode : long {
kErrorInit = 3089,
kFileIsOpen = 3090,
kUnknown = 3091,
+ kInvalidQueryOption = 3092,
// 4. Txn fail
kTxnRollback = 4001,
@@ -183,6 +184,8 @@ export enum class ErrorCode : long {
kInvalidStorageType = 7024,
kNotRegistered = 7025,
kCantSwitchRole = 7026,
+ kTooManyFollower = 7027,
+ kTooManyLearner = 7028,
// 8. meta error
kInvalidEntry = 8001,
@@ -311,6 +314,7 @@ public:
static Status ErrorInit(const String &detailed_info);
static Status FileIsOpen(const String &filename);
static Status Unknown(const String &name);
+ static Status InvalidQueryOption(const String& detail);
// 4. TXN fail
static Status TxnRollback(u64 txn_id, const String &rollback_reason = "no reanson gived");
@@ -356,6 +360,8 @@ public:
static Status InvalidStorageType(const String &expected, const String &actual);
static Status NotRegistered(const String &node_info);
static Status CantSwitchRole(const String &detailed_info);
+ static Status TooManyFollower(u8 follower_limit);
+ static Status TooManyLearner();
// meta
static Status InvalidEntry();
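The new kTooManyFollower/kTooManyLearner codes follow the file's existing convention: each error code is paired with a static factory that formats the message once, so call sites stay one-liners. A reduced sketch of that pairing with a simplified Status type (not Infinity's actual implementation):

```cpp
// Error code + static factory pairing, reduced to the essentials.
#include <cstdint>
#include <memory>
#include <string>

enum class ErrorCode : long { kOk = 0, kTooManyFollower = 7027, kTooManyLearner = 7028 };

class Status {
public:
    Status() = default;
    Status(ErrorCode code, std::string msg)
        : code_(code), msg_(std::make_unique<std::string>(std::move(msg))) {}

    static Status OK() { return Status(); }
    static Status TooManyFollower(std::uint8_t follower_limit) {
        return Status(ErrorCode::kTooManyFollower,
                      "Too many followers, limit: " + std::to_string(follower_limit));
    }
    static Status TooManyLearner() {
        return Status(ErrorCode::kTooManyLearner, "Too many learners, limit: 255");
    }

    bool ok() const { return code_ == ErrorCode::kOk; }
    ErrorCode code() const { return code_; }

private:
    ErrorCode code_{ErrorCode::kOk};
    std::unique_ptr<std::string> msg_;
};
```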
diff --git a/src/embedded_infinity/wrap_infinity.cpp b/src/embedded_infinity/wrap_infinity.cpp
index 93e1d4afca..a782925a16 100644
--- a/src/embedded_infinity/wrap_infinity.cpp
+++ b/src/embedded_infinity/wrap_infinity.cpp
@@ -1358,6 +1358,13 @@ void ProcessDataBlocks(QueryResult &query_result, WrapQueryResult &wrap_query_re
auto data_block = query_result.result_table_->GetDataBlockById(block_idx);
ProcessColumns(data_block, query_result.result_table_->ColumnCount(), columns);
}
+
+ if(query_result.result_table_->total_hits_count_flag_) {
+ nlohmann::json json_response;
+ json_response["total_hits_count"] = query_result.result_table_->total_hits_count_;
+ wrap_query_result.extra_result = json_response.dump();
+ }
+
HandleColumnDef(wrap_query_result, query_result.result_table_->ColumnCount(), query_result.result_table_->definition_ptr_, columns);
}
@@ -1368,6 +1375,7 @@ WrapQueryResult WrapSearch(Infinity &instance,
Vector highlight_list,
Vector order_by_list,
Vector group_by_list,
+ bool total_hits_count_flag,
WrapSearchExpr *wrap_search_expr,
WrapParsedExpr *filter_expr,
WrapParsedExpr *limit_expr,
@@ -1531,8 +1539,17 @@ WrapQueryResult WrapSearch(Infinity &instance,
}
}
- auto query_result =
- instance.Search(db_name, table_name, search_expr, filter, limit, offset, output_columns, highlight, order_by_exprs, group_by_exprs);
+ auto query_result = instance.Search(db_name,
+ table_name,
+ search_expr,
+ filter,
+ limit,
+ offset,
+ output_columns,
+ highlight,
+ order_by_exprs,
+ group_by_exprs,
+ total_hits_count_flag);
search_expr = nullptr;
filter = nullptr;
limit = nullptr;
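The WrapSearch changes thread a total_hits_count_flag down to Infinity::Search and, when the result table carries a hit count, serialize it into the new extra_result string as JSON. A small sketch of how that extra result might be produced, using an assumed stand-in for the result table:

```cpp
// Sketch: emit the hit count as a JSON "extra result" only when requested.
// FakeResultTable is an illustrative stand-in, not the real result table type.
#include <iostream>
#include <string>

#include <nlohmann/json.hpp>

struct FakeResultTable {
    bool total_hits_count_flag_{false};
    long total_hits_count_{0};
};

std::string BuildExtraResult(const FakeResultTable &table) {
    if (!table.total_hits_count_flag_) {
        return {};  // nothing requested, nothing emitted
    }
    nlohmann::json json_response;
    json_response["total_hits_count"] = table.total_hits_count_;
    return json_response.dump();
}

int main() {
    FakeResultTable table{true, 1234};
    std::cout << BuildExtraResult(table) << "\n";  // {"total_hits_count":1234}
}
```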
diff --git a/src/embedded_infinity/wrap_infinity.cppm b/src/embedded_infinity/wrap_infinity.cppm
index 93e1147746..0e1181ef76 100644
--- a/src/embedded_infinity/wrap_infinity.cppm
+++ b/src/embedded_infinity/wrap_infinity.cppm
@@ -125,6 +125,7 @@ export struct WrapQueryResult {
Vector names;
Vector column_defs;
Vector column_fields;
+ String extra_result;
// show database
String database_name;
String store_dir;
@@ -432,6 +433,7 @@ export WrapQueryResult WrapSearch(Infinity &instance,
Vector highlight_list,
Vector order_by_list,
Vector group_by_list,
+ bool total_hits_count_flag,
WrapSearchExpr *wrap_search_expr = nullptr,
WrapParsedExpr *where_expr = nullptr,
WrapParsedExpr *limit_expr = nullptr,
diff --git a/src/embedded_infinity_ext.cpp b/src/embedded_infinity_ext.cpp
index cb92ed61b2..6124856de2 100644
--- a/src/embedded_infinity_ext.cpp
+++ b/src/embedded_infinity_ext.cpp
@@ -48,6 +48,7 @@ NB_MODULE(embedded_infinity_ext, m) {
.def_rw("names", &WrapQueryResult::names)
.def_rw("column_defs", &WrapQueryResult::column_defs)
.def_rw("column_fields", &WrapQueryResult::column_fields)
+ .def_rw("extra_result", &WrapQueryResult::extra_result)
.def_rw("database_name", &WrapQueryResult::database_name)
.def_rw("store_dir", &WrapQueryResult::store_dir)
.def_rw("table_count", &WrapQueryResult::table_count)
@@ -325,6 +326,7 @@ NB_MODULE(embedded_infinity_ext, m) {
nb::arg("highlight_list"),
nb::arg("order_by_list"),
nb::arg("group_by_list"),
+ nb::arg("total_hits_count_flag"),
nb::arg("wrap_search_expr") = nullptr,
nb::arg("where_expr") = nullptr,
nb::arg("limit_expr") = nullptr,
diff --git a/src/executor/operator/physical_compact_finish.cpp b/src/executor/operator/physical_compact_finish.cpp
index 852dbae8fa..341352b57e 100644
--- a/src/executor/operator/physical_compact_finish.cpp
+++ b/src/executor/operator/physical_compact_finish.cpp
@@ -33,6 +33,8 @@ import internal_types;
import infinity_context;
import infinity_exception;
import status;
+import txn_store;
+import segment_index_entry;
namespace infinity {
@@ -79,6 +81,29 @@ void PhysicalCompactFinish::SaveSegmentData(QueryContext *query_context, const C
}
LOG_DEBUG(ss.str());
+ for (const auto &compact_segment_data : compact_state_data->segment_data_list_) {
+ TxnStore *txn_store = txn->txn_store();
+ TxnTableStore *txn_table_store = txn_store->GetTxnTableStore(table_entry);
+ auto index_map = table_entry->IndexMetaMap();
+ for (const auto &[index_name, index_meta] : *index_map) {
+ auto [table_index_entry, status] = index_meta->GetEntryNolock(txn->TxnID(), txn->BeginTS());
+ if (!status.ok()) {
+ continue;
+ }
+ Vector segment_index_entries;
+ auto &segment_index_map = table_index_entry->index_by_segment();
+ for (const auto *old_segment : compact_segment_data.old_segments_) {
+ auto iter = segment_index_map.find(old_segment->segment_id());
+ if (iter == segment_index_map.end()) {
+ continue;
+ }
+ auto *segment_index_entry = iter->second.get();
+ segment_index_entries.push_back(segment_index_entry);
+ }
+ txn_table_store->AddSegmentIndexesStore(table_index_entry, std::move(segment_index_entries));
+ }
+ }
+
txn->Compact(table_entry, std::move(segment_data), compact_type_);
}
diff --git a/src/executor/operator/physical_import.cpp b/src/executor/operator/physical_import.cpp
index 1912b5ea76..62af0ee72b 100644
--- a/src/executor/operator/physical_import.cpp
+++ b/src/executor/operator/physical_import.cpp
@@ -1280,6 +1280,34 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vector
+namespace {
+
+Status CheckParquetColumns(TableEntry *table_entry, parquet::arrow::FileReader *arrow_reader) {
+ std::shared_ptr schema;
+ arrow::Status status = arrow_reader->GetSchema(&schema);
+ if (!status.ok()) {
+ return Status::ImportFileFormatError(status.ToString());
+ }
+ const arrow::FieldVector &fields = schema->fields();
+ const Vector> &column_defs = table_entry->column_defs();
+ if (fields.size() != column_defs.size()) {
+ return Status::ColumnCountMismatch(fmt::format("Column count mismatch: {} != {}", fields.size(), column_defs.size()));
+ }
+ for (SizeT i = 0; i < fields.size(); ++i) {
+ const auto &field = fields[i];
+ const auto &column_def = column_defs[i];
+
+ if (*column_def->type() != *field->type()) {
+ return Status::ImportFileFormatError(
+ fmt::format("Column {} mismatch, {} != {}", i, column_def->type()->ToString(), field->type()->ToString()));
+ }
+ }
+
+ return Status::OK();
+}
+
+} // namespace
+
void PhysicalImport::ImportPARQUET(QueryContext *query_context, ImportOperatorState *import_op_state) {
arrow::MemoryPool *pool = arrow::DefaultMemoryPool();
@@ -1304,6 +1332,10 @@ void PhysicalImport::ImportPARQUET(QueryContext *query_context, ImportOperatorSt
}
std::unique_ptr arrow_reader = build_result.MoveValueUnsafe();
+ if (Status status = CheckParquetColumns(table_entry_, arrow_reader.get()); !status.ok()) {
+ RecoverableError(status);
+ }
+
std::shared_ptr rb_reader;
if (auto status = arrow_reader->GetRecordBatchReader(&rb_reader); !status.ok()) {
RecoverableError(Status::ImportFileFormatError(status.ToString()));
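With CheckParquetColumns rejecting mismatched schemas before any batch is read, the per-batch column-count check and the defensive dynamic_pointer_cast/null checks in the handlers below become redundant, which is why the later hunks switch to static_pointer_cast. A sketch of this validate-once approach against the Arrow/Parquet API, where the expected column types are an illustrative stand-in for the table's column definitions:

```cpp
// Validate the Parquet schema once up front; afterwards, per-batch arrays can
// be downcast with static_pointer_cast instead of checked dynamic casts.
#include <memory>
#include <string>
#include <vector>

#include <arrow/api.h>
#include <parquet/arrow/reader.h>

arrow::Status CheckSchema(parquet::arrow::FileReader *reader,
                          const std::vector<std::shared_ptr<arrow::DataType>> &expected) {
    std::shared_ptr<arrow::Schema> schema;
    ARROW_RETURN_NOT_OK(reader->GetSchema(&schema));

    if (static_cast<size_t>(schema->num_fields()) != expected.size()) {
        return arrow::Status::Invalid("Column count mismatch: ", schema->num_fields(),
                                      " != ", expected.size());
    }
    for (int i = 0; i < schema->num_fields(); ++i) {
        const auto &field = schema->field(i);
        if (!field->type()->Equals(*expected[i])) {
            return arrow::Status::Invalid("Column ", i, " mismatch: ",
                                          field->type()->ToString(), " != ",
                                          expected[i]->ToString());
        }
    }
    return arrow::Status::OK();  // safe to static_pointer_cast per-batch arrays now
}
```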
@@ -1330,10 +1362,6 @@ void PhysicalImport::ImportPARQUET(QueryContext *query_context, ImportOperatorSt
auto batch = maybe_batch.MoveValueUnsafe();
const auto batch_row_count = batch->num_rows();
const auto batch_col_count = batch->num_columns();
- if (static_cast(batch_col_count) != table_entry_->ColumnCount()) {
- RecoverableError(
- Status::ColumnCountMismatch(fmt::format("Column count mismatch: {} != {}", batch_col_count, table_entry_->ColumnCount())));
- }
for (i64 batch_row_id = 0; batch_row_id < batch_row_count; ++batch_row_id) {
for (int column_idx = 0; column_idx < batch_col_count; ++column_idx) {
SharedPtr column = batch->column(column_idx);
@@ -1408,14 +1436,8 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
ColumnVector &column_vector,
i64 start_offset,
i64 end_offset) {
- if (sparse_info->DataType() != EmbeddingDataType::kElemBit && data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
switch (sparse_info->DataType()) {
case EmbeddingDataType::kElemBit: {
- if (data_array.get() != nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
ParquetSparseValueHandler(sparse_info,
index_array,
nullptr,
@@ -1425,13 +1447,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemUInt8: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto uint8_value_array = std::dynamic_pointer_cast(data_array->values());
- if (uint8_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto uint8_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
uint8_value_array,
@@ -1441,13 +1457,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemInt8: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto int8_value_array = std::dynamic_pointer_cast(data_array->values());
- if (int8_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int8_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
int8_value_array,
@@ -1457,13 +1467,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemInt16: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto int16_value_array = std::dynamic_pointer_cast(data_array->values());
- if (int16_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int16_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
int16_value_array,
@@ -1473,13 +1477,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemInt32: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto int32_value_array = std::dynamic_pointer_cast(data_array->values());
- if (int32_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int32_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
int32_value_array,
@@ -1489,13 +1487,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemInt64: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto int64_value_array = std::dynamic_pointer_cast(data_array->values());
- if (int64_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int64_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
int64_value_array,
@@ -1505,13 +1497,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemFloat16: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto float16_value_array = std::dynamic_pointer_cast(data_array->values());
- if (float16_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float16_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
float16_value_array,
@@ -1521,13 +1507,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemBFloat16: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto float_value_array = std::dynamic_pointer_cast(data_array->values());
- if (float_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
float_value_array,
@@ -1537,13 +1517,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemFloat: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto float_value_array = std::dynamic_pointer_cast(data_array->values());
- if (float_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
float_value_array,
@@ -1553,13 +1527,7 @@ void ParquetSparseValueHandler(const SparseInfo *sparse_info,
break;
}
case EmbeddingDataType::kElemDouble: {
- if (data_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
- auto double_value_array = std::dynamic_pointer_cast(data_array->values());
- if (double_value_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto double_value_array = std::static_pointer_cast(data_array->values());
ParquetSparseValueHandler(sparse_info,
index_array,
double_value_array,
@@ -1595,10 +1563,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
const SizeT byte_size = dim / 8;
auto embedding = MakeUnique(byte_size);
- auto bool_array = std::dynamic_pointer_cast(list_array->values());
- if (bool_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto bool_array = std::static_pointer_cast(list_array->values());
auto *raw_u8_ptr = reinterpret_cast(embedding.get());
for (i64 j = start_offset; j < end_offset; ++j) {
if (bool_array->Value(j)) {
@@ -1610,10 +1575,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemUInt8: {
auto embedding = MakeUnique(dim * sizeof(u8));
- auto uint8_array = std::dynamic_pointer_cast(list_array->values());
- if (uint8_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto uint8_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
const u8 value = uint8_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1622,10 +1584,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemInt8: {
auto embedding = MakeUnique(dim * sizeof(i8));
- auto int8_array = std::dynamic_pointer_cast(list_array->values());
- if (int8_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int8_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
i8 value = int8_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1634,10 +1593,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemInt16: {
auto embedding = MakeUnique(dim * sizeof(i16));
- auto int16_array = std::dynamic_pointer_cast(list_array->values());
- if (int16_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int16_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
i16 value = int16_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1646,10 +1602,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemInt32: {
auto embedding = MakeUnique(dim * sizeof(i32));
- auto int32_array = std::dynamic_pointer_cast(list_array->values());
- if (int32_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int32_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
i32 value = int32_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1658,10 +1611,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemInt64: {
auto embedding = MakeUnique(dim * sizeof(i64));
- auto int64_array = std::dynamic_pointer_cast(list_array->values());
- if (int64_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto int64_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
i64 value = int64_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1670,10 +1620,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemFloat16: {
auto embedding = MakeUnique(dim * sizeof(Float16T));
- auto float16_array = std::dynamic_pointer_cast(list_array->values());
- if (float16_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float16_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
const u16 value = float16_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1682,10 +1629,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemBFloat16: {
auto embedding = MakeUnique(dim * sizeof(BFloat16T));
- auto float_array = std::dynamic_pointer_cast(list_array->values());
- if (float_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
const float value = float_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1694,10 +1638,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemFloat: {
auto embedding = MakeUnique(dim * sizeof(float));
- auto float_array = std::dynamic_pointer_cast(list_array->values());
- if (float_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto float_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
float value = float_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1706,10 +1647,7 @@ Pair, SizeT> ParquetEmbeddingHandler(SharedPtr list_
}
case EmbeddingDataType::kElemDouble: {
auto embedding = MakeUnique(dim * sizeof(double));
- auto double_array = std::dynamic_pointer_cast(list_array->values());
- if (double_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto double_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
double value = double_array->Value(j);
reinterpret_cast(embedding.get())[j - start_offset] = value;
@@ -1736,10 +1674,7 @@ ParquetTensorHandler(SharedPtr list_array, const EmbeddingInfo
embedding_vec.push_back(std::move(data));
}
} else {
- auto tensor_ele_array = std::dynamic_pointer_cast(list_array->values());
- if (tensor_ele_array.get() == nullptr) {
- RecoverableError(Status::ImportFileFormatError("Invalid parquet file format."));
- }
+ auto tensor_ele_array = std::static_pointer_cast(list_array->values());
for (i64 j = start_offset; j < end_offset; ++j) {
auto data = ParquetEmbeddingHandler(tensor_ele_array, embedding_info, j);
embedding_vec.push_back(std::move(data));
@@ -1753,78 +1688,78 @@ ParquetTensorHandler(SharedPtr list_array, const EmbeddingInfo
void PhysicalImport::ParquetValueHandler(const SharedPtr &array, ColumnVector &column_vector, u64 value_idx) {
switch (const auto column_data_logical_type = column_vector.data_type()->type(); column_data_logical_type) {
case LogicalType::kBoolean: {
- auto value = std::dynamic_pointer_cast(array)->Value(value_idx);
+ auto value = std::static_pointer_cast(array)->Value(value_idx);
column_vector.AppendByPtr(reinterpret_cast(&value));
break;
}
case LogicalType::kTinyInt: {
- auto value = std::dynamic_pointer_cast(array)->Value(value_idx);
+ auto value = std::static_pointer_cast(array)->Value(value_idx);
column_vector.AppendByPtr(reinterpret_cast(&value));
break;
}
case LogicalType::kSmallInt: {
- auto value = std::dynamic_pointer_cast(array)->Value(value_idx);
+ auto value = std::static_pointer_cast(array)->Value(value_idx);
column_vector.AppendByPtr(reinterpret_cast(&value));
break;
}
case LogicalType::kInteger: {
- auto value = std::dynamic_pointer_cast(array)->Value(value_idx);
+ auto value = std::static_pointer_cast(array)->Value(value_idx);
column_vector.AppendByPtr(reinterpret_cast | |