Skip to content

Commit

Permalink
use binary protocol to transfer vecs
Browse files (browse the repository at this point in the history)
  • Loading branch information
onurctirtir committed Sep 10, 2024
1 parent 0cc417c commit 381215d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
8 changes: 5 additions & 3 deletions neurips23/streaming/base_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def __init__(self, metric, index_params):
self.index_build_params = {k: v for k, v in index_params.items() if k != "insert_conns"}

self.ind_op_class = self.determine_index_op_class(metric)
self.query_op = self.determine_query_op(metric)

self.search_query = f"SELECT id FROM test_tbl ORDER BY vec_col {self.determine_query_op(metric)} %b LIMIT %b"

start_database_result = subprocess.run(['bash', '/home/app/start_database.sh'], capture_output=True, text=True)
if start_database_result.returncode != 0:
Expand Down Expand Up @@ -113,7 +114,8 @@ def insert(self, X, ids):
self.num_unprocessed_deletes = 0

def copy_data(conn_idx, id_start_idx, id_end_idx):
with self.conns[conn_idx].cursor().copy("COPY test_tbl (id, vec_col) FROM STDIN") as copy:
with self.conns[conn_idx].cursor().copy("COPY test_tbl (id, vec_col) FROM STDIN WITH (FORMAT BINARY)") as copy:
copy.set_types(["int8", "vector"])
for id, vec in zip(ids[id_start_idx:id_end_idx], X[id_start_idx:id_end_idx]):
copy.write_row((id, vec))

Expand Down Expand Up @@ -169,7 +171,7 @@ def batch_query(conn_idx, query_vec_start_idx, query_vec_end_idx):
for query_vec in X[query_vec_start_idx: query_vec_end_idx]:
with self.conns[conn_idx].cursor() as cur:
try:
cur.execute(f"SELECT id FROM test_tbl ORDER BY vec_col {self.query_op} %s LIMIT {k}", (query_vec, ))
cur.execute(self.search_query, (query_vec, k, ), binary=True, prepare=True)
except Exception as e:
raise Exception(f"Error '{e}' when querying with k={k}\nQuery vector was:\n{query_vec}") from e

Expand Down
2 changes: 1 addition & 1 deletion requirements_py3.10.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ scikit-learn
jinja2==3.1.2
pandas==2.0.0
psycopg==3.2.1
pgvector==0.1.6
pgvector==0.3.3

0 comments on commit 381215d

Please sign in to comment.