Skip to content

Commit

Permalink
chore: code style
Browse files (browse the repository at this point in the history)
  • Loading branch information
undo76 committed Dec 11, 2024
1 parent aa18cc8 commit 3c8b003
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 30 deletions.
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number | Diff line number | Diff line change
Expand Up @@ -52,6 +52,8 @@ ragas = { version = ">=0.1.12", optional = true }
typer = ">=0.12.5"
# Frontend:
chainlit = { version = ">=1.2.0", optional = true }
# Utilities:
packaging = ">=23.0"

[tool.poetry.extras] # https://python-poetry.org/docs/pyproject/#extras
chainlit = ["chainlit"]
Expand Down
47 changes: 19 additions & 28 deletions src/raglite/_database.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,12 +6,13 @@
from functools import lru_cache
from hashlib import sha256
from pathlib import Path
from typing import Any
from typing import Any, cast
from xml.sax.saxutils import escape

import numpy as np
from markdown_it import MarkdownIt
from packaging import version
from packaging.version import Version
from pydantic import ConfigDict
from sqlalchemy.engine import Engine, make_url
from sqlmodel import JSON, Column, Field, Relationship, Session, SQLModel, create_engine, text
Expand Down Expand Up @@ -291,8 +292,10 @@ def from_chunks(
@lru_cache(maxsize=1)
def _pgvector_version(session: Session) -> Version:
try:
result = session.execute(text("SELECT extversion FROM pg_extension WHERE extname = 'vector'"))
pgvector_version = version.parse(result.scalar())
result = session.execute(
text("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
)
pgvector_version = version.parse(cast(str, result.scalar_one()))
except Exception as e:
error_message = "Unable to parse pgvector version, is pgvector installed?"
raise ValueError(error_message) from e
Expand Down Expand Up @@ -349,7 +352,7 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine:
"""
)
)
base_sql = f"""
create_vector_index_sql = f"""
CREATE INDEX IF NOT EXISTS vector_search_chunk_index ON chunk_embedding
USING hnsw (
(embedding::halfvec({embedding_dim}))
Expand All @@ -358,54 +361,42 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine:
SET hnsw.ef_search = {20 * 4 * 8};
"""
# Add iterative scan if version >= 0.8.0
pgvector_version = _get_pgvector_version(session)
if pgvector_version and version.parse(pgvector_version) >= version.parse("0.8.0"):
sql = f"""{base_sql};
SET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'};
"""
else:
sql = f"{base_sql};"
session.execute(text(sql))
pgvector_version = _pgvector_version(session)
if pgvector_version and pgvector_version >= version.parse("0.8.0"):
create_vector_index_sql += f"\nSET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'};"
session.execute(text(create_vector_index_sql))
session.commit()
elif db_backend == "sqlite":
# Create a virtual table for keyword search on the chunk table.
# We use the chunk table as an external content table [1] to avoid duplicating the data.
# [1] https://www.sqlite.org/fts5.html#external_content_tables
with Session(engine) as session:
session.execute(
text(
"""
text("""
CREATE VIRTUAL TABLE IF NOT EXISTS keyword_search_chunk_index USING fts5(body, content='chunk', content_rowid='rowid');
"""
)
""")
)
session.execute(
text(
"""
text("""
CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_insert AFTER INSERT ON chunk BEGIN
INSERT INTO keyword_search_chunk_index(rowid, body) VALUES (new.rowid, new.body);
END;
"""
)
""")
)
session.execute(
text(
"""
text("""
CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_delete AFTER DELETE ON chunk BEGIN
INSERT INTO keyword_search_chunk_index(keyword_search_chunk_index, rowid, body) VALUES('delete', old.rowid, old.body);
END;
"""
)
""")
)
session.execute(
text(
"""
text("""
CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_update AFTER UPDATE ON chunk BEGIN
INSERT INTO keyword_search_chunk_index(keyword_search_chunk_index, rowid, body) VALUES('delete', old.rowid, old.body);
INSERT INTO keyword_search_chunk_index(rowid, body) VALUES (new.rowid, new.body);
END;
"""
)
""")
)
session.commit()
return engine

0 comments on commit 3c8b003

Please sign in to comment.