diff --git a/poetry.lock b/poetry.lock index 5626d81..cd3b686 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -6813,4 +6813,4 @@ ragas = ["ragas"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<4.0" -content-hash = "ff8a8596ac88ae5406234f08810e2e4d654714af0aa3663451e988a2cf6ef51e" +content-hash = "dc40d87c427c923ef0a4fc3fca79f9b06eb39a3f42254cd34c614f1cc482d464" diff --git a/pyproject.toml b/pyproject.toml index 3cecd49..5dec6ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,8 @@ ragas = { version = ">=0.1.12", optional = true } typer = ">=0.12.5" # Frontend: chainlit = { version = ">=1.2.0", optional = true } +# Utilities: +packaging = ">=23.0" [tool.poetry.extras] # https://python-poetry.org/docs/pyproject/#extras chainlit = ["chainlit"] diff --git a/src/raglite/_database.py b/src/raglite/_database.py index fefa7f9..0535847 100644 --- a/src/raglite/_database.py +++ b/src/raglite/_database.py @@ -6,12 +6,13 @@ from functools import lru_cache from hashlib import sha256 from pathlib import Path -from typing import Any +from typing import Any, cast from xml.sax.saxutils import escape import numpy as np from markdown_it import MarkdownIt from packaging import version +from packaging.version import Version from pydantic import ConfigDict from sqlalchemy.engine import Engine, make_url from sqlmodel import JSON, Column, Field, Relationship, Session, SQLModel, create_engine, text @@ -291,8 +292,10 @@ def from_chunks( @lru_cache(maxsize=1) def _pgvector_version(session: Session) -> Version: try: - result = session.execute(text("SELECT extversion FROM pg_extension WHERE extname = 'vector'")) - pgvector_version = version.parse(result.scalar()) + result = session.execute( + text("SELECT extversion FROM pg_extension WHERE extname = 'vector'") + ) + pgvector_version = version.parse(cast(str, result.scalar_one())) except Exception as e: error_message = "Unable to parse pgvector version, is pgvector installed?" raise ValueError(error_message) from e @@ -349,7 +352,7 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine: """ ) ) - base_sql = f""" + create_vector_index_sql = f""" CREATE INDEX IF NOT EXISTS vector_search_chunk_index ON chunk_embedding USING hnsw ( (embedding::halfvec({embedding_dim})) @@ -358,14 +361,10 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine: SET hnsw.ef_search = {20 * 4 * 8}; """ # Add iterative scan if version >= 0.8.0 - pgvector_version = _get_pgvector_version(session) - if pgvector_version and version.parse(pgvector_version) >= version.parse("0.8.0"): - sql = f"""{base_sql}; - SET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'}; - """ - else: - sql = f"{base_sql};" - session.execute(text(sql)) + pgvector_version = _pgvector_version(session) + if pgvector_version and pgvector_version >= version.parse("0.8.0"): + create_vector_index_sql += f"\nSET hnsw.iterative_scan = {'relaxed_order' if config.reranker else 'strict_order'};" + session.execute(text(create_vector_index_sql)) session.commit() elif db_backend == "sqlite": # Create a virtual table for keyword search on the chunk table. @@ -373,39 +372,31 @@ def create_database_engine(config: RAGLiteConfig | None = None) -> Engine: # [1] https://www.sqlite.org/fts5.html#external_content_tables with Session(engine) as session: session.execute( - text( - """ + text(""" CREATE VIRTUAL TABLE IF NOT EXISTS keyword_search_chunk_index USING fts5(body, content='chunk', content_rowid='rowid'); - """ - ) + """) ) session.execute( - text( - """ + text(""" CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_insert AFTER INSERT ON chunk BEGIN INSERT INTO keyword_search_chunk_index(rowid, body) VALUES (new.rowid, new.body); END; - """ - ) + """) ) session.execute( - text( - """ + text(""" CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_delete AFTER DELETE ON chunk BEGIN INSERT INTO keyword_search_chunk_index(keyword_search_chunk_index, rowid, body) VALUES('delete', old.rowid, old.body); END; - """ - ) + """) ) session.execute( - text( - """ + text(""" CREATE TRIGGER IF NOT EXISTS keyword_search_chunk_index_auto_update AFTER UPDATE ON chunk BEGIN INSERT INTO keyword_search_chunk_index(keyword_search_chunk_index, rowid, body) VALUES('delete', old.rowid, old.body); INSERT INTO keyword_search_chunk_index(rowid, body) VALUES (new.rowid, new.body); END; - """ - ) + """) ) session.commit() return engine