feat(db): index tables to improve study search and sorting performance (#1902)

Merge pull request #1902 from AntaresSimulatorTeam/feature/1035-index-study-table-search-engine
laurent-laporte-pro authored Jan 23, 2024

2 parents b9c36f0 + 447353a commit 4ef1f38
Showing 5 changed files with 161 additions and 25 deletions.
71 changes: 71 additions & 0 deletions alembic/versions/1f5db5dfad80_add_indexes_to_study_tables.py
@@ -0,0 +1,71 @@
# noinspection SpellCheckingInspection
"""
Add indexes to Study tables

The goal of this migration is to add indexes on the `study`, `rawstudy` and `study_additional_data` tables,
in order to speed up data search queries for the search engine.

Revision ID: 1f5db5dfad80
Revises: 782a481f3414
Create Date: 2024-01-19 18:37:34.155199
"""
from alembic import op
import sqlalchemy as sa # type: ignore


# revision identifiers, used by Alembic.
# noinspection SpellCheckingInspection
revision = "1f5db5dfad80"
down_revision = "782a481f3414"
branch_labels = None
depends_on = None


# noinspection SpellCheckingInspection
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.alter_column("workspace", existing_type=sa.VARCHAR(length=255), nullable=False)
batch_op.create_index(batch_op.f("ix_rawstudy_missing"), ["missing"], unique=False)
batch_op.create_index(batch_op.f("ix_rawstudy_workspace"), ["workspace"], unique=False)

with op.batch_alter_table("study", schema=None) as batch_op:
batch_op.create_index(batch_op.f("ix_study_archived"), ["archived"], unique=False)
batch_op.create_index(batch_op.f("ix_study_created_at"), ["created_at"], unique=False)
batch_op.create_index(batch_op.f("ix_study_folder"), ["folder"], unique=False)
batch_op.create_index(batch_op.f("ix_study_name"), ["name"], unique=False)
batch_op.create_index(batch_op.f("ix_study_owner_id"), ["owner_id"], unique=False)
batch_op.create_index(batch_op.f("ix_study_parent_id"), ["parent_id"], unique=False)
batch_op.create_index(batch_op.f("ix_study_type"), ["type"], unique=False)
batch_op.create_index(batch_op.f("ix_study_updated_at"), ["updated_at"], unique=False)
batch_op.create_index(batch_op.f("ix_study_version"), ["version"], unique=False)

with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.create_index(batch_op.f("ix_study_additional_data_patch"), ["patch"], unique=False)

# ### end Alembic commands ###


# noinspection SpellCheckingInspection
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_study_additional_data_patch"))

with op.batch_alter_table("study", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_study_version"))
batch_op.drop_index(batch_op.f("ix_study_updated_at"))
batch_op.drop_index(batch_op.f("ix_study_type"))
batch_op.drop_index(batch_op.f("ix_study_parent_id"))
batch_op.drop_index(batch_op.f("ix_study_owner_id"))
batch_op.drop_index(batch_op.f("ix_study_name"))
batch_op.drop_index(batch_op.f("ix_study_folder"))
batch_op.drop_index(batch_op.f("ix_study_created_at"))
batch_op.drop_index(batch_op.f("ix_study_archived"))

with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_rawstudy_workspace"))
batch_op.drop_index(batch_op.f("ix_rawstudy_missing"))
batch_op.alter_column("workspace", existing_type=sa.VARCHAR(length=255), nullable=True)

# ### end Alembic commands ###
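
For reference, a minimal sketch of applying or rolling back this revision programmatically through Alembic's command API, assuming an alembic.ini at the repository root (equivalent to the alembic CLI calls used in scripts/rollback.sh):

    # Sketch only: drive this migration through Alembic's command API.
    # Assumes an alembic.ini at the repository root.
    from alembic import command
    from alembic.config import Config

    config = Config("alembic.ini")

    # Apply every migration up to and including this revision.
    command.upgrade(config, "1f5db5dfad80")

    # Roll back to the previous revision, mirroring scripts/rollback.sh.
    # command.downgrade(config, "782a481f3414")
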
24 changes: 12 additions & 12 deletions antarest/study/model.py
@@ -70,7 +70,7 @@ class StudyAdditionalData(Base): # type:ignore
)
author = Column(String(255), default="Unknown")
horizon = Column(String)
patch = Column(String(), nullable=True)
patch = Column(String(), index=True, nullable=True)

def __eq__(self, other: t.Any) -> bool:
if not super().__eq__(other):
@@ -93,19 +93,19 @@ class Study(Base): # type: ignore
default=lambda: str(uuid.uuid4()),
unique=True,
)
name = Column(String(255))
type = Column(String(50))
version = Column(String(255))
name = Column(String(255), index=True)
type = Column(String(50), index=True)
version = Column(String(255), index=True)
author = Column(String(255))
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime, index=True)
updated_at = Column(DateTime, index=True)
last_access = Column(DateTime)
path = Column(String())
folder = Column(String, nullable=True)
parent_id = Column(String(36), ForeignKey("study.id", name="fk_study_study_id"))
folder = Column(String, nullable=True, index=True)
parent_id = Column(String(36), ForeignKey("study.id", name="fk_study_study_id"), index=True)
public_mode = Column(Enum(PublicMode), default=PublicMode.NONE)
owner_id = Column(Integer, ForeignKey(Identity.id), nullable=True)
archived = Column(Boolean(), default=False)
owner_id = Column(Integer, ForeignKey(Identity.id), nullable=True, index=True)
archived = Column(Boolean(), default=False, index=True)
owner = relationship(Identity, uselist=False)
groups = relationship(Group, secondary=lambda: groups_metadata, cascade="")
additional_data = relationship(
@@ -167,8 +167,8 @@ class RawStudy(Study):
primary_key=True,
)
content_status = Column(Enum(StudyContentStatus))
workspace = Column(String(255), default=DEFAULT_WORKSPACE_NAME)
missing = Column(DateTime, nullable=True)
workspace = Column(String(255), default=DEFAULT_WORKSPACE_NAME, nullable=False, index=True)
missing = Column(DateTime, nullable=True, index=True)

__mapper_args__ = {
"polymorphic_identity": "rawstudy",
2 changes: 1 addition & 1 deletion scripts/rollback.sh
@@ -12,5 +12,5 @@ CUR_DIR=$(cd "$(dirname "$0")" && pwd)
BASE_DIR=$(dirname "$CUR_DIR")

cd "$BASE_DIR"
alembic downgrade d495746853cc
alembic downgrade 782a481f3414
cd -
66 changes: 66 additions & 0 deletions tests/study/model.py
@@ -0,0 +1,66 @@
"""
Test the database model.
"""
import uuid

from sqlalchemy import inspect # type: ignore
from sqlalchemy.engine import Engine # type: ignore
from sqlalchemy.orm import Session # type: ignore

from antarest.study.model import Study


# noinspection SpellCheckingInspection
class TestStudy:
"""
Test the study model.
"""

def test_study(self, db_session: Session) -> None:
"""
Basic test of the `study` table.
"""
study_id = uuid.uuid4()

with db_session:
db_session.add(Study(id=str(study_id), name="Study 1"))
db_session.commit()

with db_session:
study = db_session.query(Study).first()
assert study.id == str(study_id)
assert study.name == "Study 1"

def test_index_on_study(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("study")
index_names = {index["name"] for index in indexes}
assert index_names == {
"ix_study_archived",
"ix_study_created_at",
"ix_study_folder",
"ix_study_name",
"ix_study_owner_id",
"ix_study_parent_id",
"ix_study_type",
"ix_study_updated_at",
"ix_study_version",
}

def test_index_on_rawstudy(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("rawstudy")
index_names = {index["name"] for index in indexes}
assert index_names == {"ix_rawstudy_workspace", "ix_rawstudy_missing"}

def test_index_on_variantstudy(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("variantstudy")
index_names = {index["name"] for index in indexes}
assert not index_names

def test_index_on_study_additional_data(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("study_additional_data")
index_names = {index["name"] for index in indexes}
assert index_names == {"ix_study_additional_data_patch"}
23 changes: 11 additions & 12 deletions tests/study/storage/rawstudy/test_raw_study_service.py
@@ -1,11 +1,10 @@
import datetime
import typing as t
import zipfile
from pathlib import Path
from typing import List, Optional

import numpy as np
import pytest
from sqlalchemy import create_engine # type: ignore

from antarest.core.model import PublicMode
from antarest.core.utils.fastapi_sqlalchemy import db
@@ -67,10 +66,10 @@ def test_export_study_flat(
study_storage_service: StudyStorageService,
# pytest parameters
outputs: bool,
output_filter: Optional[List[str]],
output_filter: t.Optional[t.List[str]],
denormalize: bool,
) -> None:
## Prepare database objects
# Prepare database objects
# noinspection PyArgumentList
user = User(id=0, name="admin")
db.session.add(user)
@@ -100,7 +99,7 @@ def test_export_study_flat(
db.session.add(raw_study)
db.session.commit()

## Prepare the RAW Study
# Prepare the RAW Study
raw_study_service.create(raw_study)
file_study = raw_study_service.get_raw(raw_study)

@@ -144,7 +143,7 @@ def test_export_study_flat(
storage_service=study_storage_service,
)

## Prepare fake outputs
# Prepare fake outputs
my_solver_outputs = ["20230802-1425eco", "20230802-1628eco.zip"]
for filename in my_solver_outputs:
output_path = raw_study_path / "output" / filename
@@ -163,7 +162,7 @@ def test_export_study_flat(
output_path.mkdir(exist_ok=True, parents=True)
(output_path / "simulation.log").write_text("Simulation done")

## Collect all files by types to prepare the comparison
# Collect all files by types to prepare the comparison
src_study_files = set()
src_matrices = set()
src_outputs = set()
@@ -176,7 +175,7 @@ def test_export_study_flat(
else:
src_study_files.add(relpath)

## Run the export
# Run the export
target_path = tmp_path / raw_study_path.with_suffix(".exported").name
raw_study_service.export_study_flat(
raw_study,
@@ -186,7 +185,7 @@ def test_export_study_flat(
denormalize=denormalize,
)

## Collect the resulting files
# Collect the resulting files
res_study_files = set()
res_matrices = set()
res_outputs = set()
@@ -199,7 +198,7 @@ def test_export_study_flat(
else:
res_study_files.add(relpath)

## Check the matrices
# Check the matrices
# If de-normalization is enabled, the previous loop won't find the matrices
# because the matrix extensions are ".txt" instead of ".txt.link".
# Therefore, it is necessary to move the corresponding ".txt" files
@@ -210,7 +209,7 @@ def test_export_study_flat(
res_study_files -= res_matrices
assert res_matrices == src_matrices

## Check the outputs
# Check the outputs
if outputs:
# If `outputs` is True, filtering can occur
if output_filter is None:
@@ -224,5 +223,5 @@ def test_export_study_flat(
# whatever the value of the `output_list_filter` is
assert not res_outputs

## Check the study files
# Check the study files
assert res_study_files == src_study_files
