feat(db): index tables to improve study search and sorting performance (#1902)

Merge pull request #1902 from AntaresSimulatorTeam/feature/1035-index-study-table-search-engine
laurent-laporte-pro authored Jan 23, 2024

2 parents b9c36f0 + 447353a commit 4ef1f38
Showing 5 changed files with 161 additions and 25 deletions.
71 changes: 71 additions & 0 deletions alembic/versions/1f5db5dfad80_add_indexes_to_study_tables.py
@@ -0,0 +1,71 @@
# noinspection SpellCheckingInspection
"""
Add indexes to Study tables

The goal of this migration is to add indexes on the `study`, `rawstudy` and `study_additional_data` tables,
in order to speed up data search queries for the search engine.

Revision ID: 1f5db5dfad80
Revises: 782a481f3414
Create Date: 2024-01-19 18:37:34.155199
"""
from alembic import op
import sqlalchemy as sa # type: ignore


# revision identifiers, used by Alembic.
# noinspection SpellCheckingInspection
revision = "1f5db5dfad80"
down_revision = "782a481f3414"
branch_labels = None
depends_on = None


# noinspection SpellCheckingInspection
def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.alter_column("workspace", existing_type=sa.VARCHAR(length=255), nullable=False)
batch_op.create_index(batch_op.f("ix_rawstudy_missing"), ["missing"], unique=False)
batch_op.create_index(batch_op.f("ix_rawstudy_workspace"), ["workspace"], unique=False)

with op.batch_alter_table("study", schema=None) as batch_op:
batch_op.create_index(batch_op.f("ix_study_archived"), ["archived"], unique=False)
batch_op.create_index(batch_op.f("ix_study_created_at"), ["created_at"], unique=False)
batch_op.create_index(batch_op.f("ix_study_folder"), ["folder"], unique=False)
batch_op.create_index(batch_op.f("ix_study_name"), ["name"], unique=False)
batch_op.create_index(batch_op.f("ix_study_owner_id"), ["owner_id"], unique=False)
batch_op.create_index(batch_op.f("ix_study_parent_id"), ["parent_id"], unique=False)
batch_op.create_index(batch_op.f("ix_study_type"), ["type"], unique=False)
batch_op.create_index(batch_op.f("ix_study_updated_at"), ["updated_at"], unique=False)
batch_op.create_index(batch_op.f("ix_study_version"), ["version"], unique=False)

with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.create_index(batch_op.f("ix_study_additional_data_patch"), ["patch"], unique=False)

# ### end Alembic commands ###


# noinspection SpellCheckingInspection
def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_study_additional_data_patch"))

with op.batch_alter_table("study", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_study_version"))
batch_op.drop_index(batch_op.f("ix_study_updated_at"))
batch_op.drop_index(batch_op.f("ix_study_type"))
batch_op.drop_index(batch_op.f("ix_study_parent_id"))
batch_op.drop_index(batch_op.f("ix_study_owner_id"))
batch_op.drop_index(batch_op.f("ix_study_name"))
batch_op.drop_index(batch_op.f("ix_study_folder"))
batch_op.drop_index(batch_op.f("ix_study_created_at"))
batch_op.drop_index(batch_op.f("ix_study_archived"))

with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.drop_index(batch_op.f("ix_rawstudy_workspace"))
batch_op.drop_index(batch_op.f("ix_rawstudy_missing"))
batch_op.alter_column("workspace", existing_type=sa.VARCHAR(length=255), nullable=True)

# ### end Alembic commands ###
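
For reference, a minimal sketch of applying or rolling back this revision programmatically through Alembic's command API, assuming an alembic.ini at the repository root (equivalent to the alembic CLI calls used in scripts/rollback.sh):

    # Sketch only: drive this migration through Alembic's command API.
    # Assumes an alembic.ini at the repository root.
    from alembic import command
    from alembic.config import Config

    config = Config("alembic.ini")

    # Apply every migration up to and including this revision.
    command.upgrade(config, "1f5db5dfad80")

    # Roll back to the previous revision, mirroring scripts/rollback.sh.
    # command.downgrade(config, "782a481f3414")
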
24 changes: 12 additions & 12 deletions antarest/study/model.py
@@ -70,7 +70,7 @@ class StudyAdditionalData(Base): # type:ignore
)
author = Column(String(255), default="Unknown")
horizon = Column(String)
patch = Column(String(), nullable=True)
patch = Column(String(), index=True, nullable=True)

def __eq__(self, other: t.Any) -> bool:
if not super().__eq__(other):
@@ -93,19 +93,19 @@ class Study(Base): # type: ignore
default=lambda: str(uuid.uuid4()),
unique=True,
)
name = Column(String(255))
type = Column(String(50))
version = Column(String(255))
name = Column(String(255), index=True)
type = Column(String(50), index=True)
version = Column(String(255), index=True)
author = Column(String(255))
created_at = Column(DateTime)
updated_at = Column(DateTime)
created_at = Column(DateTime, index=True)
updated_at = Column(DateTime, index=True)
last_access = Column(DateTime)
path = Column(String())
folder = Column(String, nullable=True)
parent_id = Column(String(36), ForeignKey("study.id", name="fk_study_study_id"))
folder = Column(String, nullable=True, index=True)
parent_id = Column(String(36), ForeignKey("study.id", name="fk_study_study_id"), index=True)
public_mode = Column(Enum(PublicMode), default=PublicMode.NONE)
owner_id = Column(Integer, ForeignKey(Identity.id), nullable=True)
archived = Column(Boolean(), default=False)
owner_id = Column(Integer, ForeignKey(Identity.id), nullable=True, index=True)
archived = Column(Boolean(), default=False, index=True)
owner = relationship(Identity, uselist=False)
groups = relationship(Group, secondary=lambda: groups_metadata, cascade="")
additional_data = relationship(
@@ -167,8 +167,8 @@ class RawStudy(Study):
primary_key=True,
)
content_status = Column(Enum(StudyContentStatus))
workspace = Column(String(255), default=DEFAULT_WORKSPACE_NAME)
missing = Column(DateTime, nullable=True)
workspace = Column(String(255), default=DEFAULT_WORKSPACE_NAME, nullable=False, index=True)
missing = Column(DateTime, nullable=True, index=True)

__mapper_args__ = {
"polymorphic_identity": "rawstudy",
2 changes: 1 addition & 1 deletion scripts/rollback.sh
@@ -12,5 +12,5 @@ CUR_DIR=$(cd "$(dirname "$0")" && pwd)
BASE_DIR=$(dirname "$CUR_DIR")

cd "$BASE_DIR"
alembic downgrade d495746853cc
alembic downgrade 782a481f3414
cd -
66 changes: 66 additions & 0 deletions tests/study/model.py
@@ -0,0 +1,66 @@
"""
Test the database model.
"""
import uuid

from sqlalchemy import inspect # type: ignore
from sqlalchemy.engine import Engine # type: ignore
from sqlalchemy.orm import Session # type: ignore

from antarest.study.model import Study


# noinspection SpellCheckingInspection
class TestStudy:
"""
Test the study model.
"""

def test_study(self, db_session: Session) -> None:
"""
Basic test of the `study` table.
"""
study_id = uuid.uuid4()

with db_session:
db_session.add(Study(id=str(study_id), name="Study 1"))
db_session.commit()

with db_session:
study = db_session.query(Study).first()
assert study.id == str(study_id)
assert study.name == "Study 1"

def test_index_on_study(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("study")
index_names = {index["name"] for index in indexes}
assert index_names == {
"ix_study_archived",
"ix_study_created_at",
"ix_study_folder",
"ix_study_name",
"ix_study_owner_id",
"ix_study_parent_id",
"ix_study_type",
"ix_study_updated_at",
"ix_study_version",
}

def test_index_on_rawstudy(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("rawstudy")
index_names = {index["name"] for index in indexes}
assert index_names == {"ix_rawstudy_workspace", "ix_rawstudy_missing"}

def test_index_on_variantstudy(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("variantstudy")
index_names = {index["name"] for index in indexes}
assert not index_names

def test_index_on_study_additional_data(self, db_engine: Engine) -> None:
inspector = inspect(db_engine)
indexes = inspector.get_indexes("study_additional_data")
index_names = {index["name"] for index in indexes}
assert index_names == {"ix_study_additional_data_patch"}
23 changes: 11 additions & 12 deletions tests/study/storage/rawstudy/test_raw_study_service.py
@@ -1,11 +1,10 @@
import datetime
import typing as t
import zipfile
from pathlib import Path
from typing import List, Optional

import numpy as np
import pytest
from sqlalchemy import create_engine # type: ignore

from antarest.core.model import PublicMode
from antarest.core.utils.fastapi_sqlalchemy import db
@@ -67,10 +66,10 @@ def test_export_study_flat(
study_storage_service: StudyStorageService,
# pytest parameters
outputs: bool,
output_filter: Optional[List[str]],
output_filter: t.Optional[t.List[str]],
denormalize: bool,
) -> None:
## Prepare database objects
# Prepare database objects
# noinspection PyArgumentList
user = User(id=0, name="admin")
db.session.add(user)
@@ -100,7 +99,7 @@ def test_export_study_flat(
db.session.add(raw_study)
db.session.commit()

## Prepare the RAW Study
# Prepare the RAW Study
raw_study_service.create(raw_study)
file_study = raw_study_service.get_raw(raw_study)

@@ -144,7 +143,7 @@ def test_export_study_flat(
storage_service=study_storage_service,
)

## Prepare fake outputs
# Prepare fake outputs
my_solver_outputs = ["20230802-1425eco", "20230802-1628eco.zip"]
for filename in my_solver_outputs:
output_path = raw_study_path / "output" / filename
@@ -163,7 +162,7 @@ def test_export_study_flat(
output_path.mkdir(exist_ok=True, parents=True)
(output_path / "simulation.log").write_text("Simulation done")

## Collect all files by types to prepare the comparison
# Collect all files by types to prepare the comparison
src_study_files = set()
src_matrices = set()
src_outputs = set()
@@ -176,7 +175,7 @@ def test_export_study_flat(
else:
src_study_files.add(relpath)

## Run the export
# Run the export
target_path = tmp_path / raw_study_path.with_suffix(".exported").name
raw_study_service.export_study_flat(
raw_study,
@@ -186,7 +185,7 @@ def test_export_study_flat(
denormalize=denormalize,
)

## Collect the resulting files
# Collect the resulting files
res_study_files = set()
res_matrices = set()
res_outputs = set()
@@ -199,7 +198,7 @@ def test_export_study_flat(
else:
res_study_files.add(relpath)

## Check the matrices
# Check the matrices
# If de-normalization is enabled, the previous loop won't find the matrices
# because the matrix extensions are ".txt" instead of ".txt.link".
# Therefore, it is necessary to move the corresponding ".txt" files
@@ -210,7 +209,7 @@ def test_export_study_flat(
res_study_files -= res_matrices
assert res_matrices == src_matrices

## Check the outputs
# Check the outputs
if outputs:
# If `outputs` is True, filtering can occur
if output_filter is None:
@@ -224,5 +223,5 @@ def test_export_study_flat(
# whatever the value of the `output_list_filter` is
assert not res_outputs

## Check the study files
# Check the study files
assert res_study_files == src_study_files
