Skip to content

Commit

Permalink
perf(watcher): improve performance of the Watcher service (#1888)
Browse files Browse the repository at this point in the history
Merge pull request #1888 from AntaresSimulatorTeam/ANT-925-Amelioration-de-la-performance-du-watcher
ANT-925
  • Loading branch information
laurent-laporte-pro authored Feb 16, 2024
2 parents 8bdc837 + d0370cd commit 5c269c6
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 67 deletions.
86 changes: 86 additions & 0 deletions alembic/versions/fd73601a9075_add_delete_cascade_studies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Add delete cascade constraint to study foreign keys
Revision ID: fd73601a9075
Revises: 3c70366b10ea
Create Date: 2024-02-12 17:27:37.314443
"""
import sqlalchemy as sa # type: ignore
from alembic import op

# revision identifiers, used by Alembic.
revision = "fd73601a9075"
down_revision = "dae93f1d9110"
branch_labels = None
depends_on = None

# noinspection SpellCheckingInspection
RAWSTUDY_FK = "rawstudy_id_fkey"

# noinspection SpellCheckingInspection
VARIANTSTUDY_FK = "variantstudy_id_fkey"

# noinspection SpellCheckingInspection
STUDY_ADDITIONAL_DATA_FK = "study_additional_data_study_id_fkey"


def upgrade() -> None:
dialect_name: str = op.get_context().dialect.name

# SQLite doesn't support dropping foreign keys, so we need to ignore it here
if dialect_name == "postgresql":
with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.drop_constraint(RAWSTUDY_FK, type_="foreignkey")
batch_op.create_foreign_key(RAWSTUDY_FK, "study", ["id"], ["id"], ondelete="CASCADE")

with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.drop_constraint(STUDY_ADDITIONAL_DATA_FK, type_="foreignkey")
batch_op.create_foreign_key(STUDY_ADDITIONAL_DATA_FK, "study", ["study_id"], ["id"], ondelete="CASCADE")

with op.batch_alter_table("variantstudy", schema=None) as batch_op:
batch_op.drop_constraint(VARIANTSTUDY_FK, type_="foreignkey")
batch_op.create_foreign_key(VARIANTSTUDY_FK, "study", ["id"], ["id"], ondelete="CASCADE")

with op.batch_alter_table("group_metadata", schema=None) as batch_op:
batch_op.alter_column("group_id", existing_type=sa.VARCHAR(length=36), nullable=False)
batch_op.alter_column("study_id", existing_type=sa.VARCHAR(length=36), nullable=False)
batch_op.create_index(batch_op.f("ix_group_metadata_group_id"), ["group_id"], unique=False)
batch_op.create_index(batch_op.f("ix_group_metadata_study_id"), ["study_id"], unique=False)
if dialect_name == "postgresql":
batch_op.drop_constraint("group_metadata_group_id_fkey", type_="foreignkey")
batch_op.drop_constraint("group_metadata_study_id_fkey", type_="foreignkey")
batch_op.create_foreign_key(
"group_metadata_group_id_fkey", "groups", ["group_id"], ["id"], ondelete="CASCADE"
)
batch_op.create_foreign_key(
"group_metadata_study_id_fkey", "study", ["study_id"], ["id"], ondelete="CASCADE"
)


def downgrade() -> None:
dialect_name: str = op.get_context().dialect.name
# SQLite doesn't support dropping foreign keys, so we need to ignore it here
if dialect_name == "postgresql":
with op.batch_alter_table("rawstudy", schema=None) as batch_op:
batch_op.drop_constraint(RAWSTUDY_FK, type_="foreignkey")
batch_op.create_foreign_key(RAWSTUDY_FK, "study", ["id"], ["id"])

with op.batch_alter_table("study_additional_data", schema=None) as batch_op:
batch_op.drop_constraint(STUDY_ADDITIONAL_DATA_FK, type_="foreignkey")
batch_op.create_foreign_key(STUDY_ADDITIONAL_DATA_FK, "study", ["study_id"], ["id"])

with op.batch_alter_table("variantstudy", schema=None) as batch_op:
batch_op.drop_constraint(VARIANTSTUDY_FK, type_="foreignkey")
batch_op.create_foreign_key(VARIANTSTUDY_FK, "study", ["id"], ["id"])

with op.batch_alter_table("group_metadata", schema=None) as batch_op:
# SQLite doesn't support dropping foreign keys, so we need to ignore it here
if dialect_name == "postgresql":
batch_op.drop_constraint("group_metadata_study_id_fkey", type_="foreignkey")
batch_op.drop_constraint("group_metadata_group_id_fkey", type_="foreignkey")
batch_op.create_foreign_key("group_metadata_study_id_fkey", "study", ["study_id"], ["id"])
batch_op.create_foreign_key("group_metadata_group_id_fkey", "groups", ["group_id"], ["id"])
batch_op.drop_index(batch_op.f("ix_group_metadata_study_id"))
batch_op.drop_index(batch_op.f("ix_group_metadata_group_id"))
batch_op.alter_column("study_id", existing_type=sa.VARCHAR(length=36), nullable=True)
batch_op.alter_column("group_id", existing_type=sa.VARCHAR(length=36), nullable=True)
49 changes: 34 additions & 15 deletions antarest/study/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
Integer,
PrimaryKeyConstraint,
String,
Table,
)
from sqlalchemy.orm import relationship # type: ignore

Expand Down Expand Up @@ -50,21 +49,40 @@

NEW_DEFAULT_STUDY_VERSION: str = "860"

groups_metadata = Table(
"group_metadata",
Base.metadata,
Column("group_id", String(36), ForeignKey("groups.id")),
Column("study_id", String(36), ForeignKey("study.id")),
)

class StudyGroup(Base): # type:ignore
"""
A table to manage the many-to-many relationship between `Study` and `Group`
Attributes:
study_id: The ID of the study associated with the group.
group_id: The IS of the group associated with the study.
"""

__tablename__ = "group_metadata"
__table_args__ = (PrimaryKeyConstraint("study_id", "group_id"),)

group_id: str = Column(String(36), ForeignKey("groups.id", ondelete="CASCADE"), index=True, nullable=False)
study_id: str = Column(String(36), ForeignKey("study.id", ondelete="CASCADE"), index=True, nullable=False)

def __str__(self) -> str: # pragma: no cover
cls_name = self.__class__.__name__
return f"[{cls_name}] study_id={self.study_id}, group={self.group_id}"

def __repr__(self) -> str: # pragma: no cover
cls_name = self.__class__.__name__
study_id = self.study_id
group_id = self.group_id
return f"{cls_name}({study_id=}, {group_id=})"


class StudyTag(Base): # type:ignore
"""
A table to manage the many-to-many relationship between `Study` and `Tag`
Attributes:
study_id (str): The ID of the study associated with the tag.
tag_label (str): The label of the tag associated with the study.
study_id: The ID of the study associated with the tag.
tag_label: The label of the tag associated with the study.
"""

__tablename__ = "study_tag"
Expand All @@ -74,7 +92,8 @@ class StudyTag(Base): # type:ignore
tag_label: str = Column(String(40), ForeignKey("tag.label", ondelete="CASCADE"), index=True, nullable=False)

def __str__(self) -> str: # pragma: no cover
return f"[StudyTag] study_id={self.study_id}, tag={self.tag}"
cls_name = self.__class__.__name__
return f"[{cls_name}] study_id={self.study_id}, tag={self.tag}"

def __repr__(self) -> str: # pragma: no cover
cls_name = self.__class__.__name__
Expand All @@ -90,8 +109,8 @@ class Tag(Base): # type:ignore
This class is used to store tags associated with studies.
Attributes:
label (str): The label of the tag.
color (str): The color code associated with the tag.
label: The label of the tag.
color: The color code associated with the tag.
"""

__tablename__ = "tag"
Expand Down Expand Up @@ -130,7 +149,7 @@ class StudyAdditionalData(Base): # type:ignore

study_id = Column(
String(36),
ForeignKey("study.id"),
ForeignKey("study.id", ondelete="CASCADE"),
primary_key=True,
)
author = Column(String(255), default="Unknown")
Expand Down Expand Up @@ -174,7 +193,7 @@ class Study(Base): # type: ignore

tags: t.List[Tag] = relationship(Tag, secondary=StudyTag.__table__, back_populates="studies")
owner = relationship(Identity, uselist=False)
groups = relationship(Group, secondary=lambda: groups_metadata, cascade="")
groups = relationship(Group, secondary=StudyGroup.__table__, cascade="")
additional_data = relationship(
StudyAdditionalData,
uselist=False,
Expand Down Expand Up @@ -230,7 +249,7 @@ class RawStudy(Study):

id = Column(
String(36),
ForeignKey("study.id"),
ForeignKey("study.id", ondelete="CASCADE"),
primary_key=True,
)
content_status = Column(Enum(StudyContentStatus))
Expand Down
21 changes: 14 additions & 7 deletions antarest/study/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,21 +138,19 @@ def save(
def refresh(self, metadata: Study) -> None:
self.session.refresh(metadata)

def get(self, id: str) -> t.Optional[Study]:
def get(self, study_id: str) -> t.Optional[Study]:
"""Get the study by ID or return `None` if not found in database."""
# todo: I think we should use a `entity = with_polymorphic(Study, "*")`
# to make sure RawStudy and VariantStudy fields are also fetched.
# see: antarest.study.service.StudyService.delete_study
# When we fetch a study, we also need to fetch the associated owner and groups
# to check the permissions of the current user efficiently.
study: Study = (
# fmt: off
self.session.query(Study)
.options(joinedload(Study.owner))
.options(joinedload(Study.groups))
.options(joinedload(Study.tags))
.get(id)
# fmt: on
.get(study_id)
)
return study

Expand Down Expand Up @@ -272,10 +270,10 @@ def get_all_raw(self, exists: t.Optional[bool] = None) -> t.Sequence[RawStudy]:
studies: t.Sequence[RawStudy] = query.all()
return studies

def delete(self, id: str) -> None:
def delete(self, id_: str, *ids: str) -> None:
ids = (id_,) + ids
session = self.session
u: Study = session.query(Study).get(id)
session.delete(u)
session.query(Study).filter(Study.id.in_(ids)).delete(synchronize_session=False)
session.commit()

def update_tags(self, study: Study, new_tags: t.Sequence[str]) -> None:
Expand All @@ -292,3 +290,12 @@ def update_tags(self, study: Study, new_tags: t.Sequence[str]) -> None:
study.tags = [Tag(label=tag) for tag in new_labels] + existing_tags
self.session.merge(study)
self.session.commit()

def list_duplicates(self) -> t.List[t.Tuple[str, str]]:
"""
Get list of duplicates as tuples (id, path).
"""
session = self.session
subquery = session.query(Study.path).group_by(Study.path).having(func.count() > 1).subquery()
query = session.query(Study.id, Study.path).filter(Study.path.in_(subquery))
return t.cast(t.List[t.Tuple[str, str]], query.all())
25 changes: 11 additions & 14 deletions antarest/study/service.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import base64
import collections
import contextlib
import io
import json
Expand Down Expand Up @@ -701,20 +702,16 @@ def get_input_matrix_startdate(
return get_start_date(file_study, output_id, level)

def remove_duplicates(self) -> None:
study_paths: t.Dict[str, t.List[str]] = {}
for study in self.repository.get_all():
if isinstance(study, RawStudy) and not study.archived:
path = str(study.path)
if path not in study_paths:
study_paths[path] = []
study_paths[path].append(study.id)

for studies_with_same_path in study_paths.values():
if len(studies_with_same_path) > 1:
logger.info(f"Found studies {studies_with_same_path} with same path, de duplicating")
for study_name in studies_with_same_path[1:]:
logger.info(f"Removing study {study_name}")
self.repository.delete(study_name)
duplicates = self.repository.list_duplicates()
ids: t.List[str] = []
# ids with same path
duplicates_by_path = collections.defaultdict(list)
for study_id, path in duplicates:
duplicates_by_path[path].append(study_id)
for path, study_ids in duplicates_by_path.items():
ids.extend(study_ids[1:])
if ids: # Check if ids is not empty
self.repository.delete(*ids)

def sync_studies_on_disk(self, folders: t.List[StudyFolder], directory: t.Optional[Path] = None) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion antarest/study/storage/variantstudy/model/dbmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class VariantStudy(Study):

id: str = Column(
String(36),
ForeignKey("study.id"),
ForeignKey("study.id", ondelete="CASCADE"),
primary_key=True,
)
generation_task: t.Optional[str] = Column(String(), nullable=True)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/studies_blueprint/test_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,4 @@ def test_variant_study(
)
assert res.status_code == 200, res.json()
duration = time.time() - start
assert 0 <= duration <= 0.1, f"Duration is {duration} seconds"
assert 0 <= duration <= 0.2, f"Duration is {duration} seconds"
42 changes: 23 additions & 19 deletions tests/storage/repository/test_study.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
from datetime import datetime

from sqlalchemy.orm import Session # type: ignore

from antarest.core.cache.business.local_chache import LocalCache
from antarest.core.model import PublicMode
from antarest.login.model import Group, User
from antarest.study.model import DEFAULT_WORKSPACE_NAME, RawStudy, Study, StudyContentStatus
from antarest.study.repository import StudyMetadataRepository
from antarest.study.storage.variantstudy.model.dbmodel import VariantStudy
from tests.helpers import with_db_context


@with_db_context
def test_lifecycle() -> None:
user = User(id=0, name="admin")
def test_lifecycle(db_session: Session) -> None:
repo = StudyMetadataRepository(LocalCache(), session=db_session)

user = User(id=1, name="admin")
group = Group(id="my-group", name="group")
repo = StudyMetadataRepository(LocalCache())

a = Study(
name="a",
version="42",
version="820",
author="John Smith",
created_at=datetime.utcnow(),
updated_at=datetime.utcnow(),
Expand All @@ -26,7 +28,7 @@ def test_lifecycle() -> None:
)
b = RawStudy(
name="b",
version="43",
version="830",
author="Morpheus",
created_at=datetime.utcnow(),
updated_at=datetime.utcnow(),
Expand All @@ -36,7 +38,7 @@ def test_lifecycle() -> None:
)
c = RawStudy(
name="c",
version="43",
version="830",
author="Trinity",
created_at=datetime.utcnow(),
updated_at=datetime.utcnow(),
Expand All @@ -47,7 +49,7 @@ def test_lifecycle() -> None:
)
d = VariantStudy(
name="d",
version="43",
version="830",
author="Mr. Anderson",
created_at=datetime.utcnow(),
updated_at=datetime.utcnow(),
Expand All @@ -57,30 +59,32 @@ def test_lifecycle() -> None:
)

a = repo.save(a)
b = repo.save(b)
a_id = a.id

repo.save(b)
repo.save(c)
repo.save(d)
assert b.id
c = repo.one(a.id)
assert a == c

c = repo.one(a_id)
assert a_id == c.id

assert len(repo.get_all()) == 4
assert len(repo.get_all_raw(exists=True)) == 1
assert len(repo.get_all_raw(exists=False)) == 1
assert len(repo.get_all_raw()) == 2

repo.delete(a.id)
assert repo.get(a.id) is None
repo.delete(a_id)
assert repo.get(a_id) is None


def test_study_inheritance(db_session: Session) -> None:
repo = StudyMetadataRepository(LocalCache(), session=db_session)

@with_db_context
def test_study_inheritance() -> None:
user = User(id=0, name="admin")
group = Group(id="my-group", name="group")
repo = StudyMetadataRepository(LocalCache())
a = RawStudy(
name="a",
version="42",
version="820",
author="John Smith",
created_at=datetime.utcnow(),
updated_at=datetime.utcnow(),
Expand Down
Loading

0 comments on commit 5c269c6

Please sign in to comment.