feat(study-search): optimize the studies search engine (#1890)

Merge pull request #1890 from AntaresSimulatorTeam/feature/ANT-940-study-search-improvement
AntaresSimulatorTeam · Jan 26, 2024 · 37d0f5a · 37d0f5a
2 parents 1b81da3 + 1429686
commit 37d0f5a
Show file tree

Hide file tree

Showing 14 changed files with 1,764 additions and 199 deletions.
diff --git a/antarest/launcher/service.py b/antarest/launcher/service.py
@@ -38,6 +38,7 @@
     XpansionParametersDTO,
 )
 from antarest.launcher.repository import JobResultRepository
+from antarest.study.repository import StudyFilter
 from antarest.study.service import StudyService
 from antarest.study.storage.utils import assert_permission, extract_output_name, find_single_output_path
 
@@ -305,8 +306,14 @@ def _filter_from_user_permission(self, job_results: List[JobResult], user: Optio
         orphan_visibility_threshold = datetime.utcnow() - timedelta(days=ORPHAN_JOBS_VISIBILITY_THRESHOLD)
         allowed_job_results = []
 
-        studies_ids = [job_result.study_id for job_result in job_results]
-        studies = {study.id: study for study in self.study_service.repository.get_all(studies_ids=studies_ids)}
+        study_ids = [job_result.study_id for job_result in job_results]
+        if study_ids:
+            studies = {
+                study.id: study
+                for study in self.study_service.repository.get_all(study_filter=StudyFilter(study_ids=study_ids))
+            }
+        else:
+            studies = {}
 
         for job_result in job_results:
             if job_result.study_id in studies:

diff --git a/antarest/study/repository.py b/antarest/study/repository.py
@@ -1,18 +1,97 @@
 import datetime
+import enum
 import logging
 import typing as t
 
-from sqlalchemy import and_, or_  # type: ignore
+from pydantic import BaseModel, NonNegativeInt
+from sqlalchemy import func, not_, or_  # type: ignore
 from sqlalchemy.orm import Session, joinedload, with_polymorphic  # type: ignore
 
 from antarest.core.interfaces.cache import CacheConstants, ICache
 from antarest.core.utils.fastapi_sqlalchemy import db
+from antarest.login.model import Group
 from antarest.study.common.utils import get_study_information
 from antarest.study.model import DEFAULT_WORKSPACE_NAME, RawStudy, Study, StudyAdditionalData
 
 logger = logging.getLogger(__name__)
 
 
+def escape_like(string: str, escape_char: str = "\\") -> str:
+    """
+    Escape the string parameter used in SQL LIKE expressions.
+
+    Examples::
+
+        from sqlalchemy_utils import escape_like
+
+        query = session.query(User).filter(
+            User.name.ilike(escape_like('John'))
+        )
+
+    Args:
+        string: a string to escape
+        escape_char: escape character
+
+    Returns:
+        Escaped string.
+    """
+    return string.replace(escape_char, escape_char * 2).replace("%", escape_char + "%").replace("_", escape_char + "_")
+
+
+class StudyFilter(BaseModel, frozen=True, extra="forbid"):
+    """Study filter class gathering the main filtering parameters
+
+    Attributes:
+        name: optional name regex of the study to match
+        managed: indicate if just managed studies should be retrieved
+        archived: optional if the study is archived
+        variant: optional if the study is raw study
+        versions: versions to filter by
+        users: users to filter by
+        groups: groups to filter by
+        tags: tags to filter by
+        study_ids: study IDs to filter by
+        exists: if raw study missing
+        workspace: optional workspace of the study
+        folder: optional folder prefix of the study
+    """
+
+    name: str = ""
+    managed: t.Optional[bool] = None
+    archived: t.Optional[bool] = None
+    variant: t.Optional[bool] = None
+    versions: t.Sequence[str] = ()
+    users: t.Sequence[int] = ()
+    groups: t.Sequence[str] = ()
+    tags: t.Sequence[str] = ()
+    study_ids: t.Sequence[str] = ()
+    exists: t.Optional[bool] = None
+    workspace: str = ""
+    folder: str = ""
+
+
+class StudySortBy(str, enum.Enum):
+    """How to sort the results of studies query results"""
+
+    NAME_ASC = "+name"
+    NAME_DESC = "-name"
+    DATE_ASC = "+date"
+    DATE_DESC = "-date"
+
+
+class StudyPagination(BaseModel, frozen=True, extra="forbid"):
+    """
+    Pagination of a studies query results
+
+    Attributes:
+        page_nb: offset
+        page_size: SQL limit
+    """
+
+    page_nb: NonNegativeInt = 0
+    page_size: NonNegativeInt = 0
+
+
 class StudyMetadataRepository:
     """
     Database connector to manage Study entity
@@ -70,6 +149,9 @@ def refresh(self, metadata: Study) -> None:
 
     def get(self, id: str) -> t.Optional[Study]:
         """Get the study by ID or return `None` if not found in database."""
+        # todo: I think we should use a `entity = with_polymorphic(Study, "*")`
+        #  to make sure RawStudy and VariantStudy fields are also fetched.
+        #  see: antarest.study.service.StudyService.delete_study
         # When we fetch a study, we also need to fetch the associated owner and groups
         # to check the permissions of the current user efficiently.
         study: Study = (
@@ -84,6 +166,9 @@ def get(self, id: str) -> t.Optional[Study]:
 
     def one(self, study_id: str) -> Study:
         """Get the study by ID or raise `sqlalchemy.exc.NoResultFound` if not found in database."""
+        # todo: I think we should use a `entity = with_polymorphic(Study, "*")`
+        #  to make sure RawStudy and VariantStudy fields are also fetched.
+        #  see: antarest.study.service.StudyService.delete_study
         # When we fetch a study, we also need to fetch the associated owner and groups
         # to check the permissions of the current user efficiently.
         study: Study = (
@@ -101,37 +186,94 @@ def get_additional_data(self, study_id: str) -> t.Optional[StudyAdditionalData]:
 
     def get_all(
         self,
-        managed: t.Optional[bool] = None,
-        studies_ids: t.Optional[t.List[str]] = None,
-        exists: bool = True,
+        study_filter: StudyFilter = StudyFilter(),
+        sort_by: t.Optional[StudySortBy] = None,
+        pagination: StudyPagination = StudyPagination(),
     ) -> t.List[Study]:
+        """
+        This function goal is to create a search engine throughout the studies with optimal
+        runtime.
+
+        Args:
+            study_filter: composed of all filtering criteria
+            sort_by: how the user would like the results to be sorted
+            pagination: specifies the number of results to displayed in each page and the actually displayed page
+
+        Returns:
+            The matching studies in proper order and pagination
+        """
         # When we fetch a study, we also need to fetch the associated owner and groups
         # to check the permissions of the current user efficiently.
         # We also need to fetch the additional data to display the study information
         # efficiently (see: `utils.get_study_information`)
         entity = with_polymorphic(Study, "*")
 
+        # noinspection PyTypeChecker
         q = self.session.query(entity)
-        if exists:
-            q = q.filter(RawStudy.missing.is_(None))
+        if study_filter.exists is not None:
+            if study_filter.exists:
+                q = q.filter(RawStudy.missing.is_(None))
+            else:
+                q = q.filter(not_(RawStudy.missing.is_(None)))
         q = q.options(joinedload(entity.owner))
         q = q.options(joinedload(entity.groups))
         q = q.options(joinedload(entity.additional_data))
-        if managed is not None:
-            if managed:
+        if study_filter.managed is not None:
+            if study_filter.managed:
                 q = q.filter(or_(entity.type == "variantstudy", RawStudy.workspace == DEFAULT_WORKSPACE_NAME))
             else:
                 q = q.filter(entity.type == "rawstudy")
                 q = q.filter(RawStudy.workspace != DEFAULT_WORKSPACE_NAME)
-        if studies_ids is not None:
-            q = q.filter(entity.id.in_(studies_ids))
+        if study_filter.study_ids:
+            q = q.filter(entity.id.in_(study_filter.study_ids))
+        if study_filter.users:
+            q = q.filter(entity.owner_id.in_(study_filter.users))
+        if study_filter.groups:
+            q = q.join(entity.groups).filter(Group.id.in_(study_filter.groups))
+        if study_filter.archived is not None:
+            q = q.filter(entity.archived == study_filter.archived)
+        if study_filter.name:
+            regex = f"%{escape_like(study_filter.name)}%"
+            q = q.filter(entity.name.ilike(regex))
+        if study_filter.folder:
+            regex = f"{escape_like(study_filter.folder)}%"
+            q = q.filter(entity.folder.ilike(regex))
+        if study_filter.workspace:
+            q = q.filter(RawStudy.workspace == study_filter.workspace)
+        if study_filter.variant is not None:
+            if study_filter.variant:
+                q = q.filter(entity.type == "variantstudy")
+            else:
+                q = q.filter(entity.type == "rawstudy")
+        if study_filter.versions:
+            q = q.filter(entity.version.in_(study_filter.versions))
+
+        if sort_by:
+            if sort_by == StudySortBy.DATE_DESC:
+                q = q.order_by(entity.created_at.desc())
+            elif sort_by == StudySortBy.DATE_ASC:
+                q = q.order_by(entity.created_at.asc())
+            elif sort_by == StudySortBy.NAME_DESC:
+                q = q.order_by(func.upper(entity.name).desc())
+            elif sort_by == StudySortBy.NAME_ASC:
+                q = q.order_by(func.upper(entity.name).asc())
+            else:
+                raise NotImplementedError(sort_by)
+
+        # pagination
+        if pagination.page_nb or pagination.page_size:
+            q = q.offset(pagination.page_nb * pagination.page_size).limit(pagination.page_size)
+
         studies: t.List[Study] = q.all()
         return studies
 
-    def get_all_raw(self, show_missing: bool = True) -> t.List[RawStudy]:
+    def get_all_raw(self, exists: t.Optional[bool] = None) -> t.List[RawStudy]:
         query = self.session.query(RawStudy)
-        if not show_missing:
-            query = query.filter(RawStudy.missing.is_(None))
+        if exists is not None:
+            if exists:
+                query = query.filter(RawStudy.missing.is_(None))
+            else:
+                query = query.filter(not_(RawStudy.missing.is_(None)))
         studies: t.List[RawStudy] = query.all()
         return studies