diff --git a/.github/workflows/commitlint.yml b/.github/workflows/commitlint.yml
new file mode 100644
index 0000000000..8e08ce865c
--- /dev/null
+++ b/.github/workflows/commitlint.yml
@@ -0,0 +1,13 @@
+name: Lint Commit Messages
+on: [pull_request]
+
+permissions:
+  contents: read
+  pull-requests: read
+
+jobs:
+  commitlint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: wagoid/commitlint-github-action@v5
diff --git a/alembic/versions/dae93f1d9110_populate_tag_and_study_tag_tables_with_.py b/alembic/versions/dae93f1d9110_populate_tag_and_study_tag_tables_with_.py
new file mode 100644
index 0000000000..c6c29d3716
--- /dev/null
+++ b/alembic/versions/dae93f1d9110_populate_tag_and_study_tag_tables_with_.py
@@ -0,0 +1,104 @@
+"""
+Populate `tag` and `study_tag` tables from `patch` field in `study_additional_data` table
+
+Revision ID: dae93f1d9110
+Revises: 3c70366b10ea
+Create Date: 2024-02-08 10:30:20.590919
+"""
+import collections
+import itertools
+import json
+import secrets
+
+import sqlalchemy as sa  # type: ignore
+from alembic import op
+from sqlalchemy.engine import Connection  # type: ignore
+
+from antarest.study.css4_colors import COLOR_NAMES
+
+# revision identifiers, used by Alembic.
+revision = "dae93f1d9110"
+down_revision = "3c70366b10ea"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """
+    Populate `tag` and `study_tag` tables from `patch` field in `study_additional_data` table
+
+    Four steps to proceed:
+    - Retrieve the study-tags pairs from the patches in `study_additional_data`.
+    - Delete all rows in `tag` and `study_tag`: tag updates made between revision 3c70366b10ea
+      and this version modify the data in the patches alongside those two tables.
+    - Populate the `tag` table with the unique tag labels, generating their associated colors at random.
+    - Populate `study_tag` using the study-tags pairs.
+    """
+
+    # create a connection to the db
+    connexion: Connection = op.get_bind()
+
+    # retrieve the tags and the study-tag pairs from the db
+    study_tags = connexion.execute("SELECT study_id,patch FROM study_additional_data")
+    tags_by_ids = {}
+    for study_id, patch in study_tags:
+        obj = json.loads(patch or "{}")
+        study = obj.get("study") or {}
+        tags = frozenset(study.get("tags") or ())
+        tags_by_ids[study_id] = tags
+
+    # delete rows in tables `tag` and `study_tag`
+    connexion.execute("DELETE FROM study_tag")
+    connexion.execute("DELETE FROM tag")
+
+    # insert the tags in the `tag` table
+    labels = set(itertools.chain.from_iterable(tags_by_ids.values()))
+    bulk_tags = [{"label": label, "color": secrets.choice(COLOR_NAMES)} for label in labels]
+    if bulk_tags:
+        sql = sa.text("INSERT INTO tag (label, color) VALUES (:label, :color)")
+        connexion.execute(sql, *bulk_tags)
+
+    # Create relationships between studies and tags in the `study_tag` table
+    bulk_study_tags = [{"study_id": id_, "tag_label": lbl} for id_, tags in tags_by_ids.items() for lbl in tags]
+    if bulk_study_tags:
+        sql = sa.text("INSERT INTO study_tag (study_id, tag_label) VALUES (:study_id, :tag_label)")
+        connexion.execute(sql, *bulk_study_tags)
+
+
+def downgrade() -> None:
+    """
+    Restore `patch` field in `study_additional_data` from `tag` and `study_tag` tables
+
+    Three steps to proceed:
+    - Retrieve the study-tags pairs from the `study_tag` table.
+    - Update the study tags in the `study_additional_data` patches using these pairs.
+    - Delete all rows from `tag` and `study_tag`.
+ """ + # create a connection to the db + connexion: Connection = op.get_bind() + + # Creating the `tags_by_ids` mapping from data in the `study_tags` table + tags_by_ids = collections.defaultdict(set) + study_tags = connexion.execute("SELECT study_id, tag_label FROM study_tag") + for study_id, tag_label in study_tags: + tags_by_ids[study_id].add(tag_label) + + # Then, we read objects from the `patch` field of the `study_additional_data` table + objects_by_ids = {} + study_tags = connexion.execute("SELECT study_id, patch FROM study_additional_data") + for study_id, patch in study_tags: + obj = json.loads(patch or "{}") + obj["study"] = obj.get("study") or {} + obj["study"]["tags"] = obj["study"].get("tags") or [] + obj["study"]["tags"] = sorted(tags_by_ids[study_id] | set(obj["study"]["tags"])) + objects_by_ids[study_id] = obj + + # Updating objects in the `study_additional_data` table + bulk_patches = [{"study_id": id_, "patch": json.dumps(obj)} for id_, obj in objects_by_ids.items()] + if bulk_patches: + sql = sa.text("UPDATE study_additional_data SET patch = :patch WHERE study_id = :study_id") + connexion.execute(sql, *bulk_patches) + + # Deleting study_tags and tags + connexion.execute("DELETE FROM study_tag") + connexion.execute("DELETE FROM tag") diff --git a/alembic/versions/fd73601a9075_add_delete_cascade_studies.py b/alembic/versions/fd73601a9075_add_delete_cascade_studies.py new file mode 100644 index 0000000000..3f9f42c684 --- /dev/null +++ b/alembic/versions/fd73601a9075_add_delete_cascade_studies.py @@ -0,0 +1,86 @@ +""" +Add delete cascade constraint to study foreign keys + +Revision ID: fd73601a9075 +Revises: 3c70366b10ea +Create Date: 2024-02-12 17:27:37.314443 +""" +import sqlalchemy as sa # type: ignore +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "fd73601a9075" +down_revision = "dae93f1d9110" +branch_labels = None +depends_on = None + +# noinspection SpellCheckingInspection +RAWSTUDY_FK = "rawstudy_id_fkey" + +# noinspection SpellCheckingInspection +VARIANTSTUDY_FK = "variantstudy_id_fkey" + +# noinspection SpellCheckingInspection +STUDY_ADDITIONAL_DATA_FK = "study_additional_data_study_id_fkey" + + +def upgrade() -> None: + dialect_name: str = op.get_context().dialect.name + + # SQLite doesn't support dropping foreign keys, so we need to ignore it here + if dialect_name == "postgresql": + with op.batch_alter_table("rawstudy", schema=None) as batch_op: + batch_op.drop_constraint(RAWSTUDY_FK, type_="foreignkey") + batch_op.create_foreign_key(RAWSTUDY_FK, "study", ["id"], ["id"], ondelete="CASCADE") + + with op.batch_alter_table("study_additional_data", schema=None) as batch_op: + batch_op.drop_constraint(STUDY_ADDITIONAL_DATA_FK, type_="foreignkey") + batch_op.create_foreign_key(STUDY_ADDITIONAL_DATA_FK, "study", ["study_id"], ["id"], ondelete="CASCADE") + + with op.batch_alter_table("variantstudy", schema=None) as batch_op: + batch_op.drop_constraint(VARIANTSTUDY_FK, type_="foreignkey") + batch_op.create_foreign_key(VARIANTSTUDY_FK, "study", ["id"], ["id"], ondelete="CASCADE") + + with op.batch_alter_table("group_metadata", schema=None) as batch_op: + batch_op.alter_column("group_id", existing_type=sa.VARCHAR(length=36), nullable=False) + batch_op.alter_column("study_id", existing_type=sa.VARCHAR(length=36), nullable=False) + batch_op.create_index(batch_op.f("ix_group_metadata_group_id"), ["group_id"], unique=False) + batch_op.create_index(batch_op.f("ix_group_metadata_study_id"), ["study_id"], unique=False) + if dialect_name == "postgresql": + batch_op.drop_constraint("group_metadata_group_id_fkey", type_="foreignkey") + batch_op.drop_constraint("group_metadata_study_id_fkey", type_="foreignkey") + batch_op.create_foreign_key( + "group_metadata_group_id_fkey", "groups", ["group_id"], ["id"], ondelete="CASCADE" + ) + batch_op.create_foreign_key( + "group_metadata_study_id_fkey", "study", ["study_id"], ["id"], ondelete="CASCADE" + ) + + +def downgrade() -> None: + dialect_name: str = op.get_context().dialect.name + # SQLite doesn't support dropping foreign keys, so we need to ignore it here + if dialect_name == "postgresql": + with op.batch_alter_table("rawstudy", schema=None) as batch_op: + batch_op.drop_constraint(RAWSTUDY_FK, type_="foreignkey") + batch_op.create_foreign_key(RAWSTUDY_FK, "study", ["id"], ["id"]) + + with op.batch_alter_table("study_additional_data", schema=None) as batch_op: + batch_op.drop_constraint(STUDY_ADDITIONAL_DATA_FK, type_="foreignkey") + batch_op.create_foreign_key(STUDY_ADDITIONAL_DATA_FK, "study", ["study_id"], ["id"]) + + with op.batch_alter_table("variantstudy", schema=None) as batch_op: + batch_op.drop_constraint(VARIANTSTUDY_FK, type_="foreignkey") + batch_op.create_foreign_key(VARIANTSTUDY_FK, "study", ["id"], ["id"]) + + with op.batch_alter_table("group_metadata", schema=None) as batch_op: + # SQLite doesn't support dropping foreign keys, so we need to ignore it here + if dialect_name == "postgresql": + batch_op.drop_constraint("group_metadata_study_id_fkey", type_="foreignkey") + batch_op.drop_constraint("group_metadata_group_id_fkey", type_="foreignkey") + batch_op.create_foreign_key("group_metadata_study_id_fkey", "study", ["study_id"], ["id"]) + batch_op.create_foreign_key("group_metadata_group_id_fkey", "groups", ["group_id"], ["id"]) + 
+        batch_op.drop_index(batch_op.f("ix_group_metadata_study_id"))
+        batch_op.drop_index(batch_op.f("ix_group_metadata_group_id"))
+        batch_op.alter_column("study_id", existing_type=sa.VARCHAR(length=36), nullable=True)
+        batch_op.alter_column("group_id", existing_type=sa.VARCHAR(length=36), nullable=True)
diff --git a/antarest/core/filetransfer/service.py b/antarest/core/filetransfer/service.py
index 760573a42f..80a81e6927 100644
--- a/antarest/core/filetransfer/service.py
+++ b/antarest/core/filetransfer/service.py
@@ -43,6 +43,8 @@ def request_download(
         filename: str,
         name: Optional[str] = None,
         owner: Optional[JWTUser] = None,
+        use_notification: bool = True,
+        expiration_time_in_minutes: int = 0,
     ) -> FileDownload:
         fh, path = tempfile.mkstemp(dir=self.tmp_dir, suffix=filename)
         os.close(fh)
@@ -55,36 +57,40 @@ def request_download(
             path=str(tmpfile),
             owner=owner.impersonator if owner is not None else None,
             expiration_date=datetime.datetime.utcnow()
-            + datetime.timedelta(minutes=self.download_default_expiration_timeout_minutes),
+            + datetime.timedelta(
+                minutes=expiration_time_in_minutes or self.download_default_expiration_timeout_minutes
+            ),
         )
         self.repository.add(download)
-        self.event_bus.push(
-            Event(
-                type=EventType.DOWNLOAD_CREATED,
-                payload=download.to_dto(),
-                permissions=PermissionInfo(owner=owner.impersonator)
-                if owner
-                else PermissionInfo(public_mode=PublicMode.READ),
+        if use_notification:
+            self.event_bus.push(
+                Event(
+                    type=EventType.DOWNLOAD_CREATED,
+                    payload=download.to_dto(),
+                    permissions=PermissionInfo(owner=owner.impersonator)
+                    if owner
+                    else PermissionInfo(public_mode=PublicMode.READ),
+                )
             )
-        )
         return download
 
-    def set_ready(self, download_id: str) -> None:
+    def set_ready(self, download_id: str, use_notification: bool = True) -> None:
         download = self.repository.get(download_id)
         if not download:
             raise FileDownloadNotFound()
 
         download.ready = True
         self.repository.save(download)
-        self.event_bus.push(
-            Event(
-                type=EventType.DOWNLOAD_READY,
-                payload=download.to_dto(),
-                permissions=PermissionInfo(owner=download.owner)
-                if download.owner
-                else PermissionInfo(public_mode=PublicMode.READ),
+        if use_notification:
+            self.event_bus.push(
+                Event(
+                    type=EventType.DOWNLOAD_READY,
+                    payload=download.to_dto(),
+                    permissions=PermissionInfo(owner=download.owner)
+                    if download.owner
+                    else PermissionInfo(public_mode=PublicMode.READ),
+                )
             )
-        )
 
     def fail(self, download_id: str, reason: str = "") -> None:
         download = self.repository.get(download_id)
diff --git a/antarest/study/model.py b/antarest/study/model.py
index fe10b4f211..5079198296 100644
--- a/antarest/study/model.py
+++ b/antarest/study/model.py
@@ -16,7 +16,6 @@
     Integer,
     PrimaryKeyConstraint,
     String,
-    Table,
 )
 from sqlalchemy.orm import relationship  # type: ignore
 
@@ -50,12 +49,31 @@
 NEW_DEFAULT_STUDY_VERSION: str = "860"
 
 
-groups_metadata = Table(
-    "group_metadata",
-    Base.metadata,
-    Column("group_id", String(36), ForeignKey("groups.id")),
-    Column("study_id", String(36), ForeignKey("study.id")),
-)
+
+class StudyGroup(Base):  # type:ignore
+    """
+    A table to manage the many-to-many relationship between `Study` and `Group`
+
+    Attributes:
+        study_id: The ID of the study associated with the group.
+        group_id: The ID of the group associated with the study.
+ """ + + __tablename__ = "group_metadata" + __table_args__ = (PrimaryKeyConstraint("study_id", "group_id"),) + + group_id: str = Column(String(36), ForeignKey("groups.id", ondelete="CASCADE"), index=True, nullable=False) + study_id: str = Column(String(36), ForeignKey("study.id", ondelete="CASCADE"), index=True, nullable=False) + + def __str__(self) -> str: # pragma: no cover + cls_name = self.__class__.__name__ + return f"[{cls_name}] study_id={self.study_id}, group={self.group_id}" + + def __repr__(self) -> str: # pragma: no cover + cls_name = self.__class__.__name__ + study_id = self.study_id + group_id = self.group_id + return f"{cls_name}({study_id=}, {group_id=})" class StudyTag(Base): # type:ignore @@ -63,8 +81,8 @@ class StudyTag(Base): # type:ignore A table to manage the many-to-many relationship between `Study` and `Tag` Attributes: - study_id (str): The ID of the study associated with the tag. - tag_label (str): The label of the tag associated with the study. + study_id: The ID of the study associated with the tag. + tag_label: The label of the tag associated with the study. """ __tablename__ = "study_tag" @@ -74,7 +92,8 @@ class StudyTag(Base): # type:ignore tag_label: str = Column(String(40), ForeignKey("tag.label", ondelete="CASCADE"), index=True, nullable=False) def __str__(self) -> str: # pragma: no cover - return f"[StudyTag] study_id={self.study_id}, tag={self.tag}" + cls_name = self.__class__.__name__ + return f"[{cls_name}] study_id={self.study_id}, tag={self.tag}" def __repr__(self) -> str: # pragma: no cover cls_name = self.__class__.__name__ @@ -90,8 +109,8 @@ class Tag(Base): # type:ignore This class is used to store tags associated with studies. Attributes: - label (str): The label of the tag. - color (str): The color code associated with the tag. + label: The label of the tag. + color: The color code associated with the tag. """ __tablename__ = "tag" @@ -130,7 +149,7 @@ class StudyAdditionalData(Base): # type:ignore study_id = Column( String(36), - ForeignKey("study.id"), + ForeignKey("study.id", ondelete="CASCADE"), primary_key=True, ) author = Column(String(255), default="Unknown") @@ -174,7 +193,7 @@ class Study(Base): # type: ignore tags: t.List[Tag] = relationship(Tag, secondary=StudyTag.__table__, back_populates="studies") owner = relationship(Identity, uselist=False) - groups = relationship(Group, secondary=lambda: groups_metadata, cascade="") + groups = relationship(Group, secondary=StudyGroup.__table__, cascade="") additional_data = relationship( StudyAdditionalData, uselist=False, @@ -230,7 +249,7 @@ class RawStudy(Study): id = Column( String(36), - ForeignKey("study.id"), + ForeignKey("study.id", ondelete="CASCADE"), primary_key=True, ) content_status = Column(Enum(StudyContentStatus)) diff --git a/antarest/study/repository.py b/antarest/study/repository.py index 3aa6e60681..9d6c9317fb 100644 --- a/antarest/study/repository.py +++ b/antarest/study/repository.py @@ -138,7 +138,7 @@ def save( def refresh(self, metadata: Study) -> None: self.session.refresh(metadata) - def get(self, id: str) -> t.Optional[Study]: + def get(self, study_id: str) -> t.Optional[Study]: """Get the study by ID or return `None` if not found in database.""" # todo: I think we should use a `entity = with_polymorphic(Study, "*")` # to make sure RawStudy and VariantStudy fields are also fetched. 
@@ -146,13 +146,11 @@ def get(self, id: str) -> t.Optional[Study]: # When we fetch a study, we also need to fetch the associated owner and groups # to check the permissions of the current user efficiently. study: Study = ( - # fmt: off self.session.query(Study) .options(joinedload(Study.owner)) .options(joinedload(Study.groups)) .options(joinedload(Study.tags)) - .get(id) - # fmt: on + .get(study_id) ) return study @@ -272,10 +270,10 @@ def get_all_raw(self, exists: t.Optional[bool] = None) -> t.Sequence[RawStudy]: studies: t.Sequence[RawStudy] = query.all() return studies - def delete(self, id: str) -> None: + def delete(self, id_: str, *ids: str) -> None: + ids = (id_,) + ids session = self.session - u: Study = session.query(Study).get(id) - session.delete(u) + session.query(Study).filter(Study.id.in_(ids)).delete(synchronize_session=False) session.commit() def update_tags(self, study: Study, new_tags: t.Sequence[str]) -> None: @@ -292,3 +290,12 @@ def update_tags(self, study: Study, new_tags: t.Sequence[str]) -> None: study.tags = [Tag(label=tag) for tag in new_labels] + existing_tags self.session.merge(study) self.session.commit() + + def list_duplicates(self) -> t.List[t.Tuple[str, str]]: + """ + Get list of duplicates as tuples (id, path). + """ + session = self.session + subquery = session.query(Study.path).group_by(Study.path).having(func.count() > 1).subquery() + query = session.query(Study.id, Study.path).filter(Study.path.in_(subquery)) + return t.cast(t.List[t.Tuple[str, str]], query.all()) diff --git a/antarest/study/service.py b/antarest/study/service.py index ae86fe62ae..9b22ae7638 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -1,4 +1,5 @@ import base64 +import collections import contextlib import io import json @@ -12,6 +13,7 @@ from uuid import uuid4 import numpy as np +import pandas as pd from fastapi import HTTPException, UploadFile from markupsafe import escape from starlette.responses import FileResponse, Response @@ -20,6 +22,7 @@ from antarest.core.exceptions import ( BadEditInstructionException, CommandApplicationError, + IncorrectPathError, NotAManagedStudyException, StudyDeletionNotAllowed, StudyNotFoundError, @@ -54,6 +57,7 @@ from antarest.study.business.areas.thermal_management import ThermalManager from antarest.study.business.binding_constraint_management import BindingConstraintManager from antarest.study.business.config_management import ConfigManager +from antarest.study.business.correlation_management import CorrelationManager from antarest.study.business.district_manager import DistrictManager from antarest.study.business.general_management import GeneralManager from antarest.study.business.link_management import LinkInfoDTO, LinkManager @@ -93,6 +97,7 @@ StudySimResultDTO, ) from antarest.study.repository import StudyFilter, StudyMetadataRepository, StudyPagination, StudySortBy +from antarest.study.storage.matrix_profile import adjust_matrix_columns_index from antarest.study.storage.rawstudy.model.filesystem.config.model import FileStudyTreeConfigDTO from antarest.study.storage.rawstudy.model.filesystem.folder_node import ChildNotFoundError from antarest.study.storage.rawstudy.model.filesystem.ini_file_node import IniFileNode @@ -268,6 +273,7 @@ def __init__( self.xpansion_manager = XpansionManager(self.storage_service) self.matrix_manager = MatrixManager(self.storage_service) self.binding_constraint_manager = BindingConstraintManager(self.storage_service) + self.correlation_manager = CorrelationManager(self.storage_service) 
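+        # The correlation manager is reused by `get_matrix_with_index_and_header` (added below)
+        # to build the hydro correlation matrix for the "input/hydro/correlation" path.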
self.cache_service = cache_service self.config = config self.on_deletion_callbacks: t.List[t.Callable[[str], None]] = [] @@ -696,20 +702,16 @@ def get_input_matrix_startdate( return get_start_date(file_study, output_id, level) def remove_duplicates(self) -> None: - study_paths: t.Dict[str, t.List[str]] = {} - for study in self.repository.get_all(): - if isinstance(study, RawStudy) and not study.archived: - path = str(study.path) - if path not in study_paths: - study_paths[path] = [] - study_paths[path].append(study.id) - - for studies_with_same_path in study_paths.values(): - if len(studies_with_same_path) > 1: - logger.info(f"Found studies {studies_with_same_path} with same path, de duplicating") - for study_name in studies_with_same_path[1:]: - logger.info(f"Removing study {study_name}") - self.repository.delete(study_name) + duplicates = self.repository.list_duplicates() + ids: t.List[str] = [] + # ids with same path + duplicates_by_path = collections.defaultdict(list) + for study_id, path in duplicates: + duplicates_by_path[path].append(study_id) + for path, study_ids in duplicates_by_path.items(): + ids.extend(study_ids[1:]) + if ids: # Check if ids is not empty + self.repository.delete(*ids) def sync_studies_on_disk(self, folders: t.List[StudyFolder], directory: t.Optional[Path] = None) -> None: """ @@ -2379,3 +2381,44 @@ def get_disk_usage(self, uuid: str, params: RequestParameters) -> int: study_path = self.storage_service.raw_study_service.get_study_path(study) # If the study is a variant, it's possible that it only exists in DB and not on disk. If so, we return 0. return get_disk_usage(study_path) if study_path.exists() else 0 + + def get_matrix_with_index_and_header( + self, *, study_id: str, path: str, with_index: bool, with_header: bool, parameters: RequestParameters + ) -> pd.DataFrame: + matrix_path = Path(path) + study = self.get_study(study_id) + + if matrix_path.parts in [("input", "hydro", "allocation"), ("input", "hydro", "correlation")]: + all_areas = t.cast( + t.List[AreaInfoDTO], + self.get_all_areas(study_id, area_type=AreaType.AREA, ui=False, params=parameters), + ) + if matrix_path.parts[-1] == "allocation": + hydro_matrix = self.allocation_manager.get_allocation_matrix(study, all_areas) + else: + hydro_matrix = self.correlation_manager.get_correlation_matrix(all_areas, study, []) # type: ignore + return pd.DataFrame(data=hydro_matrix.data, columns=hydro_matrix.columns, index=hydro_matrix.index) + + matrix_obj = self.get(study_id, path, depth=3, formatted=True, params=parameters) + if set(matrix_obj) != {"data", "index", "columns"}: + raise IncorrectPathError(f"The provided path does not point to a valid matrix: '{path}'") + if not matrix_obj["data"]: + return pd.DataFrame() + + df_matrix = pd.DataFrame(**matrix_obj) + if with_index: + matrix_index = self.get_input_matrix_startdate(study_id, path, parameters) + time_column = pd.date_range( + start=matrix_index.start_date, periods=len(df_matrix), freq=matrix_index.level.value[0] + ) + df_matrix.index = time_column + + adjust_matrix_columns_index( + df_matrix, + path, + with_index=with_index, + with_header=with_header, + study_version=int(study.version), + ) + + return df_matrix diff --git a/antarest/study/storage/matrix_profile.py b/antarest/study/storage/matrix_profile.py new file mode 100644 index 0000000000..7dc137dc10 --- /dev/null +++ b/antarest/study/storage/matrix_profile.py @@ -0,0 +1,211 @@ +import calendar +import copy +import fnmatch +import typing as t +from pathlib import Path + +import pandas as pd + 
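+# The profiles below map matrix file patterns to human-readable column and row labels.
+# Minimal usage sketch of `adjust_matrix_columns_index` (defined at the end of this
+# module); the area id "fr" and the 365x4 frame are purely illustrative:
+#
+#   df = pd.DataFrame([[0.0] * 4] * 365)
+#   adjust_matrix_columns_index(
+#       df,
+#       "input/hydro/common/capacity/maxpower_fr",
+#       with_index=False,
+#       with_header=True,
+#       study_version=860,
+#   )
+#   # df.columns now reads "Generating Max Power (MW)", ..., "Pumping Max Energy (Hours at Pmax)".
+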
+ +class _MatrixProfile(t.NamedTuple): + """ + Matrix profile for time series or specific matrices. + """ + + cols: t.Sequence[str] + rows: t.Sequence[str] + + def process_dataframe( + self, + df: pd.DataFrame, + matrix_path: str, + *, + with_index: bool, + with_header: bool, + ) -> None: + """ + Adjust the column names and index of a dataframe according to the matrix profile. + + *NOTE:* The modification is done in place. + + Args: + df: The dataframe to process. + matrix_path: The path of the matrix file, relative to the study directory. + with_index: Whether to set the index of the dataframe. + with_header: Whether to set the column names of the dataframe. + """ + if with_header: + if Path(matrix_path).parts[1] == "links": + cols = self._process_links_columns(matrix_path) + else: + cols = self.cols + if cols: + df.columns = pd.Index(cols) + else: + df.columns = pd.Index([f"TS-{i}" for i in range(1, len(df.columns) + 1)]) + + if with_index and self.rows: + df.index = pd.Index(self.rows) + + def _process_links_columns(self, matrix_path: str) -> t.Sequence[str]: + """Process column names specific to the links matrices.""" + path_parts = Path(matrix_path).parts + area1_id = path_parts[2] + area2_id = path_parts[3] + result = list(self.cols) + for k, col in enumerate(result): + if col == "Hurdle costs direct": + result[k] = f"{col} ({area1_id}->{area2_id})" + elif col == "Hurdle costs indirect": + result[k] = f"{col} ({area2_id}->{area1_id})" + return result + + +_SPECIFIC_MATRICES: t.Dict[str, _MatrixProfile] +""" +The dictionary ``_SPECIFIC_MATRICES`` maps file patterns to ``_MatrixProfile`` objects, +representing non-time series matrices. +It's used in the `adjust_matrix_columns_index` method to fetch matrix profiles based on study versions. +""" + + +# noinspection SpellCheckingInspection +_SPECIFIC_MATRICES = { + "input/hydro/common/capacity/creditmodulations_*": _MatrixProfile( + cols=[str(i) for i in range(101)], + rows=["Generating Power", "Pumping Power"], + ), + "input/hydro/common/capacity/maxpower_*": _MatrixProfile( + cols=[ + "Generating Max Power (MW)", + "Generating Max Energy (Hours at Pmax)", + "Pumping Max Power (MW)", + "Pumping Max Energy (Hours at Pmax)", + ], + rows=[], + ), + "input/hydro/common/capacity/reservoir_*": _MatrixProfile( + # Values are displayed in % in the UI, but the actual values are in p.u. (per unit) + cols=["Lev Low (p.u)", "Lev Avg (p.u)", "Lev High (p.u)"], + rows=[], + ), + "input/hydro/common/capacity/waterValues_*": _MatrixProfile( + cols=[f"{i}%" for i in range(101)], + rows=[], + ), + "input/hydro/series/*/mod": _MatrixProfile(cols=[], rows=[]), + "input/hydro/series/*/ror": _MatrixProfile(cols=[], rows=[]), + "input/hydro/common/capacity/inflowPattern_*": _MatrixProfile(cols=["Inflow Pattern (X)"], rows=[]), + "input/hydro/prepro/*/energy": _MatrixProfile( + cols=["Expectation (MWh)", "Std Deviation (MWh)", "Min. (MWh)", "Max. (MWh)", "ROR Share"], + rows=calendar.month_name[1:], + ), + "input/thermal/prepro/*/*/modulation": _MatrixProfile( + cols=["Marginal cost modulation", "Market bid modulation", "Capacity modulation", "Min gen modulation"], + rows=[], + ), + "input/thermal/prepro/*/*/data": _MatrixProfile( + cols=["FO Duration", "PO Duration", "FO Rate", "PO Rate", "NPO Min", "NPO Max"], + rows=[], + ), + "input/reserves/*": _MatrixProfile( + cols=["Primary Res. (draft)", "Strategic Res. 
(draft)", "DSM", "Day Ahead"], + rows=[], + ), + "input/misc-gen/miscgen-*": _MatrixProfile( + cols=["CHP", "Bio Mass", "Bio Gaz", "Waste", "GeoThermal", "Other", "PSP", "ROW Balance"], + rows=[], + ), + "input/bindingconstraints/*": _MatrixProfile(cols=["<", ">", "="], rows=[]), + "input/links/*/*": _MatrixProfile( + cols=[ + "Capacités de transmission directes", + "Capacités de transmission indirectes", + "Hurdle costs direct", + "Hurdle costs indirect", + "Impedances", + "Loop flow", + "P.Shift Min", + "P.Shift Max", + ], + rows=[], + ), +} + +_SPECIFIC_MATRICES_820 = copy.deepcopy(_SPECIFIC_MATRICES) +"""Specific matrices for study version 8.2.""" + +_SPECIFIC_MATRICES_820["input/links/*/*"] = _MatrixProfile( + cols=[ + "Hurdle costs direct", + "Hurdle costs indirect", + "Impedances", + "Loop flow", + "P.Shift Min", + "P.Shift Max", + ], + rows=[], +) + +# Specific matrices for study version 8.6 +_SPECIFIC_MATRICES_860 = copy.deepcopy(_SPECIFIC_MATRICES_820) +"""Specific matrices for study version 8.6.""" + +# noinspection SpellCheckingInspection +# +_SPECIFIC_MATRICES_860["input/hydro/series/*/mingen"] = _MatrixProfile(cols=[], rows=[]) + +_SPECIFIC_MATRICES_870 = copy.deepcopy(_SPECIFIC_MATRICES_820) +"""Specific matrices for study version 8.7.""" + +# noinspection SpellCheckingInspection +# Scenarized RHS for binding constraints +_SPECIFIC_MATRICES_870["input/bindingconstraints/*"] = _MatrixProfile(cols=[], rows=[]) + + +def adjust_matrix_columns_index( + df: pd.DataFrame, matrix_path: str, with_index: bool, with_header: bool, study_version: int +) -> None: + """ + Adjust the column names and index of a dataframe according to the matrix profile. + + *NOTE:* The modification is done in place. + + Args: + df: The dataframe to process. + matrix_path: The path of the matrix file, relative to the study directory. + with_index: Whether to set the index of the dataframe. + with_header: Whether to set the column names of the dataframe. + study_version: The version of the study. + """ + # Get the matrix profiles for a given study version + if study_version < 820: + matrix_profiles = _SPECIFIC_MATRICES + elif study_version < 860: + matrix_profiles = _SPECIFIC_MATRICES_820 + elif study_version < 870: + matrix_profiles = _SPECIFIC_MATRICES_860 + else: + matrix_profiles = _SPECIFIC_MATRICES_870 + + # Apply the matrix profile to the dataframe to adjust the column names and index + for pattern, matrix_profile in matrix_profiles.items(): + if fnmatch.fnmatch(matrix_path, pattern): + matrix_profile.process_dataframe( + df, + matrix_path, + with_index=with_index, + with_header=with_header, + ) + return + + if fnmatch.fnmatch(matrix_path, "output/*"): + # Outputs already have their own column names + return + + # The matrix may be a time series, in which case we don't need to adjust anything + # (the "Time" columns is already the index) + # Column names should be Monte-Carlo years: "TS-1", "TS-2", ... 
+ df.columns = pd.Index([f"TS-{i}" for i in range(1, len(df.columns) + 1)]) + + return None diff --git a/antarest/study/storage/utils.py b/antarest/study/storage/utils.py index a0fc7a02fe..365eb1f370 100644 --- a/antarest/study/storage/utils.py +++ b/antarest/study/storage/utils.py @@ -243,30 +243,9 @@ def assert_permission( MATRIX_INPUT_DAYS_COUNT = 365 -MONTHS = ( - "January", - "February", - "March", - "April", - "May", - "June", - "July", - "August", - "September", - "October", - "November", - "December", -) +MONTHS = calendar.month_name[1:] -DAY_NAMES = ( - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", - "Sunday", -) +DAY_NAMES = calendar.day_name[:] def get_start_date( @@ -293,7 +272,7 @@ def get_start_date( starting_month_index = MONTHS.index(starting_month.title()) + 1 starting_day_index = DAY_NAMES.index(starting_day.title()) - target_year = 2000 + target_year = 2018 while True: if leapyear == calendar.isleap(target_year): first_day = datetime(target_year, starting_month_index, 1) diff --git a/antarest/study/storage/variantstudy/model/dbmodel.py b/antarest/study/storage/variantstudy/model/dbmodel.py index bbe264f89f..9272eb797f 100644 --- a/antarest/study/storage/variantstudy/model/dbmodel.py +++ b/antarest/study/storage/variantstudy/model/dbmodel.py @@ -77,7 +77,7 @@ class VariantStudy(Study): id: str = Column( String(36), - ForeignKey("study.id"), + ForeignKey("study.id", ondelete="CASCADE"), primary_key=True, ) generation_task: t.Optional[str] = Column(String(), nullable=True) diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index 41e214d1ad..d452a53e9e 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -2,14 +2,16 @@ import io import json import logging -import pathlib import typing as t +from pathlib import Path, PurePosixPath +import pandas as pd from fastapi import APIRouter, Body, Depends, File, HTTPException from fastapi.params import Param, Query -from starlette.responses import JSONResponse, PlainTextResponse, Response, StreamingResponse +from starlette.responses import FileResponse, JSONResponse, PlainTextResponse, Response, StreamingResponse from antarest.core.config import Config +from antarest.core.filetransfer.model import FileDownloadNotFound from antarest.core.jwt import JWTUser from antarest.core.model import SUB_JSON from antarest.core.requests import RequestParameters @@ -17,6 +19,7 @@ from antarest.core.utils.utils import sanitize_uuid from antarest.core.utils.web import APITag from antarest.login.auth import Auth +from antarest.study.business.enum_ignore_case import EnumIgnoreCase from antarest.study.service import StudyService logger = logging.getLogger(__name__) @@ -49,6 +52,54 @@ } +class TableExportFormat(EnumIgnoreCase): + """Export format for tables.""" + + XLSX = "xlsx" + TSV = "tsv" + + def __str__(self) -> str: + """Return the format as a string for display.""" + return self.value.title() + + @property + def media_type(self) -> str: + """Return the media type used for the HTTP response.""" + if self == TableExportFormat.XLSX: + # noinspection SpellCheckingInspection + return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + elif self == TableExportFormat.TSV: + return "text/tab-separated-values" + else: # pragma: no cover + raise NotImplementedError(f"Export format '{self}' is not implemented") + + @property + def suffix(self) -> str: + """Return the file suffix for the format.""" + if 
self == TableExportFormat.XLSX: + return ".xlsx" + elif self == TableExportFormat.TSV: + return ".tsv" + else: # pragma: no cover + raise NotImplementedError(f"Export format '{self}' is not implemented") + + def export_table( + self, + df: pd.DataFrame, + export_path: t.Union[str, Path], + *, + with_index: bool = True, + with_header: bool = True, + ) -> None: + """Export a table to a file in the given format.""" + if self == TableExportFormat.XLSX: + return df.to_excel(export_path, index=with_index, header=with_header, engine="openpyxl") + elif self == TableExportFormat.TSV: + return df.to_csv(export_path, sep="\t", index=with_index, header=with_header, float_format="%.6f") + else: # pragma: no cover + raise NotImplementedError(f"Export format '{self}' is not implemented") + + def create_raw_study_routes( study_service: StudyService, config: Config, @@ -88,7 +139,7 @@ def get_study( - `formatted`: A flag specifying whether the data should be returned in a formatted manner. Returns the fetched data: a JSON object (in most cases), a plain text file - or a file attachment (Microsoft Office document, CSV/TSV file...). + or a file attachment (Microsoft Office document, TSV/TSV file...). """ logger.info( f"📘 Fetching data at {path} (depth={depth}) from study {uuid}", @@ -99,10 +150,10 @@ def get_study( if isinstance(output, bytes): # Guess the suffix form the target data - resource_path = pathlib.PurePosixPath(path) + resource_path = PurePosixPath(path) parent_cfg = study_service.get(uuid, str(resource_path.parent), depth=2, formatted=True, params=parameters) child = parent_cfg[resource_path.name] - suffix = pathlib.PurePosixPath(child).suffix + suffix = PurePosixPath(child).suffix content_type, encoding = CONTENT_TYPES.get(suffix, (None, None)) if content_type == "application/json": @@ -243,4 +294,67 @@ def validate( ) return study_service.check_errors(uuid) + @bp.get( + "/studies/{uuid}/raw/download", + summary="Download a matrix in a given format", + tags=[APITag.study_raw_data], + ) + def get_matrix( + uuid: str, + matrix_path: str = Query( # type: ignore + ..., alias="path", description="Relative path of the matrix to download", title="Matrix Path" + ), + export_format: TableExportFormat = Query( # type: ignore + TableExportFormat.XLSX, alias="format", description="Export format", title="Export Format" + ), + with_header: bool = Query( # type: ignore + True, alias="header", description="Whether to include the header or not", title="With Header" + ), + with_index: bool = Query( # type: ignore + True, alias="index", description="Whether to include the index or not", title="With Index" + ), + current_user: JWTUser = Depends(auth.get_current_user), + ) -> FileResponse: + parameters = RequestParameters(user=current_user) + df_matrix = study_service.get_matrix_with_index_and_header( + study_id=uuid, + path=matrix_path, + with_index=with_index, + with_header=with_header, + parameters=parameters, + ) + + matrix_name = Path(matrix_path).stem + export_file_download = study_service.file_transfer_manager.request_download( + f"{matrix_name}{export_format.suffix}", + f"Exporting matrix '{matrix_name}' to {export_format} format for study '{uuid}'", + current_user, + use_notification=False, + expiration_time_in_minutes=10, + ) + export_path = Path(export_file_download.path) + export_id = export_file_download.id + + try: + export_format.export_table(df_matrix, export_path, with_index=with_index, with_header=with_header) + study_service.file_transfer_manager.set_ready(export_id, use_notification=False) + 
except ValueError as e: + study_service.file_transfer_manager.fail(export_id, str(e)) + raise HTTPException( + status_code=http.HTTPStatus.UNPROCESSABLE_ENTITY, + detail=f"Cannot replace '{export_path}' due to Excel policy: {e}", + ) from e + except FileDownloadNotFound as e: + study_service.file_transfer_manager.fail(export_id, str(e)) + raise HTTPException( + status_code=http.HTTPStatus.UNPROCESSABLE_ENTITY, + detail=f"The file download does not exist in database :{str(e)}", + ) from e + + return FileResponse( + export_path, + headers={"Content-Disposition": f'attachment; filename="{export_file_download.filename}"'}, + media_type=export_format.media_type, + ) + return bp diff --git a/docs/assets/media/user-guide/study/areas/05-hydro.min-generation.series.png b/docs/assets/media/user-guide/study/areas/05-hydro.min-generation.series.png new file mode 100644 index 0000000000..23372726ab Binary files /dev/null and b/docs/assets/media/user-guide/study/areas/05-hydro.min-generation.series.png differ diff --git a/docs/user-guide/study/areas/05-hydro.md b/docs/user-guide/study/areas/05-hydro.md index 40a02d4eca..c2a535bb2c 100644 --- a/docs/user-guide/study/areas/05-hydro.md +++ b/docs/user-guide/study/areas/05-hydro.md @@ -71,3 +71,9 @@ This tab allows you to configure the hydro storage time series of the hydraulic ## Run of River This tab allows you to configure the run of river time series of the hydraulic generators. + +## Minimum Generation + +The "Min Gen." tab is dedicated to configuring the minimum generation levels of the hydraulic generators. This tab presents a time series that represents the minimum hourly production for one or more Monte-Carlo years. + +![05-hydro.min-generation.series.png](../../../assets/media/user-guide/study/areas/05-hydro.min-generation.series.png) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4e12840d32..2d8f4be828 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ Jinja2~=3.0.3 jsonref~=0.2 MarkupSafe~=2.0.1 numpy~=1.22.1 +openpyxl~=3.1.2 pandas~=1.4.0 paramiko~=2.12.0 plyer~=2.0.0 diff --git a/scripts/rollback.sh b/scripts/rollback.sh index bf92685dc4..46d04a7966 100755 --- a/scripts/rollback.sh +++ b/scripts/rollback.sh @@ -12,5 +12,5 @@ CUR_DIR=$(cd "$(dirname "$0")" && pwd) BASE_DIR=$(dirname "$CUR_DIR") cd "$BASE_DIR" -alembic downgrade 1f5db5dfad80 +alembic downgrade 3c70366b10ea cd - diff --git a/tests/integration/raw_studies_blueprint/test_download_matrices.py b/tests/integration/raw_studies_blueprint/test_download_matrices.py new file mode 100644 index 0000000000..ca2c501374 --- /dev/null +++ b/tests/integration/raw_studies_blueprint/test_download_matrices.py @@ -0,0 +1,400 @@ +import datetime +import io +import typing as t + +import numpy as np +import pandas as pd +import pytest +from starlette.testclient import TestClient + +from antarest.core.tasks.model import TaskStatus +from tests.integration.utils import wait_task_completion + + +class Proxy: + def __init__(self, client: TestClient, user_access_token: str): + self.client = client + self.user_access_token = user_access_token + self.headers = {"Authorization": f"Bearer {user_access_token}"} + + +class PreparerProxy(Proxy): + def copy_upgrade_study(self, ref_study_id, target_version=820): + """ + Copy a study in the managed workspace and upgrade it to a specific version + """ + # Prepare a managed study to test specific matrices for version 8.2 + res = self.client.post( + f"/v1/studies/{ref_study_id}/copy", + params={"dest": "copied-820", 
"use_task": False}, + headers=self.headers, + ) + res.raise_for_status() + study_820_id = res.json() + + res = self.client.put( + f"/v1/studies/{study_820_id}/upgrade", + params={"target_version": target_version}, + headers=self.headers, + ) + res.raise_for_status() + task_id = res.json() + assert task_id + + task = wait_task_completion(self.client, self.user_access_token, task_id, timeout=20) + assert task.status == TaskStatus.COMPLETED + return study_820_id + + def upload_matrix(self, study_id: str, matrix_path: str, df: pd.DataFrame) -> None: + tsv = io.BytesIO() + df.to_csv(tsv, sep="\t", index=False, header=False) + tsv.seek(0) + # noinspection SpellCheckingInspection + res = self.client.put( + f"/v1/studies/{study_id}/raw", + params={"path": matrix_path, "create_missing": True}, + headers=self.headers, + files={"file": tsv, "create_missing": "true"}, + ) + res.raise_for_status() + + def create_variant(self, parent_id: str, *, name: str) -> str: + res = self.client.post( + f"/v1/studies/{parent_id}/variants", + headers=self.headers, + params={"name": name}, + ) + res.raise_for_status() + variant_id = res.json() + return variant_id + + def generate_snapshot(self, variant_id: str, denormalize=False, from_scratch=True) -> None: + # Generate a snapshot for the variant + res = self.client.put( + f"/v1/studies/{variant_id}/generate", + headers=self.headers, + params={"denormalize": denormalize, "from_scratch": from_scratch}, + ) + res.raise_for_status() + task_id = res.json() + assert task_id + + task = wait_task_completion(self.client, self.user_access_token, task_id, timeout=20) + assert task.status == TaskStatus.COMPLETED + + def create_area(self, parent_id, *, name: str, country: str = "FR") -> str: + res = self.client.post( + f"/v1/studies/{parent_id}/areas", + headers=self.headers, + json={"name": name, "type": "AREA", "metadata": {"country": country}}, + ) + res.raise_for_status() + area_id = res.json()["id"] + return area_id + + def update_general_data(self, study_id: str, **data: t.Any): + res = self.client.put( + f"/v1/studies/{study_id}/config/general/form", + json=data, + headers=self.headers, + ) + res.raise_for_status() + + +@pytest.mark.integration_test +class TestDownloadMatrices: + """ + Checks the retrieval of matrices with the endpoint GET studies/uuid/raw/download + """ + + def test_download_matrices(self, client: TestClient, user_access_token: str, study_id: str) -> None: + user_headers = {"Authorization": f"Bearer {user_access_token}"} + + # ===================== + # STUDIES PREPARATION + # ===================== + + preparer = PreparerProxy(client, user_access_token) + + study_820_id = preparer.copy_upgrade_study(study_id, target_version=820) + + # Create Variant + variant_id = preparer.create_variant(study_820_id, name="New Variant") + + # Create a new area to implicitly create normalized matrices + area_id = preparer.create_area(variant_id, name="Mayenne", country="France") + + # Change study start_date + preparer.update_general_data(variant_id, firstMonth="July") + + # Really generates the snapshot + preparer.generate_snapshot(variant_id) + + # Prepare a managed study to test specific matrices for version 8.6 + study_860_id = preparer.copy_upgrade_study(study_id, target_version=860) + + # Import a Min Gen. 
matrix: shape=(8760, 3), with random integers between 0 and 1000 + min_gen_df = pd.DataFrame(np.random.randint(0, 1000, size=(8760, 3))) + preparer.upload_matrix(study_860_id, "input/hydro/series/de/mingen", min_gen_df) + + # ============================================= + # TESTS NOMINAL CASE ON RAW AND VARIANT STUDY + # ============================================= + + raw_matrix_path = r"input/load/series/load_de" + variant_matrix_path = f"input/load/series/load_{area_id}" + + raw_start_date = datetime.datetime(2018, 1, 1) + variant_start_date = datetime.datetime(2019, 7, 1) + + for uuid, path, start_date in [ + (study_820_id, raw_matrix_path, raw_start_date), + (variant_id, variant_matrix_path, variant_start_date), + ]: + # Export the matrix in xlsx format (which is the default format) + # and retrieve it as binary content (a ZIP-like file). + res = client.get( + f"/v1/studies/{uuid}/raw/download", + params={"path": path}, + headers=user_headers, + ) + assert res.status_code == 200 + # noinspection SpellCheckingInspection + assert res.headers["content-type"] == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + + # load into dataframe + # noinspection PyTypeChecker + dataframe = pd.read_excel(io.BytesIO(res.content), index_col=0) + + # check time coherence + actual_index = dataframe.index + # noinspection PyUnresolvedReferences + first_date = actual_index[0].to_pydatetime() + # noinspection PyUnresolvedReferences + second_date = actual_index[1].to_pydatetime() + first_month = 1 if uuid == study_820_id else 7 # July + assert first_date.month == second_date.month == first_month + assert first_date.day == second_date.day == 1 + assert first_date.hour == 0 + assert second_date.hour == 1 + + # asserts that the result is the same as the one we get with the classic get /raw endpoint + res = client.get( + f"/v1/studies/{uuid}/raw", + params={"path": path, "formatted": True}, + headers=user_headers, + ) + expected_matrix = res.json() + expected_matrix["columns"] = [f"TS-{n + 1}" for n in expected_matrix["columns"]] + time_column = pd.date_range( + start=start_date, + periods=len(expected_matrix["data"]), + freq="H", + ) + expected_matrix["index"] = time_column + expected = pd.DataFrame(**expected_matrix) + assert dataframe.index.tolist() == expected.index.tolist() + assert dataframe.columns.tolist() == expected.columns.tolist() + assert (dataframe == expected).all().all() + + # ============================= + # TESTS INDEX AND HEADER PARAMETERS + # ============================= + + # test only few possibilities as each API call is quite long + # (also check that the format is case-insensitive) + for header in [True, False]: + index = not header + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": raw_matrix_path, "format": "TSV", "header": header, "index": index}, + headers=user_headers, + ) + assert res.status_code == 200 + assert res.headers["content-type"] == "text/tab-separated-values; charset=utf-8" + + content = io.BytesIO(res.content) + dataframe = pd.read_csv( + content, index_col=0 if index else None, header="infer" if header else None, sep="\t" + ) + first_index = dataframe.index[0] + assert first_index == "2018-01-01 00:00:00" if index else first_index == 0 + assert isinstance(dataframe.columns[0], str) if header else isinstance(dataframe.columns[0], np.int64) + + # ============================= + # TEST SPECIFIC MATRICES + # ============================= + + # tests links headers before v8.2 + res = client.get( + 
f"/v1/studies/{study_id}/raw/download", + params={"path": "input/links/de/fr", "format": "tsv", "index": False}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, sep="\t") + assert list(dataframe.columns) == [ + "Capacités de transmission directes", + "Capacités de transmission indirectes", + "Hurdle costs direct (de->fr)", + "Hurdle costs indirect (fr->de)", + "Impedances", + "Loop flow", + "P.Shift Min", + "P.Shift Max", + ] + + # tests links headers after v8.2 + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": "input/links/de/fr_parameters", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert list(dataframe.columns) == [ + "Hurdle costs direct (de->fr_parameters)", + "Hurdle costs indirect (fr_parameters->de)", + "Impedances", + "Loop flow", + "P.Shift Min", + "P.Shift Max", + ] + + # allocation and correlation matrices + for path in ["input/hydro/allocation", "input/hydro/correlation"]: + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", params={"path": path, "format": "tsv"}, headers=user_headers + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert list(dataframe.index) == list(dataframe.columns) == ["de", "es", "fr", "it"] + assert all(dataframe.iloc[i, i] == 1.0 for i in range(len(dataframe))) + + # test for empty matrix + res = client.get( + f"/v1/studies/{study_id}/raw/download", + params={"path": "input/hydro/common/capacity/waterValues_de", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert dataframe.empty + + # modulation matrix + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": "input/thermal/prepro/de/01_solar/modulation", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert dataframe.index[0] == "2018-01-01 00:00:00" + dataframe.index = range(len(dataframe)) + transposed_matrix = list(zip(*[8760 * [1.0], 8760 * [1.0], 8760 * [1.0], 8760 * [0.0]])) + expected_df = pd.DataFrame( + columns=["Marginal cost modulation", "Market bid modulation", "Capacity modulation", "Min gen modulation"], + index=range(8760), + data=transposed_matrix, + ) + assert dataframe.equals(expected_df) + + # asserts endpoint returns the right columns for output matrix + res = client.get( + f"/v1/studies/{study_id}/raw/download", + params={ + "path": "output/20201014-1422eco-hello/economy/mc-ind/00001/links/de/fr/values-hourly", + "format": "tsv", + }, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + # noinspection SpellCheckingInspection + assert list(dataframe.columns) == [ + "('FLOW LIN.', 'MWh', '')", + "('UCAP LIN.', 'MWh', '')", + "('LOOP FLOW', 'MWh', '')", + "('FLOW QUAD.', 'MWh', '')", + "('CONG. FEE (ALG.)', 'Euro', '')", + "('CONG. FEE (ABS.)', 'Euro', '')", + "('MARG. COST', 'Euro/MW', '')", + "('CONG. PROB +', '%', '')", + "('CONG. 
PROB -', '%', '')", + "('HURDLE COST', 'Euro', '')", + ] + + # test energy matrix to test the regex + res = client.get( + f"/v1/studies/{study_id}/raw/download", + params={"path": "input/hydro/prepro/de/energy", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert dataframe.empty + + # test the Min Gen of the 8.6 study + res = client.get( + f"/v1/studies/{study_860_id}/raw/download", + params={"path": "input/hydro/series/de/mingen", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 200 + content = io.BytesIO(res.content) + dataframe = pd.read_csv(content, index_col=0, sep="\t") + assert dataframe.shape == (8760, 3) + assert dataframe.columns.tolist() == ["TS-1", "TS-2", "TS-3"] + assert dataframe.index[0] == "2018-01-01 00:00:00" + # noinspection PyUnresolvedReferences + assert (dataframe.values == min_gen_df.values).all() + + # ============================= + # ERRORS + # ============================= + + fake_str = "fake_str" + + # fake study_id + res = client.get( + f"/v1/studies/{fake_str}/raw/download", + params={"path": raw_matrix_path, "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 404 + assert res.json()["exception"] == "StudyNotFoundError" + + # fake path + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": f"input/links/de/{fake_str}", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 404 + assert res.json()["exception"] == "ChildNotFoundError" + + # path that does not lead to a matrix + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": "settings/generaldata", "format": "tsv"}, + headers=user_headers, + ) + assert res.status_code == 404 + assert res.json()["exception"] == "IncorrectPathError" + assert res.json()["description"] == "The provided path does not point to a valid matrix: 'settings/generaldata'" + + # wrong format + res = client.get( + f"/v1/studies/{study_820_id}/raw/download", + params={"path": raw_matrix_path, "format": fake_str}, + headers=user_headers, + ) + assert res.status_code == 422 + assert res.json()["exception"] == "RequestValidationError" diff --git a/tests/integration/studies_blueprint/test_study_matrix_index.py b/tests/integration/studies_blueprint/test_study_matrix_index.py index 69880cb357..4aeacceff4 100644 --- a/tests/integration/studies_blueprint/test_study_matrix_index.py +++ b/tests/integration/studies_blueprint/test_study_matrix_index.py @@ -33,7 +33,7 @@ def test_get_study_matrix_index( expected = { "first_week_size": 7, "level": "hourly", - "start_date": "2001-01-01 00:00:00", + "start_date": "2018-01-01 00:00:00", "steps": 8760, } assert actual == expected @@ -50,7 +50,7 @@ def test_get_study_matrix_index( expected = { "first_week_size": 7, "level": "daily", - "start_date": "2001-01-01 00:00:00", + "start_date": "2018-01-01 00:00:00", "steps": 365, } assert actual == expected @@ -67,7 +67,7 @@ def test_get_study_matrix_index( expected = { "first_week_size": 7, "level": "hourly", - "start_date": "2001-01-01 00:00:00", + "start_date": "2018-01-01 00:00:00", "steps": 8760, } assert actual == expected @@ -80,7 +80,7 @@ def test_get_study_matrix_index( actual = res.json() expected = { "first_week_size": 7, - "start_date": "2001-01-01 00:00:00", + "start_date": "2018-01-01 00:00:00", "steps": 8760, "level": "hourly", } @@ -96,5 +96,5 @@ def test_get_study_matrix_index( ) assert 
res.status_code == 200
        actual = res.json()
-        expected = {"first_week_size": 7, "start_date": "2001-01-01 00:00:00", "steps": 7, "level": "daily"}
+        expected = {"first_week_size": 7, "start_date": "2018-01-01 00:00:00", "steps": 7, "level": "daily"}
         assert actual == expected
diff --git a/tests/integration/studies_blueprint/test_synthesis.py b/tests/integration/studies_blueprint/test_synthesis.py
index 70f5f0c907..9afd66be9b 100644
--- a/tests/integration/studies_blueprint/test_synthesis.py
+++ b/tests/integration/studies_blueprint/test_synthesis.py
@@ -108,4 +108,4 @@ def test_variant_study(
         )
         assert res.status_code == 200, res.json()
         duration = time.time() - start
-        assert 0 <= duration <= 0.1, f"Duration is {duration} seconds"
+        assert 0 <= duration <= 0.2, f"Duration is {duration} seconds"
diff --git a/tests/storage/business/test_study_service_utils.py b/tests/storage/business/test_study_service_utils.py
index 623f17a55e..dcd674e0e3 100644
--- a/tests/storage/business/test_study_service_utils.py
+++ b/tests/storage/business/test_study_service_utils.py
@@ -104,7 +104,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.WEEKLY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2001, 1, 1)),
+                start_date=str(datetime.datetime(2018, 1, 1)),
                 steps=51,
                 first_week_size=7,
                 level=StudyDownloadLevelDTO.WEEKLY,
@@ -121,7 +121,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.WEEKLY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2002, 7, 5)),
+                start_date=str(datetime.datetime(2019, 7, 5)),
                 steps=48,
                 first_week_size=5,
                 level=StudyDownloadLevelDTO.WEEKLY,
@@ -138,7 +138,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.MONTHLY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2002, 7, 1)),
+                start_date=str(datetime.datetime(2019, 7, 1)),
                 steps=7,
                 first_week_size=7,
                 level=StudyDownloadLevelDTO.MONTHLY,
@@ -155,7 +155,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.MONTHLY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2002, 7, 1)),
+                start_date=str(datetime.datetime(2019, 7, 1)),
                 steps=4,
                 first_week_size=7,
                 level=StudyDownloadLevelDTO.MONTHLY,
@@ -172,7 +172,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.HOURLY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2010, 3, 5)),
+                start_date=str(datetime.datetime(2021, 3, 5)),
                 steps=2304,
                 first_week_size=3,
                 level=StudyDownloadLevelDTO.HOURLY,
@@ -189,7 +189,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.ANNUAL,
             MatrixIndex(
-                start_date=str(datetime.datetime(2010, 3, 5)),
+                start_date=str(datetime.datetime(2021, 3, 5)),
                 steps=1,
                 first_week_size=3,
                 level=StudyDownloadLevelDTO.ANNUAL,
@@ -206,7 +206,7 @@ def test_output_downloads_export(tmp_path: Path):
             },
             StudyDownloadLevelDTO.DAILY,
             MatrixIndex(
-                start_date=str(datetime.datetime(2009, 3, 3)),
+                start_date=str(datetime.datetime(2026, 3, 3)),
                 steps=98,
                 first_week_size=3,
                 level=StudyDownloadLevelDTO.DAILY,
diff --git a/tests/storage/repository/test_study.py b/tests/storage/repository/test_study.py
index f865ab613a..bb4ea795e3 100644
--- a/tests/storage/repository/test_study.py
+++ b/tests/storage/repository/test_study.py
@@ -1,22 +1,24 @@
 from datetime import datetime
 
+from sqlalchemy.orm import Session  # type: ignore
+
 from antarest.core.cache.business.local_chache import LocalCache
 from antarest.core.model import PublicMode
 from antarest.login.model import Group, User
 from antarest.study.model import DEFAULT_WORKSPACE_NAME, RawStudy, Study, StudyContentStatus
 from antarest.study.repository import StudyMetadataRepository
 from antarest.study.storage.variantstudy.model.dbmodel import VariantStudy
-from tests.helpers import with_db_context
 
 
-@with_db_context
-def test_lifecycle() -> None:
-    user = User(id=0, name="admin")
+def test_lifecycle(db_session: Session) -> None:
+    repo = StudyMetadataRepository(LocalCache(), session=db_session)
+
+    user = User(id=1, name="admin")
     group = Group(id="my-group", name="group")
-    repo = StudyMetadataRepository(LocalCache())
+
     a = Study(
         name="a",
-        version="42",
+        version="820",
         author="John Smith",
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
@@ -26,7 +28,7 @@ def test_lifecycle() -> None:
     )
     b = RawStudy(
         name="b",
-        version="43",
+        version="830",
         author="Morpheus",
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
@@ -36,7 +38,7 @@ def test_lifecycle() -> None:
     )
     c = RawStudy(
         name="c",
-        version="43",
+        version="830",
         author="Trinity",
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
@@ -47,7 +49,7 @@ def test_lifecycle() -> None:
     )
     d = VariantStudy(
         name="d",
-        version="43",
+        version="830",
         author="Mr. Anderson",
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
@@ -57,30 +59,32 @@ def test_lifecycle() -> None:
     )
 
     a = repo.save(a)
-    b = repo.save(b)
+    a_id = a.id
+
+    repo.save(b)
     repo.save(c)
     repo.save(d)
-    assert b.id
-    c = repo.one(a.id)
-    assert a == c
+
+    c = repo.one(a_id)
+    assert a_id == c.id
     assert len(repo.get_all()) == 4
     assert len(repo.get_all_raw(exists=True)) == 1
     assert len(repo.get_all_raw(exists=False)) == 1
     assert len(repo.get_all_raw()) == 2
-    repo.delete(a.id)
-    assert repo.get(a.id) is None
+    repo.delete(a_id)
+    assert repo.get(a_id) is None
+
+
+def test_study_inheritance(db_session: Session) -> None:
+    repo = StudyMetadataRepository(LocalCache(), session=db_session)
 
-@with_db_context
-def test_study_inheritance() -> None:
     user = User(id=0, name="admin")
     group = Group(id="my-group", name="group")
-    repo = StudyMetadataRepository(LocalCache())
 
     a = RawStudy(
         name="a",
-        version="42",
+        version="820",
         author="John Smith",
         created_at=datetime.utcnow(),
         updated_at=datetime.utcnow(),
diff --git a/tests/storage/test_service.py b/tests/storage/test_service.py
index e7e8662394..fa7ed5c62d 100644
--- a/tests/storage/test_service.py
+++ b/tests/storage/test_service.py
@@ -350,18 +350,30 @@ def test_partial_sync_studies_from_disk() -> None:
     )
 
 
-@pytest.mark.unit_test
-def test_remove_duplicate() -> None:
-    ma = RawStudy(id="a", path="a")
-    mb = RawStudy(id="b", path="a")
+@with_db_context
+def test_remove_duplicate(db_session: Session) -> None:
+    with db_session:
+        db_session.add(RawStudy(id="a", path="/path/to/a"))
+        db_session.add(RawStudy(id="b", path="/path/to/a"))
+        db_session.add(RawStudy(id="c", path="/path/to/c"))
+        db_session.commit()
+        study_count = db_session.query(RawStudy).filter(RawStudy.path == "/path/to/a").count()
+        assert study_count == 2  # there are 2 studies with same path before removing duplicates
 
-    repository = Mock()
-    repository.get_all.return_value = [ma, mb]
-    config = Config(storage=StorageConfig(workspaces={DEFAULT_WORKSPACE_NAME: WorkspaceConfig()}))
-    service = build_study_service(Mock(), repository, config)
+    with db_session:
+        repository = StudyMetadataRepository(Mock(), db_session)
+        config = Config(storage=StorageConfig(workspaces={DEFAULT_WORKSPACE_NAME: WorkspaceConfig()}))
+        service = build_study_service(Mock(), repository, config)
+        service.remove_duplicates()
 
-    service.remove_duplicates()
-    repository.delete.assert_called_once_with(mb.id)
+    # example with 1 duplicate with same path
+    with db_session:
+        study_count = db_session.query(RawStudy).filter(RawStudy.path == "/path/to/a").count()
+        assert study_count == 1
+    # example with no duplicates with same path
+    with db_session:
+        study_count = db_session.query(RawStudy).filter(RawStudy.path == "/path/to/c").count()
+        assert study_count == 1
 
 
 # noinspection PyArgumentList
@@ -571,7 +583,7 @@ def test_download_output() -> None:
     # AREA TYPE
     res_matrix = MatrixAggregationResultDTO(
         index=MatrixIndex(
-            start_date="2001-01-01 00:00:00",
+            start_date="2018-01-01 00:00:00",
             steps=1,
             first_week_size=7,
             level=StudyDownloadLevelDTO.ANNUAL,
@@ -631,7 +643,7 @@ def test_download_output() -> None:
     input_data.filter = ["east>west"]
     res_matrix = MatrixAggregationResultDTO(
         index=MatrixIndex(
-            start_date="2001-01-01 00:00:00",
+            start_date="2018-01-01 00:00:00",
             steps=1,
             first_week_size=7,
             level=StudyDownloadLevelDTO.ANNUAL,
@@ -661,7 +673,7 @@ def test_download_output() -> None:
     input_data.filterIn = "n"
     res_matrix = MatrixAggregationResultDTO(
         index=MatrixIndex(
-            start_date="2001-01-01 00:00:00",
+            start_date="2018-01-01 00:00:00",
             steps=1,
             first_week_size=7,
             level=StudyDownloadLevelDTO.ANNUAL,
diff --git a/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/index.tsx b/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/index.tsx
index 289913bcbe..0baab19ab8 100644
--- a/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/index.tsx
+++ b/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/index.tsx
@@ -11,6 +11,7 @@ import { getCurrentAreaId } from "../../../../../../../redux/selectors";
 function Hydro() {
   const { study } = useOutletContext<{ study: StudyMetadata }>();
   const areaId = useAppSelector(getCurrentAreaId);
+  const studyVersion = parseInt(study.version, 10);
 
   const tabList = useMemo(() => {
     const basePath = `/studies/${study?.id}/explore/modelization/area/${encodeURI(
@@ -30,8 +31,9 @@ function Hydro() {
       { label: "Water values", path: `${basePath}/watervalues` },
       { label: "Hydro Storage", path: `${basePath}/hydrostorage` },
       { label: "Run of river", path: `${basePath}/ror` },
-    ];
-  }, [areaId, study?.id]);
+      studyVersion >= 860 && { label: "Min Gen", path: `${basePath}/mingen` },
+    ].filter(Boolean);
+  }, [areaId, study?.id, studyVersion]);
 
   ////////////////////////////////////////////////////////////////
   // JSX
diff --git a/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/utils.ts b/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/utils.ts
index 8fc143d280..ed8457afe4 100644
--- a/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/utils.ts
+++ b/webapp/src/components/App/Singlestudy/explore/Modelization/Areas/Hydro/utils.ts
@@ -15,6 +15,7 @@ export enum HydroMatrixType {
   WaterValues,
   HydroStorage,
   RunOfRiver,
+  MinGen,
   InflowPattern,
   OverallMonthlyHydro,
   Allocation,
@@ -99,6 +100,10 @@ export const HYDRO_ROUTES: HydroRoute[] = [
     path: "ror",
     type: HydroMatrixType.RunOfRiver,
   },
+  {
+    path: "mingen",
+    type: HydroMatrixType.MinGen,
+  },
 ];
 
 export const MATRICES: Matrices = {
@@ -144,6 +149,11 @@ export const MATRICES: Matrices = {
     url: "input/hydro/series/{areaId}/ror",
     stats: MatrixStats.STATS,
   },
+  [HydroMatrixType.MinGen]: {
+    title: "Min Gen",
+    url: "input/hydro/series/{areaId}/mingen",
+    stats: MatrixStats.STATS,
+  },
   [HydroMatrixType.InflowPattern]: {
     title: "Inflow Pattern",
     url: "input/hydro/common/capacity/inflowPattern_{areaId}",
diff --git a/webapp/src/services/api/matrix.ts b/webapp/src/services/api/matrix.ts
index 9b2d4f5213..1eff05f7c9 100644
--- a/webapp/src/services/api/matrix.ts
+++ b/webapp/src/services/api/matrix.ts
@@ -97,7 +97,7 @@ export const editMatrix = async (
   matrixEdit: MatrixEditDTO[],
 ): Promise => {
   const res = await client.put(
-    `/v1/studies/${sid}/matrix?path=${path}`,
+    `/v1/studies/${sid}/matrix?path=${encodeURIComponent(path)}`,
     matrixEdit,
   );
   return res.data;