From 02c8851577d2821aae32adc8e72ec800046cd80a Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 17:59:48 +0200 Subject: [PATCH 01/22] first draft --- .../extensions/adequacy_patch/extension.py | 4 +-- antarest/matrixstore/uri_resolver_service.py | 8 ++--- antarest/study/common/studystorage.py | 4 +-- antarest/study/service.py | 11 ++++--- .../study/storage/abstract_storage_service.py | 17 +++------- .../rawstudy/model/filesystem/folder_node.py | 32 ++++++------------- .../model/filesystem/ini_file_node.py | 6 +--- .../rawstudy/model/filesystem/inode.py | 4 +-- .../rawstudy/model/filesystem/lazy_node.py | 18 ++++------- .../model/filesystem/matrix/matrix.py | 8 ++--- .../filesystem/matrix/output_series_matrix.py | 8 ++--- .../model/filesystem/raw_file_node.py | 8 +---- .../model/filesystem/root/input/areas/list.py | 14 ++------ .../simulation/ts_numbers/ts_numbers_data.py | 6 +--- .../variantstudy/variant_study_service.py | 17 ++-------- antarest/study/web/raw_studies_blueprint.py | 21 ++++++++++-- .../storage/business/test_xpansion_manager.py | 10 +++--- .../repository/filesystem/test_lazy_node.py | 8 +---- tests/storage/repository/filesystem/utils.py | 8 +---- 19 files changed, 75 insertions(+), 137 deletions(-) diff --git a/antarest/launcher/extensions/adequacy_patch/extension.py b/antarest/launcher/extensions/adequacy_patch/extension.py index d6499375e8..2614956522 100644 --- a/antarest/launcher/extensions/adequacy_patch/extension.py +++ b/antarest/launcher/extensions/adequacy_patch/extension.py @@ -97,9 +97,7 @@ def after_export_flat_hook( study = self.study_service.storage_service.raw_study_service.study_factory.create_from_fs( study_export_path, study_id, use_cache=False ) - user_config = study.tree.get( - ["user"], - ) + user_config = study.tree.get(["user"]) assert_this("flowbased" in user_config) adequacy_patch_config = yaml.safe_load(cast(bytes, study.tree.get(["user", "adequacypatch", "config.yml"]))) assert_this("areas" in adequacy_patch_config) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 01717c57bd..8740a247b2 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON: + def resolve(self, uri: str, format: str = "") -> SUB_JSON: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -19,7 +19,7 @@ def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON: return None if protocol == "matrix": - return self._resolve_matrix(uuid, formatted) + return self._resolve_matrix(uuid, format) raise NotImplementedError(f"protocol {protocol} not implemented") @staticmethod @@ -37,10 +37,10 @@ def extract_id(uri: str) -> Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON: + def _resolve_matrix(self, id: str, format: str = "") -> SUB_JSON: data = self.matrix_service.get(id) if data: - if formatted: + if format == "json": return { "data": data.data, "index": data.index, diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index 94664ef339..f3170e1be8 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -30,7 +30,7 @@ def get( 
metadata: T, url: str = "", depth: int = 3, - formatted: bool = True, + format: str = "", ) -> JSON: """ Entry point to fetch data inside study. @@ -38,7 +38,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: indicate if raw files must be parsed and formatted Returns: study data formatted in json diff --git a/antarest/study/service.py b/antarest/study/service.py index 494e7d2f2f..6b7380bb3a 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -132,6 +132,7 @@ from antarest.study.storage.variantstudy.model.dbmodel import VariantStudy from antarest.study.storage.variantstudy.model.model import CommandDTO from antarest.study.storage.variantstudy.variant_study_service import VariantStudyService +from antarest.study.web.raw_studies_blueprint import MATRIX_FORMAT from antarest.worker.archive_worker import ArchiveTaskArgs from antarest.worker.simulator_worker import GenerateTimeseriesTaskArgs @@ -307,7 +308,7 @@ def get( uuid: str, url: str, depth: int, - formatted: bool, + format: str, params: RequestParameters, ) -> JSON: """ @@ -316,7 +317,7 @@ def get( uuid: study uuid url: route to follow inside study structure depth: depth to expand tree when route matched - formatted: indicate if raw files must be parsed and formatted + format: indicate if raw files must be parsed and formatted params: request parameters Returns: data study formatted in json @@ -325,7 +326,7 @@ def get( study = self.get_study(uuid) assert_permission(params.user, study, StudyPermissionType.READ) - return self.storage_service.get_storage(study).get(study, url, depth, formatted) + return self.storage_service.get_storage(study).get(study, url, depth, format) def aggregate_output_data( self, @@ -398,7 +399,7 @@ def get_logs( try: log = t.cast( bytes, - file_study.tree.get(log_location, depth=1, formatted=True), + file_study.tree.get(log_location, depth=1, format="json"), ).decode(encoding="utf-8") # when missing file, RawFileNode return empty bytes if log: @@ -2527,7 +2528,7 @@ def get_matrix_with_index_and_header( hydro_matrix = self.correlation_manager.get_correlation_matrix(all_areas, study, []) # type: ignore return pd.DataFrame(data=hydro_matrix.data, columns=hydro_matrix.columns, index=hydro_matrix.index) - matrix_obj = self.get(study_id, path, depth=3, formatted=True, params=parameters) + matrix_obj = self.get(study_id, path, depth=3, format="json", params=parameters) if set(matrix_obj) != {"data", "index", "columns"}: raise IncorrectPathError(f"The provided path does not point to a valid matrix: '{path}'") if not matrix_obj["data"]: diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 892f855970..22bab623d4 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -116,21 +116,14 @@ def get_study_information( tags=[tag.label for tag in study.tags], ) - def get( - self, - metadata: T, - url: str = "", - depth: int = 3, - formatted: bool = True, - use_cache: bool = True, - ) -> JSON: + def get(self, metadata: T, url: str = "", depth: int = 3, format: str = "", use_cache: bool = True) -> JSON: """ Entry point to fetch data inside study. 
Args: metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: indicate if raw files must be parsed and formatted use_cache: indicate if the cache must be used Returns: study data formatted in json @@ -149,11 +142,11 @@ def get( logger.info(f"Raw Study {metadata.id} read from cache") data = from_cache else: - data = study.tree.get(parts, depth=depth, formatted=formatted) + data = study.tree.get(parts, depth=depth) self.cache.put(cache_id, data) logger.info(f"Cache new entry from RawStudyService (studyID: {metadata.id})") else: - data = study.tree.get(parts, depth=depth, formatted=formatted) + data = study.tree.get(parts, depth=depth) del study return data @@ -250,7 +243,7 @@ def import_output( extension = ".zip" if is_zipped else "" path_output = path_output.rename(Path(path_output.parent, output_full_name + extension)) - data = self.get(metadata, f"output/{output_full_name}", 1, use_cache=False) + data = self.get(metadata, f"output/{output_full_name}", 1) if data is None: self.delete_output(metadata, "imported_output") diff --git a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py index 3ea51c098d..a16790a57f 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py @@ -52,7 +52,7 @@ def _forward_get( self, url: t.List[str], depth: int = -1, - formatted: bool = True, + format: str = "", get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: children = self.build() @@ -62,7 +62,7 @@ def _forward_get( if len(names) == 1: child = children[names[0]] if not get_node: - return child.get(sub_url, depth=depth, expanded=False, formatted=formatted) # type: ignore + return child.get(sub_url, depth=depth, expanded=False, format=format) # type: ignore else: return child.get_node( sub_url, @@ -70,20 +70,12 @@ def _forward_get( # many items asked or * asked else: if not get_node: - return { - key: children[key].get( - sub_url, - depth=depth, - expanded=False, - formatted=formatted, - ) - for key in names - } + return {key: children[key].get(sub_url, depth=depth, expanded=False, format=format) for key in names} else: raise ValueError("Multiple nodes requested") def _expand_get( - self, depth: int = -1, formatted: bool = True, get_node: bool = False + self, depth: int = -1, format: str = "", get_node: bool = False ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if get_node: return self @@ -93,7 +85,7 @@ def _expand_get( if depth == 0: return {} return { - name: node.get(depth=depth - 1, expanded=True, formatted=formatted) if depth != 1 else {} + name: node.get(depth=depth - 1, expanded=True, format=format) if depth != 1 else {} for name, node in children.items() } @@ -101,22 +93,18 @@ def _get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, - formatted: bool = True, + format: str = "", get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if url and url != [""]: - return self._forward_get(url, depth, formatted, get_node) + return self._forward_get(url, depth, format, get_node) else: - return self._expand_get(depth, formatted, get_node) + return self._expand_get(depth, format, get_node) def get( - self, - url: t.Optional[t.List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: t.Optional[t.List[str]] = None, depth: int = -1, 
expanded: bool = False, format: str = "" ) -> JSON: - output = self._get(url=url, depth=depth, formatted=formatted, get_node=False) + output = self._get(url=url, depth=depth, format=format, get_node=False) assert not isinstance(output, INode) return output diff --git a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py index ba75363abd..949ec5a9b6 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py @@ -133,11 +133,7 @@ def _get_filtering_kwargs(self, url: t.List[str]) -> t.Dict[str, str]: return {} def get( - self, - url: t.Optional[t.List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> SUB_JSON: output = self._get(url, depth, expanded, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/inode.py b/antarest/study/storage/rawstudy/model/filesystem/inode.py index e3eff7feee..10d6c44237 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/inode.py +++ b/antarest/study/storage/rawstudy/model/filesystem/inode.py @@ -25,7 +25,7 @@ def get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: str = "", ) -> G: """ Ask data inside tree. @@ -34,7 +34,7 @@ def get( url: data path to retrieve depth: after url is reached, node expand tree until matches depth asked expanded: context parameter to determine if current node become from a expansion - formatted: ask for raw file transformation (for matrix) + format: ask for raw file transformation (for matrix) Returns: json diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py index 98f0c74a40..87b1253fa3 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py @@ -61,7 +61,7 @@ def _get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: str = "", get_node: bool = False, ) -> Union[Union[str, G], INode[G, S, V]]: self._assert_url_end(url) @@ -74,21 +74,17 @@ def _get( if expanded: return link else: - return cast(G, self.context.resolver.resolve(link, formatted)) + return cast(G, self.context.resolver.resolve(link, format)) if expanded: return self.get_lazy_content() else: - return self.load(url, depth, expanded, formatted) + return self.load(url, depth, expanded, format) def get( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> Union[str, G]: - output = self._get(url, depth, expanded, formatted, get_node=False) + output = self._get(url, depth, expanded, format, get_node=False) assert not isinstance(output, INode) return output @@ -140,7 +136,7 @@ def load( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: str = "", ) -> G: """ Fetch data on disk. 
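
Taken together, the lazy-node change above and the matrix hunks that follow make load() dispatch on the new string parameter instead of a boolean. A stand-alone sketch of that dispatch (the file name and helper are illustrative only, not project code): format="bytes" returns the on-disk content untouched, anything else goes through parsing to the usual {data, index, columns} mapping.

from pathlib import Path

import pandas as pd

# Illustrative helper mirroring the branch added to MatrixNode.load():
# "bytes" -> raw disk content, otherwise parse the TSV into a split dict.
def read_matrix(file_path: Path, format: str = "json"):
    if format == "bytes":
        return file_path.read_bytes()
    df = pd.read_csv(file_path, sep="\t", header=None)
    return df.to_dict(orient="split")

tmp = Path("example_matrix.txt")
tmp.write_text("1\t2\n3\t4\n")
assert read_matrix(tmp, format="bytes") == b"1\t2\n3\t4\n"
assert read_matrix(tmp, format="json")["data"] == [[1, 2], [3, 4]]
tmp.unlink()
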
@@ -149,7 +145,7 @@ def load( url: data path to retrieve depth: after url is reached, node expand tree until matches depth asked expanded: context parameter to determine if current node become from a expansion - formatted: ask for raw file transformation + format: ask for raw file transformation Returns: diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index 1427301f49..5b5008f224 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -90,14 +90,10 @@ def denormalize(self) -> None: self.get_link_path().unlink() def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> Union[bytes, JSON]: file_path, tmp_dir = self._get_real_file_path() - if not formatted: + if format == "bytes": if file_path.exists(): return file_path.read_bytes() diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index ff1384391a..050974761d 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -109,15 +109,11 @@ def check_errors( return errors def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> Union[bytes, JSON]: try: file_path, tmp_dir = self._get_real_file_path() - if not formatted: + if format == "bytes": if file_path.exists(): file_content = file_path.read_bytes() if tmp_dir: diff --git a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py index 57770ce02d..0056929f82 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py @@ -24,13 +24,7 @@ def get_lazy_content( ) -> str: return f"file://{self.config.path.name}" - def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, - ) -> bytes: + def load(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> bytes: file_path, tmp_dir = self._get_real_file_path() if file_path.exists(): diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py index 0e84498e1d..90c7df8027 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py @@ -16,21 +16,11 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig): super().__init__(config) self.context = context - def get_node( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, - ) -> INode[List[str], List[str], List[str]]: + def get_node(self, url: Optional[List[str]] = None) -> INode[List[str], List[str], List[str]]: return self def get( - self, - url: Optional[List[str]] = None, - 
depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> List[str]: if self.config.zip_path: path, tmp_dir = self._extract_file_to_tmp_dir() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py index 90240b04c7..910f8126ff 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py @@ -9,11 +9,7 @@ class TsNumbersVector(LazyNode[List[int], List[int], JSON]): def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" ) -> List[int]: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index 1452071aae..62f2c54e95 100644 --- a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -464,12 +464,7 @@ def _get_variants_parents(self, id: str, params: RequestParameters) -> t.List[St return output_list def get( - self, - metadata: VariantStudy, - url: str = "", - depth: int = 3, - formatted: bool = True, - use_cache: bool = True, + self, metadata: VariantStudy, url: str = "", depth: int = 3, format: str = "", use_cache: bool = True ) -> JSON: """ Entry point to fetch data inside study. 
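
The endpoint diff that follows keeps the legacy `formatted` flag for compatibility but lets an explicit `format` query parameter take precedence. A minimal stand-alone restatement of that fallback, using a helper name of our own (the values are the ones used in the patch):

from typing import Optional

# Hypothetical helper: an explicit format wins, otherwise the legacy boolean
# is mapped to "json" (parsed) or "bytes" (raw), as in get_study().
def resolve_format(formatted: bool = True, format: Optional[str] = None) -> str:
    if format:
        return format  # "json", "bytes" or "arrow"
    return "json" if formatted else "bytes"

assert resolve_format() == "json"
assert resolve_format(formatted=False) == "bytes"
assert resolve_format(formatted=False, format="arrow") == "arrow"
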
@@ -477,20 +472,14 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: indicate if raw files must be parsed and formatted use_cache: indicate if cache should be used Returns: study data formatted in json """ self._safe_generation(metadata, timeout=60) self.repository.refresh(metadata) - return super().get( - metadata=metadata, - url=url, - depth=depth, - formatted=formatted, - use_cache=use_cache, - ) + return super().get(metadata=metadata, url=url, depth=depth) def create_variant_study(self, uuid: str, name: str, params: RequestParameters) -> VariantStudy: """ diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index ac857f7bb3..07d90559cf 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -19,6 +19,7 @@ from antarest.core.utils.web import APITag from antarest.login.auth import Auth from antarest.study.business.aggregator_management import AreasQueryFile, LinksQueryFile +from antarest.study.business.enum_ignore_case import EnumIgnoreCase from antarest.study.service import StudyService from antarest.study.storage.df_download import TableExportFormat, export_file from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import MatrixFrequency @@ -59,6 +60,13 @@ ".json": ("application/json", "utf-8"), } + +class MATRIX_FORMAT(EnumIgnoreCase): + JSON = "json" + BYTES = "bytes" + ARROW = "arrow" + + DEFAULT_EXPORT_FORMAT = Query(TableExportFormat.CSV, alias="format", description="Export format", title="Export Format") @@ -97,6 +105,7 @@ def get_study( path: str = Param("/", examples=get_path_examples()), # type: ignore depth: int = 3, formatted: bool = True, + format: t.Optional[MATRIX_FORMAT] = None, current_user: JWTUser = Depends(auth.get_current_user), ) -> t.Any: """ @@ -117,12 +126,20 @@ def get_study( extra={"user": current_user.id}, ) parameters = RequestParameters(user=current_user) - output = study_service.get(uuid, path, depth=depth, formatted=formatted, params=parameters) + + if not format: + real_format = "json" if formatted else "bytes" + else: + real_format = format.value + + output = study_service.get(uuid, path, depth=depth, format=real_format, params=parameters) if isinstance(output, bytes): # Guess the suffix form the target data resource_path = PurePosixPath(path) - parent_cfg = study_service.get(uuid, str(resource_path.parent), depth=2, formatted=True, params=parameters) + parent_cfg = study_service.get( + uuid, str(resource_path.parent), depth=2, format=real_format, params=parameters + ) child = parent_cfg[resource_path.name] suffix = PurePosixPath(child).suffix diff --git a/tests/storage/business/test_xpansion_manager.py b/tests/storage/business/test_xpansion_manager.py index bb5651bcbd..b4db4a18af 100644 --- a/tests/storage/business/test_xpansion_manager.py +++ b/tests/storage/business/test_xpansion_manager.py @@ -129,11 +129,11 @@ def test_create_configuration(tmp_path: Path, version: int, expected_output: JSO xpansion_manager = make_xpansion_manager(empty_study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.create_xpansion_configuration(study) - actual = empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + actual = empty_study.tree.get(["user", 
"expansion"], depth=9, expanded=True) assert actual == expected_output @@ -147,16 +147,16 @@ def test_delete_xpansion_configuration(tmp_path: Path) -> None: xpansion_manager = make_xpansion_manager(empty_study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.create_xpansion_configuration(study) - assert empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + assert empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.delete_xpansion_configuration(study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) @pytest.mark.unit_test diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py index f899d32fa3..b82912fee1 100644 --- a/tests/storage/repository/filesystem/test_lazy_node.py +++ b/tests/storage/repository/filesystem/test_lazy_node.py @@ -20,13 +20,7 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig) -> None: context=context, ) - def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = False, - ) -> str: + def load(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> str: return "Mock Matrix Content" def dump(self, data: str, url: Optional[List[str]] = None) -> None: diff --git a/tests/storage/repository/filesystem/utils.py b/tests/storage/repository/filesystem/utils.py index cb563e8567..c46a6997c4 100644 --- a/tests/storage/repository/filesystem/utils.py +++ b/tests/storage/repository/filesystem/utils.py @@ -28,13 +28,7 @@ def get_node( ) -> INode[int, int, int]: return self - def get( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = True, - formatted: bool = True, - ) -> int: + def get(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> int: return self.value def save(self, data: int, url: Optional[List[str]] = None) -> None: From b91ae9508af29cb34fd255d8fcd1f0e57c4e45f3 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 18:01:59 +0200 Subject: [PATCH 02/22] remove useless import --- antarest/study/service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/antarest/study/service.py b/antarest/study/service.py index 6b7380bb3a..f181d874eb 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -132,7 +132,6 @@ from antarest.study.storage.variantstudy.model.dbmodel import VariantStudy from antarest.study.storage.variantstudy.model.model import CommandDTO from antarest.study.storage.variantstudy.variant_study_service import VariantStudyService -from antarest.study.web.raw_studies_blueprint import MATRIX_FORMAT from antarest.worker.archive_worker import ArchiveTaskArgs from antarest.worker.simulator_worker import GenerateTimeseriesTaskArgs From 0202d5dcbb2f50b6858cf62ae16da6c522f5cc56 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 18:24:20 +0200 Subject: [PATCH 03/22] fix little issue --- antarest/matrixstore/uri_resolver_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 8740a247b2..6a86aa551b 100644 --- 
a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, format: str = "") -> SUB_JSON: + def resolve(self, uri: str, format: str = "json") -> SUB_JSON: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -37,7 +37,7 @@ def extract_id(uri: str) -> Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, format: str = "") -> SUB_JSON: + def _resolve_matrix(self, id: str, format: str) -> SUB_JSON: data = self.matrix_service.get(id) if data: if format == "json": From f3827806a730f9807eb1d6413b389cda74ae67d3 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 18:48:01 +0200 Subject: [PATCH 04/22] little fix --- antarest/study/storage/abstract_storage_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 22bab623d4..08ee83287b 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -142,11 +142,11 @@ def get(self, metadata: T, url: str = "", depth: int = 3, format: str = "", use_ logger.info(f"Raw Study {metadata.id} read from cache") data = from_cache else: - data = study.tree.get(parts, depth=depth) + data = study.tree.get(parts, depth=depth, format=format) self.cache.put(cache_id, data) logger.info(f"Cache new entry from RawStudyService (studyID: {metadata.id})") else: - data = study.tree.get(parts, depth=depth) + data = study.tree.get(parts, depth=depth, format=format) del study return data From 40c22cf47e557029ebcf642a65f2e7f82617be6e Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 18:58:11 +0200 Subject: [PATCH 05/22] make json a default value --- antarest/study/common/studystorage.py | 2 +- antarest/study/storage/abstract_storage_service.py | 2 +- .../storage/rawstudy/model/filesystem/folder_node.py | 8 ++++---- .../storage/rawstudy/model/filesystem/ini_file_node.py | 2 +- antarest/study/storage/rawstudy/model/filesystem/inode.py | 2 +- .../study/storage/rawstudy/model/filesystem/lazy_node.py | 6 +++--- .../storage/rawstudy/model/filesystem/matrix/matrix.py | 2 +- .../model/filesystem/matrix/output_series_matrix.py | 2 +- .../storage/rawstudy/model/filesystem/raw_file_node.py | 4 +++- .../rawstudy/model/filesystem/root/input/areas/list.py | 2 +- .../root/output/simulation/ts_numbers/ts_numbers_data.py | 2 +- .../study/storage/variantstudy/variant_study_service.py | 2 +- tests/storage/repository/filesystem/utils.py | 4 +++- 13 files changed, 22 insertions(+), 18 deletions(-) diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index f3170e1be8..fc7c9bbdac 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -30,7 +30,7 @@ def get( metadata: T, url: str = "", depth: int = 3, - format: str = "", + format: str = "json", ) -> JSON: """ Entry point to fetch data inside study. 
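
For reference, the resolver touched above only understands matrix://<id> URIs. A stand-alone restatement of the parsing rule it applies (the regex is copied from _extract_uri_components; the helper name and sample values are illustrative):

import re
import typing as t

# Same pattern as UriResolverService._extract_uri_components: split a URI
# into its protocol and identifier, or reject it.
def extract_uri_components(uri: str) -> t.Optional[t.Tuple[str, str]]:
    match = re.match(r"^(\w+)://(.+)$", uri)
    return (match.group(1), match.group(2)) if match else None

assert extract_uri_components("matrix://0123abcd") == ("matrix", "0123abcd")
assert extract_uri_components("settings/generaldata.ini") is None
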
diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 08ee83287b..da386b4699 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -116,7 +116,7 @@ def get_study_information( tags=[tag.label for tag in study.tags], ) - def get(self, metadata: T, url: str = "", depth: int = 3, format: str = "", use_cache: bool = True) -> JSON: + def get(self, metadata: T, url: str = "", depth: int = 3, format: str = "json", use_cache: bool = True) -> JSON: """ Entry point to fetch data inside study. Args: diff --git a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py index a16790a57f..449ea4b683 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py @@ -52,7 +52,7 @@ def _forward_get( self, url: t.List[str], depth: int = -1, - format: str = "", + format: str = "json", get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: children = self.build() @@ -75,7 +75,7 @@ def _forward_get( raise ValueError("Multiple nodes requested") def _expand_get( - self, depth: int = -1, format: str = "", get_node: bool = False + self, depth: int = -1, format: str = "json", get_node: bool = False ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if get_node: return self @@ -93,7 +93,7 @@ def _get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, - format: str = "", + format: str = "json", get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if url and url != [""]: @@ -102,7 +102,7 @@ def _get( return self._expand_get(depth, format, get_node) def get( - self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> JSON: output = self._get(url=url, depth=depth, format=format, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py index 949ec5a9b6..e4a0d586af 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py @@ -133,7 +133,7 @@ def _get_filtering_kwargs(self, url: t.List[str]) -> t.Dict[str, str]: return {} def get( - self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> SUB_JSON: output = self._get(url, depth, expanded, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/inode.py b/antarest/study/storage/rawstudy/model/filesystem/inode.py index 10d6c44237..14c748b401 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/inode.py +++ b/antarest/study/storage/rawstudy/model/filesystem/inode.py @@ -25,7 +25,7 @@ def get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "", + format: str = "json", ) -> G: """ Ask data inside tree. 
diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py index 87b1253fa3..4c3f156368 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py @@ -61,7 +61,7 @@ def _get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "", + format: str = "json", get_node: bool = False, ) -> Union[Union[str, G], INode[G, S, V]]: self._assert_url_end(url) @@ -82,7 +82,7 @@ def _get( return self.load(url, depth, expanded, format) def get( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> Union[str, G]: output = self._get(url, depth, expanded, format, get_node=False) assert not isinstance(output, INode) @@ -136,7 +136,7 @@ def load( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "", + format: str = "json", ) -> G: """ Fetch data on disk. diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index 5b5008f224..18d41422df 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -90,7 +90,7 @@ def denormalize(self) -> None: self.get_link_path().unlink() def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> Union[bytes, JSON]: file_path, tmp_dir = self._get_real_file_path() if format == "bytes": diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index 050974761d..43d412a68f 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -109,7 +109,7 @@ def check_errors( return errors def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> Union[bytes, JSON]: try: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py index 0056929f82..7207949898 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py @@ -24,7 +24,9 @@ def get_lazy_content( ) -> str: return f"file://{self.config.path.name}" - def load(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> bytes: + def load( + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + ) -> bytes: file_path, tmp_dir = self._get_real_file_path() if file_path.exists(): diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py index 90c7df8027..f67c287c16 100644 --- 
a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py @@ -20,7 +20,7 @@ def get_node(self, url: Optional[List[str]] = None) -> INode[List[str], List[str return self def get( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> List[str]: if self.config.zip_path: path, tmp_dir = self._extract_file_to_tmp_dir() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py index 910f8126ff..41f15ce975 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py @@ -9,7 +9,7 @@ class TsNumbersVector(LazyNode[List[int], List[int], JSON]): def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> List[int]: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index 62f2c54e95..ddb53be8e0 100644 --- a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -464,7 +464,7 @@ def _get_variants_parents(self, id: str, params: RequestParameters) -> t.List[St return output_list def get( - self, metadata: VariantStudy, url: str = "", depth: int = 3, format: str = "", use_cache: bool = True + self, metadata: VariantStudy, url: str = "", depth: int = 3, format: str = "json", use_cache: bool = True ) -> JSON: """ Entry point to fetch data inside study. 
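
The test updates in the following patches mostly pin the new keyword. A minimal stand-alone version of the kind of assertion they now make (the tree object here is a plain Mock, not project code):

from unittest.mock import Mock

# Services are now expected to forward format="json" to the study tree
# instead of the old formatted=True flag.
tree = Mock()
tree.get(["settings"], depth=2, format="json")
tree.get.assert_called_once_with(["settings"], depth=2, format="json")
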
diff --git a/tests/storage/repository/filesystem/utils.py b/tests/storage/repository/filesystem/utils.py index c46a6997c4..5fdd991195 100644 --- a/tests/storage/repository/filesystem/utils.py +++ b/tests/storage/repository/filesystem/utils.py @@ -28,7 +28,9 @@ def get_node( ) -> INode[int, int, int]: return self - def get(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> int: + def get( + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + ) -> int: return self.value def save(self, data: int, url: Optional[List[str]] = None) -> None: From 6f0824e4b9a7f3b4c0946d613be57297f20cbbc3 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Fri, 5 Jul 2024 19:12:05 +0200 Subject: [PATCH 06/22] fix some tests --- tests/storage/business/test_raw_study_service.py | 2 +- tests/storage/business/test_variant_study_service.py | 2 +- tests/storage/integration/test_STA_mini.py | 4 ++-- tests/storage/integration/test_write_STA_mini.py | 2 +- tests/storage/web/test_studies_bp.py | 8 +++----- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index 2b5c4b2dfd..12914c52d5 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -75,7 +75,7 @@ def test_get(tmp_path: str, project_path) -> None: assert output == data - study.get.assert_called_once_with(["settings"], depth=2, formatted=True) + study.get.assert_called_once_with(["settings"], depth=2, format="json") @pytest.mark.unit_test diff --git a/tests/storage/business/test_variant_study_service.py b/tests/storage/business/test_variant_study_service.py index 7c6e00f99c..53182ce3ee 100644 --- a/tests/storage/business/test_variant_study_service.py +++ b/tests/storage/business/test_variant_study_service.py @@ -101,7 +101,7 @@ def task_status(*args): assert output == data - study.get.assert_called_once_with(["settings"], depth=2, formatted=True) + study.get.assert_called_once_with(["settings"], depth=2, format="json") @pytest.mark.unit_test diff --git a/tests/storage/integration/test_STA_mini.py b/tests/storage/integration/test_STA_mini.py index 6f0d830ccd..b5a9bce0b8 100644 --- a/tests/storage/integration/test_STA_mini.py +++ b/tests/storage/integration/test_STA_mini.py @@ -53,12 +53,12 @@ def assert_with_errors( storage_service: StudyService, url: str, expected_output: Union[str, dict], - formatted: bool = True, + format: str = "json", ) -> None: url = url[len("/v1/studies/") :] uuid, url = url.split("/raw?path=") params = RequestParameters(user=ADMIN) - output = storage_service.get(uuid=uuid, url=url, depth=3, formatted=formatted, params=params) + output = storage_service.get(uuid=uuid, url=url, depth=3, format=format, params=params) assert_study( output, expected_output, diff --git a/tests/storage/integration/test_write_STA_mini.py b/tests/storage/integration/test_write_STA_mini.py index 4ebb924753..c5cefee8bf 100644 --- a/tests/storage/integration/test_write_STA_mini.py +++ b/tests/storage/integration/test_write_STA_mini.py @@ -28,7 +28,7 @@ def assert_with_errors( res = storage_service.edit_study(uuid=uuid, url=url, new=new, params=params) assert res == new - res = storage_service.get(uuid=uuid, url=url, depth=-1, formatted=True, params=params) + res = storage_service.get(uuid=uuid, url=url, depth=-1, format="json", params=params) if expected is not None: assert res == expected else: diff --git 
a/tests/storage/web/test_studies_bp.py b/tests/storage/web/test_studies_bp.py index 05366331ca..11fbe8e38f 100644 --- a/tests/storage/web/test_studies_bp.py +++ b/tests/storage/web/test_studies_bp.py @@ -72,9 +72,7 @@ def test_server() -> None: client = TestClient(app) client.get("/v1/studies/study1/raw?path=settings/general/params") - mock_service.get.assert_called_once_with( - "study1", "settings/general/params", depth=3, formatted=True, params=PARAMS - ) + mock_service.get.assert_called_once_with("study1", "settings/general/params", depth=3, format="json", params=PARAMS) @pytest.mark.unit_test @@ -123,7 +121,7 @@ def test_server_with_parameters() -> None: parameters = RequestParameters(user=ADMIN) assert result.status_code == HTTPStatus.OK - mock_storage_service.get.assert_called_once_with("study1", "/", depth=4, formatted=True, params=parameters) + mock_storage_service.get.assert_called_once_with("study1", "/", depth=4, format="json", params=parameters) result = client.get("/v1/studies/study2/raw?depth=WRONG_TYPE") assert result.status_code == HTTPStatus.UNPROCESSABLE_ENTITY @@ -132,7 +130,7 @@ def test_server_with_parameters() -> None: assert result.status_code == HTTPStatus.OK excepted_parameters = RequestParameters(user=ADMIN) - mock_storage_service.get.assert_called_with("study2", "/", depth=3, formatted=True, params=excepted_parameters) + mock_storage_service.get.assert_called_with("study2", "/", depth=3, format="json", params=excepted_parameters) @pytest.mark.unit_test From 3fcb7487bb4c117a36fd1ddc70b14ab5df03509d Mon Sep 17 00:00:00 2001 From: belthlemar Date: Mon, 8 Jul 2024 09:42:04 +0200 Subject: [PATCH 07/22] fix last tests --- antarest/study/storage/abstract_storage_service.py | 2 +- antarest/study/storage/variantstudy/variant_study_service.py | 2 +- tests/storage/repository/filesystem/test_lazy_node.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index da386b4699..cdd41d641a 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -243,7 +243,7 @@ def import_output( extension = ".zip" if is_zipped else "" path_output = path_output.rename(Path(path_output.parent, output_full_name + extension)) - data = self.get(metadata, f"output/{output_full_name}", 1) + data = self.get(metadata, f"output/{output_full_name}", 1, use_cache=False) if data is None: self.delete_output(metadata, "imported_output") diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index ddb53be8e0..6a72568638 100644 --- a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -479,7 +479,7 @@ def get( """ self._safe_generation(metadata, timeout=60) self.repository.refresh(metadata) - return super().get(metadata=metadata, url=url, depth=depth) + return super().get(metadata=metadata, url=url, depth=depth, format=format, use_cache=use_cache) def create_variant_study(self, uuid: str, name: str, params: RequestParameters) -> VariantStudy: """ diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py index b82912fee1..8474598a52 100644 --- a/tests/storage/repository/filesystem/test_lazy_node.py +++ b/tests/storage/repository/filesystem/test_lazy_node.py @@ -61,7 +61,7 @@ def 
test_get_no_expanded_link(tmp_path: Path): config=config, ) assert "Mock Matrix Content" == node.get(expanded=False) - resolver.resolve.assert_called_once_with(uri, True) + resolver.resolve.assert_called_once_with(uri, "json") def test_get_expanded_txt(tmp_path: Path): From 3a3308ae7695e804e53e6ea2bc1210838f943cf6 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Mon, 8 Jul 2024 15:31:11 +0200 Subject: [PATCH 08/22] add pyarrow to requirements --- antarest/matrixstore/uri_resolver_service.py | 17 +++++++++------ .../filesystem/matrix/input_series_matrix.py | 17 +++++++++++---- .../model/filesystem/matrix/matrix.py | 14 +++++++++---- .../filesystem/matrix/output_series_matrix.py | 21 +++++++++++-------- antarest/study/web/raw_studies_blueprint.py | 12 +++++++---- requirements.txt | 1 + .../test_fetch_raw_data.py | 1 + .../filesystem/matrix/test_matrix_node.py | 1 + 8 files changed, 57 insertions(+), 27 deletions(-) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 6a86aa551b..fdca820a46 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -1,9 +1,9 @@ +import io import re -from typing import Optional, Tuple +import typing as t import pandas as pd -from antarest.core.model import SUB_JSON from antarest.matrixstore.service import ISimpleMatrixService @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, format: str = "json") -> SUB_JSON: + def resolve(self, uri: str, format: str = "json") -> t.Union[io.BytesIO, str, t.Dict[str, t.Any], None]: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -23,7 +23,7 @@ def resolve(self, uri: str, format: str = "json") -> SUB_JSON: raise NotImplementedError(f"protocol {protocol} not implemented") @staticmethod - def _extract_uri_components(uri: str) -> Optional[Tuple[str, str]]: + def _extract_uri_components(uri: str) -> t.Optional[t.Tuple[str, str]]: match = re.match(r"^(\w+)://(.+)$", uri) if not match: return None @@ -33,11 +33,11 @@ def _extract_uri_components(uri: str) -> Optional[Tuple[str, str]]: return protocol, uuid @staticmethod - def extract_id(uri: str) -> Optional[str]: + def extract_id(uri: str) -> t.Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, format: str) -> SUB_JSON: + def _resolve_matrix(self, id: str, format: str) -> t.Union[io.BytesIO, str, t.Dict[str, t.Any]]: data = self.matrix_service.get(id) if data: if format == "json": @@ -54,6 +54,11 @@ def _resolve_matrix(self, id: str, format: str) -> SUB_JSON: ) if df.empty: return "" + elif format == "arrow": + buffer = io.BytesIO() + df.columns = df.columns.map(str) + df.to_feather(buffer, compression="uncompressed") + return buffer else: csv = df.to_csv( None, diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py index 4cda0b4027..2d9e8be844 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py @@ -1,3 +1,4 @@ +import io import logging from pathlib import Path from typing import Any, List, Optional, Union, cast @@ -45,7 +46,8 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, 
return_dataframe: bool = False, - ) -> Union[JSON, pd.DataFrame]: + format: str = "json", + ) -> Union[JSON, io.BytesIO, pd.DataFrame]: file_path = file_path or self.config.path try: # sourcery skip: extract-method @@ -73,14 +75,21 @@ def parse( raise ChildNotFoundError(f"File '{relpath}' not found in the study '{study_id}'") from e stopwatch.log_elapsed(lambda x: logger.info(f"Matrix parsed in {x}s")) - matrix.dropna(how="any", axis=1, inplace=True) + matrix = matrix.dropna(how="any", axis=1) if return_dataframe: return matrix - data = cast(JSON, matrix.to_dict(orient="split")) - stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to dict in {x}s")) + if format == "json": + matrix_json = cast(JSON, matrix.to_dict(orient="split")) + stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to dict in {x}s")) + return matrix_json + data = io.BytesIO() + matrix.columns = matrix.columns.map(str) + matrix.to_feather(data, compression="uncompressed") + stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to arrow in {x}s")) return data + except EmptyDataError: logger.warning(f"Empty file found when parsing {file_path}") matrix = pd.DataFrame(self.default_empty) diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index 18d41422df..d7a7715b1d 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -1,3 +1,4 @@ +import io import logging from abc import ABC, abstractmethod from enum import Enum @@ -63,6 +64,7 @@ def normalize(self) -> None: return matrix = self.parse() + assert isinstance(matrix, dict) if "data" in matrix: data = cast(List[List[float]], matrix["data"]) @@ -89,9 +91,9 @@ def denormalize(self) -> None: self.dump(matrix) self.get_link_path().unlink() - def load( + def load( # type: ignore self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" - ) -> Union[bytes, JSON]: + ) -> Union[bytes, JSON, io.BytesIO, pd.DataFrame]: file_path, tmp_dir = self._get_real_file_path() if format == "bytes": if file_path.exists(): @@ -102,7 +104,10 @@ def load( tmp_dir.cleanup() return b"" - return cast(JSON, self.parse(file_path, tmp_dir)) + result = self.parse(file_path, tmp_dir, False, format) + if format == "json": + return cast(JSON, result) + return result @abstractmethod def parse( @@ -110,7 +115,8 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, return_dataframe: bool = False, - ) -> Union[JSON, pd.DataFrame]: + format: str = "json", + ) -> Union[JSON, io.BytesIO, pd.DataFrame]: """ Parse the matrix content """ diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index 43d412a68f..f9af888b2e 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -1,3 +1,4 @@ +import io import logging from pathlib import Path from typing import Any, List, Optional, Union, cast @@ -87,13 +88,15 @@ def parse_dataframe( matrix.columns = body.columns return matrix - def parse( - self, - file_path: Optional[Path] = None, - tmp_dir: Any = None, - ) -> JSON: + def parse(self, file_path: Path, tmp_dir: Any, format: str) -> Union[JSON, io.BytesIO]: matrix = self.parse_dataframe(file_path, tmp_dir) - return cast(JSON, 
matrix.to_dict(orient="split")) + if format == "json": + return cast(JSON, matrix.to_dict(orient="split")) + else: + buffer = io.BytesIO() + matrix.columns = matrix.columns.map(str) + matrix.to_feather(buffer, compression="uncompressed") + return buffer def check_errors( self, @@ -108,9 +111,9 @@ def check_errors( errors.append(f"Output Series Matrix f{self.config.path} not exists") return errors - def load( + def load( # type: ignore self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" - ) -> Union[bytes, JSON]: + ) -> Union[bytes, JSON, io.BytesIO]: try: file_path, tmp_dir = self._get_real_file_path() if format == "bytes": @@ -127,7 +130,7 @@ def load( if not file_path.exists(): raise FileNotFoundError(file_path) - return self.parse(file_path, tmp_dir) + return self.parse(file_path, tmp_dir, format) except FileNotFoundError as e: raise ChildNotFoundError( f"Output file '{self.config.path.name}' not found in study {self.config.study_id}" diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index 07d90559cf..501df7dab5 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -61,7 +61,7 @@ } -class MATRIX_FORMAT(EnumIgnoreCase): +class MatrixFormat(EnumIgnoreCase): JSON = "json" BYTES = "bytes" ARROW = "arrow" @@ -105,9 +105,9 @@ def get_study( path: str = Param("/", examples=get_path_examples()), # type: ignore depth: int = 3, formatted: bool = True, - format: t.Optional[MATRIX_FORMAT] = None, + format: t.Optional[MatrixFormat] = None, current_user: JWTUser = Depends(auth.get_current_user), - ) -> t.Any: + ) -> Response: """ Fetches raw data from a study, and returns the data in different formats based on the file type, or as a JSON response. @@ -116,7 +116,8 @@ def get_study( - `uuid`: The UUID of the study. - `path`: The path to the data to fetch. - `depth`: The depth of the data to retrieve. - - `formatted`: A flag specifying whether the data should be returned in a formatted manner. + - `formatted`: Deprecated flag, use `format` instead. + - `format`: The format you want your file to be displayed in. Arrow format is only supported by matrix files. Returns the fetched data: a JSON object (in most cases), a plain text file or a file attachment (Microsoft Office document, TSV/TSV file...). @@ -178,6 +179,9 @@ def get_study( # because it's better to avoid raising an exception. return Response(content=output, media_type="application/octet-stream") + if isinstance(output, io.BytesIO): + return Response(content=output.read(), media_type="application/octet-stream") + # We want to allow `NaN`, `+Infinity`, and `-Infinity` values in the JSON response # even though they are not standard JSON values because they are supported in JavaScript. 
# Additionally, we cannot use `orjson` because, despite its superior performance, it converts diff --git a/requirements.txt b/requirements.txt index 5a543c02fc..7c805e6c83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,6 +17,7 @@ pandas~=1.4.0 paramiko~=2.12.0 plyer~=2.0.0 psycopg2-binary==2.9.4 +pyarrow~=16.1.0 py7zr~=0.20.6 pydantic~=1.9.0 PyQt5~=5.15.6 diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index e55929c97a..d1ef1d5814 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -7,6 +7,7 @@ from unittest.mock import ANY import numpy as np +import pandas as pd import pytest from starlette.testclient import TestClient diff --git a/tests/storage/repository/filesystem/matrix/test_matrix_node.py b/tests/storage/repository/filesystem/matrix/test_matrix_node.py index 6858a5ff0f..e0434bf72c 100644 --- a/tests/storage/repository/filesystem/matrix/test_matrix_node.py +++ b/tests/storage/repository/filesystem/matrix/test_matrix_node.py @@ -32,6 +32,7 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Optional[TemporaryDirectory] = None, return_dataframe: bool = False, + format: str = "json", ) -> JSON: return MOCK_MATRIX_JSON From 2cb0d7e45d991a5d9e7bbfa7e19eacb00ca0d277 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Mon, 8 Jul 2024 17:36:28 +0200 Subject: [PATCH 09/22] last change --- antarest/matrixstore/uri_resolver_service.py | 14 ++++++++------ antarest/study/web/raw_studies_blueprint.py | 6 +++--- .../raw_studies_blueprint/test_fetch_raw_data.py | 1 - 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index fdca820a46..2197dfd15d 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -3,6 +3,7 @@ import typing as t import pandas as pd +from starlette.responses import Response from antarest.matrixstore.service import ISimpleMatrixService @@ -11,7 +12,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, format: str = "json") -> t.Union[io.BytesIO, str, t.Dict[str, t.Any], None]: + def resolve(self, uri: str, format: str = "json") -> t.Union[bytes, str, t.Dict[str, t.Any], None]: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -37,7 +38,7 @@ def extract_id(uri: str) -> t.Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, format: str) -> t.Union[io.BytesIO, str, t.Dict[str, t.Any]]: + def _resolve_matrix(self, id: str, format: str) -> t.Union[bytes, str, t.Dict[str, t.Any]]: data = self.matrix_service.get(id) if data: if format == "json": @@ -55,10 +56,11 @@ def _resolve_matrix(self, id: str, format: str) -> t.Union[io.BytesIO, str, t.Di if df.empty: return "" elif format == "arrow": - buffer = io.BytesIO() - df.columns = df.columns.map(str) - df.to_feather(buffer, compression="uncompressed") - return buffer + with io.BytesIO() as buffer: + df.columns = df.columns.map(str) + df.to_feather(buffer, compression="uncompressed") + return buffer.getvalue() + else: csv = df.to_csv( None, diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index 
501df7dab5..7b74a2c1fc 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -136,6 +136,9 @@ def get_study( output = study_service.get(uuid, path, depth=depth, format=real_format, params=parameters) if isinstance(output, bytes): + if real_format == "arrow": + return Response(content=output, media_type="application/octet-stream") + # Guess the suffix form the target data resource_path = PurePosixPath(path) parent_cfg = study_service.get( @@ -179,9 +182,6 @@ def get_study( # because it's better to avoid raising an exception. return Response(content=output, media_type="application/octet-stream") - if isinstance(output, io.BytesIO): - return Response(content=output.read(), media_type="application/octet-stream") - # We want to allow `NaN`, `+Infinity`, and `-Infinity` values in the JSON response # even though they are not standard JSON values because they are supported in JavaScript. # Additionally, we cannot use `orjson` because, despite its superior performance, it converts diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index d1ef1d5814..e55929c97a 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -7,7 +7,6 @@ from unittest.mock import ANY import numpy as np -import pandas as pd import pytest from starlette.testclient import TestClient From 6d411cdea6b30bebd271198e1e5d34ebe7b5ba58 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Mon, 8 Jul 2024 19:03:01 +0200 Subject: [PATCH 10/22] remove useless import --- antarest/matrixstore/uri_resolver_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 2197dfd15d..0cc0286b96 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -3,7 +3,6 @@ import typing as t import pandas as pd -from starlette.responses import Response from antarest.matrixstore.service import ISimpleMatrixService From e1517508cefd7e00dec2adbd44eedd64b3cd0306 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Tue, 9 Jul 2024 11:50:49 +0200 Subject: [PATCH 11/22] add tests --- .../filesystem/matrix/input_series_matrix.py | 12 ++-- .../model/filesystem/matrix/matrix.py | 5 +- .../filesystem/matrix/output_series_matrix.py | 16 +++-- antarest/study/web/raw_studies_blueprint.py | 8 +-- .../test_fetch_raw_data.py | 64 +++++++++++++++++-- 5 files changed, 79 insertions(+), 26 deletions(-) diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py index 2d9e8be844..88e137ac0b 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py @@ -47,7 +47,7 @@ def parse( tmp_dir: Any = None, return_dataframe: bool = False, format: str = "json", - ) -> Union[JSON, io.BytesIO, pd.DataFrame]: + ) -> Union[JSON, bytes, pd.DataFrame]: file_path = file_path or self.config.path try: # sourcery skip: extract-method @@ -84,11 +84,11 @@ def parse( stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to dict in {x}s")) return matrix_json - data = io.BytesIO() - matrix.columns = matrix.columns.map(str) - matrix.to_feather(data, compression="uncompressed") - stopwatch.log_elapsed(lambda 
x: logger.info(f"Matrix to arrow in {x}s")) - return data + with io.BytesIO() as buffer: + matrix.columns = matrix.columns.map(str) + matrix.to_feather(buffer, compression="uncompressed") + stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to arrow in {x}s")) + return buffer.getvalue() except EmptyDataError: logger.warning(f"Empty file found when parsing {file_path}") diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index d7a7715b1d..bde09db17b 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -1,4 +1,3 @@ -import io import logging from abc import ABC, abstractmethod from enum import Enum @@ -93,7 +92,7 @@ def denormalize(self) -> None: def load( # type: ignore self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" - ) -> Union[bytes, JSON, io.BytesIO, pd.DataFrame]: + ) -> Union[bytes, JSON, pd.DataFrame]: file_path, tmp_dir = self._get_real_file_path() if format == "bytes": if file_path.exists(): @@ -116,7 +115,7 @@ def parse( tmp_dir: Any = None, return_dataframe: bool = False, format: str = "json", - ) -> Union[JSON, io.BytesIO, pd.DataFrame]: + ) -> Union[JSON, bytes, pd.DataFrame]: """ Parse the matrix content """ diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index f9af888b2e..65000baa11 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -88,15 +88,17 @@ def parse_dataframe( matrix.columns = body.columns return matrix - def parse(self, file_path: Path, tmp_dir: Any, format: str) -> Union[JSON, io.BytesIO]: + def parse(self, file_path: Path, tmp_dir: Any, format: str) -> Union[JSON, bytes]: matrix = self.parse_dataframe(file_path, tmp_dir) if format == "json": return cast(JSON, matrix.to_dict(orient="split")) else: - buffer = io.BytesIO() - matrix.columns = matrix.columns.map(str) - matrix.to_feather(buffer, compression="uncompressed") - return buffer + with io.BytesIO() as buffer: + matrix.columns = matrix.columns.map(str) + matrix.reset_index(inplace=True) + matrix.rename(columns={matrix.columns[0]: "Index"}, inplace=True) + matrix.to_feather(buffer, compression="uncompressed") + return buffer.getvalue() def check_errors( self, @@ -111,9 +113,9 @@ def check_errors( errors.append(f"Output Series Matrix f{self.config.path} not exists") return errors - def load( # type: ignore + def load( self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" - ) -> Union[bytes, JSON, io.BytesIO]: + ) -> Union[bytes, JSON]: try: file_path, tmp_dir = self._get_real_file_path() if format == "bytes": diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index 7b74a2c1fc..a76b5a07e0 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -128,15 +128,15 @@ def get_study( ) parameters = RequestParameters(user=current_user) - if not format: - real_format = "json" if formatted else "bytes" - else: + if format: real_format = format.value + else: + real_format = "json" if formatted else "bytes" output = study_service.get(uuid, path, depth=depth, 
format=real_format, params=parameters) if isinstance(output, bytes): - if real_format == "arrow": + if real_format == MatrixFormat.ARROW: return Response(content=output, media_type="application/octet-stream") # Guess the suffix form the target data diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index e55929c97a..f19d426d30 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -7,6 +7,8 @@ from unittest.mock import ANY import numpy as np +import pyarrow as pa +import pyarrow.feather as feather import pytest from starlette.testclient import TestClient @@ -174,27 +176,77 @@ def test_get_study( # If we ask for a matrix, we should have a JSON content if formatted is True rel_path = "/input/links/de/fr" + expected_row = [100000, 100000, 0.01, 0.01, 0, 0, 0, 0] res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": True}, headers=headers, ) assert res.status_code == 200, res.json() - actual = res.json() - assert actual == {"index": ANY, "columns": ANY, "data": ANY} + old_result = res.json() + assert old_result == {"index": ANY, "columns": ANY, "data": ANY} + assert old_result["data"][0] == expected_row + + # We should have the same result with new flag 'format' set to 'JSON' + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": rel_path, "format": "json"}, + headers=headers, + ) + assert res.status_code == 200, res.json() + new_result = res.json() + assert new_result == old_result # If we ask for a matrix, we should have a CSV content if formatted is False - rel_path = "/input/links/de/fr" res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": False}, headers=headers, ) assert res.status_code == 200, res.json() - actual = res.text - actual_lines = actual.splitlines() + old_result = res.text + actual_lines = old_result.splitlines() first_row = [float(x) for x in actual_lines[0].split("\t")] - assert first_row == [100000, 100000, 0.01, 0.01, 0, 0, 0, 0] + assert first_row == expected_row + + # We should have the same result with new flag 'format' set to 'bytes' + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": rel_path, "format": "bytes"}, + headers=headers, + ) + assert res.status_code == 200, res.json() + new_result = res.text + assert new_result == old_result + + # If we ask for a matrix, we should have arrow binary if format = "arrow" + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": rel_path, "format": "arrow"}, + headers=headers, + ) + assert res.status_code == 200 + assert isinstance(res.content, bytes) + assert res.text.startswith("ARROW") + buffer = pa.BufferReader(res.content) + table = feather.read_table(buffer) + df = table.to_pandas() + assert list(df.loc[0]) == expected_row + + # Asserts output matrix (containing index and columns) can be retrieved with arrow + output_path = "/output/20201014-1422eco-hello/economy/mc-all/areas/de/id-daily" + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": output_path, "format": "arrow"}, + headers=headers, + ) + assert res.status_code == 200 + assert isinstance(res.content, bytes) + assert res.text.startswith("ARROW") + buffer = pa.BufferReader(res.content) + table = feather.read_table(buffer) + df = table.to_pandas() + assert df.columns[0] == "Index" # asserts the first 
columns corresponds to the index in such a case. # If ask for an empty matrix, we should have an empty binary content res = client.get( From 529de987a35cab0ec014c86f052c9e31c413dd09 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Tue, 9 Jul 2024 18:02:26 +0200 Subject: [PATCH 12/22] add arrow support in put raw --- antarest/study/service.py | 14 ++- antarest/study/web/raw_studies_blueprint.py | 2 +- requirements-dev.txt | 1 + .../test_fetch_raw_data.py | 95 ++++--------------- 4 files changed, 34 insertions(+), 78 deletions(-) diff --git a/antarest/study/service.py b/antarest/study/service.py index f181d874eb..055d8df18f 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -14,6 +14,8 @@ import numpy as np import pandas as pd +import pyarrow as pa +import pyarrow.feather as feather from fastapi import HTTPException, UploadFile from markupsafe import escape from starlette.responses import FileResponse, Response @@ -1440,9 +1442,15 @@ def _create_edit_study_command( ) elif isinstance(tree_node, InputSeriesMatrix): if isinstance(data, bytes): - # noinspection PyTypeChecker - matrix = np.loadtxt(io.BytesIO(data), delimiter="\t", dtype=np.float64, ndmin=2) - matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix + # checks if it corresponds to arrow format or if it's a classic file. + if data[:5].decode("utf-8") == "ARROW": + buffer = pa.BufferReader(data) # type: ignore + table = feather.read_table(buffer) + df = table.to_pandas() + matrix = df.to_numpy() + else: + matrix = np.loadtxt(io.BytesIO(data), delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix return ReplaceMatrix( target=url, matrix=matrix.tolist(), diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index a76b5a07e0..cb05a30b39 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -385,7 +385,7 @@ def replace_study_file( Parameters: - `uuid`: The UUID of the study. - `path`: The path to the data to update. Defaults to "/". - - `file`: The raw file to be posted (e.g. a CSV file opened in binary mode). + - `file`: The raw file to be posted (e.g. a CSV file opened in binary mode or a matrix in arrow format). - `create_missing`: Flag to indicate whether to create file or parent directories if missing. """ logger.info( diff --git a/requirements-dev.txt b/requirements-dev.txt index e7ff79c736..808596e882 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,6 +11,7 @@ pyinstaller-hooks-contrib==2024.6 # of the corresponding implementation libraries used in production (in `requirements.txt`). 
pandas-stubs~=1.4.0 +pyarrow-stubs~=10.0.1.7 types-psycopg2~=2.9.4 types-redis~=4.1.2 types-requests~=2.27.1 diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index f19d426d30..6cf82a8239 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -46,7 +46,7 @@ def test_get_study( with db(): study: RawStudy = db.session.get(Study, internal_study_id) study_dir = pathlib.Path(study.path) - headers = {"Authorization": f"Bearer {user_access_token}"} + client.headers = {"Authorization": f"Bearer {user_access_token}"} shutil.copytree( ASSETS_DIR.joinpath("user"), @@ -58,11 +58,7 @@ def test_get_study( user_folder_dir = study_dir.joinpath("user/folder") for file_path in user_folder_dir.glob("*.*"): rel_path = file_path.relative_to(study_dir).as_posix() - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "depth": 1}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "depth": 1}) assert res.status_code == 200, res.json() if file_path.suffix == ".json": # special case for JSON files @@ -85,9 +81,7 @@ def test_get_study( for file_path in user_folder_dir.glob("*.*"): rel_path = file_path.relative_to(study_dir) res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": f"/{rel_path.as_posix()}", "depth": 1}, - headers=headers, + f"/v1/studies/{internal_study_id}/raw", params={"path": f"/{rel_path.as_posix()}", "depth": 1} ) assert res.status_code == 200, res.json() actual = res.content @@ -95,11 +89,7 @@ def test_get_study( assert actual == expected # If you try to retrieve a file that doesn't exist, we should have a 404 error - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "user/somewhere/something.txt"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}) assert res.status_code == 404, res.json() assert res.json() == { "description": "'somewhere' not a child of User", @@ -111,7 +101,6 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}, - headers=headers, files={"file": io.BytesIO(b"Goodbye World!")}, ) assert res.status_code == 404, res.json() @@ -125,7 +114,6 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt", "create_missing": True}, - headers=headers, files={"file": io.BytesIO(b"Goodbye Cruel World!")}, ) assert res.status_code == 204, res.json() @@ -135,27 +123,18 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt", "create_missing": True}, - headers=headers, files={"file": io.BytesIO(b"This is the end!")}, ) assert res.status_code == 204, res.json() # You can check that the resource has been created or updated. - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "user/somewhere/something.txt"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}) assert res.status_code == 200, res.json() assert res.content == b"This is the end!" 
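For reference, the server-side check added in this patch recognizes an Arrow payload by the b"ARROW1" magic at the start of a feather file (the first five bytes are compared to "ARROW"). A minimal client-side sketch of uploading a matrix in that format, with a hypothetical host, study id and token, assuming pandas and requests are available:

import io

import pandas as pd
import requests  # illustrative choice; any HTTP client works

# Build a feather (Arrow IPC file) payload from a DataFrame.
# pyarrow requires string column names, hence the .map(str).
df = pd.DataFrame([[100000, 100000, 0.01, 0.01, 0, 0, 0, 0]])
df.columns = df.columns.map(str)
buffer = io.BytesIO()
df.to_feather(buffer, compression="uncompressed")
payload = buffer.getvalue()  # bytes starting with the b"ARROW1" magic

# <host>, <study_id> and <token> are placeholders, not values from this patch.
requests.put(
    "http://<host>/v1/studies/<study_id>/raw",
    params={"path": "input/links/de/fr"},
    files={"file": payload},
    headers={"Authorization": "Bearer <token>"},
)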
# If we ask for properties, we should have a JSON content rel_path = "/input/links/de/properties/fr" - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "depth": 2}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "depth": 2}) assert res.status_code == 200, res.json() actual = res.json() assert actual == { @@ -177,32 +156,20 @@ def test_get_study( # If we ask for a matrix, we should have a JSON content if formatted is True rel_path = "/input/links/de/fr" expected_row = [100000, 100000, 0.01, 0.01, 0, 0, 0, 0] - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "formatted": True}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": True}) assert res.status_code == 200, res.json() old_result = res.json() assert old_result == {"index": ANY, "columns": ANY, "data": ANY} assert old_result["data"][0] == expected_row # We should have the same result with new flag 'format' set to 'JSON' - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "format": "json"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "json"}) assert res.status_code == 200, res.json() new_result = res.json() assert new_result == old_result # If we ask for a matrix, we should have a CSV content if formatted is False - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "formatted": False}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": False}) assert res.status_code == 200, res.json() old_result = res.text actual_lines = old_result.splitlines() @@ -210,36 +177,25 @@ def test_get_study( assert first_row == expected_row # We should have the same result with new flag 'format' set to 'bytes' - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "format": "bytes"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "bytes"}) assert res.status_code == 200, res.json() new_result = res.text assert new_result == old_result # If we ask for a matrix, we should have arrow binary if format = "arrow" - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "format": "arrow"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "arrow"}) assert res.status_code == 200 assert isinstance(res.content, bytes) assert res.text.startswith("ARROW") - buffer = pa.BufferReader(res.content) + arrow_bytes = res.content + buffer = pa.BufferReader(arrow_bytes) table = feather.read_table(buffer) df = table.to_pandas() assert list(df.loc[0]) == expected_row # Asserts output matrix (containing index and columns) can be retrieved with arrow output_path = "/output/20201014-1422eco-hello/economy/mc-all/areas/de/id-daily" - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": output_path, "format": "arrow"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": output_path, "format": "arrow"}) assert res.status_code == 200 assert isinstance(res.content, bytes) assert res.text.startswith("ARROW") @@ -248,11 +204,14 @@ def test_get_study( df = table.to_pandas() assert df.columns[0] == 
"Index" # asserts the first columns corresponds to the index in such a case. + # Try to replace a matrix with a one in arrow format + res = client.put(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path}, files={"file": arrow_bytes}) + assert res.status_code in {201, 204} + # If ask for an empty matrix, we should have an empty binary content res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": False}, - headers=headers, ) assert res.status_code == 200, res.json() assert res.content == b"" @@ -261,7 +220,6 @@ def test_get_study( res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": True}, - headers=headers, ) assert res.status_code == 200, res.json() assert res.json() == {"index": [0], "columns": [], "data": []} @@ -271,19 +229,13 @@ def test_get_study( for file_path in user_folder_dir.glob("*.*"): rel_path = file_path.relative_to(study_dir) res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": f"/{rel_path.as_posix()}", "depth": 1}, - headers=headers, + f"/v1/studies/{internal_study_id}/raw", params={"path": f"/{rel_path.as_posix()}", "depth": 1} ) assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY # We can access to the configuration the classic way, # for instance, we can get the list of areas: - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "/input/areas/list", "depth": 1}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "/input/areas/list", "depth": 1}) assert res.status_code == 200, res.json() assert res.json() == ["DE", "ES", "FR", "IT"] @@ -291,16 +243,11 @@ def test_get_study( res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": "output/20201014-1427eco/economy/mc-all/areas/de/id-monthly"}, - headers=headers, ) assert res.status_code == 200 assert np.isnan(res.json()["data"][0]).any() # Iterate over all possible combinations of path and depth for path, depth in itertools.product([None, "", "/"], [0, 1, 2]): - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": path, "depth": depth}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "depth": depth}) assert res.status_code == 200, f"Error for path={path} and depth={depth}" From 6452e54a42ff851dbfe76713fffa5c7ed5c58b57 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Wed, 10 Jul 2024 19:33:43 +0200 Subject: [PATCH 13/22] resolve comments --- antarest/matrixstore/uri_resolver_service.py | 4 +- antarest/study/common/studystorage.py | 4 +- antarest/study/service.py | 11 +- .../study/storage/abstract_storage_service.py | 6 +- .../rawstudy/model/filesystem/folder_node.py | 12 +- .../model/filesystem/ini_file_node.py | 6 +- .../rawstudy/model/filesystem/inode.py | 2 +- .../rawstudy/model/filesystem/lazy_node.py | 6 +- .../filesystem/matrix/input_series_matrix.py | 2 +- .../model/filesystem/matrix/matrix.py | 8 +- .../filesystem/matrix/output_series_matrix.py | 6 +- .../model/filesystem/raw_file_node.py | 2 +- .../model/filesystem/root/input/areas/list.py | 2 +- .../simulation/ts_numbers/ts_numbers_data.py | 2 +- .../variantstudy/variant_study_service.py | 9 +- antarest/study/web/raw_studies_blueprint.py | 19 +-- .../test_fetch_raw_data.py | 150 +++++++++--------- 17 files changed, 131 insertions(+), 120 deletions(-) diff --git 
a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 0cc0286b96..4af7e36eff 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, format: str = "json") -> t.Union[bytes, str, t.Dict[str, t.Any], None]: + def resolve(self, uri: str, format: t.Optional[str] = None) -> t.Union[bytes, str, t.Dict[str, t.Any], None]: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -37,7 +37,7 @@ def extract_id(uri: str) -> t.Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, format: str) -> t.Union[bytes, str, t.Dict[str, t.Any]]: + def _resolve_matrix(self, id: str, format: t.Optional[str] = None) -> t.Union[bytes, str, t.Dict[str, t.Any]]: data = self.matrix_service.get(id) if data: if format == "json": diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index fc7c9bbdac..baf8cef78e 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -30,7 +30,7 @@ def get( metadata: T, url: str = "", depth: int = 3, - format: str = "json", + format: t.Optional[str] = None, ) -> JSON: """ Entry point to fetch data inside study. @@ -38,7 +38,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If so, the file will be returned as is. Returns: study data formatted in json diff --git a/antarest/study/service.py b/antarest/study/service.py index 055d8df18f..d79611b451 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -304,21 +304,14 @@ def _on_study_delete(self, uuid: str) -> None: for callback in self.on_deletion_callbacks: callback(uuid) - def get( - self, - uuid: str, - url: str, - depth: int, - format: str, - params: RequestParameters, - ) -> JSON: + def get(self, uuid: str, url: str, depth: int, params: RequestParameters, format: t.Optional[str] = None) -> JSON: """ Get study data inside filesystem Args: uuid: study uuid url: route to follow inside study structure depth: depth to expand tree when route matched - format: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If so, the file will be returned as is. params: request parameters Returns: data study formatted in json diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index cdd41d641a..cbc1ab1523 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -116,14 +116,16 @@ def get_study_information( tags=[tag.label for tag in study.tags], ) - def get(self, metadata: T, url: str = "", depth: int = 3, format: str = "json", use_cache: bool = True) -> JSON: + def get( + self, metadata: T, url: str = "", depth: int = 3, format: t.Optional[str] = None, use_cache: bool = True + ) -> JSON: """ Entry point to fetch data inside study. 
Args: metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If so, the file will be returned as is. use_cache: indicate if the cache must be used Returns: study data formatted in json diff --git a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py index 449ea4b683..058b14629b 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py @@ -52,7 +52,7 @@ def _forward_get( self, url: t.List[str], depth: int = -1, - format: str = "json", + format: t.Optional[str] = None, get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: children = self.build() @@ -75,7 +75,7 @@ def _forward_get( raise ValueError("Multiple nodes requested") def _expand_get( - self, depth: int = -1, format: str = "json", get_node: bool = False + self, depth: int = -1, format: t.Optional[str] = None, get_node: bool = False ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if get_node: return self @@ -93,7 +93,7 @@ def _get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, - format: str = "json", + format: t.Optional[str] = None, get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if url and url != [""]: @@ -102,7 +102,11 @@ def _get( return self._expand_get(depth, format, get_node) def get( - self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, + url: t.Optional[t.List[str]] = None, + depth: int = -1, + expanded: bool = False, + format: t.Optional[str] = None, ) -> JSON: output = self._get(url=url, depth=depth, format=format, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py index e4a0d586af..5518ec8f73 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py @@ -133,7 +133,11 @@ def _get_filtering_kwargs(self, url: t.List[str]) -> t.Dict[str, str]: return {} def get( - self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, + url: t.Optional[t.List[str]] = None, + depth: int = -1, + expanded: bool = False, + format: t.Optional[str] = None, ) -> SUB_JSON: output = self._get(url, depth, expanded, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/inode.py b/antarest/study/storage/rawstudy/model/filesystem/inode.py index 14c748b401..b5771903de 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/inode.py +++ b/antarest/study/storage/rawstudy/model/filesystem/inode.py @@ -25,7 +25,7 @@ def get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "json", + format: Optional[str] = None, ) -> G: """ Ask data inside tree. 
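The signatures above settle on one convention for `format`: None returns the file as stored on disk, "json" returns a dict with index/columns/data for matrices, and "arrow" returns feather-encoded bytes. An illustrative helper, not part of the patch and assuming pandas with pyarrow installed, showing how a caller could turn each variant of a matrix read back into a DataFrame:

import io
import typing as t

import pandas as pd


def to_dataframe(content: t.Union[bytes, t.Dict[str, t.Any]], format: t.Optional[str]) -> pd.DataFrame:
    # Hypothetical helper, only meant to illustrate the three `format` variants.
    if format == "json":
        # {"index": [...], "columns": [...], "data": [...]} as returned by the JSON path
        return pd.DataFrame(**t.cast(t.Dict[str, t.Any], content))
    if format == "arrow":
        # feather bytes, starting with the b"ARROW1" magic
        return pd.read_feather(io.BytesIO(t.cast(bytes, content)))
    # format=None: raw tab-separated bytes, as stored for a matrix in the study tree
    return pd.read_csv(io.BytesIO(t.cast(bytes, content)), sep="\t", header=None)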
diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py index 4c3f156368..bc1336c807 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py @@ -61,7 +61,7 @@ def _get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "json", + format: Optional[str] = None, get_node: bool = False, ) -> Union[Union[str, G], INode[G, S, V]]: self._assert_url_end(url) @@ -82,7 +82,7 @@ def _get( return self.load(url, depth, expanded, format) def get( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> Union[str, G]: output = self._get(url, depth, expanded, format, get_node=False) assert not isinstance(output, INode) @@ -136,7 +136,7 @@ def load( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - format: str = "json", + format: Optional[str] = None, ) -> G: """ Fetch data on disk. diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py index 88e137ac0b..b02db64e24 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py @@ -46,7 +46,7 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, return_dataframe: bool = False, - format: str = "json", + format: Optional[str] = None, ) -> Union[JSON, bytes, pd.DataFrame]: file_path = file_path or self.config.path try: diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index bde09db17b..c047eeddd7 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -83,7 +83,7 @@ def denormalize(self) -> None: # noinspection SpellCheckingInspection logger.info(f"Denormalizing matrix {self.config.path}") uuid = self.get_link_path().read_text() - matrix = self.context.resolver.resolve(uuid) + matrix = self.context.resolver.resolve(uuid, format="json") if not matrix or not isinstance(matrix, dict): raise DenormalizationException(f"Failed to retrieve original matrix for {self.config.path}") @@ -91,10 +91,10 @@ def denormalize(self) -> None: self.get_link_path().unlink() def load( # type: ignore - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> Union[bytes, JSON, pd.DataFrame]: file_path, tmp_dir = self._get_real_file_path() - if format == "bytes": + if not format: if file_path.exists(): return file_path.read_bytes() @@ -114,7 +114,7 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, return_dataframe: bool = False, - format: str = "json", + format: Optional[str] = None, ) -> Union[JSON, bytes, pd.DataFrame]: """ Parse the matrix content diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index 65000baa11..86b99275b8 100644 --- 
a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -88,7 +88,7 @@ def parse_dataframe( matrix.columns = body.columns return matrix - def parse(self, file_path: Path, tmp_dir: Any, format: str) -> Union[JSON, bytes]: + def parse(self, file_path: Path, tmp_dir: Any, format: Optional[str] = None) -> Union[JSON, bytes]: matrix = self.parse_dataframe(file_path, tmp_dir) if format == "json": return cast(JSON, matrix.to_dict(orient="split")) @@ -114,11 +114,11 @@ def check_errors( return errors def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> Union[bytes, JSON]: try: file_path, tmp_dir = self._get_real_file_path() - if format == "bytes": + if not format: if file_path.exists(): file_content = file_path.read_bytes() if tmp_dir: diff --git a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py index 7207949898..031cf6c462 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py @@ -25,7 +25,7 @@ def get_lazy_content( return f"file://{self.config.path.name}" def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> bytes: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py index f67c287c16..e35f80a3ba 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py @@ -20,7 +20,7 @@ def get_node(self, url: Optional[List[str]] = None) -> INode[List[str], List[str return self def get( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> List[str]: if self.config.zip_path: path, tmp_dir = self._extract_file_to_tmp_dir() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py index 41f15ce975..991a8ace08 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py @@ -9,7 +9,7 @@ class TsNumbersVector(LazyNode[List[int], List[int], JSON]): def load( - self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> List[int]: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index 6a72568638..70f885e43c 100644 --- 
a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -464,7 +464,12 @@ def _get_variants_parents(self, id: str, params: RequestParameters) -> t.List[St return output_list def get( - self, metadata: VariantStudy, url: str = "", depth: int = 3, format: str = "json", use_cache: bool = True + self, + metadata: VariantStudy, + url: str = "", + depth: int = 3, + format: t.Optional[str] = None, + use_cache: bool = True, ) -> JSON: """ Entry point to fetch data inside study. @@ -472,7 +477,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be either Can be 'json', 'arrow' or None. If so, the file will be returned as is. use_cache: indicate if cache should be used Returns: study data formatted in json diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index cb05a30b39..f230cd87fe 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -63,7 +63,6 @@ class MatrixFormat(EnumIgnoreCase): JSON = "json" - BYTES = "bytes" ARROW = "arrow" @@ -116,10 +115,10 @@ def get_study( - `uuid`: The UUID of the study. - `path`: The path to the data to fetch. - `depth`: The depth of the data to retrieve. - - `formatted`: Deprecated flag, use `format` instead. - - `format`: The format you want your file to be displayed in. Arrow format is only supported by matrix files. + - `formatted`: If false, returns the file as bytes. Else, the `format` flag applies. + - `format`: Either 'json' or 'arrow'. Arrow format is only supported by matrix files. - Returns the fetched data: a JSON object (in most cases), a plain text file + Returns the fetched data: a JSON object (in most cases), a plain text file, a matrix file in arrow format or a file attachment (Microsoft Office document, TSV/TSV file...). 
""" logger.info( @@ -128,21 +127,19 @@ def get_study( ) parameters = RequestParameters(user=current_user) - if format: - real_format = format.value - else: - real_format = "json" if formatted else "bytes" + _format = format or MatrixFormat.JSON + real_format = _format.value if formatted else None - output = study_service.get(uuid, path, depth=depth, format=real_format, params=parameters) + output = study_service.get(uuid, path, depth=depth, params=parameters, format=real_format) if isinstance(output, bytes): if real_format == MatrixFormat.ARROW: - return Response(content=output, media_type="application/octet-stream") + return Response(content=output, media_type="application/vnd.apache.arrow.file") # Guess the suffix form the target data resource_path = PurePosixPath(path) parent_cfg = study_service.get( - uuid, str(resource_path.parent), depth=2, format=real_format, params=parameters + uuid, str(resource_path.parent), depth=2, params=parameters, format=real_format ) child = parent_cfg[resource_path.name] suffix = PurePosixPath(child).suffix diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index 6cf82a8239..777b5121f6 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -153,77 +153,6 @@ def test_get_study( "use-phase-shifter": False, } - # If we ask for a matrix, we should have a JSON content if formatted is True - rel_path = "/input/links/de/fr" - expected_row = [100000, 100000, 0.01, 0.01, 0, 0, 0, 0] - res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": True}) - assert res.status_code == 200, res.json() - old_result = res.json() - assert old_result == {"index": ANY, "columns": ANY, "data": ANY} - assert old_result["data"][0] == expected_row - - # We should have the same result with new flag 'format' set to 'JSON' - res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "json"}) - assert res.status_code == 200, res.json() - new_result = res.json() - assert new_result == old_result - - # If we ask for a matrix, we should have a CSV content if formatted is False - res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "formatted": False}) - assert res.status_code == 200, res.json() - old_result = res.text - actual_lines = old_result.splitlines() - first_row = [float(x) for x in actual_lines[0].split("\t")] - assert first_row == expected_row - - # We should have the same result with new flag 'format' set to 'bytes' - res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "bytes"}) - assert res.status_code == 200, res.json() - new_result = res.text - assert new_result == old_result - - # If we ask for a matrix, we should have arrow binary if format = "arrow" - res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "format": "arrow"}) - assert res.status_code == 200 - assert isinstance(res.content, bytes) - assert res.text.startswith("ARROW") - arrow_bytes = res.content - buffer = pa.BufferReader(arrow_bytes) - table = feather.read_table(buffer) - df = table.to_pandas() - assert list(df.loc[0]) == expected_row - - # Asserts output matrix (containing index and columns) can be retrieved with arrow - output_path = "/output/20201014-1422eco-hello/economy/mc-all/areas/de/id-daily" - res = client.get(f"/v1/studies/{internal_study_id}/raw", 
params={"path": output_path, "format": "arrow"}) - assert res.status_code == 200 - assert isinstance(res.content, bytes) - assert res.text.startswith("ARROW") - buffer = pa.BufferReader(res.content) - table = feather.read_table(buffer) - df = table.to_pandas() - assert df.columns[0] == "Index" # asserts the first columns corresponds to the index in such a case. - - # Try to replace a matrix with a one in arrow format - res = client.put(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path}, files={"file": arrow_bytes}) - assert res.status_code in {201, 204} - - # If ask for an empty matrix, we should have an empty binary content - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": False}, - ) - assert res.status_code == 200, res.json() - assert res.content == b"" - - # But, if we use formatted = True, we should have a JSON objet representing and empty matrix - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": True}, - ) - assert res.status_code == 200, res.json() - assert res.json() == {"index": [0], "columns": [], "data": []} - # Some files can be corrupted user_folder_dir = study_dir.joinpath("user/bad") for file_path in user_folder_dir.glob("*.*"): @@ -247,7 +176,84 @@ def test_get_study( assert res.status_code == 200 assert np.isnan(res.json()["data"][0]).any() - # Iterate over all possible combinations of path and depth + # Iterate over all possible combinations of path and depth (to mimic the debug view) for path, depth in itertools.product([None, "", "/"], [0, 1, 2]): res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "depth": depth}) assert res.status_code == 200, f"Error for path={path} and depth={depth}" + + # For an empty matrix, we should have an empty binary content + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": False}, + ) + assert res.status_code == 200, res.json() + assert res.content == b"" + + # But, if we use formatted = True, we should have a JSON objet representing and empty matrix + res = client.get( + f"/v1/studies/{internal_study_id}/raw", + params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": True}, + ) + assert res.status_code == 200, res.json() + assert res.json() == {"index": [0], "columns": [], "data": []} + + # ============================= + # MATRICES + # ============================= + + matrix_types = { + "input": {"path": "/input/links/de/fr", "expected_row": [100000, 100000, 0.01, 0.01, 0, 0, 0, 0]}, + "output": { + "path": "/output/20201014-1422eco-hello/economy/mc-all/areas/de/id-daily", + "expected_row": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + } + + for matrix_type, parameters in matrix_types.items(): + path = parameters["path"] + expected_row = parameters["expected_row"] + + # We should have a JSON content if formatted is True + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "formatted": True}) + assert res.status_code == 200, res.json() + old_result = res.json() + assert old_result == {"index": ANY, "columns": ANY, "data": ANY} + assert old_result["data"][0][:8] == expected_row + + # We should have the same result with new flag 'format' set to 'JSON' + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "format": "json"}) + assert res.status_code == 200, res.json() + new_result = 
res.json() + assert new_result == old_result + + # We should have a CSV content if formatted is False + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "formatted": False}) + assert res.status_code == 200, res.json() + actual_lines = res.text.splitlines() + if matrix_type == "input": + first_row = [float(x) for x in actual_lines[0].split("\t")] + assert first_row == expected_row + else: + assert actual_lines[0].split("\t") == ["DE", "area", "id", "daily"] + + # We should have arrow binary if format = "arrow" + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "format": "arrow"}) + assert res.status_code == 200 + assert isinstance(res.content, bytes) + assert res.text.startswith("ARROW") + arrow_bytes = res.content + buffer = pa.BufferReader(arrow_bytes) + table = feather.read_table(buffer) + df = table.to_pandas() + if matrix_type == "input": + assert list(df.loc[0]) == expected_row + else: + assert df.columns[0] == "Index" # asserts the first columns corresponds to the index in such a case. + assert list(df.loc[0][:9]) == ["01/01"] + expected_row + + if matrix_type == "input": + # Try to replace a matrix with a one in arrow format + res = client.put( + f"/v1/studies/{internal_study_id}/raw", params={"path": path}, files={"file": arrow_bytes} + ) + assert res.status_code in {201, 204} From f358549afc56fe0b6a14b81595e29ce9efeac7f6 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Wed, 10 Jul 2024 19:37:38 +0200 Subject: [PATCH 14/22] change doc --- antarest/study/common/studystorage.py | 2 +- antarest/study/service.py | 2 +- antarest/study/storage/abstract_storage_service.py | 2 +- antarest/study/storage/variantstudy/variant_study_service.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index baf8cef78e..96d8e34a75 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -38,7 +38,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: Indicates the file return format. Can be 'json', 'arrow' or None. If so, the file will be returned as is. + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. Returns: study data formatted in json diff --git a/antarest/study/service.py b/antarest/study/service.py index d79611b451..ae79c73e4b 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -311,7 +311,7 @@ def get(self, uuid: str, url: str, depth: int, params: RequestParameters, format uuid: study uuid url: route to follow inside study structure depth: depth to expand tree when route matched - format: Indicates the file return format. Can be 'json', 'arrow' or None. If so, the file will be returned as is. + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. params: request parameters Returns: data study formatted in json diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index cbc1ab1523..53e9ff94da 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -125,7 +125,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: Indicates the file return format. Can be 'json', 'arrow' or None. 
If so, the file will be returned as is. + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. use_cache: indicate if the cache must be used Returns: study data formatted in json diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index 70f885e43c..b3f7f1879f 100644 --- a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -477,7 +477,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - format: Indicates the file return format. Can be either Can be 'json', 'arrow' or None. If so, the file will be returned as is. + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. use_cache: indicate if cache should be used Returns: study data formatted in json From cb7a788ce7640f4d6b06ea36bd5cc5110fd3c5c0 Mon Sep 17 00:00:00 2001 From: belthlemar Date: Thu, 11 Jul 2024 17:18:12 +0200 Subject: [PATCH 15/22] resolve some issues --- .../business/areas/renewable_management.py | 2 +- .../business/areas/st_storage_management.py | 4 ++-- .../business/areas/thermal_management.py | 4 ++-- .../business/binding_constraint_management.py | 2 +- .../study/business/correlation_management.py | 2 +- .../study/business/xpansion_management.py | 2 +- .../rawstudy/model/filesystem/lazy_node.py | 2 +- .../filesystem/matrix/input_series_matrix.py | 2 +- .../model/filesystem/matrix/matrix.py | 2 +- .../business/command_extractor.py | 2 +- .../business/test_raw_study_service.py | 2 +- .../storage/business/test_xpansion_manager.py | 20 +++++++++---------- tests/storage/integration/test_STA_mini.py | 4 ++-- .../matrix/test_input_series_matrix.py | 6 +++--- .../matrix/test_output_series_matrix.py | 4 ++-- .../repository/filesystem/test_lazy_node.py | 6 +++--- 16 files changed, 33 insertions(+), 33 deletions(-) diff --git a/antarest/study/business/areas/renewable_management.py b/antarest/study/business/areas/renewable_management.py index 1009c9d22c..773d5b6591 100644 --- a/antarest/study/business/areas/renewable_management.py +++ b/antarest/study/business/areas/renewable_management.py @@ -352,7 +352,7 @@ def duplicate_cluster( # Prepare and execute commands storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context - current_matrix = storage_service.get(study, source_path)["data"] + current_matrix = storage_service.get(study, source_path, format="json")["data"] replace_matrix_cmd = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands = [create_cluster_cmd, replace_matrix_cmd] diff --git a/antarest/study/business/areas/st_storage_management.py b/antarest/study/business/areas/st_storage_management.py index 373f8c3ea4..599f61b698 100644 --- a/antarest/study/business/areas/st_storage_management.py +++ b/antarest/study/business/areas/st_storage_management.py @@ -565,7 +565,7 @@ def duplicate_cluster(self, study: Study, area_id: str, source_id: str, new_clus storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context for source_path, new_path in zip(source_paths, new_paths): +
current_matrix = storage_service.get(study, source_path, format="json")["data"] command = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands.append(command) @@ -605,7 +605,7 @@ def _get_matrix_obj( file_study = self._get_file_study(study) path = _STORAGE_SERIES_PATH.format(area_id=area_id, storage_id=storage_id, ts_name=ts_name) try: - matrix = file_study.tree.get(path.split("/"), depth=1) + matrix = file_study.tree.get(path.split("/"), depth=1, format="json") except KeyError: raise STStorageMatrixNotFound(path) from None return matrix diff --git a/antarest/study/business/areas/thermal_management.py b/antarest/study/business/areas/thermal_management.py index 205965eb54..adbbd0b544 100644 --- a/antarest/study/business/areas/thermal_management.py +++ b/antarest/study/business/areas/thermal_management.py @@ -433,7 +433,7 @@ def duplicate_cluster( storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context for source_path, new_path in zip(source_paths, new_paths): - current_matrix = storage_service.get(study, source_path)["data"] + current_matrix = storage_service.get(study, source_path, format="json")["data"] command = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands.append(command) @@ -451,7 +451,7 @@ def validate_series(self, study: Study, area_id: str, cluster_id: str) -> bool: ts_widths: t.MutableMapping[int, t.MutableSequence[str]] = {} for ts_path in series_path: - matrix = self.storage_service.get_storage(study).get(study, ts_path.as_posix()) + matrix = self.storage_service.get_storage(study).get(study, ts_path.as_posix(), format="json") matrix_data = matrix["data"] matrix_height = len(matrix_data) # We ignore empty matrices as there are default matrices for the simulator. diff --git a/antarest/study/business/binding_constraint_management.py b/antarest/study/business/binding_constraint_management.py index 7f42bb7f59..f3b307247b 100644 --- a/antarest/study/business/binding_constraint_management.py +++ b/antarest/study/business/binding_constraint_management.py @@ -341,7 +341,7 @@ def _get_references_by_widths( bc_id = bc.id matrix_id = fmt.format(bc_id=bc.id) logger.info(f"⏲ Validating BC '{bc_id}': {matrix_id=} [{_index}/{_total}]") - obj = file_study.tree.get(url=["input", "bindingconstraints", matrix_id]) + obj = file_study.tree.get(url=["input", "bindingconstraints", matrix_id], format="json") matrix = np.array(obj["data"], dtype=float) # We ignore empty matrices as there are default matrices for the simulator. 
if not matrix.size: diff --git a/antarest/study/business/correlation_management.py b/antarest/study/business/correlation_management.py index b9abcff2f2..035122a899 100644 --- a/antarest/study/business/correlation_management.py +++ b/antarest/study/business/correlation_management.py @@ -187,7 +187,7 @@ def _get_array( file_study: FileStudy, area_ids: Sequence[str], ) -> npt.NDArray[np.float64]: - correlation_cfg = file_study.tree.get(self.url, depth=3) + correlation_cfg = file_study.tree.get(self.url, depth=3, format="json") return _config_to_array(area_ids, correlation_cfg) def _set_array( diff --git a/antarest/study/business/xpansion_management.py b/antarest/study/business/xpansion_management.py index 22c612af9a..ec237a27de 100644 --- a/antarest/study/business/xpansion_management.py +++ b/antarest/study/business/xpansion_management.py @@ -691,7 +691,7 @@ def get_resource_content( ) -> t.Union[JSON, bytes]: logger.info(f"Getting xpansion {resource_type} resource file '{filename}' from study '{study.id}'") file_study = self.study_storage_service.get_storage(study).get_raw(study) - return file_study.tree.get(self._raw_file_dir(resource_type) + [filename]) + return file_study.tree.get(url=self._raw_file_dir(resource_type) + [filename], format="json") def list_resources(self, study: Study, resource_type: XpansionResourceFileType) -> t.List[str]: logger.info(f"Getting all xpansion {resource_type} files from study '{study.id}'") diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py index bc1336c807..c5d7a242b1 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py @@ -111,7 +111,7 @@ def save(self, data: Union[str, bytes, S], url: Optional[List[str]] = None) -> N self._assert_not_in_zipped_file() self._assert_url_end(url) - if isinstance(data, str) and self.context.resolver.resolve(data): + if isinstance(data, str) and self.context.resolver.resolve(data, format="json"): self.get_link_path().write_text(data) if self.config.path.exists(): self.config.path.unlink() diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py index b02db64e24..ffc5df593d 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py @@ -55,7 +55,7 @@ def parse( link_path = self.get_link_path() if link_path.exists(): link = link_path.read_text() - matrix_json = self.context.resolver.resolve(link) + matrix_json = self.context.resolver.resolve(link, format="json") matrix_json = cast(JSON, matrix_json) matrix: pd.DataFrame = pd.DataFrame(**matrix_json) else: diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index c047eeddd7..4488409a2a 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -62,7 +62,7 @@ def normalize(self) -> None: if self.get_link_path().exists() or self.config.zip_path: return - matrix = self.parse() + matrix = self.parse(format="json") assert isinstance(matrix, dict) if "data" in matrix: diff --git a/antarest/study/storage/variantstudy/business/command_extractor.py 
index 4ac5070a69..8a3014bc2c 100644
--- a/antarest/study/storage/variantstudy/business/command_extractor.py
+++ b/antarest/study/storage/variantstudy/business/command_extractor.py
@@ -365,7 +365,7 @@ def extract_binding_constraint(
 
         matrices: t.Dict[str, t.List[t.List[float]]] = {}
         for name, url in urls.items():
-            matrix = study_tree.get(url)
+            matrix = study_tree.get(url, format="json")
             if matrix is not None:
                 matrices[name] = matrix["data"]
 
diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py
index 12914c52d5..f04ea181dc 100644
--- a/tests/storage/business/test_raw_study_service.py
+++ b/tests/storage/business/test_raw_study_service.py
@@ -71,7 +71,7 @@ def test_get(tmp_path: str, project_path) -> None:
     )
     metadata = RawStudy(id="study2.py", workspace=DEFAULT_WORKSPACE_NAME, path=str(path_study))
 
-    output = study_service.get(metadata=metadata, url=sub_route, depth=2)
+    output = study_service.get(metadata=metadata, url=sub_route, depth=2, format="json")
     assert output == data
 
diff --git a/tests/storage/business/test_xpansion_manager.py b/tests/storage/business/test_xpansion_manager.py
index b4db4a18af..da18fabf80 100644
--- a/tests/storage/business/test_xpansion_manager.py
+++ b/tests/storage/business/test_xpansion_manager.py
@@ -471,20 +471,20 @@ def test_add_resources(tmp_path: Path) -> None:
         [UploadFile(filename=filename3, file=io.StringIO(content3))],
     )
 
-    assert filename1 in empty_study.tree.get(["user", "expansion", "constraints"])
-    expected1 = empty_study.tree.get(["user", "expansion", "constraints", filename1])
+    assert filename1 in empty_study.tree.get(url=["user", "expansion", "constraints"], format="json")
+    expected1 = empty_study.tree.get(url=["user", "expansion", "constraints", filename1], format="json")
     assert content1.encode() == t.cast(bytes, expected1)
 
-    assert filename2 in empty_study.tree.get(["user", "expansion", "constraints"])
-    expected2 = empty_study.tree.get(["user", "expansion", "constraints", filename2])
+    assert filename2 in empty_study.tree.get(url=["user", "expansion", "constraints"], format="json")
+    expected2 = empty_study.tree.get(url=["user", "expansion", "constraints", filename2], format="json")
     assert content2.encode() == t.cast(bytes, expected2)
 
-    assert filename3 in empty_study.tree.get(["user", "expansion", "weights"])
+    assert filename3 in empty_study.tree.get(url=["user", "expansion", "weights"], format="json")
     assert {
         "columns": [0],
         "data": [[2.0]],
         "index": [0],
-    } == empty_study.tree.get(["user", "expansion", "weights", filename3])
+    } == empty_study.tree.get(url=["user", "expansion", "weights", filename3], format="json")
 
     settings = xpansion_manager.get_xpansion_settings(study)
     settings.yearly_weights = filename3
@@ -573,19 +573,19 @@ def test_add_capa(tmp_path: Path) -> None:
 
     xpansion_manager.add_resource(study, XpansionResourceFileType.CAPACITIES, upload_file_list)
 
-    assert filename1 in empty_study.tree.get(["user", "expansion", "capa"])
+    assert filename1 in empty_study.tree.get(url=["user", "expansion", "capa"], format="json")
     assert {
         "columns": [0],
        "data": [[0.0]],
         "index": [0],
-    } == empty_study.tree.get(["user", "expansion", "capa", filename1])
+    } == empty_study.tree.get(url=["user", "expansion", "capa", filename1], format="json")
 
-    assert filename2 in empty_study.tree.get(["user", "expansion", "capa"])
+    assert filename2 in empty_study.tree.get(url=["user", "expansion", "capa"], format="json")
     assert {
"columns": [0], "data": [[1.0]], "index": [0], - } == empty_study.tree.get(["user", "expansion", "capa", filename2]) + } == empty_study.tree.get(url=["user", "expansion", "capa", filename2], format="json") @pytest.mark.unit_test diff --git a/tests/storage/integration/test_STA_mini.py b/tests/storage/integration/test_STA_mini.py index b5a9bce0b8..8fb150127a 100644 --- a/tests/storage/integration/test_STA_mini.py +++ b/tests/storage/integration/test_STA_mini.py @@ -497,8 +497,8 @@ def test_sta_mini_copy(storage_service) -> None: uuid = result.json() parameters = RequestParameters(user=ADMIN) - data_source = storage_service.get(source_study_name, "/", -1, True, parameters) - data_destination = storage_service.get(uuid, "/", -1, True, parameters) + data_source = storage_service.get(source_study_name, "/", -1, parameters, format="json") + data_destination = storage_service.get(uuid, "/", -1, parameters, format="json") link_url_source = data_source["input"]["links"]["de"]["fr"] assert "matrixfile://fr.txt" == link_url_source diff --git a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py index 6b7bcbaa01..e06edd587b 100644 --- a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py +++ b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py @@ -37,7 +37,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: file.write_text(content) node = InputSeriesMatrix(context=Mock(), config=my_study_config, nb_columns=8) - actual = node.load() + actual = node.load(format="json") expected = { "columns": [0, 1, 2, 3, 4, 5, 6, 7], "data": [ @@ -51,7 +51,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> None: node = InputSeriesMatrix(context=Mock(), config=my_study_config) with pytest.raises(ChildNotFoundError) as ctx: - node.load() + node.load(format="json") err_msg = str(ctx.value) assert "input.txt" in err_msg assert my_study_config.study_id in err_msg @@ -78,7 +78,7 @@ def resolve(uri: str, formatted: bool = True) -> t.Dict[str, t.Any]: ) node = InputSeriesMatrix(context=context, config=my_study_config) - actual = node.load() + actual = node.load(format="json") assert actual == matrix_obj def test_save(self, my_study_config: FileStudyTreeConfig) -> None: diff --git a/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py b/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py index d77bd47ee2..7c3ab7a3cc 100644 --- a/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py +++ b/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py @@ -67,7 +67,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: date_serializer=serializer, head_writer=AreaHeadWriter(area="", data_type="", freq=""), ) - assert node.load() == matrix.to_dict(orient="split") + assert node.load(format="json") == matrix.to_dict(orient="split") def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> None: node = OutputSeriesMatrix( @@ -78,7 +78,7 @@ def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> Non head_writer=AreaHeadWriter(area="", data_type="", freq=""), ) with pytest.raises(ChildNotFoundError) as ctx: - node.load() + node.load(format="json") err_msg = str(ctx.value) assert "'matrix-daily.txt" in err_msg assert my_study_config.study_id in err_msg diff --git 
index 8474598a52..8e1034bd4f 100644
--- a/tests/storage/repository/filesystem/test_lazy_node.py
+++ b/tests/storage/repository/filesystem/test_lazy_node.py
@@ -61,7 +61,7 @@ def test_get_no_expanded_link(tmp_path: Path):
         config=config,
     )
     assert "Mock Matrix Content" == node.get(expanded=False)
-    resolver.resolve.assert_called_once_with(uri, "json")
+    resolver.resolve.assert_called_once_with(uri, None)
 
 
 def test_get_expanded_txt(tmp_path: Path):
@@ -110,7 +110,7 @@ def test_save_uri(tmp_path: Path):
     node.save(uri)
     assert (file.parent / f"{file.name}.link").read_text() == uri
     assert not file.exists()
-    resolver.resolve.assert_called_once_with(uri)
+    resolver.resolve.assert_called_once_with(uri, format="json")
 
 
 def test_save_txt(tmp_path: Path):
@@ -131,4 +131,4 @@ def test_save_txt(tmp_path: Path):
     node.save(content)
     assert file.read_text() == content
     assert not link.exists()
-    resolver.resolve.assert_called_once_with(content)
+    resolver.resolve.assert_called_once_with(content, format="json")

From f5a4623aff5cd1af6e982138f9a8cb6e3418ff2f Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Thu, 11 Jul 2024 17:28:24 +0200
Subject: [PATCH 16/22] fix another test

---
 .../study/storage/variantstudy/business/command_extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/antarest/study/storage/variantstudy/business/command_extractor.py b/antarest/study/storage/variantstudy/business/command_extractor.py
index 8a3014bc2c..8e152a94e5 100644
--- a/antarest/study/storage/variantstudy/business/command_extractor.py
+++ b/antarest/study/storage/variantstudy/business/command_extractor.py
@@ -417,7 +417,7 @@ def generate_replace_matrix(
         url: t.List[str],
         default_value: t.Optional[str] = None,
     ) -> ICommand:
-        data = study_tree.get(url)
+        data = study_tree.get(url, format="json")
         if isinstance(data, str):
             matrix = data
         elif isinstance(data, dict):

From 9ac959fc3901b7733ad2c7a234143fd754096f24 Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Thu, 11 Jul 2024 17:51:19 +0200
Subject: [PATCH 17/22] resolve last test

---
 tests/storage/business/test_url_resolver_service.py          | 2 +-
 tests/storage/business/test_variant_study_service.py         | 2 +-
 .../repository/filesystem/matrix/test_input_series_matrix.py | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/storage/business/test_url_resolver_service.py b/tests/storage/business/test_url_resolver_service.py
index 40dabc53c4..3979fd9f71 100644
--- a/tests/storage/business/test_url_resolver_service.py
+++ b/tests/storage/business/test_url_resolver_service.py
@@ -30,7 +30,7 @@ def test_resolve_matrix():
 
     resolver = UriResolverService(matrix_service=matrix_service)
 
-    assert MOCK_MATRIX_JSON == resolver.resolve("matrix://my-id")
+    assert MOCK_MATRIX_JSON == resolver.resolve("matrix://my-id", format="json")
     matrix_service.get.assert_called_once_with("my-id")
 
     assert f"1.000000\t2.000000{os.linesep}3.000000\t4.000000{os.linesep}" == resolver.resolve("matrix://my-id", False)
diff --git a/tests/storage/business/test_variant_study_service.py b/tests/storage/business/test_variant_study_service.py
index 53182ce3ee..150e38ba45 100644
--- a/tests/storage/business/test_variant_study_service.py
+++ b/tests/storage/business/test_variant_study_service.py
@@ -97,7 +97,7 @@ def task_status(*args):
     study_service.task_service.await_task.assert_called()
 
     study_service.exists.return_value = True
-    output = study_service.get(metadata=metadata, url=sub_route, depth=2)
+    output = study_service.get(metadata=metadata, url=sub_route, depth=2, format="json")
     assert output == data
 
diff --git a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py
index e06edd587b..4cd98dd83a 100644
--- a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py
+++ b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py
@@ -67,9 +67,9 @@ def test_load__link_to_matrix(self, my_study_config: FileStudyTreeConfig) -> Non
         }
         link.write_text(matrix_uri)
 
-        def resolve(uri: str, formatted: bool = True) -> t.Dict[str, t.Any]:
+        def resolve(uri: str, format: t.Optional[str] = None) -> t.Dict[str, t.Any]:
             assert uri == matrix_uri
-            assert formatted is True
+            assert format == "json"
             return matrix_obj
 
         context = ContextServer(

From 51446d2f9631bbb489f8e868f949e2fff19aaddc Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Fri, 19 Jul 2024 09:47:52 +0200
Subject: [PATCH 18/22] resolve conflicts with dev

---
 .../filesystem/root/output/simulation/mode/mcall/grid.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py
index 9a542e0c97..812b1246c5 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py
@@ -38,7 +38,7 @@ def load(
         url: t.Optional[t.List[str]] = None,
         depth: int = -1,
         expanded: bool = False,
-        formatted: bool = True,
+        format: t.Optional[str] = None,
     ) -> JSON:
         file_path = self.config.path
         df = pd.read_csv(file_path, sep="\t")
@@ -74,7 +74,7 @@ def load(
         url: t.Optional[t.List[str]] = None,
         depth: int = -1,
         expanded: bool = False,
-        formatted: bool = True,
+        format: t.Optional[str] = None,
    ) -> JSON:
         file_path = self.config.path
         with open(file_path, "r") as f:

From e7b77033ea1e5745f9d440a590593a77265dc1ba Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Fri, 26 Jul 2024 17:11:48 +0200
Subject: [PATCH 19/22] resolve conflicts

---
 .../study/storage/rawstudy/model/filesystem/lazy_node.py | 8 ++++++--
 tests/storage/repository/filesystem/test_lazy_node.py    | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
index f8f8f396d2..e784a4b093 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -84,7 +84,11 @@ def _get(
         return self.load(url, depth, expanded, format)
 
     def get(
-        self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, format: t.Optional[str] = None
+        self,
+        url: t.Optional[t.List[str]] = None,
+        depth: int = -1,
+        expanded: bool = False,
+        format: t.Optional[str] = None,
     ) -> t.Union[str, G]:
         output = self._get(url, depth, expanded, format, get_node=False)
         assert not isinstance(output, INode)
@@ -164,7 +168,7 @@ def load(
         url: t.Optional[t.List[str]] = None,
         depth: int = -1,
         expanded: bool = False,
-        format: Optional[str] = None,
+        format: t.Optional[str] = None,
     ) -> G:
         """
         Fetch data on disk.
diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py
index 582270f605..8f11874a25 100644
--- a/tests/storage/repository/filesystem/test_lazy_node.py
+++ b/tests/storage/repository/filesystem/test_lazy_node.py
@@ -180,7 +180,7 @@ def test_rename_file(tmp_path: Path, target_is_link: bool):
     assert file.read_text() == content
     assert not link.exists()
     assert not renaming_file.exists()
-    resolver.resolve.assert_called_once_with(content)
+    resolver.resolve.assert_called_once_with(content, format="json")
 
     node.rename_file(target)
 
@@ -235,7 +235,7 @@ def test_copy_file(tmp_path: Path, target_is_link: bool):
     assert file.read_text() == content
     assert not link.exists()
     assert not copied_file.exists()
-    resolver.resolve.assert_called_once_with(content)
+    resolver.resolve.assert_called_once_with(content, format="json")
 
     node.copy_file(target)

From beb23fc89b954d6e7736e8b1c4b1beec8645456e Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Fri, 26 Jul 2024 18:13:15 +0200
Subject: [PATCH 20/22] resolve issue with dev

---
 antarest/study/business/binding_constraint_management.py | 9 ++++++++-
 .../study/storage/rawstudy/model/filesystem/lazy_node.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/antarest/study/business/binding_constraint_management.py b/antarest/study/business/binding_constraint_management.py
index 60affd437e..de4abea242 100644
--- a/antarest/study/business/binding_constraint_management.py
+++ b/antarest/study/business/binding_constraint_management.py
@@ -337,10 +337,17 @@ def _get_references_by_widths(
     The height of the matrices may vary depending on the time step,
     but the width should be consistent within a group of binding constraints.
     """
+    operator_matrix_file_map = {
+        BindingConstraintOperator.EQUAL: ["{bc_id}_eq"],
+        BindingConstraintOperator.GREATER: ["{bc_id}_gt"],
+        BindingConstraintOperator.LESS: ["{bc_id}_lt"],
+        BindingConstraintOperator.BOTH: ["{bc_id}_lt", "{bc_id}_gt"],
+    }
+
     references_by_width: t.Dict[int, t.List[t.Tuple[str, str]]] = {}
     _total = len(bcs)
     for _index, bc in enumerate(bcs):
-        matrices_name = OPERATOR_CONFLICT_MAP[bc.operator] if file_study.config.version >= 870 else ["{bc_id}"]
+        matrices_name = operator_matrix_file_map[bc.operator] if file_study.config.version >= 870 else ["{bc_id}"]
         for matrix_name in matrices_name:
             matrix_id = matrix_name.format(bc_id=bc.id)
             logger.info(f"⏲ Validating BC '{bc.id}': {matrix_id=} [{_index+1}/{_total}]")
diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
index e784a4b093..400b638e8a 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -133,7 +133,7 @@ def save(self, data: t.Union[str, bytes, S], url: t.Optional[t.List[str]] = None
         self._assert_not_in_zipped_file()
         self._assert_url_end(url)
 
-        if isinstance(data, str) and self.context.resolver.resolve(data, format="json"):
+        if isinstance(data, str) and self.context.resolver.resolve(uri=data, format='json'):
             self.get_link_path().write_text(data)
             if self.config.path.exists():
                 self.config.path.unlink()

From 8311d899b4b985bc688eba2eec7f0c5b55e312c1 Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Fri, 26 Jul 2024 18:14:33 +0200
Subject: [PATCH 21/22] fix tests

---
 antarest/study/storage/rawstudy/model/filesystem/lazy_node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
index 400b638e8a..c2fb90b673 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -133,7 +133,7 @@ def save(self, data: t.Union[str, bytes, S], url: t.Optional[t.List[str]] = None
         self._assert_not_in_zipped_file()
         self._assert_url_end(url)
 
-        if isinstance(data, str) and self.context.resolver.resolve(uri=data, format='json'):
+        if isinstance(data, str) and self.context.resolver.resolve(data, format='json'):
             self.get_link_path().write_text(data)
             if self.config.path.exists():
                 self.config.path.unlink()

From 0a7ccf3bdcbac4cd67e306f50ab4a0848e6ef1b2 Mon Sep 17 00:00:00 2001
From: belthlemar
Date: Fri, 26 Jul 2024 18:16:47 +0200
Subject: [PATCH 22/22] fix lint

---
 antarest/study/storage/rawstudy/model/filesystem/lazy_node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
index c2fb90b673..e784a4b093 100644
--- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
+++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py
@@ -133,7 +133,7 @@ def save(self, data: t.Union[str, bytes, S], url: t.Optional[t.List[str]] = None
         self._assert_not_in_zipped_file()
         self._assert_url_end(url)
 
-        if isinstance(data, str) and self.context.resolver.resolve(data, format='json'):
+        if isinstance(data, str) and self.context.resolver.resolve(data, format="json"):
             self.get_link_path().write_text(data)
             if self.config.path.exists():
                 self.config.path.unlink()
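Illustration (not part of the patches): the change running through this series replaces the boolean "formatted" flag with an optional "format" string on the tree and resolver accessors, where format="json" returns the split-orientation dict ({"data": ..., "index": ..., "columns": ...}) asserted in the tests above, and the default returns raw text. Below is a minimal, self-contained Python sketch of that calling convention; DemoMatrixNode is a hypothetical stand-in, not the real antarest LazyNode or UriResolverService.

import typing as t


class DemoMatrixNode:
    """Hypothetical stand-in for a study-tree matrix node (not the real antarest class)."""

    def __init__(self, data: t.List[t.List[float]]) -> None:
        self._data = data

    def get(self, depth: int = -1, format: t.Optional[str] = None) -> t.Union[str, t.Dict[str, t.Any]]:
        # format="json" -> dict in pandas "split" orientation, the shape asserted in the tests above.
        if format == "json":
            return {
                "data": self._data,
                "index": list(range(len(self._data))),
                "columns": list(range(len(self._data[0]))) if self._data else [],
            }
        # Default -> tab-separated text, mirroring the unformatted resolver output.
        return "\n".join("\t".join(f"{value:.6f}" for value in row) for row in self._data)


node = DemoMatrixNode([[1.0, 2.0], [3.0, 4.0]])
matrix_json = node.get(format="json")
assert isinstance(matrix_json, dict) and matrix_json["data"] == [[1.0, 2.0], [3.0, 4.0]]
print(node.get())  # two tab-separated rows of "%.6f" values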