diff --git a/antarest/launcher/extensions/adequacy_patch/extension.py b/antarest/launcher/extensions/adequacy_patch/extension.py index d6499375e8..2614956522 100644 --- a/antarest/launcher/extensions/adequacy_patch/extension.py +++ b/antarest/launcher/extensions/adequacy_patch/extension.py @@ -97,9 +97,7 @@ def after_export_flat_hook( study = self.study_service.storage_service.raw_study_service.study_factory.create_from_fs( study_export_path, study_id, use_cache=False ) - user_config = study.tree.get( - ["user"], - ) + user_config = study.tree.get(["user"]) assert_this("flowbased" in user_config) adequacy_patch_config = yaml.safe_load(cast(bytes, study.tree.get(["user", "adequacypatch", "config.yml"]))) assert_this("areas" in adequacy_patch_config) diff --git a/antarest/matrixstore/uri_resolver_service.py b/antarest/matrixstore/uri_resolver_service.py index 01717c57bd..4af7e36eff 100644 --- a/antarest/matrixstore/uri_resolver_service.py +++ b/antarest/matrixstore/uri_resolver_service.py @@ -1,9 +1,9 @@ +import io import re -from typing import Optional, Tuple +import typing as t import pandas as pd -from antarest.core.model import SUB_JSON from antarest.matrixstore.service import ISimpleMatrixService @@ -11,7 +11,7 @@ class UriResolverService: def __init__(self, matrix_service: ISimpleMatrixService): self.matrix_service = matrix_service - def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON: + def resolve(self, uri: str, format: t.Optional[str] = None) -> t.Union[bytes, str, t.Dict[str, t.Any], None]: res = UriResolverService._extract_uri_components(uri) if res: protocol, uuid = res @@ -19,11 +19,11 @@ def resolve(self, uri: str, formatted: bool = True) -> SUB_JSON: return None if protocol == "matrix": - return self._resolve_matrix(uuid, formatted) + return self._resolve_matrix(uuid, format) raise NotImplementedError(f"protocol {protocol} not implemented") @staticmethod - def _extract_uri_components(uri: str) -> Optional[Tuple[str, str]]: + def _extract_uri_components(uri: str) -> t.Optional[t.Tuple[str, str]]: match = re.match(r"^(\w+)://(.+)$", uri) if not match: return None @@ -33,14 +33,14 @@ def _extract_uri_components(uri: str) -> Optional[Tuple[str, str]]: return protocol, uuid @staticmethod - def extract_id(uri: str) -> Optional[str]: + def extract_id(uri: str) -> t.Optional[str]: res = UriResolverService._extract_uri_components(uri) return res[1] if res else None - def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON: + def _resolve_matrix(self, id: str, format: t.Optional[str] = None) -> t.Union[bytes, str, t.Dict[str, t.Any]]: data = self.matrix_service.get(id) if data: - if formatted: + if format == "json": return { "data": data.data, "index": data.index, @@ -54,6 +54,12 @@ def _resolve_matrix(self, id: str, formatted: bool = True) -> SUB_JSON: ) if df.empty: return "" + elif format == "arrow": + with io.BytesIO() as buffer: + df.columns = df.columns.map(str) + df.to_feather(buffer, compression="uncompressed") + return buffer.getvalue() + else: csv = df.to_csv( None, diff --git a/antarest/study/business/areas/renewable_management.py b/antarest/study/business/areas/renewable_management.py index 1009c9d22c..773d5b6591 100644 --- a/antarest/study/business/areas/renewable_management.py +++ b/antarest/study/business/areas/renewable_management.py @@ -352,7 +352,7 @@ def duplicate_cluster( # Prepare and execute commands storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context - current_matrix = storage_service.get(study, source_path)["data"] + current_matrix = storage_service.get(study, source_path, format="json")["data"] replace_matrix_cmd = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands = [create_cluster_cmd, replace_matrix_cmd] diff --git a/antarest/study/business/areas/st_storage_management.py b/antarest/study/business/areas/st_storage_management.py index 776f57a039..64774484aa 100644 --- a/antarest/study/business/areas/st_storage_management.py +++ b/antarest/study/business/areas/st_storage_management.py @@ -565,7 +565,7 @@ def duplicate_cluster(self, study: Study, area_id: str, source_id: str, new_clus storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context for source_path, new_path in zip(source_paths, new_paths): - current_matrix = storage_service.get(study, source_path)["data"] + current_matrix = storage_service.get(study, source_path, format="json")["data"] command = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands.append(command) @@ -605,7 +605,7 @@ def _get_matrix_obj( file_study = self._get_file_study(study) path = _STORAGE_SERIES_PATH.format(area_id=area_id, storage_id=storage_id, ts_name=ts_name) try: - matrix = file_study.tree.get(path.split("/"), depth=1) + matrix = file_study.tree.get(path.split("/"), depth=1, format="json") except KeyError: raise STStorageMatrixNotFound(path) from None return matrix diff --git a/antarest/study/business/areas/thermal_management.py b/antarest/study/business/areas/thermal_management.py index 205965eb54..adbbd0b544 100644 --- a/antarest/study/business/areas/thermal_management.py +++ b/antarest/study/business/areas/thermal_management.py @@ -433,7 +433,7 @@ def duplicate_cluster( storage_service = self.storage_service.get_storage(study) command_context = self.storage_service.variant_study_service.command_factory.command_context for source_path, new_path in zip(source_paths, new_paths): - current_matrix = storage_service.get(study, source_path)["data"] + current_matrix = storage_service.get(study, source_path, format="json")["data"] command = ReplaceMatrix(target=new_path, matrix=current_matrix, command_context=command_context) commands.append(command) @@ -451,7 +451,7 @@ def validate_series(self, study: Study, area_id: str, cluster_id: str) -> bool: ts_widths: t.MutableMapping[int, t.MutableSequence[str]] = {} for ts_path in series_path: - matrix = self.storage_service.get_storage(study).get(study, ts_path.as_posix()) + matrix = self.storage_service.get_storage(study).get(study, ts_path.as_posix(), format="json") matrix_data = matrix["data"] matrix_height = len(matrix_data) # We ignore empty matrices as there are default matrices for the simulator. diff --git a/antarest/study/business/binding_constraint_management.py b/antarest/study/business/binding_constraint_management.py index 50220da54a..de4abea242 100644 --- a/antarest/study/business/binding_constraint_management.py +++ b/antarest/study/business/binding_constraint_management.py @@ -351,7 +351,7 @@ def _get_references_by_widths( for matrix_name in matrices_name: matrix_id = matrix_name.format(bc_id=bc.id) logger.info(f"⏲ Validating BC '{bc.id}': {matrix_id=} [{_index+1}/{_total}]") - obj = file_study.tree.get(url=["input", "bindingconstraints", matrix_id]) + obj = file_study.tree.get(url=["input", "bindingconstraints", matrix_id], format="json") matrix = np.array(obj["data"], dtype=float) # We ignore empty matrices as there are default matrices for the simulator. if not matrix.size: diff --git a/antarest/study/business/correlation_management.py b/antarest/study/business/correlation_management.py index b9abcff2f2..035122a899 100644 --- a/antarest/study/business/correlation_management.py +++ b/antarest/study/business/correlation_management.py @@ -187,7 +187,7 @@ def _get_array( file_study: FileStudy, area_ids: Sequence[str], ) -> npt.NDArray[np.float64]: - correlation_cfg = file_study.tree.get(self.url, depth=3) + correlation_cfg = file_study.tree.get(self.url, depth=3, format="json") return _config_to_array(area_ids, correlation_cfg) def _set_array( diff --git a/antarest/study/business/xpansion_management.py b/antarest/study/business/xpansion_management.py index 66d25860dd..d52cdc0ec1 100644 --- a/antarest/study/business/xpansion_management.py +++ b/antarest/study/business/xpansion_management.py @@ -691,7 +691,7 @@ def get_resource_content( ) -> t.Union[JSON, bytes]: logger.info(f"Getting xpansion {resource_type} resource file '{filename}' from study '{study.id}'") file_study = self.study_storage_service.get_storage(study).get_raw(study) - return file_study.tree.get(self._raw_file_dir(resource_type) + [filename]) + return file_study.tree.get(url=self._raw_file_dir(resource_type) + [filename], format="json") def list_resources(self, study: Study, resource_type: XpansionResourceFileType) -> t.List[str]: logger.info(f"Getting all xpansion {resource_type} files from study '{study.id}'") diff --git a/antarest/study/common/studystorage.py b/antarest/study/common/studystorage.py index 94664ef339..96d8e34a75 100644 --- a/antarest/study/common/studystorage.py +++ b/antarest/study/common/studystorage.py @@ -30,7 +30,7 @@ def get( metadata: T, url: str = "", depth: int = 3, - formatted: bool = True, + format: t.Optional[str] = None, ) -> JSON: """ Entry point to fetch data inside study. @@ -38,7 +38,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. Returns: study data formatted in json diff --git a/antarest/study/service.py b/antarest/study/service.py index 1143b3a309..b669ac60a1 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -14,6 +14,8 @@ import numpy as np import pandas as pd +import pyarrow as pa +import pyarrow.feather as feather from fastapi import HTTPException, UploadFile from markupsafe import escape from starlette.responses import FileResponse, Response @@ -343,21 +345,14 @@ def _on_study_delete(self, uuid: str) -> None: for callback in self.on_deletion_callbacks: callback(uuid) - def get( - self, - uuid: str, - url: str, - depth: int, - formatted: bool, - params: RequestParameters, - ) -> JSON: + def get(self, uuid: str, url: str, depth: int, params: RequestParameters, format: t.Optional[str] = None) -> JSON: """ Get study data inside filesystem Args: uuid: study uuid url: route to follow inside study structure depth: depth to expand tree when route matched - formatted: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. params: request parameters Returns: data study formatted in json @@ -366,7 +361,7 @@ def get( study = self.get_study(uuid) assert_permission(params.user, study, StudyPermissionType.READ) - return self.storage_service.get_storage(study).get(study, url, depth, formatted) + return self.storage_service.get_storage(study).get(study, url, depth, format) def aggregate_output_data( self, @@ -439,7 +434,7 @@ def get_logs( try: log = t.cast( bytes, - file_study.tree.get(log_location, depth=1, formatted=True), + file_study.tree.get(log_location, depth=1, format="json"), ).decode(encoding="utf-8") # when missing file, RawFileNode return empty bytes if log: @@ -1481,9 +1476,15 @@ def _create_edit_study_command( ) elif isinstance(tree_node, InputSeriesMatrix): if isinstance(data, bytes): - # noinspection PyTypeChecker - matrix = np.loadtxt(io.BytesIO(data), delimiter="\t", dtype=np.float64, ndmin=2) - matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix + # checks if it corresponds to arrow format or if it's a classic file. + if data[:5].decode("utf-8") == "ARROW": + buffer = pa.BufferReader(data) # type: ignore + table = feather.read_table(buffer) + df = table.to_pandas() + matrix = df.to_numpy() + else: + matrix = np.loadtxt(io.BytesIO(data), delimiter="\t", dtype=np.float64, ndmin=2) + matrix = matrix.reshape((1, 0)) if matrix.size == 0 else matrix return ReplaceMatrix( target=url, matrix=matrix.tolist(), @@ -2583,7 +2584,7 @@ def get_matrix_with_index_and_header( hydro_matrix = self.correlation_manager.get_correlation_matrix(all_areas, study, []) # type: ignore return pd.DataFrame(data=hydro_matrix.data, columns=hydro_matrix.columns, index=hydro_matrix.index) - matrix_obj = self.get(study_id, path, depth=3, formatted=True, params=parameters) + matrix_obj = self.get(study_id, path, depth=3, format="json", params=parameters) if set(matrix_obj) != {"data", "index", "columns"}: raise IncorrectPathError(f"The provided path does not point to a valid matrix: '{path}'") if not matrix_obj["data"]: diff --git a/antarest/study/storage/abstract_storage_service.py b/antarest/study/storage/abstract_storage_service.py index 892f855970..53e9ff94da 100644 --- a/antarest/study/storage/abstract_storage_service.py +++ b/antarest/study/storage/abstract_storage_service.py @@ -117,12 +117,7 @@ def get_study_information( ) def get( - self, - metadata: T, - url: str = "", - depth: int = 3, - formatted: bool = True, - use_cache: bool = True, + self, metadata: T, url: str = "", depth: int = 3, format: t.Optional[str] = None, use_cache: bool = True ) -> JSON: """ Entry point to fetch data inside study. @@ -130,7 +125,7 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be 'json', 'arrow' or None. If None, the file will be returned as is. use_cache: indicate if the cache must be used Returns: study data formatted in json @@ -149,11 +144,11 @@ def get( logger.info(f"Raw Study {metadata.id} read from cache") data = from_cache else: - data = study.tree.get(parts, depth=depth, formatted=formatted) + data = study.tree.get(parts, depth=depth, format=format) self.cache.put(cache_id, data) logger.info(f"Cache new entry from RawStudyService (studyID: {metadata.id})") else: - data = study.tree.get(parts, depth=depth, formatted=formatted) + data = study.tree.get(parts, depth=depth, format=format) del study return data diff --git a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py index ba1d859ce3..137f0a5ffb 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/folder_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/folder_node.py @@ -45,7 +45,7 @@ def _forward_get( self, url: t.List[str], depth: int = -1, - formatted: bool = True, + format: t.Optional[str] = None, get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: children = self.build() @@ -55,7 +55,7 @@ def _forward_get( if len(names) == 1: child = children[names[0]] if not get_node: - return child.get(sub_url, depth=depth, expanded=False, formatted=formatted) # type: ignore + return child.get(sub_url, depth=depth, expanded=False, format=format) # type: ignore else: return child.get_node( sub_url, @@ -63,20 +63,12 @@ def _forward_get( # many items asked or * asked else: if not get_node: - return { - key: children[key].get( - sub_url, - depth=depth, - expanded=False, - formatted=formatted, - ) - for key in names - } + return {key: children[key].get(sub_url, depth=depth, expanded=False, format=format) for key in names} else: raise ValueError("Multiple nodes requested") def _expand_get( - self, depth: int = -1, formatted: bool = True, get_node: bool = False + self, depth: int = -1, format: t.Optional[str] = None, get_node: bool = False ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if get_node: return self @@ -86,7 +78,7 @@ def _expand_get( if depth == 0: return {} return { - name: node.get(depth=depth - 1, expanded=True, formatted=formatted) if depth != 1 else {} + name: node.get(depth=depth - 1, expanded=True, format=format) if depth != 1 else {} for name, node in children.items() } @@ -94,22 +86,22 @@ def _get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, - formatted: bool = True, + format: t.Optional[str] = None, get_node: bool = False, ) -> t.Union[JSON, INode[JSON, SUB_JSON, JSON]]: if url and url != [""]: - return self._forward_get(url, depth, formatted, get_node) + return self._forward_get(url, depth, format, get_node) else: - return self._expand_get(depth, formatted, get_node) + return self._expand_get(depth, format, get_node) def get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> JSON: - output = self._get(url=url, depth=depth, formatted=formatted, get_node=False) + output = self._get(url=url, depth=depth, format=format, get_node=False) assert not isinstance(output, INode) return output diff --git a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py index b2ba1c0646..85f4a7dcc4 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/ini_file_node.py @@ -152,7 +152,7 @@ def get( url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> SUB_JSON: output = self._get(url, depth, expanded, get_node=False) assert not isinstance(output, INode) diff --git a/antarest/study/storage/rawstudy/model/filesystem/inode.py b/antarest/study/storage/rawstudy/model/filesystem/inode.py index e3eff7feee..b5771903de 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/inode.py +++ b/antarest/study/storage/rawstudy/model/filesystem/inode.py @@ -25,7 +25,7 @@ def get( url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: Optional[str] = None, ) -> G: """ Ask data inside tree. @@ -34,7 +34,7 @@ def get( url: data path to retrieve depth: after url is reached, node expand tree until matches depth asked expanded: context parameter to determine if current node become from a expansion - formatted: ask for raw file transformation (for matrix) + format: ask for raw file transformation (for matrix) Returns: json diff --git a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py index 7e47affbc9..e784a4b093 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/lazy_node.py @@ -63,7 +63,7 @@ def _get( url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, get_node: bool = False, ) -> t.Union[t.Union[str, G], INode[G, S, V]]: self._assert_url_end(url) @@ -76,21 +76,21 @@ def _get( if expanded: return link else: - return t.cast(G, self.context.resolver.resolve(link, formatted)) + return t.cast(G, self.context.resolver.resolve(link, format)) if expanded: return self.get_lazy_content() else: - return self.load(url, depth, expanded, formatted) + return self.load(url, depth, expanded, format) def get( self, url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> t.Union[str, G]: - output = self._get(url, depth, expanded, formatted, get_node=False) + output = self._get(url, depth, expanded, format, get_node=False) assert not isinstance(output, INode) return output @@ -133,7 +133,7 @@ def save(self, data: t.Union[str, bytes, S], url: t.Optional[t.List[str]] = None self._assert_not_in_zipped_file() self._assert_url_end(url) - if isinstance(data, str) and self.context.resolver.resolve(data): + if isinstance(data, str) and self.context.resolver.resolve(data, format="json"): self.get_link_path().write_text(data) if self.config.path.exists(): self.config.path.unlink() @@ -168,7 +168,7 @@ def load( url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> G: """ Fetch data on disk. @@ -177,7 +177,7 @@ def load( url: data path to retrieve depth: after url is reached, node expand tree until matches depth asked expanded: context parameter to determine if current node comes from an expansion - formatted: ask for raw file transformation + format: ask for raw file transformation Returns: diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py index a68b0f521e..d84851c53d 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/input_series_matrix.py @@ -1,3 +1,4 @@ +import io import logging from pathlib import Path from typing import Any, List, Optional, Union, cast @@ -45,7 +46,8 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, return_dataframe: bool = False, - ) -> Union[JSON, pd.DataFrame]: + format: Optional[str] = None, + ) -> Union[JSON, bytes, pd.DataFrame]: file_path = file_path or self.config.path try: # sourcery skip: extract-method @@ -53,7 +55,7 @@ def parse( link_path = self.get_link_path() if link_path.exists(): link = link_path.read_text() - matrix_json = self.context.resolver.resolve(link) + matrix_json = self.context.resolver.resolve(link, format="json") matrix_json = cast(JSON, matrix_json) matrix: pd.DataFrame = pd.DataFrame(**matrix_json) else: @@ -73,14 +75,21 @@ def parse( raise ChildNotFoundError(f"File '{relpath}' not found in the study '{study_id}'") from e stopwatch.log_elapsed(lambda x: logger.info(f"Matrix parsed in {x}s")) - matrix.dropna(how="any", axis=1, inplace=True) + matrix = matrix.dropna(how="any", axis=1) if return_dataframe: return matrix - data = cast(JSON, matrix.to_dict(orient="split")) - stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to dict in {x}s")) + if format == "json": + matrix_json = cast(JSON, matrix.to_dict(orient="split")) + stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to dict in {x}s")) + return matrix_json + + with io.BytesIO() as buffer: + matrix.columns = matrix.columns.map(str) + matrix.to_feather(buffer, compression="uncompressed") + stopwatch.log_elapsed(lambda x: logger.info(f"Matrix to arrow in {x}s")) + return buffer.getvalue() - return data except EmptyDataError: logger.warning(f"Empty file found when parsing {file_path}") matrix = pd.DataFrame(self.default_empty) diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py index 904d57c115..aec754146f 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/matrix.py @@ -75,7 +75,8 @@ def normalize(self) -> None: if self.get_link_path().exists() or self.config.zip_path: return - matrix = self.parse() + matrix = self.parse(format="json") + assert isinstance(matrix, dict) if "data" in matrix: data = cast(List[List[float]], matrix["data"]) @@ -95,22 +96,18 @@ def denormalize(self) -> None: # noinspection SpellCheckingInspection logger.info(f"Denormalizing matrix {self.config.path}") uuid = self.get_link_path().read_text() - matrix = self.context.resolver.resolve(uuid) + matrix = self.context.resolver.resolve(uuid, format="json") if not matrix or not isinstance(matrix, dict): raise DenormalizationException(f"Failed to retrieve original matrix for {self.config.path}") self.dump(matrix) self.get_link_path().unlink() - def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, - ) -> Union[bytes, JSON]: + def load( # type: ignore + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None + ) -> Union[bytes, JSON, pd.DataFrame]: file_path, tmp_dir = self._get_real_file_path() - if not formatted: + if not format: if file_path.exists(): return file_path.read_bytes() @@ -119,7 +116,10 @@ def load( tmp_dir.cleanup() return b"" - return cast(JSON, self.parse(file_path, tmp_dir)) + result = self.parse(file_path, tmp_dir, False, format) + if format == "json": + return cast(JSON, result) + return result @abstractmethod def parse( @@ -127,7 +127,8 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Any = None, return_dataframe: bool = False, - ) -> Union[JSON, pd.DataFrame]: + format: Optional[str] = None, + ) -> Union[JSON, bytes, pd.DataFrame]: """ Parse the matrix content """ diff --git a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py index 70317c6255..8ef522cb8e 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py +++ b/antarest/study/storage/rawstudy/model/filesystem/matrix/output_series_matrix.py @@ -1,3 +1,4 @@ +import io import logging from pathlib import Path from typing import Any, List, Optional, Union, cast @@ -86,13 +87,17 @@ def parse_dataframe( matrix.columns = body.columns return matrix - def parse( - self, - file_path: Optional[Path] = None, - tmp_dir: Any = None, - ) -> JSON: + def parse(self, file_path: Path, tmp_dir: Any, format: Optional[str] = None) -> Union[JSON, bytes]: matrix = self.parse_dataframe(file_path, tmp_dir) - return cast(JSON, matrix.to_dict(orient="split")) + if format == "json": + return cast(JSON, matrix.to_dict(orient="split")) + else: + with io.BytesIO() as buffer: + matrix.columns = matrix.columns.map(str) + matrix.reset_index(inplace=True) + matrix.rename(columns={matrix.columns[0]: "Index"}, inplace=True) + matrix.to_feather(buffer, compression="uncompressed") + return buffer.getvalue() def check_errors( self, @@ -108,15 +113,11 @@ def check_errors( return errors def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> Union[bytes, JSON]: try: file_path, tmp_dir = self._get_real_file_path() - if not formatted: + if not format: if file_path.exists(): file_content = file_path.read_bytes() if tmp_dir: @@ -130,7 +131,7 @@ def load( if not file_path.exists(): raise FileNotFoundError(file_path) - return self.parse(file_path, tmp_dir) + return self.parse(file_path, tmp_dir, format) except FileNotFoundError as e: raise ChildNotFoundError( f"Output file '{self.config.path.name}' not found in study {self.config.study_id}" diff --git a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py index 57770ce02d..031cf6c462 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py +++ b/antarest/study/storage/rawstudy/model/filesystem/raw_file_node.py @@ -25,11 +25,7 @@ def get_lazy_content( return f"file://{self.config.path.name}" def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> bytes: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py index 0e84498e1d..e35f80a3ba 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/input/areas/list.py @@ -16,21 +16,11 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig): super().__init__(config) self.context = context - def get_node( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, - ) -> INode[List[str], List[str], List[str]]: + def get_node(self, url: Optional[List[str]] = None) -> INode[List[str], List[str], List[str]]: return self def get( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> List[str]: if self.config.zip_path: path, tmp_dir = self._extract_file_to_tmp_dir() diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py index 9a542e0c97..812b1246c5 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/mode/mcall/grid.py @@ -38,7 +38,7 @@ def load( url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> JSON: file_path = self.config.path df = pd.read_csv(file_path, sep="\t") @@ -74,7 +74,7 @@ def load( url: t.Optional[t.List[str]] = None, depth: int = -1, expanded: bool = False, - formatted: bool = True, + format: t.Optional[str] = None, ) -> JSON: file_path = self.config.path with open(file_path, "r") as f: diff --git a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py index 90240b04c7..991a8ace08 100644 --- a/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py +++ b/antarest/study/storage/rawstudy/model/filesystem/root/output/simulation/ts_numbers/ts_numbers_data.py @@ -9,11 +9,7 @@ class TsNumbersVector(LazyNode[List[int], List[int], JSON]): def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: Optional[str] = None ) -> List[int]: file_path, tmp_dir = self._get_real_file_path() diff --git a/antarest/study/storage/variantstudy/business/command_extractor.py b/antarest/study/storage/variantstudy/business/command_extractor.py index 4ac5070a69..8e152a94e5 100644 --- a/antarest/study/storage/variantstudy/business/command_extractor.py +++ b/antarest/study/storage/variantstudy/business/command_extractor.py @@ -365,7 +365,7 @@ def extract_binding_constraint( matrices: t.Dict[str, t.List[t.List[float]]] = {} for name, url in urls.items(): - matrix = study_tree.get(url) + matrix = study_tree.get(url, format="json") if matrix is not None: matrices[name] = matrix["data"] @@ -417,7 +417,7 @@ def generate_replace_matrix( url: t.List[str], default_value: t.Optional[str] = None, ) -> ICommand: - data = study_tree.get(url) + data = study_tree.get(url, format="json") if isinstance(data, str): matrix = data elif isinstance(data, dict): diff --git a/antarest/study/storage/variantstudy/variant_study_service.py b/antarest/study/storage/variantstudy/variant_study_service.py index a0c03a8457..cb9698b794 100644 --- a/antarest/study/storage/variantstudy/variant_study_service.py +++ b/antarest/study/storage/variantstudy/variant_study_service.py @@ -468,7 +468,7 @@ def get( metadata: VariantStudy, url: str = "", depth: int = 3, - formatted: bool = True, + format: t.Optional[str] = None, use_cache: bool = True, ) -> JSON: """ @@ -477,20 +477,14 @@ def get( metadata: study url: path data inside study to reach depth: tree depth to reach after reach data path - formatted: indicate if raw files must be parsed and formatted + format: Indicates the file return format. Can be either Can be 'json', 'arrow' or None. If None, the file will be returned as is. use_cache: indicate if cache should be used Returns: study data formatted in json """ self._safe_generation(metadata, timeout=60) self.repository.refresh(metadata) - return super().get( - metadata=metadata, - url=url, - depth=depth, - formatted=formatted, - use_cache=use_cache, - ) + return super().get(metadata=metadata, url=url, depth=depth, format=format, use_cache=use_cache) def create_variant_study(self, uuid: str, name: str, params: RequestParameters) -> VariantStudy: """ diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index 93c2f018dc..38da86ca3f 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -19,6 +19,7 @@ from antarest.core.utils.web import APITag from antarest.login.auth import Auth from antarest.study.business.aggregator_management import AreasQueryFile, LinksQueryFile +from antarest.study.business.enum_ignore_case import EnumIgnoreCase from antarest.study.service import StudyService from antarest.study.storage.df_download import TableExportFormat, export_file from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import MatrixFrequency @@ -59,6 +60,12 @@ ".json": ("application/json", "utf-8"), } + +class MatrixFormat(EnumIgnoreCase): + JSON = "json" + ARROW = "arrow" + + DEFAULT_EXPORT_FORMAT = Query(TableExportFormat.CSV, alias="format", description="Export format", title="Export Format") @@ -97,8 +104,9 @@ def get_study( path: str = Param("/", examples=get_path_examples()), # type: ignore depth: int = 3, formatted: bool = True, + format: t.Optional[MatrixFormat] = None, current_user: JWTUser = Depends(auth.get_current_user), - ) -> t.Any: + ) -> Response: """ Fetches raw data from a study, and returns the data in different formats based on the file type, or as a JSON response. @@ -107,9 +115,10 @@ def get_study( - `uuid`: The UUID of the study. - `path`: The path to the data to fetch. - `depth`: The depth of the data to retrieve. - - `formatted`: A flag specifying whether the data should be returned in a formatted manner. + - `formatted`: If false, returns the file as bytes. Else, the `format` flag applies. + - `format`: Either 'json' or 'arrow'. Arrow format is only supported by matrix files. - Returns the fetched data: a JSON object (in most cases), a plain text file + Returns the fetched data: a JSON object (in most cases), a plain text file, a matrix file in arrow format or a file attachment (Microsoft Office document, TSV/TSV file...). """ logger.info( @@ -117,12 +126,21 @@ def get_study( extra={"user": current_user.id}, ) parameters = RequestParameters(user=current_user) - output = study_service.get(uuid, path, depth=depth, formatted=formatted, params=parameters) + + _format = format or MatrixFormat.JSON + real_format = _format.value if formatted else None + + output = study_service.get(uuid, path, depth=depth, params=parameters, format=real_format) if isinstance(output, bytes): + if real_format == MatrixFormat.ARROW: + return Response(content=output, media_type="application/vnd.apache.arrow.file") + # Guess the suffix form the target data resource_path = PurePosixPath(path) - parent_cfg = study_service.get(uuid, str(resource_path.parent), depth=2, formatted=True, params=parameters) + parent_cfg = study_service.get( + uuid, str(resource_path.parent), depth=2, params=parameters, format=real_format + ) child = parent_cfg[resource_path.name] suffix = PurePosixPath(child).suffix @@ -364,7 +382,7 @@ def replace_study_file( Parameters: - `uuid`: The UUID of the study. - `path`: The path to the data to update. Defaults to "/". - - `file`: The raw file to be posted (e.g. a CSV file opened in binary mode). + - `file`: The raw file to be posted (e.g. a CSV file opened in binary mode or a matrix in arrow format). - `create_missing`: Flag to indicate whether to create file or parent directories if missing. """ logger.info( diff --git a/requirements-dev.txt b/requirements-dev.txt index e7ff79c736..808596e882 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,6 +11,7 @@ pyinstaller-hooks-contrib==2024.6 # of the corresponding implementation libraries used in production (in `requirements.txt`). pandas-stubs~=1.4.0 +pyarrow-stubs~=10.0.1.7 types-psycopg2~=2.9.4 types-redis~=4.1.2 types-requests~=2.27.1 diff --git a/requirements.txt b/requirements.txt index eb42c65793..e034eba343 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,6 +19,7 @@ pandas~=1.4.0 paramiko~=2.12.0 plyer~=2.0.0 psycopg2-binary==2.9.4 +pyarrow~=16.1.0 py7zr~=0.20.6 pydantic~=1.9.0 PyQt5~=5.15.6 diff --git a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py index e55929c97a..777b5121f6 100644 --- a/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py +++ b/tests/integration/raw_studies_blueprint/test_fetch_raw_data.py @@ -7,6 +7,8 @@ from unittest.mock import ANY import numpy as np +import pyarrow as pa +import pyarrow.feather as feather import pytest from starlette.testclient import TestClient @@ -44,7 +46,7 @@ def test_get_study( with db(): study: RawStudy = db.session.get(Study, internal_study_id) study_dir = pathlib.Path(study.path) - headers = {"Authorization": f"Bearer {user_access_token}"} + client.headers = {"Authorization": f"Bearer {user_access_token}"} shutil.copytree( ASSETS_DIR.joinpath("user"), @@ -56,11 +58,7 @@ def test_get_study( user_folder_dir = study_dir.joinpath("user/folder") for file_path in user_folder_dir.glob("*.*"): rel_path = file_path.relative_to(study_dir).as_posix() - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "depth": 1}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "depth": 1}) assert res.status_code == 200, res.json() if file_path.suffix == ".json": # special case for JSON files @@ -83,9 +81,7 @@ def test_get_study( for file_path in user_folder_dir.glob("*.*"): rel_path = file_path.relative_to(study_dir) res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": f"/{rel_path.as_posix()}", "depth": 1}, - headers=headers, + f"/v1/studies/{internal_study_id}/raw", params={"path": f"/{rel_path.as_posix()}", "depth": 1} ) assert res.status_code == 200, res.json() actual = res.content @@ -93,11 +89,7 @@ def test_get_study( assert actual == expected # If you try to retrieve a file that doesn't exist, we should have a 404 error - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "user/somewhere/something.txt"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}) assert res.status_code == 404, res.json() assert res.json() == { "description": "'somewhere' not a child of User", @@ -109,7 +101,6 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}, - headers=headers, files={"file": io.BytesIO(b"Goodbye World!")}, ) assert res.status_code == 404, res.json() @@ -123,7 +114,6 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt", "create_missing": True}, - headers=headers, files={"file": io.BytesIO(b"Goodbye Cruel World!")}, ) assert res.status_code == 204, res.json() @@ -133,27 +123,18 @@ def test_get_study( res = client.put( f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt", "create_missing": True}, - headers=headers, files={"file": io.BytesIO(b"This is the end!")}, ) assert res.status_code == 204, res.json() # You can check that the resource has been created or updated. - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "user/somewhere/something.txt"}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "user/somewhere/something.txt"}) assert res.status_code == 200, res.json() assert res.content == b"This is the end!" # If we ask for properties, we should have a JSON content rel_path = "/input/links/de/properties/fr" - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "depth": 2}, - headers=headers, - ) + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": rel_path, "depth": 2}) assert res.status_code == 200, res.json() actual = res.json() assert actual == { @@ -172,35 +153,38 @@ def test_get_study( "use-phase-shifter": False, } - # If we ask for a matrix, we should have a JSON content if formatted is True - rel_path = "/input/links/de/fr" - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "formatted": True}, - headers=headers, - ) + # Some files can be corrupted + user_folder_dir = study_dir.joinpath("user/bad") + for file_path in user_folder_dir.glob("*.*"): + rel_path = file_path.relative_to(study_dir) + res = client.get( + f"/v1/studies/{internal_study_id}/raw", params={"path": f"/{rel_path.as_posix()}", "depth": 1} + ) + assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY + + # We can access to the configuration the classic way, + # for instance, we can get the list of areas: + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": "/input/areas/list", "depth": 1}) assert res.status_code == 200, res.json() - actual = res.json() - assert actual == {"index": ANY, "columns": ANY, "data": ANY} + assert res.json() == ["DE", "ES", "FR", "IT"] - # If we ask for a matrix, we should have a CSV content if formatted is False - rel_path = "/input/links/de/fr" + # asserts that the GET /raw endpoint is able to read matrix containing NaN values res = client.get( f"/v1/studies/{internal_study_id}/raw", - params={"path": rel_path, "formatted": False}, - headers=headers, + params={"path": "output/20201014-1427eco/economy/mc-all/areas/de/id-monthly"}, ) - assert res.status_code == 200, res.json() - actual = res.text - actual_lines = actual.splitlines() - first_row = [float(x) for x in actual_lines[0].split("\t")] - assert first_row == [100000, 100000, 0.01, 0.01, 0, 0, 0, 0] + assert res.status_code == 200 + assert np.isnan(res.json()["data"][0]).any() + + # Iterate over all possible combinations of path and depth (to mimic the debug view) + for path, depth in itertools.product([None, "", "/"], [0, 1, 2]): + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "depth": depth}) + assert res.status_code == 200, f"Error for path={path} and depth={depth}" - # If ask for an empty matrix, we should have an empty binary content + # For an empty matrix, we should have an empty binary content res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": False}, - headers=headers, ) assert res.status_code == 200, res.json() assert res.content == b"" @@ -209,46 +193,67 @@ def test_get_study( res = client.get( f"/v1/studies/{internal_study_id}/raw", params={"path": "input/thermal/prepro/de/01_solar/data", "formatted": True}, - headers=headers, ) assert res.status_code == 200, res.json() assert res.json() == {"index": [0], "columns": [], "data": []} - # Some files can be corrupted - user_folder_dir = study_dir.joinpath("user/bad") - for file_path in user_folder_dir.glob("*.*"): - rel_path = file_path.relative_to(study_dir) - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": f"/{rel_path.as_posix()}", "depth": 1}, - headers=headers, - ) - assert res.status_code == http.HTTPStatus.UNPROCESSABLE_ENTITY + # ============================= + # MATRICES + # ============================= - # We can access to the configuration the classic way, - # for instance, we can get the list of areas: - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "/input/areas/list", "depth": 1}, - headers=headers, - ) - assert res.status_code == 200, res.json() - assert res.json() == ["DE", "ES", "FR", "IT"] + matrix_types = { + "input": {"path": "/input/links/de/fr", "expected_row": [100000, 100000, 0.01, 0.01, 0, 0, 0, 0]}, + "output": { + "path": "/output/20201014-1422eco-hello/economy/mc-all/areas/de/id-daily", + "expected_row": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + } - # asserts that the GET /raw endpoint is able to read matrix containing NaN values - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": "output/20201014-1427eco/economy/mc-all/areas/de/id-monthly"}, - headers=headers, - ) - assert res.status_code == 200 - assert np.isnan(res.json()["data"][0]).any() + for matrix_type, parameters in matrix_types.items(): + path = parameters["path"] + expected_row = parameters["expected_row"] - # Iterate over all possible combinations of path and depth - for path, depth in itertools.product([None, "", "/"], [0, 1, 2]): - res = client.get( - f"/v1/studies/{internal_study_id}/raw", - params={"path": path, "depth": depth}, - headers=headers, - ) - assert res.status_code == 200, f"Error for path={path} and depth={depth}" + # We should have a JSON content if formatted is True + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "formatted": True}) + assert res.status_code == 200, res.json() + old_result = res.json() + assert old_result == {"index": ANY, "columns": ANY, "data": ANY} + assert old_result["data"][0][:8] == expected_row + + # We should have the same result with new flag 'format' set to 'JSON' + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "format": "json"}) + assert res.status_code == 200, res.json() + new_result = res.json() + assert new_result == old_result + + # We should have a CSV content if formatted is False + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "formatted": False}) + assert res.status_code == 200, res.json() + actual_lines = res.text.splitlines() + if matrix_type == "input": + first_row = [float(x) for x in actual_lines[0].split("\t")] + assert first_row == expected_row + else: + assert actual_lines[0].split("\t") == ["DE", "area", "id", "daily"] + + # We should have arrow binary if format = "arrow" + res = client.get(f"/v1/studies/{internal_study_id}/raw", params={"path": path, "format": "arrow"}) + assert res.status_code == 200 + assert isinstance(res.content, bytes) + assert res.text.startswith("ARROW") + arrow_bytes = res.content + buffer = pa.BufferReader(arrow_bytes) + table = feather.read_table(buffer) + df = table.to_pandas() + if matrix_type == "input": + assert list(df.loc[0]) == expected_row + else: + assert df.columns[0] == "Index" # asserts the first columns corresponds to the index in such a case. + assert list(df.loc[0][:9]) == ["01/01"] + expected_row + + if matrix_type == "input": + # Try to replace a matrix with a one in arrow format + res = client.put( + f"/v1/studies/{internal_study_id}/raw", params={"path": path}, files={"file": arrow_bytes} + ) + assert res.status_code in {201, 204} diff --git a/tests/storage/business/test_raw_study_service.py b/tests/storage/business/test_raw_study_service.py index 2b5c4b2dfd..f04ea181dc 100644 --- a/tests/storage/business/test_raw_study_service.py +++ b/tests/storage/business/test_raw_study_service.py @@ -71,11 +71,11 @@ def test_get(tmp_path: str, project_path) -> None: ) metadata = RawStudy(id="study2.py", workspace=DEFAULT_WORKSPACE_NAME, path=str(path_study)) - output = study_service.get(metadata=metadata, url=sub_route, depth=2) + output = study_service.get(metadata=metadata, url=sub_route, depth=2, format="json") assert output == data - study.get.assert_called_once_with(["settings"], depth=2, formatted=True) + study.get.assert_called_once_with(["settings"], depth=2, format="json") @pytest.mark.unit_test diff --git a/tests/storage/business/test_url_resolver_service.py b/tests/storage/business/test_url_resolver_service.py index 40dabc53c4..3979fd9f71 100644 --- a/tests/storage/business/test_url_resolver_service.py +++ b/tests/storage/business/test_url_resolver_service.py @@ -30,7 +30,7 @@ def test_resolve_matrix(): resolver = UriResolverService(matrix_service=matrix_service) - assert MOCK_MATRIX_JSON == resolver.resolve("matrix://my-id") + assert MOCK_MATRIX_JSON == resolver.resolve("matrix://my-id", format="json") matrix_service.get.assert_called_once_with("my-id") assert f"1.000000\t2.000000{os.linesep}3.000000\t4.000000{os.linesep}" == resolver.resolve("matrix://my-id", False) diff --git a/tests/storage/business/test_variant_study_service.py b/tests/storage/business/test_variant_study_service.py index 7c6e00f99c..150e38ba45 100644 --- a/tests/storage/business/test_variant_study_service.py +++ b/tests/storage/business/test_variant_study_service.py @@ -97,11 +97,11 @@ def task_status(*args): study_service.task_service.await_task.assert_called() study_service.exists.return_value = True - output = study_service.get(metadata=metadata, url=sub_route, depth=2) + output = study_service.get(metadata=metadata, url=sub_route, depth=2, format="json") assert output == data - study.get.assert_called_once_with(["settings"], depth=2, formatted=True) + study.get.assert_called_once_with(["settings"], depth=2, format="json") @pytest.mark.unit_test diff --git a/tests/storage/business/test_xpansion_manager.py b/tests/storage/business/test_xpansion_manager.py index 100bddd286..bbc42ea558 100644 --- a/tests/storage/business/test_xpansion_manager.py +++ b/tests/storage/business/test_xpansion_manager.py @@ -129,11 +129,11 @@ def test_create_configuration(tmp_path: Path, version: int, expected_output: JSO xpansion_manager = make_xpansion_manager(empty_study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.create_xpansion_configuration(study) - actual = empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + actual = empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) assert actual == expected_output @@ -147,16 +147,16 @@ def test_delete_xpansion_configuration(tmp_path: Path) -> None: xpansion_manager = make_xpansion_manager(empty_study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.create_xpansion_configuration(study) - assert empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + assert empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) xpansion_manager.delete_xpansion_configuration(study) with pytest.raises(ChildNotFoundError): - empty_study.tree.get(["user", "expansion"], expanded=True, depth=9) + empty_study.tree.get(["user", "expansion"], depth=9, expanded=True) @pytest.mark.unit_test @@ -471,20 +471,20 @@ def test_add_resources(tmp_path: Path) -> None: [UploadFile(filename=filename3, file=io.StringIO(content3))], ) - assert filename1 in empty_study.tree.get(["user", "expansion", "constraints"]) - expected1 = empty_study.tree.get(["user", "expansion", "constraints", filename1]) + assert filename1 in empty_study.tree.get(url=["user", "expansion", "constraints"], format="json") + expected1 = empty_study.tree.get(url=["user", "expansion", "constraints", filename1], format="json") assert content1.encode() == t.cast(bytes, expected1) - assert filename2 in empty_study.tree.get(["user", "expansion", "constraints"]) - expected2 = empty_study.tree.get(["user", "expansion", "constraints", filename2]) + assert filename2 in empty_study.tree.get(url=["user", "expansion", "constraints"], format="json") + expected2 = empty_study.tree.get(url=["user", "expansion", "constraints", filename2], format="json") assert content2.encode() == t.cast(bytes, expected2) - assert filename3 in empty_study.tree.get(["user", "expansion", "weights"]) + assert filename3 in empty_study.tree.get(url=["user", "expansion", "weights"], format="json") assert { "columns": [0], "data": [[2.0]], "index": [0], - } == empty_study.tree.get(["user", "expansion", "weights", filename3]) + } == empty_study.tree.get(url=["user", "expansion", "weights", filename3], format="json") settings = xpansion_manager.get_xpansion_settings(study) settings.yearly_weights = filename3 @@ -573,19 +573,19 @@ def test_add_capa(tmp_path: Path) -> None: xpansion_manager.add_resource(study, XpansionResourceFileType.CAPACITIES, upload_file_list) - assert filename1 in empty_study.tree.get(["user", "expansion", "capa"]) + assert filename1 in empty_study.tree.get(url=["user", "expansion", "capa"], format="json") assert { "columns": [0], "data": [[0.0]], "index": [0], - } == empty_study.tree.get(["user", "expansion", "capa", filename1]) + } == empty_study.tree.get(url=["user", "expansion", "capa", filename1], format="json") - assert filename2 in empty_study.tree.get(["user", "expansion", "capa"]) + assert filename2 in empty_study.tree.get(url=["user", "expansion", "capa"], format="json") assert { "columns": [0], "data": [[1.0]], "index": [0], - } == empty_study.tree.get(["user", "expansion", "capa", filename2]) + } == empty_study.tree.get(url=["user", "expansion", "capa", filename2], format="json") @pytest.mark.unit_test diff --git a/tests/storage/integration/test_STA_mini.py b/tests/storage/integration/test_STA_mini.py index f165c3c2dd..21fdf22ce9 100644 --- a/tests/storage/integration/test_STA_mini.py +++ b/tests/storage/integration/test_STA_mini.py @@ -55,12 +55,12 @@ def assert_with_errors( storage_service: StudyService, url: str, expected_output: Union[str, dict], - formatted: bool = True, + format: str = "json", ) -> None: url = url[len("/v1/studies/") :] uuid, url = url.split("/raw?path=") params = RequestParameters(user=ADMIN) - output = storage_service.get(uuid=uuid, url=url, depth=3, formatted=formatted, params=params) + output = storage_service.get(uuid=uuid, url=url, depth=3, format=format, params=params) assert_study( output, expected_output, @@ -499,8 +499,8 @@ def test_sta_mini_copy(storage_service) -> None: uuid = result.json() parameters = RequestParameters(user=ADMIN) - data_source = storage_service.get(source_study_name, "/", -1, True, parameters) - data_destination = storage_service.get(uuid, "/", -1, True, parameters) + data_source = storage_service.get(source_study_name, "/", -1, parameters, format="json") + data_destination = storage_service.get(uuid, "/", -1, parameters, format="json") link_url_source = data_source["input"]["links"]["de"]["fr"] assert "matrixfile://fr.txt" == link_url_source diff --git a/tests/storage/integration/test_write_STA_mini.py b/tests/storage/integration/test_write_STA_mini.py index 069d138e6c..685ec12bc4 100644 --- a/tests/storage/integration/test_write_STA_mini.py +++ b/tests/storage/integration/test_write_STA_mini.py @@ -29,7 +29,7 @@ def assert_with_errors( res = storage_service.edit_study(uuid=uuid, url=url, new=new, params=params) assert res == new - res = storage_service.get(uuid=uuid, url=url, depth=-1, formatted=True, params=params) + res = storage_service.get(uuid=uuid, url=url, depth=-1, format="json", params=params) if expected is not None: assert res == expected else: diff --git a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py index b6ac49fce1..25d0c6b932 100644 --- a/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py +++ b/tests/storage/repository/filesystem/matrix/test_input_series_matrix.py @@ -37,7 +37,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: file.write_text(content) node = InputSeriesMatrix(context=Mock(), config=my_study_config, nb_columns=8) - actual = node.load() + actual = node.load(format="json") expected = { "columns": [0, 1, 2, 3, 4, 5, 6, 7], "data": [ @@ -51,7 +51,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> None: node = InputSeriesMatrix(context=Mock(), config=my_study_config) with pytest.raises(ChildNotFoundError) as ctx: - node.load() + node.load(format="json") err_msg = str(ctx.value) assert "input.txt" in err_msg assert my_study_config.study_id in err_msg @@ -67,9 +67,9 @@ def test_load__link_to_matrix(self, my_study_config: FileStudyTreeConfig) -> Non } link.write_text(matrix_uri) - def resolve(uri: str, formatted: bool = True) -> t.Dict[str, t.Any]: + def resolve(uri: str, format: t.Optional[str] = None) -> t.Dict[str, t.Any]: assert uri == matrix_uri - assert formatted is True + assert format == "json" return matrix_obj context = ContextServer( @@ -78,7 +78,7 @@ def resolve(uri: str, formatted: bool = True) -> t.Dict[str, t.Any]: ) node = InputSeriesMatrix(context=context, config=my_study_config) - actual = node.load() + actual = node.load(format="json") assert actual == matrix_obj def test_save(self, my_study_config: FileStudyTreeConfig) -> None: diff --git a/tests/storage/repository/filesystem/matrix/test_matrix_node.py b/tests/storage/repository/filesystem/matrix/test_matrix_node.py index 6858a5ff0f..e0434bf72c 100644 --- a/tests/storage/repository/filesystem/matrix/test_matrix_node.py +++ b/tests/storage/repository/filesystem/matrix/test_matrix_node.py @@ -32,6 +32,7 @@ def parse( file_path: Optional[Path] = None, tmp_dir: Optional[TemporaryDirectory] = None, return_dataframe: bool = False, + format: str = "json", ) -> JSON: return MOCK_MATRIX_JSON diff --git a/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py b/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py index e6eb256c51..c12fca73b3 100644 --- a/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py +++ b/tests/storage/repository/filesystem/matrix/test_output_series_matrix.py @@ -66,7 +66,7 @@ def test_load(self, my_study_config: FileStudyTreeConfig) -> None: date_serializer=serializer, head_writer=AreaHeadWriter(area="", data_type="", freq=""), ) - assert node.load() == matrix.to_dict(orient="split") + assert node.load(format="json") == matrix.to_dict(orient="split") def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> None: node = OutputSeriesMatrix( @@ -77,7 +77,7 @@ def test_load__file_not_found(self, my_study_config: FileStudyTreeConfig) -> Non head_writer=AreaHeadWriter(area="", data_type="", freq=""), ) with pytest.raises(ChildNotFoundError) as ctx: - node.load() + node.load(format="json") err_msg = str(ctx.value) assert "'matrix-daily.txt" in err_msg assert my_study_config.study_id in err_msg diff --git a/tests/storage/repository/filesystem/test_lazy_node.py b/tests/storage/repository/filesystem/test_lazy_node.py index a2c72415f5..8f11874a25 100644 --- a/tests/storage/repository/filesystem/test_lazy_node.py +++ b/tests/storage/repository/filesystem/test_lazy_node.py @@ -22,13 +22,7 @@ def __init__(self, context: ContextServer, config: FileStudyTreeConfig) -> None: context=context, ) - def load( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = False, - formatted: bool = False, - ) -> str: + def load(self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "") -> str: return "Mock Matrix Content" def dump(self, data: str, url: Optional[List[str]] = None) -> None: @@ -69,7 +63,7 @@ def test_get_no_expanded_link(tmp_path: Path): config=config, ) assert "Mock Matrix Content" == node.get(expanded=False) - resolver.resolve.assert_called_once_with(uri, True) + resolver.resolve.assert_called_once_with(uri, None) def test_get_expanded_txt(tmp_path: Path): @@ -118,7 +112,7 @@ def test_save_uri(tmp_path: Path): node.save(uri) assert (file.parent / f"{file.name}.link").read_text() == uri assert not file.exists() - resolver.resolve.assert_called_once_with(uri) + resolver.resolve.assert_called_once_with(uri, format="json") def test_save_txt(tmp_path: Path): @@ -139,7 +133,7 @@ def test_save_txt(tmp_path: Path): node.save(content) assert file.read_text() == content assert not link.exists() - resolver.resolve.assert_called_once_with(content) + resolver.resolve.assert_called_once_with(content, format="json") @pytest.mark.parametrize("target_is_link", [True, False]) @@ -186,7 +180,7 @@ def test_rename_file(tmp_path: Path, target_is_link: bool): assert file.read_text() == content assert not link.exists() assert not renaming_file.exists() - resolver.resolve.assert_called_once_with(content) + resolver.resolve.assert_called_once_with(content, format="json") node.rename_file(target) @@ -241,7 +235,7 @@ def test_copy_file(tmp_path: Path, target_is_link: bool): assert file.read_text() == content assert not link.exists() assert not copied_file.exists() - resolver.resolve.assert_called_once_with(content) + resolver.resolve.assert_called_once_with(content, format="json") node.copy_file(target) diff --git a/tests/storage/repository/filesystem/utils.py b/tests/storage/repository/filesystem/utils.py index cb563e8567..5fdd991195 100644 --- a/tests/storage/repository/filesystem/utils.py +++ b/tests/storage/repository/filesystem/utils.py @@ -29,11 +29,7 @@ def get_node( return self def get( - self, - url: Optional[List[str]] = None, - depth: int = -1, - expanded: bool = True, - formatted: bool = True, + self, url: Optional[List[str]] = None, depth: int = -1, expanded: bool = False, format: str = "json" ) -> int: return self.value diff --git a/tests/storage/web/test_studies_bp.py b/tests/storage/web/test_studies_bp.py index 2ee0a9c7e2..6fc668a830 100644 --- a/tests/storage/web/test_studies_bp.py +++ b/tests/storage/web/test_studies_bp.py @@ -73,9 +73,7 @@ def test_server() -> None: client = TestClient(app) client.get("/v1/studies/study1/raw?path=settings/general/params") - mock_service.get.assert_called_once_with( - "study1", "settings/general/params", depth=3, formatted=True, params=PARAMS - ) + mock_service.get.assert_called_once_with("study1", "settings/general/params", depth=3, format="json", params=PARAMS) @pytest.mark.unit_test @@ -124,7 +122,7 @@ def test_server_with_parameters() -> None: parameters = RequestParameters(user=ADMIN) assert result.status_code == HTTPStatus.OK - mock_storage_service.get.assert_called_once_with("study1", "/", depth=4, formatted=True, params=parameters) + mock_storage_service.get.assert_called_once_with("study1", "/", depth=4, format="json", params=parameters) result = client.get("/v1/studies/study2/raw?depth=WRONG_TYPE") assert result.status_code == HTTPStatus.UNPROCESSABLE_ENTITY @@ -133,7 +131,7 @@ def test_server_with_parameters() -> None: assert result.status_code == HTTPStatus.OK excepted_parameters = RequestParameters(user=ADMIN) - mock_storage_service.get.assert_called_with("study2", "/", depth=3, formatted=True, params=excepted_parameters) + mock_storage_service.get.assert_called_with("study2", "/", depth=3, format="json", params=excepted_parameters) @pytest.mark.unit_test