Merge branch 'dev' into feature/add-installer-submodule
maugde authored Aug 27, 2024
2 parents 4e2f8f9 + aa2216a commit 8ab67b7
Showing 30 changed files with 40,594 additions and 1,159 deletions.
19 changes: 19 additions & 0 deletions antarest/core/exceptions.py
@@ -392,6 +392,20 @@ def __str__(self) -> str:
        return self.detail


class OutputSubFolderNotFound(HTTPException):
    """
    Exception raised when an output sub-folder does not exist
    """

    def __init__(self, output_id: str, mc_root: str) -> None:
        message = f"The output '{output_id}' sub-folder '{mc_root}' does not exist"
        super().__init__(HTTPStatus.NOT_FOUND, message)

    def __str__(self) -> str:
        """Return a string representation of the exception."""
        return self.detail


class BadZipBinary(HTTPException):
    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNSUPPORTED_MEDIA_TYPE, message)
@@ -446,6 +460,11 @@ def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class MCRootNotHandled(HTTPException):
    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class MatrixWidthMismatchError(HTTPException):
    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)
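For context, a minimal sketch of how the two new exceptions could be raised together during output aggregation; the `MC_ROOTS` set, the `check_mc_root` helper and the `economy` sub-path are illustrative assumptions, not the actual call sites in `aggregator_management.py`:

from pathlib import Path

from antarest.core.exceptions import MCRootNotHandled, OutputSubFolderNotFound

MC_ROOTS = {"mc-ind", "mc-all"}  # assumed: the Monte Carlo roots the aggregator handles


def check_mc_root(output_path: Path, output_id: str, mc_root: str) -> None:
    # An unknown root maps to HTTP 422 (UNPROCESSABLE_ENTITY)...
    if mc_root not in MC_ROOTS:
        raise MCRootNotHandled(f"Unknown Monte Carlo root: {mc_root}")
    # ...while a missing sub-folder maps to HTTP 404 (NOT_FOUND).
    if not (output_path / "economy" / mc_root).is_dir():
        raise OutputSubFolderNotFound(output_id, mc_root)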
347 changes: 275 additions & 72 deletions antarest/study/business/aggregator_management.py

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions antarest/study/service.py
@@ -49,7 +49,13 @@
from antarest.matrixstore.matrix_editor import MatrixEditInstruction
from antarest.study.business.adequacy_patch_management import AdequacyPatchManager
from antarest.study.business.advanced_parameters_management import AdvancedParamsManager
from antarest.study.business.aggregator_management import AggregatorManager, AreasQueryFile, LinksQueryFile
from antarest.study.business.aggregator_management import (
    AggregatorManager,
    MCAllAreasQueryFile,
    MCAllLinksQueryFile,
    MCIndAreasQueryFile,
    MCIndLinksQueryFile,
)
from antarest.study.business.allocation_management import AllocationManager
from antarest.study.business.area_management import AreaCreationDTO, AreaInfoDTO, AreaManager, AreaType, UpdateAreaUi
from antarest.study.business.areas.hydro_management import HydroManager
@@ -372,42 +378,35 @@ def aggregate_output_data(
        self,
        uuid: str,
        output_id: str,
        query_file: t.Union[AreasQueryFile, LinksQueryFile],
        query_file: t.Union[MCIndAreasQueryFile, MCAllAreasQueryFile, MCIndLinksQueryFile, MCAllLinksQueryFile],
        frequency: MatrixFrequency,
        mc_years: t.Sequence[int],
        columns_names: t.Sequence[str],
        ids_to_consider: t.Sequence[str],
        params: RequestParameters,
        mc_years: t.Optional[t.Sequence[int]] = None,
    ) -> pd.DataFrame:
        """
        Aggregates output data based on several filtering conditions
        Args:
            uuid: study uuid
            output_id: simulation output ID
            query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
            query_file: which types of data to retrieve: "values", "details", "details-st-storage", "details-res", "ids"
            frequency: yearly, monthly, weekly, daily or hourly.
            mc_years: list of monte-carlo years, if empty, all years are selected
            columns_names: columns to be selected, if empty, all columns are selected
            columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
            ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
            params: request parameters
            mc_years: list of monte-carlo years, if empty, all years are selected (only for mc-ind)
        Returns: the aggregated data as a DataFrame
        """
        study = self.get_study(uuid)
        assert_permission(params.user, study, StudyPermissionType.READ)
        study_path = self.storage_service.raw_study_service.get_study_path(study)
        # fmt: off
        aggregator_manager = AggregatorManager(
            study_path,
            output_id,
            query_file,
            frequency,
            mc_years,
            columns_names,
            ids_to_consider
            study_path, output_id, query_file, frequency, ids_to_consider, columns_names, mc_years
        )
        # fmt: on
        return aggregator_manager.aggregate_output_data()

def get_logs(
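A hedged sketch of calling the reworked service method; `study_service`, `params`, the study UUID, the output ID and the enum member names below are placeholders rather than values taken from this diff. Because `mc_years` moved to the end of the signature with a `None` default, mc-all aggregations can simply omit it:

# Hypothetical call site; `study_service` and `params` come from the caller's context.
df = study_service.aggregate_output_data(
    "study-uuid",
    output_id="20240807-1628eco",
    query_file=MCIndAreasQueryFile.VALUES,  # assumed member name for "values"
    frequency=MatrixFrequency.HOURLY,
    columns_names=[],  # empty -> all columns
    ids_to_consider=["de", "fr"],
    params=params,
    mc_years=[1, 2, 5],  # only meaningful for mc-ind outputs; omit for mc-all
)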
163 changes: 153 additions & 10 deletions antarest/study/web/raw_studies_blueprint.py
@@ -18,7 +18,12 @@
from antarest.core.utils.utils import sanitize_string, sanitize_uuid
from antarest.core.utils.web import APITag
from antarest.login.auth import Auth
from antarest.study.business.aggregator_management import AreasQueryFile, LinksQueryFile
from antarest.study.business.aggregator_management import (
    MCAllAreasQueryFile,
    MCAllLinksQueryFile,
    MCIndAreasQueryFile,
    MCIndLinksQueryFile,
)
from antarest.study.service import StudyService
from antarest.study.storage.df_download import TableExportFormat, export_file
from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import MatrixFrequency
@@ -175,14 +180,14 @@ def get_study(
        return Response(content=json_response, media_type="application/json")

    @bp.get(
        "/studies/{uuid}/areas/aggregate/{output_id}",
        "/studies/{uuid}/areas/aggregate/mc-ind/{output_id}",
        tags=[APITag.study_raw_data],
        summary="Retrieve Aggregated Areas Raw Data from Study Output",
        summary="Retrieve Aggregated Areas Raw Data from Study Economy MCs individual Outputs",
    )
    def aggregate_areas_raw_data(
        uuid: str,
        output_id: str,
        query_file: AreasQueryFile,
        query_file: MCIndAreasQueryFile,
        frequency: MatrixFrequency,
        mc_years: str = "",
        areas_ids: str = "",
@@ -195,13 +200,14 @@ def aggregate_areas_raw_data(
        Create an aggregation of areas raw data
        Parameters:
        - `uuid`: study ID
        - `output_id`: the output ID aka the simulation ID
        - `query_file`: "values", "details", "details-STstorage", "details-res"
        - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
        - `mc_years`: which Monte Carlo years to select. If empty, all are selected (comma separated)
        - `areas_ids`: which areas to select. If empty, all are selected (comma separated)
        - `columns_names`: which columns to select. If empty, all are selected (comma separated)
        - `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
        - `export_format`: Returned file format (csv by default).
        Returns:
@@ -223,10 +229,10 @@
            output_id=output_id,
            query_file=query_file,
            frequency=frequency,
            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
            columns_names=_split_comma_separated_values(columns_names),
            ids_to_consider=_split_comma_separated_values(areas_ids),
            params=parameters,
            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
        )

        download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
@@ -244,14 +250,14 @@
        )

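Client side, the renamed route now carries the `mc-ind` segment. A sketch with the `requests` library, assuming the API is mounted under `/v1` and using placeholder host, study, output and token values:

import requests

resp = requests.get(
    "http://localhost:8080/v1/studies/<study-uuid>/areas/aggregate/mc-ind/<output-id>",
    headers={"Authorization": "Bearer <token>"},
    params={
        "query_file": "values",
        "frequency": "hourly",
        "mc_years": "1,2,5",  # comma separated; empty selects all years
        "areas_ids": "de,fr",  # empty selects all areas
        "columns_names": "",  # empty selects all columns
        "export_format": "csv",
    },
)
resp.raise_for_status()  # the response body is the exported file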
    @bp.get(
        "/studies/{uuid}/links/aggregate/{output_id}",
        "/studies/{uuid}/links/aggregate/mc-ind/{output_id}",
        tags=[APITag.study_raw_data],
        summary="Retrieve Aggregated Links Raw Data from Study Output",
        summary="Retrieve Aggregated Links Raw Data from Study Economy MCs individual Outputs",
    )
    def aggregate_links_raw_data(
        uuid: str,
        output_id: str,
        query_file: LinksQueryFile,
        query_file: MCIndLinksQueryFile,
        frequency: MatrixFrequency,
        mc_years: str = "",
        links_ids: str = "",
@@ -263,13 +269,14 @@ def aggregate_links_raw_data(
        Create an aggregation of links raw data
        Parameters:
        - `uuid`: study ID
        - `output_id`: the output ID aka the simulation ID
        - `query_file`: "values" (currently the only available option)
        - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
        - `mc_years`: which Monte Carlo years to select. If empty, all are selected (comma separated)
        - `links_ids`: which links to select (ex: "be - fr"). If empty, all are selected (comma separated)
        - `columns_names`: which columns to select. If empty, all are selected (comma separated)
        - `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
        - `export_format`: Returned file format (csv by default).
        Returns:
@@ -291,7 +298,140 @@
            output_id=output_id,
            query_file=query_file,
            frequency=frequency,
            columns_names=_split_comma_separated_values(columns_names),
            ids_to_consider=_split_comma_separated_values(links_ids),
            params=parameters,
            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
        )

        download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
        download_log = f"Exporting aggregated output data for study '{uuid}' as {export_format} file"

        return export_file(
            df_matrix,
            study_service.file_transfer_manager,
            export_format,
            True,
            True,
            download_name,
            download_log,
            current_user,
        )

    @bp.get(
        "/studies/{uuid}/areas/aggregate/mc-all/{output_id}",
        tags=[APITag.study_raw_data],
        summary="Retrieve Aggregated Areas Raw Data from Study Economy MCs All Outputs",
    )
    def aggregate_areas_raw_data__all(
        uuid: str,
        output_id: str,
        query_file: MCAllAreasQueryFile,
        frequency: MatrixFrequency,
        areas_ids: str = "",
        columns_names: str = "",
        export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
        current_user: JWTUser = Depends(auth.get_current_user),
    ) -> FileResponse:
        # noinspection SpellCheckingInspection
        """
        Create an aggregation of areas raw data in mc-all
        Parameters:
        - `uuid`: study ID
        - `output_id`: the output ID aka the simulation ID
        - `query_file`: "values", "details", "details-STstorage", "details-res", "id"
        - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
        - `areas_ids`: which areas to select. If empty, all are selected (comma separated)
        - `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
        - `export_format`: Returned file format (csv by default).
        Returns:
        FileResponse that corresponds to a dataframe with the aggregated areas raw data
        """
        logger.info(
            f"Aggregating areas output data for study {uuid}, output {output_id}, "
            f"from files '{query_file}-{frequency}.txt'",
            extra={"user": current_user.id},
        )

        # Avoid vulnerabilities by sanitizing the `uuid` and `output_id` parameters
        uuid = sanitize_uuid(uuid)
        output_id = sanitize_string(output_id)

        parameters = RequestParameters(user=current_user)
        df_matrix = study_service.aggregate_output_data(
            uuid,
            output_id=output_id,
            query_file=query_file,
            frequency=frequency,
            columns_names=_split_comma_separated_values(columns_names),
            ids_to_consider=_split_comma_separated_values(areas_ids),
            params=parameters,
        )

        download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
        download_log = f"Exporting aggregated output data for study '{uuid}' as {export_format} file"

        return export_file(
            df_matrix,
            study_service.file_transfer_manager,
            export_format,
            True,
            True,
            download_name,
            download_log,
            current_user,
        )

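The mc-all routes accept the same query parameters minus `mc_years`, since mc-all outputs are already aggregated over the Monte Carlo years. A sketch under the same placeholder assumptions as above (the column names in the filter are hypothetical):

resp = requests.get(
    "http://localhost:8080/v1/studies/<study-uuid>/areas/aggregate/mc-all/<output-id>",
    headers={"Authorization": "Bearer <token>"},
    params={
        "query_file": "details",  # regexes in `columns_names` apply to details files
        "frequency": "daily",
        "areas_ids": "",  # empty selects all areas
        "columns_names": "OV. COST,OP. COST",  # hypothetical column-name filter
        "export_format": "csv",
    },
)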
    @bp.get(
        "/studies/{uuid}/links/aggregate/mc-all/{output_id}",
        tags=[APITag.study_raw_data],
        summary="Retrieve Aggregated Links Raw Data from Study Economy MC-All Outputs",
    )
    def aggregate_links_raw_data__all(
        uuid: str,
        output_id: str,
        query_file: MCAllLinksQueryFile,
        frequency: MatrixFrequency,
        links_ids: str = "",
        columns_names: str = "",
        export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
        current_user: JWTUser = Depends(auth.get_current_user),
    ) -> FileResponse:
"""
Create an aggregation of links in mc-all
Parameters:
- `uuid`: study ID
- `output_id`: the output ID aka the simulation ID
- `query_file`: "values", "id"
- `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
- `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
- `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
- `export_format`: Returned file format (csv by default).
Returns:
FileResponse that corresponds to a dataframe with the aggregated links raw data
"""
        logger.info(
            f"Aggregating links mc-all data for study {uuid}, output {output_id}, "
            f"from files '{query_file}-{frequency}.txt'",
            extra={"user": current_user.id},
        )

        # Avoid vulnerabilities by sanitizing the `uuid` and `output_id` parameters
        uuid = sanitize_uuid(uuid)
        output_id = sanitize_string(output_id)

        parameters = RequestParameters(user=current_user)
        df_matrix = study_service.aggregate_output_data(
            uuid,
            output_id=output_id,
            query_file=query_file,
            frequency=frequency,
            columns_names=_split_comma_separated_values(columns_names),
            ids_to_consider=_split_comma_separated_values(links_ids),
            params=parameters,
@@ -329,6 +469,7 @@ def edit_study(
        > NOTE: use the PUT endpoint to upload a file.
        Parameters:
        - `uuid`: The UUID of the study.
        - `path`: The path to the data to update. Defaults to "/".
        - `data`: The formatted data to be posted. Defaults to an empty string.
@@ -362,6 +503,7 @@ def replace_study_file(
        Update raw data for a study by posting a raw file.
        Parameters:
        - `uuid`: The UUID of the study.
        - `path`: The path to the data to update. Defaults to "/".
        - `file`: The raw file to be posted (e.g. a CSV file opened in binary mode).
@@ -425,6 +567,7 @@ def get_matrix(
        Download a matrix in a given format.
        Parameters:
        - `uuid`: study ID
        - `matrix_path`: Relative path of the matrix to download.
        - `export_format`: Returned file format (csv by default).
Binary file modified tests/integration/assets/STA-mini.7z
Binary file modified tests/integration/assets/STA-mini.zip
@@ -256,8 +256,8 @@ def test_lifecycle(
            {
                "path": str(ext_workspace_path / "STA-mini"),
                "file_type": "directory",
                "file_count": IntegerRange(900, 1000),  # 918
                "size_bytes": IntegerRange(7_000_000, 9_000_000),  # nt: 7_741_619, posix: 8_597_683
                "file_count": IntegerRange(1000, 1100),  # 1043
                "size_bytes": IntegerRange(9_000_000, 11_000_000),  # 10_428_620
                "created": AnyIsoDateTime(),
                "accessed": AnyIsoDateTime(),
                "modified": AnyIsoDateTime(),
@@ -415,7 +415,7 @@ def test_size_of_studies(
        sizes.append(actual[0]["size_bytes"])

    # Check the sizes
    # The size of the new study should be between 140 and 300 KB.
    # The size of 'STA-mini' should be between 7 and 9 MB.
    # The size of the new study should be between 140 and 350 KB.
    # The size of 'STA-mini' should be between 9 and 11 MB.
    sizes.sort()
    assert sizes == [IntegerRange(140_000, 300_000), IntegerRange(7_000_000, 9_000_000)]
    assert sizes == [IntegerRange(140_000, 350_000), IntegerRange(9_000_000, 11_000_000)]
