Skip to content

Commit

Permalink
feat(aggregation-api): optimize code
Browse files Browse the repository at this point in the history
  • Loading branch information
mabw-rte committed Aug 20, 2024
1 parent 36d1110 commit cd6f626
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 76 deletions.
46 changes: 16 additions & 30 deletions antarest/study/business/aggregator_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,47 +360,33 @@ def aggregate_output_data(self) -> pd.DataFrame:
Aggregates the output data of a study and returns it as a DataFrame
"""

# check that the `mc_years` is Sequence[int]
assert self.mc_years is not None, "mc_years should be a `Sequence` of integers"
if self.mc_root == MCRoot.MC_IND:
# check that the `mc_years` is Sequence[int]
assert self.mc_years is not None, "mc_years should be a `Sequence` of integers"

# Checks if mc-ind results exist
if not self.mc_ind_path.exists():
raise OutputNotFound(self.output_id)
# Checks if mc-ind results exist
if not self.mc_ind_path.exists():
raise OutputNotFound(self.output_id)

# Retrieves the horizon from the study output
horizon_path = self.study_path / HORIZON_TEMPLATE.format(sim_id=self.output_id)
launching_config = IniReader().read(horizon_path)
horizon = launching_config.get("general", {}).get("horizon", 2018)

# filters files to consider
all_output_files = sorted(self._gather_all_files_to_consider__ind())

logger.info(
f"Parsing {len(all_output_files)} {self.frequency.value} files"
f"to build the aggregated output for study `{self.study_path.name}`"
)
# builds final dataframe
final_df = self._build_dataframe(all_output_files, horizon)
# filters files to consider
all_output_files = sorted(self._gather_all_files_to_consider__ind())

return final_df
elif self.mc_root == MCRoot.MC_ALL:
# Checks if mc-all results exist
if not self.mc_all_path.exists():
raise OutputNotFound(self.output_id)

def aggregate_output_data__all(self) -> pd.DataFrame:
"""
Aggregates the output data of a study and returns it as a DataFrame
"""
# filters files to consider
all_output_files = sorted(self._gather_all_files_to_consider__all())

# Checks if mc-all results exist
if not self.mc_all_path.exists():
raise OutputNotFound(self.output_id)
else:
raise NotImplementedError(f"Unknown Monte Carlo root: {self.mc_root}")

# Retrieves the horizon from the study output
horizon_path = self.study_path / HORIZON_TEMPLATE.format(sim_id=self.output_id)
launching_config = IniReader().read(horizon_path)
horizon = launching_config.get("general", {}).get("horizon", 2018)

# filters files to consider
all_output_files = sorted(self._gather_all_files_to_consider__all())

logger.info(
f"Parsing {len(all_output_files)} {self.frequency.value} files"
f"to build the aggregated output for study `{self.study_path.name}`"
Expand Down
51 changes: 9 additions & 42 deletions antarest/study/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,69 +331,36 @@ def aggregate_output_data(
self,
uuid: str,
output_id: str,
query_file: t.Union[MCIndAreasQueryFile, MCIndLinksQueryFile],
query_file: t.Union[MCIndAreasQueryFile, MCAllAreasQueryFile, MCIndLinksQueryFile, MCAllLinksQueryFile],
frequency: MatrixFrequency,
mc_years: t.Sequence[int],
columns_names: t.Sequence[str],
ids_to_consider: t.Sequence[str],
params: RequestParameters,
mc_years: t.Optional[t.Sequence[int]] = None,
) -> pd.DataFrame:
"""
Aggregates output data (in `economy/mc-ind`) based on several filtering conditions
Args:
uuid: study uuid
output_id: simulation output ID
query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
frequency: yearly, monthly, weekly, daily or hourly.
mc_years: list of monte-carlo years, if empty, all years are selected
columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
params: request parameters
Returns: the aggregated data as a DataFrame
"""
study = self.get_study(uuid)
assert_permission(params.user, study, StudyPermissionType.READ)
study_path = self.storage_service.raw_study_service.get_study_path(study)
# fmt: off
aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
columns_names, mc_years)
# fmt: on
return aggregator_manager.aggregate_output_data()
Aggregates output data based on several filtering conditions
def aggregate_output_data__all(
self,
uuid: str,
output_id: str,
query_file: t.Union[MCAllAreasQueryFile, MCAllLinksQueryFile],
frequency: MatrixFrequency,
columns_names: t.Sequence[str],
ids_to_consider: t.Sequence[str],
params: RequestParameters,
) -> pd.DataFrame:
"""
Aggregates output data (in `economy/mc-all`) based on several filtering conditions
Args:
uuid: study uuid
output_id: simulation output ID
query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
query_file: which types of data to retrieve: "values", "details", "details-st-storage", "details-res", "ids"
frequency: yearly, monthly, weekly, daily or hourly.
columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
params: request parameters
mc_years: list of monte-carlo years, if empty, all years are selected (only for mc-ind)
Returns: the aggregated data as a DataFrame
"""
study = self.get_study(uuid)
assert_permission(params.user, study, StudyPermissionType.READ)
study_path = self.storage_service.raw_study_service.get_study_path(study)
# fmt: off
aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
columns_names)
# fmt: on
return aggregator_manager.aggregate_output_data__all()
aggregator_manager = AggregatorManager(
study_path, output_id, query_file, frequency, ids_to_consider, columns_names, mc_years
)
return aggregator_manager.aggregate_output_data()

def get_logs(
self,
Expand Down
8 changes: 4 additions & 4 deletions antarest/study/web/raw_studies_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ def aggregate_areas_raw_data(
output_id=output_id,
query_file=query_file,
frequency=frequency,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
columns_names=_split_comma_separated_values(columns_names),
ids_to_consider=_split_comma_separated_values(areas_ids),
params=parameters,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
)

download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
Expand Down Expand Up @@ -298,10 +298,10 @@ def aggregate_links_raw_data(
output_id=output_id,
query_file=query_file,
frequency=frequency,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
columns_names=_split_comma_separated_values(columns_names),
ids_to_consider=_split_comma_separated_values(links_ids),
params=parameters,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
)

download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
Expand Down Expand Up @@ -361,7 +361,7 @@ def aggregate_areas_raw_data__all(
output_id = sanitize_string(output_id)

parameters = RequestParameters(user=current_user)
df_matrix = study_service.aggregate_output_data__all(
df_matrix = study_service.aggregate_output_data(
uuid,
output_id=output_id,
query_file=query_file,
Expand Down Expand Up @@ -427,7 +427,7 @@ def aggregate_links_raw_data__all(
output_id = sanitize_string(output_id)

parameters = RequestParameters(user=current_user)
df_matrix = study_service.aggregate_output_data__all(
df_matrix = study_service.aggregate_output_data(
uuid,
output_id=output_id,
query_file=query_file,
Expand Down

0 comments on commit cd6f626

Please sign in to comment.