feat(aggregation-api): optimize code

AntaresSimulatorTeam · Aug 20, 2024 · cd6f626 · cd6f626
1 parent 36d1110
commit cd6f626
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 76 deletions.
diff --git a/antarest/study/business/aggregator_management.py b/antarest/study/business/aggregator_management.py
@@ -360,47 +360,33 @@ def aggregate_output_data(self) -> pd.DataFrame:
         Aggregates the output data of a study and returns it as a DataFrame
         """
 
-        # check that the `mc_years` is Sequence[int]
-        assert self.mc_years is not None, "mc_years should be a `Sequence` of integers"
+        if self.mc_root == MCRoot.MC_IND:
+            # check that the `mc_years` is Sequence[int]
+            assert self.mc_years is not None, "mc_years should be a `Sequence` of integers"
 
-        # Checks if mc-ind results exist
-        if not self.mc_ind_path.exists():
-            raise OutputNotFound(self.output_id)
+            # Checks if mc-ind results exist
+            if not self.mc_ind_path.exists():
+                raise OutputNotFound(self.output_id)
 
-        # Retrieves the horizon from the study output
-        horizon_path = self.study_path / HORIZON_TEMPLATE.format(sim_id=self.output_id)
-        launching_config = IniReader().read(horizon_path)
-        horizon = launching_config.get("general", {}).get("horizon", 2018)
-
-        # filters files to consider
-        all_output_files = sorted(self._gather_all_files_to_consider__ind())
-
-        logger.info(
-            f"Parsing {len(all_output_files)} {self.frequency.value} files"
-            f"to build the aggregated output for study `{self.study_path.name}`"
-        )
-        # builds final dataframe
-        final_df = self._build_dataframe(all_output_files, horizon)
+            # filters files to consider
+            all_output_files = sorted(self._gather_all_files_to_consider__ind())
 
-        return final_df
+        elif self.mc_root == MCRoot.MC_ALL:
+            # Checks if mc-all results exist
+            if not self.mc_all_path.exists():
+                raise OutputNotFound(self.output_id)
 
-    def aggregate_output_data__all(self) -> pd.DataFrame:
-        """
-        Aggregates the output data of a study and returns it as a DataFrame
-        """
+            # filters files to consider
+            all_output_files = sorted(self._gather_all_files_to_consider__all())
 
-        # Checks if mc-all results exist
-        if not self.mc_all_path.exists():
-            raise OutputNotFound(self.output_id)
+        else:
+            raise NotImplementedError(f"Unknown Monte Carlo root: {self.mc_root}")
 
         # Retrieves the horizon from the study output
         horizon_path = self.study_path / HORIZON_TEMPLATE.format(sim_id=self.output_id)
         launching_config = IniReader().read(horizon_path)
         horizon = launching_config.get("general", {}).get("horizon", 2018)
 
-        # filters files to consider
-        all_output_files = sorted(self._gather_all_files_to_consider__all())
-
         logger.info(
             f"Parsing {len(all_output_files)} {self.frequency.value} files"
             f"to build the aggregated output for study `{self.study_path.name}`"

diff --git a/antarest/study/service.py b/antarest/study/service.py
@@ -331,69 +331,36 @@ def aggregate_output_data(
         self,
         uuid: str,
         output_id: str,
-        query_file: t.Union[MCIndAreasQueryFile, MCIndLinksQueryFile],
+        query_file: t.Union[MCIndAreasQueryFile, MCAllAreasQueryFile, MCIndLinksQueryFile, MCAllLinksQueryFile],
         frequency: MatrixFrequency,
-        mc_years: t.Sequence[int],
         columns_names: t.Sequence[str],
         ids_to_consider: t.Sequence[str],
         params: RequestParameters,
+        mc_years: t.Optional[t.Sequence[int]] = None,
     ) -> pd.DataFrame:
         """
-        Aggregates output data (in `economy/mc-ind`) based on several filtering conditions
-        Args:
-            uuid: study uuid
-            output_id: simulation output ID
-            query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
-            frequency: yearly, monthly, weekly, daily or hourly.
-            mc_years: list of monte-carlo years, if empty, all years are selected
-            columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
-            ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
-            params: request parameters
-
-        Returns: the aggregated data as a DataFrame
-
-        """
-        study = self.get_study(uuid)
-        assert_permission(params.user, study, StudyPermissionType.READ)
-        study_path = self.storage_service.raw_study_service.get_study_path(study)
-        # fmt: off
-        aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
-                                               columns_names, mc_years)
-        # fmt: on
-        return aggregator_manager.aggregate_output_data()
+        Aggregates output data based on several filtering conditions
 
-    def aggregate_output_data__all(
-        self,
-        uuid: str,
-        output_id: str,
-        query_file: t.Union[MCAllAreasQueryFile, MCAllLinksQueryFile],
-        frequency: MatrixFrequency,
-        columns_names: t.Sequence[str],
-        ids_to_consider: t.Sequence[str],
-        params: RequestParameters,
-    ) -> pd.DataFrame:
-        """
-        Aggregates output data (in `economy/mc-all`) based on several filtering conditions
         Args:
             uuid: study uuid
             output_id: simulation output ID
-            query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
+            query_file: which types of data to retrieve: "values", "details", "details-st-storage", "details-res", "ids"
             frequency: yearly, monthly, weekly, daily or hourly.
             columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
             ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
             params: request parameters
+            mc_years: list of monte-carlo years, if empty, all years are selected (only for mc-ind)
 
         Returns: the aggregated data as a DataFrame
 
         """
         study = self.get_study(uuid)
         assert_permission(params.user, study, StudyPermissionType.READ)
         study_path = self.storage_service.raw_study_service.get_study_path(study)
-        # fmt: off
-        aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
-                                               columns_names)
-        # fmt: on
-        return aggregator_manager.aggregate_output_data__all()
+        aggregator_manager = AggregatorManager(
+            study_path, output_id, query_file, frequency, ids_to_consider, columns_names, mc_years
+        )
+        return aggregator_manager.aggregate_output_data()
 
     def get_logs(
         self,

diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py
@@ -229,10 +229,10 @@ def aggregate_areas_raw_data(
             output_id=output_id,
             query_file=query_file,
             frequency=frequency,
-            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
             columns_names=_split_comma_separated_values(columns_names),
             ids_to_consider=_split_comma_separated_values(areas_ids),
             params=parameters,
+            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
         )
 
         download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
@@ -298,10 +298,10 @@ def aggregate_links_raw_data(
             output_id=output_id,
             query_file=query_file,
             frequency=frequency,
-            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
             columns_names=_split_comma_separated_values(columns_names),
             ids_to_consider=_split_comma_separated_values(links_ids),
             params=parameters,
+            mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
         )
 
         download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
@@ -361,7 +361,7 @@ def aggregate_areas_raw_data__all(
         output_id = sanitize_string(output_id)
 
         parameters = RequestParameters(user=current_user)
-        df_matrix = study_service.aggregate_output_data__all(
+        df_matrix = study_service.aggregate_output_data(
             uuid,
             output_id=output_id,
             query_file=query_file,
@@ -427,7 +427,7 @@ def aggregate_links_raw_data__all(
         output_id = sanitize_string(output_id)
 
         parameters = RequestParameters(user=current_user)
-        df_matrix = study_service.aggregate_output_data__all(
+        df_matrix = study_service.aggregate_output_data(
             uuid,
             output_id=output_id,
             query_file=query_file,