-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(api): add output functionalities (get_matrix, aggregate_values) #42
base: main
Are you sure you want to change the base?
Changes from all commits
bd3bb82
26c13d0
1bbd8e3
e8329c9
7971f1f
4f09802
a483290
ffa4786
b314654
ce92449
69d8a52
d555a3d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,9 +9,96 @@ | |
# SPDX-License-Identifier: MPL-2.0 | ||
# | ||
# This file is part of the Antares project. | ||
from enum import Enum | ||
from typing import Any | ||
|
||
import pandas as pd | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class QueryFile(Enum): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're missing the value ID = "id" |
||
VALUES = "values" | ||
DETAILS = "details" | ||
DETAILS_STSTORAGE = "details-STstorage" | ||
DETAILS_RES = "details-res" | ||
|
||
|
||
class Frequency(Enum): | ||
HOURLY = "hourly" | ||
DAILY = "daily" | ||
WEEKLY = "weekly" | ||
MONTHLY = "monthly" | ||
ANNUAL = "annual" | ||
|
||
|
||
class McType(Enum): | ||
ALL = "mc-all" | ||
IND = "mc-ind" | ||
|
||
|
||
class ObjectType(Enum): | ||
LINKS = "links" | ||
AREAS = "areas" | ||
|
||
|
||
class AggregationEntry(BaseModel): | ||
""" | ||
Represents an entry for aggregation queries | ||
|
||
Attributes: | ||
query_file: The file to query. | ||
frequency: "hourly", "daily", "weekly", "monthly", "annual" | ||
mc_years: Monte Carlo years to include in the query. If left empty, all years are included. | ||
type_ids: which links/areas should be selected (ex: "be - fr"). If empty, all are selected (comma separated) | ||
columns_names: names or regexes (if query_file is of type details) to select columns (comma separated) | ||
""" | ||
|
||
query_file: QueryFile | ||
frequency: Frequency | ||
mc_years: str = "" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably best to type it as a list for users. Same for column_names |
||
type_ids: str = "" | ||
columns_names: str = "" | ||
|
||
def to_query(self, object_type: ObjectType) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rename to_api_query to show it's for api |
||
mc_years = f"&mc_years={self.mc_years}" if self.mc_years else "" | ||
type_ids = f"&{object_type.value}_ids={self.type_ids}" if self.type_ids else "" | ||
columns_names = f"&columns_names={self.columns_names}" if self.columns_names else "" | ||
|
||
return f"query_file={self.query_file.value}&frequency={self.frequency.value}{mc_years}{type_ids}{columns_names}&format=csv" | ||
|
||
|
||
class Output(BaseModel): | ||
name: str | ||
archived: bool | ||
|
||
def __init__(self, output_service, **kwargs: Any): # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you should remove the usage of pydantic inside this class and just do a normal class |
||
super().__init__(**kwargs) | ||
self._output_service = output_service | ||
|
||
def get_matrix(self, path: str) -> pd.DataFrame: | ||
""" | ||
Gets the matrix of the output | ||
|
||
Args: | ||
path: output path, eg: "mc-all/areas/south/values-hourly" | ||
|
||
Returns: Pandas DataFrame | ||
""" | ||
full_path = f"output/{self.name}/economy/{path}" | ||
return self._output_service.get_matrix(full_path) | ||
|
||
def aggregate_values( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now that I see the code I don't know if we should keep one method with arguments or 4 distincts method. The benefit of having 4 method would be to not introduce McType and ObjectType class that I don't really like and also ensure the queryfiles can always work. I'll see with Alexander what he thinks about it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alexander doesn't care. So I think it's better to introduce 4 methods. You'll also have to introduce other query classes that go for every method, see the AntaresWeb code below for inspiration: class MCIndAreasQueryFile(StrEnum):
VALUES = "values"
DETAILS = "details"
DETAILS_ST_STORAGE = "details-STstorage"
DETAILS_RES = "details-res"
class MCAllAreasQueryFile(StrEnum):
VALUES = "values"
DETAILS = "details"
DETAILS_ST_STORAGE = "details-STstorage"
DETAILS_RES = "details-res"
ID = "id"
class MCIndLinksQueryFile(StrEnum):
VALUES = "values"
class MCAllLinksQueryFile(StrEnum):
VALUES = "values"
ID = "id" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the wording FileType would be more appropriate as we'll also use it for the local implem one day There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From an user perspective these classes will probably be hard to use. We'll have to think if there's a better solution in a next PR |
||
self, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType | ||
) -> pd.DataFrame: | ||
""" | ||
Creates a matrix of aggregated raw data | ||
|
||
Args: | ||
aggregation_entry: input for the /aggregate endpoint | ||
mc_type: all or ind (enum) | ||
object_type: links or areas (enum) | ||
|
||
Returns: Pandas DataFrame corresponding to the aggregated raw data | ||
""" | ||
return self._output_service.aggregate_values(self.name, aggregation_entry, mc_type, object_type) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -219,8 +219,8 @@ def __init__( | |
self._area_service = service_factory.create_area_service() | ||
self._link_service = service_factory.create_link_service() | ||
self._run_service = service_factory.create_run_service() | ||
self._output_service = service_factory.create_output_service() | ||
self._binding_constraints_service = service_factory.create_binding_constraints_service() | ||
self._output_service = service_factory.create_output_service() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't think you need to create an output service here anymore There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The alternative would be to create the output service in read_outputs local and api manually without the factory. Same thing could be said about binding_constraint_service which is not really needed in study either. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think what you should do is add a method set_output_service inside the study service that you'll call inside the create_study_service method. This way you can remove the service from the study as you'll have it inside the study_service. That's something we're doing already for the area_service if you want to check |
||
self._settings = DefaultStudySettings.model_validate(settings if settings is not None else StudySettings()) | ||
self._areas: Dict[str, Area] = dict() | ||
self._links: Dict[str, Link] = dict() | ||
|
@@ -377,7 +377,7 @@ def read_outputs(self) -> list[Output]: | |
|
||
Returns: Output list | ||
""" | ||
outputs = self._output_service.read_outputs() | ||
outputs = self._study_service.read_outputs(self._output_service) | ||
self._outputs = {output.name: output for output in outputs} | ||
return outputs | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,12 +9,15 @@ | |
# SPDX-License-Identifier: MPL-2.0 | ||
# | ||
# This file is part of the Antares project. | ||
from io import StringIO | ||
|
||
import pandas as pd | ||
|
||
from antares.craft.api_conf.api_conf import APIconf | ||
from antares.craft.api_conf.request_wrapper import RequestWrapper | ||
from antares.craft.exceptions.exceptions import APIError, OutputsRetrievalError | ||
from antares.craft.model.output import Output | ||
from antares.craft.exceptions.exceptions import AggregateCreationError, APIError | ||
from antares.craft.model.output import AggregationEntry, McType, ObjectType | ||
from antares.craft.service.api_services.utils import get_matrix | ||
from antares.craft.service.base_services import BaseOutputService | ||
|
||
|
||
|
@@ -26,11 +29,15 @@ def __init__(self, config: APIconf, study_id: str): | |
self._base_url = f"{self.config.get_host()}/api/v1" | ||
self._wrapper = RequestWrapper(self.config.set_up_api_conf()) | ||
|
||
def read_outputs(self) -> list[Output]: | ||
url = f"{self._base_url}/studies/{self.study_id}/outputs" | ||
def get_matrix(self, path: str) -> pd.DataFrame: | ||
return get_matrix(self._base_url, self.study_id, self._wrapper, path) | ||
|
||
def aggregate_values( | ||
self, output_id: str, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType | ||
) -> pd.DataFrame: | ||
url = f"{self._base_url}/studies/{self.study_id}/{object_type.value}/aggregate/{mc_type.value}/{output_id}?{aggregation_entry.to_query(object_type)}" | ||
try: | ||
response = self._wrapper.get(url) | ||
outputs_json_list = response.json() | ||
return [Output(name=output["name"], archived=output["archived"]) for output in outputs_json_list] | ||
return pd.read_csv(StringIO(response.text)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In AntaresWeb we do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to the doc, separator is "," by default, so no problem. As for the data handling, apparently they offer similar performance -> "StringIO, however, is a native in-memory unicode container and will exhibit similar speed to BytesIO", from python doc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay. It's weird as in pandas doc online I see , as default but in the source code it seems like it's None. Weird |
||
except APIError as e: | ||
raise OutputsRetrievalError(self.study_id, e.message) | ||
raise AggregateCreationError(self.study_id, output_id, mc_type, object_type, e.message) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,6 @@ | |
# SPDX-License-Identifier: MPL-2.0 | ||
# | ||
# This file is part of the Antares project. | ||
|
||
import pytest | ||
import requests_mock | ||
|
||
|
@@ -18,6 +17,8 @@ | |
from json import dumps | ||
from unittest.mock import Mock, patch | ||
|
||
import pandas as pd | ||
|
||
from antares.craft.api_conf.api_conf import APIconf | ||
from antares.craft.exceptions.exceptions import ( | ||
AreaCreationError, | ||
|
@@ -34,6 +35,7 @@ | |
from antares.craft.model.binding_constraint import BindingConstraint, BindingConstraintProperties | ||
from antares.craft.model.hydro import HydroProperties | ||
from antares.craft.model.link import Link, LinkProperties, LinkUi | ||
from antares.craft.model.output import AggregationEntry, Frequency, McType, ObjectType, QueryFile | ||
from antares.craft.model.settings.general import GeneralParameters | ||
from antares.craft.model.settings.study_settings import StudySettings | ||
from antares.craft.model.simulation import AntaresSimulationParameters, Job, JobStatus, Solver | ||
|
@@ -472,3 +474,36 @@ def test_read_outputs(self): | |
mocker.get(run_url, json={"description": error_message}, status_code=404) | ||
with pytest.raises(OutputsRetrievalError, match=error_message): | ||
self.study.read_outputs() | ||
|
||
# ==== get_matrix() ==== | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should probably put this in a separated test as you're not really checking the same thing as the first part of the test and you can re-introduce a fake output at the start of your test |
||
|
||
matrix_url = f"https://antares.com/api/v1/studies/{self.study_id}/raw?path=output/{output1.name}/economy/mc-all/grid/links" | ||
matrix_output = {"columns": ["upstream", "downstream"], "data": [["be", "fr"]]} | ||
mocker.get(matrix_url, json=matrix_output) | ||
|
||
matrix = output1.get_matrix("mc-all/grid/links") | ||
assert isinstance(matrix, pd.DataFrame) | ||
assert matrix.shape[1] == len(matrix_output["columns"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't you just put matrix_output inside a dataframe and test the equality ? It would be clearer |
||
assert list(matrix.columns) == matrix_output["columns"] | ||
assert matrix.shape[0] == len(matrix_output["data"]) | ||
assert matrix.iloc[0].tolist() == matrix_output["data"][0] | ||
|
||
# ==== aggregate_values ==== | ||
|
||
aggregate_url = f"https://antares.com/api/v1/studies/{self.study_id}/links/aggregate/mc-all/{output1.name}?query_file=values&frequency=annual&format=csv" | ||
aggregate_output = """ | ||
link,timeId,FLOW LIN. EXP,FLOW LIN. STD | ||
be - fr,1,0.000000,0.000000 | ||
be - fr,2,0.000000,0.000000 | ||
""" | ||
mocker.get(aggregate_url, text=aggregate_output) | ||
|
||
aggregation_entry = AggregationEntry(query_file=QueryFile.VALUES, frequency=Frequency.ANNUAL) | ||
aggregated_matrix = output1.aggregate_values(aggregation_entry, McType.ALL, ObjectType.LINKS) | ||
|
||
assert isinstance(aggregated_matrix, pd.DataFrame) | ||
assert aggregated_matrix.shape[1] == 4 | ||
assert list(aggregated_matrix.columns) == ["link", "timeId", "FLOW LIN. EXP", "FLOW LIN. STD"] | ||
assert aggregated_matrix.shape[0] == 2 | ||
assert aggregated_matrix.iloc[0].tolist() == ["be - fr", 1, 0.0, 0.0] | ||
assert aggregated_matrix.iloc[1].tolist() == ["be - fr", 2, 0.0, 0.0] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Generally speaking I think we prefer not having to import anything related to the project in this file as it's like an util file