Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): add output functionalities (get_matrix, aggregate_values) #42

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
11 changes: 11 additions & 0 deletions src/antares/craft/exceptions/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from typing import List

from antares.craft.model.output import McType, ObjectType
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally speaking I think we prefer not having to import anything related to the project in this file, as it's like a util file



class InvalidChoiceError(ValueError):
def __init__(self, message: str = "Invalid choice") -> None:
Expand Down Expand Up @@ -328,3 +330,12 @@ class OutputsRetrievalError(Exception):
def __init__(self, study_id: str, message: str) -> None:
self.message = f"Could not get outputs for {study_id}: " + message
super().__init__(self.message)


class AggregateCreationError(Exception):
    """Raised when the /aggregate endpoint fails for a study output."""

    def __init__(self, study_id: str, output_id: str, mc_type: McType, object_type: ObjectType, message: str) -> None:
        # Prefix identifies exactly which aggregate request failed before appending the server message.
        prefix = f"Could not create {mc_type.value}/{object_type.value} aggregate for study {study_id}, output {output_id}: "
        self.message = prefix + message
        super().__init__(self.message)
87 changes: 87 additions & 0 deletions src/antares/craft/model/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,96 @@
# SPDX-License-Identifier: MPL-2.0
#
# This file is part of the Antares project.
from enum import Enum
from typing import Any

import pandas as pd

from pydantic import BaseModel


class QueryFile(Enum):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're missing the value ID = "id"
I would also prefer to rename STSTORAGE to ST_STORAGE

VALUES = "values"
DETAILS = "details"
DETAILS_STSTORAGE = "details-STstorage"
DETAILS_RES = "details-res"


class Frequency(Enum):
    """Time resolution of an output matrix / aggregation query."""

    HOURLY = "hourly"
    DAILY = "daily"
    WEEKLY = "weekly"
    MONTHLY = "monthly"
    ANNUAL = "annual"


class McType(Enum):
    """Monte-Carlo scope selector; values match the "mc-all" / "mc-ind" output folders."""

    ALL = "mc-all"
    IND = "mc-ind"


class ObjectType(Enum):
    """Kind of study objects an aggregation targets: links or areas."""

    LINKS = "links"
    AREAS = "areas"


class AggregationEntry(BaseModel):
"""
Represents an entry for aggregation queries

Attributes:
query_file: The file to query.
frequency: "hourly", "daily", "weekly", "monthly", "annual"
mc_years: Monte Carlo years to include in the query. If left empty, all years are included.
type_ids: which links/areas to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
columns_names: names or regexes (if query_file is of type details) to select columns (comma separated)
"""

query_file: QueryFile
frequency: Frequency
mc_years: str = ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably best to type it as a list for users. Same for column_names

type_ids: str = ""
columns_names: str = ""

def to_query(self, object_type: ObjectType) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to_api_query to show it's for api

mc_years = f"&mc_years={self.mc_years}" if self.mc_years else ""
type_ids = f"&{object_type.value}_ids={self.type_ids}" if self.type_ids else ""
columns_names = f"&columns_names={self.columns_names}" if self.columns_names else ""

return f"query_file={self.query_file.value}&frequency={self.frequency.value}{mc_years}{type_ids}{columns_names}&format=csv"


class Output(BaseModel):
name: str
archived: bool

def __init__(self, output_service, **kwargs: Any): # type: ignore
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you should remove the usage of pydantic inside this class and just do a normal class

super().__init__(**kwargs)
self._output_service = output_service

def get_matrix(self, path: str) -> pd.DataFrame:
"""
Gets the matrix of the output

Args:
path: output path, eg: "mc-all/areas/south/values-hourly"

Returns: Pandas DataFrame
"""
full_path = f"output/{self.name}/economy/{path}"
return self._output_service.get_matrix(full_path)

def aggregate_values(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that I see the code I don't know if we should keep one method with arguments or 4 distinct methods. The benefit of having 4 methods would be to not introduce the McType and ObjectType classes that I don't really like, and also to ensure the query files can always work. I'll see with Alexander what he thinks about it

Copy link
Contributor

@MartinBelthle MartinBelthle Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alexander doesn't care. So I think it's better to introduce 4 methods. You'll also have to introduce other query classes that go for every method, see the AntaresWeb code below for inspiration:

class MCIndAreasQueryFile(StrEnum):
    VALUES = "values"
    DETAILS = "details"
    DETAILS_ST_STORAGE = "details-STstorage"
    DETAILS_RES = "details-res"


class MCAllAreasQueryFile(StrEnum):
    VALUES = "values"
    DETAILS = "details"
    DETAILS_ST_STORAGE = "details-STstorage"
    DETAILS_RES = "details-res"
    ID = "id"


class MCIndLinksQueryFile(StrEnum):
    VALUES = "values"


class MCAllLinksQueryFile(StrEnum):
    VALUES = "values"
    ID = "id"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the wording FileType would be more appropriate as we'll also use it for the local implem one day

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From an user perspective these classes will probably be hard to use. We'll have to think if there's a better solution in a next PR

self, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType
) -> pd.DataFrame:
"""
Creates a matrix of aggregated raw data

Args:
aggregate_input: input for the /aggregate endpoint
mc_type: all or ind (enum)
object_type: links or area (enum)

Returns: Pandas DataFrame corresponding to the aggregated raw data
"""
return self._output_service.aggregate_values(self.name, aggregation_entry, mc_type, object_type)
4 changes: 2 additions & 2 deletions src/antares/craft/model/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ def __init__(
self._area_service = service_factory.create_area_service()
self._link_service = service_factory.create_link_service()
self._run_service = service_factory.create_run_service()
self._output_service = service_factory.create_output_service()
self._binding_constraints_service = service_factory.create_binding_constraints_service()
self._output_service = service_factory.create_output_service()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think you need to create an output service here anymore

Copy link
Collaborator Author

@salemsd salemsd Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The alternative would be to create the output service in read_outputs local and api manually without the factory. Same thing could be said about binding_constraint_service which is not really needed in study either.
Or we could move read_outputs back into output_service but that would create redundancy with an output also having its own service.
I don't see what else I could do ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll check

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what you should do is add a method set_output_service inside the study service that you'll call inside the create_study_service method. This way you can remove the service from the study as you'll have it inside the study_service. That's something we're doing already for the area_service if you want to check

self._settings = DefaultStudySettings.model_validate(settings if settings is not None else StudySettings())
self._areas: Dict[str, Area] = dict()
self._links: Dict[str, Link] = dict()
Expand Down Expand Up @@ -377,7 +377,7 @@ def read_outputs(self) -> list[Output]:

Returns: Output list
"""
outputs = self._output_service.read_outputs()
outputs = self._study_service.read_outputs(self._output_service)
self._outputs = {output.name: output for output in outputs}
return outputs

Expand Down
21 changes: 14 additions & 7 deletions src/antares/craft/service/api_services/output_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
# SPDX-License-Identifier: MPL-2.0
#
# This file is part of the Antares project.
from io import StringIO

import pandas as pd

from antares.craft.api_conf.api_conf import APIconf
from antares.craft.api_conf.request_wrapper import RequestWrapper
from antares.craft.exceptions.exceptions import APIError, OutputsRetrievalError
from antares.craft.model.output import Output
from antares.craft.exceptions.exceptions import AggregateCreationError, APIError
from antares.craft.model.output import AggregationEntry, McType, ObjectType
from antares.craft.service.api_services.utils import get_matrix
from antares.craft.service.base_services import BaseOutputService


Expand All @@ -26,11 +29,15 @@ def __init__(self, config: APIconf, study_id: str):
self._base_url = f"{self.config.get_host()}/api/v1"
self._wrapper = RequestWrapper(self.config.set_up_api_conf())

def read_outputs(self) -> list[Output]:
url = f"{self._base_url}/studies/{self.study_id}/outputs"
    def get_matrix(self, path: str) -> pd.DataFrame:
        """Fetch the raw matrix at `path` for this study, delegating to the shared `get_matrix` util."""
        return get_matrix(self._base_url, self.study_id, self._wrapper, path)

def aggregate_values(
self, output_id: str, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType
) -> pd.DataFrame:
url = f"{self._base_url}/studies/{self.study_id}/{object_type.value}/aggregate/{mc_type.value}/{output_id}?{aggregation_entry.to_query(object_type)}"
try:
response = self._wrapper.get(url)
outputs_json_list = response.json()
return [Output(name=output["name"], archived=output["archived"]) for output in outputs_json_list]
return pd.read_csv(StringIO(response.text))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In AntaresWeb we do pd.read_csv(io.BytesIO(response.content), sep=","), don't know if it's faster. We can keep the code as is but don't you have issue as you didn't specify the separator ?

Copy link
Collaborator Author

@salemsd salemsd Dec 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the doc, separator is "," by default, so no problem. As for the data handling, apparently they offer similar performance -> "StringIO, however, is a native in-memory unicode container and will exhibit similar speed to BytesIO", from python doc

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. It's weird as in pandas doc online I see , as default but in the source code it seems like it's None. Weird

except APIError as e:
raise OutputsRetrievalError(self.study_id, e.message)
raise AggregateCreationError(self.study_id, output_id, mc_type, object_type, e.message)
15 changes: 14 additions & 1 deletion src/antares/craft/service/api_services/study_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
from antares.craft.exceptions.exceptions import (
APIError,
BindingConstraintDeletionError,
OutputsRetrievalError,
StudyDeletionError,
StudySettingsUpdateError,
StudyVariantCreationError,
)
from antares.craft.model.binding_constraint import BindingConstraint
from antares.craft.model.output import Output
from antares.craft.model.settings.adequacy_patch import AdequacyPatchParameters
from antares.craft.model.settings.advanced_parameters import AdvancedParameters
from antares.craft.model.settings.general import GeneralParameters
Expand All @@ -31,7 +33,7 @@
from antares.craft.model.settings.study_settings import StudySettings
from antares.craft.model.settings.thematic_trimming import ThematicTrimmingParameters
from antares.craft.model.settings.time_series import TimeSeriesParameters
from antares.craft.service.base_services import BaseStudyService
from antares.craft.service.base_services import BaseOutputService, BaseStudyService

if TYPE_CHECKING:
from antares.craft.model.study import Study
Expand Down Expand Up @@ -119,3 +121,14 @@ def create_variant(self, variant_name: str) -> "Study":
return study.read_study_api(self.config, variant_id)
except APIError as e:
raise StudyVariantCreationError(self.study_id, e.message) from e

def read_outputs(self, output_service: BaseOutputService) -> list[Output]:
url = f"{self._base_url}/studies/{self.study_id}/outputs"
try:
response = self._wrapper.get(url)
outputs_json_list = response.json()
return [
Output(output_service, name=output["name"], archived=output["archived"]) for output in outputs_json_list
]
except APIError as e:
raise OutputsRetrievalError(self.study_id, e.message)
6 changes: 5 additions & 1 deletion src/antares/craft/service/api_services/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,9 @@ def get_matrix(base_url: str, study_id: str, wrapper: RequestWrapper, series_pat
raw_url = f"{base_url}/studies/{study_id}/raw?path={series_path}"
response = wrapper.get(raw_url)
json_df = response.json()
dataframe = pd.DataFrame(data=json_df["data"], index=json_df["index"], columns=json_df["columns"])

if "index" in json_df:
dataframe = pd.DataFrame(data=json_df["data"], index=json_df["index"], columns=json_df["columns"])
else:
dataframe = pd.DataFrame(data=json_df["data"], columns=json_df["columns"])
return dataframe
36 changes: 32 additions & 4 deletions src/antares/craft/service/base_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)
from antares.craft.model.hydro import Hydro, HydroMatrixName, HydroProperties
from antares.craft.model.link import Link, LinkProperties, LinkUi
from antares.craft.model.output import Output
from antares.craft.model.output import AggregationEntry, McType, ObjectType, Output
from antares.craft.model.renewable import RenewableCluster, RenewableClusterProperties
from antares.craft.model.settings.study_settings import StudySettings
from antares.craft.model.simulation import AntaresSimulationParameters, Job
Expand Down Expand Up @@ -557,6 +557,15 @@ def create_variant(self, variant_name: str) -> "Study":
"""
pass

    @abstractmethod
    def read_outputs(self, output_service: "BaseOutputService") -> list[Output]:
        """
        Gets the output list of a study

        Args:
            output_service: service to attach to each returned Output so it can fetch matrices

        Returns: Output list
        """
        pass


class BaseRenewableService(ABC):
@abstractmethod
Expand Down Expand Up @@ -631,10 +640,29 @@ def wait_job_completion(self, job: Job, time_out: int) -> None:

class BaseOutputService(ABC):
@abstractmethod
def read_outputs(self) -> list[Output]:
def get_matrix(self, path: str) -> pd.DataFrame:
"""
Gets the output list of a study
Gets the matrix of the output

Returns: Output list
Args:
path: output path

Returns: Pandas DataFrame
"""
pass

@abstractmethod
def aggregate_values(
self, output_id: str, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType
) -> pd.DataFrame:
"""
Creates a matrix of aggregated raw data

Args:
output_id: id of the output
aggregation_entry: input for the /aggregate endpoint
mc_type: all or ind (enum)
object_type: links or areas (enum)

Returns: Pandas DataFrame corresponding to the aggregated raw data
"""
11 changes: 9 additions & 2 deletions src/antares/craft/service/local_services/output_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
# This file is part of the Antares project.
from typing import Any

import pandas as pd

from antares.craft.config.local_configuration import LocalConfiguration
from antares.craft.model.output import Output
from antares.craft.model.output import AggregationEntry, McType, ObjectType
from antares.craft.service.base_services import BaseOutputService


Expand All @@ -22,5 +24,10 @@ def __init__(self, config: LocalConfiguration, study_name: str, **kwargs: Any) -
self.config = config
self.study_name = study_name

def read_outputs(self) -> list[Output]:
    def get_matrix(self, path: str) -> pd.DataFrame:
        """Not implemented for local studies yet."""
        raise NotImplementedError

    def aggregate_values(
        self, output_id: str, aggregation_entry: AggregationEntry, mc_type: McType, object_type: ObjectType
    ) -> pd.DataFrame:
        """Not implemented for local studies yet."""
        raise NotImplementedError
6 changes: 5 additions & 1 deletion src/antares/craft/service/local_services/study_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@

from antares.craft.config.local_configuration import LocalConfiguration
from antares.craft.model.binding_constraint import BindingConstraint
from antares.craft.model.output import Output
from antares.craft.model.settings.study_settings import StudySettings
from antares.craft.service.base_services import BaseStudyService
from antares.craft.service.base_services import BaseOutputService, BaseStudyService

if TYPE_CHECKING:
from antares.craft.model.study import Study
Expand Down Expand Up @@ -46,3 +47,6 @@ def delete(self, children: bool) -> None:

def create_variant(self, variant_name: str) -> "Study":
raise NotImplementedError

    def read_outputs(self, output_service: BaseOutputService) -> list[Output]:
        """Not implemented for local studies yet."""
        raise NotImplementedError
37 changes: 36 additions & 1 deletion tests/antares/services/api_services/test_study_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
# SPDX-License-Identifier: MPL-2.0
#
# This file is part of the Antares project.

import pytest
import requests_mock

Expand All @@ -18,6 +17,8 @@
from json import dumps
from unittest.mock import Mock, patch

import pandas as pd

from antares.craft.api_conf.api_conf import APIconf
from antares.craft.exceptions.exceptions import (
AreaCreationError,
Expand All @@ -34,6 +35,7 @@
from antares.craft.model.binding_constraint import BindingConstraint, BindingConstraintProperties
from antares.craft.model.hydro import HydroProperties
from antares.craft.model.link import Link, LinkProperties, LinkUi
from antares.craft.model.output import AggregationEntry, Frequency, McType, ObjectType, QueryFile
from antares.craft.model.settings.general import GeneralParameters
from antares.craft.model.settings.study_settings import StudySettings
from antares.craft.model.simulation import AntaresSimulationParameters, Job, JobStatus, Solver
Expand Down Expand Up @@ -472,3 +474,36 @@ def test_read_outputs(self):
mocker.get(run_url, json={"description": error_message}, status_code=404)
with pytest.raises(OutputsRetrievalError, match=error_message):
self.study.read_outputs()

# ==== get_matrix() ====
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should probably put this in a separated test as you're not really checking the same thing as the first part of the test and you can re-introduce a fake output at the start of your test


matrix_url = f"https://antares.com/api/v1/studies/{self.study_id}/raw?path=output/{output1.name}/economy/mc-all/grid/links"
matrix_output = {"columns": ["upstream", "downstream"], "data": [["be", "fr"]]}
mocker.get(matrix_url, json=matrix_output)

matrix = output1.get_matrix("mc-all/grid/links")
assert isinstance(matrix, pd.DataFrame)
assert matrix.shape[1] == len(matrix_output["columns"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't you just put matrix_output inside a dataframe and test the equality ? It would be clearer
Same remark for the aggregate values part

assert list(matrix.columns) == matrix_output["columns"]
assert matrix.shape[0] == len(matrix_output["data"])
assert matrix.iloc[0].tolist() == matrix_output["data"][0]

# ==== aggregate_values ====

aggregate_url = f"https://antares.com/api/v1/studies/{self.study_id}/links/aggregate/mc-all/{output1.name}?query_file=values&frequency=annual&format=csv"
aggregate_output = """
link,timeId,FLOW LIN. EXP,FLOW LIN. STD
be - fr,1,0.000000,0.000000
be - fr,2,0.000000,0.000000
"""
mocker.get(aggregate_url, text=aggregate_output)

aggregation_entry = AggregationEntry(query_file=QueryFile.VALUES, frequency=Frequency.ANNUAL)
aggregated_matrix = output1.aggregate_values(aggregation_entry, McType.ALL, ObjectType.LINKS)

assert isinstance(aggregated_matrix, pd.DataFrame)
assert aggregated_matrix.shape[1] == 4
assert list(aggregated_matrix.columns) == ["link", "timeId", "FLOW LIN. EXP", "FLOW LIN. STD"]
assert aggregated_matrix.shape[0] == 2
assert aggregated_matrix.iloc[0].tolist() == ["be - fr", 1, 0.0, 0.0]
assert aggregated_matrix.iloc[1].tolist() == ["be - fr", 2, 0.0, 0.0]
Loading