Skip to content

Commit

Permalink
Merge pull request #103 from ImperialCollegeLondon/optimise-dsr-api
Browse files Browse the repository at this point in the history
Use HDF5 file upload for DSR API data
  • Loading branch information
AdrianDAlessandro authored Jul 25, 2023
2 parents 25ebdcc + 11bbd02 commit e8eb8a7
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 84 deletions.
72 changes: 45 additions & 27 deletions datahub/dsr.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,31 @@
"""This module defines the data structures for the MEDUSA Demand Simulator model."""
import numpy as np
from fastapi import HTTPException
from numpy.typing import NDArray
from pydantic import BaseModel, Field


class DSRModel(BaseModel):
"""Define required key values for Demand Side Response data."""

amount: list = Field(alias="Amount", size=(13,))
cost: list = Field(alias="Cost", size=(1440, 13))
kwh_cost: list = Field(alias="kWh Cost", size=(2,))
activities: list = Field(alias="Activities", size=(1440, 7))
amount: list = Field(alias="Amount", shape=(13,))
cost: list = Field(alias="Cost", shape=(1440, 13))
kwh_cost: list = Field(alias="kWh Cost", shape=(2,))
activities: list = Field(alias="Activities", shape=(1440, 7))
activities_outside_home: list = Field(
alias="Activities Outside Home", size=(1440, 7)
alias="Activities Outside Home", shape=(1440, 7)
)
activity_types: list = Field(alias="Activity Types", size=(7,))
ev_id_matrix: list = Field(alias="EV ID Matrix", default=None, size=(1440, 4329))
ev_dt: list = Field(alias="EV DT", size=(1440, 2))
ev_locations: list = Field(alias="EV Locations", default=None, size=(1440, 4329))
ev_battery: list = Field(alias="EV Battery", default=None, size=(1440, 4329))
ev_state: list = Field(alias="EV State", size=(1440, 4329))
ev_mask: list = Field(alias="EV Mask", default=None, size=(1440, 4329))
baseline_ev: list = Field(alias="Baseline EV", size=(1440,))
baseline_non_ev: list = Field(alias="Baseline Non-EV", size=(1440,))
actual_ev: list = Field(alias="Actual EV", size=(1440,))
actual_non_ev: list = Field(alias="Actual Non-EV", size=(1440,))
activity_types: list = Field(alias="Activity Types", shape=(7,))
ev_id_matrix: list = Field(alias="EV ID Matrix", default=None, shape=(1440, 4329))
ev_dt: list = Field(alias="EV DT", shape=(1440, 2))
ev_locations: list = Field(alias="EV Locations", default=None, shape=(1440, 4329))
ev_battery: list = Field(alias="EV Battery", default=None, shape=(1440, 4329))
ev_state: list = Field(alias="EV State", shape=(1440, 4329))
ev_mask: list = Field(alias="EV Mask", default=None, shape=(1440, 4329))
baseline_ev: list = Field(alias="Baseline EV", shape=(1440,))
baseline_non_ev: list = Field(alias="Baseline Non-EV", shape=(1440,))
actual_ev: list = Field(alias="Actual EV", shape=(1440,))
actual_non_ev: list = Field(alias="Actual Non-EV", shape=(1440,))
name: str = Field(alias="Name", default="")
warn: str = Field(alias="Warn", default="")

Expand All @@ -33,24 +35,40 @@ class Config:
allow_population_by_field_name = True


def validate_dsr_arrays(data: dict[str, str | list]) -> list[str]:
"""Validate the sizes of the arrays in the DSR data.
def validate_dsr_data(data: dict[str, NDArray]) -> None:
"""Validate the shapes of the arrays in the DSR data.
Args:
data: The dictionary representation of the DSR Data. The keys are field aliases.
It is generated with the data.dict(by_alias=True) where data is a DSRModel.
Returns:
An empty list if there are no issues. A list of the failing fields if there are.
    Raises:
        HTTPException: If there are missing or invalid fields.
"""
missing_fields = [
field for field in DSRModel.schema()["required"] if field not in data.keys()
]
if missing_fields:
raise HTTPException(
status_code=422,
detail=f"Missing required fields: {', '.join(missing_fields)}.",
)

aliases = []
for alias, field in DSRModel.schema()["properties"].items():
if field["type"] == "array":
try:
array = np.array(data[alias])
except ValueError:
try:
array = data[alias]
except KeyError:
if field:
aliases.append(alias)
continue
if array.shape != field["size"] or array.dtype != np.float64:
continue
if field["type"] == "array":
if array.shape != field["shape"] or not np.issubdtype(
array.dtype, np.number
):
aliases.append(alias)
return aliases
if aliases:
raise HTTPException(
status_code=422,
detail=f"Invalid size for: {', '.join(aliases)}.",
)
66 changes: 47 additions & 19 deletions datahub/main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Script for running Datahub API."""
from typing import Any, Hashable

from fastapi import FastAPI, HTTPException
import h5py # type: ignore
from fastapi import FastAPI, HTTPException, UploadFile
from pydantic import BaseModel

from . import data as dt
from . import log
from .dsr import DSRModel, validate_dsr_arrays
from .dsr import validate_dsr_data
from .opal import OpalModel
from .wesim import get_wesim

Expand Down Expand Up @@ -62,7 +63,6 @@ def get_opal_data( # type: ignore[misc]
Args:
start: Starting index for exported Dataframe
end: Last index that will be included in exported Dataframe
Returns:
Expand All @@ -88,31 +88,60 @@ def get_opal_data( # type: ignore[misc]


@app.post("/dsr")
def update_dsr_data(data: DSRModel) -> dict[str, str]:
"""POST method function for appending data to the DSR list.
def upload_dsr(file: UploadFile) -> dict[str, str | None]:
"""POST method for appending data to the DSR list.
    This takes an HDF5 file as input. This file has a flat structure, with each dataset
available at the top level.
The required fields (datasets) are:
- Amount (13 x 1)
- Cost (1440 x 13)
- kWh Cost (2 x 1)
- Activities (1440 x 7)
- Activities Outside Home (1440 x 7)
- Activity Types (7 x 1)
- EV DT (1440 x 2)
- EV State (1440 x 4329)
- Baseline EV (1440 x 1)
- Baseline Non-EV (1440 x 1)
- Actual EV (1440 x 1)
- Actual Non-EV (1440 x 1)
The optional fields are:
- EV ID Matrix (1440 x 4329)
- EV Locations (1440 x 4329)
- EV Battery (1440 x 4329)
- EV Mask (1440 x 4329)
- Name (str)
- Warn (str)
Further details for the DSR data specification can be found in
[the GitHub wiki.](https://github.com/ImperialCollegeLondon/gridlington-datahub/wiki/Agent-model-data#output)
\f
Args:
data: The DSR Data
Returns:
A dictionary with a success message
        file (UploadFile): An HDF5 file with the DSR data.
Raises:
A HTTPException if the data is invalid
"""
HTTPException: If the data is invalid
Returns:
dict[str, str]: dictionary with the filename
""" # noqa: D301
    log.info("Received DSR data.")
data_dict = data.dict(by_alias=True)
if alias := validate_dsr_arrays(data_dict):
message = f"Invalid size for: {', '.join(alias)}."
log.error(message)
raise HTTPException(status_code=400, detail=message)
with h5py.File(file.file, "r") as h5file:
data = {key: value[...] for key, value in h5file.items()}

validate_dsr_data(data)

log.info("Appending new data...")
log.debug(f"Current DSR data length: {len(dt.dsr_data)}")
dt.dsr_data.append(data_dict)
dt.dsr_data.append(data)
log.debug(f"Updated DSR data length: {len(dt.dsr_data)}")

return {"message": "Data submitted successfully."}
return {"filename": file.filename}


@app.get("/dsr")
Expand All @@ -123,7 +152,6 @@ def get_dsr_data( # type: ignore[misc]
Args:
start: Starting index for exported list
end: Last index that will be included in exported list
Returns:
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ requires-python = ">=3.10"
dependencies = [
"pandas[excel]",
"fastapi",
"uvicorn"
"uvicorn",
"python-multipart",
"h5py"
]

[project.optional-dependencies]
Expand Down
5 changes: 5 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ h11==0.14.0
# via
# httpcore
# uvicorn
h5py==3.9.0
# via datahub (pyproject.toml)
httpcore==0.17.2
# via httpx
httpx==0.24.1
Expand All @@ -71,6 +73,7 @@ nodeenv==1.8.0
# via pre-commit
numpy==1.25.0
# via
# h5py
# pandas
# pandas-stubs
odfpy==1.4.1
Expand Down Expand Up @@ -116,6 +119,8 @@ pytest-mypy==0.10.3
# via datahub (pyproject.toml)
python-dateutil==2.8.2
# via pandas
python-multipart==0.0.6
# via datahub (pyproject.toml)
pytz==2023.3
# via pandas
pyxlsb==1.0.10
Expand Down
8 changes: 7 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,14 @@ fastapi==0.99.1
# via datahub (pyproject.toml)
h11==0.14.0
# via uvicorn
h5py==3.9.0
# via datahub (pyproject.toml)
idna==3.4
# via anyio
numpy==1.25.0
# via pandas
# via
# h5py
# pandas
odfpy==1.4.1
# via pandas
openpyxl==3.1.2
Expand All @@ -32,6 +36,8 @@ pydantic==1.10.10
# via fastapi
python-dateutil==2.8.2
# via pandas
python-multipart==0.0.6
# via datahub (pyproject.toml)
pytz==2023.3
# via pandas
pyxlsb==1.0.10
Expand Down
38 changes: 30 additions & 8 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import random

import h5py # type: ignore
import numpy as np
import pytest
from fastapi.testclient import TestClient
Expand Down Expand Up @@ -38,12 +39,33 @@ def opal_data_array():


@pytest.fixture
def dsr_data():
"""Pytest Fixture for random Opal data input."""
data = {}
for field in list(DSRModel.__fields__.values()):
if field.annotation == str:
data[field.alias] = "Name or Warning"
else:
data[field.alias] = np.random.rand(*field.field_info.extra["size"]).tolist()
def dsr_data(dsr_data_path):
"""Pytest Fixture for DSR data as a dictionary."""
with h5py.File(dsr_data_path, "r") as h5file:
data = {key: value[...] for key, value in h5file.items()}
return data


@pytest.fixture
def dsr_data_path(tmp_path):
"""The path to a temporary HDF5 file with first-time-only generated DSR data."""
# Define the file path within the temporary directory
file_path = tmp_path / "data.h5"

# Check if the file already exists
if file_path.is_file():
# If the file exists, return its path
return file_path

# Otherwise, create and write data to the file
with h5py.File(file_path, "w") as h5file:
for field in list(DSRModel.__fields__.values()):
if field.annotation == str:
h5file[field.alias] = "Name or Warning"
else:
h5file[field.alias] = np.random.rand(
*field.field_info.extra["shape"]
).astype("float16")

# Return the path to the file
return file_path
Loading

0 comments on commit e8eb8a7

Please sign in to comment.