Skip to content

Commit

Permalink
Merge pull request #108 from ImperialCollegeLondon/refactor
Browse files Browse the repository at this point in the history
Refactor the code
  • Loading branch information
AdrianDAlessandro authored Aug 3, 2023
2 parents dc5c2ca + dc57c88 commit 5fce417
Show file tree
Hide file tree
Showing 14 changed files with 215 additions and 198 deletions.
2 changes: 0 additions & 2 deletions datahub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

from .core.log_config import logging_dict_config

__version__ = "0.0.1"

logging.config.dictConfig(logging_dict_config)

log = logging.getLogger("api_logger")
Expand Down
63 changes: 3 additions & 60 deletions datahub/data.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,8 @@
"""This module defines the data structures for each of the models."""
from typing import Any, Hashable
import pandas as pd

from .opal import create_opal_frame

opal_data = [
1,
8.58,
34.9085,
34.9055,
16.177,
7.8868,
15.1744,
3.3549,
0,
0,
0,
0,
0,
0,
0,
16192.8871,
16194.8348,
-0.5713,
-0.8467,
16.2002,
9.0618,
0.2806,
-2.1328,
0,
0.7931,
0.0522,
0.0522,
34.8373,
34.8343,
0,
0,
30.801,
30.801,
28,
5,
63,
72,
0,
303,
7230,
3.774,
3.774,
510,
2,
34,
]

opal_df = create_opal_frame()
opal_df: pd.DataFrame = create_opal_frame()
dsr_data: list[dict[str, str | list]] = [] # type: ignore[type-arg]
wesim_data: dict[str, dict[Hashable, Any]] = {} # type: ignore[misc]


if __name__ == "__main__":
opal_df = create_opal_frame()
print("Initial ---")
print(opal_df)
# opal_df = pd.concat([opal_df, append_opal_frame(opal_data)])
# print("Append ---")
# print(opal_df)
wesim_data: dict[str, dict] = {} # type: ignore[type-arg]
85 changes: 59 additions & 26 deletions datahub/main.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,33 @@
"""Script for running Datahub API."""
from typing import Any, Hashable

import h5py # type: ignore
from fastapi import FastAPI, HTTPException, UploadFile
from pydantic import BaseModel

from . import data as dt
from . import log
from .dsr import validate_dsr_data
from .opal import OpalModel
from .opal import OpalArrayData, OpalModel
from .wesim import get_wesim

app = FastAPI()


class OpalArrayData(BaseModel):
"""Class for defining required key values for Opal data as an array."""

array: list[float]
app = FastAPI(
title="Gridlington DataHub",
)


@app.post("/opal")
def create_opal_data(data: OpalModel | OpalArrayData) -> dict[str, str]:
"""POST method function for appending data to Opal Dataframe.
It takes the Opal data as a dictionary or list in JSON format and updates the data
held in the datahub and returns a success message.
\f
Args:
data: The raw opal data in either Dict or List format
Returns:
A Dict of the Opal data that has just been added to the Dataframe
"""
""" # noqa: D301
log.info("Received Opal data.")

raw_data = data.dict()
Expand All @@ -48,29 +46,42 @@ def create_opal_data(data: OpalModel | OpalArrayData) -> dict[str, str]:

log.info("Appending new data...")
log.debug(f"Original Opal DataFrame:\n\n{dt.opal_df}")
dt.opal_df.opal.append(append_input)
try:
dt.opal_df.opal.append(append_input)
except AssertionError:
message = "Error with Opal data on server. Fails validation."
log.error(message)
raise HTTPException(status_code=400, detail=message)

log.debug(f"Updated Opal DataFrame:\n\n{dt.opal_df}")

return {"message": "Data submitted successfully."}


# TODO: Fix return typing annotation
@app.get("/opal")
def get_opal_data( # type: ignore[misc]
def get_opal_data(
start: int = 0, end: int | None = None
) -> dict[Hashable, Any]:
) -> dict[str, dict]: # type: ignore[type-arg]
"""GET method function for getting Opal Dataframe as JSON.
It takes optional query parameters of:
- `start`: Starting index for exported Dataframe
- `end`: Last index that will be included in exported Dataframe
And returns a dictionary containing the Opal Dataframe in JSON format.
This can be converted back to a DataFrame using the following:
`pd.DataFrame(**data)`
\f
Args:
start: Starting index for exported Dataframe
end: Last index that will be included in exported Dataframe
Returns:
A Dict containing the Opal Dataframe in JSON format
This can be converted back to a Dataframe using the following:
pd.DataFrame(**data)
"""
A Dict containing the Opal DataFrame in JSON format
""" # noqa: D301
log.info("Sending Opal data...")
log.debug(f"Query parameters:\n\nstart={start}\nend={end}\n")
if isinstance(end, int) and end < start:
Expand Down Expand Up @@ -145,18 +156,31 @@ def upload_dsr(file: UploadFile) -> dict[str, str | None]:


@app.get("/dsr")
def get_dsr_data( # type: ignore[misc]
def get_dsr_data(
start: int = 0, end: int | None = None
) -> dict[Hashable, Any]:
) -> dict[str, list]: # type: ignore[type-arg]
"""GET method function for getting DSR data as JSON.
It takes optional query parameters of:
- `start`: Starting index for exported list
- `end`: Last index that will be included in exported list
And returns a dictionary containing the DSR data in JSON format.
This can be converted back to a DataFrame using the following:
`pd.DataFrame(**data)`
TODO: Ensure data is json serializable or returned in binary format
\f
Args:
start: Starting index for exported list
end: Last index that will be included in exported list
Returns:
A Dict containing the DSR list
"""
""" # noqa: D301
log.info("Sending DSR data...")
log.debug(f"Query parameters:\n\nstart={start}\nend={end}\n")
if isinstance(end, int) and end < start:
Expand All @@ -173,12 +197,21 @@ def get_dsr_data( # type: ignore[misc]


@app.get("/wesim")
def get_wesim_data() -> dict[Hashable, Any]: # type: ignore[misc]
def get_wesim_data() -> dict[str, dict[str, dict]]: # type: ignore[type-arg]
"""GET method function for getting Wesim data as JSON.
It returns a dictionary with the WESIM data in JSON format containing the following
4 DataFrames:
- Capacity (6, 12)
- Regions (30, 10)
- Interconnector Capacity (4, 2)
- Interconnectors (25, 3)
\f
Returns:
A Dict containing the Wesim Dataframes
"""
""" # noqa: D301
log.info("Sending Wesim data...")
if dt.wesim_data == {}:
log.debug("Wesim data empty! Creating Wesim data...")
Expand Down
26 changes: 23 additions & 3 deletions datahub/opal.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
"""This module defines the data structures for the Opal model."""
import numpy as np
import pandas as pd
from pydantic import BaseModel, Field

OPAL_START_DATE = "2035-01-22 00:00"


class OpalArrayData(BaseModel):
"""Class for defining required key values for Opal data as an array."""

array: list[float]


class OpalModel(BaseModel):
"""Define required key values for Opal data."""

Expand Down Expand Up @@ -69,11 +76,24 @@ class OpalAccessor:
"""Pandas custom accessor for appending new data to Opal dataframe."""

def __init__(self, pandas_obj: pd.DataFrame) -> None:
"""Initialization of dataframe.
"""Initialization of dataframe."""
self._validate(pandas_obj)
self._obj = pandas_obj

@staticmethod
def _validate(pandas_obj: pd.DataFrame) -> None:
"""Validates the DataFrame to ensure it is usable by this accessor.
TODO: Add validation function.
Raises:
AssertionError if the Dataset fails the validation.
"""
self._obj = pandas_obj
assert set(pandas_obj.columns) == set(opal_headers.keys())
assert pd.api.types.is_datetime64_dtype(pandas_obj.get("Time", None))
assert all(
np.issubdtype(dtype, np.number)
for column, dtype in pandas_obj.dtypes.items()
if column != "Time"
)

def append(self, data: dict[str, float] | list[float]) -> None:
"""Function to append new data to existing dataframe.
Expand Down
10 changes: 1 addition & 9 deletions datahub/wesim.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""This module defines the data structures for the WESIM model."""
import os
from typing import Any, Hashable

import pandas as pd

Expand Down Expand Up @@ -114,7 +113,7 @@ def structure_capacity(df: pd.DataFrame) -> pd.DataFrame:
return df.reset_index().replace({"Code": REGIONS_KEY})


def get_wesim() -> dict[str, dict[Hashable, Any]]: # type: ignore[misc]
def get_wesim() -> dict[str, dict]: # type: ignore[type-arg]
"""Gets the WESIM data from disk and puts it into pandas dataframes.
Returns:
Expand All @@ -137,10 +136,3 @@ def get_wesim() -> dict[str, dict[Hashable, Any]]: # type: ignore[misc]
"Interconnector Capacity": interconnector_capacity.to_dict(orient="split"),
"Interconnectors": interconnectors.to_dict(orient="split"),
}


if __name__ == "__main__":
for name, df in get_wesim().items():
print(name + ":")
print(df)
print("--------")
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
[project]
name = "datahub"
version = "0.0.1"
version = "0.1.1"
authors = [
{ name = "Adrian D'Alessandro", email = "[email protected]" },
{ name = "Callum West", email = "[email protected]" },
{ name = "Dan Davies", email = "[email protected]" },
{ name = "Imperial College London RSE Team", email = "[email protected]" }
{ name = "Imperial College London RSE Team", email = "[email protected]" },
]
requires-python = ">=3.10"
dependencies = [
"pandas[excel]",
"fastapi",
"uvicorn",
"python-multipart",
"h5py"
"h5py",
]

[project.optional-dependencies]
Expand All @@ -28,7 +28,7 @@ dev = [
"pytest-mypy",
"pytest-mock",
"pandas-stubs",
"httpx"
"httpx",
]

[tool.setuptools.packages.find]
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,11 @@ def dsr_data_path(tmp_path):

# Return the path to the file
return file_path


@pytest.fixture
def wesim_input_data():
"""The filepath for the test version of the wesim data."""
from datahub.wesim import read_wesim

return read_wesim("tests/data/wesim_example.xlsx")
6 changes: 0 additions & 6 deletions tests/test_datahub.py

This file was deleted.

Loading

0 comments on commit 5fce417

Please sign in to comment.