Skip to content

Commit

Permalink
feat: add Granularity support for base load calculations, feat: clean…
Browse files Browse the repository at this point in the history
…ed up base_load demo to be neat and packaged. chore: added some performance testing.
  • Loading branch information
Molier committed Dec 9, 2024
1 parent fc1874b commit 5cbb5fc
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 1,924 deletions.
1,974 changes: 70 additions & 1,904 deletions demo_baseLoad.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions openenergyid/baseload/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
EnergySchema,
load_data,
calculate_base_load,
Granularity,
)

__all__ = [
"BaseLoadMetrics",
"EnergySchema",
"load_data",
"calculate_base_load",
"Granularity",
]
42 changes: 23 additions & 19 deletions openenergyid/baseload/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,27 @@
load_data(path: str) -> pl.LazyFrame:
Loads and validates energy usage data from an NDJSON file.
calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame:
Calculates base load metrics from energy usage data aggregated by the specified timeframe.
calculate_base_load(lf: pl.LazyFrame, granularity: Granularity = Granularity.DAILY) -> pl.DataFrame:
Calculates base load metrics from energy usage data aggregated by the specified granularity.
main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame:
Processes energy data and returns base load metrics for the specified timeframe.
main(file_path: str, granularity: Granularity) -> pl.DataFrame:
Processes energy data and returns base load metrics for the specified granularity.
"""

from enum import Enum
from typing import NamedTuple
import polars as pl
import pandera.polars as pa
from openenergyid.enums import Granularity
## VERY important to use pandera.polars instead of pandera to avoid pandas errors


class TimeFrame(Enum):
HOURLY = "1h"
DAILY = "1d"
WEEKLY = "1w"
MONTHLY = "1mo"
YEARLY = "1y"
# Translate each supported Granularity member into the interval string that
# calculate_base_load passes as `every=` to polars' group_by_dynamic.
# The lookup there is a plain dict index, so a Granularity member that is
# not listed here raises KeyError.
GRANULARITY_TO_POLARS = {
Granularity.PT15M: "15m",
Granularity.PT1H: "1h",
Granularity.P1D: "1d",
Granularity.P1M: "1mo",
Granularity.P1Y: "1y",
}


class BaseLoadMetrics(NamedTuple):
Expand Down Expand Up @@ -74,12 +75,15 @@ def load_data(path: str) -> pl.LazyFrame:
return pl.LazyFrame(validated_df)


def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY) -> pl.DataFrame:
"""Calculate base load metrics aggregated by specified timeframe"""
def calculate_base_load(
lf: pl.LazyFrame, granularity: Granularity = Granularity.P1D
) -> pl.DataFrame:
"""Calculate base load metrics aggregated by specified granularity"""
polars_interval = GRANULARITY_TO_POLARS[granularity]
return (
lf.filter(pl.col("total") >= 0)
.sort("timestamp")
.group_by_dynamic("timestamp", every=timeframe.value)
.group_by_dynamic("timestamp", every=polars_interval)
.agg(
[
pl.col("total").sum().alias("total_usage"),
Expand All @@ -98,12 +102,12 @@ def calculate_base_load(lf: pl.LazyFrame, timeframe: TimeFrame = TimeFrame.DAILY
)


def main(file_path: str, timeframe: TimeFrame) -> pl.DataFrame:
"""Process energy data and return base load metrics for specified timeframe"""
return calculate_base_load(load_data(file_path), timeframe)
def main(file_path: str, granularity: Granularity) -> pl.DataFrame:
    """Load the energy data at ``file_path`` and compute its base load metrics.

    Args:
        file_path: Path handed to ``load_data``.
        granularity: Aggregation interval forwarded to ``calculate_base_load``.

    Returns:
        The frame produced by ``calculate_base_load`` for the loaded data.
    """
    energy_lf = load_data(file_path)
    return calculate_base_load(energy_lf, granularity)


# Example usage:
if __name__ == "__main__":
results = main("data/energy_use.ndjson", TimeFrame.MONTHLY)
results = main("data/PP/energy_use_test1.ndjson", Granularity.P1M)
print(results)
164 changes: 164 additions & 0 deletions performance_testing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"import json"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# General Performance Testing\n",
"\n",
"Here we test and try out some general things for the codebase,\n",
"e.g. polars efficiency. We document and reference relevant docs where needed so the results can be peer reviewed."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Some speed tests for reading files/frames with polars\n",
"\n",
"references:\n",
"* [pandasVSpolars speed test, apr 2023](https://medium.com/cuenex/pandas-2-0-vs-polars-the-ultimate-battle-a378eb75d6d1)\n",
"* [input/output in polars](https://docs.pola.rs/api/python/stable/reference/io.html)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test 1: reading in a newline-delimited JSON to check efficiency\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.57 μs ± 218 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"energy_use_df = pl.scan_ndjson(\n",
" \"data/PP/energy_use_test1.ndjson\",\n",
" schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr,\n",
".dataframe > tbody > tr {\n",
" text-align: right;\n",
" white-space: pre-wrap;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>timestamp</th><th>total</th></tr><tr><td>datetime[μs, Europe/Brussels]</td><td>f64</td></tr></thead><tbody><tr><td>2023-01-01 00:00:00 CET</td><td>0.025</td></tr><tr><td>2023-01-01 00:15:00 CET</td><td>0.017</td></tr><tr><td>2023-01-01 00:30:00 CET</td><td>0.023</td></tr><tr><td>2023-01-01 00:45:00 CET</td><td>0.024</td></tr><tr><td>2023-01-01 01:00:00 CET</td><td>0.023</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 2)\n",
"┌───────────────────────────────┬───────┐\n",
"│ timestamp ┆ total │\n",
"│ --- ┆ --- │\n",
"│ datetime[μs, Europe/Brussels] ┆ f64 │\n",
"╞═══════════════════════════════╪═══════╡\n",
"│ 2023-01-01 00:00:00 CET ┆ 0.025 │\n",
"│ 2023-01-01 00:15:00 CET ┆ 0.017 │\n",
"│ 2023-01-01 00:30:00 CET ┆ 0.023 │\n",
"│ 2023-01-01 00:45:00 CET ┆ 0.024 │\n",
"│ 2023-01-01 01:00:00 CET ┆ 0.023 │\n",
"└───────────────────────────────┴───────┘"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"energy_use_lf_1 = pl.scan_ndjson(\n",
" \"data/PP/energy_use_test1.ndjson\",\n",
" schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"total\": pl.Float64},\n",
")\n",
"energy_use_lf_1.collect().head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test 2: reading in the \"smaller version of the json\" and transforming it into polars."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"34.5 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Read the JSON file\n",
"with open(\"data/PP/energy_use.json\", \"r\") as file:\n",
" data = json.load(file)\n",
"\n",
"# Convert the data into a list of dictionaries\n",
"data_list = [{\"timestamp\": int(k), \"value\": v} for k, v in data.items()]\n",
"\n",
"# Create a DataFrame from the list\n",
"df = pl.DataFrame(\n",
" data_list, schema={\"timestamp\": pl.Datetime(time_zone=\"Europe/Brussels\"), \"value\": pl.Float64}\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "openenergyid-Nm3FK_LY-py3.11",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
17 changes: 16 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,4 @@ energyid = "^0.0.17"
snakeviz = "^2.2.0"
plotly = "^5.24.1"
vegafusion = {version = ">=1.5.0", extras = ["embed"]}
vl-convert-python = "^1.7.0"
Binary file added vis/KDE of EnUsage.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vis/heatmap.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 5cbb5fc

Please sign in to comment.