Skip to content

Commit

Permalink
Refactor capacity input model, add default threshold, update cap analysis logic
Browse files Browse the repository at this point in the history

- Refactored the capacity input model in the `models.py` file to give the `threshold` field of `CapacityInput` a default value of 2.5. The existing constraint that `threshold` must be greater than or equal to 0 is unchanged.
- Updated the `main.py` file to import the `pandera.typing` module for type annotations.
- Modified the `find_peaks_with_surroundings` method in the `CapacityAnalysis` class to skip any candidate peak that lies less than `15 * (2 * x_padding + 1)` minutes away from a peak that has already been selected.
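- Made other minor changes in `find_peaks_with_surroundings`: candidate peaks are now taken with `nlargest(num_peaks * 2)` and filtered to values above `threshold`, an empty list is returned when no value exceeds the threshold, and the loop stops once `num_peaks` peaks have been collected. The `data` parameter of `CapacityAnalysis.__init__` is now annotated as `pdt.Series`.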
  • Loading branch information
Molier committed Aug 13, 2024
1 parent b61c3fb commit 75cacb9
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 69 deletions.
124 changes: 60 additions & 64 deletions demo_capacity_analysis.ipynb

Large diffs are not rendered by default.

20 changes: 16 additions & 4 deletions openenergyid/capacity/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Main module for capacity analysis."""

import datetime as dt
import typing
import pandas as pd
import pandera.typing as pdt


class CapacityAnalysis:
Expand All @@ -21,7 +23,7 @@ class CapacityAnalysis:

def __init__(
self,
data: pd.Series,
data: pdt.Series,
threshold: float = 2.5,
window: str = "MS", # Default to month start
x_padding: int = 4,
Expand Down Expand Up @@ -50,11 +52,12 @@ def find_peaks(self) -> pd.Series:
"""
# Group by the specified window (default is month start)
grouped = self.data.groupby(pd.Grouper(freq=self.window))

# Find the index (timestamp) of the maximum value in each group
peak_indices = grouped.idxmax()

# Get the corresponding peak values
peaks = self.data.loc[peak_indices][self.data > self.threshold]

return peaks

def find_peaks_with_surroundings(
Expand All @@ -69,12 +72,20 @@ def find_peaks_with_surroundings(
Returns:
List[tuple[dt.datetime,float,pd.Series]]: A list of tuples containing peak time, peak value, and surrounding data.
"""
peaks = self.data.sort_values(ascending=False).head(num_peaks)
peaks = self.data.nlargest(num_peaks * 2)
peaks = peaks[peaks > self.threshold]
if peaks.empty:
return []

result = []
window_size = dt.timedelta(minutes=15 * (2 * self.x_padding + 1))

for peak_time, peak_value in peaks.items():
peak_time = typing.cast(pd.Timestamp, peak_time)

if any(abs(peak_time - prev_peak[0]) < window_size for prev_peak in result):
continue

start_time = peak_time - dt.timedelta(minutes=15 * self.x_padding)
end_time = peak_time + dt.timedelta(minutes=15 * (self.x_padding + 1))
surrounding_data = self.data[start_time:end_time]
Expand All @@ -86,5 +97,6 @@ def find_peaks_with_surroundings(
surrounding_data,
]
)

if len(result) == num_peaks:
break
return result
2 changes: 1 addition & 1 deletion openenergyid/capacity/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class CapacityInput(BaseModel):

timezone: str = Field(alias="timeZone")
series: TimeSeries
threshold: float = Field(ge=0)
threshold: float = Field(default=2.5, ge=0)


class PeakDetail(BaseModel):
Expand Down

0 comments on commit 75cacb9

Please sign in to comment.