Skip to content

Commit

Permalink
ENTSO-E downtime messages for nuclear
Browse files Browse the repository at this point in the history
  • Loading branch information
vividfog committed Mar 7, 2024
1 parent 570aa40 commit d0b8861
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 83 deletions.
20 changes: 14 additions & 6 deletions nordpool_predict_fi.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
from util.fingrid import update_nuclear
from util.llm import narrate_prediction
from datetime import datetime, timedelta
from util.entso_e import entso_e_nuclear
from util.sql import db_update, db_query_all
from util.github import push_updates_to_github
from util.dataframes import update_df_from_df
from util.fmi import update_wind_speed, update_temperature
from util.models import write_model_stats, stats, list_models
from util.eval import create_prediction_snapshot, rotate_snapshots
Expand Down Expand Up @@ -236,19 +238,25 @@ def get_mandatory_env_variable(name):
df.rename(columns={'index': 'Timestamp'}, inplace=True)

# Get the latest FMI wind speed values for the data frame, past and future
# NOTE: To save on API calls, we don't fill in weather history beyond 7 days even if asked
# NOTE: To save on API calls, this won't backfill history beyond 7 days even if asked
df = update_wind_speed(df)

# Get the latest FMI temperature values for the data frame, past and future
# NOTE: To save on API calls, we don't fill in weather history beyond 7 days even if asked
# NOTE: To save on API calls, this won't backfill history beyond 7 days even if asked
df = update_temperature(df)

# Get the latest nuclear power data for the data frame, and infer the future
# NOTE: To save on API calls, we don't fill in weather history beyond 7 days even if asked
# Get the latest nuclear power data for the data frame, and infer the future from last known value
# NOTE: To save on API calls, this won't backfill history beyond 7 days even if asked
df = update_nuclear(df, fingrid_api_key=fingrid_api_key)


# Fetch future nuclear downtime information from ENTSO-E unavailability data, h/t github:@pkautio
df_entso_e = entso_e_nuclear(entso_e_api_key)

# Refresh the previously inferred nuclear power numbers with the ENTSO-E data
df = update_df_from_df(df, df_entso_e)

# Get the latest spot prices for the data frame, past and future if any
# NOTE: To save on API calls, we don't fill in weather history beyond 7 days even if asked
# NOTE: To save on API calls, this won't backfill history beyond 7 days even if asked
df = update_spot(df)

# TODO: Decide if including wind power capacity is necessary; it seems to worsen the MSE and R2
Expand Down
36 changes: 36 additions & 0 deletions util/dataframes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
dataframes.py
This module provides functions for manipulating and updating pandas DataFrames.
Functions:
- update_df_from_df(df1, df2): Updates `df1` with values from `df2` based on matching 'timestamp' and common columns.
"""

import pandas as pd

def update_df_from_df(df1, df2):
    """
    Update `df1` with values from `df2` on matching timestamps.

    For every row of `df1` whose 'Timestamp' appears in `df2`, every column
    the two frames share (other than the timestamp) is overwritten with the
    value from `df2`. `df2`'s timestamp column may be named 'timestamp' or
    'Timestamp'; both are normalized to tz-aware UTC before matching.

    Parameters:
    - df1 (pd.DataFrame): frame to update; must have a 'Timestamp' column. Modified in place.
    - df2 (pd.DataFrame): source of new values; not modified.

    Returns:
    - pd.DataFrame: `df1`, updated in place and returned for convenience.
    """
    # Ensure df1's 'Timestamp' column is in the correct format and UTC
    df1['Timestamp'] = pd.to_datetime(df1['Timestamp'], utc=True)

    # Work on a copy so the caller's df2 is not mutated by the
    # normalization and rename below
    df2 = df2.copy()

    # Dynamically identify the timestamp column in df2, we've seen both uppercase and lowercase
    timestamp_col_df2 = 'timestamp' if 'timestamp' in df2.columns else 'Timestamp'

    # Convert df2's timestamp column to datetime and to UTC, then
    # standardize its name for the matching below
    df2[timestamp_col_df2] = pd.to_datetime(df2[timestamp_col_df2], utc=True)
    df2.rename(columns={timestamp_col_df2: 'Timestamp'}, inplace=True)

    # Find the common columns to update, excluding 'Timestamp'
    common_cols = set(df1.columns).intersection(df2.columns) - {'Timestamp'}
    if not common_cols:
        print("No common columns to update.")
        return df1

    # Build a timestamp-indexed lookup; if df2 has duplicate timestamps the
    # last one wins, matching the semantics of a sequential row-by-row update
    lookup = df2.drop_duplicates(subset='Timestamp', keep='last').set_index('Timestamp')

    # Vectorized update of matching rows only, for every shared column
    matched = df1['Timestamp'].isin(lookup.index)
    for col in common_cols:
        df1.loc[matched, col] = df1.loc[matched, 'Timestamp'].map(lookup[col])

    return df1
112 changes: 112 additions & 0 deletions util/entso_e.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import os
from dotenv import load_dotenv
import pandas as pd
from datetime import datetime, timedelta
from entsoe import EntsoePandasClient
import pandas as pd

"""
Fetch nuclear production forecast data for Finland for next 5 days.
Forecast is based on known maximum production based on all 5 nuclear plants (OL1, OL2, OL3 and Loviisa 1 and 2), minus the planned maintenance reduction of available capacity. Forecast is based on market messages available from ENTSO-E
Parameters:
- API Key for ENTSO-E access
Returns:
- pd.DataFrame: A pandas DataFrame with a row for each hour of the specified date range and column for forecasted Nuclear production
"""

def entso_e_nuclear(entso_e_api_key):
    """
    Fetch a nuclear production forecast for Finland for the next ~5 days.

    Starts from the known maximum capacity of all 5 Finnish nuclear units
    (OL1, OL2, OL3, Loviisa 1 and 2) and subtracts capacity declared
    unavailable in ENTSO-E planned-maintenance market messages.

    Parameters:
    - entso_e_api_key (str): API key for the ENTSO-E transparency platform.

    Returns:
    - pd.DataFrame: hourly rows with columns 'timestamp' (tz-aware) and
      'NuclearPowerMW' (forecasted available capacity).
    """
    from datetime import timezone  # local import; module only imports datetime/timedelta

    print("* ENTSO-E: Fetching nuclear downtime messages...")
    client = EntsoePandasClient(api_key=entso_e_api_key)

    # Total nuclear capacity in Finland is 4372 MW (2 x 890 MW, 1 x 1600 MW and 2 x 496 MW)
    total_capacity = 4372 # TODO: Get from .env.local instead

    # datetime.utcnow() is deprecated; this reproduces its naive-UTC value,
    # rounded up to the next full hour
    start_time = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0, tzinfo=None) + timedelta(hours=1)
    end_time = start_time + timedelta(days=6) # 5+1 to fill the data frame (TODO: these should match without this hack)

    # NOTE(review): the naive UTC wall-clock time is localized as Helsinki
    # time here (not converted), so the window is shifted by the UTC/Helsinki
    # offset — preserved as-is; confirm this is intentional
    start = pd.Timestamp(pd.to_datetime(start_time), tz='Europe/Helsinki')
    end = pd.Timestamp(pd.to_datetime(end_time), tz='Europe/Helsinki')

    country_code = 'FI' # Finland

    def _planned_nuclear_outages(df):
        # Keep only hourly-resolution planned-maintenance entries for nuclear plants
        df = df[df['plant_type'] == 'Nuclear']
        df = df[df['businesstype'] == 'Planned maintenance']
        df = df[df['resolution'] == 'PT60M']
        return df[['start', 'end','avail_qty','nominal_power', 'production_resource_name']]

    # "Unavailability of generation units" from ENTSO-E includes Olkiluoto units
    unavailable_generation = client.query_unavailability_of_generation_units(country_code, start=start, end=end, docstatus=None, periodstartupdate=None, periodendupdate=None)
    unavailable_nuclear1 = _planned_nuclear_outages(unavailable_generation)

    # "Unavailability of production plants" from ENTSO-E includes Loviisa units
    unavailable_production = client.query_unavailability_of_production_units(country_code, start, end, docstatus=None, periodstartupdate=None, periodendupdate=None)
    unavailable_nuclear2 = _planned_nuclear_outages(unavailable_production)

    # Combine datasets and make the capacity columns numeric
    unavailable_nuclear = pd.concat([unavailable_nuclear1, unavailable_nuclear2], axis=0)
    unavailable_nuclear["nominal_power"] = pd.to_numeric(unavailable_nuclear["nominal_power"])
    unavailable_nuclear["avail_qty"] = pd.to_numeric(unavailable_nuclear["avail_qty"])

    # Discard the original (timestamp) index; rows are only iterated below
    unavailable_nuclear = unavailable_nuclear.reset_index(drop=True)

    # Calculate unavailable capacity for each unavailability entry
    unavailable_nuclear_capacity = unavailable_nuclear.assign(unavailable_qty = (unavailable_nuclear['nominal_power'] - unavailable_nuclear['avail_qty']))
    unavailable_nuclear_capacity['start'] = pd.to_datetime(unavailable_nuclear_capacity['start'])
    unavailable_nuclear_capacity['end'] = pd.to_datetime(unavailable_nuclear_capacity['end'])

    # Initialize forecast dataset with baseline capacity when all capacity is available
    date_range = pd.date_range(start=start, end=end, freq='h')
    nuclear_forecast = pd.DataFrame(index=date_range, columns=["available_capacity"])
    nuclear_forecast['available_capacity'] = total_capacity

    # Adjust available capacity based on unavailability entries; overlapping
    # outages subtract cumulatively
    for _, row in unavailable_nuclear_capacity.iterrows():
        mask = (nuclear_forecast.index >= row['start']) & (nuclear_forecast.index < row['end'])
        nuclear_forecast.loc[mask, 'available_capacity'] -= row['unavailable_qty']

    nuclear_forecast.reset_index(inplace=True)

    # Back to the host program naming conventions; update_df_from_df accepts
    # the lowercase 'timestamp' column name
    nuclear_forecast.rename(columns={'index': 'timestamp', 'available_capacity': 'NuclearPowerMW'}, inplace=True)

    avg_capacity = round(nuclear_forecast['NuclearPowerMW'].mean())
    max_capacity = round(nuclear_forecast['NuclearPowerMW'].max())
    min_capacity = round(nuclear_forecast['NuclearPowerMW'].min())

    print(f"→ ENTSO-E: Avg: {avg_capacity}, max: {max_capacity}, min: {min_capacity} MW")

    return nuclear_forecast

def main():
    """Manual test driver: fetch the ENTSO-E nuclear forecast and print it.

    Run from the root folder of the project: python util/entso_e.py
    """
    # Read environment variables from the project's .env.local file
    load_dotenv(dotenv_path='.env.local')

    # The ENTSO-E API key is mandatory for this test
    api_key = os.getenv('ENTSO_E_API_KEY')
    if not api_key:
        print("ENTSO_E_API_KEY not found in .env.local file.")
        return

    try:
        # Fetch and show the nuclear power forecast
        forecast = entso_e_nuclear(api_key)
        print(forecast)
    except Exception as e:
        # Report any failure during the fetch without a traceback
        print(f"An error occurred: {e}")

if __name__ == "__main__":
    main()
77 changes: 0 additions & 77 deletions util/nuclear_forecast.py

This file was deleted.

0 comments on commit d0b8861

Please sign in to comment.