Understanding U.S. Electric System Operating Data #254
Replies: 5 comments 16 replies
-
Start investigating what type(s) of data we have.
import json
import os
from zipfile import ZipFile
data_dir = os.path.join(os.path.expanduser("~"), "Workspace", "bulk_data")
mar12_data = os.path.join(data_dir, "2024-03-12", "EBA.zip")
aug14_data = os.path.join(data_dir, "2024-08-14", "EBA.zip")
aug19_data = os.path.join(data_dir, "2024-08-19", "EBA.zip")
def to_date(d_str):
    """Parse an EIA bulk-data timestamp string into a pandas datetime.

    Tries, in order: UTC hourly ('%Y%m%dT%HZ'), local hourly with a UTC
    offset ('%Y%m%dT%H%z', after appending ':00' to complete the offset),
    plain hourly ('%Y%m%dT%H'), and finally pandas's free-form parser.

    Args:
        d_str (str): Date string, e.g. '20220101T05Z'.

    Returns:
        pandas.Timestamp or None: The parsed datetime, or None when every
        parsing strategy fails.
    """
    # (label, parser) pairs tried in order; labels preserve the original
    # diagnostic prints so existing log output is unchanged.
    attempts = [
        ("UTC", lambda s: pd.to_datetime(s, utc=True, format='%Y%m%dT%HZ')),
        ("Local", lambda s: pd.to_datetime(s + ":00", format='%Y%m%dT%H%z')),
        ("Alt UTC", lambda s: pd.to_datetime(s, format='%Y%m%dT%H')),
        ("Last ditch effort", lambda s: pd.to_datetime(s)),
    ]
    for label, parse in attempts:
        try:
            result = parse(d_str)
        except (ValueError, TypeError):
            # Narrowed from a bare except: only parse failures should be
            # swallowed; anything else ought to surface.
            continue
        print(label)
        return result
    print("Failed")
    return None
def read_line(zf, ln=1):
    """Read and parse one JSON line from a zipped text file.

    Args:
        zf (str): Path to a zip archive assumed to contain a single text
            file (e.g., EBA.txt) with one JSON object per line.
        ln (int): 1-based line number to read; default is 1.

    Returns:
        dict: Parsed JSON from line ``ln``, or an empty dict when the
        archive contains no files.
    """
    data = {}
    # Context managers close the archive and member even on error
    # (the original leaked the ZipFile handle).
    with ZipFile(zf, 'r') as z:
        names = z.namelist()
        if names:
            # Assumes only one file is zipped; mirror the original's
            # behavior of using the last name listed.
            with z.open(names[-1]) as f:
                line = b""
                for _ in range(ln):
                    line = f.readline()
                data = json.loads(line)
    return data
def read_zip(zf):
    """Tally UTC-hourly series types found in a bulk EBA zip archive.

    Args:
        zf (str): Path to a zip archive assumed to contain a single text
            file (e.g., EBA.txt) with one JSON object per line.

    Returns:
        dict: Counts keyed 'ng' (net generation), 'idh' (BA-to-BA
        interchange), and 'd' (demand), plus 'junk', a list of hourly
        series IDs that matched none of those (including the 5-part
        fuel-specific series).
    """
    data = {
        'ng': 0,
        'idh': 0,
        'd': 0,
        'junk': []
    }
    # 'with' guarantees the archive is closed even if a line raises
    # (the original's z.close() was skipped on exceptions).
    with ZipFile(zf, 'r') as z:
        names = z.namelist()
        # Assumes only one file is zipped (e.g., EBA.txt)
        fn = names[-1] if names else None
        if fn:
            with z.open(fn) as f:
                for line in f:
                    try:
                        f_json = json.loads(line)
                    except ValueError:
                        # json.JSONDecodeError subclasses ValueError;
                        # narrowed from a bare except.
                        print("failed on line '%s'" % line.decode('utf-8')[0:255])
                        continue
                    # Data of interest have a 'series_id'
                    if 'series_id' not in f_json:
                        continue
                    # Filter for UTC hourly ('H'); default 'HL' fails it
                    if f_json.get('f', 'HL') != 'H':
                        continue
                    series_id = f_json.get("series_id", None)
                    if not series_id:
                        continue
                    series_parts = series_id.split(".")
                    # Note that if len is 5, then it's fuel specific
                    if len(series_parts) != 4:
                        data['junk'].append(series_id)
                        continue
                    d = series_parts[2]
                    if d == 'NG':
                        # Net generation
                        data['ng'] += 1
                    elif d == 'ID':
                        # Interchange (BA-to-BA)
                        data['idh'] += 1
                    elif d == 'D':
                        # Demand
                        data['d'] += 1
                    else:
                        data['junk'].append(series_id)
    return data
# Confirm each cached EBA.zip snapshot exists on disk, labeled as
# "<date-dir>:<zip-name>".
data_list = [mar12_data, aug14_data, aug19_data]
for data in data_list:
    # Use os.path.split rather than splitting on "/": these paths were
    # built with os.path.join, so a literal "/" split breaks on Windows.
    head, base = os.path.split(data)
    yfn = "%s:%s" % (os.path.basename(head), base)
    print(yfn, os.path.isfile(data))
my_json = read_line(aug14_data, 1)  # read any line from EBA.zip
d = read_zip(aug14_data)
Beta Was this translation helpful? Give feedback.
-
BTW: Why are we using the same UTC time zone across all balancing authorities?
Beta Was this translation helpful? Give feedback.
-
# Convert df_net_gen into a summation of BAs, written out as CSV.
file_name = "net_gen_2024-08-19.csv"
# One "BA_Code,Annual_NG" row per column: the column label and its
# annual sum, rounded to one decimal place.
rows = [
    "%s,%0.1f" % (col, df_net_gen[col].sum()) for col in df_net_gen.columns
]
my_str = "\n".join(["BA_Code,Annual_NG"] + rows) + "\n"
with open(file_name, 'w') as f:
    f.write(my_str)
Beta Was this translation helpful? Give feedback.
-
The API Way
Here's a model for querying the EIA API for demand (D), net generation (NG), and balancing authority interchanges (ID). Updated for daily data requests; this shortens the amount of data that must be transferred and shouldn't impact results, which are aggregated in the end.
import requests
def decode_str(bstring):
    """Return a Python string decoded from bytes.

    Args:
        bstring (bytes or str): An encoded byte string, or a plain string.

    Returns:
        (str): The decoded string. A str input is returned unchanged;
        undecodable bytes or any other type yield the empty string.
    """
    if isinstance(bstring, str):
        return bstring
    if isinstance(bstring, bytes):
        try:
            return bstring.decode("utf-8")
        except UnicodeDecodeError:
            # Narrowed from a bare except; preserve the original's
            # silent empty-string fallback for bad encodings.
            return ""
    return ""
def get_request(url, url_try=0, max_tries=5):
    """Return a JSON data response from EIA's API.

    Retries (recursively) on non-200 responses until ``max_tries``
    attempts have been made.

    Args:
        url (str): The URL in proper syntax.
        url_try (int): Internal counter for URL retries; default is 0
        max_tries (int): When to stop retrying; default is 5

    Returns:
        (dict, int):
            The JSON response and URL try count.
            The JSON dictionary includes keys:
            - 'response' (dict): with keys:
                - 'total' (int): count of records in 'data'
                - 'dateFormat' (str): For example, 'YYYY-MM-DD"T"HH24'
                - 'frequency' (str): For example, 'hourly'
                - 'description' (str): Data description
                - 'data' (list): Dictionaries with keys:
                    - 'period'
                    - 'fromba': for ID only
                    - 'fromba-name': for ID only
                    - 'toba': for ID only
                    - 'toba-name': for ID only
                    - 'respondent': for D and NG only
                    - 'respondent-name': for D and NG only
                    - 'type': for D and NG only
                    - 'type-name': for D and NG only
                    - 'value'
                    - 'value-units'
            - 'request' (dict): Parameters sent to the API
            - 'apiVersion' (str): API version string (e.g., '2.1.7')
            - 'ExcelAddInVersion' (str): AddIn version string (e.g., '2.1.0')
    """
    r_dict = {}
    url_try += 1
    r = requests.get(url)
    if r.status_code == 200:
        try:
            r_dict = r.json()
        except ValueError:
            # requests raises a ValueError subclass on malformed JSON;
            # fall back to decoding the raw bytes ourselves.
            r_content = decode_str(r.content)
            if r_content:
                r_dict = json.loads(r_content)
    elif url_try < max_tries:
        # Retry. Forward max_tries so a caller-supplied limit is honored;
        # the original dropped it and silently reverted to the default 5.
        r_dict, url_try = get_request(url, url_try, max_tries)
    else:
        print("Requests failed!")
    return (r_dict, url_try)
# Based on eia_utils.py from scenario-modeler
_baseurl = "https://api.eia.gov/v2/"
# Region (D/NG) endpoints: hourly vs daily.
_sub_domain_h = "electricity/rto/region-data/data/"
_sub_domain_d = "electricity/rto/daily-region-data/data/"
# Interchange (ID) endpoints: hourly vs daily.
_sub_domain2_h = "electricity/rto/interchange-data/data/"
_sub_domain2_d = "electricity/rto/daily-interchange-data/data/"
_api_key = ""  # Use your own key!
_freq = "daily"  # one of 'daily', 'hourly', or 'local-hourly'
_region_id = "ISNE"
# NOTE: if using 'local-hourly' these times need to be in the timezone format!
# NOTE: the API time filter is based on day (not hour)!
_start = "2022-12-30"
_end = "2022-12-31"
# Select the endpoints matching the requested frequency (daily vs hourly).
if _freq == 'daily':
    _sub_domain = _sub_domain_d
    _sub_domain2 = _sub_domain2_d
else:
    _sub_domain = _sub_domain_h
    _sub_domain2 = _sub_domain2_h
# Query window shared by both requests.
_window = f"&frequency={_freq}&start={_start}&end={_end}"
# Net generation and demand:
url = (
    f"{_baseurl}{_sub_domain}?api_key={_api_key}&out=json"
    + _window
    + f"&facets[respondent][]={_region_id}"
    + "&facets[type][]=D"
    + "&facets[type][]=NG"
    + "&data[]=value"
)
# Interchange
url2 = (
    f"{_baseurl}{_sub_domain2}?api_key={_api_key}&out=json"
    + _window
    + f"&facets[fromba][]={_region_id}"
    + "&data[]=value"
)
my_ngd, url_tries = get_request(url)
my_id, url_tries = get_request(url2)
Beta Was this translation helpful? Give feedback.
-
Great work! What I take away is that it's currently hard to trust EBA.zip. Is the biggest concern with using the API making sure we're getting the right timezone? Why do we need to pick timezones at all? From what I can tell from the above, all data in the API are reported as UTC.
Beta Was this translation helpful? Give feedback.
-
The question has come up as to whether to use the public API for EBA.zip (https://www.eia.gov/opendata/bulk/EBA.zip) or the EIA's API. The cause of this concern is that different vintages of EBA.zip have influenced the ElectricityLCI's baselines for the same year (e.g., 2022). For data that are about two years old, it is concerning that updates to EBA.zip should exhibit variance. So, let's look at this dataset.
I procured three versions of EBA.zip cached at different times (i.e., 12 March 2024, 14 August 2024, and 19 August 2024) to see what changes in 2022 data between them.
Beta Was this translation helpful? Give feedback.
All reactions