Skip to content

Commit

Permalink
new coal transportation from 2023 coal model
Browse files Browse the repository at this point in the history
Addresses USEPA#243. Note that the distance data (kg*km) and modes of transport are significantly changed from 2016 using 2020 data.
  • Loading branch information
dt-woods committed Dec 6, 2024
1 parent 0a6507c commit 062297b
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 70 deletions.
279 changes: 209 additions & 70 deletions electricitylci/coal_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
from electricitylci.globals import paths
from electricitylci.globals import data_dir
from electricitylci.globals import STATE_ABBREV
from electricitylci.eia923_generation import eia923_download # +model_specs
from electricitylci.eia860_facilities import eia860_balancing_authority
from electricitylci.eia923_generation import eia923_download
from electricitylci.eia923_generation import eia923_generation_and_fuel
from electricitylci.model_config import model_specs
import electricitylci.PhysicalQuantities as pq
from electricitylci.utils import download
from electricitylci.utils import find_file_in_folder
Expand Down Expand Up @@ -46,7 +48,7 @@
For the 2023 coal model, see: https://www.osti.gov/biblio/2370100.
Last updated:
2024-10-25
2024-12-06
"""
__all__ = [
"COAL_MINING_LCI_VINTAGE",
Expand Down Expand Up @@ -114,7 +116,7 @@
'Avg Railroad Ton*Miles': 'Railroad',
'Avg Truck Ton*Miles': 'Truck',
}
'''dict : A map between coal model transport columns and their short names.'''
'''dict : A map from 2016 coal model transport columns to their short names.'''


##############################################################################
Expand Down Expand Up @@ -211,47 +213,114 @@ def _process_2023_coal_transport_lci(df, name):


def _make_2023_coal_transport_data(year):
    """Generate facility-level coal transportation data for the 2023 model.

    Produces essentially the same data as the CSV file from the 2016
    baseline, updated with transportation data from the 2023 coal model,
    where gaps are filled using the U.S. average.

    Transportation data units are kg*km
    (kilograms of coal x kilometers of distance transported).

    Parameters
    ----------
    year : int
        The year used for facility data from EIA 860.

    Returns
    -------
    pandas.DataFrame
        A data frame with plant IDs, coal basins, NERC regions, and kg coal*km
        coal transported data for: Belt, Truck, Barge, Ocean Vessel, and Train.

    Raises
    ------
    OSError
        If the data file is not found.
    KeyError
        If the data file does not contain the 'U.S. Average' row that is
        used to fill facilities without a basin/region match.
    """
    # Transportation mode columns, as named in the 2023 coal model CSV.
    trans_cols = ["Belt", "Truck", "Barge", "Ocean Vessel", "Train"]
    us_ave_name = "U.S. Average"

    # Generate the coal upstream map, which labels each facility with its
    # coal source code: a three-part combo of coal basin, coal type, and
    # mine type. We only want the coal basin data from this.
    coal_map_df = generate_upstream_coal_map(year)
    coal_map_df["Basin"] = (
        coal_map_df["coal_source_code"].str.split("-").str[0])

    # Now, let's find the NERC region for each facility.
    ba_region_df = eia860_balancing_authority(year, regional_aggregation=None)

    # Let's create a dictionary that maps facilities to their NERC region,
    # fixing the plant ID from string to integer along the way.
    region_dict = dict(
        zip(ba_region_df["Plant Id"], ba_region_df["NERC Region"])
    )
    region_dict = {int(k): v for k, v in region_dict.items()}
    coal_map_df['NERC Region'] = coal_map_df['plant_id'].map(region_dict)

    # We don't need the heat input or the old coal source code, so let's
    # drop them.
    coal_map_df = coal_map_df.drop(columns=['coal_source_code', 'heat_input'])

    # Read the 2023 coal model transportation data
    # Source: https://github.com/USEPA/ElectricityLCI/discussions/273
    coal_dir = os.path.join(data_dir, "coal", "2023")
    coal_file = os.path.join(coal_dir, "coal_transport_dist.csv")
    if not os.path.isfile(coal_file):
        raise OSError(
            "Failed to find 2023 coal transportation "
            "data file, '%s'" % coal_file)
    coal_trans_df = pd.read_csv(coal_file)

    # Some facilities may not map to our coal model, so let's save the
    # U.S. average and use it for them. Pick the row out by name, before
    # the basin names are converted to codes, so that an unrecognized
    # basin name can never be mistaken for the U.S. average.
    # TODO: Consider saving the weighted averages for regions as well!
    is_us_ave = coal_trans_df["Basin"] == us_ave_name
    us_ave_coal_trans = coal_trans_df.loc[is_us_ave, :]
    us_ave_coal_trans = us_ave_coal_trans.reset_index(drop=True)
    if us_ave_coal_trans.empty:
        raise KeyError(
            "No '%s' row found in 2023 coal transportation "
            "data file, '%s'" % (us_ave_name, coal_file))

    # NOTE: the 2023 coal model uses a slightly different naming scheme
    # for WNW coal basin, so let's fix it (on a copy, to leave the
    # module-level basin codes untouched).
    basin_codes_new = dict(basin_codes)
    basin_codes_new.pop("West/Northwest", None)
    basin_codes_new["West/North West"] = "WNW"

    # Now, map the basin names to their basin codes.
    # NOTE this works for all basins except for "U.S. Average", which
    # becomes NaN.
    coal_trans_df["Basin"] = coal_trans_df["Basin"].map(basin_codes_new)

    # Drop the NaNs from our coal transportation data frame
    # (i.e., the U.S. average that we saved separately).
    coal_trans_df = coal_trans_df.dropna().copy()

    # Put it all together by merging our transportation data and the
    # coal data using the NERC region and coal basin codes as the
    # common attributes.
    final_df = pd.merge(
        left=coal_map_df,
        right=coal_trans_df,
        on=['Basin', 'NERC Region'],
        how='left',
    )

    # There are facilities not mapped to transportation; let's give them
    # the U.S. average values.
    # TODO: consider using weighted-average regional values.
    final_df = final_df.fillna(
        {col: us_ave_coal_trans.loc[0, col] for col in trans_cols}
    )

    # The transportation data from the coal model are in miles.
    # Let's convert miles to kilometers, and calculate the kg*km values by
    # multiplying the quantity (kg of coal) by transportation distance
    # (miles converted to km).
    mi_to_km = pq.convert(1, 'mi', 'km')

    final_df[trans_cols] = final_df[trans_cols].mul(mi_to_km)
    final_df[trans_cols] = final_df[trans_cols].mul(
        final_df["quantity"],
        axis=0
    )

    return final_df


def _make_ave_transport(trans_df, lci_df):
Expand Down Expand Up @@ -969,6 +1038,100 @@ def get_2023_ave_coal_transport(trans_df, input_df):
return trans_lci


def get_coal_transportation():
    """Create the coal transport data frame in kilograms of coal by kilometers
    of distance transported for each facility by transportation type
    (e.g. 'Barge' or 'Truck').

    Returns
    -------
    pandas.DataFrame
        A three-column data frame of 'plant_id', 'coal_source_code'
        (i.e., transportation type like 'Truck' or 'Barge'), and 'quantity'
        (i.e., transportation of kilograms of coal by kilometers of distance).

        The 2020 version has five types of transportation (i.e., 'Barge',
        'Lake Vessel', 'Ocean Vessel', 'Railroad', and 'Truck').

        The 2023 version has five types of transportation (i.e., 'Barge',
        'Belt', 'Ocean Vessel', 'Railroad', and 'Truck').

    Raises
    ------
    ValueError
        If the global parameter, `COAL_TRANSPORT_LCI_VINTAGE`, is neither
        2020 nor 2023.

    Notes
    -----
    Method depends on the global parameter, `COAL_TRANSPORT_LCI_VINTAGE`.

    For 2020, the 2016 baseline's ABB data file is referenced (i.e.,
    '2016_Coal_Trans_By_Plant_ABB_Data.csv').

    For 2023, the 2023 coal baseline data file is referenced
    (i.e., 'coal_transport_dist.csv' in the coal/2023 folder of data).
    """
    if COAL_TRANSPORT_LCI_VINTAGE == 2020:
        # The 2016 transportation data by facility.
        logging.info("Using 2016 coal baseline transportation distance data.")
        coal_transportation = pd.read_csv(
            os.path.join(data_dir, '2016_Coal_Trans_By_Plant_ABB_Data.csv')
        )
        # Make rows facility IDs with Transport column (modes) and
        # value (ton*mi)
        coal_transportation = coal_transportation.melt(
            'Plant Government ID',
            var_name='Transport'
        )
        # NOTE: the 2016 transportation functional unit is ton*miles;
        #       convert ton*mi to kg*km
        coal_transportation["value"] = (
            coal_transportation["value"]
            * pq.convert(1, "ton", "kg")
            * pq.convert(1, "mi", "km")
        )
        # Rename transport columns
        coal_transportation = coal_transportation.rename(columns={
            'Plant Government ID': 'plant_id',
            'Transport': 'coal_source_code',
            'value': 'quantity',
        })
        # Correct coal_transportation codes
        coal_transportation['coal_source_code'] = coal_transportation.apply(
            _transport_code, axis=1)
    elif COAL_TRANSPORT_LCI_VINTAGE == 2023:
        logging.info("Using 2023 coal model transportation distance data")
        coal_transportation = _make_2023_coal_transport_data(
            model_specs.eia_gen_year)

        # NOTE: the 2016 baseline uses 'Railroad' in place of 'Train'
        coal_transportation = coal_transportation.rename(
            columns={'Train': 'Railroad'}
        )

        # The data frame needs melted to match the 2016 data frame, which has
        # three columns: plant_id, coal_source_code (i.e., transportation
        # type), and quantity (i.e., the kg*km values).
        coal_transportation = coal_transportation.melt(
            id_vars=["plant_id"],
            value_vars=['Belt', 'Truck', 'Barge', 'Ocean Vessel', 'Railroad']
        )

        # To allow facilities receiving coal from more than one region/basin,
        # group by facility and sum by transportation type.
        coal_transportation = coal_transportation.groupby(
            by=['plant_id', 'variable']
        ).agg({'value': 'sum'}).reset_index(drop=False)

        # Rename to match the 2016 data frame
        coal_transportation = coal_transportation.rename(
            columns={'variable': 'coal_source_code', 'value': 'quantity'}
        )
    else:
        # Format with %s (not %d) so that a non-numeric vintage (e.g., None
        # or a string) still yields the documented ValueError rather than a
        # TypeError from the string formatting itself.
        raise ValueError(
            "The coal transport year, %s, "
            "is unknown!" % COAL_TRANSPORT_LCI_VINTAGE)

    return coal_transportation


def read_coal_mining():
"""Read coal mining (extraction and processing) life cycle inventory.
Expand Down Expand Up @@ -1071,33 +1234,8 @@ def read_coal_transportation():
- 'input', whether flow is resource (true) or emission (false)
"""
# Presently, we only have the 2016 transportation data by facility.
logging.info("Using 2016 coal plant transportation data.")
coal_transportation = pd.read_csv(
os.path.join(data_dir, '2016_Coal_Trans_By_Plant_ABB_Data.csv')
)
# Make rows facility IDs with Transport column (modes) and
# value (ton*mi)
coal_transportation = coal_transportation.melt(
'Plant Government ID',
var_name='Transport'
)
# NOTE: the 2016 transportation functional unit is ton*miles;
# convert ton*mi to kg*km
coal_transportation["value"] = (
coal_transportation["value"]
* pq.convert(1, "ton", "kg")
* pq.convert(1, "mi", "km")
)
# Rename transport columns
coal_transportation = coal_transportation.rename(columns={
'Plant Government ID': 'plant_id',
'Transport': 'coal_source_code',
'value': 'quantity',
})
# Correct coal_transportation codes
coal_transportation['coal_source_code'] = coal_transportation.apply(
_transport_code, axis=1)
# Get the appropriate coal transportation distance data:
coal_transportation = get_coal_transportation()

# FORK IN THE ROAD
if COAL_TRANSPORT_LCI_VINTAGE == 2023:
Expand Down Expand Up @@ -1467,6 +1605,7 @@ def wtd_mean(pdser, total_db):
invent_plants = coal_mining_inventory_df['plant_id'].unique()

# Check for any inventory plants that don't have transportation LCI.
# NOTE: this should not occur unless the LCI vintage years are mis-matched.
missing_plants = [
int(x) for x in invent_plants if x not in trans_plants]
num_miss_plants = len(missing_plants)
Expand Down
43 changes: 43 additions & 0 deletions electricitylci/data/coal/2023/coal_transport_dist.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
Basin,NERC Region,Belt,Truck,Barge,Ocean Vessel,Train
Central Appalachia,FRCC,0,0,0,0,1107.977752
Central Appalachia,MRO,0,0,0,307,560.0852524
Central Appalachia,NPCC,0,0,0,0,885.1087031
Central Appalachia,RFC,0,5.746550107,104.3861809,2.733886858,82.4622286
Central Appalachia,SERC,0,1.212929678,7.506436059,0,432.0265178
Central Appalachia,Weighted average,0,2.481161639,35.04156235,0.925235054,345.1554444
Central Interior,SPP,0,18.17334589,0,0,18.44396159
Gulf Lignite,ERCOT,0.636293625,5.68677811,0,0,3.57183154
Gulf Lignite,SERC,0,0.5,0,0,0
Gulf Lignite,SPP,4.451175234,0.528544448,0,0,0
Gulf Lignite,Weighted average,1.220014482,4.465128744,0,0,2.727310722
Illinois Basin,FRCC,0,7.329290563,365.4292272,1.516801349,621.4660044
Illinois Basin,MRO,0,0,0,0,343.2172334
Illinois Basin,RFC,0,10.21258341,135.6002717,0.115027265,48.01586662
Illinois Basin,SERC,0.384204428,6.31479746,92.99234444,0,207.5130215
Illinois Basin,SPP,0,14.21745361,878.4490668,0,27.80648186
Illinois Basin,Weighted average,0.196243583,7.893121021,138.4167554,0.194932872,188.1140205
Lignite,MRO,3.247430368,0.749585777,0,0,7.341607743
Northern Appalachia,FRCC,0,0,0,0,1387.352562
Northern Appalachia,MRO,0,0,0,165,695
Northern Appalachia,NPCC,0,1.830338124,0,0,553.1587493
Northern Appalachia,RFC,0.166831582,7.542097031,75.14958068,1.691337393,78.66021175
Northern Appalachia,SERC,0,6.086323608,22.51291511,0,550.1558208
Northern Appalachia,Weighted average,0.139705124,7.232441815,66.26487988,1.436492556,160.4347721
Powder River Basin,ERCOT,0,0,0,0,1520.471575
Powder River Basin,MRO,0,0,8.904027538,0.06946903,876.263257
Powder River Basin,RFC,0,0,0,211.7130418,1182.554026
Powder River Basin,SERC,0,0,21.8143319,0,1362.973426
Powder River Basin,SPP,0,0,0,0,927.4462063
Powder River Basin,WECC,1.283299364,0,0,0,398.1586126
Powder River Basin,Weighted average,0.185069808,0,7.418959303,31.03121824,1059.925682
Rocky Mountain,FRCC,0,0,0,436.894845,1992.837173
Rocky Mountain,MRO,0,0,0,343.8040108,1296.735856
Rocky Mountain,RFC,0,0,0.113565012,0,1787.926627
Rocky Mountain,SERC,0,0,0,0,2029
Rocky Mountain,WECC,0.87258711,13.07121003,0,0,83.3385493
Rocky Mountain,Weighted average,0.846069985,12.6739879,0.001283021,5.150114206,138.1862896
Southern Appalachia,SERC,0,32.62822162,28.1533781,0,1.349550346
West/North West,ASCC,0,4,0,0,0
West/North West,WECC,0,4,0,0,0
West/North West,Weighted average,0,4,0,0,0
U.S. Average,U.S. Average,0.39809141,3.778318915,35.09228677,42.13749849,577.2729147

0 comments on commit 062297b

Please sign in to comment.