From 062297b08a4a2b6b32e138dc96f1924b60d0b311 Mon Sep 17 00:00:00 2001 From: dt-woods Date: Fri, 6 Dec 2024 15:50:36 -0500 Subject: [PATCH] new coal transportation from 2023 coal model addresses #243. note that distance data (kg*km) and modes of transport are significantly changed from 2016 using 2020 data --- electricitylci/coal_upstream.py | 279 +++++++++++++----- .../data/coal/2023/coal_transport_dist.csv | 43 +++ 2 files changed, 252 insertions(+), 70 deletions(-) create mode 100644 electricitylci/data/coal/2023/coal_transport_dist.csv diff --git a/electricitylci/coal_upstream.py b/electricitylci/coal_upstream.py index 43d2344..581677d 100644 --- a/electricitylci/coal_upstream.py +++ b/electricitylci/coal_upstream.py @@ -17,8 +17,10 @@ from electricitylci.globals import paths from electricitylci.globals import data_dir from electricitylci.globals import STATE_ABBREV -from electricitylci.eia923_generation import eia923_download # +model_specs +from electricitylci.eia860_facilities import eia860_balancing_authority +from electricitylci.eia923_generation import eia923_download from electricitylci.eia923_generation import eia923_generation_and_fuel +from electricitylci.model_config import model_specs import electricitylci.PhysicalQuantities as pq from electricitylci.utils import download from electricitylci.utils import find_file_in_folder @@ -46,7 +48,7 @@ For the 2023 coal model, see: https://www.osti.gov/biblio/2370100. Last updated: - 2024-10-25 + 2024-12-06 """ __all__ = [ "COAL_MINING_LCI_VINTAGE", @@ -114,7 +116,7 @@ 'Avg Railroad Ton*Miles': 'Railroad', 'Avg Truck Ton*Miles': 'Truck', } -'''dict : A map between coal model transport columns and their short names.''' +'''dict : A map from 2016 coal model transport columns to their short names.''' ############################################################################## @@ -211,47 +213,114 @@ def _process_2023_coal_transport_lci(df, name): def _make_2023_coal_transport_data(year): - # IN PROGRESS. 
- # - # The 2023 transport distances are by NERC region and coal basin. - # The goal is to get a data frame with facility IDs matched to their - # NERC region and coal basin. - - # Get NERC regions for coal facilities - coal_reg = eia923_generation_and_fuel(year) - coal_reg = coal_reg[ - ['plant_id', 'nerc_region', 'state', 'reported_fuel_type_code']].copy() - # Keep only coal facilities - coal_filt = coal_reg['reported_fuel_type_code'].isin(coal_type_codes.keys()) - coal_reg = coal_reg.loc[coal_filt, :] - # Lose facilities without a NERC region - coal_reg = coal_reg.dropna(subset='nerc_region') - - # Get the basin map. - basin_map = pd.read_csv(os.path.join(data_dir, 'eia_to_netl_basin.csv')) - basin_map = basin_map.set_index('eia_basin') - basin_map = basin_map['netl_basin'] - basin_map = basin_map.to_dict() - - # Get the state to basin map. - state_map = pd.read_csv(os.path.join(data_dir, 'coal_state_to_basin.csv')) - state_map = state_map.set_index('state') - state_map = state_map['basin1'] - state_map = state_map.to_dict() - - coal_reg['basin'] = coal_reg['state'].map(state_map) - coal_reg.dropna(subset='basin') - - # TODO; - - # The 2023 coal baseline model's 'Transportation' worksheet was - # saved to CSV. All distances are in miles. - # Columns include 'Basin', 'NERC Region', 'Belt', 'Truck', 'Barge', - # 'Ocean Vessel', 'Railroad' (renamed from 'Train'), and 'Total (mi)'. - coal_distance = pd.read_csv( - os.path.join( - data_dir, "coal", "2023", "coal_transportation_distances.csv") + """Generate essentially the same data as the CSV file from the 2016 + baseline, updated with transportation data from the 2023 coal model, + where gaps are filled using the U.S. average. + + Transportation data units are kg*km + (kilograms of coal x kilometers of distance transported). + + Parameters + ---------- + year : int + The year used for facility data from EIA 860. 
+ + Returns + ------- + pandas.DataFrame + A data frame with plant IDs, coal basins, NERC regions, and kg coal*km + coal transported data for: Belt, Truck, Barge, Ocean Vessel, and Train. + + Raises + ------ + OSError + If the data file is not found. + """ + # Generate the coal upstream map, which labels each facility with its + # coal source code: a three-part combo of coal basin, coal type, and + # mine type. We only want the coal basin data from this. + coal_map_df = generate_upstream_coal_map(year) + coal_map_df["Basin"] = coal_map_df["coal_source_code"].str.split("-").str[0] + + # Now, let's find the NERC region for each facility. + ba_region_df = eia860_balancing_authority(year, regional_aggregation=None) + + # Let's create a dictionary that maps facilities to their NERC region, + # fixing the plant ID from string to integer along the way. + # We don't need the heat input or the old coal source code, so let's drop + # them. + region_dict = dict( + zip(ba_region_df["Plant Id"], ba_region_df["NERC Region"]) ) + region_dict = {int(k): v for k, v in region_dict.items()} + coal_map_df['NERC Region'] = coal_map_df['plant_id'].map(region_dict) + coal_map_df = coal_map_df.drop(columns=['coal_source_code', 'heat_input']) + + # Read the 2023 coal model transportation data + # Source: https://github.com/USEPA/ElectricityLCI/discussions/273 + coal_dir = os.path.join(data_dir, "coal", "2023") + coal_file = os.path.join(coal_dir, "coal_transport_dist.csv") + if not os.path.isfile(coal_file): + raise OSError( + "Failed to find 2023 coal transportation " + "data file, '%s'" % coal_file) + coal_trans_df = pd.read_csv(coal_file) + + # NOTE: the 2023 coal model uses a slightly different naming scheme + # for WNW coal basin, so let's fix it. + basin_codes_new = {k:v for k, v in basin_codes.items()} + del basin_codes_new["West/Northwest"] + basin_codes_new["West/North West"] = "WNW" + + # Now, map the basin names to their basin codes. 
+ # NOTE this works for all basins except for "U.S. Average" + coal_trans_df["Basin"] = coal_trans_df["Basin"].map(basin_codes_new) + + # Some facilities may not map to our coal model, so let's save the + # U.S. average and use it for them. + # TODO: Consider saving the weighted averages for regions as well! + us_ave_coal_trans = coal_trans_df.loc[coal_trans_df['Basin'].isna(), :] + us_ave_coal_trans = us_ave_coal_trans.reset_index(drop=True) + + # Drop the NaNs from our coal transportation data frame + # (i.e., the U.S. average that we saved separately). + coal_trans_df = coal_trans_df.dropna().copy() + + # Put it all together by merging our transportation data and the + # coal data using the NERC region and coal basin codes as the + # common attributes. + final_df = pd.merge( + left=coal_map_df, + right=coal_trans_df, + on=['Basin', 'NERC Region'], + how='left', + ) + + # there are facilities not mapped to transportation; let's give them the + # U.S. average values + # TODO: consider using weighted-average regional values. + final_df = final_df.fillna({ + 'Belt': us_ave_coal_trans.loc[0, 'Belt'], + 'Truck': us_ave_coal_trans.loc[0, 'Truck'], + 'Barge': us_ave_coal_trans.loc[0, 'Barge'], + 'Ocean Vessel': us_ave_coal_trans.loc[0, 'Ocean Vessel'], + 'Train': us_ave_coal_trans.loc[0, 'Train'], + }) + + # The transportation data from the coal model are in miles. + # Let's convert miles to kilometers, and calculate the kg*km values by + # multiplying the quantity (kg of coal) by transportation distance + # (miles converted to km). 
+ mi_to_km = pq.convert(1, 'mi', 'km') + + trans_cols = ["Belt", "Truck", "Barge", "Ocean Vessel", "Train"] + final_df[trans_cols] = final_df[trans_cols].mul(mi_to_km) + final_df[trans_cols] = final_df[trans_cols].mul( + final_df["quantity"], + axis=0 + ) + + return final_df def _make_ave_transport(trans_df, lci_df): @@ -969,6 +1038,100 @@ def get_2023_ave_coal_transport(trans_df, input_df): return trans_lci +def get_coal_transportation(): + """Create the coal transport data frame in kilograms of coal by kilometers + of distance transported for each facility by transportation type + (e.g. 'Barge' or 'Truck'). + + Returns + ------- + pandas.DataFrame + A three-column data frame of 'plant_id', 'coal_source_code' + (i.e., transportation type like 'Truck' or 'Barge'), and 'quantity' + (i.e., transportation of kilograms of coal by kilometers of distance). + + The 2020 version has five types of transportation (i.e., 'Barge', 'Lake + Vessel', 'Ocean Vessel', 'Railroad', and 'Truck'). + + The 2023 version has five types of transportation (i.e., 'Barge', + 'Belt', 'Ocean Vessel', 'Railroad', and 'Truck'). + + Raises + ------ + ValueError + If the global parameter year is not correctly assigned. + + Notes + ----- + Method depends on the global parameter, `COAL_TRANSPORT_LCI_VINTAGE`. + For 2020, the 2016 baseline's ABB data file is referenced (i.e., + '2016_Coal_Trans_By_Plant_ABB_Data.csv'). + For 2023, the 2023 coal baseline data file is referenced + (i.e., 'coal_transport_dist.csv' in the coal/2023 folder of data). + """ + # IN PROGRESS + if COAL_TRANSPORT_LCI_VINTAGE == 2020: + # The 2016 transportation data by facility. 
+ logging.info("Using 2016 coal baseline transportation distance data.") + coal_transportation = pd.read_csv( + os.path.join(data_dir, '2016_Coal_Trans_By_Plant_ABB_Data.csv') + ) + # Make rows facility IDs with Transport column (modes) and + # value (ton*mi) + coal_transportation = coal_transportation.melt( + 'Plant Government ID', + var_name='Transport' + ) + # NOTE: the 2016 transportation functional unit is ton*miles; + # convert ton*mi to kg*km + coal_transportation["value"] = ( + coal_transportation["value"] + * pq.convert(1, "ton", "kg") + * pq.convert(1, "mi", "km") + ) + # Rename transport columns + coal_transportation = coal_transportation.rename(columns={ + 'Plant Government ID': 'plant_id', + 'Transport': 'coal_source_code', + 'value': 'quantity', + }) + # Correct coal_transportation codes + coal_transportation['coal_source_code'] = coal_transportation.apply( + _transport_code, axis=1) + elif COAL_TRANSPORT_LCI_VINTAGE == 2023: + logging.info("Using 2023 coal model transportation distance data") + coal_transportation = _make_2023_coal_transport_data( + model_specs.eia_gen_year) + + # NOTE: the 2016 baseline uses 'Railroad' in place of 'Train' + coal_transportation = coal_transportation.rename( + columns={'Train': 'Railroad'} + ) + + # The data frame is melted to match the 2016 data frame, which has + # three columns: plant_id, coal_source_code (i.e., transportation type), + # and quantity (i.e., the kg*km values). + coal_transportation = coal_transportation.melt( + id_vars=("plant_id",), + value_vars=('Belt', 'Truck', 'Barge', 'Ocean Vessel', 'Railroad') + ) + + # To allow facilities receiving coal from more than one region/basin, + # group by facility and sum by transportation type. 
+ coal_transportation = coal_transportation.groupby(by=['plant_id', 'variable']).agg({'value': 'sum'}).reset_index(drop=False) + + # Rename to match the 2016 data frame + coal_transportation = coal_transportation.rename( + columns={'variable': 'coal_source_code', 'value': 'quantity'} + ) + else: + raise ValueError( + "The coal transport year, %d, " + "is unknown!" % COAL_TRANSPORT_LCI_VINTAGE) + + return coal_transportation + + def read_coal_mining(): """Read coal mining (extraction and processing) life cycle inventory. @@ -1071,33 +1234,8 @@ def read_coal_transportation(): - 'input', whether flow is resource (true) or emission (false) """ - # Presently, we only have the 2016 transportation data by facility. - logging.info("Using 2016 coal plant transportation data.") - coal_transportation = pd.read_csv( - os.path.join(data_dir, '2016_Coal_Trans_By_Plant_ABB_Data.csv') - ) - # Make rows facility IDs with Transport column (modes) and - # value (ton*mi) - coal_transportation = coal_transportation.melt( - 'Plant Government ID', - var_name='Transport' - ) - # NOTE: the 2016 transportation functional unit is ton*miles; - # convert ton*mi to kg*km - coal_transportation["value"] = ( - coal_transportation["value"] - * pq.convert(1, "ton", "kg") - * pq.convert(1, "mi", "km") - ) - # Rename transport columns - coal_transportation = coal_transportation.rename(columns={ - 'Plant Government ID': 'plant_id', - 'Transport': 'coal_source_code', - 'value': 'quantity', - }) - # Correct coal_transportation codes - coal_transportation['coal_source_code'] = coal_transportation.apply( - _transport_code, axis=1) + # Get the appropriate coal transportation distance data: + coal_transportation = get_coal_transportation() # FORK IN THE ROAD if COAL_TRANSPORT_LCI_VINTAGE == 2023: @@ -1467,6 +1605,7 @@ def wtd_mean(pdser, total_db): invent_plants = coal_mining_inventory_df['plant_id'].unique() # Check for any inventory plants that don't have transportation LCI. 
+ # NOTE: this should not occur unless the LCI vintage years are mis-matched. missing_plants = [ int(x) for x in invent_plants if x not in trans_plants] num_miss_plants = len(missing_plants) diff --git a/electricitylci/data/coal/2023/coal_transport_dist.csv b/electricitylci/data/coal/2023/coal_transport_dist.csv new file mode 100644 index 0000000..283da68 --- /dev/null +++ b/electricitylci/data/coal/2023/coal_transport_dist.csv @@ -0,0 +1,43 @@ +Basin,NERC Region,Belt,Truck,Barge,Ocean Vessel,Train +Central Appalachia,FRCC,0,0,0,0,1107.977752 +Central Appalachia,MRO,0,0,0,307,560.0852524 +Central Appalachia,NPCC,0,0,0,0,885.1087031 +Central Appalachia,RFC,0,5.746550107,104.3861809,2.733886858,82.4622286 +Central Appalachia,SERC,0,1.212929678,7.506436059,0,432.0265178 +Central Appalachia,Weighted average,0,2.481161639,35.04156235,0.925235054,345.1554444 +Central Interior,SPP,0,18.17334589,0,0,18.44396159 +Gulf Lignite,ERCOT,0.636293625,5.68677811,0,0,3.57183154 +Gulf Lignite,SERC,0,0.5,0,0,0 +Gulf Lignite,SPP,4.451175234,0.528544448,0,0,0 +Gulf Lignite,Weighted average,1.220014482,4.465128744,0,0,2.727310722 +Illinois Basin,FRCC,0,7.329290563,365.4292272,1.516801349,621.4660044 +Illinois Basin,MRO,0,0,0,0,343.2172334 +Illinois Basin,RFC,0,10.21258341,135.6002717,0.115027265,48.01586662 +Illinois Basin,SERC,0.384204428,6.31479746,92.99234444,0,207.5130215 +Illinois Basin,SPP,0,14.21745361,878.4490668,0,27.80648186 +Illinois Basin,Weighted average,0.196243583,7.893121021,138.4167554,0.194932872,188.1140205 +Lignite,MRO,3.247430368,0.749585777,0,0,7.341607743 +Northern Appalachia,FRCC,0,0,0,0,1387.352562 +Northern Appalachia,MRO,0,0,0,165,695 +Northern Appalachia,NPCC,0,1.830338124,0,0,553.1587493 +Northern Appalachia,RFC,0.166831582,7.542097031,75.14958068,1.691337393,78.66021175 +Northern Appalachia,SERC,0,6.086323608,22.51291511,0,550.1558208 +Northern Appalachia,Weighted average,0.139705124,7.232441815,66.26487988,1.436492556,160.4347721 +Powder River 
Basin,ERCOT,0,0,0,0,1520.471575 +Powder River Basin,MRO,0,0,8.904027538,0.06946903,876.263257 +Powder River Basin,RFC,0,0,0,211.7130418,1182.554026 +Powder River Basin,SERC,0,0,21.8143319,0,1362.973426 +Powder River Basin,SPP,0,0,0,0,927.4462063 +Powder River Basin,WECC,1.283299364,0,0,0,398.1586126 +Powder River Basin,Weighted average,0.185069808,0,7.418959303,31.03121824,1059.925682 +Rocky Mountain,FRCC,0,0,0,436.894845,1992.837173 +Rocky Mountain,MRO,0,0,0,343.8040108,1296.735856 +Rocky Mountain,RFC,0,0,0.113565012,0,1787.926627 +Rocky Mountain,SERC,0,0,0,0,2029 +Rocky Mountain,WECC,0.87258711,13.07121003,0,0,83.3385493 +Rocky Mountain,Weighted average,0.846069985,12.6739879,0.001283021,5.150114206,138.1862896 +Southern Appalachia,SERC,0,32.62822162,28.1533781,0,1.349550346 +West/North West,ASCC,0,4,0,0,0 +West/North West,WECC,0,4,0,0,0 +West/North West,Weighted average,0,4,0,0,0 +U.S. Average,U.S. Average,0.39809141,3.778318915,35.09228677,42.13749849,577.2729147 \ No newline at end of file