Skip to content

Commit

Permalink
took record id out of index
Browse files Browse the repository at this point in the history
  • Loading branch information
katie-lamb committed Sep 11, 2023
1 parent 2c998e1 commit b4901f4
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
"""add ppe
Revision ID: 1d8ac4541321
Revision ID: 0bab48479df4
Revises: 8b3029915ab1
Create Date: 2023-09-08 11:38:16.069476
Create Date: 2023-09-10 09:47:21.159465
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = '1d8ac4541321'
revision = '0bab48479df4'
down_revision = '8b3029915ab1'
branch_labels = None
depends_on = None
Expand Down Expand Up @@ -58,18 +58,19 @@ def upgrade() -> None:
sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_mega_generators_eia_utility_id_pudl_utilities_pudl'))
)
op.create_table('plant_parts_eia',
sa.Column('record_id_eia', sa.Text(), nullable=False, comment='Identifier for EIA plant parts analysis records.'),
sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'),
sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'),
sa.Column('plant_part', sa.Enum('plant_operating_year', 'plant_prime_fuel', 'plant_match_ferc1', 'plant_technology', 'plant_prime_mover', 'plant_ferc_acct', 'plant_unit', 'plant', 'plant_gen'), nullable=True, comment='The part of the plant a record corresponds to.'),
sa.Column('plant_part', sa.Enum('plant_prime_fuel', 'plant_ferc_acct', 'plant_gen', 'plant_operating_year', 'plant_unit', 'plant_prime_mover', 'plant_technology', 'plant_match_ferc1', 'plant'), nullable=True, comment='The part of the plant a record corresponds to.'),
sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'),
sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'),
sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'),
sa.Column('energy_source_code_1', sa.Enum('SUN', 'BIT', 'NG', 'PUR', 'OBL', 'BLQ', 'OBS', 'LIG', 'MWH', 'WH', 'AB', 'ANT', 'RFO', 'WC', 'BFG', 'PG', 'MSN', 'DFO', 'TDF', 'SGC', 'SG', 'SGP', 'NUC', 'WND', 'SUB', 'OTH', 'GEO', 'SC', 'KER', 'JF', 'WAT', 'WO', 'OG', 'RC', 'MSB', 'OBG', 'PC', 'MSW', 'LFG', 'WDL', 'SLW', 'WDS'), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'),
sa.Column('technology_description', sa.Enum('Petroleum Liquids', 'Natural Gas Fired Combined Cycle', 'All Other', 'Landfill Gas', 'Flywheels', 'Nuclear', 'Solar Thermal with Energy Storage', 'Municipal Solid Waste', 'Conventional Hydroelectric', 'Batteries', 'Solar Thermal without Energy Storage', 'Natural Gas Internal Combustion Engine', 'Solar Photovoltaic', 'Hydroelectric Pumped Storage', 'Natural Gas with Compressed Air Storage', 'Onshore Wind Turbine', 'Hydrokinetic', 'Natural Gas Fired Combustion Turbine', 'Conventional Steam Coal', 'Offshore Wind Turbine', 'Coal Integrated Gasification Combined Cycle', 'Geothermal', 'Other Waste Biomass', 'Petroleum Coke', 'Other Natural Gas', 'Wood/Wood Waste Biomass', 'Other Gases', 'Natural Gas Steam Turbine'), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'),
sa.Column('energy_source_code_1', sa.Enum('WH', 'MSB', 'PUR', 'SC', 'BIT', 'WO', 'BLQ', 'JF', 'OBG', 'BFG', 'TDF', 'MSW', 'OBS', 'SLW', 'WDS', 'DFO', 'SGC', 'LIG', 'MWH', 'SGP', 'WC', 'WND', 'PG', 'RFO', 'WDL', 'SG', 'MSN', 'ANT', 'RC', 'SUN', 'OTH', 'KER', 'NG', 'PC', 'WAT', 'NUC', 'SUB', 'LFG', 'OBL', 'OG', 'AB', 'GEO'), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'),
sa.Column('technology_description', sa.Enum('Conventional Hydroelectric', 'Other Natural Gas', 'Onshore Wind Turbine', 'Other Waste Biomass', 'Landfill Gas', 'Conventional Steam Coal', 'Natural Gas Fired Combined Cycle', 'Coal Integrated Gasification Combined Cycle', 'Flywheels', 'Municipal Solid Waste', 'Natural Gas Internal Combustion Engine', 'Other Gases', 'Natural Gas Fired Combustion Turbine', 'Hydroelectric Pumped Storage', 'Petroleum Coke', 'Batteries', 'Natural Gas Steam Turbine', 'Solar Thermal without Energy Storage', 'All Other', 'Solar Photovoltaic', 'Geothermal', 'Solar Thermal with Energy Storage', 'Natural Gas with Compressed Air Storage', 'Offshore Wind Turbine', 'Nuclear', 'Petroleum Liquids', 'Hydrokinetic', 'Wood/Wood Waste Biomass'), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'),
sa.Column('ferc_acct_name', sa.Enum('Hydraulic', 'Nuclear', 'Steam', 'Other'), nullable=True, comment='Name of FERC account, derived from technology description and prime mover code.'),
sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'),
sa.Column('true_gran', sa.Boolean(), nullable=True, comment='Indicates whether a plant part list record is associated with the highest priority plant part for all identical records.'),
sa.Column('appro_part_label', sa.Enum('plant_operating_year', 'plant_prime_fuel', 'plant_match_ferc1', 'plant_technology', 'plant_prime_mover', 'plant_ferc_acct', 'plant_unit', 'plant', 'plant_gen'), nullable=True, comment='Plant part of the associated true granularity record.'),
sa.Column('appro_part_label', sa.Enum('plant_prime_fuel', 'plant_ferc_acct', 'plant_gen', 'plant_operating_year', 'plant_unit', 'plant_prime_mover', 'plant_technology', 'plant_match_ferc1', 'plant'), nullable=True, comment='Plant part of the associated true granularity record.'),
sa.Column('appro_record_id_eia', sa.Text(), nullable=True, comment='EIA record ID of the associated true granularity record.'),
sa.Column('ferc1_generator_agg_id', sa.Integer(), nullable=True, comment='ID dynamically assigned by PUDL to EIA records with multiple matches to a single FERC ID in the FERC-EIA manual matching process.'),
sa.Column('capacity_eoy_mw', sa.Float(), nullable=True, comment='Total end of year installed (nameplate) capacity for a plant part, in megawatts.'),
Expand Down Expand Up @@ -105,7 +106,8 @@ def upgrade() -> None:
sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_plant_parts_eia_plant_id_pudl_plants_pudl')),
sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_plant_parts_eia_prime_mover_code_prime_movers_eia')),
sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_plant_parts_eia_utility_id_eia_utilities_eia860')),
sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_plant_parts_eia_utility_id_pudl_utilities_pudl'))
sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_plant_parts_eia_utility_id_pudl_utilities_pudl')),
sa.PrimaryKeyConstraint('record_id_eia', name=op.f('pk_plant_parts_eia'))
)
# ### end Alembic commands ###

Expand Down
40 changes: 19 additions & 21 deletions src/pudl/analysis/plant_parts_eia.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def mega_gens_asset(mcoe: pd.DataFrame, own_eia860: pd.DataFrame) -> pd.DataFram
"denorm_plants_eia": AssetIn(key="denorm_plants_eia"),
"denorm_utilities_eia": AssetIn(key="denorm_utilities_eia"),
},
io_manager_key=io_manager_key,
# io_manager_key=io_manager_key,
compute_kind="Python",
)
def plant_parts_eia_asset(
Expand Down Expand Up @@ -628,12 +628,7 @@ class MakePlantParts:
"""

def __init__(self):
"""Initialize instance of :class:`MakePlantParts`.
Args:
pudl_out (pudl.output.pudltabl.PudlTabl): An object used to create
the tables for EIA and FERC Form 1 analysis.
"""
"""Initialize instance of :class:`MakePlantParts`."""
self.parts_to_ids = make_parts_to_ids_dict()

# get a list of all of the id columns that constitute the primary keys
Expand All @@ -647,6 +642,10 @@ def execute(self, gens_mega, plants_eia860, utils_eia860):
pandas.DataFrame: The complete plant parts list
"""
# aggregate everything by each plant part
gens_mega = gens_mega[
(gens_mega.report_date < "2021-01-01")
& (gens_mega.report_date >= "2020-01-01")
]
part_dfs = []
for part_name in PLANT_PARTS:
if part_name == "plant_match_ferc1":
Expand Down Expand Up @@ -684,7 +683,6 @@ def execute(self, gens_mega, plants_eia860, utils_eia860):
.pipe(self._clean_plant_parts)
.pipe(Resource.from_id("plant_parts_eia").format_df)
)
self.plant_parts_eia.index = self.plant_parts_eia.index.astype("string")
return self.plant_parts_eia

#######################################
Expand Down Expand Up @@ -832,20 +830,18 @@ def add_additional_cols(self, plant_parts_eia, plants_eia860, utils_eia860):
return plant_parts_eia

def _clean_plant_parts(self, plant_parts_eia):
plant_parts_eia = (
plant_parts_eia.assign(
report_year=lambda x: x.report_date.dt.year,
plant_id_report_year=lambda x: x.plant_id_pudl.astype(str)
+ "_"
+ x.report_year.astype(str),
)
.pipe(
pudl.helpers.cleanstrings_snake,
["record_id_eia", "appro_record_id_eia"],
)
.set_index("record_id_eia")
plant_parts_eia = plant_parts_eia.assign(
report_year=lambda x: x.report_date.dt.year,
plant_id_report_year=lambda x: x.plant_id_pudl.astype(str)
+ "_"
+ x.report_year.astype(str),
).pipe(
pudl.helpers.cleanstrings_snake,
["record_id_eia", "appro_record_id_eia"],
)
return plant_parts_eia[~plant_parts_eia.index.duplicated(keep="first")]
return plant_parts_eia[
~plant_parts_eia["record_id_eia"].duplicated(keep="first")
]

def add_attributes(self, part_df, attribute_df, part_name):
"""Add constant and min/max attributes to plant parts."""
Expand Down Expand Up @@ -1490,8 +1486,10 @@ def add_record_id(part_df, id_cols, plant_part_col="plant_part", year=True):
)
if year:
part_df = part_df.rename(columns={"record_id_eia_temp": "record_id_eia"})
part_df["record_id_eia"] = part_df["record_id_eia"].astype("string")
else:
part_df = part_df.rename(columns={"record_id_eia_temp": "plant_part_id_eia"})
part_df["plant_part_id_eia"] = part_df["plant_part_id_eia"].astype("string")
return part_df


Expand Down
4 changes: 4 additions & 0 deletions src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -2092,6 +2092,10 @@
"type": "string",
"description": "Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.", # noqa: FS003
},
"record_id_eia": {
"type": "string",
"description": "Identifier for EIA plant parts analysis records.",
},
"region_name_us_census": {
"type": "string",
"description": "Human-readable name of a US Census region.",
Expand Down
2 changes: 2 additions & 0 deletions src/pudl/metadata/resources/eia.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@
"description": "Output table with the aggregation of all EIA plant parts. For use with matching to FERC 1.",
"schema": {
"fields": [
"record_id_eia",
"plant_id_eia",
"report_date",
"plant_part",
Expand Down Expand Up @@ -665,6 +666,7 @@
"report_year",
"plant_id_report_year",
],
"primary_key": ["record_id_eia"],
},
"sources": ["eia860", "eia923"],
"etl_group": "outputs",
Expand Down

0 comments on commit b4901f4

Please sign in to comment.