diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 447b95a8dd..047ca0c5cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: - id: rm-unneeded-f-str - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.1 + rev: v3.0.2 hooks: - id: prettier types_or: [yaml] diff --git a/README.rst b/README.rst index d1443dfa36..df5edcda3e 100644 --- a/README.rst +++ b/README.rst @@ -64,18 +64,18 @@ What data is available? PUDL currently integrates data from: -* `EIA Form 860 `__: 2001-2021 -* `EIA Form 860m `__: 2022-06 -* `EIA Form 861 `__: 2001-2021 -* `EIA Form 923 `__: 2001-2021 -* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2021 +* `EIA Form 860 `__: 2001-2022 +* `EIA Form 860m `__: 2023-06 +* `EIA Form 861 `__: 2001-2022 +* `EIA Form 923 `__: 2001-2022 +* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2022 * `FERC Form 1 `__: 1994-2021 * `FERC Form 714 `__: 2006-2020 * `US Census Demographic Profile 1 Geodatabase `__: 2010 Thanks to support from the `Alfred P. Sloan Foundation Energy & Environment Program `__, from -2021 to 2023 we will be integrating the following data as well: +2021 to 2024 we will be integrating the following data as well: * `EIA Form 176 `__ (The Annual Report of Natural Gas Supply and Disposition) @@ -124,7 +124,7 @@ Want access to all the published data in bulk? If you're familiar with Python and `Jupyter Notebooks `__ and are willing to install Docker you can: -* `Download a PUDL data release `__ from +* `Download a PUDL data release `__ from CERN's `Zenodo `__ archiving service. 
* `Install Docker `__ * Run the archived image using ``docker-compose up`` diff --git a/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb b/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb index a27bbd394d..c45e2e008f 100644 --- a/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb +++ b/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb @@ -42,6 +42,7 @@ "\n", "# Local libraries\n", "import pudl\n", + "from pudl.workspace.setup import PudlPaths\n", "from pudl.analysis.ferc1_eia_train import *" ] }, @@ -54,8 +55,7 @@ }, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS', fill_net_gen=True)" ] }, diff --git a/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb b/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb index 171e01512b..22149567ec 100644 --- a/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb +++ b/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb @@ -43,6 +43,7 @@ "\n", "# Local libraries\n", "import pudl\n", + "from pudl.workspace.setup import PudlPaths\n", "from pudl.analysis.ferc1_eia_train import *" ] }, @@ -55,8 +56,7 @@ }, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS', fill_net_gen=True)" ] }, diff --git a/docs/data_access.rst b/docs/data_access.rst index 
26b7c92545..92befc2829 100644 --- a/docs/data_access.rst +++ b/docs/data_access.rst @@ -83,7 +83,7 @@ AWS CLI, or programmatically via the S3 API. They can also be downloaded directl HTTPS using the following links: * `PUDL SQLite DB `__ -* `EPA CEMS Hourly Emissions Parquet (1995-2021) `__ +* `EPA CEMS Hourly Emissions Parquet (1995-2022) `__ * `Census DP1 SQLite DB (2010) `__ * Raw FERC Form 1: diff --git a/docs/dev/datastore.rst b/docs/dev/datastore.rst index e9411537f6..31e1e6f5b0 100644 --- a/docs/dev/datastore.rst +++ b/docs/dev/datastore.rst @@ -38,15 +38,17 @@ For more detailed usage information, see: $ pudl_datastore --help The downloaded data will be used by the script to populate a datastore under -the ``data`` directory in your workspace, organized by data source, form, and -date:: +your ``$PUDL_INPUT`` directory, organized by data source, form, and DOI:: data/censusdp1tract/ data/eia860/ + data/eia860m/ data/eia861/ data/eia923/ data/epacems/ data/ferc1/ + data/ferc2/ + data/ferc60/ data/ferc714/ If the download fails to complete successfully, the script can be run repeatedly until @@ -64,28 +66,13 @@ archival and versioning of datasets. See the `documentation for information on adding datasets to the datastore. -Prepare the Datastore -^^^^^^^^^^^^^^^^^^^^^ +Tell PUDL about the archive +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you have used pudl-archiver to prepare a Zenodo archive as above, you -can add support for your archive to the datastore by adding the DOI to -pudl.workspace.datastore.DOI, under "sandbox" or "production" as appropriate. - -If you want to prepare an archive for the datastore separately, the following -are required. - -#. The root path must contain a ``datapackage.json`` file that conforms to the -`frictionless datapackage spec `__ -#. Each listed resource among the ``datapackage.json`` resources must include: - - * ``path`` containing the zenodo download url for the specific file. 
- * ``remote_url`` with the same url as the ``path`` - * ``name`` of the file - * ``hash`` with the md5 hash of the file - * ``parts`` a set of key / value pairs defining additional attributes that - can be used to select a subset of the whole datapackage. For example, the - ``epacems`` dataset is partitioned by year and state, and - ``"parts": {"year": 2010, "state": "ca"}`` would indicate that the - resource contains data for the state of California in the year 2010. - Unpartitioned datasets like the ``ferc714`` which includes all years in - a single file, would have an empty ``"parts": {}`` +Once you have used pudl-archiver to prepare a Zenodo archive as above, you +can make the PUDL Datastore aware of it by updating the appropriate DOI in +:class:`pudl.workspace.datastore.ZenodoFetcher`. DOIs can refer to resources from the +`Zenodo sandbox server `__ for archives that are still in +testing or development (sandbox DOIs have a prefix of ``10.5072``), or the +`Zenodo production server `__ if the archive is ready for +public use (production DOIs have a prefix of ``10.5281``). diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst index 79439e468f..679074661f 100644 --- a/docs/dev/testing.rst +++ b/docs/dev/testing.rst @@ -304,7 +304,6 @@ You can always check to see what custom flags exist by running Path to a non-standard ETL settings file to use. --gcs-cache-path=GCS_CACHE_PATH If set, use this GCS path as a datastore cache layer. - --sandbox Use raw inputs from the Zenodo sandbox server. The main flexibility that these custom options provide is in selecting where the raw input data comes from and what data the tests should be run diff --git a/docs/release_notes.rst b/docs/release_notes.rst index bd6a802217..48207381f3 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -71,6 +71,8 @@ Data Coverage * Updated :doc:`data_sources/eia860` to include early release data from 2022. 
* Updated :doc:`data_sources/eia923` to include early release data from 2022. +* Updated :doc:`data_sources/epacems` to switch from the old FTP server to the new + CAMPD API, and to include 2022 data. * New :ref:`epacamd_eia` crosswalk version v0.3, see issue :issue:`2317` and PR :pr:`2316`. EPA's updates add manual matches and exclusions focusing on operating units with a generator ID as of 2018. diff --git a/migrations/env.py b/migrations/env.py index 19ec0dfbfd..f99a0b3e29 100644 --- a/migrations/env.py +++ b/migrations/env.py @@ -5,7 +5,7 @@ from sqlalchemy import engine_from_config, pool from pudl.metadata.classes import Package -from pudl.workspace.setup import get_defaults +from pudl.workspace.setup import PudlPaths # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -28,7 +28,7 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. -db_location = get_defaults()["pudl_db"] +db_location = PudlPaths().pudl_db logger.info(f"alembic config.sqlalchemy.url: {db_location}") config.set_main_option("sqlalchemy.url", db_location) diff --git a/migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py b/migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py similarity index 79% rename from migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py rename to migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py index 01a3d2762a..1d2f5bb1a0 100644 --- a/migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py +++ b/migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py @@ -1,8 +1,8 @@ -"""Start over with new PK in emissions_control_equipment_types_eia +"""Regenerate migrations to name all unnamed constraints. 
-Revision ID: 3c458b36094e +Revision ID: 16948340e558 Revises: -Create Date: 2023-06-12 15:32:46.636042 +Create Date: 2023-08-17 12:05:15.020719 """ from alembic import op @@ -10,7 +10,7 @@ from sqlalchemy.dialects import sqlite # revision identifiers, used by Alembic. -revision = '3c458b36094e' +revision = '16948340e558' down_revision = None branch_labels = None depends_on = None @@ -22,75 +22,99 @@ def upgrade() -> None: sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_averaging_periods_eia')) ) op.create_table('balancing_authorities_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_balancing_authorities_eia')) ) op.create_table('balancing_authority_assn_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('state', sa.Text(), nullable=False, comment='Two letter US state abbreviation.'), - sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', 'utility_id_eia', 'state') + sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', 'utility_id_eia', 'state', name=op.f('pk_balancing_authority_assn_eia861')) ) op.create_table('balancing_authority_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia') + sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', name=op.f('pk_balancing_authority_eia861')) ) op.create_table('boiler_generator_assn_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_generator_assn_types_eia')) ) op.create_table('boiler_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), 
sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_status_eia')) ) op.create_table('boiler_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_types_eia')) ) op.create_table('coalmine_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_coalmine_types_eia')) + ) + op.create_table('compiled_geometry_balancing_authority_eia861', + sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=False, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.PrimaryKeyConstraint('balancing_authority_id_eia', 'report_date', 'county_id_fips', 'county', name=op.f('pk_compiled_geometry_balancing_authority_eia861')) + ) + op.create_table('compiled_geometry_utility_eia861', + sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=True, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', 'county_id_fips', name=op.f('pk_compiled_geometry_utility_eia861')) ) op.create_table('contract_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + 
sa.PrimaryKeyConstraint('code', name=op.f('pk_contract_types_eia')) ) op.create_table('data_maturities', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_data_maturities')) ) op.create_table('datasources', sa.Column('datasource', sa.Enum('censusdp1tract', 'eia176', 'eia860', 'eia860m', 'eia861', 'eia923', 'eia_bulk_elec', 'eiawater', 'epacems', 'epacamd_eia', 'ferc1', 'ferc2', 'ferc6', 'ferc60', 'ferc714', 'ferceqr', 'mshamines', 'phmsagas', 'pudl'), nullable=False, comment='Code identifying a dataset available within PUDL.'), sa.Column('partitions', sa.Text(), nullable=True, comment='The data parititions used to generate this instance of the database.'), sa.Column('doi', sa.Text(), nullable=True, comment='Unique digitial object identifier of Zenodo archive.'), sa.Column('pudl_version', sa.Text(), nullable=True, comment='The version of PUDL used to generate this database.'), - sa.PrimaryKeyConstraint('datasource') + sa.PrimaryKeyConstraint('datasource', name=op.f('pk_datasources')) ) op.create_table('emission_control_equipment_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_emission_control_equipment_types_eia')) ) op.create_table('energy_sources_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), @@ -103,13 +127,13 @@ def upgrade() -> None: sa.Column('fuel_phase', sa.Enum('gas', 'liquid', 'solid'), nullable=True, 
comment='Physical phase of matter of the fuel.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_energy_sources_eia')) ) op.create_table('environmental_equipment_manufacturers_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_environmental_equipment_manufacturers_eia')) ) op.create_table('epacamd_eia_subplant_ids', sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -122,13 +146,13 @@ def upgrade() -> None: op.create_table('ferc_accounts', sa.Column('ferc_account_id', sa.Text(), nullable=False, comment="Account identifier from FERC's Uniform System of Accounts for Electric Plant. 
Includes higher level labeled categories."), sa.Column('ferc_account_description', sa.Text(), nullable=True), - sa.PrimaryKeyConstraint('ferc_account_id') + sa.PrimaryKeyConstraint('ferc_account_id', name=op.f('pk_ferc_accounts')) ) op.create_table('firing_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_firing_types_eia')) ) op.create_table('fuel_receipts_costs_aggs_eia', sa.Column('fuel_agg', sa.Text(), nullable=False, comment='Category of fuel aggregation in EIA bulk electricity data.'), @@ -138,67 +162,67 @@ def upgrade() -> None: sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('fuel_received_mmbtu', sa.Float(), nullable=True, comment='Aggregated fuel receipts, in MMBtu, in EIA bulk electricity data.'), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.PrimaryKeyConstraint('fuel_agg', 'geo_agg', 'sector_agg', 'temporal_agg', 'report_date') + sa.PrimaryKeyConstraint('fuel_agg', 'geo_agg', 'sector_agg', 'temporal_agg', 'report_date', name=op.f('pk_fuel_receipts_costs_aggs_eia')) ) op.create_table('fuel_transportation_modes_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_fuel_transportation_modes_eia')) ) 
op.create_table('fuel_types_aer_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_fuel_types_aer_eia')) ) op.create_table('mercury_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_mercury_compliance_strategies_eia')) ) op.create_table('momentary_interruptions_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_momentary_interruptions_eia')) ) op.create_table('nox_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_compliance_strategies_eia')) ) op.create_table('nox_control_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, 
comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_control_status_eia')) ) op.create_table('nox_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_units_eia')) ) op.create_table('operational_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. 
For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_operational_status_eia')) ) op.create_table('particulate_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_particulate_compliance_strategies_eia')) ) op.create_table('particulate_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_particulate_units_eia')) ) op.create_table('plants_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -211,102 +235,109 @@ def upgrade() -> None: sa.Column('street_address', sa.Text(), nullable=True), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), sa.Column('timezone', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 
'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 
'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 
'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 
'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 
'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 
'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name'), - sa.PrimaryKeyConstraint('plant_id_eia') + sa.PrimaryKeyConstraint('plant_id_eia', name=op.f('pk_plants_entity_eia')) ) op.create_table('plants_pudl', sa.Column('plant_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('plant_name_pudl', sa.Text(), nullable=True, comment='Plant name, chosen arbitrarily from the several possible plant names available in the plant matching process. Included for human readability only.'), - sa.PrimaryKeyConstraint('plant_id_pudl') + sa.PrimaryKeyConstraint('plant_id_pudl', name=op.f('pk_plants_pudl')) ) op.create_table('political_subdivisions', sa.Column('country_code', sa.Enum('USA', 'CAN'), nullable=False, comment='Three letter ISO-3166 country code (e.g. USA or CAN).'), sa.Column('country_name', sa.Text(), nullable=True, comment='Full country name (e.g. United States of America).'), - sa.Column('subdivision_code', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. 
US state or Canadian provice abbreviations like CA or AB).'), + sa.Column('subdivision_code', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), sa.Column('subdivision_name', sa.Text(), nullable=True, comment='Full name of political subdivision (e.g. US state or Canadian province names like California or Alberta.'), sa.Column('subdivision_type', sa.Text(), nullable=True, comment='ISO-3166 political subdivision type. E.g. state, province, outlying_area.'), sa.Column('timezone_approx', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 
'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 
'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 
'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 
'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 
'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name of the timezone which 
encompasses the largest portion of the population in the associated geographic area.'), sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('division_name_us_census', sa.Text(), nullable=True, comment='Longer human readable name describing the US Census division.'), - sa.Column('division_code_us_census', sa.Enum('PCN', 'SAT', 'ESC', 'NEW', 'WSC', 'MAT', 'WNC', 'MTN', 'PCC', 'ENC'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), + sa.Column('division_code_us_census', sa.Enum('PCC', 'NEW', 'ESC', 'MAT', 'WSC', 'SAT', 'WNC', 'PCN', 'ENC', 'MTN'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. 
For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), sa.Column('region_name_us_census', sa.Text(), nullable=True, comment='Human-readable name of a US Census region.'), sa.Column('is_epacems_state', sa.Boolean(), nullable=True, comment="Indicates whether the associated state reports data within the EPA's Continuous Emissions Monitoring System."), - sa.PrimaryKeyConstraint('country_code', 'subdivision_code') + sa.PrimaryKeyConstraint('country_code', 'subdivision_code', name=op.f('pk_political_subdivisions')) ) op.create_table('power_purchase_types_ferc1', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_power_purchase_types_ferc1')) + ) + op.create_table('predicted_state_hourly_demand', + sa.Column('state_id_fips', sa.Text(), nullable=False, comment='Two digit state FIPS code.'), + sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), + sa.Column('demand_mwh', sa.Float(), nullable=True), + sa.Column('scaled_demand_mwh', sa.Float(), nullable=True, comment='Estimated electricity demand scaled by the total sales within a state.'), + sa.PrimaryKeyConstraint('state_id_fips', 'utc_datetime', name=op.f('pk_predicted_state_hourly_demand')) ) op.create_table('prime_movers_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + 
sa.PrimaryKeyConstraint('code', name=op.f('pk_prime_movers_eia')) ) op.create_table('regulations_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_regulations_eia')) ) op.create_table('reporting_frequencies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_reporting_frequencies_eia')) ) op.create_table('respondent_id_ferc714', sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), sa.Column('eia_code', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('respondent_id_ferc714') + sa.PrimaryKeyConstraint('respondent_id_ferc714', name=op.f('pk_respondent_id_ferc714')) ) op.create_table('sector_consolidated_eia', sa.Column('code', sa.Integer(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_sector_consolidated_eia')) ) op.create_table('so2_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', 
sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_so2_compliance_strategies_eia')) ) op.create_table('so2_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_so2_units_eia')) ) op.create_table('steam_plant_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_steam_plant_types_eia')) ) op.create_table('utilities_entity_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.PrimaryKeyConstraint('utility_id_eia') + sa.PrimaryKeyConstraint('utility_id_eia', name=op.f('pk_utilities_entity_eia')) ) op.create_table('utilities_pudl', sa.Column('utility_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_pudl', sa.Text(), nullable=True, comment='Utility name, chosen arbitrarily from the several possible utility names available in the utility matching process. 
Included for human readability only.'), - sa.PrimaryKeyConstraint('utility_id_pudl') + sa.PrimaryKeyConstraint('utility_id_pudl', name=op.f('pk_utilities_pudl')) ) op.create_table('utility_assn_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('state', sa.Text(), nullable=False, comment='Two letter US state abbreviation.'), - sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'state') + sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'state', name=op.f('pk_utility_assn_eia861')) ) op.create_table('wet_dry_bottom_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_wet_dry_bottom_eia')) ) op.create_table('advanced_metering_infrastructure_eia861', sa.Column('advanced_metering_infrastructure', sa.Integer(), nullable=True), @@ -325,17 +356,17 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_advanced_metering_infrastructure_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_advanced_metering_infrastructure_eia861')) ) op.create_table('boilers_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('boiler_manufacturer', sa.Text(), nullable=True, comment='Name of boiler manufacturer.'), sa.Column('boiler_manufacturer_code', sa.Text(), nullable=True, comment='EIA short code for boiler manufacturer.'), - sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id') + sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_boilers_entity_eia_boiler_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_boilers_entity_eia_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', name=op.f('pk_boilers_entity_eia')) ) op.create_table('coalmine_eia923', sa.Column('mine_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL mine identifier.'), @@ -345,9 +376,9 @@ def 
upgrade() -> None: sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), sa.Column('mine_id_msha', sa.Integer(), nullable=True, comment='MSHA issued mine identifier.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], ), - sa.PrimaryKeyConstraint('mine_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_coalmine_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], name=op.f('fk_coalmine_eia923_mine_type_code_coalmine_types_eia')), + sa.PrimaryKeyConstraint('mine_id_pudl', name=op.f('pk_coalmine_eia923')) ) op.create_table('demand_hourly_pa_ferc714', sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), @@ -355,8 +386,8 @@ def upgrade() -> None: sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), sa.Column('timezone', sa.Enum('America/New_York', 'America/Chicago', 'America/Denver', 'America/Los_Angeles', 'America/Anchorage', 'Pacific/Honolulu'), nullable=True, comment='IANA timezone name'), sa.Column('demand_mwh', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ), - sa.PrimaryKeyConstraint('respondent_id_ferc714', 'utc_datetime') + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_demand_hourly_pa_ferc714_respondent_id_ferc714_respondent_id_ferc714')), + sa.PrimaryKeyConstraint('respondent_id_ferc714', 'utc_datetime', name=op.f('pk_demand_hourly_pa_ferc714')) ) op.create_table('demand_response_eia861', 
sa.Column('actual_peak_demand_savings_mw', sa.Float(), nullable=True), @@ -373,8 +404,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_response_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_demand_response_eia861')) ) op.create_table('demand_response_water_heater_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=False, comment='EIA short code identifying a balancing authority.'), @@ -383,8 +414,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('water_heater', sa.Integer(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_response_water_heater_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_demand_response_water_heater_eia861')) ) op.create_table('demand_side_management_ee_dr_eia861', sa.Column('annual_indirect_program_cost', sa.Float(), nullable=True), @@ -411,7 +442,7 @@ def upgrade() -> None: sa.Column('time_responsiveness_customers', sa.Integer(), nullable=True), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_ee_dr_eia861_data_maturity_data_maturities')) ) op.create_table('demand_side_management_misc_eia861', sa.Column('energy_savings_estimates_independently_verified', sa.Boolean(), nullable=True), @@ -428,7 +459,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_misc_eia861_data_maturity_data_maturities')) ) op.create_table('demand_side_management_sales_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -438,7 +469,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_sales_eia861_data_maturity_data_maturities')) ) op.create_table('denorm_emissions_control_equipment_eia860', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -461,14 +492,14 @@ def upgrade() -> None: sa.Column('emission_control_equipment_cost', sa.Float(), nullable=True, comment='The total cost to install a piece of emission control equipment.'), sa.Column('emission_control_operating_date', sa.Date(), nullable=True, comment='The date a piece of emissions control equipment began operating. 
Derived from month and year columns in the raw data.'), sa.Column('emission_control_retirement_date', sa.Date(), nullable=True, comment='The expected or actual retirement date for a piece of emissions control equipment. Derived from month and year columns in the raw data.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_emission_control_equipment_type_code_emission_control_equipment_types_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_emissions_control_equipment_eia860_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_emissions_control_equipment_eia860_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], 
['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_emissions_control_equipment_eia860_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_emissions_control_equipment_eia860_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl', name=op.f('pk_denorm_emissions_control_equipment_eia860')) ) op.create_table('denorm_fuel_receipts_costs_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -481,7 +512,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('contract_type_code', sa.Enum('S', 'C', 'NC', 'T'), nullable=True, comment='Purchase type under which receipts occurred in the reporting month. C: Contract, NC: New Contract, S: Spot Purchase, T: Tolling Agreement.'), sa.Column('contract_expiration_date', sa.Date(), nullable=True, comment='Date contract expires.Format: MMYY.'), - sa.Column('energy_source_code', sa.Text(), nullable=True, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=True, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_group_code', sa.Enum('petroleum', 'other_gas', 'petroleum_coke', 'natural_gas', 'coal'), nullable=True, comment='Fuel groups used in the Electric Power Monthly'), sa.Column('supplier_name', sa.Text(), nullable=True, comment='Company that sold the fuel to the plant or, in the case of Natural Gas, pipline owner.'), @@ -507,16 +538,16 @@ def upgrade() -> None: sa.Column('mine_state', sa.Text(), nullable=True, comment='State where the coal mine is located. Two letter abbreviation.'), sa.Column('coalmine_county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4. This is the county where the coal mine is located.'), sa.Column('mine_type_code', sa.Text(), nullable=True, comment='Type of coal mine.'), - sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], 
name=op.f('fk_denorm_fuel_receipts_costs_eia923_contract_type_code_contract_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_mine_type_code_coalmine_types_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_primary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_secondary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_fuel_receipts_costs_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -539,10 +570,10 @@ def upgrade() -> None: sa.Column('mercury_content_ppm', sa.Float(), nullable=True, comment='Mercury content in parts per million (ppm) to the nearest 0.001 ppm.'), 
sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_fuel_combined_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -552,7 +583,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -563,15 +594,15 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_eia923_utility_id_eia_utilities_entity_eia')), + 
sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_eia923')) ) op.create_table('denorm_generation_fuel_combined_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -581,7 +612,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -590,13 +621,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], 
['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_monthly_eia923')) ) op.create_table('distributed_generation_fuel_eia861', sa.Column('estimated_or_actual_fuel_data', sa.Enum('estimated', 'actual'), nullable=True), @@ -606,7 +637,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_fuel_eia861_data_maturity_data_maturities')) ) op.create_table('distributed_generation_misc_eia861', sa.Column('backup_capacity_mw', sa.Float(), nullable=True), @@ -620,7 +651,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_misc_eia861_data_maturity_data_maturities')) ) op.create_table('distributed_generation_tech_eia861', sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), @@ -630,7 +661,7 @@ def upgrade() -> None: sa.Column('tech_class', sa.Enum('backup', 'chp_cogen', 'combustion_turbine', 'fuel_cell', 'hydro', 'internal_combustion', 'other', 'pv', 'steam', 'storage_pv', 'all_storage', 'total', 'virtual_pv', 'wind'), nullable=True), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_tech_eia861_data_maturity_data_maturities')) ) op.create_table('distribution_systems_eia861', sa.Column('circuits_with_voltage_optimization', sa.Integer(), nullable=True), @@ -641,7 +672,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distribution_systems_eia861_data_maturity_data_maturities')) ) op.create_table('dynamic_pricing_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -658,7 +689,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('variable_peak_pricing', sa.Boolean(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_dynamic_pricing_eia861_data_maturity_data_maturities')) ) op.create_table('emissions_control_equipment_eia860', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -675,11 +706,11 @@ def upgrade() -> None: sa.Column('emission_control_equipment_cost', sa.Float(), nullable=True, comment='The total cost to install a piece of emission control equipment.'), sa.Column('emission_control_operating_date', sa.Date(), nullable=True, comment='The date a piece of emissions control equipment began operating. Derived from month and year columns in the raw data.'), sa.Column('emission_control_retirement_date', sa.Date(), nullable=True, comment='The expected or actual retirement date for a piece of emissions control equipment. 
Derived from month and year columns in the raw data.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_emissions_control_equipment_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], name=op.f('fk_emissions_control_equipment_eia860_emission_control_equipment_type_code_emission_control_equipment_types_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_emissions_control_equipment_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_emissions_control_equipment_eia860_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl', name=op.f('pk_emissions_control_equipment_eia860')) ) op.create_table('energy_efficiency_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -699,12 +730,29 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('weighted_average_life_years', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_energy_efficiency_eia861_data_maturity_data_maturities')) + ) + op.create_table('fipsified_respondents_ferc714', + sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), + sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), + sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=True, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_fipsified_respondents_ferc714_respondent_id_ferc714_respondent_id_ferc714')) ) op.create_table('generation_fuel_eia923', 
sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -715,18 +763,18 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_fuel_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_generation_fuel_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generation_fuel_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_generation_fuel_eia923')) ) op.create_table('generation_fuel_nuclear_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('nuclear_unit_id', sa.Text(), nullable=False, comment='For nuclear plants only, the unit number .One digit numeric. 
Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -737,12 +785,12 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'nuclear_unit_id', 'energy_source_code', 'prime_mover_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_fuel_nuclear_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generation_fuel_nuclear_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'nuclear_unit_id', 'energy_source_code', 'prime_mover_code', name=op.f('pk_generation_fuel_nuclear_eia923')) ) op.create_table('generators_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -765,8 +813,8 @@ def upgrade() -> None: sa.Column('original_planned_generator_operating_date', sa.Date(), nullable=True, comment='The date the generator was originally scheduled to be operational'), 
sa.Column('operating_switch', sa.Text(), nullable=True, comment='Indicates whether the fuel switching generator can switch when operating'), sa.Column('previously_canceled', sa.Boolean(), nullable=True, comment='Indicates whether the generator was previously reported as indefinitely postponed or canceled'), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id') + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generators_entity_eia_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', name=op.f('pk_generators_entity_eia')) ) op.create_table('green_pricing_eia861', sa.Column('customer_class', sa.Enum('commercial', 'industrial', 'direct_connection', 'other', 'residential', 'total', 'transportation'), nullable=True, comment='High level categorization of customer type.'), @@ -780,7 +828,17 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_green_pricing_eia861_data_maturity_data_maturities')) + ) + op.create_table('heat_rate_by_unit_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_heat_rate_by_unit_monthly_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl', name=op.f('pk_heat_rate_by_unit_monthly')) ) op.create_table('mergers_eia861', sa.Column('entity_type', sa.Text(), nullable=True, comment='Entity type of principal owner.'), @@ -797,7 +855,7 @@ def upgrade() -> None: sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), sa.Column('zip_code_4', sa.Text(), nullable=True, comment='Four digit US Zip Code suffix.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_mergers_eia861_data_maturity_data_maturities')) ) op.create_table('net_metering_customer_fuel_class_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -813,7 +871,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_net_metering_customer_fuel_class_eia861_data_maturity_data_maturities')) ) op.create_table('net_metering_misc_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -822,7 +880,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_net_metering_misc_eia861_data_maturity_data_maturities')) ) op.create_table('non_net_metering_customer_fuel_class_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -834,7 +892,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_non_net_metering_customer_fuel_class_eia861_data_maturity_data_maturities')) ) op.create_table('non_net_metering_misc_eia861', sa.Column('backup_capacity_mw', sa.Float(), nullable=True), @@ -846,7 +904,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_owned_capacity_mw', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_non_net_metering_misc_eia861_data_maturity_data_maturities')) ) op.create_table('operational_data_misc_eia861', sa.Column('consumed_by_facility_mwh', sa.Float(), nullable=True), @@ -877,7 +935,7 @@ def upgrade() -> None: sa.Column('wholesale_power_purchases_mwh', sa.Float(), nullable=True), sa.Column('winter_peak_demand_mw', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_operational_data_misc_eia861_data_maturity_data_maturities')) ) op.create_table('operational_data_revenue_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -887,14 +945,14 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_operational_data_revenue_eia861_data_maturity_data_maturities')) ) op.create_table('plants_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_plants_eia_plant_id_pudl_plants_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', name=op.f('pk_plants_eia')) ) op.create_table('reliability_eia861', sa.Column('caidi_w_major_event_days_minus_loss_of_service_minutes', sa.Float(), nullable=True), @@ -919,8 +977,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['momentary_interruption_definition'], ['momentary_interruptions_eia.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_reliability_eia861_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['momentary_interruption_definition'], ['momentary_interruptions_eia.code'], name=op.f('fk_reliability_eia861_momentary_interruption_definition_momentary_interruptions_eia')) ) op.create_table('sales_eia861', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -938,8 +996,8 @@ def upgrade() -> None: sa.Column('sales_mwh', sa.Float(), nullable=True, comment='Quantity of electricity sold in MWh.'), sa.Column('sales_revenue', sa.Float(), nullable=True, comment='Revenue from electricity sold.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'state', 'report_date', 'balancing_authority_code_eia', 'customer_class', 'business_model', 'service_type') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_sales_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('utility_id_eia', 'state', 'report_date', 'balancing_authority_code_eia', 'customer_class', 'business_model', 'service_type', name=op.f('pk_sales_eia861')) ) op.create_table('service_territory_eia861', sa.Column('county', sa.Text(), nullable=True, comment='County name.'), @@ -951,15 +1009,35 @@ def upgrade() -> None: sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'county_id_fips') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_service_territory_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'county_id_fips', name=op.f('pk_service_territory_eia861')) + ) + op.create_table('summarized_demand_ferc714', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), + sa.Column('demand_annual_mwh', sa.Float(), nullable=True), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('population_density_km2', sa.Float(), nullable=True, comment='Average population per sq. km area of a service territory.'), + sa.Column('demand_annual_per_capita_mwh', sa.Float(), nullable=True, comment='Per-capita annual demand, averaged using Census county-level population estimates.'), + sa.Column('demand_density_mwh_km2', sa.Float(), nullable=True, comment='Annual demand per km2 of a given service territory.'), + sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), + sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), + sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_summarized_demand_ferc714_respondent_id_ferc714_respondent_id_ferc714')), + sa.PrimaryKeyConstraint('respondent_id_ferc714', 'report_date', name=op.f('pk_summarized_demand_ferc714')) ) op.create_table('utilities_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_eia') + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_eia', name=op.f('pk_utilities_eia')) ) op.create_table('utilities_eia860', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -987,16 +1065,16 @@ def upgrade() -> None: sa.Column('phone_number_2', sa.Text(), nullable=True, comment='Phone number for utility contact 2.'), sa.Column('phone_extension_2', sa.Text(), nullable=True, comment='Phone extension for utility contact 2'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utilities_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_utilities_eia860_utility_id_eia_utilities_entity_eia')), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', name=op.f('pk_utilities_eia860')) ) op.create_table('utilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_name_ferc1', sa.Text(), nullable=True, comment='Name of the responding utility, as it is reported in FERC Form 1. 
For human readability only.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1') + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', name=op.f('pk_utilities_ferc1')) ) op.create_table('utility_data_misc_eia861', sa.Column('alternative_fuel_vehicle_2_activity', sa.Boolean(), nullable=True), @@ -1018,7 +1096,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('wholesale_marketing_activity', sa.Boolean(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_misc_eia861_data_maturity_data_maturities')) ) op.create_table('utility_data_nerc_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -1027,7 +1105,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_nerc_eia861_data_maturity_data_maturities')) ) op.create_table('utility_data_rto_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -1036,14 +1114,14 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_rto_eia861_data_maturity_data_maturities')) ) op.create_table('utility_plant_assn', sa.Column('utility_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_pudl', 'plant_id_pudl') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_utility_plant_assn_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utility_plant_assn_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_pudl', 'plant_id_pudl', name=op.f('pk_utility_plant_assn')) ) op.create_table('balance_sheet_assets_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1055,8 +1133,8 @@ def upgrade() -> None: sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_balance_sheet_assets_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type', name=op.f('pk_balance_sheet_assets_ferc1')) ) op.create_table('balance_sheet_liabilities_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1068,13 +1146,13 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_balance_sheet_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type', name=op.f('pk_balance_sheet_liabilities_ferc1')) ) op.create_table('boiler_fuel_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1082,10 +1160,20 @@ def upgrade() -> None: sa.Column('fuel_mmbtu_per_unit', sa.Float(), nullable=True, comment='Heat content of the fuel in millions of Btus per physical unit.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_boiler_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_boiler_fuel_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_boiler_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_boiler_fuel_eia923')) + ) + op.create_table('capacity_factor_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit 
facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_capacity_factor_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_capacity_factor_by_generator_monthly')) ) op.create_table('cash_flow_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1095,8 +1183,8 @@ def upgrade() -> None: sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_cash_flow_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type', name=op.f('pk_cash_flow_ferc1')) ) op.create_table('denorm_balance_sheet_assets_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -1110,9 +1198,9 @@ def upgrade() -> None: sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_balance_sheet_assets_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_balance_sheet_assets_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type', name=op.f('pk_denorm_balance_sheet_assets_ferc1')) ) op.create_table('denorm_balance_sheet_liabilities_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1126,9 +1214,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_balance_sheet_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_balance_sheet_liabilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type', name=op.f('pk_denorm_balance_sheet_liabilities_ferc1')) ) op.create_table('denorm_boiler_fuel_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1140,7 +1228,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -1148,13 +1236,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boiler_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_denorm_boiler_fuel_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_boiler_fuel_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_eia923')) ) op.create_table('denorm_boiler_fuel_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1166,7 +1254,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -1174,13 +1262,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boiler_fuel_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_monthly_eia923')) ) op.create_table('denorm_cash_flow_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1192,9 +1280,9 @@ def upgrade() -> None: sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_cash_flow_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_cash_flow_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type', name=op.f('pk_denorm_cash_flow_ferc1')) ) op.create_table('denorm_depreciation_amortization_summary_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1206,9 +1294,9 @@ def upgrade() -> None: sa.Column('ferc_account_label', sa.Text(), nullable=False, comment='Long FERC account identifier derived from values reported in the XBRL taxonomies. May also refer to aggregations of individual FERC accounts.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_depreciation_amortization_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_depreciation_amortization_summary_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label', name=op.f('pk_denorm_depreciation_amortization_summary_ferc1')) ) op.create_table('denorm_electric_energy_dispositions_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1219,9 +1307,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_energy_dispositions_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_energy_dispositions_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type', name=op.f('pk_denorm_electric_energy_dispositions_ferc1')) ) op.create_table('denorm_electric_energy_sources_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1232,9 +1320,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_energy_sources_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_energy_sources_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type', name=op.f('pk_denorm_electric_energy_sources_ferc1')) ) op.create_table('denorm_electric_operating_expenses_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1246,9 +1334,9 @@ def upgrade() -> None: sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_operating_expenses_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_operating_expenses_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type', name=op.f('pk_denorm_electric_operating_expenses_ferc1')) ) op.create_table('denorm_electric_operating_revenues_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1262,9 +1350,9 @@ def upgrade() -> None: sa.Column('avg_customers_per_month', sa.Float(), nullable=True, comment='Average number of customers per month.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_operating_revenues_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_operating_revenues_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type', name=op.f('pk_denorm_electric_operating_revenues_ferc1')) ) op.create_table('denorm_electric_plant_depreciation_changes_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1274,14 +1362,14 @@ def upgrade() -> None: sa.Column('depreciation_type', sa.Text(), nullable=False, comment='Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.'), sa.Column('plant_status', sa.Text(), nullable=False, comment='Utility plant financial status (in service, future, leased, total).'), sa.Column('utility_type', sa.Text(), nullable=False, comment='Listing of utility plant types. 
Examples include Electric Utility, Gas Utility, and Other Utility.'), - sa.Column('utility_plant_value', sa.Float(), nullable=True, comment='Utility plant value.'), + sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_plant_depreciation_changes_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_plant_depreciation_changes_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type', name=op.f('pk_denorm_electric_plant_depreciation_changes_ferc1')) ) 
op.create_table('denorm_electric_plant_depreciation_functional_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1296,9 +1384,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_plant_depreciation_functional_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_plant_depreciation_functional_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type', name=op.f('pk_denorm_electric_plant_depreciation_functional_ferc1')) ) op.create_table('denorm_electricity_sales_by_rate_schedule_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1314,8 +1402,8 @@ def upgrade() -> None: sa.Column('kwh_per_customer', sa.Float(), nullable=True, comment='kwh per customer.'), sa.Column('revenue_per_kwh', sa.Float(), nullable=True), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electricity_sales_by_rate_schedule_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electricity_sales_by_rate_schedule_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1329,12 +1417,12 @@ def upgrade() -> None: sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generation_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_denorm_generation_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_eia923')) ) op.create_table('denorm_generation_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1347,11 +1435,11 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_denorm_generation_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_monthly_eia923')) ) op.create_table('denorm_income_statement_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1365,9 +1453,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_income_statement_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_income_statement_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name=op.f('pk_denorm_income_statement_ferc1')) ) op.create_table('denorm_other_regulatory_liabilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1380,8 +1468,8 @@ def upgrade() -> None: sa.Column('increase_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The increase during the reporting period of other regulatory liabilities.'), sa.Column('account_detail', sa.Text(), nullable=True, comment='Description of the account number credited from making debit adjustment to other regulatory liabilities.'), sa.Column('decrease_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The decrease during the reporting period of other regulatory liabilities.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_other_regulatory_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_other_regulatory_liabilities_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_plant_in_service_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -1398,9 +1486,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('transfers', sa.Float(), nullable=True, comment='Cost of transfers into (out of) the account.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - 
sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_plant_in_service_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plant_in_service_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label', name=op.f('pk_denorm_plant_in_service_ferc1')) ) op.create_table('denorm_purchased_power_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -1421,9 +1509,9 @@ def upgrade() -> None: sa.Column('received_mwh', sa.Float(), nullable=True, comment='Gross megawatt-hours received in power exchanges and used as the basis for settlement.'), sa.Column('tariff', sa.Text(), nullable=True, comment='FERC Rate Schedule Number or Tariff. (Note: may be incomplete if originally reported on multiple lines.)'), sa.Column('total_settlement', sa.Float(), nullable=True, comment='Sum of demand, energy, and other charges (USD). For power exchanges, the settlement amount for the net receipt of energy. 
If more energy was delivered than received, this amount is negative.'), - sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], name=op.f('fk_denorm_purchased_power_ferc1_purchase_type_code_power_purchase_types_ferc1')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_purchased_power_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_purchased_power_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_retained_earnings_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1432,15 +1520,14 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('earnings_type', sa.Text(), nullable=False, comment='Label describing types of earnings.'), - sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_retained_earnings_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_retained_earnings_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type', name=op.f('pk_denorm_retained_earnings_ferc1')) ) op.create_table('denorm_transmission_statistics_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1464,8 +1551,8 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Production expenses: Maintenance (USD).'), sa.Column('opex_rents', sa.Float(), nullable=True, comment='Production expenses: rents (USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_transmission_statistics_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_transmission_statistics_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_utilities_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -1495,10 +1582,10 @@ def upgrade() -> None: sa.Column('phone_number_2', sa.Text(), nullable=True, comment='Phone number for utility contact 2.'), sa.Column('phone_extension_2', sa.Text(), nullable=True, comment='Phone extension for utility contact 2'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_utilities_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_utilities_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', name=op.f('pk_denorm_utilities_eia')) ) op.create_table('denorm_utility_plant_summary_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1511,9 +1598,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_utility_plant_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_utility_plant_summary_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type', name=op.f('pk_denorm_utility_plant_summary_ferc1')) ) op.create_table('depreciation_amortization_summary_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1523,8 +1610,8 @@ def upgrade() -> None: sa.Column('ferc_account_label', sa.Text(), nullable=False, comment='Long FERC account identifier derived from values reported in the XBRL taxonomies. May also refer to aggregations of individual FERC accounts.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_depreciation_amortization_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label', name=op.f('pk_depreciation_amortization_summary_ferc1')) ) op.create_table('electric_energy_dispositions_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1533,8 +1620,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_energy_dispositions_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type', name=op.f('pk_electric_energy_dispositions_ferc1')) ) op.create_table('electric_energy_sources_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1543,8 +1630,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_energy_sources_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type', name=op.f('pk_electric_energy_sources_ferc1')) ) op.create_table('electric_operating_expenses_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1554,8 +1641,8 @@ def upgrade() -> None: sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_operating_expenses_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type', name=op.f('pk_electric_operating_expenses_ferc1')) ) op.create_table('electric_operating_revenues_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1567,8 +1654,8 @@ def upgrade() -> None: sa.Column('avg_customers_per_month', sa.Float(), nullable=True, comment='Average number of customers per month.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_operating_revenues_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type', name=op.f('pk_electric_operating_revenues_ferc1')) ) op.create_table('electric_plant_depreciation_changes_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1576,13 +1663,13 @@ def upgrade() -> None: sa.Column('depreciation_type', sa.Text(), nullable=False, comment='Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.'), sa.Column('plant_status', sa.Text(), nullable=False, comment='Utility plant financial status (in service, future, leased, total).'), sa.Column('utility_type', sa.Text(), nullable=False, comment='Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.'), - sa.Column('utility_plant_value', sa.Float(), nullable=True, comment='Utility plant value.'), + sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. 
format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_plant_depreciation_changes_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type', name=op.f('pk_electric_plant_depreciation_changes_ferc1')) ) op.create_table('electric_plant_depreciation_functional_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1595,8 +1682,8 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_plant_depreciation_functional_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type', name=op.f('pk_electric_plant_depreciation_functional_ferc1')) ) op.create_table('electricity_sales_by_rate_schedule_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1610,7 +1697,7 @@ def upgrade() -> None: sa.Column('kwh_per_customer', sa.Float(), nullable=True, comment='kwh per customer.'), sa.Column('revenue_per_kwh', sa.Float(), nullable=True), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electricity_sales_by_rate_schedule_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('epacamd_eia', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -1620,15 +1707,37 @@ def upgrade() -> None: sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=True, comment='Alphanumeric boiler ID.'), sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ) + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_epacamd_eia_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_epacamd_eia_plant_id_eia_generators_entity_eia')) + ) + op.create_table('fuel_cost_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), 
nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_fuel_cost_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_fuel_cost_by_generator_monthly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_fuel_cost_by_generator_monthly_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_fuel_cost_by_generator_monthly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_fuel_cost_by_generator_monthly')) ) op.create_table('fuel_receipts_costs_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), sa.Column('contract_type_code', sa.Enum('S', 'C', 'NC', 'T'), nullable=True, comment='Purchase type under which receipts occurred in the reporting month. C: Contract, NC: New Contract, S: Spot Purchase, T: Tolling Agreement.'), sa.Column('contract_expiration_date', sa.Date(), nullable=True, comment='Date contract expires.Format: MMYY.'), - sa.Column('energy_source_code', sa.Text(), nullable=True, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=True, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_group_code', sa.Enum('petroleum', 'other_gas', 'petroleum_coke', 'natural_gas', 'coal'), nullable=True, comment='Fuel groups used in the Electric Power Monthly'), sa.Column('mine_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL mine identifier.'), @@ -1646,13 +1755,13 @@ def upgrade() -> None: sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['mine_id_pudl'], ['coalmine_eia923.mine_id_pudl'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ) + sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_contract_type_code_contract_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_fuel_receipts_costs_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_energy_source_code_energy_sources_eia')), + 
sa.ForeignKeyConstraint(['mine_id_pudl'], ['coalmine_eia923.mine_id_pudl'], name=op.f('fk_fuel_receipts_costs_eia923_mine_id_pudl_coalmine_eia923')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_fuel_receipts_costs_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_primary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_secondary_transportation_mode_code_fuel_transportation_modes_eia')) ) op.create_table('generation_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1660,9 +1769,56 @@ def upgrade() -> None: sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_eia923_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_generation_eia923')) + ) + op.create_table('generation_fuel_by_generator_energy_source_monthly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', name=op.f('pk_generation_fuel_by_generator_energy_source_monthly_eia923')) + ) + op.create_table('generation_fuel_by_generator_monthly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date 
reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_generation_fuel_by_generator_monthly_eia923')) + ) + op.create_table('heat_rate_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_heat_rate_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_heat_rate_by_generator_monthly_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_heat_rate_by_generator_monthly')) ) op.create_table('income_statement_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1674,8 +1830,59 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_income_statement_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name=op.f('pk_income_statement_ferc1')) + ) + op.create_table('mcoe_generators_monthly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), + sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), + sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), + sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit 
level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_mcoe_generators_monthly_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_mcoe_generators_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_mcoe_generators_monthly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_mcoe_generators_monthly_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_mcoe_generators_monthly_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_mcoe_generators_monthly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_generators_monthly')) + ) + op.create_table('mcoe_monthly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy 
Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_mcoe_monthly_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_monthly')) ) op.create_table('other_regulatory_liabilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1686,7 +1893,7 @@ def upgrade() -> None: sa.Column('increase_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The increase during the reporting period of other regulatory liabilities.'), sa.Column('account_detail', sa.Text(), nullable=True, comment='Description of the account number credited from making debit adjustment to other regulatory liabilities.'), sa.Column('decrease_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The decrease during the reporting period of other regulatory liabilities.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_other_regulatory_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('plant_in_service_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1701,8 +1908,8 @@ def upgrade() -> None: sa.Column('transfers', sa.Float(), nullable=True, comment='Cost of transfers into (out of) the account.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_plant_in_service_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label', name=op.f('pk_plant_in_service_ferc1')) ) op.create_table('plants_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1746,21 +1953,21 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('water_source', sa.Text(), nullable=True, comment='Name of water source associated with the plant.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], ), - sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date') + sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], name=op.f('fk_plants_eia860_balancing_authority_code_eia_balancing_authorities_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_plants_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_plants_eia860_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], name=op.f('fk_plants_eia860_reporting_frequency_code_reporting_frequencies_eia')), + sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], name=op.f('fk_plants_eia860_sector_id_eia_sector_consolidated_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_plants_eia860_utility_id_eia_utilities_eia860')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', name=op.f('pk_plants_eia860')) ) op.create_table('plants_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=False, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_plants_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_plants_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_plants_ferc1')) ) op.create_table('purchased_power_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1779,22 +1986,21 @@ def upgrade() -> None: sa.Column('energy_charges', sa.Float(), nullable=True, comment='Energy charges (USD).'), sa.Column('other_charges', sa.Float(), nullable=True, comment='Other charges, including out-of-period adjustments (USD).'), sa.Column('total_settlement', sa.Float(), nullable=True, comment='Sum of demand, energy, and other charges (USD). For power exchanges, the settlement amount for the net receipt of energy. 
If more energy was delivered than received, this amount is negative.'), - sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], name=op.f('fk_purchased_power_ferc1_purchase_type_code_power_purchase_types_ferc1')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_purchased_power_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('retained_earnings_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('earnings_type', sa.Text(), nullable=False, comment='Label describing types of earnings.'), - sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. This could be a balance or a change in value.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_retained_earnings_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type', name=op.f('pk_retained_earnings_ferc1')) ) op.create_table('transmission_statistics_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1816,19 +2022,19 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Maintenance expenses for the transmission line.'), sa.Column('opex_rents', sa.Float(), nullable=True, comment='Rent expenses for the transmission line.'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Overall expenses for the transmission line.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_transmission_statistics_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('utilities_ferc1_dbf', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_id_ferc1_dbf', sa.Integer(), nullable=False, comment='FERC-assigned respondent_id from DBF reporting years, identifying the reporting entity. Stable from year to year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1_dbf') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utilities_ferc1_dbf_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1_dbf', name=op.f('pk_utilities_ferc1_dbf')) ) op.create_table('utilities_ferc1_xbrl', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_id_ferc1_xbrl', sa.Text(), nullable=False, comment='FERC-assigned entity_id from XBRL reporting years, identifying the reporting entity. Stable from year to year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1_xbrl') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utilities_ferc1_xbrl_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1_xbrl', name=op.f('pk_utilities_ferc1_xbrl')) ) op.create_table('utility_plant_summary_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1839,8 +2045,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utility_plant_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type', name=op.f('pk_utility_plant_summary_ferc1')) ) op.create_table('boilers_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1934,59 +2140,59 @@ def upgrade() -> None: sa.Column('so2_control_proposed_strategy_3', sa.Text(), nullable=True, comment='Proposed strategy to comply with the most stringent sulfur dioxide regulation.'), sa.Column('standard_so2_percent_scrubbed', sa.Float(), nullable=True, comment='The percent of sulfur dioxide to be scrubbed specified by the most stringent sulfur dioxide regulation.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. 
Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], 
['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], 
), - sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], ), - sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date') + sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], 
name=op.f('fk_boilers_eia860_boiler_fuel_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], name=op.f('fk_boilers_eia860_boiler_status_boiler_status_eia')), + sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], name=op.f('fk_boilers_eia860_boiler_type_boiler_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boilers_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_1_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_2_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_3_firing_types_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_4_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_5_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_6_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_boilers_eia860_nox_control_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], name=op.f('fk_boilers_eia860_nox_control_status_code_nox_control_status_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_1_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_2_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], 
['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_3_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_boilers_eia860_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_boilers_eia860_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_mercury_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_nox_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_particulate_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_so2_regulations_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], name=op.f('fk_boilers_eia860_unit_nox_nox_units_eia')), + sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], name=op.f('fk_boilers_eia860_unit_particulate_particulate_units_eia')), + sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], name=op.f('fk_boilers_eia860_unit_so2_so2_units_eia')), + sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], name=op.f('fk_boilers_eia860_wet_dry_bottom_wet_dry_bottom_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date', name=op.f('pk_boilers_eia860')) ) op.create_table('denorm_fuel_by_plant_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the 
data was reported.'), @@ -2009,10 +2215,10 @@ def upgrade() -> None: sa.Column('primary_fuel_by_mmbtu', sa.Text(), nullable=True, comment='Primary fuel for plant as a percentage of heat content.'), sa.Column('waste_fraction_cost', sa.Float(), nullable=True, comment='Waste-heat cost as a percentage of overall fuel cost.'), sa.Column('waste_fraction_mmbtu', sa.Float(), nullable=True, comment='Waste-heat heat content as a percentage of overall fuel heat content (MMBtu).'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_year', 'utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_by_plant_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_fuel_by_plant_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_by_plant_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_year', 'utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_denorm_fuel_by_plant_ferc1')) ) op.create_table('denorm_fuel_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2031,10 +2237,10 @@ def upgrade() -> None: sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_units', sa.Enum('mmbtu', 'gramsU', 'kg', 'mwhth', 'kgal', 'bbl', 'klbs', 'mcf', 'gal', 'mwdth', 
'btu', 'ton'), nullable=True, comment='Reported unit of measure for fuel.'), sa.Column('record_id', sa.Text(), nullable=False, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_fuel_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_fuel_ferc1')) ) op.create_table('denorm_fuel_receipts_costs_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -2057,10 +2263,10 @@ def upgrade() -> None: sa.Column('mercury_content_ppm', sa.Float(), nullable=True, comment='Mercury content in parts per million (ppm) to the nearest 0.001 ppm.'), sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], 
['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_fuel_combined_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2070,7 +2276,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -2079,13 +2285,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_yearly_eia923')) ) op.create_table('denorm_plants_all_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2166,10 +2372,10 @@ def upgrade() -> None: sa.Column('opex_production_before_pumping', sa.Float(), nullable=True, comment='Total production expenses before pumping (USD).'), sa.Column('opex_pumped_storage', sa.Float(), nullable=True, comment='Production expenses: pumped storage (USD).'), sa.Column('opex_pumping', sa.Float(), nullable=True, comment='Production expenses: We are here to PUMP YOU UP! 
(USD).'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_all_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_all_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_all_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_all_ferc1')) ) op.create_table('denorm_plants_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2226,15 +2432,15 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), sa.Column('balancing_authority_code_eia_consistent_rate', sa.Float(), nullable=True, comment='Percentage consistency of balancing authority code across entity records.'), - sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], ), - sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date') + sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], name=op.f('fk_denorm_plants_eia_balancing_authority_code_eia_balancing_authorities_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_plants_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_plants_eia_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], name=op.f('fk_denorm_plants_eia_reporting_frequency_code_reporting_frequencies_eia')), + sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], 
name=op.f('fk_denorm_plants_eia_sector_id_eia_sector_consolidated_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_plants_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', name=op.f('pk_denorm_plants_eia')) ) op.create_table('denorm_plants_hydro_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2279,10 +2485,10 @@ def upgrade() -> None: sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('plant_type', sa.Text(), nullable=True), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_hydro_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_hydro_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_hydro_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_hydro_ferc1')) ) op.create_table('denorm_plants_pumped_storage_ferc1', 
sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2331,10 +2537,10 @@ def upgrade() -> None: sa.Column('plant_hours_connected_while_generating', sa.Float(), nullable=True, comment='Hours the plant was connected to load while generating in the report year.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_pumped_storage_ferc1')) ) op.create_table('denorm_plants_small_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2359,10 +2565,10 @@ def upgrade() -> None: sa.Column('opex_total_nonfuel', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. 
Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), sa.Column('plant_type', sa.Text(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_small_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_small_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_small_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_small_ferc1')) ) op.create_table('denorm_plants_steam_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2420,10 +2626,10 @@ def upgrade() -> None: sa.Column('plant_type', sa.Text(), nullable=True), sa.Column('record_id', sa.Text(), nullable=False, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('water_limited_capacity_mw', sa.Float(), nullable=True, comment='Plant capacity in MW when limited by condenser water.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_steam_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_steam_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_steam_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_steam_ferc1')) ) op.create_table('denorm_plants_utilities_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2433,11 +2639,11 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'utility_id_eia') + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_plants_utilities_eia_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_utilities_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_plants_utilities_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'utility_id_eia', name=op.f('pk_denorm_plants_utilities_eia')) ) op.create_table('denorm_plants_utilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -2445,10 +2651,10 @@ def upgrade() -> None: sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('utility_name_ferc1', sa.Text(), nullable=True, comment='Name of the responding utility, as it is reported in FERC Form 1. 
For human readability only.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_utilities_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_utilities_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_utilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_denorm_plants_utilities_ferc1')) ) op.create_table('fuel_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2462,7 +2668,7 @@ def upgrade() -> None: sa.Column('fuel_cost_per_unit_burned', sa.Float(), nullable=True, comment='Average cost of fuel consumed in the report year per reported fuel unit (USD).'), sa.Column('fuel_cost_per_unit_delivered', sa.Float(), nullable=True, comment='Average cost of fuel delivered in the report year per reported fuel unit (USD).'), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_fuel_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('generators_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2534,31 +2740,41 @@ def upgrade() -> None: sa.Column('reactive_power_output_mvar', sa.Float(), nullable=True, comment='Reactive Power Output (MVAr)'), sa.Column('ferc_qualifying_facility', sa.Boolean(), nullable=True, comment='Indicatates whether or not a generator is a qualifying FERC cogeneation facility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], ), - 
sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generators_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], 
name=op.f('fk_generators_eia860_energy_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_5_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_6_energy_sources_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_generators_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_planned_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generators_eia860_planned_new_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generators_eia860_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_generators_eia860_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generators_eia860_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], 
name=op.f('fk_generators_eia860_startup_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_generators_eia860_utility_id_eia_utilities_eia860')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_generators_eia860')) + ) + op.create_table('heat_rate_by_unit_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_heat_rate_by_unit_yearly_plant_id_eia_plants_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl', name=op.f('pk_heat_rate_by_unit_yearly')) ) op.create_table('plants_hydro_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2566,7 +2782,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.Column('plant_type', sa.Enum('hydro', 'run_of_river', 'run_of_river_with_storage', 'storage', 'na_category'), nullable=True), + sa.Column('plant_type', sa.Enum('storage', 'run_of_river', 'run_of_river_with_storage', 'na_category', 'hydro'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). 
Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2598,7 +2814,7 @@ def upgrade() -> None: sa.Column('opex_misc_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of miscellaneous hydraulic plant (USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_hydro_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_pumped_storage_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2642,7 +2858,7 @@ def upgrade() -> None: sa.Column('opex_pumping', sa.Float(), nullable=True, comment='Production expenses: We are here to PUMP YOU UP! 
(USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_pumped_storage_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_small_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2662,7 +2878,7 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Production expenses: Maintenance (USD).'), sa.Column('fuel_type', sa.Text(), nullable=True), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_small_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_steam_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2670,7 +2886,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_id_ferc1', sa.Integer(), nullable=True, comment='Algorithmically assigned PUDL FERC Plant ID. WARNING: NOT STABLE BETWEEN PUDL DB INITIALIZATIONS.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), - sa.Column('plant_type', sa.Enum('combustion_turbine', 'internal_combustion', 'solar_thermal', 'nuclear', 'wind', 'combined_cycle', 'steam', 'geothermal', 'na_category', 'photovoltaic'), nullable=True), + sa.Column('plant_type', sa.Enum('wind', 'photovoltaic', 'nuclear', 'na_category', 'geothermal', 'combined_cycle', 'combustion_turbine', 'steam', 'solar_thermal', 'internal_combustion'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). 
Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2705,7 +2921,7 @@ def upgrade() -> None: sa.Column('opex_production_total', sa.Float(), nullable=True, comment='Total operating expenses.'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), sa.Column('asset_retirement_cost', sa.Float(), nullable=True, comment='Asset retirement cost (USD).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_steam_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('boiler_cooling_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2713,9 +2929,9 @@ def upgrade() -> None: sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('cooling_id_eia', sa.Text(), nullable=False, comment='The cooling system identification number reported to EIA.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'cooling_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_cooling_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_cooling_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'cooling_id_eia', name=op.f('pk_boiler_cooling_assn_eia860')) ) op.create_table('boiler_emissions_control_equipment_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2724,9 +2940,9 @@ def upgrade() -> None: sa.Column('emission_control_id_type', sa.Text(), nullable=False, comment='The type of emissions control id: so2, nox, particulate, or mercury.'), sa.Column('emission_control_id_eia', sa.Text(), nullable=False, comment="The emission control ID used to collect so2, nox, particulate, and mercury emissions data. This column should be used in conjunction with emissions_control_type as it's not guaranteed to be unique."), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'emission_control_id_type', 'emission_control_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_emissions_control_equipment_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_emissions_control_equipment_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'emission_control_id_type', 'emission_control_id_eia', name=op.f('pk_boiler_emissions_control_equipment_assn_eia860')) ) op.create_table('boiler_generator_assn_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2739,12 +2955,12 @@ def upgrade() -> None: sa.Column('steam_plant_type_code', sa.Integer(), nullable=True, comment='Code that describes types of steam plants from EIA 860. See steam_plant_types_eia table for more details.'), sa.Column('bga_source', sa.Text(), nullable=True, comment='The source from where the unit_id_pudl is compiled. 
The unit_id_pudl comes directly from EIA 860, or string association (which looks at all the boilers and generators that are not associated with a unit and tries to find a matching string in the respective collection of boilers or generator), or from a unit connection (where the unit_id_eia is employed to find additional boiler generator connections).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['boiler_generator_assn_type_code'], ['boiler_generator_assn_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['steam_plant_type_code'], ['steam_plant_types_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'generator_id', 'boiler_id') + sa.ForeignKeyConstraint(['boiler_generator_assn_type_code'], ['boiler_generator_assn_types_eia.code'], name=op.f('fk_boiler_generator_assn_eia860_boiler_generator_assn_type_code_boiler_generator_assn_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_generator_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_generator_assn_eia860_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 
'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_boiler_generator_assn_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['steam_plant_type_code'], ['steam_plant_types_eia.code'], name=op.f('fk_boiler_generator_assn_eia860_steam_plant_type_code_steam_plant_types_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'generator_id', 'boiler_id', name=op.f('pk_boiler_generator_assn_eia860')) ) op.create_table('boiler_stack_flue_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2754,8 +2970,18 @@ def upgrade() -> None: sa.Column('flue_id_eia', sa.Text(), nullable=True, comment='The flue identification value reported to EIA. The flue is a duct, pipe, or opening that transports exhast gases through the stack. This field was reported in conjunction with stack_id_eia until 2013 when stack_flue_id_eia took their place.'), sa.Column('stack_flue_id_eia', sa.Text(), nullable=True, comment='The stack or flue identification value reported to EIA. This denotes the place where emissions from the combusion process are released into the atmosphere. Prior to 2013, this was reported as `stack_id_eia` and `flue_id_eia`.'), sa.Column('stack_flue_id_pudl', sa.Text(), nullable=False, comment='A stack and/or flue identification value created by PUDL for use as part of the primary key for the stack flue equipment and boiler association tables. For 2013 and onward, this value is equal to the value for stack_flue_id_eia. 
Prior to 2013, this value is equal to the value for stack_id_eia and the value for flue_id_eia seperated by an underscore or just the stack_flue_eia in cases where flue_id_eia is NA.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'stack_flue_id_pudl') + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_stack_flue_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'stack_flue_id_pudl', name=op.f('pk_boiler_stack_flue_assn_eia860')) + ) + op.create_table('capacity_factor_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_capacity_factor_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_capacity_factor_by_generator_yearly')) ) op.create_table('denorm_boiler_fuel_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2767,7 +2993,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -2775,13 +3001,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 
'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_yearly_eia923')) ) op.create_table('denorm_boilers_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2891,62 +3117,62 @@ def upgrade() -> None: sa.Column('waste_heat_input_mmbtu_per_hour', sa.Float(), nullable=True, comment='Design waste-heat input rate at maximum continuous steam flow where a waste-heat boiler is a boiler that receives all or a substantial portion of its energy input from the noncumbustible exhaust gases of a separate fuel-burning process (MMBtu per hour).'), sa.Column('wet_dry_bottom', sa.Text(), nullable=True, comment='Wet or Dry Bottom where Wet Bottom is defined as slag tanks that are installed at furnace throat to contain and remove molten ash from the furnace, and Dry Bottom is defined as having no slag tanks at furnace throat area, throat area is clear, and bottom ash drops through throat to bottom ash water hoppers.'), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip 
Code.'), - sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - 
sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], ), - 
sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date') + sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_status_boiler_status_eia')), + sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_type_boiler_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_boilers_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_1_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_2_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_3_firing_types_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_4_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_5_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_6_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_status_code'], 
['nox_control_status_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_status_code_nox_control_status_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_1_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_2_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_3_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_denorm_boilers_eia_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boilers_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_mercury_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_nox_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_particulate_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_so2_regulations_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['unit_nox'], 
['nox_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_nox_nox_units_eia')), + sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_particulate_particulate_units_eia')), + sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_so2_so2_units_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_boilers_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boilers_eia_utility_id_pudl_utilities_pudl')), + sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], name=op.f('fk_denorm_boilers_eia_wet_dry_bottom_wet_dry_bottom_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date', name=op.f('pk_denorm_boilers_eia')) ) op.create_table('denorm_generation_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2959,11 +3185,11 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_generation_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generation_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_yearly_eia923')) ) op.create_table('denorm_generators_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3068,32 +3294,32 @@ def upgrade() -> None: sa.Column('winter_capacity_mw', sa.Float(), nullable=True, comment='The net winter capacity.'), 
sa.Column('winter_estimated_capability_mw', sa.Float(), nullable=True, comment='EIA estimated winter capacity (in MWh).'), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], 
['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generators_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_3_fuel_transportation_modes_eia')), + 
sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_5_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_6_energy_sources_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_denorm_generators_eia_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_planned_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generators_eia_planned_new_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_generators_eia_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generators_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], 
name=op.f('fk_denorm_generators_eia_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generators_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generators_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generators_eia')) ) op.create_table('denorm_ownership_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3106,19 +3332,158 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), sa.Column('owner_utility_id_eia', sa.Integer(), nullable=False, comment="EIA-assigned owner's identification number."), sa.Column('owner_name', sa.Text(), nullable=True, comment='Name of owner.'), - sa.Column('owner_state', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. 
Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_ownership_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_ownership_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_ownership_eia860_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_ownership_eia860_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_ownership_eia860_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia', name=op.f('pk_denorm_ownership_eia860')) + ) + op.create_table('fuel_cost_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, 
comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_fuel_cost_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_fuel_cost_by_generator_yearly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_fuel_cost_by_generator_yearly_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_fuel_cost_by_generator_yearly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_fuel_cost_by_generator_yearly')) + ) + op.create_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), + sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=False, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 'utility_id_eia', 'ownership_record_type', name=op.f('pk_generation_fuel_by_generator_energy_source_owner_yearly_eia923')) + ) + op.create_table('generation_fuel_by_generator_energy_source_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 
name=op.f('pk_generation_fuel_by_generator_energy_source_yearly_eia923')) + ) + op.create_table('generation_fuel_by_generator_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_generation_fuel_by_generator_yearly_eia923')) + ) + op.create_table('heat_rate_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_heat_rate_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_heat_rate_by_generator_yearly_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_heat_rate_by_generator_yearly')) + ) + op.create_table('mcoe_generators_yearly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), + sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), + sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), + sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. 
For generators this is based on which tab the generator was listed in in EIA 860.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_mcoe_generators_yearly_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_mcoe_generators_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_mcoe_generators_yearly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_mcoe_generators_yearly_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_mcoe_generators_yearly_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_mcoe_generators_yearly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_generators_yearly')) + ) + op.create_table('mcoe_yearly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_mcoe_yearly_plant_id_eia_generators_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_yearly')) ) op.create_table('ownership_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3127,17 +3492,17 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('owner_utility_id_eia', sa.Integer(), nullable=False, comment="EIA-assigned owner's identification number."), sa.Column('owner_name', sa.Text(), nullable=True, comment='Name of owner.'), - sa.Column('owner_state', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 
'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_ownership_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_ownership_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_ownership_eia860_utility_id_eia_utilities_eia860')), + 
sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia', name=op.f('pk_ownership_eia860')) ) # ### end Alembic commands ### @@ -3145,11 +3510,19 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_table('ownership_eia860') + op.drop_table('mcoe_yearly') + op.drop_table('mcoe_generators_yearly') + op.drop_table('heat_rate_by_generator_yearly') + op.drop_table('generation_fuel_by_generator_yearly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_yearly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923') + op.drop_table('fuel_cost_by_generator_yearly') op.drop_table('denorm_ownership_eia860') op.drop_table('denorm_generators_eia') op.drop_table('denorm_generation_yearly_eia923') op.drop_table('denorm_boilers_eia') op.drop_table('denorm_boiler_fuel_yearly_eia923') + op.drop_table('capacity_factor_by_generator_yearly') op.drop_table('boiler_stack_flue_assn_eia860') op.drop_table('boiler_generator_assn_eia860') op.drop_table('boiler_emissions_control_equipment_assn_eia860') @@ -3158,6 +3531,7 @@ def downgrade() -> None: op.drop_table('plants_small_ferc1') op.drop_table('plants_pumped_storage_ferc1') op.drop_table('plants_hydro_ferc1') + op.drop_table('heat_rate_by_unit_yearly') op.drop_table('generators_eia860') op.drop_table('fuel_ferc1') op.drop_table('denorm_plants_utilities_ferc1') @@ -3183,9 +3557,15 @@ def downgrade() -> None: op.drop_table('plants_eia860') op.drop_table('plant_in_service_ferc1') op.drop_table('other_regulatory_liabilities_ferc1') + op.drop_table('mcoe_monthly') + op.drop_table('mcoe_generators_monthly') op.drop_table('income_statement_ferc1') + op.drop_table('heat_rate_by_generator_monthly') + op.drop_table('generation_fuel_by_generator_monthly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_monthly_eia923') op.drop_table('generation_eia923') 
op.drop_table('fuel_receipts_costs_eia923') + op.drop_table('fuel_cost_by_generator_monthly') op.drop_table('epacamd_eia') op.drop_table('electricity_sales_by_rate_schedule_ferc1') op.drop_table('electric_plant_depreciation_functional_ferc1') @@ -3219,6 +3599,7 @@ def downgrade() -> None: op.drop_table('denorm_balance_sheet_liabilities_ferc1') op.drop_table('denorm_balance_sheet_assets_ferc1') op.drop_table('cash_flow_ferc1') + op.drop_table('capacity_factor_by_generator_monthly') op.drop_table('boiler_fuel_eia923') op.drop_table('balance_sheet_liabilities_ferc1') op.drop_table('balance_sheet_assets_ferc1') @@ -3229,6 +3610,7 @@ def downgrade() -> None: op.drop_table('utilities_ferc1') op.drop_table('utilities_eia860') op.drop_table('utilities_eia') + op.drop_table('summarized_demand_ferc714') op.drop_table('service_territory_eia861') op.drop_table('sales_eia861') op.drop_table('reliability_eia861') @@ -3240,10 +3622,12 @@ def downgrade() -> None: op.drop_table('net_metering_misc_eia861') op.drop_table('net_metering_customer_fuel_class_eia861') op.drop_table('mergers_eia861') + op.drop_table('heat_rate_by_unit_monthly') op.drop_table('green_pricing_eia861') op.drop_table('generators_entity_eia') op.drop_table('generation_fuel_nuclear_eia923') op.drop_table('generation_fuel_eia923') + op.drop_table('fipsified_respondents_ferc714') op.drop_table('energy_efficiency_eia861') op.drop_table('emissions_control_equipment_eia860') op.drop_table('dynamic_pricing_eia861') @@ -3277,6 +3661,7 @@ def downgrade() -> None: op.drop_table('reporting_frequencies_eia') op.drop_table('regulations_eia') op.drop_table('prime_movers_eia') + op.drop_table('predicted_state_hourly_demand') op.drop_table('power_purchase_types_ferc1') op.drop_table('political_subdivisions') op.drop_table('plants_pudl') @@ -3301,6 +3686,8 @@ def downgrade() -> None: op.drop_table('datasources') op.drop_table('data_maturities') op.drop_table('contract_types_eia') + 
op.drop_table('compiled_geometry_utility_eia861') + op.drop_table('compiled_geometry_balancing_authority_eia861') op.drop_table('coalmine_types_eia') op.drop_table('boiler_types_eia') op.drop_table('boiler_status_eia') diff --git a/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py b/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py deleted file mode 100644 index f3b0a370cd..0000000000 --- a/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Add service terrtory, FERC714, state demand assets - -Revision ID: 1ec25c296a6d -Revises: 88d9201ae4c4 -Create Date: 2023-06-16 09:33:08.254754 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import sqlite - -# revision identifiers, used by Alembic. -revision = '1ec25c296a6d' -down_revision = 'e608f95a3b78' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('compiled_geometry_balancing_authority_eia861', - sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=False, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.PrimaryKeyConstraint('balancing_authority_id_eia', 'report_date', 'county_id_fips', 'county') - ) - op.create_table('compiled_geometry_utility_eia861', - sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=True, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', 'county_id_fips') - ) - op.create_table('predicted_state_hourly_demand', - sa.Column('state_id_fips', sa.Text(), nullable=False, comment='Two digit state FIPS code.'), - sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), - sa.Column('demand_mwh', sa.Float(), nullable=True), - sa.Column('scaled_demand_mwh', sa.Float(), nullable=True, comment='Estimated electricity demand scaled by the total sales within a state.'), - sa.PrimaryKeyConstraint('state_id_fips', 'utc_datetime') - ) - op.create_table('fipsified_respondents_ferc714', 
- sa.Column('eia_code', sa.Integer(), nullable=True), - sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), - sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), - sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), - sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=True, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ) - ) - op.create_table('summarized_demand_ferc714', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), - sa.Column('demand_annual_mwh', sa.Float(), nullable=True), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - 
sa.Column('population_density_km2', sa.Float(), nullable=True, comment='Average population per sq. km area of a service territory.'), - sa.Column('demand_annual_per_capita_mwh', sa.Float(), nullable=True, comment='Per-capita annual demand, averaged using Census county-level population estimates.'), - sa.Column('demand_density_mwh_km2', sa.Float(), nullable=True, comment='Annual demand per km2 of a given service territory.'), - sa.Column('eia_code', sa.Integer(), nullable=True), - sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), - sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), - sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ), - sa.PrimaryKeyConstraint('respondent_id_ferc714', 'report_date') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('summarized_demand_ferc714') - op.drop_table('fipsified_respondents_ferc714') - op.drop_table('predicted_state_hourly_demand') - op.drop_table('compiled_geometry_utility_eia861') - op.drop_table('compiled_geometry_balancing_authority_eia861') - # ### end Alembic commands ### diff --git a/migrations/versions/28bb2b27e2cf_add_mcoe_table.py b/migrations/versions/28bb2b27e2cf_add_mcoe_table.py deleted file mode 100644 index 5ebcbcf22e..0000000000 --- a/migrations/versions/28bb2b27e2cf_add_mcoe_table.py +++ /dev/null @@ -1,250 +0,0 @@ -"""add mcoe table - -Revision ID: 28bb2b27e2cf -Revises: e2670d0ec0eb -Create Date: 2023-08-11 10:12:15.950150 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '28bb2b27e2cf' -down_revision = 'e2670d0ec0eb' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('heat_rate_by_unit_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl') - ) - op.create_table('capacity_factor_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('fuel_cost_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_generators_monthly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_monthly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_unit_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl') - ) - op.create_table('capacity_factor_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('fuel_cost_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_generators_yearly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit 
level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_yearly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('mcoe_yearly') - op.drop_table('mcoe_generators_yearly') - op.drop_table('heat_rate_by_generator_yearly') - op.drop_table('fuel_cost_by_generator_yearly') - op.drop_table('capacity_factor_by_generator_yearly') - op.drop_table('heat_rate_by_unit_yearly') - op.drop_table('mcoe_monthly') - op.drop_table('mcoe_generators_monthly') - op.drop_table('heat_rate_by_generator_monthly') - op.drop_table('fuel_cost_by_generator_monthly') - op.drop_table('capacity_factor_by_generator_monthly') - op.drop_table('heat_rate_by_unit_monthly') - # ### end Alembic commands ### diff --git a/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py b/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py deleted file mode 100644 index 0a11ee7605..0000000000 --- a/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py +++ /dev/null @@ -1,118 +0,0 @@ -"""dagsterize net gen allocation revision - -Revision ID: 8c0a49eb9098 -Revises: e608f95a3b78 -Create Date: 2023-06-23 11:00:51.387245 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '8c0a49eb9098' -down_revision = 'e608f95a3b78' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('generation_fuel_by_generator_energy_source_monthly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code') - ) - op.create_table('generation_fuel_by_generator_monthly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, 
comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), - sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=False, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 'utility_id_eia', 'ownership_record_type') - ) - op.create_table('generation_fuel_by_generator_energy_source_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code') - ) - op.create_table('generation_fuel_by_generator_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('generation_fuel_by_generator_yearly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_yearly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923') - op.drop_table('generation_fuel_by_generator_monthly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_monthly_eia923') - # ### end Alembic commands ### diff --git a/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py b/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py deleted file mode 100644 index 3cd7c9ff86..0000000000 --- a/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py +++ /dev/null @@ -1,42 +0,0 @@ -"""rename to dollar amount - -Revision ID: 9a32db1fbe6e -Revises: 88d9201ae4c4 -Create Date: 2023-06-13 15:11:44.266717 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = '9a32db1fbe6e' -down_revision = '1ec25c296a6d' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('denorm_electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.')) - batch_op.drop_column('utility_plant_value') - - with op.batch_alter_table('electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.')) - batch_op.drop_column('utility_plant_value') - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('utility_plant_value', sa.FLOAT(), nullable=True)) - batch_op.drop_column('dollar_value') - - with op.batch_alter_table('denorm_electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('utility_plant_value', sa.FLOAT(), nullable=True)) - batch_op.drop_column('dollar_value') - - # ### end Alembic commands ### diff --git a/migrations/versions/e2670d0ec0eb_.py b/migrations/versions/e2670d0ec0eb_.py deleted file mode 100644 index cbb81d1df5..0000000000 --- a/migrations/versions/e2670d0ec0eb_.py +++ /dev/null @@ -1,24 +0,0 @@ -"""empty message - -Revision ID: e2670d0ec0eb -Revises: 8c0a49eb9098, 9a32db1fbe6e -Create Date: 2023-07-10 10:58:58.580305 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'e2670d0ec0eb' -down_revision = ('8c0a49eb9098', '9a32db1fbe6e') -branch_labels = None -depends_on = None - - -def upgrade() -> None: - pass - - -def downgrade() -> None: - pass diff --git a/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py b/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py deleted file mode 100644 index 1538ee596f..0000000000 --- a/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py +++ /dev/null @@ -1,38 +0,0 @@ -"""drop amount from retained earnings - -Revision ID: e608f95a3b78 -Revises: 3c458b36094e -Create Date: 2023-06-19 09:04:49.591285 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'e608f95a3b78' -down_revision = '3c458b36094e' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('denorm_retained_earnings_ferc1', schema=None) as batch_op: - batch_op.drop_column('amount') - - with op.batch_alter_table('retained_earnings_ferc1', schema=None) as batch_op: - batch_op.drop_column('amount') - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('retained_earnings_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('amount', sa.FLOAT(), nullable=True)) - - with op.batch_alter_table('denorm_retained_earnings_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('amount', sa.FLOAT(), nullable=True)) - - # ### end Alembic commands ### diff --git a/notebooks/work-in-progress/CEMS_by_utility.ipynb b/notebooks/work-in-progress/CEMS_by_utility.ipynb index d3d4c26aa7..c8a085ac32 100644 --- a/notebooks/work-in-progress/CEMS_by_utility.ipynb +++ b/notebooks/work-in-progress/CEMS_by_utility.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -43,13 +44,12 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "#display(pudl_settings)\n", + "from pudl.workspace.setup import PudlPaths\n", "\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "#display(ferc1_engine)\n", "\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db())\n", "#display(pudl_engine)\n", "\n", "#pudl_engine.table_names()\n", @@ -57,6 +57,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -76,6 +77,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -160,6 +162,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -215,7 +218,7 @@ "\n", "# for yr in years:\n", "# print(f'starting calculation for {yr}')\n", - "# epacems_path = (pudl_settings['parquet_dir'] + f'/epacems/year={yr}')\n", + "# epacems_path = (PudlPaths().output_dir + f'/epacems/year={yr}')\n", "# cems_dd = (\n", "# dd.read_parquet(epacems_path, columns=my_cols)\n", "# .assign(state=lambda x: x['state'].astype('string'))\n", @@ -299,6 +302,7 @@ ] 
}, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -331,6 +335,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/work-in-progress/better-heatrates.ipynb b/notebooks/work-in-progress/better-heatrates.ipynb index bbce86000b..4547c5ba3d 100644 --- a/notebooks/work-in-progress/better-heatrates.ipynb +++ b/notebooks/work-in-progress/better-heatrates.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -71,6 +72,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,6 +80,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -116,6 +119,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,6 +169,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,6 +218,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -239,6 +245,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -246,6 +253,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -278,6 +286,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "tags": [] @@ -311,11 +320,12 @@ } ], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "display(pudl_settings)\n", "\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "\n", + "# TODO(janrous): provide property for accessing ferc db?\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "\n", "API_KEY_EIA = os.environ[\"API_KEY_EIA\"]\n", "\n", @@ -323,6 +333,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", 
"metadata": {}, "source": [ @@ -384,6 +395,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -421,6 +433,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -431,6 +444,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -442,6 +456,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -480,6 +495,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "tags": [] @@ -548,6 +564,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -569,6 +586,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -584,6 +602,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -597,6 +616,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -608,6 +628,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -618,6 +639,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -705,6 +727,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -713,6 +736,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -728,6 +752,7 @@ "source": [] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/work-in-progress/eia_column_changes_through_time.ipynb b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb index 3054a4089e..bed0633cef 100644 --- a/notebooks/work-in-progress/eia_column_changes_through_time.ipynb +++ b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb @@ -1,16 +1,19 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "# EIA923 Column Changes\n", "This notebook reimplements the excel extractor process to extract each sheet of each 
excel file separately. This preserves the original structure for easier comparison." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -20,74 +23,67 @@ "import pandas as pd\n", "pd.options.display.max_columns = 150\n", "pd.options.display.max_rows = 150" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "# make notebooks full width\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ], + "metadata": {}, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 3, "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()" - ], - "outputs": [], - "metadata": {} + "# make notebooks full width\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] }, { "cell_type": "code", "execution_count": 4, + "metadata": {}, + "outputs": [], "source": [ "eia923_tables = pc.PUDL_TABLES['eia923']\n", "eia923_years = list(range(2001, 2020))" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 5, - "source": [ - "ds = pudl.workspace.datastore.Datastore(local_cache_path=Path(pudl_settings[\"data_dir\"]))" - ], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "from pudl.workspace.setup import PudlPaths\n", + "\n", + "ds = pudl.workspace.datastore.Datastore(local_cache_path=PudlPaths().data_dir)" + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ "eia923_extractor = pudl.extract.eia923.Extractor(ds)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, + "outputs": [], 
"source": [ "dfs = {}\n", "# Lightly altered extractor code (pudl.extract.excel.GenericExtractor.extract) to avoid concatenating prematurely\n", @@ -110,21 +106,16 @@ " newdata = eia923_extractor.process_raw(newdata, page, **partition)\n", " newdata = eia923_extractor.process_renamed(newdata, page, **partition)\n", " dfs[partition['year']][page] = newdata" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "for k, v in dfs.items():\n", - " print(k, v.keys())" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "2001 dict_keys(['generation_fuel', 'stocks'])\n", "2002 dict_keys(['generation_fuel', 'stocks'])\n", @@ -148,11 +139,16 @@ ] } ], - "metadata": {} + "source": [ + "for k, v in dfs.items():\n", + " print(k, v.keys())" + ] }, { "cell_type": "code", "execution_count": 23, + "metadata": {}, + "outputs": [], "source": [ "# make dataframes of columns. One df per excel sheet, one row per year\n", "from collections import defaultdict\n", @@ -163,70 +159,32 @@ " col_dfs[page].append(dfs[year][page].columns.to_frame().rename(columns={0: year}))\n", " except KeyError:\n", " continue" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 24, + "metadata": {}, + "outputs": [], "source": [ "col_dfs = {k : pd.concat(v, axis=1).T for k, v in col_dfs.items()}" - ], - "outputs": [], - "metadata": {} + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "This shows the state of the columns for each year for each sheet. When a column is introduced (or disappears), that entry will be NaN. This particular page only has data from 2011 on." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 35, - "source": [ - "col_dfs['plant_frame']" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " report_year plant_id_eia plant_name_eia plant_state eia_sector \\\n", - "2011 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2012 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2013 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2014 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2015 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2016 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2017 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2018 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2019 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "\n", - " sector_name naics_code combined_heat_power reporting_frequency \\\n", - "2011 sector_name naics_code combined_heat_power reporting_frequency \n", - "2012 NaN naics_code combined_heat_power reporting_frequency \n", - "2013 sector_name naics_code combined_heat_power reporting_frequency \n", - "2014 NaN naics_code combined_heat_power reporting_frequency \n", - "2015 NaN naics_code combined_heat_power reporting_frequency \n", - "2016 NaN naics_code combined_heat_power reporting_frequency \n", - "2017 NaN naics_code combined_heat_power reporting_frequency \n", - "2018 NaN naics_code combined_heat_power reporting_frequency \n", - "2019 NaN naics_code combined_heat_power reporting_frequency \n", - "\n", - " nameplate_capacity_mw \n", - "2011 nameplate_capacity_mw \n", - "2012 NaN \n", - "2013 NaN \n", - "2014 NaN \n", - "2015 NaN \n", - "2016 NaN \n", - "2017 NaN \n", - "2018 NaN \n", - "2019 NaN " - ], "text/html": [ "
\n", "\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
statereport_datebulk_agg_fuel_cost_per_mmbtufuel_type_code_pudl
0AK2012-01-010.0coal
1AK2012-02-010.0coal
2AK2012-03-010.0coal
3AK2012-04-010.0coal
4AK2012-05-010.0coal
\n","
"],"text/plain":[" state report_date bulk_agg_fuel_cost_per_mmbtu fuel_type_code_pudl\n","0 AK 2012-01-01 0.0 coal\n","1 AK 2012-02-01 0.0 coal\n","2 AK 2012-03-01 0.0 coal\n","3 AK 2012-04-01 0.0 coal\n","4 AK 2012-05-01 0.0 coal"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.head()"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
report_datefuel_cost_per_unitstatefuel_type_code_pudl
02022-07-0151.83AKcoal
12022-06-0159.16AKcoal
22022-05-0153.04AKcoal
32022-04-0151.69AKcoal
42022-03-0158.55AKcoal
\n","
"],"text/plain":[" report_date fuel_cost_per_unit state fuel_type_code_pudl\n","0 2022-07-01 51.83 AK coal\n","1 2022-06-01 59.16 AK coal\n","2 2022-05-01 53.04 AK coal\n","3 2022-04-01 51.69 AK coal\n","4 2022-03-01 58.55 AK coal"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["api_df.drop(columns=['name', 'series_id', 'units']).head()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/plain":["((18501, 4), (30804, 7))"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.shape, api_df.shape"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 18501 entries, 0 to 18500\n","Data columns (total 4 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 state 18501 non-null object \n"," 1 report_date 18501 non-null datetime64[ns]\n"," 2 bulk_agg_fuel_cost_per_mmbtu 18501 non-null float64 \n"," 3 fuel_type_code_pudl 18501 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(2)\n","memory usage: 578.3+ KB\n"]}],"source":["bulk_df.info()"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Int64Index: 30804 entries, 0 to 10499\n","Data columns (total 7 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 report_date 30804 non-null datetime64[ns]\n"," 1 fuel_cost_per_unit 18615 non-null float64 \n"," 2 state 30804 non-null object \n"," 3 units 30804 non-null object \n"," 4 series_id 30804 non-null object \n"," 5 name 30804 non-null object \n"," 6 fuel_type_code_pudl 30804 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(5)\n","memory usage: 1.9+ MB\n"]}],"source":["api_df.info()"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["keep_cols = ['state', 'report_date',\n","'fuel_cost_per_mmbtu', 
'fuel_type_code_pudl', 'fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["frc_api = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['fuel_cost_per_unit']]"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["frc_bulk = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['bulk_agg_fuel_cost_per_mmbtu']]"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["((560374, 6), (560374, 6))"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api.shape, frc_bulk.shape"]},{"cell_type":"code","execution_count":26,"metadata":{},"outputs":[],"source":["mismatch = frc_api['fuel_cost_from_eiaapi'] ^ frc_bulk['fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[{"data":{"text/plain":["sum 0.0\n","mean 0.0\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["mismatch.agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":28,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":28,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":30,"metadata":{},"outputs":[],"source":["diff = frc_api['fuel_cost_per_mmbtu'].sub(frc_bulk['fuel_cost_per_mmbtu'])"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[{"data":{"text/plain":["sum 186016.00000\n","mean 0.33195\n","Name: fuel_cost_per_mmbtu, dtype: 
float64"]},"execution_count":32,"metadata":{},"output_type":"execute_result"}],"source":["diff.ne(0).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"text/plain":["sum 133687.000000\n","mean 0.238567\n","Name: fuel_cost_per_mmbtu, dtype: float64"]},"execution_count":35,"metadata":{},"output_type":"execute_result"}],"source":["diff.abs().gt(1e-3).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/plain":["sum 136530.000000\n","mean 0.243641\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":37,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":37,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVcElEQVR4nO3db4xc53me8euObCusY9ZSZW1pkihllHIjibEdbVgVRouNlURMZJhCURU0VItqVbAQFFcGWMRk8qHoBwJEW6e12lgtEbuiUDUqkVgVEVtJZDbTIoD+mHLlMBStirUYhSEjxm5Ta21A0apPP8yRONwdzs5Sy5kdnusHDObMc86Zeefl8J533jlzNlWFJKkdfmjcDZAkjY6hL0ktYuhLUosY+pLUIoa+JLXIO8bdgMVcddVVtWHDhnNq3//+93n3u989ngZNAPtnMPtnMPtnsEnpn2efffY7VfW++fUVH/obNmzg8OHD59Q6nQ4zMzPjadAEsH8Gs38Gs38Gm5T+SfKH/epO70hSixj6ktQihr4ktciioZ/kg0me67l8L8lnklyZ5IkkLzbXV/TsszvJ8SQvJLmlp35jkiPNuvuT5GI9MUnSQouGflW9UFUfrqoPAzcCPwAeBXYBh6pqI3CouU2S64BtwPXAFuALSS5r7u4BYAewsblsWdZnI0kaaKnTOzcD/6uq/hDYCuxv6vuB25rlrcAjVfVaVb0EHAc2J1kDrK6qJ6t7lreHevaRJI3AUg/Z3Ab8WrM8VVWnAarqdJKrm/pa4KmefU42tdeb5fn1BZLsoPuJgKmpKTqdzjnrZ2dnF9R0lv0zmP0zmP0z2KT3z9Chn+RdwCeA3Ytt2qdWA+oLi1X7gH0A09PTNf+Y2Ek5TnZc7J/B7J/B7J/BJr1/ljK987PAN6rqleb2K82UDc31maZ+Eljfs9864FRTX9enLkkakaWE/ic5O7UDcBDY3ixvBx7rqW9LcnmSa+h+YftMMxX0apKbmqN27uzZR5poG3Z95a2LtJINNb2T5C8APw38o57yXuBAkruBl4HbAarqaJIDwPPAHHBvVb3R7HMP8CCw
Cni8uUiSRmSo0K+qHwB/aV7tu3SP5um3/R5gT5/6YeCGpTdTkrQc/EWuJLXIij/LprRSOX+vSeRIX5JaxJG+tMx6PwGc2HvrGFsiLeRIX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE0JekFjH0JalFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWmSo0E/y3iS/nuRbSY4l+RtJrkzyRJIXm+srerbfneR4kheS3NJTvzHJkWbd/UlyMZ6UJKm/YUf6nwd+q6r+GvAh4BiwCzhUVRuBQ81tklwHbAOuB7YAX0hyWXM/DwA7gI3NZcsyPQ9J0hAWDf0kq4G/BXwRoKr+vKr+DNgK7G822w/c1ixvBR6pqteq6iXgOLA5yRpgdVU9WVUFPNSzjyRpBIb5w+gfAP4U+A9JPgQ8C9wHTFXVaYCqOp3k6mb7tcBTPfufbGqvN8vz6wsk2UH3EwFTU1N0Op1z1s/Ozi6o6Sz7Z7Dl6p+dm+YW3WYS/x18/Qw26f0zTOi/A/hx4NNV9XSSz9NM5ZxHv3n6GlBfWKzaB+wDmJ6erpmZmXPWdzod5td0lv0z2HL1z127vrLoNifuePuPM2q+fgab9P4ZZk7/JHCyqp5ubv863TeBV5opG5rrMz3br+/Zfx1wqqmv61OXJI3IoqFfVX8C/FGSDzalm4HngYPA9qa2HXisWT4IbEtyeZJr6H5h+0wzFfRqkpuao3bu7NlHuiRt2PWVty7SSjDM9A7Ap4GHk7wL+Dbw9+m+YRxIcjfwMnA7QFUdTXKA7hvDHHBvVb3R3M89wIPAKuDx5iJJGpGhQr+qngOm+6y6+Tzb7wH29KkfBm5YQvukS0bvaP/E3lvH2BK1mb/IlaQWMfQlqUUMfUlqEUNfklrE0JekFhn2kE1J4PH2mniO9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE4/SlMfCMmxoXR/qS1CKO9KVF+CtcXUoc6UtSixj6ktQihr4ktYihL0ktYuhLUosMFfpJTiQ5kuS5JIeb2pVJnkjyYnN9Rc/2u5McT/JCklt66jc293M8yf1JsvxPSZJ0Pks5ZPMnq+o7Pbd3AYeqam+SXc3tzya5DtgGXA+8H/hakmur6g3gAWAH8BTwVWAL8PgyPA9pWXmYpi5Vb2d6Zyuwv1neD9zWU3+kql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERh2pF/A7yQp4N9X1T5gqqpOA1TV6SRXN9uupTuSf9PJpvZ6szy/vkCSHXQ/ETA1NUWn0zln/ezs7IKazrJ/Bhumf3ZumhtNY2DF/Vv5+hls0vtn2ND/aFWdaoL9iSTfGrBtv3n6GlBfWOy+qewDmJ6erpmZmXPWdzod5td0lv0z2DD9c9cIp3dO3DEzsscahq+fwSa9f4aa3qmqU831GeBRYDPwSjNlQ3N9ptn8JLC+Z/d1wKmmvq5PXZI0IouGfpJ3J3nPm8vAzwB/ABwEtjebbQcea5YPAtuSXJ7kGmAj8EwzFfRqkpuao3bu7NlHkjQCw0zvTAGPNkdXvgP4T1X1W0m+DhxIcjfwMnA7QFUdTXIAeB6YA+5tjtwBuAd4EFhF96gdj9yRpBFaNPSr6tvAh/rUvwvcfJ599gB7+tQPAzcsvZnSxedhmmoDf5ErSS3i+fSlMfOvaGmUHOlLUosY+pLUIoa+JLWIoS9JLeIXudIK4pe6utgMfekS4puGFuP0jiS1iKEvSS1i6EtSixj6ktQihr4ktYihL0ktYuhLUosY+pLUIoa+JLWIv8iVJpC/vNWFMvTVaiv5TyQa7LoYDH1pwq3kNy6tPM7pS1KLDB36SS5L8j+S/GZz+8okTyR5sbm+omfb3UmOJ3khyS099RuTHGnW3Z8ky/t0JEmDLGV65z7gGLC6ub0LOFRVe5Psam5/Nsl1wDbgeuD9wNeSXFtVbwAPADuAp4CvAluAx5flmUiX
MKdwtFyGGuknWQfcCvxqT3krsL9Z3g/c1lN/pKpeq6qXgOPA5iRrgNVV9WRVFfBQzz6SpBEYdqT/r4FfAN7TU5uqqtMAVXU6ydVNfS3dkfybTja115vl+fUFkuyg+4mAqakpOp3OOetnZ2cX1HSW/TNYb//s3DQ33sZcRBf6GvD1M9ik98+ioZ/k48CZqno2ycwQ99lvnr4G1BcWq/YB+wCmp6drZubch+10Osyv6Sz7Z7De/rnrEp42OXHHzAXt5+tnsEnvn2FG+h8FPpHk54AfBlYn+Y/AK0nWNKP8NcCZZvuTwPqe/dcBp5r6uj51SdKILDqnX1W7q2pdVW2g+wXtf62qvwccBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0Am/nx1l7gQNJ7gZeBm4HqKqjSQ4AzwNzwL3NkTsA9wAPAqvoHrXjkTuSNEJLCv2q6gCdZvm7wM3n2W4PsKdP/TBww1IbKUlaHv4iV5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEf9colrnyB//30v6RGvSIIa+dInyD6urH6d3JKlFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklpk0dBP8sNJnknyzSRHk/yzpn5lkieSvNhcX9Gzz+4kx5O8kOSWnvqNSY406+5PkovztCRJ/QxzwrXXgI9V1WySdwK/l+Rx4G8Dh6pqb5JdwC7gs0muA7YB1wPvB76W5NqqegN4ANgBPAV8FdgCPL7sz0qap/fkYzs3jbEh0pgtOtKvrtnm5jubSwFbgf1NfT9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERjq1MpJLgOeBf4q8CtV9XSSqao6DVBVp5Nc3Wy+lu5I/k0nm9rrzfL8er/H20H3EwFTU1N0Op1z1s/Ozi6o6Sz7Z6Gdm+beWp5ade7tNljK68HXz2CT3j9DhX4zNfPhJO8FHk1yw4DN+83T14B6v8fbB+wDmJ6erpmZmXPWdzod5td0lv2z0F3nTO/M8bkj7fpTEifumBl6W18/g016/yzp6J2q+jOgQ3cu/pVmyobm+kyz2Ulgfc9u64BTTX1dn7okaUSGOXrnfc0InySrgJ8CvgUcBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0AsN8xl0D7G/m9X8IOFBVv5nkSeBAkruBl4HbAarqaJIDwPPAHHBvMz0EcA/wILCK7lE7HrkjSSO0aOhX1e8DH+lT/y5w83n22QPs6VM/DAz6PkCSdBH5i1xJahFDX5JapF3HralVen+FK6nLkb4ktYihL0ktYuhLUosY+pLUIoa+JLWIoS9JLWLoS1KLGPqS1CKGviS1iL/I1SXFX+FKgznSl6QWcaQvtUDvJ6ATe28dY0s0bo70JalFHOlLLeOov90c6UtSixj6ktQihr4ktYihL0ktsmjoJ1mf5HeTHEtyNMl9Tf3KJE8kebG5vqJnn91Jjid5IcktPfUbkxxp1t2fJBfnaUmS+hlmpD8H7KyqHwVuAu5Nch2wCzhUVRuBQ81tmnXbgOuBLcAXklzW3NcDwA5gY3PZsozPRZK0iEVDv6pOV9U3muVXgWPAWmArsL/ZbD9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkEVjScfpJNgAfAZ4GpqrqNHTfGJJc3Wy2FniqZ7eTTe31Znl+vd/j7KD7iYCpqSk6nc4562dnZxfUdFab+2fnprlFt5laNdx2bdDvddLm188wJr1/hg79JD8C/Abwmar63oDp+H4rakB9YbFqH7APYHp6umZmZs5Z3+l0mF/TWW3un7uGOOHazk1zfO6Iv0sEOHHHzIJam18/w5j0/hnq6J0k76Qb+A9X1Zeb8ivNlA3N9ZmmfhJY37P7OuBUU1/Xpy5JGpFhjt4J8EXgWFX9cs+qg8D2Znk7
8FhPfVuSy5NcQ/cL22eaqaBXk9zU3OedPftIkkZgmM+4HwU+BRxJ8lxT+0VgL3Agyd3Ay8DtAFV1NMkB4Hm6R/7cW1VvNPvdAzwIrAIeby7S2+I59KXhLRr6VfV79J+PB7j5PPvsAfb0qR8GblhKAyVJy8df5EpSixj6ktQiHremieQ8vnRhHOlLUosY+pLUIoa+JLWIc/pSi/n3ctvHkb4ktYgjfUkD+Wng0mLoS1rAQ2IvXYa+JoZBJL19hr6koTnVM/n8IleSWsTQl6QWMfQlqUUMfUlqEb/I1YrmETuj82Zf79w0h9Fw6XKkL0ktYuhLUosY+pLUIoa+JLXIot/WJPkS8HHgTFXd0NSuBP4zsAE4Afzdqvo/zbrdwN3AG8A/rqrfbuo3Ag8Cq4CvAvdVVS3v09Gk84tb6eIaZqT/ILBlXm0XcKiqNgKHmtskuQ7YBlzf7POFJJc1+zwA7AA2Npf59ylJusgWDf2q+u/A/55X3grsb5b3A7f11B+pqteq6iXgOLA5yRpgdVU92YzuH+rZR5I0Ihd6MO5UVZ0GqKrTSa5u6muBp3q2O9nUXm+W59f7SrKD7qcCpqam6HQ656yfnZ1dUNNZk9w/3WPEL66pVaN5nEk1bP/8m4cfe2t509q/eDGbtKJM8v8vWP5fYKRPrQbU+6qqfcA+gOnp6ZqZmTlnfafTYX5NZ01y/9w1gjn9nZvm+NwRf3x0PhfSPyfumLk4jVmBJvn/F1x46L+SZE0zyl8DnGnqJ4H1PdutA0419XV96pJf3kojdKGHbB4EtjfL24HHeurbklye5Bq6X9g+00wFvZrkpiQB7uzZR5I0IsMcsvlrwAxwVZKTwD8F9gIHktwNvAzcDlBVR5McAJ4H5oB7q+qN5q7u4ewhm483F0nSCC0a+lX1yfOsuvk82+8B9vSpHwZuWFLrJEnLyl/kSlKLGPqS1CKGviS1iAcrayw8TFMaD0Nf0tvW+yZ+Yu+tY2yJFuP0jiS1iKEvSS1i6EtSixj6ktQifpGrkfGIHWn8DH1dVAa9tLIY+lp2Br20cjmnL0kt4khf0rLyh1ormyN9SWoRR/paFs7jS5PB0NcFM+i1GKd6Vh5DX4sy3KVLh3P6ktQijvQljYRTPSuDoa++nNLRxeQbwPiMPPSTbAE+D1wG/GpV7R11G9SfQa+V5Hyvx+V8k2jjm89IQz/JZcCvAD8NnAS+nuRgVT0/ynZcas73wjXENQmW+joddvu2hPhSjXqkvxk4XlXfBkjyCLAVWNGhP8yIY5gX4qDtz7duqY+xYddX2LlpjrsMfLXcxXozWez/10p/s0lVje7Bkr8DbKmqf9jc/hTw16vq5+dttwPY0dz8IPDCvLu6CvjORW7uJLN/BrN/BrN/BpuU/vkrVfW++cVRj/TTp7bgXaeq9gH7znsnyeGqml7Ohl1K7J/B7J/B7J/BJr1/Rn2c/klgfc/tdcCpEbdBklpr1KH/dWBjkmuSvAvYBhwccRskqbVGOr1TVXNJfh74bbqHbH6pqo5ewF2dd+pHgP2zGPtnMPtnsInun5F+kStJGi/PvSNJLWLoS1KLTGzoJ/l0kheSHE3yz8fdnpUoyT9JUkmuGndbVpok/yLJt5L8fpJHk7x33G1aCZJsaf5fHU+ya9ztWUmSrE/yu0mONblz37jbdCEmMvST/CTdX/L+WFVdD/zLMTdpxUmynu7pLl4ed1tWqCeAG6rqx4D/Cewec3vGruc0KT8LXAd8Msl1423VijIH7KyqHwVuAu6dxP6ZyNAH7gH2VtVrAFV1ZsztWYn+FfAL9Pnxm6Cqfqeq5pqbT9H9zUjbvXWalKr6c+DN06QIqKrTVfWNZvlV4BiwdrytWrpJDf1rgb+Z5Okk/y3JT4y7QStJkk8Af1xV3xx3WybEPwAeH3cjVoC1wB/13D7JBIbaKCTZAHwEeHrMTVmyFXs+/SRfA/5yn1W/RLfdV9D9iPUTwIEkH6gWHX+6SP/8IvAz
o23RyjOoj6rqsWabX6L7sf3hUbZthRrqNCltl+RHgN8APlNV3xt3e5ZqxYZ+Vf3U+dYluQf4chPyzyT5f3RPgvSno2rfuJ2vf5JsAq4BvpkEutMW30iyuar+ZIRNHLtBryGAJNuBjwM3t2nAMICnSVlEknfSDfyHq+rL427PhZjU6Z3/AnwMIMm1wLuYjLPeXXRVdaSqrq6qDVW1ge5/5B9vW+AvpvljPp8FPlFVPxh3e1YIT5MyQLqjqC8Cx6rql8fdngs1qaH/JeADSf6A7pdN2x2paYn+LfAe4IkkzyX5d+Nu0Lg1X2y/eZqUY8CBCzxNyqXqo8CngI81r5nnkvzcuBu1VJ6GQZJaZFJH+pKkC2DoS1KLGPqS1CKGviS1iKEvSS1i6EtSixj6ktQi/x8iTBQ1MrleqwAAAABJRU5ErkJggg==","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["diff.abs().replace(np.inf, np.nan).replace(0, np.nan).transform(np.log10).hist(bins=100)"]},{"cell_type":"code","execution_count":40,"metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["state True\n","report_date True\n","fuel_type_code_pudl True\n","fuel_cost_from_eiaapi True\n","dtype: bool"]},"metadata":{},"output_type":"display_data"}],"source":["# data is all in same order\n","frc_api.drop(columns=['fuel_cost_per_mmbtu', 'fuel_cost_per_unit']).eq(\n"," frc_bulk.drop(columns=['fuel_cost_per_mmbtu', 'bulk_agg_fuel_cost_per_mmbtu'])\n",").all()"]},{"cell_type":"code","execution_count":42,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":42,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAApiElEQVR4nO3deXhc133e8e+ZFYPBDPaN2LmTkriI0GJLsrVZki3ZjBQvsrPVdqLGjRM3TZtYdZs0TZ3YjZ88ceukCqv4sRWrdtNY8iLJ1i5LskVJpERRFDdxJwiS2JfZl3v6BxaDJAguGGJmgPfzPHyIubg49wdg8M6Zc8+9x1hrERGR4uXKdwEiIjI7CnIRkSKnIBcRKXIKchGRIqcgFxEpcp58HLSmpsa2t7fn49AiIkVr69atfdba2tO35yXI29vb2bJlSz4OLSJStIwxh6fbrqEVEZEipyAXESlyCnIRkSKnIBcRKXIKchGRIqcgFxEpcgpyEZEipyAXEZkjjmOJJjM4Tm5vH64gFxGZI/F0lq7BGPF0NqftKshFRC6hqb3wgNdNc2UpAa87p8dQkIuIXEJTe+EulyHo9+BymZweQ0EuIpJjc9ELn0pBLiKSY3PRC59KQS4ikmNz0QufKi+3sRURmc8meuFzdrw5O5KIiFwSCnIRkSKnIBcRKXI5GcQxxhwCRoEskLHWduaiXRERObdcjsbfZK3ty2F7IiIFwXEs8XSWgNd9SacRXiwNrYiInMOlukdKruQqyC3wlDFmqzHmvhy1KSJSEOZ6XviFytXQynXW2m5jTB3wtDFmt7X2xak7jAf8fQCtra05OqyISG5NN4wy1/PCL1ROeuTW2u7x/3uAR4Grp9lnk7W201rbWVtbm4vDiojkzMT9UaLJTEEPo0xn1kFujAkaY0ITHwO3ATtm266IyFxxHEtfJMmR/hhAQQ+jTCcX7xXqgUeNMRPt/R9r7U9z0K6IyCU1MYziOJb+aJLqMv8lv8HVpTDrILfWHgDW5qAWEZE5M9EL748maa4opbUqWLDTC8+lcEfvRUQuoXg6S38kVbS
98KkU5CKyIDiOZTSRJpHOUh30E/C6aa0uLdpe+FS6IEhE5r2JYZQd3cNsPtDPQCw1Jws+zBX1yEVk3jr9ZObimiBLa8uoKvXlu7ScUpCLyLyUyTh0DcWIJrO0VhX3ycxz0dCKiMxLA7EUO4+PEPS7Cfo982YYZTrqkYvIvDH18vqqUh+dbVVUlfrmbYBPUI9cRIpeJuPQM5JgNJ6evLze43FRFy7B45n/MTf/v0MRmdccx9I1FGPL4QHimWzRXV6fCxpaEZGiNHGTq1gqw2giw+rGMDVB/4LogZ9OQS4iRSWVytI9Eqc84OVwfwzHWporS6kp88/7sfCzWXgvXSJS1I4Nx/jJ9uMMRJMsqS1jWV1oQYc4qEcuIkVgYhgFIBzwsnJRmKqgn1DAm+fKCoOCXEQKXjSZYcexYfxeF8vqQlzdUb3gTmjOREEuIgVr6iX2fq+LporAvL6w52IpyEWkoKRSWQ4PRgCoKSuhZzTJovIAKxrC8/YS+9lSkItIwXAcy76+CI9tOwYu+NiGlsl54Qrws8tZkBtj3MAW4Ji19q5ctSsiC0c8nSXrWO64ooGAz01TeSk+n8bCzyWXPfIvALuAcA7bFJEFYGJueENZCYtry9QDv0A5mUdujGkG7gQezEV7IjL/pVJZDvSOMjiapGs4xrO7ejgRSehk5kXI1QVBfwv8MeCcbQdjzH3GmC3GmC29vb05OqyIFKvDgxG+u/kwrx/pp7LExy2r6lgUDuS7rKI06yA3xtwF9Fhrt860n7V2k7W201rbWVtbO9vDikiRmri4x7EWr8fQEC6hPOijvaZM4+EXKRdj5NcBHzHGfAgoAcLGmO9Ya389B22LyDwx9SZXA9E0DeEAH7+qjUXhgIZSZmnWQW6tvR+4H8AYcyPw7xXiIjJhIsCjqQzdQ3Gw0FxVSnmpj8oyf77Lmxc0j1xELplMxuHoYJRjgzFKPF5aqgOU+ub3smv5kNMgt9a+ALyQyzZFpPg4jmU0MbZaz87uYarK/CytD1AbKlGAXwLqkYtIzkwE+EA0SddQnJ7hJJctKqepspRQiVchfokoyEUkJ1KpLPv6IvSNJuiPpljTXM7yuhDVC3TVnrmkIBeRWYvF0jy79yTv9oxy68p6VjSEFeBzSD9lEZkVx7G8cWyIn759nDK/h5bqIPXlAYX4HFKPXEQuSiqV5chQFGstfh/ce00bKxtClAd8+S5twVGQi8gFm7jd7JM7jlMV9PL+FfW0tgR1MjNPFOQicl4yGYeBWIqqUh/JrEPWsdy6up6qUh/1ujozrxTkInJOmYzD3pMj7DoxwvVLa6kN6XazhURBLiIzSiQybDk6yJGBCJWlfkrGwzvoV3wUCv0mRGRasViat48P0RdNsuXgILesqueK5gpCJd58lyanUZCLyCkmrs589WA//++1w9SF/dy0qpHO1ipKShQZhUi/FRGZFImm+PmBXvpiCcgafrWzhbaaIEuqQ7pXeAFTkIsIjmPpGYrxf7ce5aW9PXTUBrn3qnbWtlTqwp4ioCAXWcAyGYfDg6NsPzrEvpOjvLS3j5tW1nHbFQ0sqylXiBcJBbnIApXJOOzoHuSfXjnIruMjbGit4Dfe28HtqxoIa8GHoqIgF1mAEokMrx7u55V3e0gk0mxc38ytq+tpqwypF16EFOQiC0gkmuK1I/0MRBO8dmCI2pCPT1y7mKvbqjUjpYjN+jdnjCkBXgT84+39i7X2z2bbrojkztBogp/sPMbe4yO8un+ApqpS7lzXzIbWShrLS9ULL3K5eAlOAjdbayPGGC/wsjHmJ9bazTloW0RmqWcoypcf38GLe/pwu2F9WzUfv7qFGxbXqxc+T8z6t2ittUBk/KF3/J+dbbsiMjuxWJqfHz7JP79ylJ/tHaAx7OHDVzaxcV0zHTVh9cLnkZy8HBtj3MBWYCnwd9baV6fZ5z7gPoDW1tZcHFZEzmIkkuTbr+zn+28cZSiaYUN7mM9c38ENSxvUC5+HcvI
btdZmgXXGmArgUWPM5dbaHaftswnYBNDZ2akeu8glkEpl2dM7xGPbjvPU9qP4XW5+ZUMzn35vBy3VId2pcJ7K6UuztXbIGPMCcAew4xy7i0gOjUSSfHfrIZ58+wTD8RRXdtRy+2WNXL+kjtJS3ehqPsvFrJVaID0e4gHgVuCrs65MRM7Lod4h/v65vQyPJthxfJRkFj5yZQu/874l1IdL1QtfAHLRI28Evj0+Tu4C/tla+1gO2hWRGaRSWX5x4Dj/5Yc7ODSYBeDO1VW8f2UDt61qpCJUkucKZa7kYtbKdmB9DmoRkfO0+3gf/+0HO9nRNcpQdqwH9cHlYf7zh6+gobIs3+XJHNPpa5Eiksk4vN3Vzx/8ny0cHXEAWNsY4FPXtXDnqjbKglrBfiFSkIsUib7hGN/ZfIDHt3dzdMShBPjIlVX88e1rqSkvzXd5kkcKcpECNxJJ8vg7R/nB64fZ1pXA74YbFpfzm9e3ccPiRs0LFwW5SCHrGYry5Z/s4Km3+4g70Bx28fGr2/jNaxbrZKZMUpCLFKCh0QSPvd3FT7d38cqhKF7gfe1BPnfbSjY012rZNTmFglykwAyNJvjLJ7bz4zd7iQONIcOvrm/mt29Yrl64TEtBLlIgugdG+ebL+zjUG+PV/UNkgRuXhvntG5bS2VarsXA5Kz0zRArAvpMDfO6hrbzbn8IF3LK8nKuW1HHPumbNSJFzUpCL5NFIJMmPdhzhmy8c4MBQBoCbl5XzF3ev04U9ct4U5CJ54DiWt4/18l8f3cFb3XEyQEelm09c08K9G5ZoLFwuiIJcZI7tPt7HX/14J+8cG6UvOXZ5/fXtIf7yo2tprSnPd3lShBTkInMkk3F4ds9h/v3DOxkdu7qejnLDPVe18WtXL6YqHMhvgVK0FOQic2Db0RP8yXffZM+AM7ntfR2l/OVHN9BcHc5jZTIfKMhFLqFYLM2Pdx7iy4/sZfweVwSAP/pgG/d2LtdNriQnFOQil0hX/wh/8YO3ePrdESb64YvL4G//VSdrmuvzWpvMLwpykRzrHhjlgefe4Zld/XRHx7Y1heBf37yUe9Z0qBcuOacgF8mhvSf6+eyDmzkaGXvsA25aUc6XPnyFZqTIJZOLNTtbgIeABsABNllrvz7bdkWKSc9QlAdf2sX33zhJf3xsW2MQ7v+VVdy2rFWX18sllYtnVwb4I2vtG8aYELDVGPO0tXZnDtoWKXg/2rGHP/jOvsnHFR646bIq/vADq9ULlzmRizU7jwPHxz8eNcbsApoABbnMaz1DUb7+1DYefmNocluZC773e9ewsrEmf4XJgpPT93vGmHbGFmJ+dZrP3QfcB9Da2prLw4rMuS2Hu/k333qTnvgvtzWUwDc+vV4hLnMuZ0FujCkDvg/8W2vtyOmft9ZuAjYBdHZ22lwdV2QudQ+M8qf/8jLPHPjlhT3lbvjSPUv4lSuWacEHyYucBLkxxstYiD9srX0kF22KFJKh0QRffeLnfPfNxCnbf21DBb9/61rdqVDyKhezVgzwj8Aua+3fzL4kkcKy+WAXv/kPb5E6bftXPtHGvesvz0tNIlPlokd+HfAbwNvGmG3j2/6jtfaJHLQtkjcDI3H+9pltPPTawCnbr2+Cv/zk9ZqRIgUjF7NWXgZMDmoRKRi/OHCU39m0nehp2//7ve18fN1lealJ5Gx0lYLIFF39I/zu37/EjtMS/J41ZXzxQ53UVQTzU5jIDBTkIuP+/sVX+O9PDJyx/cFPr+LWFYvzUJHI+VGQy4K3s7uXX/kfr51xMrPVDQ/+/rUsb6jOS10i50tBLgva1376LN94IXHG9r/51GLuWbMqDxWJXDgFuSxIL+47zG8+uOOM7RXAv/zhe1haXzXnNYlcLAW5LDh3ffFxzoxw+NonO/jo2tVzXo/IbCnIZd5zHMtwPMWNf/EMw9N8Pgg8+R9u0NqZUrQU5DKvjUSS/P1TW3jgtaFpP/+56/380R03k8w6OI7F5dIlEVJ
8FOQyLw2NJvjelv185clDZ93nnz6znnBJgL09o1gL7TVBgn79SUjx0bNW5pWRSJLPbXqGn/fMvN+9V9bwwPMHuPuqRfjcPtY0VRDw6s6FUpwU5DJvdPWP8L6/fglnhn2+dFcTXleAbzy9j744NJb7+fytq2iuKNWwihQtBbkUJcexxNNZekciPPzKUXyuAf7u5ciMX/PVe1ZzbDDOi3uOk0rDiho/92xoprUqqBCXoqYgl6IUT2fpGozx3VcO8a1Xj82471Wl8GsfuZw3Dg1xsDfC8voAt1/RyM2r61hSXa4Ql6KnIJei5Lbw/7a+xbdePWMxqlOsqYZfvWElo4k0jnVoLPdz95WtXNVeg8fjmqNqRS4tBbkUFcexPLFzP5//zp4Z9/MCn7qyknd607yw6wTN1SEawwGu7qhhbXOlQlzmFQW5FI2BkThf+v4L/GTPTKcz4Yt3NrK6YRE/3HKE/ScHcTeGuKYqwPVL6+ioCc1ZiE+M40/Mhpn4WEM5kmu5WrPzm8BdQI+1VmtfSU5lMg6vd53gkw+8OeN+jcD/+t1r2HpkkG/+7AAtVT4+fX0HK1tChH0BGspL5yTEHccyGk/TF03QH03RUlGKcRkGo2laq0s1V11yLlfPqG8B3wAeylF7sgA5jiWazAAQ8LqJp7NkHIev/OAZ/u/bM3/tTa3w2ds6yTqw+/gwkWSG5qpq3rO0DoOhPlwyGaBTjxP0e3AcS+9oAmugrqzkosI+k3HojyaxWYe9fVEymSzdI3E8xkUilaWsxEtzZems5qpP9PD9bhfJrKPevUzKSZBba180xrTnoi1ZmBzH0hdJcqQ/SjLjUBP08vSOQ3ztuZlnpAC0+OC6K5bQO5Iikc6QyVo+sm4RGzqqONQb57KmMDVl/snQG42n2XpkkHCJl2V1ZRwbjvPW4QFcbhc3raynLlxywfUfG4zyyBvdlJW42H5kiOuX19BRHeTYUIKmygB1oQBBv+eig3fi59MfSVFd5mMwlqK5Ur17GaNngVwyU8eIzxZgE/tkMg5dAzEqg152nhzioZ9t58l9Z94nfKoPNMMnblnL0YEETRUBhmJp1jVXsrQ2RBbLovIgtWWlVJX6AIgmM/jdLvpjSQZjSVoqSzg6FGP/yQiXt1RQXeaf3Pd8v7/RRJpYKsP+/gh7T45y+xU1LG0IcbA3QlWZn6s7qmmumN2QzkSI944mqQ2N1Rj0e3QlqkyasyA3xtwH3AfQ2to6V4eVPIomM+zvibCkroxQwDttsMfTWY70x/B7Damsw7GRKL//8MzjKKv8cM36Rq5d2sCicID9J+Mc6Y+xvqWKJbUhklmH/T0RPC4X5QHP2AtF0mF/b4TKoJeRaJr26jJKvG729AyzrCHE8rrzPwk6MQbeG01wsC9K33jA3r2hCWMcDvX1c9Oqejrbqikv9c2qFx5PZ3EcS3907BgT7yw060ammrMgt9ZuAjYBdHZ22rk6ruTZlAybuIhnYqw4msyQyTpYk+W1Q4M8/9ounjw6c3P/+qoKlrQ24XO7GIwmiSRTJNMZvB5D0smStpag38OyhhB+t4sTI3H2nYgQKnXTM5zg+GiMkNfLUDJNY0UJV7VVUXIBY82ZjEPXUIxDfaPsORmhpSrAqsYQxhiSGYeM4/Crna1c2VRBaan3on9sjmPpGU3QPZhgSV2Q1qqgxsTlrDS0Ijlzeo876PewrD40OQQQ8LonQzyezvLuyQiDsQR7Twzy1acOztj2ygrD3e9dwoamajYfGqDM76a5KsBbR4bYc2KY+ooAN/jGev0wdhJzNJ7mnWMj7D4xjN/twuNxkUw5hAJprmyroq6shHg6y/7eCEtqx941zCQWS/PSgV66BuM0hH0YwO9xcbQ/RjJtubKjktqykoseC5/684uns3QPxUlmsriM0Vi4zChX0w+/C9wI1BhjuoA/s9b+Yy7aluIxtcc9EWYTs0KiyQwBr3sykAJeN+Ggi28//w4/3JOcsd3fu6GedR2
NeN1uyoJeKoJe6kM+9hwfYX/PKGuaK7lueS1NVaV0DcUoD3gxgJO1+DywpC6IwTAQS3JlWzX1ZSVUB/1j9yC3Fs7x/tBxLMOxFM/tOcEjW4/SXhUkngzg8bg52BPB7XZzVUc1rZXBWQ15RJOZyReVoN/DsroQgEJczilXs1Y+mYt2pLhN7XHD2DDEQCyF3+PixEiC5spSvMZwcGCUbd09/Mk/vztje2HgixtXsqG9muFYlmgqTWXAy8r6MO+eHGZfb4Q1LZXcuWYRjeWlRJMZXMby8rs9jCYztFUFKfG4aa8Jsv9EjPUt1ZNj4dFkhq7BGIvKAyxrCE174jCTcTg+GOPgUITdx0fZ3zNKdamPqxdXkcg4JFOWdW0V1IZKqL3IaYtnGH9RcbnMOd8hiEzQS73kzNQe+HA0xdGhGIf7o3S2VdEQLmEomuToYIwvP/Iab/fP3FZno4ff/sBKonGDyxiqQ16ciEPXQJyDfVEqgn4+dXU7i6vLqAz5GY6n+MW+PkYTSfaeHCHs9zFY4mb38Qh3r2vmmiXVkzNSekYSVEyZ1322YZBjQ1EefGk/h3ojlPjcXNFcTmNbAGMMdeUB2quCtMyyFz7VxNi+ZqPIhVKQS05NnAw83B/h6ECcFQ1hgh4333lzN1/58TnOZALLy+D9a1twu90sqa4gay0v7D1B90CM9YuryGRcrF4UHhu+8XkYjCf5xbs9HB2KsOdEhBX1IWrKAuw9OYrX7eKmlQ0srQ1RUjL2VO8ZSfDGkUGubK2cdr54IpHh9SO9vN01jMdn2X1skFDQS2PYT9DvYV9PjJoyH1c0V9JWXZbTk48TL4QiF0rPmgXqfOZ4X4hUKsvhgQi90SSH+qN0VAXx+1w8+ta7fGzzObrf467vCHJtRw0+n5c1LVW4sHzv1QM8tv04sQRE0w7/6rqlLK0pI5rOsP3oIE+908Wzu/uw6QTloRBtdSX0jSS5eWU961sraSwfWzBiYoy+qtTHla2V084XP9I3zF/8aDsv7x0hDiytdNNcXcZQNMOBnj42hgJ84LI6slmoD5ac189tYnipqtSnKYNyySjIF6jTT0zORiqV5eUDPTzxVjelPjdL68p49cBJHnvjCPuGzv31a2ogHA5yqD9JU0WC9YuDvNXVy/M7etlyNEIGuKo5yL1XtbK0powTkThP7ejmrUODHO4bZWA0iQWODI9y3dIaPnp1G4urgpO98Inx8InvdbqeeFf/CP/moS3s6PnlRUhjtwiwNJYH+MDljWxc00TCsTy/u5f68gDt5zG9sD+aZPOBfq7uqCIc8F3QC2euX2xl/lKQL1Cnn5i8GI5j6Y3EeXlPL1sPneTYYAKfSfDQORZ6OF1vFI5Ho5T5PVQHDc9sP8pz747iABUe2NBRzh/cvoLF1RWMptI8uaObF3adJJnNUBtyc2QYhpOworaU2y6rY/Wi8lNmysz0vY5EkvxsXw+PbT00GeIlQH0prG6poqk6yLK6MLdd1khl0E8qleWWVXUsCgem/V5O74H73C7CJWPnDS70hTOXL7Yyv+nZsUDNZjx2Yjre4YEIP9vTw2v7+mgoN7xyaPSi2ktn4ZrFldRXlvDc7kF2940FamsINm5o5q51rcQTDi/vO0nY52PnsQEO9o4QjUJ5EDJpuGl5JX94+3JW1lcBZ4bgdN+r41he3NfL15/ZSzabZnV9gJV1AS5vqyaWyHJtRy0NVQFCPu/kDBKfz017TdlZv5eBWIothwdY3RieXEKuxOeh1O+hIui/oBfOXLzYysKgIJcLFk1m2Hygj837e3hjVzdvz7xIz4wM0FzjZyiRoe/IMPFkgkWlkHbgulUNrGurJ5m2PLLtGOlMls7FVbhx0ReBDHDz0mrWNlVz15omaspLJ9s9WwhmMg7HBqJ0jcTwe9xYt8MHVtXhd1tuXNVEaYmHgUiaurD/ouaFV5X6WN0YJprIEk+PXcwT8LjxuFwX/MKpk59yvvQskXOaGKt1Wzg2EmM
kkeL1g118+9W+i27TBzSEoT5cwmVNFbx5ZIhU2qG6IshlTeWsa67kssZy4lmHeDJDOODGa9wMR5J86MpGUllLVZmP333/Mhoqz+whnx6CEze4Otwf4fG3utnVPUJLVZD3Lq/ho1e1MRhL01hRSonHTSzhnNe88NNvhwuQzDo0V5RO3mYW0JRCueSMtXN/25POzk67ZcuWOT+uXJyJk4XpTJZ/2ryfJ7ac5OIGUcYY4O7Lg2SybiwustZQVurBZVxs6KiiKlhCfzRJ0Oth25Eh/G43NeU+3jw8xO7jI3zu5mW8d0ntGTNBUqksh/sjpHHwGxdDyQylXhcVJT7e7R9ld/cIjnXIZg3tNaW01QQpD/go83lJZh0Go2maKwO4XOacJxgn7mG+r2eUgM/DioYwgMa05ZIyxmy11naevl3PNplRIpFh97EhdvcN8PKObn7ybnTWbVrgkR2/bMc1vq0MePHtk4ykxrataC6hvS5Mc3mQJXUh0hnLhvZKblxaS0Xo1Jknh3qH+NpPd3P4WD/RDHjc4HW7WVThI+3yMzQaJ1Tq59ffu5jLmypwAe/2Rjg5kqSy1M+yuhChEu95BfjEHQmPDcYBF4sqApM97onhHM04kbmkIJdppVJZnt13hK89spP9kUt7rIkVOEeB0dQvt+/pTpDFw8rGKlbXlbOivnza+diZjMP/fGYPj71z6nz1Ck+WjMlwcDBOU9jFx9/TwQ1L6khj+f4bR3llXx83rqzl8kUV532jq4mTqIvKAyyr/+W9UCa+dqInfvqUR5FLSc8wAZi8rH4gnqSsxMPWw/38p+/vZGDmtR0umRo/fPqWNiq9Qa5bUkNlyH/WoB2IpQj6PFR4weUamwVjLFy3vIr26gAv7hvixuWV3L6qidJSLyeH47iN5T1Lq7l+WS21ofO7uAdOPYnqcpmz9rw140TmkoJ8gUulsrx06Bjf/0UXgaCLsK+ElqoyfvFu75yH+MoKuG51A36Pl5ryEu64rJny87iIpqrUx6dvWMb1qxoIeMbWs4ynspQHfbRVBrljbYZFoQCVIT8A1UE/t1/eRMDjJhTwXtDQx+knUc8211szTmQu6Zm2wB0divLVH+5lb3+SUjd8/KpmugZ7eGbv4JzVsLbWzY1rWvnI2iY6asI4jr2gy9o9HhcddWE66sKT207pKVebM/ZvKJ/+gp4LpZ63FAIF+QI1MR1vOJ6ivdxwuB+WVMC3NnfNaR1NZS4+d/tqrlvSODnW7HKZi1oAeaq56hGr5y2FQM/ABSqezrKje5g3D/cQCJaxoibN9r7sRbXVXApOBiIpOP3aoBqgvhreu6qGaMLg9RgWVQXJprM4eFjXWkFnS+3kfVFE5MLpr2eBSSQyHBiI0l5RyuKaID/b6fDC3j6GZ16k5xTf/sx64ilYUhPkxGiSAz2j+L1uYqkM71teR3tV2djqO46lezh+yvJumo4nknu5WurtDuDrgBt40Fr7lVy0K7kTiabY1j1MwGt46p0eNq5bhHFn+fmOI+cd4j/9wjWsbKyZHH/2u13UhQMsrS3DZ1yksdQE/Xg8Y+tjOo49ZYaHhiBELo1Z/2UZY9zA3wEfALqA140xP7LW7pxt25IbqVSWJ3ed4PHtx/n197Sycd0i/B7DV57YyzvneZ+UQ1+5c/LjqaFc7vFRPs29vU/fT0QunVz8lV0N7LPWHgAwxnwP2AgoyAtE90icnuEYd65p5OrWaozHxSv7+4jHz32V5tQAF5HClIslS5qAqWt4dY1vO4Ux5j5jzBZjzJbe3t4cHFZmksk49IwkyGQcFoUDfHBtMx9e00RZcGxe9oaWSl4+cvYxlfUehbhIschFj3y6M1dn3InLWrsJ2ARjN83KwXFlBgOx1ClrU069h/bO473c9T9fP+vXvvPnt2tetEgRyUWQdwEtUx43A905aFdm4WxrU37szx/n9fj0X6MeuEhxysXQyuvAMmNMhzHGB9wL/CgH7coFSKWyHOqLkEqNzQX
3eFzUhX95T+0DPYO0f1EhLjIfzbpHbq3NGGM+DzzJ2PTDb1pr35l1ZXJBukfiPLurh1tW1Z2xFNkDL7/KVx6bfhEIBbhI8cvJ3DBr7RPAE7loSy7OonDgjEWBI9EUf/roszyywzlj/89eA//5boW4yHygSb7zxOmLAv/Dz1/jr3585uygm+vgm/9OAS4ynyjI55lEIsN339x7RohXAd/6fCdrmuvzU5iIXDIK8nlkaDTBf3l8Kz/YNnTK9gd+awV3rFqan6JE5JJTkM8DsViaH+44yDdfOMS7A+nJ7R9cFuD+jRtorSnPY3UicqkpyIvcSCTJQ5sP8sBz+4k4UOEHr4E///hyPrR6Wb7LE5E5oCAvUpmMw+HBUR55vYtnd56gpaYEv9tw/4dXclV7o24VK7KAKMiL0JG+Yf73i4fIpuO8dSzC+1fWc8tlDaxprNQCDSILkP7qi0jPUJQfbDvGG4f7eW73ABvX1/OF21ZzbXs14TJ/vssTkTxRkBeJSDTF15/dyw/f6Ka9wsWH1zbw+zcv1YlMEVGQF7pYLM2LB0/w0zePsa8vwtqWCj7+3mZuX96kYRQRARTkBSuRyPDivuP8y5Zj7D02QPeo5c41dfzBLStoqS6bvBmWiIiCvAD1Dcf4hxf38YOtR+lNwI1Ly9l4ZQWfuKaNRVWhfJcnIgVGQV5AHMfSPRDhy49t59ndQwTccPPyKv70w6tor63Id3kiUqAU5AXCcSwnR+I8/OphfrF/CJcLNm5o4I9vv4Ky4PSLG4uIgIK8YMTTWQ70RqkIefmd97cRLPGxcU2LQlxEzklBnkexWJp3To7QXllKZZmfy5vKWVpXRnXQr5OZInLelBZ5tKtnlIc3H+Kl/X0ksw7lpT7qywMKcRG5ILNKDGPMx4wx7xhjHGNMZ66Kms8SiQzbDg7wxpF+OqoC/Nq17dywpEar1ovIRZvt0MoO4B7gH3JQy7w2NJrg+d09RJ00r+8bwONx89kblnBVR3W+SxORIjerILfW7gIwRnfam8mJwQh/9cQuth0ZoKM2zMa1TbTVB1lcFcx3aSIyD8zZyU5jzH3AfQCtra1zddi8SiQybDs6wEObD/L8O33csrqGT13fzpWLqnV5vYjkzDnTxBjzDNAwzae+ZK394fkeyFq7CdgE0NnZac+7wiJ2YCDK/37pAPtODnPbFXXc/6FVNFSWnfsLRUQuwDmD3Fp761wUMp84jiWeztJeUcrv3LCYY6NxbllWT0WoJN+licg8pPf3l0A8naVrMEZzZSnXLqvLdzkiMs/Ndvrh3caYLuA9wOPGmCdzU1ZxC3jdNFeWakqhiMyJ2c5aeRR4NEe1zBsulyHo15sdEZkbuoRQRKTIKchFRIqcglxEpMgpyM+T41iiyQyOsyCmwItIEVGQn6eJKYXxdDbfpYiInEJBfp40pVBECpXmyJ0nTSkUkUKlHrmISJFTkIuIFDkFuYhIkVOQi4gUOQW5iEiRW1BBrot6RGQ+WlBBrot6RGQ+WlBBrot6RGQ+WlBXuOiiHhGZjxZUj1xEZD6a7VJvf22M2W2M2W6MedQYU5Gjui6aTmiKyEIz2x7508Dl1to1wF7g/tmXNDs6oSkiC82sgtxa+5S1NjP+cDPQPPuSZkcnNEVkocnlGPlngJ+c7ZPGmPuMMVuMMVt6e3tzeNhTTZzQdLnMJTuGiEghOWeQG2OeMcbsmObfxin7fAnIAA+frR1r7SZrbae1trO2tnZWRWscXETkl845F89ae+tMnzfG/BZwF3CLtXZOknViHLy5slTTCUVkwZtVChpj7gD+BHi/tTaWm5LOznEs8XQWv9ulcXARkXGzHSP/BhACnjbGbDPGPJCDms5qoieezDoaBxcRGTerHrm1dmmuCjkfmpEiInKmohpg1iX2IiJn0iX6IiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRU5BLiJS5BTkIiJFzszR7VFOPagxvcDhHDdbA/TluM1cUn0Xr5B
rA9U3G4VcGxRefW3W2jPuOpiXIL8UjDFbrLWd+a7jbFTfxSvk2kD1zUYh1waFX98EDa2IiBQ5BbmISJGbT0G+Kd8FnIPqu3iFXBuovtko5Nqg8OsD5tEYuYjIQjWfeuQiIguSglxEpMjNqyA3xvy1MWa3MWa7MeZRY0xFvmuayhjzMWPMO8YYxxhTEFOajDF3GGP2GGP2GWO+mO96pjLGfNMY02OM2ZHvWqZjjGkxxjxvjNk1/nv9Qr5rmmCMKTHGvGaMeWu8tj/Pd03TMca4jTFvGmMey3ctpzPGHDLGvD2++tmWfNczk3kV5MDTwOXW2jXAXuD+PNdzuh3APcCL+S4Exv6IgL8DPgisBj5pjFmd36pO8S3gjnwXMYMM8EfW2lXAtcDvFdDPLwncbK1dC6wD7jDGXJvfkqb1BWBXvouYwU3W2nWFPpd8XgW5tfYpa21m/OFmoDmf9ZzOWrvLWrsn33VMcTWwz1p7wFqbAr4HbMxzTZOstS8CA/mu42ystcettW+MfzzKWCA15beqMXZMZPyhd/xfQc1sMMY0A3cCD+a7lmI3r4L8NJ8BfpLvIgpcE3B0yuMuCiSIio0xph1YD7ya51ImjQ9bbAN6gKettQVT27i/Bf4YcPJcx9lY4CljzFZjzH35LmYmRbcApjHmGaBhmk99yVr7w/F9vsTY296H57K28WOfs74CYqbZVlC9tmJgjCkDvg/8W2vtSL7rmWCtzQLrxs8VPWqMudxaWxDnG4wxdwE91tqtxpgb81zO2Vxnre02xtQBTxtjdo+/Syw4RRfk1tpbZ/q8Mea3gLuAW2weJsmfq74C0wW0THncDHTnqZaiZIzxMhbiD1trH8l3PdOx1g4ZY15g7HxDQQQ5cB3wEWPMh4ASIGyM+Y619tfzXNcka233+P89xphHGRuKLMggn1dDK8aYO4A/AT5irY3lu54i8DqwzBjTYYzxAfcCP8pzTUXDGGOAfwR2WWv/Jt/1TGWMqZ2YtWWMCQC3ArvzWtQU1tr7rbXN1tp2xp53zxVSiBtjgsaY0MTHwG0UzovgGeZVkAPfAEKMvQ3aZox5IN8FTWWMudsY0wW8B3jcGPNkPusZPzH8eeBJxk7U/bO19p181jSVMea7wCvACmNMlzHms/mu6TTXAb8B3Dz+fNs23sMsBI3A88aY7Yy9YD9trS24KX4FrB542RjzFvAa8Li19qd5rumsdIm+iEiRm289chGRBUdBLiJS5BTkIiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRe7/A4JcVhLWP4FuAAAAAElFTkSuQmCC","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["plt.scatter(frc_api['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","frc_bulk['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","s=1, alpha=0.1)"]}],"metadata":{"kernelspec":{"display_name":"Python 3.10.5 ('pudl-dev')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.5"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"adf7f0b18919f80aa48732f306897016c591ffeed72c8be7c2b511e7f25e6b58"}}},"nbformat":4,"nbformat_minor":2} +{"cells":[{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[],"source":["%load_ext autoreload"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["%autoreload 2"]},{"attachments":{},"cell_type":"markdown","metadata":{},"source":["import pudl\n","import pandas as pd\n","import numpy as np\n","import sqlalchemy as sa\n","import pudl.output.eia923 as eia\n","from pudl.workspace.setup import PudlPaths\n","pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["api_df = eia.get_fuel_cost_avg_eiaapi(eia.FUEL_COST_CATEGORIES_EIAAPI) # coal, oil, gas"]},{"cell_type":"code","execution_count":43,"metadata":{},"outputs":[],"source":["bulk_df = eia.get_fuel_cost_avg_bulk_elec(pudl_engine=pudl_engine)"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
statereport_datebulk_agg_fuel_cost_per_mmbtufuel_type_code_pudl
0AK2012-01-010.0coal
1AK2012-02-010.0coal
2AK2012-03-010.0coal
3AK2012-04-010.0coal
4AK2012-05-010.0coal
\n","
"],"text/plain":[" state report_date bulk_agg_fuel_cost_per_mmbtu fuel_type_code_pudl\n","0 AK 2012-01-01 0.0 coal\n","1 AK 2012-02-01 0.0 coal\n","2 AK 2012-03-01 0.0 coal\n","3 AK 2012-04-01 0.0 coal\n","4 AK 2012-05-01 0.0 coal"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.head()"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
report_datefuel_cost_per_unitstatefuel_type_code_pudl
02022-07-0151.83AKcoal
12022-06-0159.16AKcoal
22022-05-0153.04AKcoal
32022-04-0151.69AKcoal
42022-03-0158.55AKcoal
\n","
"],"text/plain":[" report_date fuel_cost_per_unit state fuel_type_code_pudl\n","0 2022-07-01 51.83 AK coal\n","1 2022-06-01 59.16 AK coal\n","2 2022-05-01 53.04 AK coal\n","3 2022-04-01 51.69 AK coal\n","4 2022-03-01 58.55 AK coal"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["api_df.drop(columns=['name', 'series_id', 'units']).head()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/plain":["((18501, 4), (30804, 7))"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.shape, api_df.shape"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 18501 entries, 0 to 18500\n","Data columns (total 4 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 state 18501 non-null object \n"," 1 report_date 18501 non-null datetime64[ns]\n"," 2 bulk_agg_fuel_cost_per_mmbtu 18501 non-null float64 \n"," 3 fuel_type_code_pudl 18501 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(2)\n","memory usage: 578.3+ KB\n"]}],"source":["bulk_df.info()"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Int64Index: 30804 entries, 0 to 10499\n","Data columns (total 7 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 report_date 30804 non-null datetime64[ns]\n"," 1 fuel_cost_per_unit 18615 non-null float64 \n"," 2 state 30804 non-null object \n"," 3 units 30804 non-null object \n"," 4 series_id 30804 non-null object \n"," 5 name 30804 non-null object \n"," 6 fuel_type_code_pudl 30804 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(5)\n","memory usage: 1.9+ MB\n"]}],"source":["api_df.info()"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["keep_cols = ['state', 'report_date',\n","'fuel_cost_per_mmbtu', 
'fuel_type_code_pudl', 'fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["frc_api = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['fuel_cost_per_unit']]"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["frc_bulk = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['bulk_agg_fuel_cost_per_mmbtu']]"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["((560374, 6), (560374, 6))"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api.shape, frc_bulk.shape"]},{"cell_type":"code","execution_count":26,"metadata":{},"outputs":[],"source":["mismatch = frc_api['fuel_cost_from_eiaapi'] ^ frc_bulk['fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[{"data":{"text/plain":["sum 0.0\n","mean 0.0\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["mismatch.agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":28,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":28,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":30,"metadata":{},"outputs":[],"source":["diff = frc_api['fuel_cost_per_mmbtu'].sub(frc_bulk['fuel_cost_per_mmbtu'])"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[{"data":{"text/plain":["sum 186016.00000\n","mean 0.33195\n","Name: fuel_cost_per_mmbtu, dtype: 
float64"]},"execution_count":32,"metadata":{},"output_type":"execute_result"}],"source":["diff.ne(0).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"text/plain":["sum 133687.000000\n","mean 0.238567\n","Name: fuel_cost_per_mmbtu, dtype: float64"]},"execution_count":35,"metadata":{},"output_type":"execute_result"}],"source":["diff.abs().gt(1e-3).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/plain":["sum 136530.000000\n","mean 0.243641\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":37,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":37,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVcElEQVR4nO3db4xc53me8euObCusY9ZSZW1pkihllHIjibEdbVgVRouNlURMZJhCURU0VItqVbAQFFcGWMRk8qHoBwJEW6e12lgtEbuiUDUqkVgVEVtJZDbTIoD+mHLlMBStirUYhSEjxm5Ta21A0apPP8yRONwdzs5Sy5kdnusHDObMc86Zeefl8J533jlzNlWFJKkdfmjcDZAkjY6hL0ktYuhLUosY+pLUIoa+JLXIO8bdgMVcddVVtWHDhnNq3//+93n3u989ngZNAPtnMPtnMPtnsEnpn2efffY7VfW++fUVH/obNmzg8OHD59Q6nQ4zMzPjadAEsH8Gs38Gs38Gm5T+SfKH/epO70hSixj6ktQihr4ktciioZ/kg0me67l8L8lnklyZ5IkkLzbXV/TsszvJ8SQvJLmlp35jkiPNuvuT5GI9MUnSQouGflW9UFUfrqoPAzcCPwAeBXYBh6pqI3CouU2S64BtwPXAFuALSS5r7u4BYAewsblsWdZnI0kaaKnTOzcD/6uq/hDYCuxv6vuB25rlrcAjVfVaVb0EHAc2J1kDrK6qJ6t7lreHevaRJI3AUg/Z3Ab8WrM8VVWnAarqdJKrm/pa4KmefU42tdeb5fn1BZLsoPuJgKmpKTqdzjnrZ2dnF9R0lv0zmP0zmP0z2KT3z9Chn+RdwCeA3Ytt2qdWA+oLi1X7gH0A09PTNf+Y2Ek5TnZc7J/B7J/B7J/BJr1/ljK987PAN6rqleb2K82UDc31maZ+Eljfs9864FRTX9enLkkakaWE/ic5O7UDcBDY3ixvBx7rqW9LcnmSa+h+YftMMxX0apKbmqN27uzZR5poG3Z95a2LtJINNb2T5C8APw38o57yXuBAkruBl4HbAarqaJIDwPPAHHBvVb3R7HMP8CCw
Cni8uUiSRmSo0K+qHwB/aV7tu3SP5um3/R5gT5/6YeCGpTdTkrQc/EWuJLXIij/LprRSOX+vSeRIX5JaxJG+tMx6PwGc2HvrGFsiLeRIX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE0JekFjH0JalFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWmSo0E/y3iS/nuRbSY4l+RtJrkzyRJIXm+srerbfneR4kheS3NJTvzHJkWbd/UlyMZ6UJKm/YUf6nwd+q6r+GvAh4BiwCzhUVRuBQ81tklwHbAOuB7YAX0hyWXM/DwA7gI3NZcsyPQ9J0hAWDf0kq4G/BXwRoKr+vKr+DNgK7G822w/c1ixvBR6pqteq6iXgOLA5yRpgdVU9WVUFPNSzjyRpBIb5w+gfAP4U+A9JPgQ8C9wHTFXVaYCqOp3k6mb7tcBTPfufbGqvN8vz6wsk2UH3EwFTU1N0Op1z1s/Ozi6o6Sz7Z7Dl6p+dm+YW3WYS/x18/Qw26f0zTOi/A/hx4NNV9XSSz9NM5ZxHv3n6GlBfWKzaB+wDmJ6erpmZmXPWdzod5td0lv0z2HL1z127vrLoNifuePuPM2q+fgab9P4ZZk7/JHCyqp5ubv863TeBV5opG5rrMz3br+/Zfx1wqqmv61OXJI3IoqFfVX8C/FGSDzalm4HngYPA9qa2HXisWT4IbEtyeZJr6H5h+0wzFfRqkpuao3bu7NlHuiRt2PWVty7SSjDM9A7Ap4GHk7wL+Dbw9+m+YRxIcjfwMnA7QFUdTXKA7hvDHHBvVb3R3M89wIPAKuDx5iJJGpGhQr+qngOm+6y6+Tzb7wH29KkfBm5YQvukS0bvaP/E3lvH2BK1mb/IlaQWMfQlqUUMfUlqEUNfklrE0JekFhn2kE1J4PH2mniO9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE4/SlMfCMmxoXR/qS1CKO9KVF+CtcXUoc6UtSixj6ktQihr4ktYihL0ktYuhLUosMFfpJTiQ5kuS5JIeb2pVJnkjyYnN9Rc/2u5McT/JCklt66jc293M8yf1JsvxPSZJ0Pks5ZPMnq+o7Pbd3AYeqam+SXc3tzya5DtgGXA+8H/hakmur6g3gAWAH8BTwVWAL8PgyPA9pWXmYpi5Vb2d6Zyuwv1neD9zWU3+kql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERh2pF/A7yQp4N9X1T5gqqpOA1TV6SRXN9uupTuSf9PJpvZ6szy/vkCSHXQ/ETA1NUWn0zln/ezs7IKazrJ/Bhumf3ZumhtNY2DF/Vv5+hls0vtn2ND/aFWdaoL9iSTfGrBtv3n6GlBfWOy+qewDmJ6erpmZmXPWdzod5td0lv0z2DD9c9cIp3dO3DEzsscahq+fwSa9f4aa3qmqU831GeBRYDPwSjNlQ3N9ptn8JLC+Z/d1wKmmvq5PXZI0IouGfpJ3J3nPm8vAzwB/ABwEtjebbQcea5YPAtuSXJ7kGmAj8EwzFfRqkpuao3bu7NlHkjQCw0zvTAGPNkdXvgP4T1X1W0m+DhxIcjfwMnA7QFUdTXIAeB6YA+5tjtwBuAd4EFhF96gdj9yRpBFaNPSr6tvAh/rUvwvcfJ599gB7+tQPAzcsvZnSxedhmmoDf5ErSS3i+fSlMfOvaGmUHOlLUosY+pLUIoa+JLWIoS9JLeIXudIK4pe6utgMfekS4puGFuP0jiS1iKEvSS1i6EtSixj6ktQihr4ktYihL0ktYuhLUosY+pLUIoa+JLWIv8iVJpC/vNWFMvTVaiv5TyQa7LoYDH1pwq3kNy6tPM7pS1KLDB36SS5L8j+S/GZz+8okTyR5sbm+omfb3UmOJ3khyS099RuTHGnW3Z8ky/t0JEmDLGV65z7gGLC6ub0LOFRVe5Psam5/Nsl1wDbgeuD9wNeSXFtVbwAPADuAp4CvAluAx5flmUiX
MKdwtFyGGuknWQfcCvxqT3krsL9Z3g/c1lN/pKpeq6qXgOPA5iRrgNVV9WRVFfBQzz6SpBEYdqT/r4FfAN7TU5uqqtMAVXU6ydVNfS3dkfybTja115vl+fUFkuyg+4mAqakpOp3OOetnZ2cX1HSW/TNYb//s3DQ33sZcRBf6GvD1M9ik98+ioZ/k48CZqno2ycwQ99lvnr4G1BcWq/YB+wCmp6drZubch+10Osyv6Sz7Z7De/rnrEp42OXHHzAXt5+tnsEnvn2FG+h8FPpHk54AfBlYn+Y/AK0nWNKP8NcCZZvuTwPqe/dcBp5r6uj51SdKILDqnX1W7q2pdVW2g+wXtf62qvwccBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0Am/nx1l7gQNJ7gZeBm4HqKqjSQ4AzwNzwL3NkTsA9wAPAqvoHrXjkTuSNEJLCv2q6gCdZvm7wM3n2W4PsKdP/TBww1IbKUlaHv4iV5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEf9colrnyB//30v6RGvSIIa+dInyD6urH6d3JKlFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklpk0dBP8sNJnknyzSRHk/yzpn5lkieSvNhcX9Gzz+4kx5O8kOSWnvqNSY406+5PkovztCRJ/QxzwrXXgI9V1WySdwK/l+Rx4G8Dh6pqb5JdwC7gs0muA7YB1wPvB76W5NqqegN4ANgBPAV8FdgCPL7sz0qap/fkYzs3jbEh0pgtOtKvrtnm5jubSwFbgf1NfT9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERjq1MpJLgOeBf4q8CtV9XSSqao6DVBVp5Nc3Wy+lu5I/k0nm9rrzfL8er/H20H3EwFTU1N0Op1z1s/Ozi6o6Sz7Z6Gdm+beWp5ade7tNljK68HXz2CT3j9DhX4zNfPhJO8FHk1yw4DN+83T14B6v8fbB+wDmJ6erpmZmXPWdzod5td0lv2z0F3nTO/M8bkj7fpTEifumBl6W18/g016/yzp6J2q+jOgQ3cu/pVmyobm+kyz2Ulgfc9u64BTTX1dn7okaUSGOXrnfc0InySrgJ8CvgUcBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0AsN8xl0D7G/m9X8IOFBVv5nkSeBAkruBl4HbAarqaJIDwPPAHHBvMz0EcA/wILCK7lE7HrkjSSO0aOhX1e8DH+lT/y5w83n22QPs6VM/DAz6PkCSdBH5i1xJahFDX5JapF3HralVen+FK6nLkb4ktYihL0ktYuhLUosY+pLUIoa+JLWIoS9JLWLoS1KLGPqS1CKGviS1iL/I1SXFX+FKgznSl6QWcaQvtUDvJ6ATe28dY0s0bo70JalFHOlLLeOov90c6UtSixj6ktQihr4ktYihL0ktsmjoJ1mf5HeTHEtyNMl9Tf3KJE8kebG5vqJnn91Jjid5IcktPfUbkxxp1t2fJBfnaUmS+hlmpD8H7KyqHwVuAu5Nch2wCzhUVRuBQ81tmnXbgOuBLcAXklzW3NcDwA5gY3PZsozPRZK0iEVDv6pOV9U3muVXgWPAWmArsL/ZbD9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkEVjScfpJNgAfAZ4GpqrqNHTfGJJc3Wy2FniqZ7eTTe31Znl+vd/j7KD7iYCpqSk6nc4562dnZxfUdFab+2fnprlFt5laNdx2bdDvddLm188wJr1/hg79JD8C/Abwmar63oDp+H4rakB9YbFqH7APYHp6umZmZs5Z3+l0mF/TWW3un7uGOOHazk1zfO6Iv0sEOHHHzIJam18/w5j0/hnq6J0k76Qb+A9X1Zeb8ivNlA3N9ZmmfhJY37P7OuBUU1/Xpy5JGpFhjt4J8EXgWFX9cs+qg8D2Znk7
8FhPfVuSy5NcQ/cL22eaqaBXk9zU3OedPftIkkZgmM+4HwU+BRxJ8lxT+0VgL3Agyd3Ay8DtAFV1NMkB4Hm6R/7cW1VvNPvdAzwIrAIeby7S2+I59KXhLRr6VfV79J+PB7j5PPvsAfb0qR8GblhKAyVJy8df5EpSixj6ktQiHremieQ8vnRhHOlLUosY+pLUIoa+JLWIc/pSi/n3ctvHkb4ktYgjfUkD+Wng0mLoS1rAQ2IvXYa+JoZBJL19hr6koTnVM/n8IleSWsTQl6QWMfQlqUUMfUlqEb/I1YrmETuj82Zf79w0h9Fw6XKkL0ktYuhLUosY+pLUIoa+JLXIot/WJPkS8HHgTFXd0NSuBP4zsAE4Afzdqvo/zbrdwN3AG8A/rqrfbuo3Ag8Cq4CvAvdVVS3v09Gk84tb6eIaZqT/ILBlXm0XcKiqNgKHmtskuQ7YBlzf7POFJJc1+zwA7AA2Npf59ylJusgWDf2q+u/A/55X3grsb5b3A7f11B+pqteq6iXgOLA5yRpgdVU92YzuH+rZR5I0Ihd6MO5UVZ0GqKrTSa5u6muBp3q2O9nUXm+W59f7SrKD7qcCpqam6HQ656yfnZ1dUNNZk9w/3WPEL66pVaN5nEk1bP/8m4cfe2t509q/eDGbtKJM8v8vWP5fYKRPrQbU+6qqfcA+gOnp6ZqZmTlnfafTYX5NZ01y/9w1gjn9nZvm+NwRf3x0PhfSPyfumLk4jVmBJvn/F1x46L+SZE0zyl8DnGnqJ4H1PdutA0419XV96pJf3kojdKGHbB4EtjfL24HHeurbklye5Bq6X9g+00wFvZrkpiQB7uzZR5I0IsMcsvlrwAxwVZKTwD8F9gIHktwNvAzcDlBVR5McAJ4H5oB7q+qN5q7u4ewhm483F0nSCC0a+lX1yfOsuvk82+8B9vSpHwZuWFLrJEnLyl/kSlKLGPqS1CKGviS1iAcrayw8TFMaD0Nf0tvW+yZ+Yu+tY2yJFuP0jiS1iKEvSS1i6EtSixj6ktQifpGrkfGIHWn8DH1dVAa9tLIY+lp2Br20cjmnL0kt4khf0rLyh1ormyN9SWoRR/paFs7jS5PB0NcFM+i1GKd6Vh5DX4sy3KVLh3P6ktQijvQljYRTPSuDoa++nNLRxeQbwPiMPPSTbAE+D1wG/GpV7R11G9SfQa+V5Hyvx+V8k2jjm89IQz/JZcCvAD8NnAS+nuRgVT0/ynZcas73wjXENQmW+joddvu2hPhSjXqkvxk4XlXfBkjyCLAVWNGhP8yIY5gX4qDtz7duqY+xYddX2LlpjrsMfLXcxXozWez/10p/s0lVje7Bkr8DbKmqf9jc/hTw16vq5+dttwPY0dz8IPDCvLu6CvjORW7uJLN/BrN/BrN/BpuU/vkrVfW++cVRj/TTp7bgXaeq9gH7znsnyeGqml7Ohl1K7J/B7J/B7J/BJr1/Rn2c/klgfc/tdcCpEbdBklpr1KH/dWBjkmuSvAvYBhwccRskqbVGOr1TVXNJfh74bbqHbH6pqo5ewF2dd+pHgP2zGPtnMPtnsInun5F+kStJGi/PvSNJLWLoS1KLTGzoJ/l0kheSHE3yz8fdnpUoyT9JUkmuGndbVpok/yLJt5L8fpJHk7x33G1aCZJsaf5fHU+ya9ztWUmSrE/yu0mONblz37jbdCEmMvST/CTdX/L+WFVdD/zLMTdpxUmynu7pLl4ed1tWqCeAG6rqx4D/Cewec3vGruc0KT8LXAd8Msl1423VijIH7KyqHwVuAu6dxP6ZyNAH7gH2VtVrAFV1ZsztWYn+FfAL9Pnxm6Cqfqeq5pqbT9H9zUjbvXWalKr6c+DN06QIqKrTVfWNZvlV4BiwdrytWrpJDf1rgb+Z5Okk/y3JT4y7QStJkk8Af1xV3xx3WybEPwAeH3cjVoC1wB/13D7JBIbaKCTZAHwEeHrMTVmyFXs+/SRfA/5yn1W/RLfdV9D9iPUTwIEkH6gWHX+6SP/8IvAz
o23RyjOoj6rqsWabX6L7sf3hUbZthRrqNCltl+RHgN8APlNV3xt3e5ZqxYZ+Vf3U+dYluQf4chPyzyT5f3RPgvSno2rfuJ2vf5JsAq4BvpkEutMW30iyuar+ZIRNHLtBryGAJNuBjwM3t2nAMICnSVlEknfSDfyHq+rL427PhZjU6Z3/AnwMIMm1wLuYjLPeXXRVdaSqrq6qDVW1ge5/5B9vW+AvpvljPp8FPlFVPxh3e1YIT5MyQLqjqC8Cx6rql8fdngs1qaH/JeADSf6A7pdN2x2paYn+LfAe4IkkzyX5d+Nu0Lg1X2y/eZqUY8CBCzxNyqXqo8CngI81r5nnkvzcuBu1VJ6GQZJaZFJH+pKkC2DoS1KLGPqS1CKGviS1iKEvSS1i6EtSixj6ktQi/x8iTBQ1MrleqwAAAABJRU5ErkJggg==","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["diff.abs().replace(np.inf, np.nan).replace(0, np.nan).transform(np.log10).hist(bins=100)"]},{"cell_type":"code","execution_count":40,"metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["state True\n","report_date True\n","fuel_type_code_pudl True\n","fuel_cost_from_eiaapi True\n","dtype: bool"]},"metadata":{},"output_type":"display_data"}],"source":["# data is all in same order\n","frc_api.drop(columns=['fuel_cost_per_mmbtu', 'fuel_cost_per_unit']).eq(\n"," frc_bulk.drop(columns=['fuel_cost_per_mmbtu', 'bulk_agg_fuel_cost_per_mmbtu'])\n",").all()"]},{"cell_type":"code","execution_count":42,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":42,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAApiElEQVR4nO3deXhc133e8e+ZFYPBDPaN2LmTkriI0GJLsrVZki3ZjBQvsrPVdqLGjRM3TZtYdZs0TZ3YjZ88ceukCqv4sRWrdtNY8iLJ1i5LskVJpERRFDdxJwiS2JfZl3v6BxaDJAguGGJmgPfzPHyIubg49wdg8M6Zc8+9x1hrERGR4uXKdwEiIjI7CnIRkSKnIBcRKXIKchGRIqcgFxEpcp58HLSmpsa2t7fn49AiIkVr69atfdba2tO35yXI29vb2bJlSz4OLSJStIwxh6fbrqEVEZEipyAXESlyCnIRkSKnIBcRKXIKchGRIqcgFxEpcgpyEZEipyAXEZkjjmOJJjM4Tm5vH64gFxGZI/F0lq7BGPF0NqftKshFRC6hqb3wgNdNc2UpAa87p8dQkIuIXEJTe+EulyHo9+BymZweQ0EuIpJjc9ELn0pBLiKSY3PRC59KQS4ikmNz0QufKi+3sRURmc8meuFzdrw5O5KIiFwSCnIRkSKnIBcRKXI5GcQxxhwCRoEskLHWduaiXRERObdcjsbfZK3ty2F7IiIFwXEs8XSWgNd9SacRXiwNrYiInMOlukdKruQqyC3wlDFmqzHmvhy1KSJSEOZ6XviFytXQynXW2m5jTB3wtDFmt7X2xak7jAf8fQCtra05OqyISG5NN4wy1/PCL1ROeuTW2u7x/3uAR4Grp9lnk7W201rbWVtbm4vDiojkzMT9UaLJTEEPo0xn1kFujAkaY0ITHwO3ATtm266IyFxxHEtfJMmR/hhAQQ+jTCcX7xXqgUeNMRPt/R9r7U9z0K6IyCU1MYziOJb+aJLqMv8lv8HVpTDrILfWHgDW5qAWEZE5M9EL748maa4opbUqWLDTC8+lcEfvRUQuoXg6S38kVbS
98KkU5CKyIDiOZTSRJpHOUh30E/C6aa0uLdpe+FS6IEhE5r2JYZQd3cNsPtDPQCw1Jws+zBX1yEVk3jr9ZObimiBLa8uoKvXlu7ScUpCLyLyUyTh0DcWIJrO0VhX3ycxz0dCKiMxLA7EUO4+PEPS7Cfo982YYZTrqkYvIvDH18vqqUh+dbVVUlfrmbYBPUI9cRIpeJuPQM5JgNJ6evLze43FRFy7B45n/MTf/v0MRmdccx9I1FGPL4QHimWzRXV6fCxpaEZGiNHGTq1gqw2giw+rGMDVB/4LogZ9OQS4iRSWVytI9Eqc84OVwfwzHWporS6kp88/7sfCzWXgvXSJS1I4Nx/jJ9uMMRJMsqS1jWV1oQYc4qEcuIkVgYhgFIBzwsnJRmKqgn1DAm+fKCoOCXEQKXjSZYcexYfxeF8vqQlzdUb3gTmjOREEuIgVr6iX2fq+LporAvL6w52IpyEWkoKRSWQ4PRgCoKSuhZzTJovIAKxrC8/YS+9lSkItIwXAcy76+CI9tOwYu+NiGlsl54Qrws8tZkBtj3MAW4Ji19q5ctSsiC0c8nSXrWO64ooGAz01TeSk+n8bCzyWXPfIvALuAcA7bFJEFYGJueENZCYtry9QDv0A5mUdujGkG7gQezEV7IjL/pVJZDvSOMjiapGs4xrO7ejgRSehk5kXI1QVBfwv8MeCcbQdjzH3GmC3GmC29vb05OqyIFKvDgxG+u/kwrx/pp7LExy2r6lgUDuS7rKI06yA3xtwF9Fhrt860n7V2k7W201rbWVtbO9vDikiRmri4x7EWr8fQEC6hPOijvaZM4+EXKRdj5NcBHzHGfAgoAcLGmO9Ya389B22LyDwx9SZXA9E0DeEAH7+qjUXhgIZSZmnWQW6tvR+4H8AYcyPw7xXiIjJhIsCjqQzdQ3Gw0FxVSnmpj8oyf77Lmxc0j1xELplMxuHoYJRjgzFKPF5aqgOU+ub3smv5kNMgt9a+ALyQyzZFpPg4jmU0MbZaz87uYarK/CytD1AbKlGAXwLqkYtIzkwE+EA0SddQnJ7hJJctKqepspRQiVchfokoyEUkJ1KpLPv6IvSNJuiPpljTXM7yuhDVC3TVnrmkIBeRWYvF0jy79yTv9oxy68p6VjSEFeBzSD9lEZkVx7G8cWyIn759nDK/h5bqIPXlAYX4HFKPXEQuSiqV5chQFGstfh/ce00bKxtClAd8+S5twVGQi8gFm7jd7JM7jlMV9PL+FfW0tgR1MjNPFOQicl4yGYeBWIqqUh/JrEPWsdy6up6qUh/1ujozrxTkInJOmYzD3pMj7DoxwvVLa6kN6XazhURBLiIzSiQybDk6yJGBCJWlfkrGwzvoV3wUCv0mRGRasViat48P0RdNsuXgILesqueK5gpCJd58lyanUZCLyCkmrs589WA//++1w9SF/dy0qpHO1ipKShQZhUi/FRGZFImm+PmBXvpiCcgafrWzhbaaIEuqQ7pXeAFTkIsIjmPpGYrxf7ce5aW9PXTUBrn3qnbWtlTqwp4ioCAXWcAyGYfDg6NsPzrEvpOjvLS3j5tW1nHbFQ0sqylXiBcJBbnIApXJOOzoHuSfXjnIruMjbGit4Dfe28HtqxoIa8GHoqIgF1mAEokMrx7u55V3e0gk0mxc38ytq+tpqwypF16EFOQiC0gkmuK1I/0MRBO8dmCI2pCPT1y7mKvbqjUjpYjN+jdnjCkBXgT84+39i7X2z2bbrojkztBogp/sPMbe4yO8un+ApqpS7lzXzIbWShrLS9ULL3K5eAlOAjdbayPGGC/wsjHmJ9bazTloW0RmqWcoypcf38GLe/pwu2F9WzUfv7qFGxbXqxc+T8z6t2ittUBk/KF3/J+dbbsiMjuxWJqfHz7JP79ylJ/tHaAx7OHDVzaxcV0zHTVh9cLnkZy8HBtj3MBWYCnwd9baV6fZ5z7gPoDW1tZcHFZEzmIkkuTbr+zn+28cZSiaYUN7mM9c38ENSxvUC5+HcvI
btdZmgXXGmArgUWPM5dbaHaftswnYBNDZ2akeu8glkEpl2dM7xGPbjvPU9qP4XW5+ZUMzn35vBy3VId2pcJ7K6UuztXbIGPMCcAew4xy7i0gOjUSSfHfrIZ58+wTD8RRXdtRy+2WNXL+kjtJS3ehqPsvFrJVaID0e4gHgVuCrs65MRM7Lod4h/v65vQyPJthxfJRkFj5yZQu/874l1IdL1QtfAHLRI28Evj0+Tu4C/tla+1gO2hWRGaRSWX5x4Dj/5Yc7ODSYBeDO1VW8f2UDt61qpCJUkucKZa7kYtbKdmB9DmoRkfO0+3gf/+0HO9nRNcpQdqwH9cHlYf7zh6+gobIs3+XJHNPpa5Eiksk4vN3Vzx/8ny0cHXEAWNsY4FPXtXDnqjbKglrBfiFSkIsUib7hGN/ZfIDHt3dzdMShBPjIlVX88e1rqSkvzXd5kkcKcpECNxJJ8vg7R/nB64fZ1pXA74YbFpfzm9e3ccPiRs0LFwW5SCHrGYry5Z/s4Km3+4g70Bx28fGr2/jNaxbrZKZMUpCLFKCh0QSPvd3FT7d38cqhKF7gfe1BPnfbSjY012rZNTmFglykwAyNJvjLJ7bz4zd7iQONIcOvrm/mt29Yrl64TEtBLlIgugdG+ebL+zjUG+PV/UNkgRuXhvntG5bS2VarsXA5Kz0zRArAvpMDfO6hrbzbn8IF3LK8nKuW1HHPumbNSJFzUpCL5NFIJMmPdhzhmy8c4MBQBoCbl5XzF3ev04U9ct4U5CJ54DiWt4/18l8f3cFb3XEyQEelm09c08K9G5ZoLFwuiIJcZI7tPt7HX/14J+8cG6UvOXZ5/fXtIf7yo2tprSnPd3lShBTkInMkk3F4ds9h/v3DOxkdu7qejnLDPVe18WtXL6YqHMhvgVK0FOQic2Db0RP8yXffZM+AM7ntfR2l/OVHN9BcHc5jZTIfKMhFLqFYLM2Pdx7iy4/sZfweVwSAP/pgG/d2LtdNriQnFOQil0hX/wh/8YO3ePrdESb64YvL4G//VSdrmuvzWpvMLwpykRzrHhjlgefe4Zld/XRHx7Y1heBf37yUe9Z0qBcuOacgF8mhvSf6+eyDmzkaGXvsA25aUc6XPnyFZqTIJZOLNTtbgIeABsABNllrvz7bdkWKSc9QlAdf2sX33zhJf3xsW2MQ7v+VVdy2rFWX18sllYtnVwb4I2vtG8aYELDVGPO0tXZnDtoWKXg/2rGHP/jOvsnHFR646bIq/vADq9ULlzmRizU7jwPHxz8eNcbsApoABbnMaz1DUb7+1DYefmNocluZC773e9ewsrEmf4XJgpPT93vGmHbGFmJ+dZrP3QfcB9Da2prLw4rMuS2Hu/k333qTnvgvtzWUwDc+vV4hLnMuZ0FujCkDvg/8W2vtyOmft9ZuAjYBdHZ22lwdV2QudQ+M8qf/8jLPHPjlhT3lbvjSPUv4lSuWacEHyYucBLkxxstYiD9srX0kF22KFJKh0QRffeLnfPfNxCnbf21DBb9/61rdqVDyKhezVgzwj8Aua+3fzL4kkcKy+WAXv/kPb5E6bftXPtHGvesvz0tNIlPlokd+HfAbwNvGmG3j2/6jtfaJHLQtkjcDI3H+9pltPPTawCnbr2+Cv/zk9ZqRIgUjF7NWXgZMDmoRKRi/OHCU39m0nehp2//7ve18fN1lealJ5Gx0lYLIFF39I/zu37/EjtMS/J41ZXzxQ53UVQTzU5jIDBTkIuP+/sVX+O9PDJyx/cFPr+LWFYvzUJHI+VGQy4K3s7uXX/kfr51xMrPVDQ/+/rUsb6jOS10i50tBLgva1376LN94IXHG9r/51GLuWbMqDxWJXDgFuSxIL+47zG8+uOOM7RXAv/zhe1haXzXnNYlcLAW5LDh3ffFxzoxw+NonO/jo2tVzXo/IbCnIZd5zHMtwPMWNf/EMw9N8Pgg8+R9u0NqZUrQU5DKvjUSS/P1TW3jgtaFpP/+56/380R03k8w6OI7F5dIlEVJ
8FOQyLw2NJvjelv185clDZ93nnz6znnBJgL09o1gL7TVBgn79SUjx0bNW5pWRSJLPbXqGn/fMvN+9V9bwwPMHuPuqRfjcPtY0VRDw6s6FUpwU5DJvdPWP8L6/fglnhn2+dFcTXleAbzy9j744NJb7+fytq2iuKNWwihQtBbkUJcexxNNZekciPPzKUXyuAf7u5ciMX/PVe1ZzbDDOi3uOk0rDiho/92xoprUqqBCXoqYgl6IUT2fpGozx3VcO8a1Xj82471Wl8GsfuZw3Dg1xsDfC8voAt1/RyM2r61hSXa4Ql6KnIJei5Lbw/7a+xbdePWMxqlOsqYZfvWElo4k0jnVoLPdz95WtXNVeg8fjmqNqRS4tBbkUFcexPLFzP5//zp4Z9/MCn7qyknd607yw6wTN1SEawwGu7qhhbXOlQlzmFQW5FI2BkThf+v4L/GTPTKcz4Yt3NrK6YRE/3HKE/ScHcTeGuKYqwPVL6+ioCc1ZiE+M40/Mhpn4WEM5kmu5WrPzm8BdQI+1VmtfSU5lMg6vd53gkw+8OeN+jcD/+t1r2HpkkG/+7AAtVT4+fX0HK1tChH0BGspL5yTEHccyGk/TF03QH03RUlGKcRkGo2laq0s1V11yLlfPqG8B3wAeylF7sgA5jiWazAAQ8LqJp7NkHIev/OAZ/u/bM3/tTa3w2ds6yTqw+/gwkWSG5qpq3rO0DoOhPlwyGaBTjxP0e3AcS+9oAmugrqzkosI+k3HojyaxWYe9fVEymSzdI3E8xkUilaWsxEtzZems5qpP9PD9bhfJrKPevUzKSZBba180xrTnoi1ZmBzH0hdJcqQ/SjLjUBP08vSOQ3ztuZlnpAC0+OC6K5bQO5Iikc6QyVo+sm4RGzqqONQb57KmMDVl/snQG42n2XpkkHCJl2V1ZRwbjvPW4QFcbhc3raynLlxywfUfG4zyyBvdlJW42H5kiOuX19BRHeTYUIKmygB1oQBBv+eig3fi59MfSVFd5mMwlqK5Ur17GaNngVwyU8eIzxZgE/tkMg5dAzEqg152nhzioZ9t58l9Z94nfKoPNMMnblnL0YEETRUBhmJp1jVXsrQ2RBbLovIgtWWlVJX6AIgmM/jdLvpjSQZjSVoqSzg6FGP/yQiXt1RQXeaf3Pd8v7/RRJpYKsP+/gh7T45y+xU1LG0IcbA3QlWZn6s7qmmumN2QzkSI944mqQ2N1Rj0e3QlqkyasyA3xtwH3AfQ2to6V4eVPIomM+zvibCkroxQwDttsMfTWY70x/B7Damsw7GRKL//8MzjKKv8cM36Rq5d2sCicID9J+Mc6Y+xvqWKJbUhklmH/T0RPC4X5QHP2AtF0mF/b4TKoJeRaJr26jJKvG729AyzrCHE8rrzPwk6MQbeG01wsC9K33jA3r2hCWMcDvX1c9Oqejrbqikv9c2qFx5PZ3EcS3907BgT7yw060ammrMgt9ZuAjYBdHZ22rk6ruTZlAybuIhnYqw4msyQyTpYk+W1Q4M8/9ounjw6c3P/+qoKlrQ24XO7GIwmiSRTJNMZvB5D0smStpag38OyhhB+t4sTI3H2nYgQKnXTM5zg+GiMkNfLUDJNY0UJV7VVUXIBY82ZjEPXUIxDfaPsORmhpSrAqsYQxhiSGYeM4/Crna1c2VRBaan3on9sjmPpGU3QPZhgSV2Q1qqgxsTlrDS0Ijlzeo876PewrD40OQQQ8LonQzyezvLuyQiDsQR7Twzy1acOztj2ygrD3e9dwoamajYfGqDM76a5KsBbR4bYc2KY+ooAN/jGev0wdhJzNJ7mnWMj7D4xjN/twuNxkUw5hAJprmyroq6shHg6y/7eCEtqx941zCQWS/PSgV66BuM0hH0YwO9xcbQ/RjJtubKjktqykoseC5/684uns3QPxUlmsriM0Vi4zChX0w+/C9wI1BhjuoA/s9b+Yy7aluIxtcc9EWYTs0KiyQwBr3sykAJeN+Ggi28//w4/3JOcsd3fu6GedR2
NeN1uyoJeKoJe6kM+9hwfYX/PKGuaK7lueS1NVaV0DcUoD3gxgJO1+DywpC6IwTAQS3JlWzX1ZSVUB/1j9yC3Fs7x/tBxLMOxFM/tOcEjW4/SXhUkngzg8bg52BPB7XZzVUc1rZXBWQ15RJOZyReVoN/DsroQgEJczilXs1Y+mYt2pLhN7XHD2DDEQCyF3+PixEiC5spSvMZwcGCUbd09/Mk/vztje2HgixtXsqG9muFYlmgqTWXAy8r6MO+eHGZfb4Q1LZXcuWYRjeWlRJMZXMby8rs9jCYztFUFKfG4aa8Jsv9EjPUt1ZNj4dFkhq7BGIvKAyxrCE174jCTcTg+GOPgUITdx0fZ3zNKdamPqxdXkcg4JFOWdW0V1IZKqL3IaYtnGH9RcbnMOd8hiEzQS73kzNQe+HA0xdGhGIf7o3S2VdEQLmEomuToYIwvP/Iab/fP3FZno4ff/sBKonGDyxiqQ16ciEPXQJyDfVEqgn4+dXU7i6vLqAz5GY6n+MW+PkYTSfaeHCHs9zFY4mb38Qh3r2vmmiXVkzNSekYSVEyZ1322YZBjQ1EefGk/h3ojlPjcXNFcTmNbAGMMdeUB2quCtMyyFz7VxNi+ZqPIhVKQS05NnAw83B/h6ECcFQ1hgh4333lzN1/58TnOZALLy+D9a1twu90sqa4gay0v7D1B90CM9YuryGRcrF4UHhu+8XkYjCf5xbs9HB2KsOdEhBX1IWrKAuw9OYrX7eKmlQ0srQ1RUjL2VO8ZSfDGkUGubK2cdr54IpHh9SO9vN01jMdn2X1skFDQS2PYT9DvYV9PjJoyH1c0V9JWXZbTk48TL4QiF0rPmgXqfOZ4X4hUKsvhgQi90SSH+qN0VAXx+1w8+ta7fGzzObrf467vCHJtRw0+n5c1LVW4sHzv1QM8tv04sQRE0w7/6rqlLK0pI5rOsP3oIE+908Wzu/uw6QTloRBtdSX0jSS5eWU961sraSwfWzBiYoy+qtTHla2V084XP9I3zF/8aDsv7x0hDiytdNNcXcZQNMOBnj42hgJ84LI6slmoD5ac189tYnipqtSnKYNyySjIF6jTT0zORiqV5eUDPTzxVjelPjdL68p49cBJHnvjCPuGzv31a2ogHA5yqD9JU0WC9YuDvNXVy/M7etlyNEIGuKo5yL1XtbK0powTkThP7ejmrUODHO4bZWA0iQWODI9y3dIaPnp1G4urgpO98Inx8InvdbqeeFf/CP/moS3s6PnlRUhjtwiwNJYH+MDljWxc00TCsTy/u5f68gDt5zG9sD+aZPOBfq7uqCIc8F3QC2euX2xl/lKQL1Cnn5i8GI5j6Y3EeXlPL1sPneTYYAKfSfDQORZ6OF1vFI5Ho5T5PVQHDc9sP8pz747iABUe2NBRzh/cvoLF1RWMptI8uaObF3adJJnNUBtyc2QYhpOworaU2y6rY/Wi8lNmysz0vY5EkvxsXw+PbT00GeIlQH0prG6poqk6yLK6MLdd1khl0E8qleWWVXUsCgem/V5O74H73C7CJWPnDS70hTOXL7Yyv+nZsUDNZjx2Yjre4YEIP9vTw2v7+mgoN7xyaPSi2ktn4ZrFldRXlvDc7kF2940FamsINm5o5q51rcQTDi/vO0nY52PnsQEO9o4QjUJ5EDJpuGl5JX94+3JW1lcBZ4bgdN+r41he3NfL15/ZSzabZnV9gJV1AS5vqyaWyHJtRy0NVQFCPu/kDBKfz017TdlZv5eBWIothwdY3RieXEKuxOeh1O+hIui/oBfOXLzYysKgIJcLFk1m2Hygj837e3hjVzdvz7xIz4wM0FzjZyiRoe/IMPFkgkWlkHbgulUNrGurJ5m2PLLtGOlMls7FVbhx0ReBDHDz0mrWNlVz15omaspLJ9s9WwhmMg7HBqJ0jcTwe9xYt8MHVtXhd1tuXNVEaYmHgUiaurD/ouaFV5X6WN0YJprIEk+PXcwT8LjxuFwX/MKpk59yvvQskXOaGKt1Wzg2EmM
kkeL1g118+9W+i27TBzSEoT5cwmVNFbx5ZIhU2qG6IshlTeWsa67kssZy4lmHeDJDOODGa9wMR5J86MpGUllLVZmP333/Mhoqz+whnx6CEze4Otwf4fG3utnVPUJLVZD3Lq/ho1e1MRhL01hRSonHTSzhnNe88NNvhwuQzDo0V5RO3mYW0JRCueSMtXN/25POzk67ZcuWOT+uXJyJk4XpTJZ/2ryfJ7ac5OIGUcYY4O7Lg2SybiwustZQVurBZVxs6KiiKlhCfzRJ0Oth25Eh/G43NeU+3jw8xO7jI3zu5mW8d0ntGTNBUqksh/sjpHHwGxdDyQylXhcVJT7e7R9ld/cIjnXIZg3tNaW01QQpD/go83lJZh0Go2maKwO4XOacJxgn7mG+r2eUgM/DioYwgMa05ZIyxmy11naevl3PNplRIpFh97EhdvcN8PKObn7ybnTWbVrgkR2/bMc1vq0MePHtk4ykxrataC6hvS5Mc3mQJXUh0hnLhvZKblxaS0Xo1Jknh3qH+NpPd3P4WD/RDHjc4HW7WVThI+3yMzQaJ1Tq59ffu5jLmypwAe/2Rjg5kqSy1M+yuhChEu95BfjEHQmPDcYBF4sqApM97onhHM04kbmkIJdppVJZnt13hK89spP9kUt7rIkVOEeB0dQvt+/pTpDFw8rGKlbXlbOivnza+diZjMP/fGYPj71z6nz1Ck+WjMlwcDBOU9jFx9/TwQ1L6khj+f4bR3llXx83rqzl8kUV532jq4mTqIvKAyyr/+W9UCa+dqInfvqUR5FLSc8wAZi8rH4gnqSsxMPWw/38p+/vZGDmtR0umRo/fPqWNiq9Qa5bUkNlyH/WoB2IpQj6PFR4weUamwVjLFy3vIr26gAv7hvixuWV3L6qidJSLyeH47iN5T1Lq7l+WS21ofO7uAdOPYnqcpmz9rw140TmkoJ8gUulsrx06Bjf/0UXgaCLsK+ElqoyfvFu75yH+MoKuG51A36Pl5ryEu64rJny87iIpqrUx6dvWMb1qxoIeMbWs4ynspQHfbRVBrljbYZFoQCVIT8A1UE/t1/eRMDjJhTwXtDQx+knUc8211szTmQu6Zm2wB0divLVH+5lb3+SUjd8/KpmugZ7eGbv4JzVsLbWzY1rWvnI2iY6asI4jr2gy9o9HhcddWE66sKT207pKVebM/ZvKJ/+gp4LpZ63FAIF+QI1MR1vOJ6ivdxwuB+WVMC3NnfNaR1NZS4+d/tqrlvSODnW7HKZi1oAeaq56hGr5y2FQM/ABSqezrKje5g3D/cQCJaxoibN9r7sRbXVXApOBiIpOP3aoBqgvhreu6qGaMLg9RgWVQXJprM4eFjXWkFnS+3kfVFE5MLpr2eBSSQyHBiI0l5RyuKaID/b6fDC3j6GZ16k5xTf/sx64ilYUhPkxGiSAz2j+L1uYqkM71teR3tV2djqO46lezh+yvJumo4nknu5WurtDuDrgBt40Fr7lVy0K7kTiabY1j1MwGt46p0eNq5bhHFn+fmOI+cd4j/9wjWsbKyZHH/2u13UhQMsrS3DZ1yksdQE/Xg8Y+tjOo49ZYaHhiBELo1Z/2UZY9zA3wEfALqA140xP7LW7pxt25IbqVSWJ3ed4PHtx/n197Sycd0i/B7DV57YyzvneZ+UQ1+5c/LjqaFc7vFRPs29vU/fT0QunVz8lV0N7LPWHgAwxnwP2AgoyAtE90icnuEYd65p5OrWaozHxSv7+4jHz32V5tQAF5HClIslS5qAqWt4dY1vO4Ux5j5jzBZjzJbe3t4cHFZmksk49IwkyGQcFoUDfHBtMx9e00RZcGxe9oaWSl4+cvYxlfUehbhIschFj3y6M1dn3InLWrsJ2ARjN83KwXFlBgOx1ClrU069h/bO473c9T9fP+vXvvPnt2tetEgRyUWQdwEtUx43A905aFdm4WxrU37szx/n9fj0X6MeuEhxysXQyuvAMmNMhzHGB9wL/CgH7coFSKWyHOqLkEqNzQX
3eFzUhX95T+0DPYO0f1EhLjIfzbpHbq3NGGM+DzzJ2PTDb1pr35l1ZXJBukfiPLurh1tW1Z2xFNkDL7/KVx6bfhEIBbhI8cvJ3DBr7RPAE7loSy7OonDgjEWBI9EUf/roszyywzlj/89eA//5boW4yHygSb7zxOmLAv/Dz1/jr3585uygm+vgm/9OAS4ynyjI55lEIsN339x7RohXAd/6fCdrmuvzU5iIXDIK8nlkaDTBf3l8Kz/YNnTK9gd+awV3rFqan6JE5JJTkM8DsViaH+44yDdfOMS7A+nJ7R9cFuD+jRtorSnPY3UicqkpyIvcSCTJQ5sP8sBz+4k4UOEHr4E///hyPrR6Wb7LE5E5oCAvUpmMw+HBUR55vYtnd56gpaYEv9tw/4dXclV7o24VK7KAKMiL0JG+Yf73i4fIpuO8dSzC+1fWc8tlDaxprNQCDSILkP7qi0jPUJQfbDvGG4f7eW73ABvX1/OF21ZzbXs14TJ/vssTkTxRkBeJSDTF15/dyw/f6Ka9wsWH1zbw+zcv1YlMEVGQF7pYLM2LB0/w0zePsa8vwtqWCj7+3mZuX96kYRQRARTkBSuRyPDivuP8y5Zj7D02QPeo5c41dfzBLStoqS6bvBmWiIiCvAD1Dcf4hxf38YOtR+lNwI1Ly9l4ZQWfuKaNRVWhfJcnIgVGQV5AHMfSPRDhy49t59ndQwTccPPyKv70w6tor63Id3kiUqAU5AXCcSwnR+I8/OphfrF/CJcLNm5o4I9vv4Ky4PSLG4uIgIK8YMTTWQ70RqkIefmd97cRLPGxcU2LQlxEzklBnkexWJp3To7QXllKZZmfy5vKWVpXRnXQr5OZInLelBZ5tKtnlIc3H+Kl/X0ksw7lpT7qywMKcRG5ILNKDGPMx4wx7xhjHGNMZ66Kms8SiQzbDg7wxpF+OqoC/Nq17dywpEar1ovIRZvt0MoO4B7gH3JQy7w2NJrg+d09RJ00r+8bwONx89kblnBVR3W+SxORIjerILfW7gIwRnfam8mJwQh/9cQuth0ZoKM2zMa1TbTVB1lcFcx3aSIyD8zZyU5jzH3AfQCtra1zddi8SiQybDs6wEObD/L8O33csrqGT13fzpWLqnV5vYjkzDnTxBjzDNAwzae+ZK394fkeyFq7CdgE0NnZac+7wiJ2YCDK/37pAPtODnPbFXXc/6FVNFSWnfsLRUQuwDmD3Fp761wUMp84jiWeztJeUcrv3LCYY6NxbllWT0WoJN+licg8pPf3l0A8naVrMEZzZSnXLqvLdzkiMs/Ndvrh3caYLuA9wOPGmCdzU1ZxC3jdNFeWakqhiMyJ2c5aeRR4NEe1zBsulyHo15sdEZkbuoRQRKTIKchFRIqcglxEpMgpyM+T41iiyQyOsyCmwItIEVGQn6eJKYXxdDbfpYiInEJBfp40pVBECpXmyJ0nTSkUkUKlHrmISJFTkIuIFDkFuYhIkVOQi4gUOQW5iEiRW1BBrot6RGQ+WlBBrot6RGQ+WlBBrot6RGQ+WlBXuOiiHhGZjxZUj1xEZD6a7VJvf22M2W2M2W6MedQYU5Gjui6aTmiKyEIz2x7508Dl1to1wF7g/tmXNDs6oSkiC82sgtxa+5S1NjP+cDPQPPuSZkcnNEVkocnlGPlngJ+c7ZPGmPuMMVuMMVt6e3tzeNhTTZzQdLnMJTuGiEghOWeQG2OeMcbsmObfxin7fAnIAA+frR1r7SZrbae1trO2tnZWRWscXETkl845F89ae+tMnzfG/BZwF3CLtXZOknViHLy5slTTCUVkwZtVChpj7gD+BHi/tTaWm5LOznEs8XQWv9ulcXARkXGzHSP/BhACnjbGbDPGPJCDms5qoieezDoaBxcRGTerHrm1dmmuCjkfmpEiInKmohpg1iX2IiJn0iX6IiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRU5BLiJS5BTkIiJFzszR7VFOPagxvcDhHDdbA/TluM1cUn0Xr5B
rA9U3G4VcGxRefW3W2jPuOpiXIL8UjDFbrLWd+a7jbFTfxSvk2kD1zUYh1waFX98EDa2IiBQ5BbmISJGbT0G+Kd8FnIPqu3iFXBuovtko5Nqg8OsD5tEYuYjIQjWfeuQiIguSglxEpMjNqyA3xvy1MWa3MWa7MeZRY0xFvmuayhjzMWPMO8YYxxhTEFOajDF3GGP2GGP2GWO+mO96pjLGfNMY02OM2ZHvWqZjjGkxxjxvjNk1/nv9Qr5rmmCMKTHGvGaMeWu8tj/Pd03TMca4jTFvGmMey3ctpzPGHDLGvD2++tmWfNczk3kV5MDTwOXW2jXAXuD+PNdzuh3APcCL+S4Exv6IgL8DPgisBj5pjFmd36pO8S3gjnwXMYMM8EfW2lXAtcDvFdDPLwncbK1dC6wD7jDGXJvfkqb1BWBXvouYwU3W2nWFPpd8XgW5tfYpa21m/OFmoDmf9ZzOWrvLWrsn33VMcTWwz1p7wFqbAr4HbMxzTZOstS8CA/mu42ystcettW+MfzzKWCA15beqMXZMZPyhd/xfQc1sMMY0A3cCD+a7lmI3r4L8NJ8BfpLvIgpcE3B0yuMuCiSIio0xph1YD7ya51ImjQ9bbAN6gKettQVT27i/Bf4YcPJcx9lY4CljzFZjzH35LmYmRbcApjHmGaBhmk99yVr7w/F9vsTY296H57K28WOfs74CYqbZVlC9tmJgjCkDvg/8W2vtSL7rmWCtzQLrxs8VPWqMudxaWxDnG4wxdwE91tqtxpgb81zO2Vxnre02xtQBTxtjdo+/Syw4RRfk1tpbZ/q8Mea3gLuAW2weJsmfq74C0wW0THncDHTnqZaiZIzxMhbiD1trH8l3PdOx1g4ZY15g7HxDQQQ5cB3wEWPMh4ASIGyM+Y619tfzXNcka233+P89xphHGRuKLMggn1dDK8aYO4A/AT5irY3lu54i8DqwzBjTYYzxAfcCP8pzTUXDGGOAfwR2WWv/Jt/1TGWMqZ2YtWWMCQC3ArvzWtQU1tr7rbXN1tp2xp53zxVSiBtjgsaY0MTHwG0UzovgGeZVkAPfAEKMvQ3aZox5IN8FTWWMudsY0wW8B3jcGPNkPusZPzH8eeBJxk7U/bO19p181jSVMea7wCvACmNMlzHms/mu6TTXAb8B3Dz+fNs23sMsBI3A88aY7Yy9YD9trS24KX4FrB542RjzFvAa8Li19qd5rumsdIm+iEiRm289chGRBUdBLiJS5BTkIiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRe7/A4JcVhLWP4FuAAAAAElFTkSuQmCC","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["plt.scatter(frc_api['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","frc_bulk['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","s=1, alpha=0.1)"]}],"metadata":{"kernelspec":{"display_name":"Python 3.10.5 ('pudl-dev')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.5"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"adf7f0b18919f80aa48732f306897016c591ffeed72c8be7c2b511e7f25e6b58"}}},"nbformat":4,"nbformat_minor":2} diff --git a/notebooks/work-in-progress/state-demand.ipynb b/notebooks/work-in-progress/state-demand.ipynb index 36ef4eced3..d319093948 100644 --- a/notebooks/work-in-progress/state-demand.ipynb +++ b/notebooks/work-in-progress/state-demand.ipynb @@ -112,14 +112,14 @@ "#HARVEST_TOKEN = os.environ[\"HARVEST_TOKEN\"]\n", "#HARVEST_ACCOUNT_ID = os.environ[\"HARVEST_ACCOUNT_ID\"]\n", "\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", + "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -187,6 +187,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -226,6 +227,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -265,6 +267,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -321,6 +324,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -527,7 +531,7 @@ "source": [ "%%time\n", "import pathlib\n", - "local_dir = pathlib.Path(pudl_settings['data_dir']) / 'local'\n", + "local_dir = PudlPaths().data_dir / 'local'\n", "ventyx_path = local_dir / 'ventyx/state_level_load_2007_2018.csv'\n", "base_dir = local_dir / 'state-demand'\n", "base_dir.mkdir(parents=True, exist_ok=True)\n", diff --git a/notebooks/work-in-progress/test-validation-tests.ipynb b/notebooks/work-in-progress/test-validation-tests.ipynb index 79c68e4ebc..1d537f2017 100644 --- a/notebooks/work-in-progress/test-validation-tests.ipynb +++ b/notebooks/work-in-progress/test-validation-tests.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "advised-nothing", "metadata": {}, @@ -86,8 +87,8 @@ "outputs": [], "source": [ "# Establish connection to pudl database\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])" + "from pudl.workspace.setup import PudlPaths\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { @@ -174,6 +175,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "australian-frederick", "metadata": {}, @@ -188,7 +190,7 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_engine = sa.create_engine(pudl_settings[\"pudl_db\"])" + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { diff --git a/notebooks/work-in-progress/transform_xbrl.ipynb b/notebooks/work-in-progress/transform_xbrl.ipynb index 56c78fb20a..805a36096a 100644 --- a/notebooks/work-in-progress/transform_xbrl.ipynb +++ b/notebooks/work-in-progress/transform_xbrl.ipynb @@ -23,7 +23,7 @@ "import pudl\n", "\n", "from pudl.settings import (Ferc1Settings, Ferc1DbfSettings, Ferc1XbrlSettings)\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", + "from pudl.workspace.setup 
import PudlPaths\n", "from pudl.transform.ferc1 import *\n", "from typing import Literal, Tuple" ] diff --git a/pyproject.toml b/pyproject.toml index 0740f55d4b..920f5a0b73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "coloredlogs>=14.0,<15.1", # Dagster requires 14.0 "dagster-webserver>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11 "dagster>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11 - "dask>=2021.8,<2023.8.1", + "dask>=2021.8,<2023.8.2", "datapackage>=1.11,<1.16", # Transition datastore to use frictionless. "fsspec>=2021.7,<2023.6.1", # For caching datastore on GCS "gcsfs>=2021.7,<2023.6.1", # For caching datastore on GCS @@ -35,7 +35,7 @@ dependencies = [ "networkx>=2.2,<3.2", "numpy>=1.18.5,!=1.23.0,<1.26", "pandas>=1.4,<1.5.4", - "pyarrow>=5,<12.1", + "pyarrow>=5,<13.1", "pydantic[email]>=1.7,<2", "python-dotenv>=0.21,<1.1", "python-snappy>=0.6,<0.7", @@ -119,7 +119,7 @@ dev = [ "isort>=5.0,<5.13", "jedi>=0.18,<0.20", "lxml>=4.6,<4.10", - "tox>=4,<4.7", + "tox>=4,<4.11", "twine>=3.3,<4.1", ] doc = [ @@ -128,8 +128,8 @@ doc = [ "sphinx-autoapi>=1.8,<2.2", "sphinx-issues>=1.2,<3.1", "sphinx-reredirects", - "sphinx>=4,!=5.1.0,<7.2", - "sphinxcontrib_bibtex>=2.4,<2.6", + "sphinx>=4,!=5.1.0,<7.3", + "sphinxcontrib_bibtex>=2.4,<2.7", ] test = [ "bandit>=1.6,<1.8", @@ -153,7 +153,7 @@ test = [ "pytest>=6.2,<7.5", "responses>=0.14,<0.24", "rstcheck[sphinx]>=5.0,<6.2", - "tox>=4.0,<4.9", + "tox>=4.0,<4.11", ] datasette = [ "datasette>=0.60,<0.65", diff --git a/src/pudl/analysis/plant_parts_eia.py b/src/pudl/analysis/plant_parts_eia.py index 437ed5f496..a45cfc3da7 100644 --- a/src/pudl/analysis/plant_parts_eia.py +++ b/src/pudl/analysis/plant_parts_eia.py @@ -159,7 +159,8 @@ .. 
code-block:: python import pudl - pudl_engine = sa.create_engine(pudl.workspace.setup.get_defaults()['pudl_db']) + from pudl.workspace.setup import PudlPaths + pudl_engine = sa.create_engine(PudlPaths().pudl_db) pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine,freq='AS') Then make the table via pudl_out: diff --git a/src/pudl/analysis/service_territory.py b/src/pudl/analysis/service_territory.py index 2481a0ca62..322868ee29 100644 --- a/src/pudl/analysis/service_territory.py +++ b/src/pudl/analysis/service_territory.py @@ -18,6 +18,7 @@ from matplotlib import pyplot as plt import pudl +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -578,9 +579,7 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - pudl_settings = pudl.workspace.setup.get_defaults() - pudl_engine = sa.create_engine(pudl_settings["pudl_db"]) - + pudl_engine = sa.create_engine(PudlPaths().pudl_db) # Load the US Census DP1 county data: county_gdf = pudl.etl.defs.load_asset_value(AssetKey("county_censusdp1")) diff --git a/src/pudl/analysis/state_demand.py b/src/pudl/analysis/state_demand.py index cf973bfd97..ef633a7d9b 100644 --- a/src/pudl/analysis/state_demand.py +++ b/src/pudl/analysis/state_demand.py @@ -22,7 +22,6 @@ """ import argparse import datetime -import pathlib import sys from collections.abc import Iterable from typing import Any @@ -830,8 +829,6 @@ def main(): # --- Connect to PUDL database --- # - pudl_settings = pudl.workspace.setup.get_defaults() - # --- Read in inputs from PUDL + dagster cache --- # prediction = pudl.etl.defs.load_asset_value( AssetKey("predicted_state_hourly_demand") @@ -839,7 +836,7 @@ def main(): # --- Export results --- # - local_dir = pathlib.Path(pudl_settings["data_dir"]) / "local" + local_dir = pudl.workspace.setup.PudlPaths().data_dir / "local" ventyx_path = local_dir / "ventyx/state_level_load_2007_2018.csv" base_dir = local_dir / "state-demand" base_dir.mkdir(parents=True, exist_ok=True) diff 
--git a/src/pudl/cli/etl.py b/src/pudl/cli/etl.py index a592b3ab74..604c754ee8 100644 --- a/src/pudl/cli/etl.py +++ b/src/pudl/cli/etl.py @@ -27,6 +27,7 @@ import pudl from pudl.settings import EtlSettings +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -44,12 +45,6 @@ def parse_command_line(argv): parser.add_argument( dest="settings_file", type=str, default="", help="path to ETL settings file." ) - parser.add_argument( - "--sandbox", - action="store_true", - default=False, - help="Use the Zenodo sandbox rather than production", - ) parser.add_argument( "--logfile", default=None, @@ -121,9 +116,6 @@ def main(): etl_settings = EtlSettings.from_yaml(args.settings_file) - # Set PUDL_INPUT/PUDL_OUTPUT env vars from .pudl.yml if not set already! - pudl.workspace.setup.get_defaults() - dataset_settings_config = etl_settings.datasets.dict() process_epacems = True if etl_settings.datasets.epacems is None: @@ -158,7 +150,6 @@ def main(): "dataset_settings": {"config": dataset_settings_config}, "datastore": { "config": { - "sandbox": args.sandbox, "gcs_cache_path": args.gcs_cache_path if args.gcs_cache_path else "", @@ -179,7 +170,7 @@ def main(): logger.info(f"Publishing outputs to {output_path}") fs, _, _ = fsspec.get_fs_token_paths(output_path) fs.put( - etl_settings.pudl_out, + PudlPaths().output_dir, output_path, recursive=True, ) diff --git a/src/pudl/convert/censusdp1tract_to_sqlite.py b/src/pudl/convert/censusdp1tract_to_sqlite.py index 43567caa0c..66faf055ea 100644 --- a/src/pudl/convert/censusdp1tract_to_sqlite.py +++ b/src/pudl/convert/censusdp1tract_to_sqlite.py @@ -22,21 +22,14 @@ from dagster import Field, asset import pudl -from pudl.helpers import EnvVar from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @asset( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of 
directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing Census database.", default_value=True ), @@ -82,7 +75,7 @@ def censusdp1tract_to_sqlite(context): "censusdp1tract", year=context.op_config["year"] ) extract_root = tmpdir_path / Path(zip_ref.filelist[0].filename) - out_path = Path(context.op_config["pudl_output_path"]) / "censusdp1tract.sqlite" + out_path = PudlPaths().output_dir / "censusdp1tract.sqlite" if out_path.exists(): if context.op_config["clobber"]: diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index 4bb2b33a77..e573bc0d34 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -29,11 +29,11 @@ *load_assets_from_modules([eia_bulk_elec_assets], group_name="eia_bulk_elec"), *load_assets_from_modules([epacems_assets], group_name="epacems"), *load_assets_from_modules([pudl.extract.eia860], group_name="raw_eia860"), - *load_assets_from_modules([pudl.transform.eia860], group_name="clean_eia860"), + *load_assets_from_modules([pudl.transform.eia860], group_name="_core_eia860"), *load_assets_from_modules([pudl.extract.eia861], group_name="raw_eia861"), *load_assets_from_modules([pudl.transform.eia861], group_name="clean_eia861"), *load_assets_from_modules([pudl.extract.eia923], group_name="raw_eia923"), - *load_assets_from_modules([pudl.transform.eia923], group_name="clean_eia923"), + *load_assets_from_modules([pudl.transform.eia923], group_name="_core_eia923"), *load_assets_from_modules([pudl.transform.eia], group_name="norm_eia"), *load_assets_from_modules([pudl.extract.ferc1], group_name="raw_ferc1"), *load_assets_from_modules([pudl.transform.ferc1], group_name="norm_ferc1"), diff --git a/src/pudl/etl/epacems_assets.py b/src/pudl/etl/epacems_assets.py index 938a86397a..9b66e29531 100644 --- a/src/pudl/etl/epacems_assets.py +++ b/src/pudl/etl/epacems_assets.py @@ -9,17 +9,16 @@ see: https://docs.dagster.io/concepts/ops-jobs-graphs/dynamic-graphs and 
https://docs.dagster.io/concepts/assets/graph-backed-assets. """ from collections import namedtuple -from pathlib import Path import dask.dataframe as dd import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -from dagster import AssetIn, DynamicOut, DynamicOutput, Field, asset, graph_asset, op +from dagster import AssetIn, DynamicOut, DynamicOutput, asset, graph_asset, op import pudl -from pudl.helpers import EnvVar from pudl.metadata.classes import Resource +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -42,18 +41,7 @@ def get_years_from_settings(context): yield DynamicOutput(year, mapping_key=str(year)) -@op( - required_resource_keys={"datastore", "dataset_settings"}, - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, -) +@op(required_resource_keys={"datastore", "dataset_settings"}) def process_single_year( context, year, @@ -73,15 +61,14 @@ def process_single_year( epacems_settings = context.resources.dataset_settings.epacems schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() - partitioned_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems" - ) + partitioned_path = PudlPaths().output_dir / "hourly_emissions_epacems" partitioned_path.mkdir(exist_ok=True) for state in epacems_settings.states: logger.info(f"Processing EPA CEMS hourly data for {year}-{state}") df = pudl.extract.epacems.extract(year=year, state=state, ds=ds) - df = pudl.transform.epacems.transform(df, epacamd_eia, plants_entity_eia) + if not df.empty: # If state-year combination has data + df = pudl.transform.epacems.transform(df, epacamd_eia, plants_entity_eia) table = pa.Table.from_pandas(df, schema=schema, preserve_index=False) # Write to a directory of partitioned parquet files @@ -96,17 +83,7 @@ def process_single_year( return YearPartitions(year, 
epacems_settings.states) -@op( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, -) +@op def consolidate_partitions(context, partitions: list[YearPartitions]) -> None: """Read partitions into memory and write to a single monolithic output. @@ -114,12 +91,8 @@ def consolidate_partitions(context, partitions: list[YearPartitions]) -> None: context: dagster keyword that provides access to resources and config. partitions: Year and state combinations in the output database. """ - partitioned_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems" - ) - monolithic_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems.parquet" - ) + partitioned_path = PudlPaths().output_dir / "hourly_emissions_epacems" + monolithic_path = PudlPaths().output_dir / "hourly_emissions_epacems.parquet" schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() with pq.ParquetWriter( diff --git a/src/pudl/extract/eia860.py b/src/pudl/extract/eia860.py index 4e3094e600..18a37345a6 100644 --- a/src/pudl/extract/eia860.py +++ b/src/pudl/extract/eia860.py @@ -69,29 +69,29 @@ def get_dtypes(page, **partition): # TODO (bendnorman): Add this information to the metadata raw_table_names = ( - "raw_boiler_cooling_eia860", - "raw_boiler_generator_assn_eia860", - "raw_boiler_info_eia860", - "raw_boiler_mercury_eia860", - "raw_boiler_nox_eia860", - "raw_boiler_particulate_eia860", - "raw_boiler_so2_eia860", - "raw_boiler_stack_flue_eia860", - "raw_cooling_equipment_eia860", - "raw_emission_control_strategies_eia860", - "raw_emissions_control_equipment_eia860", - "raw_fgd_equipment_eia860", - "raw_fgp_equipment_eia860", - "raw_generator_eia860", - "raw_generator_existing_eia860", - "raw_generator_proposed_eia860", - "raw_generator_retired_eia860", - "raw_multifuel_existing_eia860", - "raw_multifuel_retired_eia860", - 
"raw_ownership_eia860", - "raw_plant_eia860", - "raw_stack_flue_equipment_eia860", - "raw_utility_eia860", + "raw_eia860__boiler_cooling", + "raw_eia860__boiler_generator_assn", + "raw_eia860__boiler_info", + "raw_eia860__boiler_mercury", + "raw_eia860__boiler_nox", + "raw_eia860__boiler_particulate", + "raw_eia860__boiler_so2", + "raw_eia860__boiler_stack_flue", + "raw_eia860__cooling_equipment", + "raw_eia860__emission_control_strategies", + "raw_eia860__emissions_control_equipment", + "raw_eia860__fgd_equipment", + "raw_eia860__fgp_equipment", + "raw_eia860__generator", + "raw_eia860__generator_existing", + "raw_eia860__generator_proposed", + "raw_eia860__generator_retired", + "raw_eia860__multifuel_existing", + "raw_eia860__multifuel_retired", + "raw_eia860__ownership", + "raw_eia860__plant", + "raw_eia860__stack_flue_equipment", + "raw_eia860__utility", ) @@ -185,7 +185,7 @@ def extract_eia860(context, eia860_raw_dfs): # create descriptive table_names eia860_raw_dfs = { - "raw_" + table_name + "_eia860": df for table_name, df in eia860_raw_dfs.items() + "raw_eia860__" + table_name: df for table_name, df in eia860_raw_dfs.items() } eia860_raw_dfs = dict(sorted(eia860_raw_dfs.items())) diff --git a/src/pudl/extract/eia861.py b/src/pudl/extract/eia861.py index 369ce067d2..ad6a08b017 100644 --- a/src/pudl/extract/eia861.py +++ b/src/pudl/extract/eia861.py @@ -74,26 +74,26 @@ def get_dtypes(page, **partition): table_name: AssetOut() for table_name in sorted( ( # is there some way to programmatically generate this list? 
- "raw_advanced_metering_infrastructure_eia861", - "raw_balancing_authority_eia861", - "raw_delivery_companies_eia861", - "raw_demand_response_eia861", - "raw_demand_side_management_eia861", - "raw_distributed_generation_eia861", - "raw_distribution_systems_eia861", - "raw_dynamic_pricing_eia861", - "raw_energy_efficiency_eia861", - "raw_frame_eia861", - "raw_green_pricing_eia861", - "raw_mergers_eia861", - "raw_net_metering_eia861", - "raw_non_net_metering_eia861", - "raw_operational_data_eia861", - "raw_reliability_eia861", - "raw_sales_eia861", - "raw_service_territory_eia861", - "raw_short_form_eia861", - "raw_utility_data_eia861", + "raw_eia861__advanced_metering_infrastructure", + "raw_eia861__balancing_authority", + "raw_eia861__delivery_companies", + "raw_eia861__demand_response", + "raw_eia861__demand_side_management", + "raw_eia861__distributed_generation", + "raw_eia861__distribution_systems", + "raw_eia861__dynamic_pricing", + "raw_eia861__energy_efficiency", + "raw_eia861__frame", + "raw_eia861__green_pricing", + "raw_eia861__mergers", + "raw_eia861__net_metering", + "raw_eia861__non_net_metering", + "raw_eia861__operational_data", + "raw_eia861__reliability", + "raw_eia861__sales", + "raw_eia861__service_territory", + "raw_eia861__short_form", + "raw_eia861__utility_data", ) ) }, @@ -113,7 +113,8 @@ def extract_eia861(context): eia861_raw_dfs = Extractor(ds).extract(year=eia_settings.eia861.years) eia861_raw_dfs = { - "raw_" + table_name: df for table_name, df in eia861_raw_dfs.items() + "raw_eia861__" + table_name.replace("_eia861", ""): df + for table_name, df in eia861_raw_dfs.items() } eia861_raw_dfs = dict(sorted(eia861_raw_dfs.items())) diff --git a/src/pudl/extract/eia923.py b/src/pudl/extract/eia923.py index 09610b048f..695f55ec96 100644 --- a/src/pudl/extract/eia923.py +++ b/src/pudl/extract/eia923.py @@ -95,11 +95,11 @@ def get_dtypes(page, **partition): # TODO (bendnorman): Add this information to the metadata eia_raw_table_names = ( - 
"raw_boiler_fuel_eia923", - "raw_fuel_receipts_costs_eia923", - "raw_generation_fuel_eia923", - "raw_generator_eia923", - "raw_stocks_eia923", + "raw_eia923__boiler_fuel", + "raw_eia923__fuel_receipts_costs", + "raw_eia923__generation_fuel", + "raw_eia923__generator", + "raw_eia923__stocks", # There's an issue with the EIA-923 archive for 2018 which prevents this table # from being extracted currently. When we update to a new DOI this problem will # probably fix itself. See comments on this issue: @@ -129,7 +129,7 @@ def extract_eia923(context): # create descriptive table_names eia923_raw_dfs = { - "raw_" + table_name + "_eia923": df for table_name, df in eia923_raw_dfs.items() + "raw_eia923__" + table_name: df for table_name, df in eia923_raw_dfs.items() } eia923_raw_dfs = dict(sorted(eia923_raw_dfs.items())) @@ -141,5 +141,5 @@ def extract_eia923(context): # from being extracted currently. When we update to a new DOI this problem will # probably fix itself. See comments on this issue: # https://github.com/catalyst-cooperative/pudl/issues/2448 - if table_name != "raw_emissions_control_eia923" + if table_name != "raw_eia923__emissions_control" ) diff --git a/src/pudl/extract/epacems.py b/src/pudl/extract/epacems.py index c91580509f..c656eb8aeb 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -1,72 +1,74 @@ """Retrieve data from EPA CEMS hourly zipped CSVs. +Prior to August 2023, this data was retrieved from an FTP server. After August 2023, +this data is now retrieved from the CEMS API. The format of the files has changed from +monthly CSVs for each state to one CSV per state per year. The names of the columns +have also changed. Column name compatibility was determined by reading the CEMS API +documentation on column names. + Presently, this module is where the CEMS columns are renamed and dropped. Any columns in the IGNORE_COLS dictionary are excluded from the final output. 
All of these columns are calculable rates, measurement flags, or descriptors (like facility name) that can be accessed by merging this data with the EIA860 plants entity table. We also remove the -`FACILITY_ID` field because it is internal to the EPA's business accounting database and -`UNIT_ID` field because it's a unique (calculable) identifier for plant_id and -emissions_unit_id (previously `UNITID`) groupings. It took a minute to verify the -difference between the `UNITID` and `UNIT_ID` fields, but coorespondance with the EPA's -CAMD team cleared this up. +`FACILITY_ID` field because it is internal to the EPA's business accounting database. Pre-transform, the `plant_id_epa` field is a close but not perfect indicator for -`plant_id_eia`. In the raw data it's called `ORISPL_CODE` but that's not entirely -accurate. The epacamd_eia crosswalk will show that the mapping between `ORISPL_CODE` as -it appears in CEMS and the `plant_id_eia` field used in EIA data. Hense, we've called it -`plant_id_epa` until it gets transformed into `plant_id_eia` during the transform -process with help from the crosswalk. +`plant_id_eia`. In the raw data it's called `Facility ID` (ORISPL code) but that's not +entirely accurate. The epacamd_eia crosswalk shows the mapping between +`Facility ID` as it appears in CEMS and the `plant_id_eia` field used in EIA data. +Hence, we've called it `plant_id_epa` until it gets transformed into `plant_id_eia` +during the transform process with help from the crosswalk.
""" from pathlib import Path from typing import NamedTuple -from zipfile import ZipFile import pandas as pd import pudl.logging_helpers +from pudl.metadata.classes import Resource from pudl.workspace.datastore import Datastore logger = pudl.logging_helpers.get_logger(__name__) -# EPA CEMS constants ##### -RENAME_DICT = { - "STATE": "state", - "FACILITY_NAME": "plant_name", # Not reading from CSV - "ORISPL_CODE": "plant_id_epa", # Not quite the same as plant_id_eia - "UNITID": "emissions_unit_id_epa", +######################################################################################## +# EPA CEMS constants for API CSV files ##### + +API_RENAME_DICT = { + "State": "state", + "Facility Name": "plant_name", # Not reading from CSV + "Facility ID": "plant_id_epa", # unique facility id for internal EPA database management (ORIS code) + "Unit ID": "emissions_unit_id_epa", + "Associated Stacks": "associated_stacks", # These op_date, op_hour, and op_time variables get converted to # operating_date, operating_datetime and operating_time_interval in # transform/epacems.py - "OP_DATE": "op_date", - "OP_HOUR": "op_hour", - "OP_TIME": "operating_time_hours", - "GLOAD (MW)": "gross_load_mw", - "GLOAD": "gross_load_mw", - "SLOAD (1000 lbs)": "steam_load_1000_lbs", - "SLOAD (1000lb/hr)": "steam_load_1000_lbs", - "SLOAD": "steam_load_1000_lbs", - "SO2_MASS (lbs)": "so2_mass_lbs", - "SO2_MASS": "so2_mass_lbs", - "SO2_MASS_MEASURE_FLG": "so2_mass_measurement_code", - "SO2_RATE (lbs/mmBtu)": "so2_rate_lbs_mmbtu", # Not reading from CSV - "SO2_RATE": "so2_rate_lbs_mmbtu", # Not reading from CSV - "SO2_RATE_MEASURE_FLG": "so2_rate_measure_flg", # Not reading from CSV - "NOX_RATE (lbs/mmBtu)": "nox_rate_lbs_mmbtu", - "NOX_RATE": "nox_rate_lbs_mmbtu", # Not reading from CSV - "NOX_RATE_MEASURE_FLG": "nox_rate_measurement_code", # Not reading from CSV - "NOX_MASS (lbs)": "nox_mass_lbs", - "NOX_MASS": "nox_mass_lbs", - "NOX_MASS_MEASURE_FLG": "nox_mass_measurement_code", - "CO2_MASS 
(tons)": "co2_mass_tons", - "CO2_MASS": "co2_mass_tons", - "CO2_MASS_MEASURE_FLG": "co2_mass_measurement_code", - "CO2_RATE (tons/mmBtu)": "co2_rate_tons_mmbtu", # Not reading from CSV - "CO2_RATE": "co2_rate_tons_mmbtu", # Not reading from CSV - "CO2_RATE_MEASURE_FLG": "co2_rate_measure_flg", # Not reading from CSV - "HEAT_INPUT (mmBtu)": "heat_content_mmbtu", - "HEAT_INPUT": "heat_content_mmbtu", - "FAC_ID": "facility_id", # unique facility id for internal EPA database management - "UNIT_ID": "unit_id_what", # unique unit id for internal EPA database management + "Date": "op_date", + "Hour": "op_hour", + "Operating Time": "operating_time_hours", + "Gross Load (MW)": "gross_load_mw", + "Steam Load (1000 lb/hr)": "steam_load_1000_lbs", + "SO2 Mass (lbs)": "so2_mass_lbs", + "SO2 Mass Measure Indicator": "so2_mass_measurement_code", + "SO2 Rate (lbs/mmBtu)": "so2_rate_lbs_mmbtu", # Not reading from CSV + "SO2 Rate Measure Indicator": "so2_rate_measure_flg", # Not reading from CSV + "NOx Rate (lbs/mmBtu)": "nox_rate_lbs_mmbtu", # Not reading from CSV + "NOx Rate Measure Indicator": "nox_rate_measurement_code", # Not reading from CSV + "NOx Mass (lbs)": "nox_mass_lbs", + "NOx Mass Measure Indicator": "nox_mass_measurement_code", + "CO2 Mass (short tons)": "co2_mass_tons", + "CO2 Mass Measure Indicator": "co2_mass_measurement_code", + "CO2 Rate (short tons/mmBtu)": "co2_rate_tons_mmbtu", # Not reading from CSV + "CO2 Rate Measure Indicator": "co2_rate_measure_flg", # Not reading from CSV + "Heat Input (mmBtu)": "heat_content_mmbtu", + "Heat Input Measure Indicator": "heat_content_measure_flg", + "Primary Fuel Type": "primary_fuel_type", + "Secondary Fuel Type": "secondary_fuel_type", + "Unit Type": "unit_type", + "SO2 Controls": "so2_controls", + "NOx Controls": "nox_controls", + "PM Controls": "pm_controls", + "Hg Controls": "hg_controls", + "Program Code": "program_code", } """Dict: A dictionary containing EPA CEMS column names (keys) and replacement names to use when 
reading those columns into PUDL (values). @@ -75,19 +77,22 @@ """ # Any column that exactly matches one of these won't be read -IGNORE_COLS = { - "FACILITY_NAME", - "SO2_RATE (lbs/mmBtu)", - "SO2_RATE", - "SO2_RATE_MEASURE_FLG", - "CO2_RATE (tons/mmBtu)", - "CO2_RATE", - "CO2_RATE_MEASURE_FLG", - "NOX_RATE_MEASURE_FLG", - "NOX_RATE", - "NOX_RATE (lbs/mmBtu)", - "FAC_ID", - "UNIT_ID", +API_IGNORE_COLS = { + "Facility Name", + "SO2 Rate (lbs/mmBtu)", + "SO2 Rate Measure Indicator", + "CO2 Rate (short tons/mmBtu)", + "CO2 Rate Measure Indicator", + "NOx Rate (lbs/mmBtu)", + "NOx Rate Measure Indicator", + "Primary Fuel Type", + "Secondary Fuel Type", + "Unit Type", + "SO2 Controls", + "NOx Controls", + "PM Controls", + "Hg Controls", + "Program Code", } """Set: The set of EPA CEMS columns to ignore when reading data.""" @@ -106,9 +111,9 @@ def get_filters(self): """Returns filters for retrieving given partition resource from Datastore.""" return dict(year=self.year, state=self.state.lower()) - def get_monthly_file(self, month: int) -> Path: - """Returns the filename (without suffix) that contains the monthly data.""" - return Path(f"{self.year}{self.state.lower()}{month:02}") + def get_annual_file(self) -> Path: + """Return the name of the CSV file that holds annual hourly data.""" + return Path(f"epacems-{self.year}-{self.state.lower()}.csv") class EpaCemsDatastore: @@ -125,21 +130,28 @@ def __init__(self, datastore: Datastore): self.datastore = datastore def get_data_frame(self, partition: EpaCemsPartition) -> pd.DataFrame: - """Constructs dataframe holding data for a given (year, state) partition.""" + """Constructs dataframe from a zipfile for a given (year, state) partition.""" archive = self.datastore.get_zipfile_resource( "epacems", **partition.get_filters() ) - dfs = [] - for month in range(1, 13): - mf = partition.get_monthly_file(month) - with archive.open(str(mf.with_suffix(".zip")), "r") as mzip: - with ZipFile(mzip, "r").open( - str(mf.with_suffix(".csv")), "r"
- ) as csv_file: - dfs.append(self._csv_to_dataframe(csv_file)) - return pd.concat(dfs, sort=True, copy=False, ignore_index=True) - - def _csv_to_dataframe(self, csv_file) -> pd.DataFrame: + + # Get names of files in zip file + files = self.datastore.get_zipfile_file_names(archive) + + # If archive has one csv file in it, this is a yearly CSV (archived after 08/23) + # and this CSV does not need to be concatenated. + if len(files) == 1 and files[0].endswith(".csv"): + with archive.open(str(partition.get_annual_file()), "r") as csv_file: + df = self._csv_to_dataframe( + csv_file, ignore_cols=API_IGNORE_COLS, rename_dict=API_RENAME_DICT + ) + return df + else: + raise AssertionError(f"Unexpected archive format. Found files: {files}.") + + def _csv_to_dataframe( + self, csv_file: Path, ignore_cols: set[str], rename_dict: dict[str, str] + ) -> pd.DataFrame: """Convert a CEMS csv file into a :class:`pandas.DataFrame`. Args: @@ -151,9 +163,9 @@ def _csv_to_dataframe(self, csv_file) -> pd.DataFrame: return pd.read_csv( csv_file, index_col=False, - usecols=lambda col: col not in IGNORE_COLS, + usecols=lambda col: col not in ignore_cols, low_memory=False, - ).rename(columns=RENAME_DICT) + ).rename(columns=rename_dict) def extract(year: int, state: str, ds: Datastore): @@ -169,4 +181,12 @@ def extract(year: int, state: str, ds: Datastore): ds = EpaCemsDatastore(ds) partition = EpaCemsPartition(state=state, year=year) # We have to assign the reporting year for partitioning purposes - return ds.get_data_frame(partition).assign(year=year) + try: + df = ds.get_data_frame(partition).assign(year=year) + except KeyError: # If no state-year combination found, return empty df. + logger.warning( + f"No data found for {state} in {year}. Returning empty dataframe."
+ ) + res = Resource.from_id("hourly_emissions_epacems") + df = res.format_df(pd.DataFrame()) + return df diff --git a/src/pudl/extract/ferc.py b/src/pudl/extract/ferc.py index 0b39f89f7d..bf7a8514f0 100644 --- a/src/pudl/extract/ferc.py +++ b/src/pudl/extract/ferc.py @@ -8,20 +8,13 @@ from pudl.extract.ferc2 import Ferc2DbfExtractor from pudl.extract.ferc6 import Ferc6DbfExtractor from pudl.extract.ferc60 import Ferc60DbfExtractor -from pudl.helpers import EnvVar +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @op( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing ferc1 database.", default_value=False ), @@ -45,5 +38,5 @@ def dbf2sqlite(context) -> None: datastore=context.resources.datastore, settings=context.resources.ferc_to_sqlite_settings, clobber=context.op_config["clobber"], - output_path=context.op_config["pudl_output_path"], + output_path=PudlPaths().output_dir, ).execute() diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index c2321f4c58..b7cbd88599 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -68,14 +68,12 @@ """ import json from itertools import chain -from pathlib import Path from typing import Any, Literal import pandas as pd import sqlalchemy as sa from dagster import ( AssetKey, - Field, SourceAsset, asset, build_init_resource_context, @@ -89,7 +87,6 @@ add_key_constraints, deduplicate_by_year, ) -from pudl.helpers import EnvVar from pudl.io_managers import ( FercDBFSQLiteIOManager, FercXBRLSQLiteIOManager, @@ -97,6 +94,7 @@ ferc1_xbrl_sqlite_io_manager, ) from pudl.settings import DatasetsSettings, FercToSqliteSettings, GenericDatasetSettings +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -341,7 +339,8 @@ def 
create_raw_ferc1_assets() -> list[SourceAsset]: dbf_table_names = tuple(set(flattened_dbfs)) raw_ferc1_dbf_assets = [ SourceAsset( - key=AssetKey(table_name), io_manager_key="ferc1_dbf_sqlite_io_manager" + key=AssetKey(f"raw_ferc1_dbf__{table_name}"), + io_manager_key="ferc1_dbf_sqlite_io_manager", ) for table_name in dbf_table_names ] @@ -357,7 +356,8 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: xbrl_table_names = tuple(set(xbrls_with_periods)) raw_ferc1_xbrl_assets = [ SourceAsset( - key=AssetKey(table_name), io_manager_key="ferc1_xbrl_sqlite_io_manager" + key=AssetKey(f"raw_ferc1_xbrl__{table_name}"), + io_manager_key="ferc1_xbrl_sqlite_io_manager", ) for table_name in xbrl_table_names ] @@ -372,17 +372,7 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: # asset name. -@asset( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, -) +@asset def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]]]: """Extract the FERC 1 XBRL Taxonomy metadata we've stored as JSON. @@ -395,10 +385,7 @@ def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]] filings. If there is no instant/duration table, an empty list is returned instead. 
""" - metadata_path = ( - Path(context.op_config["pudl_output_path"]) - / "ferc1_xbrl_taxonomy_metadata.json" - ) + metadata_path = PudlPaths().output_dir / "ferc1_xbrl_taxonomy_metadata.json" with open(metadata_path) as f: xbrl_meta_all = json.load(f) diff --git a/src/pudl/extract/ferc714.py b/src/pudl/extract/ferc714.py index f61c7df33d..9e6c8ac6d7 100644 --- a/src/pudl/extract/ferc714.py +++ b/src/pudl/extract/ferc714.py @@ -10,51 +10,51 @@ FERC714_FILES: OrderedDict[str, dict[str, str]] = OrderedDict( { - "id_certification_ferc714": { + "id_certification": { "name": "Part 1 Schedule 1 - Identification Certification.csv", "encoding": "iso-8859-1", }, - "gen_plants_ba_ferc714": { + "gen_plants_ba": { "name": "Part 2 Schedule 1 - Balancing Authority Generating Plants.csv", "encoding": "iso-8859-1", }, - "demand_monthly_ba_ferc714": { + "demand_monthly_ba": { "name": "Part 2 Schedule 2 - Balancing Authority Monthly Demand.csv", "encoding": "utf-8", }, - "net_energy_load_ba_ferc714": { + "net_energy_load_ba": { "name": "Part 2 Schedule 3 - Balancing Authority Net Energy for Load.csv", "encoding": "utf-8", }, - "adjacency_ba_ferc714": { + "adjacency_ba": { "name": "Part 2 Schedule 4 - Adjacent Balancing Authorities.csv", "encoding": "iso-8859-1", }, - "interchange_ba_ferc714": { + "interchange_ba": { "name": "Part 2 Schedule 5 - Balancing Authority Interchange.csv", "encoding": "iso-8859-1", }, - "lambda_hourly_ba_ferc714": { + "lambda_hourly_ba": { "name": "Part 2 Schedule 6 - Balancing Authority Hourly System Lambda.csv", "encoding": "utf-8", }, - "lambda_description_ferc714": { + "lambda_description": { "name": "Part 2 Schedule 6 - System Lambda Description.csv", "encoding": "iso-8859-1", }, - "description_pa_ferc714": { + "description_pa": { "name": "Part 3 Schedule 1 - Planning Area Description.csv", "encoding": "iso-8859-1", }, - "demand_forecast_pa_ferc714": { + "demand_forecast_pa": { "name": "Part 3 Schedule 2 - Planning Area Forecast Demand.csv", 
"encoding": "utf-8", }, - "demand_hourly_pa_ferc714": { + "demand_hourly_pa": { "name": "Part 3 Schedule 2 - Planning Area Hourly Demand.csv", "encoding": "utf-8", }, - "respondent_id_ferc714": { + "respondent_id": { "name": "Respondent IDs.csv", "encoding": "utf-8", }, @@ -64,7 +64,7 @@ @multi_asset( - outs={"raw_" + table_name: AssetOut() for table_name in FERC714_FILES}, + outs={"raw_ferc714__" + table_name: AssetOut() for table_name in FERC714_FILES}, required_resource_keys={"datastore", "dataset_settings"}, ) def extract_ferc714(context): @@ -91,12 +91,12 @@ def extract_ferc714(context): raw_dfs[table_name] = pd.read_csv( f, encoding=FERC714_FILES[table_name]["encoding"] ) - if table_name != "respondent_id_ferc714": + if table_name != "respondent_id": raw_dfs[table_name] = raw_dfs[table_name].query( "report_yr in @ferc714_settings.years" ) return ( - Output(output_name="raw_" + table_name, value=df) + Output(output_name="raw_ferc714__" + table_name, value=df) for table_name, df in raw_dfs.items() ) diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index dbcfbd5178..e7f4e34e70 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -10,9 +10,9 @@ from ferc_xbrl_extractor.instance import InstanceBuilder import pudl -from pudl.helpers import EnvVar from pudl.settings import FercGenericXbrlToSqliteSettings, XbrlFormNumber from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -70,24 +70,21 @@ def get_filings(self, year: int, form: XbrlFormNumber) -> list[InstanceBuilder]: return filings -def _get_sqlite_engine( - form_number: int, output_path: Path, clobber: bool -) -> sa.engine.Engine: +def _get_sqlite_engine(form_number: int, clobber: bool) -> sa.engine.Engine: """Create SQLite engine for specified form and drop tables. Args: form_number: FERC form number. - output_path: path to PUDL outputs. 
clobber: Flag indicating whether or not to drop tables. """ # Read in the structure of the DB, if it exists logger.info( f"Dropping the old FERC Form {form_number} XBRL derived SQLite DB if it exists." ) - db_path = output_path / f"ferc{form_number}_xbrl.sqlite" + db_path = PudlPaths().sqlite_db(f"ferc{form_number}_xbrl") logger.info(f"Connecting to SQLite at {db_path}...") - sqlite_engine = sa.create_engine(f"sqlite:///{db_path}") + sqlite_engine = sa.create_engine(db_path) logger.info(f"Connected to SQLite at {db_path}!") try: # So that we can wipe it out @@ -100,13 +97,6 @@ def _get_sqlite_engine( @op( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing ferc1 database.", default_value=False ), @@ -125,7 +115,7 @@ def _get_sqlite_engine( ) def xbrl2sqlite(context) -> None: """Clone the FERC Form 1 XBRL Databsae to SQLite.""" - output_path = Path(context.op_config["pudl_output_path"]) + output_path = PudlPaths().output_dir clobber = context.op_config["clobber"] batch_size = context.op_config["batch_size"] workers = context.op_config["workers"] @@ -146,7 +136,7 @@ def xbrl2sqlite(context) -> None: logger.info(f"Dataset ferc{form}_xbrl is disabled, skipping") continue - sqlite_engine = _get_sqlite_engine(form.value, output_path, clobber) + sqlite_engine = _get_sqlite_engine(form.value, clobber) convert_form( settings, diff --git a/src/pudl/ferc_to_sqlite/__init__.py b/src/pudl/ferc_to_sqlite/__init__.py index 06c4ff4833..cb08895605 100644 --- a/src/pudl/ferc_to_sqlite/__init__.py +++ b/src/pudl/ferc_to_sqlite/__init__.py @@ -52,8 +52,8 @@ def ferc_to_sqlite_xbrl_only(): "resources": { "ferc_to_sqlite_settings": { "config": ferc_to_sqlite_fast_settings.dict(), - } - } + }, + }, }, ) diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py index 220381b514..7a18f613cf 100755 
--- a/src/pudl/ferc_to_sqlite/cli.py +++ b/src/pudl/ferc_to_sqlite/cli.py @@ -52,12 +52,6 @@ def parse_command_line(argv): fail.""", default=False, ) - parser.add_argument( - "--sandbox", - action="store_true", - default=False, - help="Use the Zenodo sandbox rather than production", - ) parser.add_argument( "-b", "--batch-size", @@ -139,9 +133,6 @@ def main(): # noqa: C901 etl_settings = EtlSettings.from_yaml(args.settings_file) - # Set PUDL_INPUT/PUDL_OUTPUT env vars from .pudl.yml if not set already! - pudl.workspace.setup.get_defaults() - ferc_to_sqlite_reconstructable_job = build_reconstructable_job( "pudl.ferc_to_sqlite.cli", "ferc_to_sqlite_job_factory", @@ -158,7 +149,6 @@ def main(): # noqa: C901 }, "datastore": { "config": { - "sandbox": args.sandbox, "gcs_cache_path": args.gcs_cache_path if args.gcs_cache_path else "", diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index c494434480..b246508514 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -8,7 +8,6 @@ """ import importlib.resources import itertools -import os import pathlib import re import shutil @@ -23,13 +22,11 @@ import pandas as pd import requests import sqlalchemy as sa -from dagster import AssetKey, AssetsDefinition, AssetSelection, Noneable, SourceAsset -from dagster._config.errors import PostProcessingError +from dagster import AssetKey, AssetsDefinition, AssetSelection, SourceAsset from pandas._libs.missing import NAType import pudl.logging_helpers from pudl.metadata.fields import get_pudl_dtypes -from pudl.workspace.setup import get_defaults sum_na = partial(pd.Series.sum, skipna=False) """A sum function that returns NA if the Series includes any NA values. 
@@ -1611,41 +1608,6 @@ def convert_df_to_excel_file(df: pd.DataFrame, **kwargs) -> pd.ExcelFile: return pd.ExcelFile(workbook) -class EnvVar(Noneable): - """A dagster config type for env vars.""" - - def __init__(self, env_var: str) -> None: - """Initialize EnvVarField.""" - super().__init__(inner_type=str) - self.env_var = env_var - - def post_process(self, value: str) -> str: - """Validate an EnvVar config value. - - Returns the value of the object environment variable if the - config value is not specified is not specified with dagster. - - Args: - value: config value to validate. - - Returns: - validated config value. - - Raises: - PostProcessingError: if the value is not specified in the env var or config. - """ - if value is None: - try: - value = os.environ.get(self.env_var) - if value is None: - value = get_defaults()[self.env_var] - except KeyError: - raise PostProcessingError( - f"Config value could not be found. Set the {self.env_var} environment variable or specify a value in dagster config." - ) - return value - - def get_asset_keys( assets: list[AssetsDefinition], exclude_source_assets: bool = True ) -> set[AssetKey]: diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 85a5ed0224..c3fab27888 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -10,7 +10,6 @@ from alembic.autogenerate.api import compare_metadata from alembic.migration import MigrationContext from dagster import ( - Field, InitResourceContext, InputContext, IOManager, @@ -23,8 +22,8 @@ from upath import UPath import pudl -from pudl.helpers import EnvVar from pudl.metadata.classes import Package, Resource +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -177,7 +176,7 @@ def _get_sqlalchemy_table(self, table_name: str) -> sa.Table: sa_table = self.md.tables.get(table_name, None) if sa_table is None: raise ValueError( - f"{sa_table} not found in database metadata. 
Either add the table to " + f"{table_name} not found in database metadata. Either add the table to " "the metadata or use a different IO Manager." ) return sa_table @@ -534,21 +533,10 @@ def load_input(self, context: InputContext) -> pd.DataFrame: return df -@io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - } -) +@io_manager def pudl_sqlite_io_manager(init_context) -> PudlSQLiteIOManager: """Create a SQLiteManager dagster resource for the pudl database.""" - base_dir = init_context.resource_config["pudl_output_path"] - return PudlSQLiteIOManager(base_dir=base_dir, db_name="pudl") + return PudlSQLiteIOManager(base_dir=PudlPaths().output_dir, db_name="pudl") class FercSQLiteIOManager(SQLiteIOManager): @@ -658,6 +646,8 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ferc1_settings = context.resources.dataset_settings.ferc1 table_name = self._get_table_name(context) + # Remove preceding asset name metadata + table_name = table_name.replace("raw_ferc1_dbf__", "") # Check if the table_name exists in the self.md object _ = self._get_sqlalchemy_table(table_name) @@ -676,23 +666,11 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ).assign(sched_table_name=table_name) -@io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, - required_resource_keys={"dataset_settings"}, -) +@io_manager(required_resource_keys={"dataset_settings"}) def ferc1_dbf_sqlite_io_manager(init_context) -> FercDBFSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" - base_dir = init_context.resource_config["pudl_output_path"] return FercDBFSQLiteIOManager( - base_dir=base_dir, + base_dir=PudlPaths().output_dir, db_name="ferc1", ) @@ -719,6 +697,9 @@ def
load_input(self, context: InputContext) -> pd.DataFrame: ferc1_settings = context.resources.dataset_settings.ferc1 table_name = self._get_table_name(context) + # Remove preceding asset name metadata + table_name = table_name.replace("raw_ferc1_xbrl__", "") + # TODO (bendnorman): Figure out a better to handle tables that # don't have duration and instant # Not every table contains both instant and duration @@ -746,23 +727,11 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ).assign(sched_table_name=sched_table_name) -@io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, - required_resource_keys={"dataset_settings"}, -) +@io_manager(required_resource_keys={"dataset_settings"}) def ferc1_xbrl_sqlite_io_manager(init_context) -> FercXBRLSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" - base_dir = init_context.resource_config["pudl_output_path"] return FercXBRLSQLiteIOManager( - base_dir=base_dir, + base_dir=PudlPaths().output_dir, db_name="ferc1_xbrl", ) @@ -793,21 +762,12 @@ def load_from_path(self, context: InputContext, path: UPath) -> dd.DataFrame: ) -@io_manager( - config_schema={ - "base_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - is_required=False, - default_value=None, - ) - } -) +@io_manager def epacems_io_manager( init_context: InitResourceContext, ) -> PandasParquetIOManager: """IO Manager that writes EPA CEMS partitions to individual parquet files.""" schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() - base_path = UPath(init_context.resource_config["base_path"]) - return PandasParquetIOManager(base_path=base_path, schema=schema) + return PandasParquetIOManager( + base_path=UPath(PudlPaths().output_dir), schema=schema + ) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index e2dc07ed99..43467d72ec 100644 ---
a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -43,6 +43,7 @@ from pudl.metadata.resources import FOREIGN_KEYS, RESOURCE_METADATA, eia861 from pudl.metadata.sources import SOURCES from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -734,7 +735,7 @@ def to_sql( # noqa: C901 return sa.Column( self.name, self.to_sql_dtype(), - *[sa.CheckConstraint(check) for check in checks], + *[sa.CheckConstraint(check, name=hash(check)) for check in checks], nullable=not self.constraints.required, unique=self.constraints.unique, comment=self.description, @@ -947,14 +948,8 @@ def get_temporal_coverage(self, partitions: dict = None) -> str: def add_datastore_metadata(self) -> None: """Get source file metadata from the datastore.""" - pudl_settings = pudl.workspace.setup.get_defaults() - if pudl_settings["pudl_in"] is None: - local_cache_path = None - else: - local_cache_path = pudl_settings["data_dir"] dp_desc = Datastore( - sandbox=False, - local_cache_path=local_cache_path, + local_cache_path=PudlPaths().data_dir, gcs_cache_path="gs://zenodo-cache.catalyst.coop", ).get_datapackage_descriptor(self.name) partitions = dp_desc.get_partitions() @@ -1870,7 +1865,15 @@ def to_sql( check_values: bool = True, ) -> sa.MetaData: """Return equivalent SQL MetaData.""" - metadata = sa.MetaData() + metadata = sa.MetaData( + naming_convention={ + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_`%(constraint_name)s`", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", + } + ) for resource in self.resources: if resource.create_database_schema: _ = resource.to_sql( diff --git a/src/pudl/metadata/dfs.py b/src/pudl/metadata/dfs.py index f0d5f52686..cb58218971 100644 --- a/src/pudl/metadata/dfs.py +++ b/src/pudl/metadata/dfs.py @@ -300,7 +300,7 @@ """ 
subdivision_code,subdivision_name,country_code,country_name,subdivision_type,timezone_approx,state_id_fips,division_name_us_census,division_code_us_census,region_name_us_census,is_epacems_state AB,Alberta,CAN,Canada,province,America/Edmonton,,,,,0 -AK,Alaska,USA,United States of America,state,America/Anchorage,"02",Pacific Noncontiguous,PCN,West,0 +AK,Alaska,USA,United States of America,state,America/Anchorage,"02",Pacific Noncontiguous,PCN,West,1 AL,Alabama,USA,United States of America,state,America/Chicago,"01",East South Central,ESC,South,1 AR,Arkansas,USA,United States of America,state,America/Chicago,"05",West South Central,WSC,South,1 AS,American Samoa,USA,United States of America,outlying_area,Pacific/Pago_Pago,"60",,,,0 @@ -351,7 +351,7 @@ OR,Oregon,USA,United States of America,state,America/Los_Angeles,"41",Pacific Contiguous,PCC,West,1 PA,Pennsylvania,USA,United States of America,state,America/New_York,"42",Middle Atlantic,MAT,Northeast,1 PE,Prince Edwards Island,CAN,Canada,province,America/Halifax,,,,,0 -PR,Puerto Rico,USA,United States of America,outlying_area,America/Puerto_Rico,"72",,,,0 +PR,Puerto Rico,USA,United States of America,outlying_area,America/Puerto_Rico,"72",,,,1 QC,Quebec,CAN,Canada,province,America/Montreal,,,,,0 RI,Rhode Island,USA,United States of America,state,America/New_York,"44",New England,NEW,Northeast,1 SC,South Carolina,USA,United States of America,state,America/New_York,"45",South Atlantic,SAT,South,1 diff --git a/src/pudl/metadata/sources.py b/src/pudl/metadata/sources.py index a6bd58046a..69937dc444 100644 --- a/src/pudl/metadata/sources.py +++ b/src/pudl/metadata/sources.py @@ -279,7 +279,7 @@ }, "field_namespace": "epacems", "working_partitions": { - "years": sorted(set(range(1995, 2022))), + "years": sorted(set(range(1995, 2023))), "states": sorted(EPACEMS_STATES), }, "contributors": [ diff --git a/src/pudl/output/epacems.py b/src/pudl/output/epacems.py index 9f7f81fd31..6d509412ce 100644 --- a/src/pudl/output/epacems.py 
+++ b/src/pudl/output/epacems.py @@ -5,8 +5,8 @@ import dask.dataframe as dd -import pudl from pudl.settings import EpaCemsSettings +from pudl.workspace.setup import PudlPaths def year_state_filter( @@ -133,8 +133,7 @@ def epacems( columns = list(columns) if epacems_path is None: - pudl_settings = pudl.workspace.setup.get_defaults() - epacems_path = Path(pudl_settings["pudl_out"]) / "epacems" + epacems_path = PudlPaths().output_dir / "epacems" epacems = dd.read_parquet( epacems_path, diff --git a/src/pudl/package_data/epacems/additional_epacems_plants.csv b/src/pudl/package_data/epacems/additional_epacems_plants.csv index 96afc3def9..d58803e292 100644 --- a/src/pudl/package_data/epacems/additional_epacems_plants.csv +++ b/src/pudl/package_data/epacems/additional_epacems_plants.csv @@ -126,6 +126,8 @@ plant_id_eia,plant_name_eia,last_date,state,latitude,longitude,fill_data_source_ 880107,SPMT Marcus Hook Industrial Complex,2017-12-31,PA,39.8076,-75.4239,EPA CAMD web query 880108,Grain Processing Corporation,2018-12-31,IN,38.6552,-87.1814,EPA CAMD web query 880109,"Pratt Paper (OH), LLC",2020-10-13,OH,40.5379994,-84.1909398,Not found in EPA CAMD Avg OH Lat -55098,Frontera Energy Center,2016-01-01,TX,26.208000,-98.399200,In CEMS in 2019 but missing from EIA since 2016 +55098,Frontera Energy Center,2016-01-01,TX,26.208,-98.3992,In CEMS in 2019 but missing from EIA since 2016 55120,SRW Cogen LP,2014-01-01,TX,30.054478,-93.757435,In CEMS in 2019 but missing from EIA since 2014 55248,Tait,2018-01-01,OH,39.727679,-84.209489,In CEMS in 2021 but missing from EIA since 2018 +880110,Holston Army Ammunition Plant,2022-09-28,TN,36.5493,-82.6342,EPA CAMD web query +880102,"AES Puerto Rico, LP",2015-09-30,PR,17.9477,-66.154,EPA CAMD web query diff --git a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index c751f65c55..a0629e4176 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ 
-29,8 +29,8 @@ ferc_to_sqlite_settings: name: pudl-fast title: PUDL Fast ETL description: > - FERC 1 and EIA 860/923 from 2020 (output to SQLite) plus - EPA CEMS hourly emissions data from 2020 (output to Parquet). + FERC 1 data from 2020 and 2021, EIA 860/923 from 2020 and 2022 (output to SQLite) plus + EPA CEMS hourly emissions data from 2020 and 2022 (output to Parquet). version: 0.1.0 datasets: ferc1: @@ -76,4 +76,4 @@ datasets: # so if you're loading CEMS data for a particular year, you should # also load the EIA 860 data for that year if possible states: [ID, ME] - years: [2019, 2020, 2021] + years: [2020, 2022] diff --git a/src/pudl/package_data/settings/etl_full.yml b/src/pudl/package_data/settings/etl_full.yml index 6cf87fb65f..e45eb82ebb 100644 --- a/src/pudl/package_data/settings/etl_full.yml +++ b/src/pudl/package_data/settings/etl_full.yml @@ -298,4 +298,5 @@ datasets: 2019, 2020, 2021, + 2022, ] diff --git a/src/pudl/resources.py b/src/pudl/resources.py index 9597287d72..13d2a50471 100644 --- a/src/pudl/resources.py +++ b/src/pudl/resources.py @@ -1,9 +1,10 @@ """Collection of Dagster resources for PUDL.""" + from dagster import Field, resource -from pudl.helpers import EnvVar from pudl.settings import DatasetsSettings, FercToSqliteSettings, create_dagster_config from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths @resource(config_schema=create_dagster_config(DatasetsSettings())) @@ -28,13 +29,6 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: @resource( config_schema={ - "local_cache_path": Field( - EnvVar( - env_var="PUDL_INPUT", - ), - description="Path to local cache of raw data.", - default_value=None, - ), "gcs_cache_path": Field( str, description="Load datastore resources from Google Cloud Storage.", @@ -45,19 +39,15 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: description="If enabled, the local file cache for datastore will be used.", default_value=True, ), - 
"sandbox": Field( - bool, - description="Use the Zenodo sandbox rather than production", - default_value=False, - ), }, ) def datastore(init_context) -> Datastore: """Dagster resource to interact with Zenodo archives.""" ds_kwargs = {} ds_kwargs["gcs_cache_path"] = init_context.resource_config["gcs_cache_path"] - ds_kwargs["sandbox"] = init_context.resource_config["sandbox"] if init_context.resource_config["use_local_cache"]: - ds_kwargs["local_cache_path"] = init_context.resource_config["local_cache_path"] + # TODO(rousik): we could also just use PudlPaths().input_dir here, because + # it should be initialized to the right values. + ds_kwargs["local_cache_path"] = PudlPaths().input_dir return Datastore(**ds_kwargs) diff --git a/src/pudl/settings.py b/src/pudl/settings.py index a7665f0a35..a1a3dd283a 100644 --- a/src/pudl/settings.py +++ b/src/pudl/settings.py @@ -220,10 +220,10 @@ def check_eia860m_date(cls, eia860m: bool) -> bool: # noqa: N805 expected_year = max(cls.data_source.working_partitions["years"]) + 1 if eia860m and (eia860m_year != expected_year): raise AssertionError( - """Attempting to integrate an eia860m year """ - f"""({eia860m_year}) from {cls.eia860m_date} not immediately following """ - f"""the eia860 years: {cls.data_source.working_partitions["years"]}. """ - """Consider switching eia860m parameter to False.""" + f"Attempting to integrate an eia860m year " + f"({eia860m_year}) from {cls.eia860m_date} not immediately following" + f"the eia860 years: {cls.data_source.working_partitions['years']}. " + f"Consider switching eia860m parameter to False." ) return eia860m @@ -355,10 +355,10 @@ def make_datasources_table(self, ds: Datastore) -> pd.DataFrame: * the ETL settings (for partitions that are used in the ETL) * the DataSource info (which is stored within the ETL settings) - The ETL settings and the datastore have different levels of nesting - and therefor - names for datasets. The nesting happens particularly with the EIA data. 
There - are three EIA datasets right now - eia923, eia860 and eia860m. eia860m is a monthly - update of a few tables in the larger eia860 dataset. + The ETL settings and the datastore have different levels of nesting - and + therefore names for datasets. The nesting happens particularly with the EIA + data. There are three EIA datasets right now - eia923, eia860 and eia860m. + eia860m is a monthly update of a few tables in the larger eia860 dataset. Args: ds: An initalized PUDL Datastore from which the DOI's for each raw input @@ -580,7 +580,8 @@ class FercToSqliteSettings(BaseSettings): Args: ferc1_dbf_to_sqlite_settings: Settings for converting FERC 1 DBF data to SQLite. - ferc1_xbrl_to_sqlite_settings: Settings for converting FERC 1 XBRL data to SQLite. + ferc1_xbrl_to_sqlite_settings: Settings for converting FERC 1 XBRL data to + SQLite. other_xbrl_forms: List of non-FERC1 forms to convert from XBRL to SQLite. """ @@ -652,9 +653,6 @@ class EtlSettings(BaseSettings): description: str = None version: str = None - pudl_in: str = pudl.workspace.setup.get_defaults()["pudl_in"] - pudl_out: str = pudl.workspace.setup.get_defaults()["pudl_out"] - # This is list of fsspec compatible paths to publish the output datasets to.
publish_destinations: list[str] = [] diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 0bd61e8202..a551db3339 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -588,10 +588,10 @@ def harvest_entity_tables( # noqa: C901 ins={ table_name: AssetIn() for table_name in [ - "clean_boiler_generator_assn_eia860", - "clean_generation_eia923", - "clean_generators_eia860", - "clean_boiler_fuel_eia923", + "_core_eia860__boiler_generator_assn", + "_core_eia923__generation", + "_core_eia860__generators", + "_core_eia923__boiler_fuel", ] }, config_schema={ @@ -658,7 +658,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: logger.debug(f"{clean_dfs.keys()=}") # grab the generation_eia923 table, group annually, generate a new tag - gen_eia923 = clean_dfs["clean_generation_eia923"] + gen_eia923 = clean_dfs["_core_eia923__generation"] gen_eia923 = ( gen_eia923.set_index(pd.DatetimeIndex(gen_eia923.report_date)) .groupby([pd.Grouper(freq="AS"), "plant_id_eia", "generator_id"]) @@ -670,7 +670,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # compile all of the generators gens = pd.merge( gen_eia923, - clean_dfs["clean_generators_eia860"], + clean_dfs["_core_eia860__generators"], on=["plant_id_eia", "report_date", "generator_id"], how="outer", ) @@ -690,7 +690,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # background bga_compiled_1 = pd.merge( gens, - clean_dfs["clean_boiler_generator_assn_eia860"], + clean_dfs["_core_eia860__boiler_generator_assn"], on=["plant_id_eia", "generator_id", "report_date"], how="outer", ) @@ -707,7 +707,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # apear in gens9 or gens8 (must uncomment-out the og_tag creation above) # bga_compiled_1[bga_compiled_1['og_tag'].isnull()] - bf_eia923 = clean_dfs["clean_boiler_fuel_eia923"].assign( + bf_eia923 = 
clean_dfs["_core_eia923__boiler_fuel"].assign( total_heat_content_mmbtu=lambda x: x.fuel_consumed_units * x.fuel_mmbtu_per_unit ) bf_eia923 = ( @@ -1138,22 +1138,22 @@ def harvested_entity_asset_factory( ) -> AssetsDefinition: """Create an asset definition for the harvested entity tables.""" harvestable_assets = ( - "clean_boiler_fuel_eia923", - "clean_boiler_generator_assn_eia860", - "clean_boilers_eia860", - "clean_coalmine_eia923", - "clean_fuel_receipts_costs_eia923", - "clean_generation_eia923", - "clean_generation_fuel_eia923", - "clean_generation_fuel_nuclear_eia923", - "clean_generators_eia860", - "clean_ownership_eia860", - "clean_plants_eia860", - "clean_utilities_eia860", - "clean_emissions_control_equipment_eia860", - "clean_boiler_emissions_control_equipment_assn_eia860", - "clean_boiler_cooling_assn_eia860", - "clean_boiler_stack_flue_assn_eia860", + "_core_eia923__boiler_fuel", + "_core_eia860__boiler_generator_assn", + "_core_eia860__boilers", + "_core_eia923__coalmine", + "_core_eia923__fuel_receipts_costs", + "_core_eia923__generation", + "_core_eia923__generation_fuel", + "_core_eia923__generation_fuel_nuclear", + "_core_eia860__generators", + "_core_eia860__ownership", + "_core_eia860__plants", + "_core_eia860__utilities", + "_core_eia860__emissions_control_equipment", + "_core_eia860__boiler_emissions_control_equipment_assn", + "_core_eia860__boiler_cooling_assn", + "_core_eia860__boiler_stack_flue_assn", ) @multi_asset( @@ -1203,16 +1203,21 @@ def finished_eia_asset_factory( table_name: str, io_manager_key: str | None = None ) -> AssetsDefinition: """An asset factory for finished EIA tables.""" - clean_table_name = "clean_" + table_name + # TODO (bendnorman): Create a more graceful function for parsing table name + table_name_parts = table_name.split("_") + dataset = table_name_parts[-1] + table_name_no_dataset = "_".join(table_name_parts[:-1]) + + _core_table_name = f"_core_{dataset}__{table_name_no_dataset}" @asset( - ins={clean_table_name: 
AssetIn()}, + ins={_core_table_name: AssetIn()}, name=table_name, io_manager_key=io_manager_key, ) def finished_eia_asset(**kwargs) -> pd.DataFrame: """Enforce PUDL DB schema on a cleaned EIA dataframe.""" - df = convert_cols_dtypes(kwargs[clean_table_name], data_source="eia") + df = convert_cols_dtypes(kwargs[_core_table_name], data_source="eia") res = Package.from_resource_ids().get_resource(table_name) return res.enforce_schema(df) diff --git a/src/pudl/transform/eia860.py b/src/pudl/transform/eia860.py index ce58f65cae..782041f3a1 100644 --- a/src/pudl/transform/eia860.py +++ b/src/pudl/transform/eia860.py @@ -15,7 +15,7 @@ @asset -def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__ownership(raw_eia860__ownership: pd.DataFrame) -> pd.DataFrame: """Pull and transform the ownership table. Transformations include: @@ -25,14 +25,14 @@ def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: reporting. Args: - raw_ownership_eia860: The raw ``ownership_eia860`` dataframe. + raw_eia860__ownership: The raw ``ownership_eia860`` dataframe. Returns: Cleaned ``ownership_eia860`` dataframe ready for harvesting. 
""" # Preiminary clean and get rid of unecessary 'year' column own_df = ( - raw_ownership_eia860.copy() + raw_eia860__ownership.copy() .pipe(pudl.helpers.fix_eia_na) .pipe(pudl.helpers.convert_to_date) .drop(columns=["year"]) @@ -179,11 +179,11 @@ def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: @asset -def clean_generators_eia860( - raw_generator_proposed_eia860: pd.DataFrame, - raw_generator_existing_eia860: pd.DataFrame, - raw_generator_retired_eia860: pd.DataFrame, - raw_generator_eia860: pd.DataFrame, +def _core_eia860__generators( + raw_eia860__generator_proposed: pd.DataFrame, + raw_eia860__generator_existing: pd.DataFrame, + raw_eia860__generator_retired: pd.DataFrame, + raw_eia860__generator: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the generators table. @@ -207,10 +207,10 @@ def clean_generators_eia860( clean, distinguishable categories. Args: - raw_generator_proposed_eia860: The raw ``raw_generator_proposed_eia860`` dataframe. - raw_generator_existing_eia860: The raw ``raw_generator_existing_eia860`` dataframe. - raw_generator_retired_eia860: The raw ``raw_generator_retired_eia860`` dataframe. - raw_generator_eia860: The raw ``raw_generator_eia860`` dataframe. + raw_eia860__generator_proposed: The raw ``raw_eia860__generator_proposed`` dataframe. + raw_eia860__generator_existing: The raw ``raw_eia860__generator_existing`` dataframe. + raw_eia860__generator_retired: The raw ``raw_eia860__generator_retired`` dataframe. + raw_eia860__generator: The raw ``raw_eia860__generator`` dataframe. Returns: Cleaned ``generators_eia860`` dataframe ready for harvesting. 
@@ -223,10 +223,10 @@ def clean_generators_eia860( # them all together into a single big table, with a column that indicates # which one of these tables the data came from, since they all have almost # exactly the same structure - gp_df = raw_generator_proposed_eia860 - ge_df = raw_generator_existing_eia860 - gr_df = raw_generator_retired_eia860 - g_df = raw_generator_eia860 + gp_df = raw_eia860__generator_proposed + ge_df = raw_eia860__generator_existing + gr_df = raw_eia860__generator_retired + g_df = raw_eia860__generator # the retired tab of eia860 does not have a operational_status_code column. # we still want these gens to have a code (and subsequently a # operational_status). We could do this by fillna w/ the retirement_date, but @@ -358,7 +358,7 @@ def clean_generators_eia860( @asset -def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__plants(raw_eia860__plant: pd.DataFrame) -> pd.DataFrame: """Pull and transform the plants table. Much of the static plant information is reported repeatedly, and scattered across @@ -373,14 +373,14 @@ def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: * Convert Y/N/X values to boolean True/False. Args: - raw_plant_eia860: The raw ``raw_plant_eia860`` dataframe. + raw_eia860__plant: The raw ``raw_eia860__plant`` dataframe. Returns: Cleaned ``plants_eia860`` dataframe ready for harvesting. """ # Populating the 'plants_eia860' table p_df = ( - raw_plant_eia860.pipe(pudl.helpers.fix_eia_na) + raw_eia860__plant.pipe(pudl.helpers.fix_eia_na) .astype({"zip_code": str}) .drop("iso_rto", axis="columns") ) @@ -443,8 +443,8 @@ def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: @asset -def clean_boiler_generator_assn_eia860( - raw_boiler_generator_assn_eia860: pd.DataFrame, +def _core_eia860__boiler_generator_assn( + raw_eia860__boiler_generator_assn: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the boilder generator association table. 
@@ -454,7 +454,7 @@ def clean_boiler_generator_assn_eia860( * Drop duplicate rows. Args: - raw_boiler_generator_assn_eia860 (df): Each entry in this dictionary of DataFrame objects + raw_eia860__boiler_generator_assn (df): Each entry in this dictionary of DataFrame objects corresponds to a page from the EIA860 form, as reported in the Excel spreadsheets they distribute. @@ -462,7 +462,7 @@ def clean_boiler_generator_assn_eia860( Cleaned ``boiler_generator_assn_eia860`` dataframe ready for harvesting. """ # Populating the 'generators_eia860' table - b_g_df = raw_boiler_generator_assn_eia860 + b_g_df = raw_eia860__boiler_generator_assn b_g_df = pudl.helpers.convert_to_date(b_g_df) b_g_df = pudl.helpers.convert_cols_dtypes(df=b_g_df, data_source="eia") @@ -478,7 +478,7 @@ def clean_boiler_generator_assn_eia860( @asset -def clean_utilities_eia860(raw_utility_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__utilities(raw_eia860__utility: pd.DataFrame) -> pd.DataFrame: """Pull and transform the utilities table. Transformations include: @@ -492,13 +492,13 @@ def clean_utilities_eia860(raw_utility_eia860: pd.DataFrame) -> pd.DataFrame: * Map full spelling onto code values. Args: - raw_utility_eia860: The raw ``raw_utility_eia860`` dataframe. + raw_eia860__utility: The raw ``raw_eia860__utility`` dataframe. Returns: Cleaned ``utilities_eia860`` dataframe ready for harvesting. """ # Populating the 'utilities_eia860' table - u_df = raw_utility_eia860 + u_df = raw_eia860__utility # Replace empty strings, whitespace, and '.' fields with real NA values u_df = pudl.helpers.fix_eia_na(u_df) @@ -564,8 +564,8 @@ def _make_phone_number(col1, col2, col3): @asset -def clean_boilers_eia860( - raw_emission_control_strategies_eia860, raw_boiler_info_eia860 +def _core_eia860__boilers( + raw_eia860__emission_control_strategies, raw_eia860__boiler_info ): """Pull and transform the boilers table. @@ -579,17 +579,17 @@ def clean_boilers_eia860( reporting. 
Args: - raw_emission_control_strategies_eia860 (pandas.DataFrame): + raw_eia860__emission_control_strategies (pandas.DataFrame): DataFrame extracted from EIA forms earlier in the ETL process. - raw_boiler_info_eia860 (pandas.DataFrame): + raw_eia860__boiler_info (pandas.DataFrame): DataFrame extracted from EIA forms earlier in the ETL process. Returns: pandas.DataFrame: the transformed boilers table """ # Populating the 'boilers_eia860' table - b_df = raw_boiler_info_eia860 - ecs = raw_emission_control_strategies_eia860 + b_df = raw_eia860__boiler_info + ecs = raw_eia860__emission_control_strategies # Combine and replace empty strings, whitespace, and '.' fields with real NA values @@ -765,12 +765,12 @@ def clean_boilers_eia860( @asset -def clean_emissions_control_equipment_eia860( - raw_emissions_control_equipment_eia860: pd.DataFrame, +def _core_eia860__emissions_control_equipment( + raw_eia860__emissions_control_equipment: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the emissions control equipment table.""" # Replace empty strings, whitespace, and '.' fields with real NA values - emce_df = pudl.helpers.fix_eia_na(raw_emissions_control_equipment_eia860) + emce_df = pudl.helpers.fix_eia_na(raw_eia860__emissions_control_equipment) # Spot fix bad months emce_df["operating_month"] = emce_df["operating_month"].replace({"88": "8"}) @@ -889,26 +889,26 @@ def clean_emissions_control_equipment_eia860( @asset -def clean_boiler_emissions_control_equipment_assn_eia860( - raw_boiler_so2_eia860: pd.DataFrame, - raw_boiler_mercury_eia860: pd.DataFrame, - raw_boiler_nox_eia860: pd.DataFrame, - raw_boiler_particulate_eia860: pd.DataFrame, +def _core_eia860__boiler_emissions_control_equipment_assn( + raw_eia860__boiler_so2: pd.DataFrame, + raw_eia860__boiler_mercury: pd.DataFrame, + raw_eia860__boiler_nox: pd.DataFrame, + raw_eia860__boiler_particulate: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the emissions control <> boiler ID link tables. 
Args: - raw_boiler_so2_eia860: Raw EIA 860 boiler to SO2 emission control equipment + raw_eia860__boiler_so2: Raw EIA 860 boiler to SO2 emission control equipment association table. - raw_boiler_mercury_eia860: Raw EIA 860 boiler to mercury emission control + raw_eia860__boiler_mercury: Raw EIA 860 boiler to mercury emission control equipment association table. - raw_boiler_nox_eia860: Raw EIA 860 boiler to nox emission control equipment + raw_eia860__boiler_nox: Raw EIA 860 boiler to nox emission control equipment association table. - raw_boiler_particulate_eia860: Raw EIA 860 boiler to particulate emission + raw_eia860__boiler_particulate: Raw EIA 860 boiler to particulate emission control equipment association table. - raw_boiler_cooling_eia860: Raw EIA 860 boiler to cooling equipment association + raw_eia860__boiler_cooling: Raw EIA 860 boiler to cooling equipment association table. - raw_boiler_stack_flue_eia860: Raw EIA 860 boiler to stack flue equipment + raw_eia860__boiler_stack_flue: Raw EIA 860 boiler to stack flue equipment association table. Returns: @@ -916,10 +916,10 @@ def clean_boiler_emissions_control_equipment_assn_eia860( tables. """ raw_tables = [ - raw_boiler_so2_eia860, - raw_boiler_mercury_eia860, - raw_boiler_nox_eia860, - raw_boiler_particulate_eia860, + raw_eia860__boiler_so2, + raw_eia860__boiler_mercury, + raw_eia860__boiler_nox, + raw_eia860__boiler_particulate, ] bece_df = pd.DataFrame({}) @@ -969,20 +969,20 @@ def clean_boiler_emissions_control_equipment_assn_eia860( @asset -def clean_boiler_cooling_assn_eia860( - raw_boiler_cooling_eia860: pd.DataFrame, +def _core_eia860__boiler_cooling_assn( + raw_eia860__boiler_cooling: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the EIA 860 boiler to cooler ID table. Args: - raw_boiler_cooling_eia860: Raw EIA 860 boiler to cooler ID association table. + raw_eia860__boiler_cooling: Raw EIA 860 boiler to cooler ID association table. 
Returns: pd.DataFrame: A cleaned and normalized version of the EIA boiler to cooler ID table. """ # Replace empty strings, whitespace, and '.' fields with real NA values - bc_assn = pudl.helpers.fix_eia_na(raw_boiler_cooling_eia860) + bc_assn = pudl.helpers.fix_eia_na(raw_eia860__boiler_cooling) # Replace the report year col with a report date col for the harvesting process bc_assn = pudl.helpers.convert_to_date( df=bc_assn, year_col="report_year", date_col="report_date" @@ -994,13 +994,13 @@ def clean_boiler_cooling_assn_eia860( @asset -def clean_boiler_stack_flue_assn_eia860( - raw_boiler_stack_flue_eia860: pd.DataFrame, +def _core_eia860__boiler_stack_flue_assn( + raw_eia860__boiler_stack_flue: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the EIA 860 boiler to stack flue ID table. Args: - raw_boiler_stack_flue_eia860: Raw EIA 860 boiler to stack flue ID association + raw_eia860__boiler_stack_flue: Raw EIA 860 boiler to stack flue ID association table. Returns: @@ -1008,7 +1008,7 @@ def clean_boiler_stack_flue_assn_eia860( ID table. """ # Replace empty strings, whitespace, and '.' 
fields with real NA values - bsf_assn = pudl.helpers.fix_eia_na(raw_boiler_stack_flue_eia860) + bsf_assn = pudl.helpers.fix_eia_na(raw_eia860__boiler_stack_flue) # Replace the report year col with a report date col for the harvesting process bsf_assn = pudl.helpers.convert_to_date( df=bsf_assn, year_col="report_year", date_col="report_date" diff --git a/src/pudl/transform/eia861.py b/src/pudl/transform/eia861.py index d6e35424f3..afa5178dd0 100644 --- a/src/pudl/transform/eia861.py +++ b/src/pudl/transform/eia861.py @@ -1030,7 +1030,7 @@ def _harvest_associations(dfs: list[pd.DataFrame], cols: list[str]) -> pd.DataFr ############################################################################### @asset(io_manager_key="pudl_sqlite_io_manager") def service_territory_eia861( - raw_service_territory_eia861: pd.DataFrame, + raw_eia861__service_territory: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 utility service territory table. @@ -1040,12 +1040,12 @@ def service_territory_eia861( * Add field for state/county FIPS code. Args: - raw_service_territory_eia861: Raw EIA-861 utility service territory dataframe. + raw_eia861__service_territory: Raw EIA-861 utility service territory dataframe. Returns: The cleaned utility service territory dataframe. """ - df = _pre_process(raw_service_territory_eia861) + df = _pre_process(raw_eia861__service_territory) # A little WV county sandwiched between OH & PA, got miscategorized a few times: df.loc[(df.state == "OH") & (df.county == "Brooke"), "state"] = "WV" df = ( @@ -1084,7 +1084,7 @@ def service_territory_eia861( @asset def clean_balancing_authority_eia861( - raw_balancing_authority_eia861: pd.DataFrame, + raw_eia861__balancing_authority: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Balancing Authority table. 
@@ -1100,7 +1100,7 @@ def clean_balancing_authority_eia861( # * Backfill BA codes on a per BA ID basis # * Fix data entry errors df = ( - _pre_process(raw_balancing_authority_eia861) + _pre_process(raw_eia861__balancing_authority) .pipe(apply_pudl_dtypes, "eia") .set_index(["report_date", "balancing_authority_name_eia", "utility_id_eia"]) ) @@ -1142,7 +1142,7 @@ def clean_balancing_authority_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: +def sales_eia861(raw_eia861__sales: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Sales table. Transformations include: @@ -1164,7 +1164,7 @@ def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: ] # Pre-tidy clean specific to sales table - raw_sales = _pre_process(raw_sales_eia861).query( + raw_sales = _pre_process(raw_eia861__sales).query( "utility_id_eia not in (88888, 99999)" ) @@ -1226,7 +1226,7 @@ def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: @asset(io_manager_key="pudl_sqlite_io_manager") def advanced_metering_infrastructure_eia861( - raw_advanced_metering_infrastructure_eia861: pd.DataFrame, + raw_eia861__advanced_metering_infrastructure: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Advanced Metering Infrastructure table. 
@@ -1246,7 +1246,7 @@ def advanced_metering_infrastructure_eia861( ########################################################################### logger.info("Tidying the EIA 861 Advanced Metering Infrastructure table.") tidy_ami, idx_cols = _tidy_class_dfs( - _pre_process(raw_advanced_metering_infrastructure_eia861), + _pre_process(raw_eia861__advanced_metering_infrastructure), df_name="Advanced Metering Infrastructure", idx_cols=idx_cols, class_list=CUSTOMER_CLASSES, @@ -1276,7 +1276,7 @@ def advanced_metering_infrastructure_eia861( ), }, ) -def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): +def demand_response_eia861(raw_eia861__demand_response: pd.DataFrame): """Transform the EIA 861 Demand Response table. Transformations include: @@ -1294,7 +1294,7 @@ def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): "report_date", ] - raw_dr = _pre_process(raw_demand_response_eia861) + raw_dr = _pre_process(raw_eia861__demand_response) # fill na BA values with 'UNK' raw_dr["balancing_authority_code_eia"] = raw_dr[ "balancing_authority_code_eia" @@ -1365,7 +1365,7 @@ def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): }, ) def demand_side_management_eia861( - raw_demand_side_management_eia861: pd.DataFrame, + raw_eia861__demand_side_management: pd.DataFrame, ): """Transform the EIA 861 Demand Side Management table. 
@@ -1427,7 +1427,7 @@ def demand_side_management_eia861( # * Drop data_status and demand_side_management cols (they don't contain anything) ########################################################################### transformed_dsm1 = ( - clean_nerc(_pre_process(raw_demand_side_management_eia861), idx_cols) + clean_nerc(_pre_process(raw_eia861__demand_side_management), idx_cols) .drop(columns=["demand_side_management", "data_status"]) .query("utility_id_eia not in [88888]") ) @@ -1531,7 +1531,7 @@ def demand_side_management_eia861( }, ) def distributed_generation_eia861( - raw_distributed_generation_eia861: pd.DataFrame, + raw_eia861__distributed_generation: pd.DataFrame, ): """Transform the EIA 861 Distributed Generation table. @@ -1596,7 +1596,7 @@ def distributed_generation_eia861( ] # Pre-tidy transform: set estimated or actual A/E values to 'Acutal'/'Estimated' - raw_dg = _pre_process(raw_distributed_generation_eia861).assign( + raw_dg = _pre_process(raw_eia861__distributed_generation).assign( estimated_or_actual_capacity_data=lambda x: ( x.estimated_or_actual_capacity_data.map(ESTIMATED_OR_ACTUAL) ), @@ -1710,14 +1710,14 @@ def distributed_generation_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") def distribution_systems_eia861( - raw_distribution_systems_eia861: pd.DataFrame, + raw_eia861__distribution_systems: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Distribution Systems table. * No additional transformations. 
""" df = ( - _pre_process(raw_distribution_systems_eia861) + _pre_process(raw_eia861__distribution_systems) .assign(short_form=lambda x: _make_yn_bool(x.short_form)) # No duplicates to speak of but take measures to check just in case .pipe( @@ -1731,7 +1731,7 @@ def distribution_systems_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataFrame: +def dynamic_pricing_eia861(raw_eia861__dynamic_pricing: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Dynamic Pricing table. Transformations include: @@ -1755,7 +1755,7 @@ def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataF ] raw_dp = _pre_process( - raw_dynamic_pricing_eia861.query("utility_id_eia not in [88888]").assign( + raw_eia861__dynamic_pricing.query("utility_id_eia not in [88888]").assign( short_form=lambda x: _make_yn_bool(x.short_form) ) ) @@ -1794,7 +1794,7 @@ def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataF @asset(io_manager_key="pudl_sqlite_io_manager") def energy_efficiency_eia861( - raw_energy_efficiency_eia861: pd.DataFrame, + raw_eia861__energy_efficiency: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Energy Efficiency table. @@ -1812,7 +1812,7 @@ def energy_efficiency_eia861( ] raw_ee = ( - _pre_process(raw_energy_efficiency_eia861).assign( + _pre_process(raw_eia861__energy_efficiency).assign( short_form=lambda x: _make_yn_bool(x.short_form) ) # No duplicates to speak of but take measures to check just in case @@ -1862,7 +1862,7 @@ def energy_efficiency_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") def green_pricing_eia861( - raw_green_pricing_eia861: pd.DataFrame, + raw_eia861__green_pricing: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Green Pricing table. 
@@ -1882,7 +1882,7 @@ def green_pricing_eia861( ########################################################################### logger.info("Tidying the EIA 861 Green Pricing table.") tidy_gp, idx_cols = _tidy_class_dfs( - _pre_process(raw_green_pricing_eia861), + _pre_process(raw_eia861__green_pricing), df_name="Green Pricing", idx_cols=idx_cols, class_list=CUSTOMER_CLASSES, @@ -1905,11 +1905,11 @@ def green_pricing_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def mergers_eia861(raw_mergers_eia861: pd.DataFrame) -> pd.DataFrame: +def mergers_eia861(raw_eia861__mergers: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Mergers table.""" # No duplicates to speak of but take measures to check just in case df = ( - _pre_process(raw_mergers_eia861) + _pre_process(raw_eia861__mergers) .pipe( _check_for_dupes, df_name="Mergers", @@ -1928,7 +1928,7 @@ def mergers_eia861(raw_mergers_eia861: pd.DataFrame) -> pd.DataFrame: "net_metering_misc_eia861": AssetOut(io_manager_key="pudl_sqlite_io_manager"), }, ) -def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): +def net_metering_eia861(raw_eia861__net_metering: pd.DataFrame): """Transform the EIA 861 Net Metering table. Transformations include: @@ -1948,7 +1948,7 @@ def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): # Pre-tidy clean specific to net_metering table raw_nm = ( - _pre_process(raw_net_metering_eia861) + _pre_process(raw_eia861__net_metering) .query("utility_id_eia not in [99999]") .assign(short_form=lambda x: _make_yn_bool(x.short_form)) ) @@ -2014,7 +2014,7 @@ def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): ), }, ) -def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): +def non_net_metering_eia861(raw_eia861__non_net_metering: pd.DataFrame): """Transform the EIA 861 Non-Net Metering table. 
Transformations include: @@ -2039,7 +2039,7 @@ def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): ] # Pre-tidy clean specific to non_net_metering table - raw_nnm = _pre_process(raw_non_net_metering_eia861).query( + raw_nnm = _pre_process(raw_eia861__non_net_metering).query( "utility_id_eia not in '99999'" ) @@ -2124,7 +2124,7 @@ def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): ), }, ) -def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): +def operational_data_eia861(raw_eia861__operational_data: pd.DataFrame): """Transform the EIA 861 Operational Data table. Transformations include: @@ -2144,7 +2144,7 @@ def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): ] # Pre-tidy clean specific to operational data table - raw_od = _pre_process(raw_operational_data_eia861) + raw_od = _pre_process(raw_eia861__operational_data) raw_od = raw_od[ (raw_od["utility_id_eia"] != 88888) & (raw_od["utility_id_eia"].notnull()) ] @@ -2207,7 +2207,7 @@ def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): @asset(io_manager_key="pudl_sqlite_io_manager") -def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: +def reliability_eia861(raw_eia861__reliability: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Reliability table. 
Transformations include: @@ -2227,7 +2227,7 @@ def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: # wide-to-tall by standards tidy_r, idx_cols = _tidy_class_dfs( - df=_pre_process(raw_reliability_eia861), + df=_pre_process(raw_eia861__reliability), df_name="Reliability", idx_cols=idx_cols, class_list=RELIABILITY_STANDARDS, @@ -2277,7 +2277,7 @@ def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: "utility_data_misc_eia861": AssetOut(io_manager_key="pudl_sqlite_io_manager"), }, ) -def utility_data_eia861(raw_utility_data_eia861: pd.DataFrame): +def utility_data_eia861(raw_eia861__utility_data: pd.DataFrame): """Transform the EIA 861 Utility Data table. Transformations include: @@ -2291,7 +2291,7 @@ def utility_data_eia861(raw_utility_data_eia861: pd.DataFrame): idx_cols = ["utility_id_eia", "state", "report_date", "nerc_region"] # Pre-tidy clean specific to operational data table - raw_ud = _pre_process(raw_utility_data_eia861).query( + raw_ud = _pre_process(raw_eia861__utility_data).query( "utility_id_eia not in [88888]" ) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 106bfd39c9..35fa349782 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -592,11 +592,11 @@ def gen_fuel_nuclear(gen_fuel_nuke: pd.DataFrame) -> pd.DataFrame: @multi_asset( outs={ - "clean_generation_fuel_eia923": AssetOut(), - "clean_generation_fuel_nuclear_eia923": AssetOut(), + "_core_eia923__generation_fuel": AssetOut(), + "_core_eia923__generation_fuel_nuclear": AssetOut(), }, ) -def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): +def _core_eia_923__generation_fuel_eia923(raw_eia923__generation_fuel: pd.DataFrame): """Transforms the generation_fuel_eia923 table. Transformations include: @@ -613,14 +613,14 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): * Aggregate records with duplicate natural keys. 
Args: - raw_generation_fuel_eia923: The raw ``raw_generation_fuel_eia923`` dataframe. + raw_eia923__generation_fuel: The raw ``raw_eia923__generation_fuel`` dataframe. Returns: - clean_generation_fuel_eia923: Cleaned ``generation_fuel_eia923`` dataframe ready for harvesting. - clean_generation_fuel_nuclear_eia923: Cleaned ``generation_fuel_nuclear_eia923`` dataframe ready for harvesting. + _core_eia923__generation_fuel: Cleaned ``generation_fuel_eia923`` dataframe ready for harvesting. + _core_eia923__generation_fuel_nuclear: Cleaned ``generation_fuel_nuclear_eia923`` dataframe ready for harvesting. """ # This needs to be a copy of what we're passed in so we can edit it. - gen_fuel = raw_generation_fuel_eia923 + gen_fuel = raw_eia923__generation_fuel # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ @@ -709,8 +709,10 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): gen_fuel = _aggregate_generation_fuel_duplicates(gen_fuel) return ( - Output(output_name="clean_generation_fuel_eia923", value=gen_fuel), - Output(output_name="clean_generation_fuel_nuclear_eia923", value=gen_fuel_nuke), + Output(output_name="_core_eia923__generation_fuel", value=gen_fuel), + Output( + output_name="_core_eia923__generation_fuel_nuclear", value=gen_fuel_nuke + ), ) @@ -813,7 +815,7 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da @asset -def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__boiler_fuel(raw_eia923__boiler_fuel: pd.DataFrame) -> pd.DataFrame: """Transforms the boiler_fuel_eia923 table. Transformations include: @@ -826,12 +828,12 @@ def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFra * Combine year and month columns into a single date column. Args: - raw_boiler_fuel_eia923: The raw ``raw_boiler_fuel_eia923`` dataframe. + raw_eia923__boiler_fuel: The raw ``raw_eia923__boiler_fuel`` dataframe. 
Returns: Cleaned ``boiler_fuel_eia923`` dataframe ready for harvesting. """ - bf_df = raw_boiler_fuel_eia923 + bf_df = raw_eia923__boiler_fuel # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 @@ -935,7 +937,7 @@ def remove_duplicate_pks_boiler_fuel_eia923(bf: pd.DataFrame) -> pd.DataFrame: @asset -def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__generation(raw_eia923__generator: pd.DataFrame) -> pd.DataFrame: """Transforms the generation_eia923 table. Transformations include: @@ -946,13 +948,13 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: * Drop generator-date row duplicates (all have no data). Args: - raw_generator_eia923: The raw ``raw_generator_eia923`` dataframe. + raw_eia923__generator: The raw ``raw_eia923__generator`` dataframe. Returns: Cleaned ``generation_eia923`` dataframe ready for harvesting. """ gen_df = ( - raw_generator_eia923.dropna(subset=["generator_id"]) + raw_eia923__generator.dropna(subset=["generator_id"]) .drop( [ "combined_heat_power", @@ -1001,7 +1003,9 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: @asset -def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__coalmine( + raw_eia923__fuel_receipts_costs: pd.DataFrame, +) -> pd.DataFrame: """Transforms the coalmine_eia923 table. Transformations include: @@ -1010,7 +1014,7 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da * Drop duplicates with MSHA ID. Args: - raw_fuel_receipts_costs_eia923: raw precursor to the + raw_eia923__fuel_receipts_costs: raw precursor to the :ref:`fuel_receipts_costs_eia923` table. Returns: @@ -1029,7 +1033,7 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da # Make a copy so we don't alter the FRC data frame... 
which we'll need # to use again for populating the FRC table (see below) - cmi_df = raw_fuel_receipts_costs_eia923 + cmi_df = raw_eia923__fuel_receipts_costs # Keep only the columns listed above: cmi_df = _coalmine_cleanup(cmi_df) @@ -1080,8 +1084,8 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da @asset -def clean_fuel_receipts_costs_eia923( - raw_fuel_receipts_costs_eia923: pd.DataFrame, clean_coalmine_eia923: pd.DataFrame +def _core_eia923__fuel_receipts_costs( + raw_eia923__fuel_receipts_costs: pd.DataFrame, _core_eia923__coalmine: pd.DataFrame ) -> pd.DataFrame: """Transforms the fuel_receipts_costs_eia923 dataframe. @@ -1096,13 +1100,13 @@ def clean_fuel_receipts_costs_eia923( Fuel cost is reported in cents per mmbtu. Converts cents to dollars. Args: - raw_fuel_receipts_costs_eia923: The raw ``raw_fuel_receipts_costs_eia923`` dataframe. - clean_coalmine_eia923: The cleaned pre-harvest ``coalmine_eia923`` dataframe. + raw_eia923__fuel_receipts_costs: The raw ``raw_eia923__fuel_receipts_costs`` dataframe. + _core_eia923__coalmine: The cleaned pre-harvest ``coalmine_eia923`` dataframe. Returns: Cleaned ``fuel_receipts_costs_eia923`` dataframe ready for harvesting. """ - frc_df = raw_fuel_receipts_costs_eia923 + frc_df = raw_eia923__fuel_receipts_costs # Drop fields we're not inserting into the fuel_receipts_costs_eia923 # table. @@ -1122,7 +1126,7 @@ def clean_fuel_receipts_costs_eia923( ] cmi_df = ( - clean_coalmine_eia923 + _core_eia923__coalmine # In order for the merge to work, we need to get the county_id_fips # field back into ready-to-dump form... 
so it matches the types of the # county_id_fips field that we are going to be merging on in the diff --git a/src/pudl/transform/epacems.py b/src/pudl/transform/epacems.py index e752583547..e1c0bd656f 100644 --- a/src/pudl/transform/epacems.py +++ b/src/pudl/transform/epacems.py @@ -96,7 +96,7 @@ def convert_to_utc(df: pd.DataFrame, plant_utc_offset: pd.DataFrame) -> pd.DataF # `parse_dates=True`, is >10x faster. # Read the date as a datetime, so all the dates are midnight op_datetime_naive=lambda x: pd.to_datetime( - x.op_date, format=r"%m-%d-%Y", exact=True, cache=True + x.op_date, format=r"%Y-%m-%d", exact=True, cache=True ) + pd.to_timedelta(x.op_hour, unit="h") # Add the hour ).merge( diff --git a/src/pudl/transform/ferc1.py b/src/pudl/transform/ferc1.py index c3b186b307..ca1ab5b145 100644 --- a/src/pudl/transform/ferc1.py +++ b/src/pudl/transform/ferc1.py @@ -5261,9 +5261,15 @@ def ferc1_transform_asset_factory( dbf_tables = listify(TABLE_NAME_MAP_FERC1[table_name]["dbf"]) xbrl_tables = listify(TABLE_NAME_MAP_FERC1[table_name]["xbrl"]) - ins = {f"raw_dbf__{tn}": AssetIn(tn) for tn in dbf_tables} - ins |= {f"raw_xbrl_instant__{tn}": AssetIn(f"{tn}_instant") for tn in xbrl_tables} - ins |= {f"raw_xbrl_duration__{tn}": AssetIn(f"{tn}_duration") for tn in xbrl_tables} + ins = {f"raw_dbf__{tn}": AssetIn(f"raw_ferc1_dbf__{tn}") for tn in dbf_tables} + ins |= { + f"raw_xbrl_instant__{tn}": AssetIn(f"raw_ferc1_xbrl__{tn}_instant") + for tn in xbrl_tables + } + ins |= { + f"raw_xbrl_duration__{tn}": AssetIn(f"raw_ferc1_xbrl__{tn}_duration") + for tn in xbrl_tables + } ins["clean_xbrl_metadata_json"] = AssetIn("clean_xbrl_metadata_json") table_id = TableIdFerc1(table_name) @@ -5335,18 +5341,18 @@ def create_ferc1_transform_assets() -> list[AssetsDefinition]: @asset(io_manager_key="pudl_sqlite_io_manager") def plants_steam_ferc1( clean_xbrl_metadata_json: dict[str, dict[str, list[dict[str, Any]]]], - f1_steam: pd.DataFrame, - 
steam_electric_generating_plant_statistics_large_plants_402_duration: pd.DataFrame, - steam_electric_generating_plant_statistics_large_plants_402_instant: pd.DataFrame, + raw_ferc1_dbf__f1_steam: pd.DataFrame, + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration: pd.DataFrame, + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant: pd.DataFrame, fuel_ferc1: pd.DataFrame, ) -> pd.DataFrame: """Create the clean plants_steam_ferc1 table. Args: clean_xbrl_metadata_json: XBRL metadata json for all tables. - f1_steam: Raw f1_steam table. - steam_electric_generating_plant_statistics_large_plants_402_duration: raw XBRL duration table. - steam_electric_generating_plant_statistics_large_plants_402_instant: raw XBRL instant table. + raw_ferc1_dbf__f1_steam: Raw f1_steam table. + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration: raw XBRL duration table. + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant: raw XBRL instant table. fuel_ferc1: Transformed fuel_ferc1 table. 
Returns: @@ -5355,9 +5361,9 @@ def plants_steam_ferc1( df = PlantsSteamFerc1TableTransformer( xbrl_metadata_json=clean_xbrl_metadata_json["plants_steam_ferc1"] ).transform( - raw_dbf=f1_steam, - raw_xbrl_instant=steam_electric_generating_plant_statistics_large_plants_402_instant, - raw_xbrl_duration=steam_electric_generating_plant_statistics_large_plants_402_duration, + raw_dbf=raw_ferc1_dbf__f1_steam, + raw_xbrl_instant=raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant, + raw_xbrl_duration=raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration, transformed_fuel=fuel_ferc1, ) return convert_cols_dtypes(df, data_source="ferc1") diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 9fbba1e94b..6f0c68211c 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -369,7 +369,7 @@ def _standardize_offset_codes(df: pd.DataFrame, offset_fixes) -> pd.DataFrame: @asset(io_manager_key="pudl_sqlite_io_manager") -def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFrame: +def respondent_id_ferc714(raw_ferc714__respondent_id: pd.DataFrame) -> pd.DataFrame: """Transform the FERC 714 respondent IDs, names, and EIA utility IDs. Clean up FERC-714 respondent names and manually assign EIA utility IDs to a few FERC @@ -378,12 +378,12 @@ def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFra PacifiCorp). Args: - raw_respondent_id_ferc714: Raw table describing the FERC 714 Respondents. + raw_ferc714__respondent_id: Raw table describing the FERC 714 Respondents. Returns: A clean(er) version of the FERC-714 respondents table. 
""" - df = _pre_process(raw_respondent_id_ferc714, table_name="respondent_id_ferc714") + df = _pre_process(raw_ferc714__respondent_id, table_name="respondent_id_ferc714") df["respondent_name_ferc714"] = df.respondent_name_ferc714.str.strip() df.loc[df.eia_code == 0, "eia_code"] = pd.NA # There are a few utilities that seem mappable, but missing: @@ -394,7 +394,7 @@ def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFra @asset(io_manager_key="pudl_sqlite_io_manager") def demand_hourly_pa_ferc714( - raw_demand_hourly_pa_ferc714: pd.DataFrame, + raw_ferc714__demand_hourly_pa: pd.DataFrame, ) -> pd.DataFrame: """Transform the hourly demand time series by Planning Area. @@ -408,7 +408,7 @@ def demand_hourly_pa_ferc714( - Flip negative signs for reported demand. Args: - raw_demand_hourly_pa_ferc714: Raw table containing hourly demand time series by + raw_ferc714__demand_hourly_pa: Raw table containing hourly demand time series by Planning Area. Returns: @@ -416,7 +416,7 @@ def demand_hourly_pa_ferc714( """ logger.info("Converting dates into pandas Datetime types.") df = _pre_process( - raw_demand_hourly_pa_ferc714, table_name="demand_hourly_pa_ferc714" + raw_ferc714__demand_hourly_pa, table_name="demand_hourly_pa_ferc714" ) # Parse date strings diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 8fcd8ee14a..230cdb505c 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -9,25 +9,24 @@ from collections import defaultdict from collections.abc import Iterator from pathlib import Path -from typing import Any +from typing import Any, Self import datapackage import requests from google.auth.exceptions import DefaultCredentialsError +from pydantic import BaseSettings, HttpUrl, constr from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry import pudl from pudl.workspace import resource_cache from 
pudl.workspace.resource_cache import PudlResourceKey +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) -# The Zenodo tokens recorded here should have read-only access to our archives. -# Including them here is correct in order to allow public use of this tool, so -# long as we stick to read-only keys. - PUDL_YML = Path.home() / ".pudl.yml" +ZenodoDoi = constr(regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)") class ChecksumMismatch(ValueError): @@ -43,9 +42,9 @@ def __init__(self, datapackage_json: dict, dataset: str, doi: str): """Constructs DatapackageDescriptor. Args: - datapackage_json (dict): parsed datapackage.json describing this datapackage. - dataset (str): name of the dataset. - doi (str): DOI (aka version) of the dataset. + datapackage_json: parsed datapackage.json describing this datapackage. + dataset: The name (an identifying string) of the dataset. + doi: A versioned Digital Object Identifier for the dataset. """ self.datapackage_json = datapackage_json self.dataset = dataset @@ -95,12 +94,12 @@ def _matches(self, res: dict, **filters: Any): ) def get_resources( - self, name: str = None, **filters: Any + self: Self, name: str = None, **filters: Any ) -> Iterator[PudlResourceKey]: """Returns series of PudlResourceKey identifiers for matching resources. Args: - name (str): if specified, find resource(s) with this name. + name: if specified, find resource(s) with this name. filters (dict): if specified, find resoure(s) matching these key=value constraints. The constraints are matched against the 'parts' field of the resource entry in the datapackage.json. 
@@ -153,83 +152,115 @@ def get_json_string(self) -> str: return json.dumps(self.datapackage_json, sort_keys=True, indent=4) +class ZenodoDoiSettings(BaseSettings): + """Digital Object Identifiers pointing to currently used Zenodo archives.""" + + # Sandbox DOIs are provided for reference + censusdp1tract: ZenodoDoi = "10.5281/zenodo.4127049" + # censusdp1tract: ZenodoDoi = "10.5072/zenodo.674992" + eia860: ZenodoDoi = "10.5281/zenodo.8164776" + # eia860: ZenodoDoi = "10.5072/zenodo.1222854" + eia860m: ZenodoDoi = "10.5281/zenodo.8188017" + # eia860m: ZenodoDoi = "10.5072/zenodo.1225517" + eia861: ZenodoDoi = "10.5281/zenodo.8231268" + # eia861: ZenodoDoi = "10.5072/zenodo.1229930" + eia923: ZenodoDoi = "10.5281/zenodo.8172818" + # eia923: ZenodoDoi = "10.5072/zenodo.1217724" + eia_bulk_elec: ZenodoDoi = "10.5281/zenodo.7067367" + # eia_bulk_elec: ZenodoDoi = "10.5072/zenodo.1103572" + epacamd_eia: ZenodoDoi = "10.5281/zenodo.7900974" + # epacamd_eia: ZenodoDoi = "10.5072/zenodo.1199170" + epacems: ZenodoDoi = "10.5281/zenodo.8235497" + # epacems: ZenodoDoi = "10.5072/zenodo.1228519" + ferc1: ZenodoDoi = "10.5281/zenodo.7314437" + # ferc1: ZenodoDoi = "10.5072/zenodo.1070868" + ferc2: ZenodoDoi = "10.5281/zenodo.8006881" + # ferc2: ZenodoDoi = "10.5072/zenodo.1188447" + ferc6: ZenodoDoi = "10.5281/zenodo.7130141" + # ferc6: ZenodoDoi = "10.5072/zenodo.1098088" + ferc60: ZenodoDoi = "10.5281/zenodo.7130146" + # ferc60: ZenodoDoi = "10.5072/zenodo.1098089" + ferc714: ZenodoDoi = "10.5281/zenodo.7139875" + # ferc714: ZenodoDoi = "10.5072/zenodo.1098302" + + class Config: + """Pydantic config, reads from .env file.""" + + env_prefix = "pudl_zenodo_doi_" + env_file = ".env" + + class ZenodoFetcher: """API for fetching datapackage descriptors and resource contents from zenodo.""" - # Zenodo tokens recorded here should have read-only access to our archives.
- # Including them here is correct in order to allow public use of this tool, so - # long as we stick to read-only keys. - TOKEN = { - # Read-only personal access tokens for pudl@catalyst.coop: - "sandbox": "qyPC29wGPaflUUVAv1oGw99ytwBqwEEdwi4NuUrpwc3xUcEwbmuB4emwysco", - "production": "KXcG5s9TqeuPh1Ukt5QYbzhCElp9LxuqAuiwdqHP0WS4qGIQiydHn6FBtdJ5", - } - - DOI = { - "sandbox": { - "censusdp1tract": "10.5072/zenodo.674992", - "eia860": "10.5072/zenodo.1222854", - "eia860m": "10.5072/zenodo.1225517", - "eia861": "10.5072/zenodo.1229930", - "eia923": "10.5072/zenodo.1217724", - "eia_bulk_elec": "10.5072/zenodo.1103572", - "epacamd_eia": "10.5072/zenodo.1199170", - "epacems": "10.5072/zenodo.672963", - "ferc1": "10.5072/zenodo.1070868", - "ferc2": "10.5072/zenodo.1188447", - "ferc6": "10.5072/zenodo.1098088", - "ferc60": "10.5072/zenodo.1098089", - "ferc714": "10.5072/zenodo.1098302", - }, - "production": { - "censusdp1tract": "10.5281/zenodo.4127049", - "eia860": "10.5281/zenodo.8164776", - "eia860m": "10.5281/zenodo.8188017", - "eia861": "10.5281/zenodo.8231268", - "eia923": "10.5281/zenodo.8172818", - "eia_bulk_elec": "10.5281/zenodo.7067367", - "epacamd_eia": "10.5281/zenodo.7900974", - "epacems": "10.5281/zenodo.6910058", - "ferc1": "10.5281/zenodo.7314437", - "ferc2": "10.5281/zenodo.8006881", - "ferc6": "10.5281/zenodo.7130141", - "ferc60": "10.5281/zenodo.7130146", - "ferc714": "10.5281/zenodo.7139875", - }, - } - API_ROOT = { - "sandbox": "https://sandbox.zenodo.org/api", - "production": "https://zenodo.org/api", - } - - def __init__(self, sandbox: bool = False, timeout: float = 15.0): - """Constructs ZenodoFetcher instance. + _descriptor_cache: dict[str, DatapackageDescriptor] + zenodo_dois: ZenodoDoiSettings + timeout: float + http: requests.Session - Args: - sandbox (bool): controls whether production or sandbox zenodo backends - and associated DOIs should be used. - timeout (float): timeout (in seconds) for http requests. 
- """ - backend = "sandbox" if sandbox else "production" - self._api_root = self.API_ROOT[backend] - self._token = self.TOKEN[backend] - self._dataset_to_doi = self.DOI[backend] - self._descriptor_cache: dict[str, DatapackageDescriptor] = {} + def __init__( + self: Self, zenodo_dois: ZenodoDoiSettings | None = None, timeout: float = 15.0 + ): + """Constructs ZenodoFetcher instance.""" + if not zenodo_dois: + self.zenodo_dois = ZenodoDoiSettings() self.timeout = timeout + retries = Retry( backoff_factor=2, total=3, status_forcelist=[429, 500, 502, 503, 504] ) adapter = HTTPAdapter(max_retries=retries) - self.http = requests.Session() self.http.mount("http://", adapter) self.http.mount("https://", adapter) + self._descriptor_cache = {} + + def get_doi(self: Self, dataset: str) -> ZenodoDoi: + """Returns DOI for given dataset.""" + try: + doi = self.zenodo_dois.__getattribute__(dataset) + except AttributeError: + raise AttributeError(f"No Zenodo DOI found for dataset {dataset}.") + return doi - def _fetch_from_url(self, url: str) -> requests.Response: + def get_known_datasets(self: Self) -> list[str]: + """Returns list of supported datasets.""" + return [name for name, doi in sorted(self.zenodo_dois)] + + def _get_token(self: Self, url: HttpUrl) -> str: + """Return the appropriate read-only Zenodo personal access token. + + These tokens are associated with the pudl@catalyst.coop Zenodo account, which + owns all of the Catalyst raw data archives. 
+ """ + if "sandbox" in url: + token = "qyPC29wGPaflUUVAv1oGw99ytwBqwEEdwi4NuUrpwc3xUcEwbmuB4emwysco" # nosec: B105 + else: + token = "KXcG5s9TqeuPh1Ukt5QYbzhCElp9LxuqAuiwdqHP0WS4qGIQiydHn6FBtdJ5" # nosec: B105 + return token + + def _get_url(self: Self, doi: ZenodoDoi) -> HttpUrl: + """Construct a Zenodo deposition URL based on its Zenodo DOI.""" + match = re.search(r"(10\.5072|10\.5281)/zenodo.([\d]+)", doi) + + if match is None: + raise ValueError(f"Invalid Zenodo DOI: {doi}") + + doi_prefix = match.groups()[0] + zenodo_id = match.groups()[1] + if doi_prefix == "10.5072": + api_root = "https://sandbox.zenodo.org/api" + elif doi_prefix == "10.5281": + api_root = "https://zenodo.org/api" + else: + raise ValueError(f"Invalid Zenodo DOI: {doi}") + return f"{api_root}/deposit/depositions/{zenodo_id}" + + def _fetch_from_url(self: Self, url: HttpUrl) -> requests.Response: logger.info(f"Retrieving {url} from zenodo") response = self.http.get( - url, params={"access_token": self._token}, timeout=self.timeout + url, params={"access_token": self._get_token(url)}, timeout=self.timeout ) if response.status_code == requests.codes.ok: logger.debug(f"Successfully downloaded {url}") @@ -237,22 +268,11 @@ def _fetch_from_url(self, url: str) -> requests.Response: else: raise ValueError(f"Could not download {url}: {response.text}") - def _doi_to_url(self, doi: str) -> str: - """Returns url that holds the datapackage for given doi.""" - match = re.search(r"zenodo.([\d]+)", doi) - if match is None: - raise ValueError(f"Invalid doi {doi}") - - zen_id = int(match.groups()[0]) - return f"{self._api_root}/deposit/depositions/{zen_id}" - - def get_descriptor(self, dataset: str) -> DatapackageDescriptor: - """Returns DatapackageDescriptor for given dataset.""" - doi = self._dataset_to_doi.get(dataset) - if not doi: - raise KeyError(f"No doi found for dataset {dataset}") + def get_descriptor(self: Self, dataset: str) -> DatapackageDescriptor: + """Returns :class:`DatapackageDescriptor` for
given dataset.""" + doi = self.get_doi(dataset) if doi not in self._descriptor_cache: - dpkg = self._fetch_from_url(self._doi_to_url(doi)) + dpkg = self._fetch_from_url(self._get_url(doi)) for f in dpkg.json()["files"]: if f["filename"] == "datapackage.json": resp = self._fetch_from_url(f["links"]["download"]) @@ -266,15 +286,7 @@ def get_descriptor(self, dataset: str) -> DatapackageDescriptor: ) return self._descriptor_cache[doi] - def get_resource_key(self, dataset: str, name: str) -> PudlResourceKey: - """Returns PudlResourceKey for given resource.""" - return PudlResourceKey(dataset, self._dataset_to_doi[dataset], name) - - def get_doi(self, dataset: str) -> str: - """Returns DOI for given dataset.""" - return self._dataset_to_doi[dataset] - - def get_resource(self, res: PudlResourceKey) -> bytes: + def get_resource(self: Self, res: PudlResourceKey) -> bytes: """Given resource key, retrieve contents of the file from zenodo.""" desc = self.get_descriptor(res.dataset) url = desc.get_resource_path(res.name) @@ -282,10 +294,6 @@ def get_resource(self, res: PudlResourceKey) -> bytes: desc.validate_checksum(res.name, content) return content - def get_known_datasets(self) -> list[str]: - """Returns list of supported datasets.""" - return sorted(self._dataset_to_doi) - class Datastore: """Handle connections and downloading of Zenodo Source archives.""" @@ -294,22 +302,18 @@ def __init__( self, local_cache_path: Path | None = None, gcs_cache_path: str | None = None, - sandbox: bool = False, - timeout: float = 15, + timeout: float = 15.0, ): # TODO(rousik): figure out an efficient way to configure datastore caching """Datastore manages file retrieval for PUDL datasets. Args: - local_cache_path (Path): if provided, LocalFileCache pointed at the data + local_cache_path: if provided, LocalFileCache pointed at the data subdirectory of this path will be used with this Datastore. 
- gcs_cache_path (str): if provided, GoogleCloudStorageCache will be used + gcs_cache_path: if provided, GoogleCloudStorageCache will be used to retrieve data files. The path is expected to have the following format: gs://bucket[/path_prefix] - sandbox (bool): if True, use sandbox zenodo backend when retrieving files, - otherwise use production. This affects which zenodo servers are contacted - as well as dois used for each dataset. - timeout (floaTR): connection timeouts (in seconds) to use when connecting + timeout: connection timeouts (in seconds) to use when connecting to Zenodo servers. """ self._cache = resource_cache.LayeredCache() @@ -331,7 +335,7 @@ def __init__( ) pass - self._zenodo_fetcher = ZenodoFetcher(sandbox=sandbox, timeout=timeout) + self._zenodo_fetcher = ZenodoFetcher(timeout=timeout) def get_known_datasets(self) -> list[str]: """Returns list of supported datasets.""" @@ -420,6 +424,10 @@ def get_zipfile_resources( for resource_key, content in self.get_resources(dataset, **filters): yield resource_key, zipfile.ZipFile(io.BytesIO(content)) + def get_zipfile_file_names(self, zip_file: zipfile.ZipFile): + """Given a zipfile, return a list of the file names in it.""" + return zipfile.ZipFile.namelist(zip_file) + class ParseKeyValues(argparse.Action): """Transforms k1=v1,k2=v2,... 
@@ -441,17 +449,13 @@ def __call__(self, parser, namespace, values, option_string=None): def parse_command_line(): """Collect the command line arguments.""" - prod_dois = "\n".join( - [f" - {x}" for x in ZenodoFetcher.DOI["production"].keys()] + known_datasets = "\n".join( + [f" - {x}" for x in ZenodoFetcher().get_known_datasets()] ) - sand_dois = "\n".join([f" - {x}" for x in ZenodoFetcher.DOI["sandbox"].keys()]) dataset_msg = f""" -Available Production Datasets: -{prod_dois} - -Available Sandbox Datasets: -{sand_dois}""" +Available Datasets: +{known_datasets}""" parser = argparse.ArgumentParser( description="Download and cache ETL source data from Zenodo.", @@ -462,12 +466,8 @@ def parse_command_line(): parser.add_argument( "--dataset", help="Download the specified dataset only. See below for available options. " - "The default is to download all, which may take an hour or more." - "speed.", - ) - parser.add_argument( - "--pudl_in", - help="Override pudl_in directory, defaults to setting in ~/.pudl.yml", + "The default is to download all datasets, which may take hours depending on " + "network speed.", ) parser.add_argument( "--validate", @@ -475,12 +475,6 @@ def parse_command_line(): action="store_true", default=False, ) - parser.add_argument( - "--sandbox", - help="Download data from Zenodo sandbox server. 
For testing purposes only.", - action="store_true", - default=False, - ) parser.add_argument( "--loglevel", help="Set logging level (DEBUG, INFO, WARNING, ERROR, or CRITICAL).", @@ -529,23 +523,6 @@ def parse_command_line(): return parser.parse_args() -def _get_pudl_in(args: dict) -> Path: - """Figure out what pudl_in path should be used.""" - if args.pudl_in: - return Path(args.pudl_in) - else: - return Path(pudl.workspace.setup.get_defaults()["PUDL_INPUT"]) - - -def _create_datastore(args: argparse.Namespace) -> Datastore: - """Constructs datastore instance.""" - # Configure how we want to obtain raw input data: - ds_kwargs = dict(gcs_cache_path=args.gcs_cache_path, sandbox=args.sandbox) - if not args.bypass_local_cache: - ds_kwargs["local_cache_path"] = _get_pudl_in(args) - return Datastore(**ds_kwargs) - - def print_partitions(dstore: Datastore, datasets: list[str]) -> None: """Prints known partition keys and its values for each of the datasets.""" for single_ds in datasets: @@ -609,7 +586,14 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - dstore = _create_datastore(args) + cache_path = None + if not args.bypass_local_cache: + cache_path = PudlPaths().input_dir + + dstore = Datastore( + gcs_cache_path=args.gcs_cache_path, + local_cache_path=cache_path, + ) if args.dataset: datasets = [args.dataset] diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 13409f3fcd..b2a6c8a519 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -4,287 +4,114 @@ import pathlib import shutil from pathlib import Path -from typing import IO +from typing import Any, Union -import yaml -from dotenv import load_dotenv +from pydantic import BaseSettings, DirectoryPath +from pydantic.validators import path_validator import pudl.logging_helpers logger = pudl.logging_helpers.get_logger(__name__) -def get_defaults( - input_dir: str | None = None, - output_dir: str | None = None, - yaml_file: IO | None = None, - 
default_pudl_yaml: Path | None = Path.home() / ".pudl.yml", -) -> dict[str, str]: - """Derive PUDL workspace paths from specified input/output directories. +class MissingPath(Path): + """Validates potential path that doesn't exist.""" - Determines input/output directory locations from YAML, then overrides with - env vars, then overrides with keywords passed in. + @classmethod + def __get_validators__(cls) -> Any: + """Validates that path doesn't exist and is path-like.""" + yield path_validator + yield cls.validate - Input/output workspace roots can be the same directories. + @classmethod + def validate(cls, value: Path) -> Path: + """Validates that path doesn't exist.""" + if value.exists(): + raise ValueError("path exists") - Note: will update PUDL_OUTPUT and PUDL_INPUT env vars if they are - overridden by kwargs, so that Dagster configurations in child processes see - the updated configs when they read from env vars. + return value - Args: - input_dir: equivalent to PUDL_INPUT environment variable, but overrides - that value. Derived paths treat the parent directory as the input - workspace root. - output_dir: equivalent to PUDL_OUTPUT environment variable, but - overrides that value. Derived paths treat the parent directory as - the output workspace root. - yaml_file: a buffer including the YAML configuration. The `pudl_in` and - `pudl_out` keys within this file correspond to the input/output - workspace roots directly, instead of through parents. - Returns: - dictionary with a variety of different paths where inputs/outputs are - to be found. - """ - load_dotenv() - - # Workaround for not having PUDL_* env vars in ReadTheDocs builds. - # - # They don't let you set env var through config files, and I'd rather - # have this in source control than go through some sort of web UI - # - # I don't like this any more than you do. 
- if os.getenv("READTHEDOCS"): - os.environ["PUDL_OUTPUT"] = str(Path("~/pudl-work/output").expanduser()) - os.environ["PUDL_INPUT"] = str(Path("~/pudl-work/data").expanduser()) - - yaml_settings = _munge_legacy_yaml( - yaml_file=yaml_file, default_pudl_yaml=default_pudl_yaml - ) - - # read from env vars - env_var_mapping = { - "pudl_in": os.getenv("PUDL_INPUT"), - "pudl_out": os.getenv("PUDL_OUTPUT"), - } - env_settings = { - key: str(Path(value)) - for key, value in env_var_mapping.items() - if value is not None - } - - # read from params - kwarg_mapping = {"pudl_in": input_dir, "pudl_out": output_dir} - kwarg_settings = { - key: str(Path(value)) - for key, value in kwarg_mapping.items() - if value is not None - } - - # Start with an empty settings, then override in order of precedence. - settings: dict[str, str] = {} - for settings_source in [yaml_settings, env_settings, kwarg_settings]: - settings |= settings_source - - if not ("pudl_in" in settings and "pudl_out" in settings): - raise RuntimeError( - "Must set 'PUDL_OUTPUT'/'PUDL_INPUT' environment variables or provide valid yaml config file." 
- ) - - settings = derive_paths(settings["pudl_in"], settings["pudl_out"]) - - # override env vars so Dagster can see the most up-to-date configs - if output_dir or "PUDL_OUTPUT" not in os.environ: - os.environ["PUDL_OUTPUT"] = settings["pudl_out"] - if input_dir or "PUDL_INPUT" not in os.environ: - os.environ["PUDL_INPUT"] = settings["data_dir"] - if "DAGSTER_HOME" not in os.environ: - os.environ["DAGSTER_HOME"] = str(Path(settings["pudl_in"]) / "dagster_home") - - return settings - - -def _munge_legacy_yaml( - yaml_file: IO | None, default_pudl_yaml: Path | None -) -> dict[str, str]: - # read from YAML source - if yaml_file is not None: - yaml_settings = yaml.safe_load(yaml_file) - elif default_pudl_yaml and default_pudl_yaml.exists(): - with default_pudl_yaml.open() as f: - yaml_settings = yaml.safe_load(f) - else: - yaml_settings = {} - - # legacy YAML format expects pudl_in/out to point at the parent directory instead - # of the input/output directories directly, so we munge here. - if "pudl_in" in yaml_settings: - yaml_settings["pudl_in"] = f"{yaml_settings['pudl_in']}/data" - if "pudl_out" in yaml_settings: - yaml_settings["pudl_out"] = f"{yaml_settings['pudl_out']}/output" - - return yaml_settings - - -def set_defaults(pudl_in, pudl_out, clobber=False): - """Set default user input and output locations in ``$HOME/.pudl.yml``. - - Create a user settings file for future reference, that defines the default - PUDL input and output directories. If this file already exists, behavior - depends on the clobber parameter, which is False by default. If it's True, - the existing file is replaced. If False, the existing file is not changed. 
+# TODO: The following could be replaced with NewPath from pydantic v2 +PotentialDirectoryPath = Union[DirectoryPath, MissingPath] - Args: - pudl_in (os.PathLike): Path to be used as the default input directory - for PUDL -- this is where :mod:`pudl.workspace.datastore` will look - to find the ``data`` directory, full of data from public agencies. - pudl_out (os.PathLike): Path to the default output directory for PUDL, - where results of data processing will be organized. - clobber (bool): If True and a user settings file exists, overwrite it. - If False, do not alter the existing file. Defaults to False. - Returns: - None - """ - logger.warning( - "pudl_settings is being deprecated in favor of environment " - "variables PUDL_OUTPUT and PUDL_INPUT. For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) - settings_file = pathlib.Path.home() / ".pudl.yml" - if settings_file.exists(): - if clobber: - logger.info(f"{settings_file} exists: clobbering.") - else: - logger.info(f"{settings_file} exists: not clobbering.") - return - - with settings_file.open(mode="w") as f: - f.write(f"pudl_in: {pudl_in.expanduser().resolve()}\n") - f.write(f"pudl_out: {pudl_out.expanduser().resolve()}\n") - - -def derive_paths(pudl_in, pudl_out): - """Derive PUDL paths based on given input and output paths. - - If no configuration file path is provided, attempt to read in the user - configuration from a file called .pudl.yml in the user's HOME directory. - Presently the only values we expect are pudl_in and pudl_out, directories - that store files that PUDL either depends on that rely on PUDL. - - Args: - pudl_in (os.PathLike): Path to the directory containing the PUDL input - files, most notably the ``data`` directory which houses the raw - data downloaded from public agencies by the - :mod:`pudl.workspace.datastore` tools. ``pudl_in`` may be the same - directory as ``pudl_out``. 
- pudl_out (os.PathLike): Path to the directory where PUDL should write - the outputs it generates. These will be organized into directories - according to the output format (sqlite, parquet, etc.). +class PudlPaths(BaseSettings): + """These settings provide access to various PUDL directories. - Returns: - dict: A dictionary containing common PUDL settings, derived from those - read out of the YAML file. Mostly paths for inputs & outputs. + It is primarily configured via PUDL_INPUT and PUDL_OUTPUT environment + variables. Other paths of relevance are derived from these. """ - logger.warning( - "pudl_settings is being deprecated in favor of environment variables " - "PUDL_OUTPUT and PUDL_INPUT. For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) - pudl_settings = {} - - # The only "inputs" are the datastore and example settings files: - # Convert from input string to Path and make it absolute w/ resolve() - pudl_in = pathlib.Path(pudl_in).expanduser().resolve() - data_dir = pudl_in - pudl_workspace_legacy = pudl_in.parent - settings_dir = pudl_workspace_legacy / "settings" - - # Store these as strings... since we aren't using Paths everywhere yet: - pudl_settings["pudl_in"] = str(pudl_workspace_legacy) - pudl_settings["data_dir"] = str(data_dir) - pudl_settings["settings_dir"] = str(settings_dir) - - # Everything else goes into outputs, generally organized by type of file: - pudl_out = pathlib.Path(pudl_out).expanduser().resolve() - pudl_settings["pudl_out"] = str(pudl_out) - # One directory per output format: - logger.warning( - "sqlite and parquet directories are no longer being used. Make sure there is a " - "single directory named 'output' at the root of your workspace. 
For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) - for fmt in ["sqlite", "parquet"]: - pudl_settings[f"{fmt}_dir"] = pudl_settings["pudl_out"] - - # Mirror dagster env vars for ease of use - pudl_settings["PUDL_OUTPUT"] = pudl_settings["pudl_out"] - pudl_settings["PUDL_INPUT"] = pudl_settings["data_dir"] - - ferc1_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc1.sqlite") - pudl_settings["ferc1_db"] = "sqlite:///" + str(ferc1_db_file.resolve()) - - ferc1_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc1_xbrl.sqlite") - pudl_settings["ferc1_xbrl_db"] = "sqlite:///" + str(ferc1_db_file.resolve()) - pudl_settings["ferc1_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc1_xbrl_datapackage.json" - ) - pudl_settings["ferc1_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc1_xbrl_taxonomy_metadata.json" - ) - - ferc2_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc2_xbrl.sqlite") - pudl_settings["ferc2_xbrl_db"] = "sqlite:///" + str(ferc2_db_file.resolve()) - pudl_settings["ferc2_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc2_xbrl_datapackage.json" - ) - pudl_settings["ferc2_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc2_xbrl_taxonomy_metadata.json" - ) - - ferc6_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc6_xbrl.sqlite") - pudl_settings["ferc6_xbrl_db"] = "sqlite:///" + str(ferc6_db_file.resolve()) - pudl_settings["ferc6_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc6_xbrl_datapackage.json" - ) - pudl_settings["ferc6_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc6_xbrl_taxonomy_metadata.json" - ) - - ferc60_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc60_xbrl.sqlite") - pudl_settings["ferc60_xbrl_db"] = "sqlite:///" + str(ferc60_db_file.resolve()) - pudl_settings["ferc60_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], 
"ferc60_xbrl_datapackage.json" - ) - pudl_settings["ferc60_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc60_xbrl_taxonomy_metadata.json" - ) - - ferc714_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc714_xbrl.sqlite") - pudl_settings["ferc714_xbrl_db"] = "sqlite:///" + str(ferc714_db_file.resolve()) - pudl_settings["ferc714_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc714_xbrl_datapackage.json" - ) - pudl_settings["ferc714_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc714_xbrl_taxonomy_metadata.json" - ) - - pudl_settings["pudl_db"] = "sqlite:///" + str( - pathlib.Path(pudl_settings["pudl_out"], "pudl.sqlite") - ) - - pudl_settings["censusdp1tract_db"] = "sqlite:///" + str( - pathlib.Path(pudl_settings["pudl_out"], "censusdp1tract.sqlite") - ) - return pudl_settings - - -def init(pudl_settings: dict[str, str], clobber=False): + + pudl_input: PotentialDirectoryPath + pudl_output: PotentialDirectoryPath + + class Config: + """Pydantic config, reads from .env file.""" + + env_file = ".env" + + @property + def input_dir(self) -> Path: + """Path to PUDL input directory.""" + return Path(self.pudl_input) + + @property + def output_dir(self) -> Path: + """Path to PUDL output directory.""" + return Path(self.pudl_output) + + @property + def settings_dir(self) -> Path: + """Path to directory containing settings files.""" + return self.input_dir.parent / "settings" + + @property + def data_dir(self) -> Path: + """Path to PUDL data directory.""" + # TODO(janrous): possibly deprecate this in favor of input_dir + return self.input_dir + + @property + def pudl_db(self) -> Path: + """Returns url of locally stored pudl sqlite database.""" + return self.sqlite_db("pudl") + + def sqlite_db(self, name: str) -> str: + """Returns url of locally stored pudl slqlite database with given name. + + The name is expected to be the name of the database without the .sqlite + suffix. E.g. 
pudl, ferc1 and so on. + """ + db_path = PudlPaths().output_dir / f"{name}.sqlite" + return f"sqlite:///{db_path}" + return self.output_dir / f"{name}.sqlite" + + def output_file(self, filename: str) -> Path: + """Path to file in PUDL output directory.""" + return self.output_dir / filename + + @staticmethod + def set_path_overrides( + input_dir: str | None = None, + output_dir: str | None = None, + ) -> None: + """Set PUDL_INPUT and/or PUDL_OUTPUT env variables. + + Args: + input_dir: if set, overrides PUDL_INPUT env variable. + output_dir: if set, overrides PUDL_OUTPUT env variable. + """ + if input_dir: + os.environ["PUDL_INPUT"] = input_dir + if output_dir: + os.environ["PUDL_OUTPUT"] = output_dir + + +def init(clobber=False): """Set up a new PUDL working environment based on the user settings. Args: - pudl_settings (os.PathLike): Paths to data inputs & outputs. See - get_defaults() for how to get these. clobber (bool): if True, replace existing files. If False (the default) do not replace existing files. @@ -292,22 +119,25 @@ def init(pudl_settings: dict[str, str], clobber=False): None """ # Create tmp directory - tmp_dir = pathlib.Path(pudl_settings["data_dir"], "tmp") + tmp_dir = PudlPaths().data_dir / "tmp" tmp_dir.mkdir(parents=True, exist_ok=True) # These are files that may exist in the package_data directory, but that # we do not want to deploy into a user workspace: ignore_files = ["__init__.py", ".gitignore"] + # TODO(janrous): perhaps we don't need to do this? 
# Make a settings directory in the workspace, and deploy settings files: - settings_dir = pathlib.Path(pudl_settings["settings_dir"]) + settings_dir = PudlPaths().settings_dir settings_dir.mkdir(parents=True, exist_ok=True) settings_pkg = "pudl.package_data.settings" deploy(settings_pkg, settings_dir, ignore_files, clobber=clobber) # Make output directory: - pudl_out = pathlib.Path(pudl_settings["pudl_out"]) - pudl_out.mkdir(parents=True, exist_ok=True) + PudlPaths().output_dir.mkdir(parents=True, exist_ok=True) + # TODO(rousik): it might make sense to turn this into a method of + # PudlPaths object and to move this to settings.py from this module. + # Unclear whether deployment of settings files makes much sense. def deploy( diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index 372c674108..6dea0a7dbd 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -45,6 +45,7 @@ import sys import pudl +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -55,17 +56,17 @@ def initialize_parser(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument( - "--pudl_input", + "--pudl_in", "-i", type=str, - dest="pudl_input", + dest="pudl_in", help="""Directory where the PUDL input data should be located.""", ) parser.add_argument( - "--pudl_output", + "--pudl_out", "-o", type=str, - dest="pudl_output", + dest="pudl_out", help="""Directory where the PUDL outputs, notebooks, and example settings files should be located.""", ) @@ -102,20 +103,17 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - if args.pudl_input: + if args.pudl_in: pudl_in = pathlib.Path(args.pudl_in).expanduser().resolve() if not pathlib.Path.is_dir(pudl_in): raise FileNotFoundError(f"Directory not found: {pudl_in}") - - if args.pudl_output: + PudlPaths.set_path_overrides(input_dir=pudl_in) + if args.pudl_out: pudl_out = 
pathlib.Path(args.pudl_out).expanduser().resolve() if not pathlib.Path.is_dir(pudl_out): raise FileNotFoundError(f"Directory not found: {pudl_out}") - - settings = pudl.workspace.setup.get_defaults( - input_dir=args.pudl_input, output_dir=args.pudl_output - ) - pudl.workspace.setup.init(settings, clobber=args.clobber) + PudlPaths.set_path_overrides(output_dir=pudl_out) + pudl.workspace.setup.init(clobber=args.clobber) if __name__ == "__main__": diff --git a/test/conftest.py b/test/conftest.py index d832b73673..3e93a98489 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -2,7 +2,6 @@ Defines useful fixtures, command line args. """ -import json import logging import os from pathlib import Path @@ -27,6 +26,7 @@ from pudl.metadata.classes import Package from pudl.output.pudltabl import PudlTabl from pudl.settings import DatasetsSettings, EtlSettings, XbrlFormNumber +from pudl.workspace.setup import PudlPaths logger = logging.getLogger(__name__) @@ -67,12 +67,6 @@ def pytest_addoption(parser): default=False, help="If enabled, the local file cache for datastore will not be used.", ) - parser.addoption( - "--sandbox", - action="store_true", - default=False, - help="Use raw inputs from the Zenodo sandbox server.", - ) parser.addoption( "--save-unmapped-ids", action="store_true", @@ -87,15 +81,6 @@ def pytest_addoption(parser): ) -@pytest.fixture(scope="session") -def pudl_env(pudl_input_output_dirs): - """Set PUDL_OUTPUT/PUDL_INPUT/DAGSTER_HOME environment variables.""" - pudl.workspace.setup.get_defaults(**pudl_input_output_dirs) - - logger.info(f"PUDL_OUTPUT path: {os.environ['PUDL_OUTPUT']}") - logger.info(f"PUDL_INPUT path: {os.environ['PUDL_INPUT']}") - - @pytest.fixture(scope="session", name="test_dir") def test_directory(): """Return the path to the top-level directory containing the tests.""" @@ -194,7 +179,7 @@ def pudl_out_orig(live_dbs, pudl_engine): @pytest.fixture(scope="session") -def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, 
etl_settings, pudl_env): +def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs, but only based on DBF sources.""" if not live_dbs: ferc_to_sqlite_job_factory( @@ -214,7 +199,7 @@ def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings, pudl_ @pytest.fixture(scope="session") -def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs, but only based on XBRL sources.""" if not live_dbs: ferc_to_sqlite_job_factory( @@ -234,7 +219,7 @@ def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings, pudl @pytest.fixture(scope="session") -def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs. If we are using the test database, we initialize it from scratch first. 
If we're @@ -280,7 +265,6 @@ def ferc1_xbrl_sql_engine(ferc_to_sqlite_xbrl_only, dataset_settings_config): @pytest.fixture(scope="session") def ferc_xbrl( - pudl_settings_fixture, live_dbs, ferc_to_sqlite_settings, pudl_datastore_fixture, @@ -301,7 +285,7 @@ def ferc_xbrl( for form in XbrlFormNumber: raw_archive, taxonomy_entry_point = datastore.get_taxonomy(year, form) - sqlite_engine = _get_sqlite_engine(form.value, pudl_settings_fixture, True) + sqlite_engine = _get_sqlite_engine(form.value, True) form_settings = ferc_to_sqlite_settings.get_xbrl_dataset_settings(form) @@ -315,12 +299,14 @@ def ferc_xbrl( requested_tables=form_settings.tables, batch_size=len(filings_subset) // step_size + 1, workers=step_size, - datapackage_path=pudl_settings_fixture[ - f"ferc{form.value}_xbrl_datapackage" - ], - metadata_path=pudl_settings_fixture[ - f"ferc{form.value}_xbrl_taxonomy_metadata" - ], + # TODO(janrous): the following should ideally be provided by some + # ferc dataset metadata object rather than encoding this in settings. 
+ datapackage_path=PudlPaths().output_file( + f"ferc{form.value}_xbrl_datapackage.json" + ), + metadata_path=PudlPaths().output_file( + f"ferc{form.value}_xbrl_taxonomy_metadata.json" + ), archive_file_path=taxonomy_entry_point, ) @@ -336,8 +322,6 @@ def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl): @pytest.fixture(scope="session") def pudl_sql_io_manager( - pudl_env, - pudl_settings_fixture, ferc1_engine_dbf, # Implicit dependency ferc1_engine_xbrl, # Implicit dependency live_dbs, @@ -353,10 +337,8 @@ def pudl_sql_io_manager( """ logger.info("setting up the pudl_engine fixture") if not live_dbs: - db_path = pudl_settings_fixture["pudl_db"] - # Create the database and schemas - engine = sa.create_engine(db_path) + engine = sa.create_engine(PudlPaths().pudl_db) md = Package.from_resource_ids().to_sql() md.create_all(engine) # Run the ETL and generate a new PUDL SQLite DB for testing: @@ -385,61 +367,34 @@ def pudl_engine(pudl_sql_io_manager): return pudl_sql_io_manager.engine -@pytest.fixture(scope="session") -def pudl_tmpdir(tmp_path_factory): - # Base temporary directory for all other tmp dirs. 
- tmpdir = tmp_path_factory.mktemp("pudl") - return tmpdir - - -@pytest.fixture(scope="session") -def pudl_output_tmpdir(pudl_tmpdir): - tmpdir = pudl_tmpdir / "output" - tmpdir.mkdir() - return tmpdir - - -@pytest.fixture(scope="session") -def pudl_input_tmpdir(pudl_tmpdir): - tmpdir = pudl_tmpdir / "data" - tmpdir.mkdir() - return tmpdir - - -@pytest.fixture(scope="session") -def pudl_input_output_dirs(request, live_dbs, pudl_input_tmpdir, pudl_output_tmpdir): - """Determine where the PUDL input/output dirs should be.""" - input_override = None - output_override = None - +@pytest.fixture(scope="session", autouse=True) +def configure_paths_for_tests(tmp_path_factory, request): + """Configures PudlPaths for tests.""" + gha_override_input = False + gha_override_output = False if os.environ.get("GITHUB_ACTIONS", False): - # hard-code input dir for CI caching - input_override = Path(os.environ["HOME"]) / "pudl-work" / "data" - output_override = Path(os.environ["HOME"]) / "pudl-work" / "output" - elif request.config.getoption("--tmp-data"): - # use tmpdir for inputs if we ask for it - input_override = pudl_input_tmpdir - if not live_dbs: - # use tmpdir for outputs if we haven't passed --live-db - output_override = pudl_output_tmpdir - - return {"input_dir": input_override, "output_dir": output_override} - - -@pytest.fixture(scope="session", name="pudl_settings_fixture") -def pudl_settings_dict(request, pudl_input_output_dirs): # noqa: C901 - """Determine some settings (mostly paths) for the test session.""" - logger.info("setting up the pudl_settings_fixture") - pudl_settings = pudl.workspace.setup.get_defaults(**pudl_input_output_dirs) - pudl.workspace.setup.init(pudl_settings) - - pudl_settings["sandbox"] = request.config.getoption("--sandbox") - - pretty_settings = json.dumps( - {str(k): str(v) for k, v in pudl_settings.items()}, indent=2 - ) - logger.info(f"pudl_settings being used: {pretty_settings}") - return pudl_settings + gha_override_input = "PUDL_INPUTS" not 
in os.environ + gha_override_output = "PUDL_OUTPUTS" not in os.environ + logger.info( + "Running in GitHub Actions environment, using" + f" temporary input dir: {gha_override_input}, and" + f" temporary output dir: {gha_override_output}" + ) + pudl_tmpdir = tmp_path_factory.mktemp("pudl") + if gha_override_output or request.config.getoption("--tmp-data"): + in_tmp = pudl_tmpdir / "data" + in_tmp.mkdir() + PudlPaths.set_path_overrides( + input_dir=str(Path(in_tmp).resolve()), + ) + if gha_override_output or not request.config.getoption("--live-dbs"): + out_tmp = pudl_tmpdir / "output" + out_tmp.mkdir() + PudlPaths.set_path_overrides( + output_dir=str(Path(out_tmp).resolve()), + ) + logger.info(f"Starting unit tests with output path {PudlPaths().output_dir}") + pudl.workspace.setup.init() @pytest.fixture(scope="session") @@ -455,7 +410,6 @@ def pudl_datastore_config(request): return { "gcs_cache_path": gcs_cache_path if gcs_cache_path else "", "use_local_cache": not request.config.getoption("--bypass-local-cache"), - "sandbox": request.config.getoption("--sandbox"), } diff --git a/test/integration/datasette_metadata_test.py b/test/integration/datasette_metadata_test.py index 8fc601982d..dfd0f0838f 100644 --- a/test/integration/datasette_metadata_test.py +++ b/test/integration/datasette_metadata_test.py @@ -2,24 +2,22 @@ import json import logging -import os -from pathlib import Path import datasette.utils import yaml from pudl.metadata.classes import DatasetteMetadata +from pudl.workspace.setup import PudlPaths logger = logging.getLogger(__name__) -def test_datasette_metadata_to_yml(pudl_env, ferc1_engine_xbrl): +def test_datasette_metadata_to_yml(ferc1_engine_xbrl): """Test the ability to export metadata as YML for use with Datasette.""" - pudl_output = Path(os.getenv("PUDL_OUTPUT")) - metadata_yml = pudl_output / "metadata.yml" + metadata_yml = PudlPaths().output_dir / "metadata.yml" logger.info(f"Writing Datasette Metadata to {metadata_yml}") - dm = 
DatasetteMetadata.from_data_source_ids(pudl_output) + dm = DatasetteMetadata.from_data_source_ids(PudlPaths().output_dir) dm.to_yaml(path=metadata_yml) logger.info("Parsing generated metadata using datasette utils.") diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index cc88b2309a..09d06ae1ae 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -3,7 +3,9 @@ import pytest from dagster import build_init_resource_context +from pudl.extract.epacems import extract from pudl.io_managers import epacems_io_manager +from pudl.metadata.classes import Resource from pudl.output.epacems import epacems, year_state_filter @@ -16,7 +18,6 @@ def epacems_year_and_state(etl_settings): @pytest.fixture(scope="session") def epacems_parquet_path( - pudl_env, pudl_engine, # implicit dependency; ensures .parquet files exist ): """Get path to the directory of EPA CEMS .parquet data.""" @@ -46,6 +47,19 @@ def test_epacems_subset(epacems_year_and_state, epacems_parquet_path): assert actual.shape[0].compute() > 0 # nosec: B101 n rows +def test_epacems_missing_partition(pudl_datastore_fixture): + """Check that missing partitions return an empty data frame. + + Note that this should pass for both the Fast and Full ETL because the behavior + towards a missing file is identical.""" + df = extract(year=1996, state="UT", ds=pudl_datastore_fixture) + epacems_res = Resource.from_id("hourly_emissions_epacems") + expected_cols = list(epacems_res.get_field_names()) + assert df.shape[0] == 0 # Check that no rows of data are there + # Check that all columns expected of EPACEMS data are present. + assert sorted(df.columns) == sorted(expected_cols) + + def test_epacems_subset_input_validation(epacems_year_and_state, epacems_parquet_path): """Check if invalid inputs raise exceptions.""" if not epacems_year_and_state: @@ -75,9 +89,9 @@ def test_epacems_parallel(pudl_engine, epacems_parquet_path): # monolithic outputs. 
df = dd.read_parquet( epacems_parquet_path, - filters=year_state_filter(years=[2019], states=["ME"]), + filters=year_state_filter(years=[2020], states=["ME"]), index=False, engine="pyarrow", split_row_groups=True, ).compute() - assert df.shape == (96_360, 16) # nosec: B101 + assert df.shape == (96_624, 16) # nosec: B101 diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index 4f8caf9b7d..613834f8df 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -152,7 +152,7 @@ def test_extract_eia923(self, pudl_datastore_fixture): class TestFerc1ExtractDebugFunctions: """Verify the ferc1 extraction debug functions are working properly.""" - def test_extract_dbf(self, ferc1_engine_dbf, pudl_env): + def test_extract_dbf(self, ferc1_engine_dbf): """Test extract_dbf.""" years = [2020, 2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} @@ -171,7 +171,7 @@ def test_extract_dbf(self, ferc1_engine_dbf, pudl_env): df.report_year < 2022 ).all(), f"Unexpected years found in table: {table_name}" - def test_extract_xbrl(self, ferc1_engine_dbf, pudl_env): + def test_extract_xbrl(self, ferc1_engine_dbf): """Test extract_xbrl.""" years = [2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py index 46c977b2ba..3cc21e1a99 100644 --- a/test/integration/glue_test.py +++ b/test/integration/glue_test.py @@ -56,11 +56,9 @@ def plants_ferc1_raw(dataset_settings_config) -> pd.DataFrame: @pytest.fixture(scope="module") def glue_test_dfs( - pudl_env, pudl_out, ferc1_engine_xbrl, ferc1_engine_dbf, - pudl_settings_fixture, etl_settings, dataset_settings_config, ) -> dict[str, pd.DataFrame]: diff --git a/test/integration/zenodo_datapackage_test.py b/test/integration/zenodo_datapackage_test.py index 9cbb7d6ba2..a0a02593f7 100644 --- a/test/integration/zenodo_datapackage_test.py +++ 
b/test/integration/zenodo_datapackage_test.py @@ -8,24 +8,19 @@ class TestZenodoDatapackages: - """Ensure production & sandbox Datastores point to valid datapackages.""" + """Ensure all DOIs in Datastore point to valid datapackages.""" @pytest.mark.xfail( - raises=(MaxRetryError, ConnectionError, RetryError, ResponseError) - ) - def test_sandbox_datapackages(self): - """All datasets point to valid descriptors with 1 or more resources.""" - ds = Datastore(sandbox=True) - for dataset in ds.get_known_datasets(): - desc = ds.get_datapackage_descriptor(dataset) - assert list(desc.get_resources()) - - @pytest.mark.xfail( - raises=(MaxRetryError, ConnectionError, RetryError, ResponseError) + raises=( + MaxRetryError, + ConnectionError, + RetryError, + ResponseError, + ) ) def test_prod_datapackages(self): """All datasets point to valid descriptors with 1 or more resources.""" - ds = Datastore(sandbox=False) + ds = Datastore() for dataset in ds.get_known_datasets(): desc = ds.get_datapackage_descriptor(dataset) assert list(desc.get_resources()) diff --git a/test/unit/helpers_test.py b/test/unit/helpers_test.py index 42899bc227..7a401a8f1c 100644 --- a/test/unit/helpers_test.py +++ b/test/unit/helpers_test.py @@ -1,18 +1,14 @@ """Unit tests for the :mod:`pudl.helpers` module.""" -import os - import numpy as np import pandas as pd import pytest from dagster import AssetKey -from dagster._config.errors import PostProcessingError from pandas.testing import assert_frame_equal, assert_series_equal from pandas.tseries.offsets import BYearEnd import pudl from pudl.helpers import ( - EnvVar, convert_col_to_bool, convert_df_to_excel_file, convert_to_date, @@ -636,27 +632,6 @@ def test_sql_asset_factory_missing_file(): sql_asset_factory(name="fake_view")() -def test_env_var(): - os.environ["_PUDL_TEST"] = "test value" - env_var = EnvVar(env_var="_PUDL_TEST") - assert env_var.post_process(None) == "test value" - del os.environ["_PUDL_TEST"] - - -def 
test_env_var_reads_defaults(mocker): - mocker.patch( - "pudl.helpers.get_defaults", - lambda: {"_PUDL_TEST": "test value default"}, - ) - env_var = EnvVar(env_var="_PUDL_TEST") - assert env_var.post_process(None) == "test value default" - - -def test_env_var_missing_completely(): - with pytest.raises(PostProcessingError): - EnvVar(env_var="_PUDL_BOGUS").post_process(None) - - @pytest.mark.parametrize( "df", [ diff --git a/test/unit/pudl_environment_test.py b/test/unit/pudl_environment_test.py deleted file mode 100644 index 1ea961633c..0000000000 --- a/test/unit/pudl_environment_test.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Test to see if our environment (PUDL_INPUT/OUTPUT, pudl_settings) is set up properly -in a variety of situations.""" - -import os -import pathlib -from io import StringIO - -import pytest -import yaml - -from pudl.workspace.setup import get_defaults - - -def setup(): - if (old_output := os.getenv("PUDL_OUTPUT")) is not None: - os.environ["PUDL_OUTPUT_OLD"] = old_output - if (old_input := os.getenv("PUDL_INPUT")) is not None: - os.environ["PUDL_INPUT_OLD"] = old_input - - -def test_get_defaults_in_test_environment_no_env_vars(): - if os.getenv("PUDL_OUTPUT"): - del os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - workspace = "/test/whatever" - default_settings = { - "pudl_in": workspace, - "pudl_out": workspace, - } - - settings_yaml = StringIO(yaml.dump(default_settings)) - - settings = get_defaults(yaml_file=settings_yaml) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{workspace}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{default_settings['pudl_out']}/output" - assert os.getenv("PUDL_INPUT") == f"{default_settings['pudl_in']}/data" - - -def test_get_defaults_in_test_environment_no_env_vars_tmpdir(pudl_output_tmpdir): - if 
os.getenv("PUDL_OUTPUT"): - del os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - workspace = "/test/whatever" - default_settings = { - "pudl_in": workspace, - "pudl_out": workspace, - } - - settings_yaml = StringIO(yaml.dump(default_settings)) - - settings = get_defaults( - yaml_file=settings_yaml, output_dir=pudl_output_tmpdir / "output" - ) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{pudl_output_tmpdir}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{pudl_output_tmpdir}/output" - assert os.getenv("PUDL_INPUT") == f"{default_settings['pudl_in']}/data" - - -@pytest.mark.parametrize( - ["settings_yaml", "env_vars"], - [ - ( - None, - { - "PUDL_OUTPUT": "/test/whatever/from/env/output", - "PUDL_INPUT": "/test/whatever/from/env/input", - }, - ), - ( - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - { - "PUDL_OUTPUT": "/test/whatever/from/env/output", - "PUDL_INPUT": "/test/whatever/from/env/input", - }, - ), - ( - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - { - "PUDL_OUTPUT": "/test/whatever/from/env/different_output", - "PUDL_INPUT": "/test/whatever/from/env/different_input", - }, - ), - ], -) -def test_get_defaults_in_test_environment_use_env_vars(settings_yaml, env_vars): - workspace = pathlib.Path(env_vars["PUDL_OUTPUT"]).parent - os.environ |= env_vars - - settings = get_defaults(yaml_file=settings_yaml) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": env_vars["PUDL_OUTPUT"], - "data_dir": env_vars["PUDL_INPUT"], - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == env_vars["PUDL_OUTPUT"] - assert os.getenv("PUDL_INPUT") == 
env_vars["PUDL_INPUT"] - - -@pytest.mark.parametrize( - "settings_yaml", - [ - None, - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - ], -) -def test_get_defaults_in_test_environment_use_env_vars_tmpdir( - settings_yaml, pudl_output_tmpdir -): - workspace = "/test/whatever/from/env" - os.environ |= { - "PUDL_OUTPUT": f"{workspace}/output", - "PUDL_INPUT": f"{workspace}/data", - } - - settings = get_defaults( - yaml_file=settings_yaml, output_dir=pudl_output_tmpdir / "output" - ) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{pudl_output_tmpdir}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{pudl_output_tmpdir}/output" - assert os.getenv("PUDL_INPUT") == f"{workspace}/data" - - -def test_get_defaults_in_test_environment_no_env_vars_no_config(): - if os.getenv("PUDL_OUTPUT"): - del os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - with pytest.raises(RuntimeError): - get_defaults(yaml_file=None, default_pudl_yaml=None) - - -def teardown(): - if (old_output := os.getenv("PUDL_OUTPUT_OLD")) is not None: - os.environ["PUDL_OUTPUT"] = old_output - del os.environ["PUDL_OUTPUT_OLD"] - if (old_input := os.getenv("PUDL_INPUT_OLD")) is not None: - os.environ["PUDL_INPUT"] = old_input - del os.environ["PUDL_INPUT_OLD"] diff --git a/test/unit/settings_test.py b/test/unit/settings_test.py index 25e18ad146..9d4eacba6c 100644 --- a/test/unit/settings_test.py +++ b/test/unit/settings_test.py @@ -19,6 +19,7 @@ _convert_settings_to_dagster_config, ) from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths class TestGenericDatasetSettings: @@ -260,9 +261,9 @@ def test_partitions_with_json_normalize(pudl_etl_settings): ) -def test_partitions_for_datasource_table(pudl_settings_fixture, 
pudl_etl_settings): +def test_partitions_for_datasource_table(pudl_etl_settings): """Test whether or not we can make the datasource table.""" - ds = Datastore(local_cache_path=pudl_settings_fixture["data_dir"]) + ds = Datastore(local_cache_path=PudlPaths().data_dir) datasource = pudl_etl_settings.make_datasources_table(ds) datasets = pudl_etl_settings.get_datasets().keys() if datasource.empty and datasets != 0: diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 38672b44de..df389b6fbf 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -190,7 +190,7 @@ def __init__( ): """Construct a test-friendly ZenodoFetcher with descriptors pre-loaded.""" super().__init__(**kwargs) - self._descriptor_cache = dict(descriptors) + self._descriptor_cache = descriptors class TestZenodoFetcher(unittest.TestCase): @@ -220,8 +220,8 @@ class TestZenodoFetcher(unittest.TestCase): }, ] } - PROD_EPACEMS_DOI = "10.5281/zenodo.6910058" - PROD_EPACEMS_ZEN_ID = 6910058 # This is the last numeric part of doi + PROD_EPACEMS_DOI = "10.5281/zenodo.8235497" + PROD_EPACEMS_ZEN_ID = 8235497 # This is the last numeric part of doi def setUp(self): """Constructs mockable Zenodo fetcher based on MOCK_EPACEMS_DATAPACKAGE.""" @@ -235,38 +235,40 @@ def setUp(self): } ) - def test_sandbox_doi_format_is_correct(self): - """Verifies that sandbox ZenodoFetcher DOIs have the right format.""" - ds = datastore.ZenodoFetcher(sandbox=True) - self.assertTrue(ds.get_known_datasets()) - for dataset in ds.get_known_datasets(): - print(f"doi for {dataset} is {ds.get_doi(dataset)}") + def test_doi_format_is_correct(self): + """Verifies ZenodoFetcher DOIs have correct format and are not sandbox DOIs. + + Sandbox DOIs are only meant for use in testing and development, and should not + be checked in, thus this test will fail if a sandbox DOI with prefix 10.5072 is + identified. 
+ """ + zf = datastore.ZenodoFetcher() + self.assertTrue(zf.get_known_datasets()) + for dataset, doi in zf.zenodo_dois: self.assertTrue( - re.fullmatch( - r"10\.5072/zenodo\.[0-9]{5,10}", ds.get_doi(dataset) - ), # noqa: FS003 - msg=f"doi for {dataset} is {ds.get_doi(dataset)}", + zf.get_doi(dataset) == doi, + msg=f"Zenodo DOI for {dataset} matches result of get_doi()", + ) + self.assertFalse( + re.fullmatch(r"10\.5072/zenodo\.[0-9]{5,10}", doi), + msg=f"Zenodo sandbox DOI found for {dataset}: {doi}", ) - - def test_prod_doi_format_is_correct(self): - """Verifies that production ZenodoFetcher DOIs have the right format.""" - ds = datastore.ZenodoFetcher(sandbox=False) - self.assertTrue(ds.get_known_datasets()) - for dataset in ds.get_known_datasets(): self.assertTrue( - re.fullmatch( - r"10\.5281/zenodo\.[0-9]{5,10}", ds.get_doi(dataset) - ), # noqa: FS003 - msg=f"doi for {dataset} is {ds.get_doi(dataset)}", + re.fullmatch(r"10\.5281/zenodo\.[0-9]{5,10}", doi), + msg=f"Zenodo production DOI for {dataset} is {doi}", ) def test_get_known_datasets(self): """Call to get_known_datasets() produces the expected results.""" self.assertEqual( - sorted(datastore.ZenodoFetcher.DOI["production"]), + sorted(name for name, doi in datastore.ZenodoFetcher().zenodo_dois), self.fetcher.get_known_datasets(), ) + def test_get_unknown_dataset(self): + """Ensure that we get a failure when attempting to access an invalid dataset.""" + self.assertRaises(AttributeError, self.fetcher.get_doi, "unknown") + def test_doi_of_prod_epacems_matches(self): """Most of the tests assume specific DOI for production epacems dataset. 
@@ -292,19 +294,6 @@ def test_get_descriptor_http_calls(self): self.assertEqual(self.MOCK_EPACEMS_DATAPACKAGE, desc.datapackage_json) # self.assertTrue(responses.assert_call_count("http://localhost/my/datapackage.json", 1)) - def test_get_resource_key(self): - """Tests normal operation of get_resource_key().""" - self.assertEqual( - PudlResourceKey("epacems", self.PROD_EPACEMS_DOI, "blob.zip"), - self.fetcher.get_resource_key("epacems", "blob.zip"), - ) - - def test_get_resource_key_for_unknown_dataset_fails(self): - """When get_resource_key() is called for unknown dataset it throws KeyError.""" - self.assertRaises( - KeyError, self.fetcher.get_resource_key, "unknown", "blob.zip" - ) - @responses.activate def test_get_resource(self): """Test that get_resource() calls expected http request and returns content.""" diff --git a/test/validate/notebooks/validate_bf_eia923.ipynb b/test/validate/notebooks/validate_bf_eia923.ipynb index b29fa9c83f..750b0e926e 100644 --- a/test/validate/notebooks/validate_bf_eia923.ipynb +++ b/test/validate/notebooks/validate_bf_eia923.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -77,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -102,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -129,6 +131,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -145,6 +148,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -161,6 +165,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -177,6 +182,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -193,6 +199,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -209,6 +216,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -226,6 +234,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -253,6 +262,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_fbp_ferc1.ipynb b/test/validate/notebooks/validate_fbp_ferc1.ipynb index c45422e5e1..fb4fd2920d 100644 --- a/test/validate/notebooks/validate_fbp_ferc1.ipynb +++ b/test/validate/notebooks/validate_fbp_ferc1.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -79,13 +80,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -129,6 +130,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -169,6 +172,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_frc_eia923.ipynb b/test/validate/notebooks/validate_frc_eia923.ipynb index 
1fef3ce49b..e1834129b7 100644 --- a/test/validate/notebooks/validate_frc_eia923.ipynb +++ b/test/validate/notebooks/validate_frc_eia923.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -77,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -102,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -114,6 +116,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -131,6 +134,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -148,6 +152,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,6 +170,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -181,6 +187,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -197,6 +204,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,6 +221,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,6 +238,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -246,6 +256,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -273,6 +284,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_fuel_ferc1.ipynb 
b/test/validate/notebooks/validate_fuel_ferc1.ipynb index bee3f3988b..cc86703c20 100644 --- a/test/validate/notebooks/validate_fuel_ferc1.ipynb +++ b/test/validate/notebooks/validate_fuel_ferc1.ipynb @@ -78,9 +78,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_gens_eia860.ipynb b/test/validate/notebooks/validate_gens_eia860.ipynb index df59d000bc..17b2916e44 100644 --- a/test/validate/notebooks/validate_gens_eia860.ipynb +++ b/test/validate/notebooks/validate_gens_eia860.ipynb @@ -77,9 +77,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { diff --git a/test/validate/notebooks/validate_gf_eia923.ipynb b/test/validate/notebooks/validate_gf_eia923.ipynb index d94a9abc5d..216d07e8b8 100644 --- a/test/validate/notebooks/validate_gf_eia923.ipynb +++ b/test/validate/notebooks/validate_gf_eia923.ipynb @@ -77,9 +77,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine =
sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_mcoe.ipynb b/test/validate/notebooks/validate_mcoe.ipynb index a8bc0c0883..e8c884f558 100644 --- a/test/validate/notebooks/validate_mcoe.ipynb +++ b/test/validate/notebooks/validate_mcoe.ipynb @@ -78,9 +78,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb index 055769d2ed..e435974dc0 100644 --- a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb +++ b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb @@ -78,9 +78,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/tox.ini b/tox.ini index 8112828de2..5d4b75ada0 100644 --- a/tox.ini +++ b/tox.ini @@ -295,7 +295,7 @@ addopts = --verbose --pdbcls=IPython.terminal.debugger:TerminalPdb log_format = %(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s log_date_format= %Y-%m-%d %H:%M:%S log_cli = true -log_cli_level = debug +log_cli_level = DEBUG doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS filterwarnings = ignore:distutils Version classes are
deprecated:DeprecationWarning