From 6479097d2854fb0eaf1fb8dbfe6197f3ac6fde87 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Wed, 7 Jun 2023 09:54:49 +0200 Subject: [PATCH 01/51] Drop support for legacy workspace yml files. --- .../work-in-progress/CEMS_by_utility.ipynb | 2 +- notebooks/work-in-progress/explore-CEMS.ipynb | 37 +++- src/pudl/workspace/setup.py | 161 +++-------------- test/conftest.py | 54 ++---- test/unit/pudl_environment_test.py | 164 +----------------- 5 files changed, 74 insertions(+), 344 deletions(-) diff --git a/notebooks/work-in-progress/CEMS_by_utility.ipynb b/notebooks/work-in-progress/CEMS_by_utility.ipynb index d3d4c26aa7..dc5a7b20e4 100644 --- a/notebooks/work-in-progress/CEMS_by_utility.ipynb +++ b/notebooks/work-in-progress/CEMS_by_utility.ipynb @@ -215,7 +215,7 @@ "\n", "# for yr in years:\n", "# print(f'starting calculation for {yr}')\n", - "# epacems_path = (pudl_settings['parquet_dir'] + f'/epacems/year={yr}')\n", + "# epacems_path = (pudl_settings['pudl_out'] + f'/epacems/year={yr}')\n", "# cems_dd = (\n", "# dd.read_parquet(epacems_path, columns=my_cols)\n", "# .assign(state=lambda x: x['state'].astype('string'))\n", diff --git a/notebooks/work-in-progress/explore-CEMS.ipynb b/notebooks/work-in-progress/explore-CEMS.ipynb index 5413664c2f..0cb836fe09 100644 --- a/notebooks/work-in-progress/explore-CEMS.ipynb +++ b/notebooks/work-in-progress/explore-CEMS.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -8,6 +9,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -15,6 +17,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -33,6 +36,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -107,6 +111,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -142,6 +147,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -150,6 
+156,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,6 +164,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -175,13 +183,14 @@ "outputs": [], "source": [ "# Locate the data for the given year/s on your hard drive.\n", - "epacems_path = (pudl_settings['parquet_dir'] + f'/epacems/year={CEMS_year}')\n", + "epacems_path = (pudl_settings['pudl_out'] + f'/epacems/year={CEMS_year}')\n", "\n", "# Create a Dask object for preliminary data interaction\n", "cems_dd = dd.read_parquet(epacems_path)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -209,6 +218,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -257,6 +267,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -291,6 +302,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -435,6 +447,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -451,7 +464,7 @@ "# multi_year_cems_df = pd.DataFrame()\n", "\n", "# for yr in years:\n", - "# epacems_path = (pudl_settings['parquet_dir'] + f'/epacems/year={yr}')\n", + "# epacems_path = (pudl_settings[''] + f'/epacems/year={yr}')\n", "# cems_dd = (\n", "# dd.read_parquet(epacems_path, columns=my_cols)\n", "# .assign(state=lambda x: x['state'].astype('string'))\n", @@ -475,6 +488,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -507,6 +521,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -515,6 +530,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -522,6 +538,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -531,6 +548,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -538,6 +556,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -568,6 +587,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -593,6 +613,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -662,6 +683,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -671,6 +693,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -678,6 +701,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -704,6 +728,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -725,6 +750,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -744,6 +770,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -797,6 +824,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -806,6 +834,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -813,6 +842,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -869,6 +899,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -889,6 +920,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -926,6 +958,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 38e898004e..73294e82be 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -4,9 +4,7 @@ import pathlib import shutil from pathlib import Path -from typing import IO -import yaml from dotenv import load_dotenv import pudl.logging_helpers @@ -17,20 +15,13 @@ def get_defaults( input_dir: str | None = None, output_dir: str | None = None, - yaml_file: IO | None = None, - default_pudl_yaml: Path | None = Path.home() / ".pudl.yml", 
) -> dict[str, str]: """Derive PUDL workspace paths from specified input/output directories. - Determines input/output directory locations from YAML, then overrides with - env vars, then overrides with keywords passed in. + Determines input/output directory locations from env variables. Input/output workspace roots can be the same directories. - Note: will update PUDL_OUTPUT and PUDL_INPUT env vars if they are - overridden by kwargs, so that Dagster configurations in child processes see - the updated configs when they read from env vars. - Args: input_dir: equivalent to PUDL_INPUT environment variable, but overrides that value. Derived paths treat the parent directory as the input @@ -38,9 +29,6 @@ def get_defaults( output_dir: equivalent to PUDL_OUTPUT environment variable, but overrides that value. Derived paths treat the parent directory as the output workspace root. - yaml_file: a buffer including the YAML configuration. The `pudl_in` and - `pudl_out` keys within this file correspond to the input/output - workspace roots directly, instead of through parents. 
Returns: dictionary with a variety of different paths where inputs/outputs are @@ -58,139 +46,38 @@ def get_defaults( os.environ["PUDL_OUTPUT"] = str(Path("~/pudl-work/output").expanduser()) os.environ["PUDL_INPUT"] = str(Path("~/pudl-work/data").expanduser()) - yaml_settings = _munge_legacy_yaml( - yaml_file=yaml_file, default_pudl_yaml=default_pudl_yaml - ) + if input_dir: + os.environ["PUDL_INPUT"] = str(Path(input_dir).expanduser()) + if output_dir: + os.environ["PUDL_OUTPUT"] = str(Path(output_dir).expanduser()) + + for env_var in ["PUDL_INPUT", "PUDL_OUTPUT"]: + if env_var not in os.environ: + raise RuntimeError(f"{env_var} environment variable must be set.") - # read from env vars - env_var_mapping = { - "pudl_in": os.getenv("PUDL_INPUT"), - "pudl_out": os.getenv("PUDL_OUTPUT"), - } - env_settings = { - key: str(Path(value)) - for key, value in env_var_mapping.items() - if value is not None - } - - # read from params - kwarg_mapping = {"pudl_in": input_dir, "pudl_out": output_dir} - kwarg_settings = { - key: str(Path(value)) - for key, value in kwarg_mapping.items() - if value is not None - } - - # Start with an empty settings, then override in order of precedence. - settings: dict[str, str] = {} - for settings_source in [yaml_settings, env_settings, kwarg_settings]: - settings |= settings_source - - if not ("pudl_in" in settings and "pudl_out" in settings): - raise RuntimeError( - "Must set 'PUDL_OUTPUT'/'PUDL_INPUT' environment variables or provide valid yaml config file." 
- ) - - settings = derive_paths(settings["pudl_in"], settings["pudl_out"]) - - # override env vars so Dagster can see the most up-to-date configs - if output_dir or "PUDL_OUTPUT" not in os.environ: - os.environ["PUDL_OUTPUT"] = settings["pudl_out"] - if input_dir or "PUDL_INPUT" not in os.environ: - os.environ["PUDL_INPUT"] = settings["data_dir"] + settings = derive_paths( + Path(os.getenv("PUDL_INPUT")), + Path(os.getenv("PUDL_OUTPUT")), + ) if "DAGSTER_HOME" not in os.environ: os.environ["DAGSTER_HOME"] = str(Path(settings["pudl_in"]) / "dagster_home") - return settings -def _munge_legacy_yaml( - yaml_file: IO | None, default_pudl_yaml: Path | None -) -> dict[str, str]: - # read from YAML source - if yaml_file is not None: - yaml_settings = yaml.safe_load(yaml_file) - elif default_pudl_yaml and default_pudl_yaml.exists(): - with default_pudl_yaml.open() as f: - yaml_settings = yaml.safe_load(f) - else: - yaml_settings = {} - - # legacy YAML format expects pudl_in/out to point at the parent directory instead - # of the input/output directories directly, so we munge here. - if "pudl_in" in yaml_settings: - yaml_settings["pudl_in"] = f"{yaml_settings['pudl_in']}/data" - if "pudl_out" in yaml_settings: - yaml_settings["pudl_out"] = f"{yaml_settings['pudl_out']}/output" - - return yaml_settings - - -def set_defaults(pudl_in, pudl_out, clobber=False): - """Set default user input and output locations in ``$HOME/.pudl.yml``. - - Create a user settings file for future reference, that defines the default - PUDL input and output directories. If this file already exists, behavior - depends on the clobber parameter, which is False by default. If it's True, - the existing file is replaced. If False, the existing file is not changed. - - Args: - pudl_in (os.PathLike): Path to be used as the default input directory - for PUDL -- this is where :mod:`pudl.workspace.datastore` will look - to find the ``data`` directory, full of data from public agencies. 
- pudl_out (os.PathLike): Path to the default output directory for PUDL, - where results of data processing will be organized. - clobber (bool): If True and a user settings file exists, overwrite it. - If False, do not alter the existing file. Defaults to False. - - Returns: - None - """ - logger.warning( - "pudl_settings is being deprecated in favor of environment " - "variables PUDL_OUTPUT and PUDL_INPUT. For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) - settings_file = pathlib.Path.home() / ".pudl.yml" - if settings_file.exists(): - if clobber: - logger.info(f"{settings_file} exists: clobbering.") - else: - logger.info(f"{settings_file} exists: not clobbering.") - return - - with settings_file.open(mode="w") as f: - f.write(f"pudl_in: {pudl_in.expanduser().resolve()}\n") - f.write(f"pudl_out: {pudl_out.expanduser().resolve()}\n") - - -def derive_paths(pudl_in, pudl_out): - """Derive PUDL paths based on given input and output paths. - - If no configuration file path is provided, attempt to read in the user - configuration from a file called .pudl.yml in the user's HOME directory. - Presently the only values we expect are pudl_in and pudl_out, directories - that store files that PUDL either depends on that rely on PUDL. +def derive_paths(pudl_in: Path, pudl_out: Path) -> dict[str, str]: + """Derive PUDL paths based on given input and output env variables. Args: - pudl_in (os.PathLike): Path to the directory containing the PUDL input - files, most notably the ``data`` directory which houses the raw - data downloaded from public agencies by the - :mod:`pudl.workspace.datastore` tools. ``pudl_in`` may be the same - directory as ``pudl_out``. - pudl_out (os.PathLike): Path to the directory where PUDL should write - the outputs it generates. These will be organized into directories - according to the output format (sqlite, parquet, etc.). 
+ pudl_in (Path): directory containing PUDL input files, most notably + the ``data`` directory which houses the raw data downloaded from + public agencies by the :mod:`pudl.workspace.datastore` tools. + pudl_out (Path): directory where PUDL should write the outputs it + generates. Returns: dict: A dictionary containing common PUDL settings, derived from those read out of the YAML file. Mostly paths for inputs & outputs. """ - logger.warning( - "pudl_settings is being deprecated in favor of environment variables " - "PUDL_OUTPUT and PUDL_INPUT. For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) pudl_settings = {} # The only "inputs" are the datastore and example settings files: @@ -208,14 +95,6 @@ def derive_paths(pudl_in, pudl_out): # Everything else goes into outputs, generally organized by type of file: pudl_out = pathlib.Path(pudl_out).expanduser().resolve() pudl_settings["pudl_out"] = str(pudl_out) - # One directory per output format: - logger.warning( - "sqlite and parquet directories are no longer being used. Make sure there is a " - "single directory named 'output' at the root of your workspace. 
For more info " - "see: https://catalystcoop-pudl.readthedocs.io/en/dev/dev/dev_setup.html" - ) - for fmt in ["sqlite", "parquet"]: - pudl_settings[f"{fmt}_dir"] = pudl_settings["pudl_out"] # Mirror dagster env vars for ease of use pudl_settings["PUDL_OUTPUT"] = pudl_settings["pudl_out"] diff --git a/test/conftest.py b/test/conftest.py index 674403f62c..ab31c4fccb 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -352,47 +352,27 @@ def pudl_tmpdir(tmp_path_factory): return tmpdir -@pytest.fixture(scope="session") -def pudl_output_tmpdir(pudl_tmpdir): - tmpdir = pudl_tmpdir / "output" - tmpdir.mkdir() - return tmpdir - - -@pytest.fixture(scope="session") -def pudl_input_tmpdir(pudl_tmpdir): - tmpdir = pudl_tmpdir / "data" - tmpdir.mkdir() - return tmpdir - - -@pytest.fixture(scope="session") -def pudl_input_output_dirs(request, live_dbs, pudl_input_tmpdir, pudl_output_tmpdir): - """Determine where the PUDL input/output dirs should be.""" - input_override = None - output_override = None - - if os.environ.get("GITHUB_ACTIONS", False): - # hard-code input dir for CI caching - input_override = Path(os.environ["HOME"]) / "pudl-work" / "data" - output_override = Path(os.environ["HOME"]) / "pudl-work" / "output" - elif request.config.getoption("--tmp-data"): - # use tmpdir for inputs if we ask for it - input_override = pudl_input_tmpdir - if not live_dbs: - # use tmpdir for outputs if we haven't passed --live-db - output_override = pudl_output_tmpdir - - return {"input_dir": input_override, "output_dir": output_override} - - @pytest.fixture(scope="session", name="pudl_settings_fixture") -def pudl_settings_dict(request, pudl_input_output_dirs): # noqa: C901 +def pudl_settings_dict(request, pudl_tmpdir): # noqa: C901 """Determine some settings (mostly paths) for the test session.""" logger.info("setting up the pudl_settings_fixture") - pudl_settings = pudl.workspace.setup.get_defaults(**pudl_input_output_dirs) + param_overrides = {} + if 
os.environ.get("GITHUB_ACTIONS", False): + param_overrides = { + "input_dir": "~/pudl-work/data", + "output_dir": "~/pudl-work/output", + } + else: + if request.config.getoption("--tmp-data"): + in_tmp = pudl_tmpdir / "data" + in_tmp.mkdir() + param_overrides["input_dir"] = in_tmp + if request.config.getoption("--live-dbs"): + out_tmp = pudl_tmpdir / "output" + out_tmp.mkdir() + param_overrides["output_dir"] = out_tmp + pudl_settings = pudl.workspace.setup.get_defaults(**param_overrides) pudl.workspace.setup.init(pudl_settings) - pudl_settings["sandbox"] = request.config.getoption("--sandbox") pretty_settings = json.dumps( diff --git a/test/unit/pudl_environment_test.py b/test/unit/pudl_environment_test.py index 1ea961633c..3bc874e32a 100644 --- a/test/unit/pudl_environment_test.py +++ b/test/unit/pudl_environment_test.py @@ -2,11 +2,8 @@ in a variety of situations.""" import os -import pathlib -from io import StringIO import pytest -import yaml from pudl.workspace.setup import get_defaults @@ -18,165 +15,6 @@ def setup(): os.environ["PUDL_INPUT_OLD"] = old_input -def test_get_defaults_in_test_environment_no_env_vars(): - if os.getenv("PUDL_OUTPUT"): - del os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - workspace = "/test/whatever" - default_settings = { - "pudl_in": workspace, - "pudl_out": workspace, - } - - settings_yaml = StringIO(yaml.dump(default_settings)) - - settings = get_defaults(yaml_file=settings_yaml) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{workspace}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{default_settings['pudl_out']}/output" - assert os.getenv("PUDL_INPUT") == f"{default_settings['pudl_in']}/data" - - -def test_get_defaults_in_test_environment_no_env_vars_tmpdir(pudl_output_tmpdir): - if os.getenv("PUDL_OUTPUT"): - del 
os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - workspace = "/test/whatever" - default_settings = { - "pudl_in": workspace, - "pudl_out": workspace, - } - - settings_yaml = StringIO(yaml.dump(default_settings)) - - settings = get_defaults( - yaml_file=settings_yaml, output_dir=pudl_output_tmpdir / "output" - ) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{pudl_output_tmpdir}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{pudl_output_tmpdir}/output" - assert os.getenv("PUDL_INPUT") == f"{default_settings['pudl_in']}/data" - - -@pytest.mark.parametrize( - ["settings_yaml", "env_vars"], - [ - ( - None, - { - "PUDL_OUTPUT": "/test/whatever/from/env/output", - "PUDL_INPUT": "/test/whatever/from/env/input", - }, - ), - ( - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - { - "PUDL_OUTPUT": "/test/whatever/from/env/output", - "PUDL_INPUT": "/test/whatever/from/env/input", - }, - ), - ( - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - { - "PUDL_OUTPUT": "/test/whatever/from/env/different_output", - "PUDL_INPUT": "/test/whatever/from/env/different_input", - }, - ), - ], -) -def test_get_defaults_in_test_environment_use_env_vars(settings_yaml, env_vars): - workspace = pathlib.Path(env_vars["PUDL_OUTPUT"]).parent - os.environ |= env_vars - - settings = get_defaults(yaml_file=settings_yaml) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": env_vars["PUDL_OUTPUT"], - "data_dir": env_vars["PUDL_INPUT"], - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == env_vars["PUDL_OUTPUT"] - assert os.getenv("PUDL_INPUT") == env_vars["PUDL_INPUT"] - - 
-@pytest.mark.parametrize( - "settings_yaml", - [ - None, - StringIO( - yaml.dump( - { - "pudl_in": "/test/workspace", - "pudl_out": "/test/workspace", - } - ) - ), - ], -) -def test_get_defaults_in_test_environment_use_env_vars_tmpdir( - settings_yaml, pudl_output_tmpdir -): - workspace = "/test/whatever/from/env" - os.environ |= { - "PUDL_OUTPUT": f"{workspace}/output", - "PUDL_INPUT": f"{workspace}/data", - } - - settings = get_defaults( - yaml_file=settings_yaml, output_dir=pudl_output_tmpdir / "output" - ) - - expected_values = { - "pudl_in": f"{workspace}", - "pudl_out": f"{pudl_output_tmpdir}/output", - "data_dir": f"{workspace}/data", - } - - for key, value in expected_values.items(): - assert (key, settings[key]) == (key, value) - - assert os.getenv("PUDL_OUTPUT") == f"{pudl_output_tmpdir}/output" - assert os.getenv("PUDL_INPUT") == f"{workspace}/data" - - def test_get_defaults_in_test_environment_no_env_vars_no_config(): if os.getenv("PUDL_OUTPUT"): del os.environ["PUDL_OUTPUT"] @@ -184,7 +22,7 @@ def test_get_defaults_in_test_environment_no_env_vars_no_config(): del os.environ["PUDL_INPUT"] with pytest.raises(RuntimeError): - get_defaults(yaml_file=None, default_pudl_yaml=None) + get_defaults() def teardown(): From 02a2ea519676cfcbb59853b990cbab719ff37944 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Wed, 7 Jun 2023 10:06:20 +0200 Subject: [PATCH 02/51] Drop support for legacy config (code cleanups). 
--- src/pudl/convert/censusdp1tract_to_sqlite.py | 6 ++---- test/conftest.py | 14 ++------------ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/pudl/convert/censusdp1tract_to_sqlite.py b/src/pudl/convert/censusdp1tract_to_sqlite.py index aab9d51d95..61bf1013d1 100644 --- a/src/pudl/convert/censusdp1tract_to_sqlite.py +++ b/src/pudl/convert/censusdp1tract_to_sqlite.py @@ -143,15 +143,13 @@ def main(): # Configure how we want to obtain raw input data: ds_kwargs = dict( - gcs_cache_path=args.gcs_cache_path, sandbox=pudl_settings.get("sandbox", False) + gcs_cache_path=args.gcs_cache_path, + sandbox=args.sandbox, ) if not args.bypass_local_cache: ds_kwargs["local_cache_path"] = Path(pudl_settings["pudl_in"]) / "data" ds = Datastore(**ds_kwargs) - - pudl_settings["sandbox"] = args.sandbox - censusdp1tract_to_sqlite(pudl_settings=pudl_settings, ds=ds, clobber=args.clobber) diff --git a/test/conftest.py b/test/conftest.py index ab31c4fccb..9c93e956cc 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -87,15 +87,6 @@ def pytest_addoption(parser): ) -@pytest.fixture(scope="session") -def pudl_env(pudl_input_output_dirs): - """Set PUDL_OUTPUT/PUDL_INPUT/DAGSTER_HOME environment variables.""" - pudl.workspace.setup.get_defaults(**pudl_input_output_dirs) - - logger.info(f"PUDL_OUTPUT path: {os.environ['PUDL_OUTPUT']}") - logger.info(f"PUDL_INPUT path: {os.environ['PUDL_INPUT']}") - - @pytest.fixture(scope="session", name="test_dir") def test_directory(): """Return the path to the top-level directory containing the tests.""" @@ -121,7 +112,7 @@ def check_foreign_keys(request): @pytest.fixture(scope="session", name="etl_settings") -def etl_parameters(request, test_dir) -> EtlSettings: +def etl_parameters(request, test_dir, pudl_settings_fixture) -> EtlSettings: """Read the ETL parameters from the test settings or proffered file.""" if request.config.getoption("--etl-settings"): etl_settings_yml = Path(request.config.getoption("--etl-settings")) @@ -194,7 
+185,7 @@ def pudl_out_orig(live_dbs, pudl_engine): @pytest.fixture(scope="session") -def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs. If we are using the test database, we initialize it from scratch first. If we're @@ -373,7 +364,6 @@ def pudl_settings_dict(request, pudl_tmpdir): # noqa: C901 param_overrides["output_dir"] = out_tmp pudl_settings = pudl.workspace.setup.get_defaults(**param_overrides) pudl.workspace.setup.init(pudl_settings) - pudl_settings["sandbox"] = request.config.getoption("--sandbox") pretty_settings = json.dumps( {str(k): str(v) for k, v in pudl_settings.items()}, indent=2 From 7a72e8df87923b009d08a9a6b9e1d6fe54cbf5c8 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Wed, 7 Jun 2023 21:25:18 +0200 Subject: [PATCH 03/51] Refactoring of fixtures wrt simplified get_defaults() --- src/pudl/workspace/setup.py | 73 ++++++++------------- test/conftest.py | 35 ++++++---- test/integration/datasette_metadata_test.py | 2 +- test/integration/epacems_test.py | 2 +- test/integration/etl_test.py | 4 +- test/integration/glue_test.py | 2 +- 6 files changed, 55 insertions(+), 63 deletions(-) diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 73294e82be..b6fdc5fb44 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -12,23 +12,27 @@ logger = pudl.logging_helpers.get_logger(__name__) -def get_defaults( +def set_path_overrides( input_dir: str | None = None, output_dir: str | None = None, -) -> dict[str, str]: - """Derive PUDL workspace paths from specified input/output directories. +) -> None: + """Set PUDL_INPUT and/or PUDL_OUTPUT env variables. - Determines input/output directory locations from env variables. + Args: + input_dir: if set, overrides PUDL_INPUT env variable. + output_dir: if set, overrides PUDL_OUTPUT env variable. 
+ """ + if input_dir: + os.environ["PUDL_INPUT"] = input_dir + if output_dir: + os.environ["PUDL_OUTPUT"] = input_dir - Input/output workspace roots can be the same directories. - Args: - input_dir: equivalent to PUDL_INPUT environment variable, but overrides - that value. Derived paths treat the parent directory as the input - workspace root. - output_dir: equivalent to PUDL_OUTPUT environment variable, but - overrides that value. Derived paths treat the parent directory as - the output workspace root. +def get_defaults() -> dict[str, str]: + """Derive PUDL workspace paths from env variables. + + Reads the PUDL_INPUT and PUDL_OUTPUT environment variables, and derives + all relevant paths that will be set in the config dictionary. Returns: dictionary with a variety of different paths where inputs/outputs are @@ -43,46 +47,19 @@ def get_defaults( # # I don't like this any more than you do. if os.getenv("READTHEDOCS"): - os.environ["PUDL_OUTPUT"] = str(Path("~/pudl-work/output").expanduser()) - os.environ["PUDL_INPUT"] = str(Path("~/pudl-work/data").expanduser()) - - if input_dir: - os.environ["PUDL_INPUT"] = str(Path(input_dir).expanduser()) - if output_dir: - os.environ["PUDL_OUTPUT"] = str(Path(output_dir).expanduser()) - + set_path_overrides( + input_dir="~/pudl-work/data", + output_dir="~/pudl-work/output", + ) for env_var in ["PUDL_INPUT", "PUDL_OUTPUT"]: if env_var not in os.environ: raise RuntimeError(f"{env_var} environment variable must be set.") - settings = derive_paths( - Path(os.getenv("PUDL_INPUT")), - Path(os.getenv("PUDL_OUTPUT")), - ) - if "DAGSTER_HOME" not in os.environ: - os.environ["DAGSTER_HOME"] = str(Path(settings["pudl_in"]) / "dagster_home") - return settings - - -def derive_paths(pudl_in: Path, pudl_out: Path) -> dict[str, str]: - """Derive PUDL paths based on given input and output env variables. 
- - Args: - pudl_in (Path): directory containing PUDL input files, most notably - the ``data`` directory which houses the raw data downloaded from - public agencies by the :mod:`pudl.workspace.datastore` tools. - pudl_out (Path): directory where PUDL should write the outputs it - generates. - - Returns: - dict: A dictionary containing common PUDL settings, derived from those - read out of the YAML file. Mostly paths for inputs & outputs. - """ pudl_settings = {} # The only "inputs" are the datastore and example settings files: # Convert from input string to Path and make it absolute w/ resolve() - pudl_in = pathlib.Path(pudl_in).expanduser().resolve() + pudl_in = pathlib.Path(os.getenv("PUDL_INPUT")).expanduser().resolve() data_dir = pudl_in pudl_workspace_legacy = pudl_in.parent settings_dir = pudl_workspace_legacy / "settings" @@ -93,7 +70,7 @@ def derive_paths(pudl_in: Path, pudl_out: Path) -> dict[str, str]: pudl_settings["settings_dir"] = str(settings_dir) # Everything else goes into outputs, generally organized by type of file: - pudl_out = pathlib.Path(pudl_out).expanduser().resolve() + pudl_out = pathlib.Path(os.getenv("PUDL_OUTPUT")).expanduser().resolve() pudl_settings["pudl_out"] = str(pudl_out) # Mirror dagster env vars for ease of use @@ -155,6 +132,12 @@ def derive_paths(pudl_in: Path, pudl_out: Path) -> dict[str, str]: pudl_settings["censusdp1tract_db"] = "sqlite:///" + str( pathlib.Path(pudl_settings["pudl_out"], "censusdp1tract.sqlite") ) + + if not os.getenv("DAGSTER_HOME"): + os.environ["DAGSTER_HOME"] = str( + Path(pudl_settings["pudl_in"]) / "dagster_home" + ) + return pudl_settings diff --git a/test/conftest.py b/test/conftest.py index 9c93e956cc..670b7ccab8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -287,7 +287,7 @@ def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl): @pytest.fixture(scope="session") def pudl_sql_io_manager( - pudl_env, + pudl_path_setup, pudl_settings_fixture, ferc1_engine_dbf, # Implicit dependency 
ferc1_engine_xbrl, # Implicit dependency @@ -343,26 +343,35 @@ def pudl_tmpdir(tmp_path_factory): return tmpdir -@pytest.fixture(scope="session", name="pudl_settings_fixture") -def pudl_settings_dict(request, pudl_tmpdir): # noqa: C901 - """Determine some settings (mostly paths) for the test session.""" - logger.info("setting up the pudl_settings_fixture") - param_overrides = {} +@pytest.fixture(scope="session") +def pudl_path_setup(request, pudl_tmpdir): + """Sets the necessary env variables for the input and output paths.""" if os.environ.get("GITHUB_ACTIONS", False): - param_overrides = { - "input_dir": "~/pudl-work/data", - "output_dir": "~/pudl-work/output", - } + pudl.workspace.setup.set_path_overrides( + input_dir="~/pudl-work/data", + output_dir="~/pudl-work/output", + ) else: if request.config.getoption("--tmp-data"): in_tmp = pudl_tmpdir / "data" in_tmp.mkdir() - param_overrides["input_dir"] = in_tmp + pudl.workspace.setup.set_path_overrides( + input_dir=str(Path(in_tmp).resolve()), + ) if request.config.getoption("--live-dbs"): out_tmp = pudl_tmpdir / "output" out_tmp.mkdir() - param_overrides["output_dir"] = out_tmp - pudl_settings = pudl.workspace.setup.get_defaults(**param_overrides) + pudl.workspace.setup.set_path_overrides( + output_dir=str(Path(out_tmp).resolve()), + ) + + +@pytest.fixture(scope="session", name="pudl_settings_fixture") +def pudl_settings_dict(pudl_path_setup): # noqa: C901 + """Determine some settings (mostly paths) for the test session.""" + logger.info("setting up the pudl_settings_fixture") + + pudl_settings = pudl.workspace.setup.get_defaults() pudl.workspace.setup.init(pudl_settings) pretty_settings = json.dumps( diff --git a/test/integration/datasette_metadata_test.py b/test/integration/datasette_metadata_test.py index 8fc601982d..6428abae70 100644 --- a/test/integration/datasette_metadata_test.py +++ b/test/integration/datasette_metadata_test.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -def 
test_datasette_metadata_to_yml(pudl_env, ferc1_engine_xbrl): +def test_datasette_metadata_to_yml(pudl_path_setup, ferc1_engine_xbrl): """Test the ability to export metadata as YML for use with Datasette.""" pudl_output = Path(os.getenv("PUDL_OUTPUT")) metadata_yml = pudl_output / "metadata.yml" diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index cc88b2309a..5421eb70a1 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -16,7 +16,7 @@ def epacems_year_and_state(etl_settings): @pytest.fixture(scope="session") def epacems_parquet_path( - pudl_env, + pudl_path_setup, pudl_engine, # implicit dependency; ensures .parquet files exist ): """Get path to the directory of EPA CEMS .parquet data.""" diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index 629addfe66..9adf3a8233 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -202,7 +202,7 @@ def test_extract_eia923(self, pudl_datastore_fixture): class TestFerc1ExtractDebugFunctions: """Verify the ferc1 extraction debug functions are working properly.""" - def test_extract_dbf(self, ferc1_engine_dbf, pudl_env): + def test_extract_dbf(self, ferc1_engine_dbf, pudl_path_setup): """Test extract_dbf.""" years = [2020, 2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} @@ -221,7 +221,7 @@ def test_extract_dbf(self, ferc1_engine_dbf, pudl_env): df.report_year < 2022 ).all(), f"Unexpected years found in table: {table_name}" - def test_extract_xbrl(self, ferc1_engine_dbf, pudl_env): + def test_extract_xbrl(self, ferc1_engine_dbf, pudl_path_setup): """Test extract_xbrl.""" years = [2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py index 3282e33d6b..5265db3766 100644 --- a/test/integration/glue_test.py +++ b/test/integration/glue_test.py @@ -56,7 +56,7 @@ def 
plants_ferc1_raw(dataset_settings_config) -> pd.DataFrame: @pytest.fixture(scope="module") def glue_test_dfs( - pudl_env, + pudl_path_setup, pudl_out, ferc1_engine_xbrl, ferc1_engine_dbf, From 7bc0cbe158eeb58113dc59ac0c75634fcfc1456c Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Wed, 5 Jul 2023 21:22:49 +0200 Subject: [PATCH 04/51] Replace unstructured pudl_settings with PudlPaths. This should greatly simplify the path configuration for pudl ETL. --- ...ERC1-EIA_manual_mapping_spreadsheets.ipynb | 4 +- ...te_FERC1-EIA_manually_mapped_records.ipynb | 4 +- migrations/env.py | 4 +- .../work-in-progress/CEMS_by_utility.ipynb | 17 +- .../work-in-progress/better-heatrates.ipynb | 33 +- .../eia_column_changes_through_time.ipynb | 469 +++++++++--------- notebooks/work-in-progress/explore-CEMS.ipynb | 13 +- .../explore-data-validation.ipynb | 16 +- notebooks/work-in-progress/explore-mcoe.ipynb | 240 ++++----- .../explore-output-tables.ipynb | 15 +- .../explore_fuel_costs_eiaapi.ipynb | 5 +- .../explore_net_generation.ipynb | 5 +- .../work-in-progress/ferc714-output.ipynb | 12 +- .../work-in-progress/jupyterhub-test.ipynb | 8 +- .../make_master_unit_list_eia.ipynb | 5 +- .../work-in-progress/output-sql-ready.ipynb | 10 +- .../work-in-progress/replace_eia_api.ipynb | 2 +- notebooks/work-in-progress/state-demand.ipynb | 16 +- .../test-validation-tests.ipynb | 8 +- .../work-in-progress/transform_xbrl.ipynb | 2 +- src/pudl/analysis/plant_parts_eia.py | 3 +- src/pudl/analysis/service_territory.py | 5 +- src/pudl/analysis/state_demand.py | 5 +- src/pudl/cli/etl.py | 6 +- src/pudl/extract/xbrl.py | 12 +- src/pudl/ferc_to_sqlite/cli.py | 3 - src/pudl/helpers.py | 15 +- src/pudl/io_managers.py | 8 +- src/pudl/metadata/classes.py | 8 +- src/pudl/output/epacems.py | 5 +- src/pudl/settings.py | 22 +- src/pudl/workspace/datastore.py | 31 +- src/pudl/workspace/setup.py | 186 +++---- src/pudl/workspace/setup_cli.py | 14 +- test/conftest.py | 58 +-- test/integration/glue_test.py | 1 - 
test/unit/helpers_test.py | 9 - test/unit/pudl_environment_test.py | 34 -- test/unit/settings_test.py | 5 +- .../notebooks/validate_bf_eia923.ipynb | 18 +- .../notebooks/validate_fbp_ferc1.ipynb | 12 +- .../notebooks/validate_frc_eia923.ipynb | 20 +- .../notebooks/validate_fuel_ferc1.ipynb | 6 +- .../notebooks/validate_gens_eia860.ipynb | 6 +- .../notebooks/validate_gf_eia923.ipynb | 6 +- test/validate/notebooks/validate_mcoe.ipynb | 6 +- .../validate_plants_steam_ferc1.ipynb | 6 +- 47 files changed, 685 insertions(+), 713 deletions(-) delete mode 100644 test/unit/pudl_environment_test.py diff --git a/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb b/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb index a27bbd394d..c45e2e008f 100644 --- a/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb +++ b/devtools/ferc1-eia-glue/training_data/create_FERC1-EIA_manual_mapping_spreadsheets.ipynb @@ -42,6 +42,7 @@ "\n", "# Local libraries\n", "import pudl\n", + "from pudl.workspace.setup import PudlPaths\n", "from pudl.analysis.ferc1_eia_train import *" ] }, @@ -54,8 +55,7 @@ }, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS', fill_net_gen=True)" ] }, diff --git a/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb b/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb index 171e01512b..22149567ec 100644 --- a/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb +++ b/devtools/ferc1-eia-glue/training_data/validate_and_integrate_FERC1-EIA_manually_mapped_records.ipynb @@ -43,6 +43,7 @@ "\n", "# 
Local libraries\n", "import pudl\n", + "from pudl.workspace.setup import PudlPaths\n", "from pudl.analysis.ferc1_eia_train import *" ] }, @@ -55,8 +56,7 @@ }, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine, freq='AS', fill_net_gen=True)" ] }, diff --git a/migrations/env.py b/migrations/env.py index 19ec0dfbfd..f99a0b3e29 100644 --- a/migrations/env.py +++ b/migrations/env.py @@ -5,7 +5,7 @@ from sqlalchemy import engine_from_config, pool from pudl.metadata.classes import Package -from pudl.workspace.setup import get_defaults +from pudl.workspace.setup import PudlPaths # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -28,7 +28,7 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. -db_location = get_defaults()["pudl_db"] +db_location = PudlPaths().pudl_db logger.info(f"alembic config.sqlalchemy.url: {db_location}") config.set_main_option("sqlalchemy.url", db_location) diff --git a/notebooks/work-in-progress/CEMS_by_utility.ipynb b/notebooks/work-in-progress/CEMS_by_utility.ipynb index dc5a7b20e4..c8a085ac32 100644 --- a/notebooks/work-in-progress/CEMS_by_utility.ipynb +++ b/notebooks/work-in-progress/CEMS_by_utility.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -43,13 +44,12 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "#display(pudl_settings)\n", + "from pudl.workspace.setup import PudlPaths\n", "\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "#display(ferc1_engine)\n", "\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + 
"\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db())\n", "#display(pudl_engine)\n", "\n", "#pudl_engine.table_names()\n", @@ -57,6 +57,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -76,6 +77,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -160,6 +162,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -215,7 +218,7 @@ "\n", "# for yr in years:\n", "# print(f'starting calculation for {yr}')\n", - "# epacems_path = (pudl_settings['pudl_out'] + f'/epacems/year={yr}')\n", + "# epacems_path = (PudlPaths().output_dir + f'/epacems/year={yr}')\n", "# cems_dd = (\n", "# dd.read_parquet(epacems_path, columns=my_cols)\n", "# .assign(state=lambda x: x['state'].astype('string'))\n", @@ -299,6 +302,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -331,6 +335,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/work-in-progress/better-heatrates.ipynb b/notebooks/work-in-progress/better-heatrates.ipynb index bbce86000b..4547c5ba3d 100644 --- a/notebooks/work-in-progress/better-heatrates.ipynb +++ b/notebooks/work-in-progress/better-heatrates.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -71,6 +72,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -78,6 +80,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -116,6 +119,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,6 +169,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,6 +218,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -239,6 +245,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -246,6 +253,7 
@@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -278,6 +286,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "tags": [] @@ -311,11 +320,12 @@ } ], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "display(pudl_settings)\n", "\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "\n", + "# TODO(janrous): provide property for accessing ferc db?\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "\n", "API_KEY_EIA = os.environ[\"API_KEY_EIA\"]\n", "\n", @@ -323,6 +333,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -384,6 +395,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -421,6 +433,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -431,6 +444,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -442,6 +456,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -480,6 +495,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "tags": [] @@ -548,6 +564,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -569,6 +586,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -584,6 +602,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -597,6 +616,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -608,6 +628,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -618,6 +639,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -705,6 +727,7 @@ ] }, { + 
"attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -713,6 +736,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -728,6 +752,7 @@ "source": [] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/notebooks/work-in-progress/eia_column_changes_through_time.ipynb b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb index 3054a4089e..bed0633cef 100644 --- a/notebooks/work-in-progress/eia_column_changes_through_time.ipynb +++ b/notebooks/work-in-progress/eia_column_changes_through_time.ipynb @@ -1,16 +1,19 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "# EIA923 Column Changes\n", "This notebook reimplements the excel extractor process to extract each sheet of each excel file separately. This preserves the original structure for easier comparison." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -20,74 +23,67 @@ "import pandas as pd\n", "pd.options.display.max_columns = 150\n", "pd.options.display.max_rows = 150" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "# make notebooks full width\n", - "from IPython.core.display import display, HTML\n", - "display(HTML(\"\"))" - ], + "metadata": {}, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 3, "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()" - ], - "outputs": [], - "metadata": {} + "# make notebooks full width\n", + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" + ] }, { "cell_type": "code", "execution_count": 
4, + "metadata": {}, + "outputs": [], "source": [ "eia923_tables = pc.PUDL_TABLES['eia923']\n", "eia923_years = list(range(2001, 2020))" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 5, - "source": [ - "ds = pudl.workspace.datastore.Datastore(local_cache_path=Path(pudl_settings[\"data_dir\"]))" - ], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [ + "from pudl.workspace.setup import PudlPaths\n", + "\n", + "ds = pudl.workspace.datastore.Datastore(local_cache_path=PudlPaths().data_dir)" + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ "eia923_extractor = pudl.extract.eia923.Extractor(ds)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, + "outputs": [], "source": [ "dfs = {}\n", "# Lightly altered extractor code (pudl.extract.excel.GenericExtractor.extract) to avoid concatenating prematurely\n", @@ -110,21 +106,16 @@ " newdata = eia923_extractor.process_raw(newdata, page, **partition)\n", " newdata = eia923_extractor.process_renamed(newdata, page, **partition)\n", " dfs[partition['year']][page] = newdata" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "for k, v in dfs.items():\n", - " print(k, v.keys())" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "2001 dict_keys(['generation_fuel', 'stocks'])\n", "2002 dict_keys(['generation_fuel', 'stocks'])\n", @@ -148,11 +139,16 @@ ] } ], - "metadata": {} + "source": [ + "for k, v in dfs.items():\n", + " print(k, v.keys())" + ] }, { "cell_type": "code", "execution_count": 23, + "metadata": {}, + "outputs": [], "source": [ "# make dataframes of columns. 
One df per excel sheet, one row per year\n", "from collections import defaultdict\n", @@ -163,70 +159,32 @@ " col_dfs[page].append(dfs[year][page].columns.to_frame().rename(columns={0: year}))\n", " except KeyError:\n", " continue" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 24, + "metadata": {}, + "outputs": [], "source": [ "col_dfs = {k : pd.concat(v, axis=1).T for k, v in col_dfs.items()}" - ], - "outputs": [], - "metadata": {} + ] }, { + "attachments": {}, "cell_type": "markdown", + "metadata": {}, "source": [ "This shows the state of the columns for each year for each sheet. When a column is introduced (or disappears), that entry will be NaN. This particular page only has data from 2011 on." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 35, - "source": [ - "col_dfs['plant_frame']" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " report_year plant_id_eia plant_name_eia plant_state eia_sector \\\n", - "2011 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2012 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2013 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2014 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2015 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2016 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2017 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2018 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "2019 report_year plant_id_eia plant_name_eia plant_state eia_sector \n", - "\n", - " sector_name naics_code combined_heat_power reporting_frequency \\\n", - "2011 sector_name naics_code combined_heat_power reporting_frequency \n", - "2012 NaN naics_code combined_heat_power reporting_frequency \n", - "2013 sector_name naics_code 
combined_heat_power reporting_frequency \n", - "2014 NaN naics_code combined_heat_power reporting_frequency \n", - "2015 NaN naics_code combined_heat_power reporting_frequency \n", - "2016 NaN naics_code combined_heat_power reporting_frequency \n", - "2017 NaN naics_code combined_heat_power reporting_frequency \n", - "2018 NaN naics_code combined_heat_power reporting_frequency \n", - "2019 NaN naics_code combined_heat_power reporting_frequency \n", - "\n", - " nameplate_capacity_mw \n", - "2011 nameplate_capacity_mw \n", - "2012 NaN \n", - "2013 NaN \n", - "2014 NaN \n", - "2015 NaN \n", - "2016 NaN \n", - "2017 NaN \n", - "2018 NaN \n", - "2019 NaN " - ], "text/html": [ "
\n", "\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
statereport_datebulk_agg_fuel_cost_per_mmbtufuel_type_code_pudl
0AK2012-01-010.0coal
1AK2012-02-010.0coal
2AK2012-03-010.0coal
3AK2012-04-010.0coal
4AK2012-05-010.0coal
\n","
"],"text/plain":[" state report_date bulk_agg_fuel_cost_per_mmbtu fuel_type_code_pudl\n","0 AK 2012-01-01 0.0 coal\n","1 AK 2012-02-01 0.0 coal\n","2 AK 2012-03-01 0.0 coal\n","3 AK 2012-04-01 0.0 coal\n","4 AK 2012-05-01 0.0 coal"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.head()"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
report_datefuel_cost_per_unitstatefuel_type_code_pudl
02022-07-0151.83AKcoal
12022-06-0159.16AKcoal
22022-05-0153.04AKcoal
32022-04-0151.69AKcoal
42022-03-0158.55AKcoal
\n","
"],"text/plain":[" report_date fuel_cost_per_unit state fuel_type_code_pudl\n","0 2022-07-01 51.83 AK coal\n","1 2022-06-01 59.16 AK coal\n","2 2022-05-01 53.04 AK coal\n","3 2022-04-01 51.69 AK coal\n","4 2022-03-01 58.55 AK coal"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["api_df.drop(columns=['name', 'series_id', 'units']).head()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/plain":["((18501, 4), (30804, 7))"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.shape, api_df.shape"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 18501 entries, 0 to 18500\n","Data columns (total 4 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 state 18501 non-null object \n"," 1 report_date 18501 non-null datetime64[ns]\n"," 2 bulk_agg_fuel_cost_per_mmbtu 18501 non-null float64 \n"," 3 fuel_type_code_pudl 18501 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(2)\n","memory usage: 578.3+ KB\n"]}],"source":["bulk_df.info()"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Int64Index: 30804 entries, 0 to 10499\n","Data columns (total 7 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 report_date 30804 non-null datetime64[ns]\n"," 1 fuel_cost_per_unit 18615 non-null float64 \n"," 2 state 30804 non-null object \n"," 3 units 30804 non-null object \n"," 4 series_id 30804 non-null object \n"," 5 name 30804 non-null object \n"," 6 fuel_type_code_pudl 30804 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(5)\n","memory usage: 1.9+ MB\n"]}],"source":["api_df.info()"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["keep_cols = ['state', 'report_date',\n","'fuel_cost_per_mmbtu', 
'fuel_type_code_pudl', 'fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["frc_api = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['fuel_cost_per_unit']]"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["frc_bulk = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['bulk_agg_fuel_cost_per_mmbtu']]"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["((560374, 6), (560374, 6))"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api.shape, frc_bulk.shape"]},{"cell_type":"code","execution_count":26,"metadata":{},"outputs":[],"source":["mismatch = frc_api['fuel_cost_from_eiaapi'] ^ frc_bulk['fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[{"data":{"text/plain":["sum 0.0\n","mean 0.0\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["mismatch.agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":28,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":28,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":30,"metadata":{},"outputs":[],"source":["diff = frc_api['fuel_cost_per_mmbtu'].sub(frc_bulk['fuel_cost_per_mmbtu'])"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[{"data":{"text/plain":["sum 186016.00000\n","mean 0.33195\n","Name: fuel_cost_per_mmbtu, dtype: 
float64"]},"execution_count":32,"metadata":{},"output_type":"execute_result"}],"source":["diff.ne(0).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"text/plain":["sum 133687.000000\n","mean 0.238567\n","Name: fuel_cost_per_mmbtu, dtype: float64"]},"execution_count":35,"metadata":{},"output_type":"execute_result"}],"source":["diff.abs().gt(1e-3).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/plain":["sum 136530.000000\n","mean 0.243641\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":37,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":37,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVcElEQVR4nO3db4xc53me8euObCusY9ZSZW1pkihllHIjibEdbVgVRouNlURMZJhCURU0VItqVbAQFFcGWMRk8qHoBwJEW6e12lgtEbuiUDUqkVgVEVtJZDbTIoD+mHLlMBStirUYhSEjxm5Ta21A0apPP8yRONwdzs5Sy5kdnusHDObMc86Zeefl8J533jlzNlWFJKkdfmjcDZAkjY6hL0ktYuhLUosY+pLUIoa+JLXIO8bdgMVcddVVtWHDhnNq3//+93n3u989ngZNAPtnMPtnMPtnsEnpn2efffY7VfW++fUVH/obNmzg8OHD59Q6nQ4zMzPjadAEsH8Gs38Gs38Gm5T+SfKH/epO70hSixj6ktQihr4ktciioZ/kg0me67l8L8lnklyZ5IkkLzbXV/TsszvJ8SQvJLmlp35jkiPNuvuT5GI9MUnSQouGflW9UFUfrqoPAzcCPwAeBXYBh6pqI3CouU2S64BtwPXAFuALSS5r7u4BYAewsblsWdZnI0kaaKnTOzcD/6uq/hDYCuxv6vuB25rlrcAjVfVaVb0EHAc2J1kDrK6qJ6t7lreHevaRJI3AUg/Z3Ab8WrM8VVWnAarqdJKrm/pa4KmefU42tdeb5fn1BZLsoPuJgKmpKTqdzjnrZ2dnF9R0lv0zmP0zmP0z2KT3z9Chn+RdwCeA3Ytt2qdWA+oLi1X7gH0A09PTNf+Y2Ek5TnZc7J/B7J/B7J/BJr1/ljK987PAN6rqleb2K82UDc31maZ+Eljfs9864FRTX9enLkkakaWE/ic5O7UDcBDY3ixvBx7rqW9LcnmSa+h+YftMMxX0apKbmqN27uzZR5poG3Z95a2LtJINNb2T5C8APw38o57yXuBAkruBl4HbAarqaJIDwPPAHHBvVb3R7HMP8CCw
Cni8uUiSRmSo0K+qHwB/aV7tu3SP5um3/R5gT5/6YeCGpTdTkrQc/EWuJLXIij/LprRSOX+vSeRIX5JaxJG+tMx6PwGc2HvrGFsiLeRIX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE0JekFjH0JalFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWmSo0E/y3iS/nuRbSY4l+RtJrkzyRJIXm+srerbfneR4kheS3NJTvzHJkWbd/UlyMZ6UJKm/YUf6nwd+q6r+GvAh4BiwCzhUVRuBQ81tklwHbAOuB7YAX0hyWXM/DwA7gI3NZcsyPQ9J0hAWDf0kq4G/BXwRoKr+vKr+DNgK7G822w/c1ixvBR6pqteq6iXgOLA5yRpgdVU9WVUFPNSzjyRpBIb5w+gfAP4U+A9JPgQ8C9wHTFXVaYCqOp3k6mb7tcBTPfufbGqvN8vz6wsk2UH3EwFTU1N0Op1z1s/Ozi6o6Sz7Z7Dl6p+dm+YW3WYS/x18/Qw26f0zTOi/A/hx4NNV9XSSz9NM5ZxHv3n6GlBfWKzaB+wDmJ6erpmZmXPWdzod5td0lv0z2HL1z127vrLoNifuePuPM2q+fgab9P4ZZk7/JHCyqp5ubv863TeBV5opG5rrMz3br+/Zfx1wqqmv61OXJI3IoqFfVX8C/FGSDzalm4HngYPA9qa2HXisWT4IbEtyeZJr6H5h+0wzFfRqkpuao3bu7NlHuiRt2PWVty7SSjDM9A7Ap4GHk7wL+Dbw9+m+YRxIcjfwMnA7QFUdTXKA7hvDHHBvVb3R3M89wIPAKuDx5iJJGpGhQr+qngOm+6y6+Tzb7wH29KkfBm5YQvukS0bvaP/E3lvH2BK1mb/IlaQWMfQlqUUMfUlqEUNfklrE0JekFhn2kE1J4PH2mniO9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE4/SlMfCMmxoXR/qS1CKO9KVF+CtcXUoc6UtSixj6ktQihr4ktYihL0ktYuhLUosMFfpJTiQ5kuS5JIeb2pVJnkjyYnN9Rc/2u5McT/JCklt66jc293M8yf1JsvxPSZJ0Pks5ZPMnq+o7Pbd3AYeqam+SXc3tzya5DtgGXA+8H/hakmur6g3gAWAH8BTwVWAL8PgyPA9pWXmYpi5Vb2d6Zyuwv1neD9zWU3+kql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERh2pF/A7yQp4N9X1T5gqqpOA1TV6SRXN9uupTuSf9PJpvZ6szy/vkCSHXQ/ETA1NUWn0zln/ezs7IKazrJ/Bhumf3ZumhtNY2DF/Vv5+hls0vtn2ND/aFWdaoL9iSTfGrBtv3n6GlBfWOy+qewDmJ6erpmZmXPWdzod5td0lv0z2DD9c9cIp3dO3DEzsscahq+fwSa9f4aa3qmqU831GeBRYDPwSjNlQ3N9ptn8JLC+Z/d1wKmmvq5PXZI0IouGfpJ3J3nPm8vAzwB/ABwEtjebbQcea5YPAtuSXJ7kGmAj8EwzFfRqkpuao3bu7NlHkjQCw0zvTAGPNkdXvgP4T1X1W0m+DhxIcjfwMnA7QFUdTXIAeB6YA+5tjtwBuAd4EFhF96gdj9yRpBFaNPSr6tvAh/rUvwvcfJ599gB7+tQPAzcsvZnSxedhmmoDf5ErSS3i+fSlMfOvaGmUHOlLUosY+pLUIoa+JLWIoS9JLeIXudIK4pe6utgMfekS4puGFuP0jiS1iKEvSS1i6EtSixj6ktQihr4ktYihL0ktYuhLUosY+pLUIoa+JLWIv8iVJpC/vNWFMvTVaiv5TyQa7LoYDH1pwq3kNy6tPM7pS1KLDB36SS5L8j+S/GZz+8okTyR5sbm+omfb3UmOJ3khyS099RuTHGnW3Z8ky/t0JEmDLGV65z7gGLC6ub0LOFRVe5Psam5/Nsl1wDbgeuD9wNeSXFtVbwAPADuAp4CvAluAx5flmUiX
MKdwtFyGGuknWQfcCvxqT3krsL9Z3g/c1lN/pKpeq6qXgOPA5iRrgNVV9WRVFfBQzz6SpBEYdqT/r4FfAN7TU5uqqtMAVXU6ydVNfS3dkfybTja115vl+fUFkuyg+4mAqakpOp3OOetnZ2cX1HSW/TNYb//s3DQ33sZcRBf6GvD1M9ik98+ioZ/k48CZqno2ycwQ99lvnr4G1BcWq/YB+wCmp6drZubch+10Osyv6Sz7Z7De/rnrEp42OXHHzAXt5+tnsEnvn2FG+h8FPpHk54AfBlYn+Y/AK0nWNKP8NcCZZvuTwPqe/dcBp5r6uj51SdKILDqnX1W7q2pdVW2g+wXtf62qvwccBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0Am/nx1l7gQNJ7gZeBm4HqKqjSQ4AzwNzwL3NkTsA9wAPAqvoHrXjkTuSNEJLCv2q6gCdZvm7wM3n2W4PsKdP/TBww1IbKUlaHv4iV5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEf9colrnyB//30v6RGvSIIa+dInyD6urH6d3JKlFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklpk0dBP8sNJnknyzSRHk/yzpn5lkieSvNhcX9Gzz+4kx5O8kOSWnvqNSY406+5PkovztCRJ/QxzwrXXgI9V1WySdwK/l+Rx4G8Dh6pqb5JdwC7gs0muA7YB1wPvB76W5NqqegN4ANgBPAV8FdgCPL7sz0qap/fkYzs3jbEh0pgtOtKvrtnm5jubSwFbgf1NfT9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERjq1MpJLgOeBf4q8CtV9XSSqao6DVBVp5Nc3Wy+lu5I/k0nm9rrzfL8er/H20H3EwFTU1N0Op1z1s/Ozi6o6Sz7Z6Gdm+beWp5ade7tNljK68HXz2CT3j9DhX4zNfPhJO8FHk1yw4DN+83T14B6v8fbB+wDmJ6erpmZmXPWdzod5td0lv2z0F3nTO/M8bkj7fpTEifumBl6W18/g016/yzp6J2q+jOgQ3cu/pVmyobm+kyz2Ulgfc9u64BTTX1dn7okaUSGOXrnfc0InySrgJ8CvgUcBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0AsN8xl0D7G/m9X8IOFBVv5nkSeBAkruBl4HbAarqaJIDwPPAHHBvMz0EcA/wILCK7lE7HrkjSSO0aOhX1e8DH+lT/y5w83n22QPs6VM/DAz6PkCSdBH5i1xJahFDX5JapF3HralVen+FK6nLkb4ktYihL0ktYuhLUosY+pLUIoa+JLWIoS9JLWLoS1KLGPqS1CKGviS1iL/I1SXFX+FKgznSl6QWcaQvtUDvJ6ATe28dY0s0bo70JalFHOlLLeOov90c6UtSixj6ktQihr4ktYihL0ktsmjoJ1mf5HeTHEtyNMl9Tf3KJE8kebG5vqJnn91Jjid5IcktPfUbkxxp1t2fJBfnaUmS+hlmpD8H7KyqHwVuAu5Nch2wCzhUVRuBQ81tmnXbgOuBLcAXklzW3NcDwA5gY3PZsozPRZK0iEVDv6pOV9U3muVXgWPAWmArsL/ZbD9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkEVjScfpJNgAfAZ4GpqrqNHTfGJJc3Wy2FniqZ7eTTe31Znl+vd/j7KD7iYCpqSk6nc4562dnZxfUdFab+2fnprlFt5laNdx2bdDvddLm188wJr1/hg79JD8C/Abwmar63oDp+H4rakB9YbFqH7APYHp6umZmZs5Z3+l0mF/TWW3un7uGOOHazk1zfO6Iv0sEOHHHzIJam18/w5j0/hnq6J0k76Qb+A9X1Zeb8ivNlA3N9ZmmfhJY37P7OuBUU1/Xpy5JGpFhjt4J8EXgWFX9cs+qg8D2Znk7
8FhPfVuSy5NcQ/cL22eaqaBXk9zU3OedPftIkkZgmM+4HwU+BRxJ8lxT+0VgL3Agyd3Ay8DtAFV1NMkB4Hm6R/7cW1VvNPvdAzwIrAIeby7S2+I59KXhLRr6VfV79J+PB7j5PPvsAfb0qR8GblhKAyVJy8df5EpSixj6ktQiHremieQ8vnRhHOlLUosY+pLUIoa+JLWIc/pSi/n3ctvHkb4ktYgjfUkD+Wng0mLoS1rAQ2IvXYa+JoZBJL19hr6koTnVM/n8IleSWsTQl6QWMfQlqUUMfUlqEb/I1YrmETuj82Zf79w0h9Fw6XKkL0ktYuhLUosY+pLUIoa+JLXIot/WJPkS8HHgTFXd0NSuBP4zsAE4Afzdqvo/zbrdwN3AG8A/rqrfbuo3Ag8Cq4CvAvdVVS3v09Gk84tb6eIaZqT/ILBlXm0XcKiqNgKHmtskuQ7YBlzf7POFJJc1+zwA7AA2Npf59ylJusgWDf2q+u/A/55X3grsb5b3A7f11B+pqteq6iXgOLA5yRpgdVU92YzuH+rZR5I0Ihd6MO5UVZ0GqKrTSa5u6muBp3q2O9nUXm+W59f7SrKD7qcCpqam6HQ656yfnZ1dUNNZk9w/3WPEL66pVaN5nEk1bP/8m4cfe2t509q/eDGbtKJM8v8vWP5fYKRPrQbU+6qqfcA+gOnp6ZqZmTlnfafTYX5NZ01y/9w1gjn9nZvm+NwRf3x0PhfSPyfumLk4jVmBJvn/F1x46L+SZE0zyl8DnGnqJ4H1PdutA0419XV96pJf3kojdKGHbB4EtjfL24HHeurbklye5Bq6X9g+00wFvZrkpiQB7uzZR5I0IsMcsvlrwAxwVZKTwD8F9gIHktwNvAzcDlBVR5McAJ4H5oB7q+qN5q7u4ewhm483F0nSCC0a+lX1yfOsuvk82+8B9vSpHwZuWFLrJEnLyl/kSlKLGPqS1CKGviS1iAcrayw8TFMaD0Nf0tvW+yZ+Yu+tY2yJFuP0jiS1iKEvSS1i6EtSixj6ktQifpGrkfGIHWn8DH1dVAa9tLIY+lp2Br20cjmnL0kt4khf0rLyh1ormyN9SWoRR/paFs7jS5PB0NcFM+i1GKd6Vh5DX4sy3KVLh3P6ktQijvQljYRTPSuDoa++nNLRxeQbwPiMPPSTbAE+D1wG/GpV7R11G9SfQa+V5Hyvx+V8k2jjm89IQz/JZcCvAD8NnAS+nuRgVT0/ynZcas73wjXENQmW+joddvu2hPhSjXqkvxk4XlXfBkjyCLAVWNGhP8yIY5gX4qDtz7duqY+xYddX2LlpjrsMfLXcxXozWez/10p/s0lVje7Bkr8DbKmqf9jc/hTw16vq5+dttwPY0dz8IPDCvLu6CvjORW7uJLN/BrN/BrN/BpuU/vkrVfW++cVRj/TTp7bgXaeq9gH7znsnyeGqml7Ohl1K7J/B7J/B7J/BJr1/Rn2c/klgfc/tdcCpEbdBklpr1KH/dWBjkmuSvAvYBhwccRskqbVGOr1TVXNJfh74bbqHbH6pqo5ewF2dd+pHgP2zGPtnMPtnsInun5F+kStJGi/PvSNJLWLoS1KLTGzoJ/l0kheSHE3yz8fdnpUoyT9JUkmuGndbVpok/yLJt5L8fpJHk7x33G1aCZJsaf5fHU+ya9ztWUmSrE/yu0mONblz37jbdCEmMvST/CTdX/L+WFVdD/zLMTdpxUmynu7pLl4ed1tWqCeAG6rqx4D/Cewec3vGruc0KT8LXAd8Msl1423VijIH7KyqHwVuAu6dxP6ZyNAH7gH2VtVrAFV1ZsztWYn+FfAL9Pnxm6Cqfqeq5pqbT9H9zUjbvXWalKr6c+DN06QIqKrTVfWNZvlV4BiwdrytWrpJDf1rgb+Z5Okk/y3JT4y7QStJkk8Af1xV3xx3WybEPwAeH3cjVoC1wB/13D7JBIbaKCTZAHwEeHrMTVmyFXs+/SRfA/5yn1W/RLfdV9D9iPUTwIEkH6gWHX+6SP/8IvAz
o23RyjOoj6rqsWabX6L7sf3hUbZthRrqNCltl+RHgN8APlNV3xt3e5ZqxYZ+Vf3U+dYluQf4chPyzyT5f3RPgvSno2rfuJ2vf5JsAq4BvpkEutMW30iyuar+ZIRNHLtBryGAJNuBjwM3t2nAMICnSVlEknfSDfyHq+rL427PhZjU6Z3/AnwMIMm1wLuYjLPeXXRVdaSqrq6qDVW1ge5/5B9vW+AvpvljPp8FPlFVPxh3e1YIT5MyQLqjqC8Cx6rql8fdngs1qaH/JeADSf6A7pdN2x2paYn+LfAe4IkkzyX5d+Nu0Lg1X2y/eZqUY8CBCzxNyqXqo8CngI81r5nnkvzcuBu1VJ6GQZJaZFJH+pKkC2DoS1KLGPqS1CKGviS1iKEvSS1i6EtSixj6ktQi/x8iTBQ1MrleqwAAAABJRU5ErkJggg==","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["diff.abs().replace(np.inf, np.nan).replace(0, np.nan).transform(np.log10).hist(bins=100)"]},{"cell_type":"code","execution_count":40,"metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["state True\n","report_date True\n","fuel_type_code_pudl True\n","fuel_cost_from_eiaapi True\n","dtype: bool"]},"metadata":{},"output_type":"display_data"}],"source":["# data is all in same order\n","frc_api.drop(columns=['fuel_cost_per_mmbtu', 'fuel_cost_per_unit']).eq(\n"," frc_bulk.drop(columns=['fuel_cost_per_mmbtu', 'bulk_agg_fuel_cost_per_mmbtu'])\n",").all()"]},{"cell_type":"code","execution_count":42,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":42,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAApiElEQVR4nO3deXhc133e8e+ZFYPBDPaN2LmTkriI0GJLsrVZki3ZjBQvsrPVdqLGjRM3TZtYdZs0TZ3YjZ88ceukCqv4sRWrdtNY8iLJ1i5LskVJpERRFDdxJwiS2JfZl3v6BxaDJAguGGJmgPfzPHyIubg49wdg8M6Zc8+9x1hrERGR4uXKdwEiIjI7CnIRkSKnIBcRKXIKchGRIqcgFxEpcp58HLSmpsa2t7fn49AiIkVr69atfdba2tO35yXI29vb2bJlSz4OLSJStIwxh6fbrqEVEZEipyAXESlyCnIRkSKnIBcRKXIKchGRIqcgFxEpcgpyEZEipyAXEZkjjmOJJjM4Tm5vH64gFxGZI/F0lq7BGPF0NqftKshFRC6hqb3wgNdNc2UpAa87p8dQkIuIXEJTe+EulyHo9+BymZweQ0EuIpJjc9ELn0pBLiKSY3PRC59KQS4ikmNz0QufKi+3sRURmc8meuFzdrw5O5KIiFwSCnIRkSKnIBcRKXI5GcQxxhwCRoEskLHWduaiXRERObdcjsbfZK3ty2F7IiIFwXEs8XSWgNd9SacRXiwNrYiInMOlukdKruQqyC3wlDFmqzHmvhy1KSJSEOZ6XviFytXQynXW2m5jTB3wtDFmt7X2xak7jAf8fQCtra05OqyISG5NN4wy1/PCL1ROeuTW2u7x/3uAR4Grp9lnk7W201rbWVtbm4vDiojkzMT9UaLJTEEPo0xn1kFujAkaY0ITHwO3ATtm266IyFxxHEtfJMmR/hhAQQ+jTCcX7xXqgUeNMRPt/R9r7U9z0K6IyCU1MYziOJb+aJLqMv8lv8HVpTDrILfWHgDW5qAWEZE5M9EL748maa4opbUqWLDTC8+lcEfvRUQuoXg6S38kVbS
98KkU5CKyIDiOZTSRJpHOUh30E/C6aa0uLdpe+FS6IEhE5r2JYZQd3cNsPtDPQCw1Jws+zBX1yEVk3jr9ZObimiBLa8uoKvXlu7ScUpCLyLyUyTh0DcWIJrO0VhX3ycxz0dCKiMxLA7EUO4+PEPS7Cfo982YYZTrqkYvIvDH18vqqUh+dbVVUlfrmbYBPUI9cRIpeJuPQM5JgNJ6evLze43FRFy7B45n/MTf/v0MRmdccx9I1FGPL4QHimWzRXV6fCxpaEZGiNHGTq1gqw2giw+rGMDVB/4LogZ9OQS4iRSWVytI9Eqc84OVwfwzHWporS6kp88/7sfCzWXgvXSJS1I4Nx/jJ9uMMRJMsqS1jWV1oQYc4qEcuIkVgYhgFIBzwsnJRmKqgn1DAm+fKCoOCXEQKXjSZYcexYfxeF8vqQlzdUb3gTmjOREEuIgVr6iX2fq+LporAvL6w52IpyEWkoKRSWQ4PRgCoKSuhZzTJovIAKxrC8/YS+9lSkItIwXAcy76+CI9tOwYu+NiGlsl54Qrws8tZkBtj3MAW4Ji19q5ctSsiC0c8nSXrWO64ooGAz01TeSk+n8bCzyWXPfIvALuAcA7bFJEFYGJueENZCYtry9QDv0A5mUdujGkG7gQezEV7IjL/pVJZDvSOMjiapGs4xrO7ejgRSehk5kXI1QVBfwv8MeCcbQdjzH3GmC3GmC29vb05OqyIFKvDgxG+u/kwrx/pp7LExy2r6lgUDuS7rKI06yA3xtwF9Fhrt860n7V2k7W201rbWVtbO9vDikiRmri4x7EWr8fQEC6hPOijvaZM4+EXKRdj5NcBHzHGfAgoAcLGmO9Ya389B22LyDwx9SZXA9E0DeEAH7+qjUXhgIZSZmnWQW6tvR+4H8AYcyPw7xXiIjJhIsCjqQzdQ3Gw0FxVSnmpj8oyf77Lmxc0j1xELplMxuHoYJRjgzFKPF5aqgOU+ub3smv5kNMgt9a+ALyQyzZFpPg4jmU0MbZaz87uYarK/CytD1AbKlGAXwLqkYtIzkwE+EA0SddQnJ7hJJctKqepspRQiVchfokoyEUkJ1KpLPv6IvSNJuiPpljTXM7yuhDVC3TVnrmkIBeRWYvF0jy79yTv9oxy68p6VjSEFeBzSD9lEZkVx7G8cWyIn759nDK/h5bqIPXlAYX4HFKPXEQuSiqV5chQFGstfh/ce00bKxtClAd8+S5twVGQi8gFm7jd7JM7jlMV9PL+FfW0tgR1MjNPFOQicl4yGYeBWIqqUh/JrEPWsdy6up6qUh/1ujozrxTkInJOmYzD3pMj7DoxwvVLa6kN6XazhURBLiIzSiQybDk6yJGBCJWlfkrGwzvoV3wUCv0mRGRasViat48P0RdNsuXgILesqueK5gpCJd58lyanUZCLyCkmrs589WA//++1w9SF/dy0qpHO1ipKShQZhUi/FRGZFImm+PmBXvpiCcgafrWzhbaaIEuqQ7pXeAFTkIsIjmPpGYrxf7ce5aW9PXTUBrn3qnbWtlTqwp4ioCAXWcAyGYfDg6NsPzrEvpOjvLS3j5tW1nHbFQ0sqylXiBcJBbnIApXJOOzoHuSfXjnIruMjbGit4Dfe28HtqxoIa8GHoqIgF1mAEokMrx7u55V3e0gk0mxc38ytq+tpqwypF16EFOQiC0gkmuK1I/0MRBO8dmCI2pCPT1y7mKvbqjUjpYjN+jdnjCkBXgT84+39i7X2z2bbrojkztBogp/sPMbe4yO8un+ApqpS7lzXzIbWShrLS9ULL3K5eAlOAjdbayPGGC/wsjHmJ9bazTloW0RmqWcoypcf38GLe/pwu2F9WzUfv7qFGxbXqxc+T8z6t2ittUBk/KF3/J+dbbsiMjuxWJqfHz7JP79ylJ/tHaAx7OHDVzaxcV0zHTVh9cLnkZy8HBtj3MBWYCnwd9baV6fZ5z7gPoDW1tZcHFZEzmIkkuTbr+zn+28cZSiaYUN7mM9c38ENSxvUC5+HcvI
btdZmgXXGmArgUWPM5dbaHaftswnYBNDZ2akeu8glkEpl2dM7xGPbjvPU9qP4XW5+ZUMzn35vBy3VId2pcJ7K6UuztXbIGPMCcAew4xy7i0gOjUSSfHfrIZ58+wTD8RRXdtRy+2WNXL+kjtJS3ehqPsvFrJVaID0e4gHgVuCrs65MRM7Lod4h/v65vQyPJthxfJRkFj5yZQu/874l1IdL1QtfAHLRI28Evj0+Tu4C/tla+1gO2hWRGaRSWX5x4Dj/5Yc7ODSYBeDO1VW8f2UDt61qpCJUkucKZa7kYtbKdmB9DmoRkfO0+3gf/+0HO9nRNcpQdqwH9cHlYf7zh6+gobIs3+XJHNPpa5Eiksk4vN3Vzx/8ny0cHXEAWNsY4FPXtXDnqjbKglrBfiFSkIsUib7hGN/ZfIDHt3dzdMShBPjIlVX88e1rqSkvzXd5kkcKcpECNxJJ8vg7R/nB64fZ1pXA74YbFpfzm9e3ccPiRs0LFwW5SCHrGYry5Z/s4Km3+4g70Bx28fGr2/jNaxbrZKZMUpCLFKCh0QSPvd3FT7d38cqhKF7gfe1BPnfbSjY012rZNTmFglykwAyNJvjLJ7bz4zd7iQONIcOvrm/mt29Yrl64TEtBLlIgugdG+ebL+zjUG+PV/UNkgRuXhvntG5bS2VarsXA5Kz0zRArAvpMDfO6hrbzbn8IF3LK8nKuW1HHPumbNSJFzUpCL5NFIJMmPdhzhmy8c4MBQBoCbl5XzF3ev04U9ct4U5CJ54DiWt4/18l8f3cFb3XEyQEelm09c08K9G5ZoLFwuiIJcZI7tPt7HX/14J+8cG6UvOXZ5/fXtIf7yo2tprSnPd3lShBTkInMkk3F4ds9h/v3DOxkdu7qejnLDPVe18WtXL6YqHMhvgVK0FOQic2Db0RP8yXffZM+AM7ntfR2l/OVHN9BcHc5jZTIfKMhFLqFYLM2Pdx7iy4/sZfweVwSAP/pgG/d2LtdNriQnFOQil0hX/wh/8YO3ePrdESb64YvL4G//VSdrmuvzWpvMLwpykRzrHhjlgefe4Zld/XRHx7Y1heBf37yUe9Z0qBcuOacgF8mhvSf6+eyDmzkaGXvsA25aUc6XPnyFZqTIJZOLNTtbgIeABsABNllrvz7bdkWKSc9QlAdf2sX33zhJf3xsW2MQ7v+VVdy2rFWX18sllYtnVwb4I2vtG8aYELDVGPO0tXZnDtoWKXg/2rGHP/jOvsnHFR646bIq/vADq9ULlzmRizU7jwPHxz8eNcbsApoABbnMaz1DUb7+1DYefmNocluZC773e9ewsrEmf4XJgpPT93vGmHbGFmJ+dZrP3QfcB9Da2prLw4rMuS2Hu/k333qTnvgvtzWUwDc+vV4hLnMuZ0FujCkDvg/8W2vtyOmft9ZuAjYBdHZ22lwdV2QudQ+M8qf/8jLPHPjlhT3lbvjSPUv4lSuWacEHyYucBLkxxstYiD9srX0kF22KFJKh0QRffeLnfPfNxCnbf21DBb9/61rdqVDyKhezVgzwj8Aua+3fzL4kkcKy+WAXv/kPb5E6bftXPtHGvesvz0tNIlPlokd+HfAbwNvGmG3j2/6jtfaJHLQtkjcDI3H+9pltPPTawCnbr2+Cv/zk9ZqRIgUjF7NWXgZMDmoRKRi/OHCU39m0nehp2//7ve18fN1lealJ5Gx0lYLIFF39I/zu37/EjtMS/J41ZXzxQ53UVQTzU5jIDBTkIuP+/sVX+O9PDJyx/cFPr+LWFYvzUJHI+VGQy4K3s7uXX/kfr51xMrPVDQ/+/rUsb6jOS10i50tBLgva1376LN94IXHG9r/51GLuWbMqDxWJXDgFuSxIL+47zG8+uOOM7RXAv/zhe1haXzXnNYlcLAW5LDh3ffFxzoxw+NonO/jo2tVzXo/IbCnIZd5zHMtwPMWNf/EMw9N8Pgg8+R9u0NqZUrQU5DKvjUSS/P1TW3jgtaFpP/+56/380R03k8w6OI7F5dIlEVJ
8FOQyLw2NJvjelv185clDZ93nnz6znnBJgL09o1gL7TVBgn79SUjx0bNW5pWRSJLPbXqGn/fMvN+9V9bwwPMHuPuqRfjcPtY0VRDw6s6FUpwU5DJvdPWP8L6/fglnhn2+dFcTXleAbzy9j744NJb7+fytq2iuKNWwihQtBbkUJcexxNNZekciPPzKUXyuAf7u5ciMX/PVe1ZzbDDOi3uOk0rDiho/92xoprUqqBCXoqYgl6IUT2fpGozx3VcO8a1Xj82471Wl8GsfuZw3Dg1xsDfC8voAt1/RyM2r61hSXa4Ql6KnIJei5Lbw/7a+xbdePWMxqlOsqYZfvWElo4k0jnVoLPdz95WtXNVeg8fjmqNqRS4tBbkUFcexPLFzP5//zp4Z9/MCn7qyknd607yw6wTN1SEawwGu7qhhbXOlQlzmFQW5FI2BkThf+v4L/GTPTKcz4Yt3NrK6YRE/3HKE/ScHcTeGuKYqwPVL6+ioCc1ZiE+M40/Mhpn4WEM5kmu5WrPzm8BdQI+1VmtfSU5lMg6vd53gkw+8OeN+jcD/+t1r2HpkkG/+7AAtVT4+fX0HK1tChH0BGspL5yTEHccyGk/TF03QH03RUlGKcRkGo2laq0s1V11yLlfPqG8B3wAeylF7sgA5jiWazAAQ8LqJp7NkHIev/OAZ/u/bM3/tTa3w2ds6yTqw+/gwkWSG5qpq3rO0DoOhPlwyGaBTjxP0e3AcS+9oAmugrqzkosI+k3HojyaxWYe9fVEymSzdI3E8xkUilaWsxEtzZems5qpP9PD9bhfJrKPevUzKSZBba180xrTnoi1ZmBzH0hdJcqQ/SjLjUBP08vSOQ3ztuZlnpAC0+OC6K5bQO5Iikc6QyVo+sm4RGzqqONQb57KmMDVl/snQG42n2XpkkHCJl2V1ZRwbjvPW4QFcbhc3raynLlxywfUfG4zyyBvdlJW42H5kiOuX19BRHeTYUIKmygB1oQBBv+eig3fi59MfSVFd5mMwlqK5Ur17GaNngVwyU8eIzxZgE/tkMg5dAzEqg152nhzioZ9t58l9Z94nfKoPNMMnblnL0YEETRUBhmJp1jVXsrQ2RBbLovIgtWWlVJX6AIgmM/jdLvpjSQZjSVoqSzg6FGP/yQiXt1RQXeaf3Pd8v7/RRJpYKsP+/gh7T45y+xU1LG0IcbA3QlWZn6s7qmmumN2QzkSI944mqQ2N1Rj0e3QlqkyasyA3xtwH3AfQ2to6V4eVPIomM+zvibCkroxQwDttsMfTWY70x/B7Damsw7GRKL//8MzjKKv8cM36Rq5d2sCicID9J+Mc6Y+xvqWKJbUhklmH/T0RPC4X5QHP2AtF0mF/b4TKoJeRaJr26jJKvG729AyzrCHE8rrzPwk6MQbeG01wsC9K33jA3r2hCWMcDvX1c9Oqejrbqikv9c2qFx5PZ3EcS3907BgT7yw060ammrMgt9ZuAjYBdHZ22rk6ruTZlAybuIhnYqw4msyQyTpYk+W1Q4M8/9ounjw6c3P/+qoKlrQ24XO7GIwmiSRTJNMZvB5D0smStpag38OyhhB+t4sTI3H2nYgQKnXTM5zg+GiMkNfLUDJNY0UJV7VVUXIBY82ZjEPXUIxDfaPsORmhpSrAqsYQxhiSGYeM4/Crna1c2VRBaan3on9sjmPpGU3QPZhgSV2Q1qqgxsTlrDS0Ijlzeo876PewrD40OQQQ8LonQzyezvLuyQiDsQR7Twzy1acOztj2ygrD3e9dwoamajYfGqDM76a5KsBbR4bYc2KY+ooAN/jGev0wdhJzNJ7mnWMj7D4xjN/twuNxkUw5hAJprmyroq6shHg6y/7eCEtqx941zCQWS/PSgV66BuM0hH0YwO9xcbQ/RjJtubKjktqykoseC5/684uns3QPxUlmsriM0Vi4zChX0w+/C9wI1BhjuoA/s9b+Yy7aluIxtcc9EWYTs0KiyQwBr3sykAJeN+Ggi28//w4/3JOcsd3fu6GedR2
NeN1uyoJeKoJe6kM+9hwfYX/PKGuaK7lueS1NVaV0DcUoD3gxgJO1+DywpC6IwTAQS3JlWzX1ZSVUB/1j9yC3Fs7x/tBxLMOxFM/tOcEjW4/SXhUkngzg8bg52BPB7XZzVUc1rZXBWQ15RJOZyReVoN/DsroQgEJczilXs1Y+mYt2pLhN7XHD2DDEQCyF3+PixEiC5spSvMZwcGCUbd09/Mk/vztje2HgixtXsqG9muFYlmgqTWXAy8r6MO+eHGZfb4Q1LZXcuWYRjeWlRJMZXMby8rs9jCYztFUFKfG4aa8Jsv9EjPUt1ZNj4dFkhq7BGIvKAyxrCE174jCTcTg+GOPgUITdx0fZ3zNKdamPqxdXkcg4JFOWdW0V1IZKqL3IaYtnGH9RcbnMOd8hiEzQS73kzNQe+HA0xdGhGIf7o3S2VdEQLmEomuToYIwvP/Iab/fP3FZno4ff/sBKonGDyxiqQ16ciEPXQJyDfVEqgn4+dXU7i6vLqAz5GY6n+MW+PkYTSfaeHCHs9zFY4mb38Qh3r2vmmiXVkzNSekYSVEyZ1322YZBjQ1EefGk/h3ojlPjcXNFcTmNbAGMMdeUB2quCtMyyFz7VxNi+ZqPIhVKQS05NnAw83B/h6ECcFQ1hgh4333lzN1/58TnOZALLy+D9a1twu90sqa4gay0v7D1B90CM9YuryGRcrF4UHhu+8XkYjCf5xbs9HB2KsOdEhBX1IWrKAuw9OYrX7eKmlQ0srQ1RUjL2VO8ZSfDGkUGubK2cdr54IpHh9SO9vN01jMdn2X1skFDQS2PYT9DvYV9PjJoyH1c0V9JWXZbTk48TL4QiF0rPmgXqfOZ4X4hUKsvhgQi90SSH+qN0VAXx+1w8+ta7fGzzObrf467vCHJtRw0+n5c1LVW4sHzv1QM8tv04sQRE0w7/6rqlLK0pI5rOsP3oIE+908Wzu/uw6QTloRBtdSX0jSS5eWU961sraSwfWzBiYoy+qtTHla2V084XP9I3zF/8aDsv7x0hDiytdNNcXcZQNMOBnj42hgJ84LI6slmoD5ac189tYnipqtSnKYNyySjIF6jTT0zORiqV5eUDPTzxVjelPjdL68p49cBJHnvjCPuGzv31a2ogHA5yqD9JU0WC9YuDvNXVy/M7etlyNEIGuKo5yL1XtbK0powTkThP7ejmrUODHO4bZWA0iQWODI9y3dIaPnp1G4urgpO98Inx8InvdbqeeFf/CP/moS3s6PnlRUhjtwiwNJYH+MDljWxc00TCsTy/u5f68gDt5zG9sD+aZPOBfq7uqCIc8F3QC2euX2xl/lKQL1Cnn5i8GI5j6Y3EeXlPL1sPneTYYAKfSfDQORZ6OF1vFI5Ho5T5PVQHDc9sP8pz747iABUe2NBRzh/cvoLF1RWMptI8uaObF3adJJnNUBtyc2QYhpOworaU2y6rY/Wi8lNmysz0vY5EkvxsXw+PbT00GeIlQH0prG6poqk6yLK6MLdd1khl0E8qleWWVXUsCgem/V5O74H73C7CJWPnDS70hTOXL7Yyv+nZsUDNZjx2Yjre4YEIP9vTw2v7+mgoN7xyaPSi2ktn4ZrFldRXlvDc7kF2940FamsINm5o5q51rcQTDi/vO0nY52PnsQEO9o4QjUJ5EDJpuGl5JX94+3JW1lcBZ4bgdN+r41he3NfL15/ZSzabZnV9gJV1AS5vqyaWyHJtRy0NVQFCPu/kDBKfz017TdlZv5eBWIothwdY3RieXEKuxOeh1O+hIui/oBfOXLzYysKgIJcLFk1m2Hygj837e3hjVzdvz7xIz4wM0FzjZyiRoe/IMPFkgkWlkHbgulUNrGurJ5m2PLLtGOlMls7FVbhx0ReBDHDz0mrWNlVz15omaspLJ9s9WwhmMg7HBqJ0jcTwe9xYt8MHVtXhd1tuXNVEaYmHgUiaurD/ouaFV5X6WN0YJprIEk+PXcwT8LjxuFwX/MKpk59yvvQskXOaGKt1Wzg2EmM
kkeL1g118+9W+i27TBzSEoT5cwmVNFbx5ZIhU2qG6IshlTeWsa67kssZy4lmHeDJDOODGa9wMR5J86MpGUllLVZmP333/Mhoqz+whnx6CEze4Otwf4fG3utnVPUJLVZD3Lq/ho1e1MRhL01hRSonHTSzhnNe88NNvhwuQzDo0V5RO3mYW0JRCueSMtXN/25POzk67ZcuWOT+uXJyJk4XpTJZ/2ryfJ7ac5OIGUcYY4O7Lg2SybiwustZQVurBZVxs6KiiKlhCfzRJ0Oth25Eh/G43NeU+3jw8xO7jI3zu5mW8d0ntGTNBUqksh/sjpHHwGxdDyQylXhcVJT7e7R9ld/cIjnXIZg3tNaW01QQpD/go83lJZh0Go2maKwO4XOacJxgn7mG+r2eUgM/DioYwgMa05ZIyxmy11naevl3PNplRIpFh97EhdvcN8PKObn7ybnTWbVrgkR2/bMc1vq0MePHtk4ykxrataC6hvS5Mc3mQJXUh0hnLhvZKblxaS0Xo1Jknh3qH+NpPd3P4WD/RDHjc4HW7WVThI+3yMzQaJ1Tq59ffu5jLmypwAe/2Rjg5kqSy1M+yuhChEu95BfjEHQmPDcYBF4sqApM97onhHM04kbmkIJdppVJZnt13hK89spP9kUt7rIkVOEeB0dQvt+/pTpDFw8rGKlbXlbOivnza+diZjMP/fGYPj71z6nz1Ck+WjMlwcDBOU9jFx9/TwQ1L6khj+f4bR3llXx83rqzl8kUV532jq4mTqIvKAyyr/+W9UCa+dqInfvqUR5FLSc8wAZi8rH4gnqSsxMPWw/38p+/vZGDmtR0umRo/fPqWNiq9Qa5bUkNlyH/WoB2IpQj6PFR4weUamwVjLFy3vIr26gAv7hvixuWV3L6qidJSLyeH47iN5T1Lq7l+WS21ofO7uAdOPYnqcpmz9rw140TmkoJ8gUulsrx06Bjf/0UXgaCLsK+ElqoyfvFu75yH+MoKuG51A36Pl5ryEu64rJny87iIpqrUx6dvWMb1qxoIeMbWs4ynspQHfbRVBrljbYZFoQCVIT8A1UE/t1/eRMDjJhTwXtDQx+knUc8211szTmQu6Zm2wB0divLVH+5lb3+SUjd8/KpmugZ7eGbv4JzVsLbWzY1rWvnI2iY6asI4jr2gy9o9HhcddWE66sKT207pKVebM/ZvKJ/+gp4LpZ63FAIF+QI1MR1vOJ6ivdxwuB+WVMC3NnfNaR1NZS4+d/tqrlvSODnW7HKZi1oAeaq56hGr5y2FQM/ABSqezrKje5g3D/cQCJaxoibN9r7sRbXVXApOBiIpOP3aoBqgvhreu6qGaMLg9RgWVQXJprM4eFjXWkFnS+3kfVFE5MLpr2eBSSQyHBiI0l5RyuKaID/b6fDC3j6GZ16k5xTf/sx64ilYUhPkxGiSAz2j+L1uYqkM71teR3tV2djqO46lezh+yvJumo4nknu5WurtDuDrgBt40Fr7lVy0K7kTiabY1j1MwGt46p0eNq5bhHFn+fmOI+cd4j/9wjWsbKyZHH/2u13UhQMsrS3DZ1yksdQE/Xg8Y+tjOo49ZYaHhiBELo1Z/2UZY9zA3wEfALqA140xP7LW7pxt25IbqVSWJ3ed4PHtx/n197Sycd0i/B7DV57YyzvneZ+UQ1+5c/LjqaFc7vFRPs29vU/fT0QunVz8lV0N7LPWHgAwxnwP2AgoyAtE90icnuEYd65p5OrWaozHxSv7+4jHz32V5tQAF5HClIslS5qAqWt4dY1vO4Ux5j5jzBZjzJbe3t4cHFZmksk49IwkyGQcFoUDfHBtMx9e00RZcGxe9oaWSl4+cvYxlfUehbhIschFj3y6M1dn3InLWrsJ2ARjN83KwXFlBgOx1ClrU069h/bO473c9T9fP+vXvvPnt2tetEgRyUWQdwEtUx43A905aFdm4WxrU37szx/n9fj0X6MeuEhxysXQyuvAMmNMhzHGB9wL/CgH7coFSKWyHOqLkEqNzQX
3eFzUhX95T+0DPYO0f1EhLjIfzbpHbq3NGGM+DzzJ2PTDb1pr35l1ZXJBukfiPLurh1tW1Z2xFNkDL7/KVx6bfhEIBbhI8cvJ3DBr7RPAE7loSy7OonDgjEWBI9EUf/roszyywzlj/89eA//5boW4yHygSb7zxOmLAv/Dz1/jr3585uygm+vgm/9OAS4ynyjI55lEIsN339x7RohXAd/6fCdrmuvzU5iIXDIK8nlkaDTBf3l8Kz/YNnTK9gd+awV3rFqan6JE5JJTkM8DsViaH+44yDdfOMS7A+nJ7R9cFuD+jRtorSnPY3UicqkpyIvcSCTJQ5sP8sBz+4k4UOEHr4E///hyPrR6Wb7LE5E5oCAvUpmMw+HBUR55vYtnd56gpaYEv9tw/4dXclV7o24VK7KAKMiL0JG+Yf73i4fIpuO8dSzC+1fWc8tlDaxprNQCDSILkP7qi0jPUJQfbDvGG4f7eW73ABvX1/OF21ZzbXs14TJ/vssTkTxRkBeJSDTF15/dyw/f6Ka9wsWH1zbw+zcv1YlMEVGQF7pYLM2LB0/w0zePsa8vwtqWCj7+3mZuX96kYRQRARTkBSuRyPDivuP8y5Zj7D02QPeo5c41dfzBLStoqS6bvBmWiIiCvAD1Dcf4hxf38YOtR+lNwI1Ly9l4ZQWfuKaNRVWhfJcnIgVGQV5AHMfSPRDhy49t59ndQwTccPPyKv70w6tor63Id3kiUqAU5AXCcSwnR+I8/OphfrF/CJcLNm5o4I9vv4Ky4PSLG4uIgIK8YMTTWQ70RqkIefmd97cRLPGxcU2LQlxEzklBnkexWJp3To7QXllKZZmfy5vKWVpXRnXQr5OZInLelBZ5tKtnlIc3H+Kl/X0ksw7lpT7qywMKcRG5ILNKDGPMx4wx7xhjHGNMZ66Kms8SiQzbDg7wxpF+OqoC/Nq17dywpEar1ovIRZvt0MoO4B7gH3JQy7w2NJrg+d09RJ00r+8bwONx89kblnBVR3W+SxORIjerILfW7gIwRnfam8mJwQh/9cQuth0ZoKM2zMa1TbTVB1lcFcx3aSIyD8zZyU5jzH3AfQCtra1zddi8SiQybDs6wEObD/L8O33csrqGT13fzpWLqnV5vYjkzDnTxBjzDNAwzae+ZK394fkeyFq7CdgE0NnZac+7wiJ2YCDK/37pAPtODnPbFXXc/6FVNFSWnfsLRUQuwDmD3Fp761wUMp84jiWeztJeUcrv3LCYY6NxbllWT0WoJN+licg8pPf3l0A8naVrMEZzZSnXLqvLdzkiMs/Ndvrh3caYLuA9wOPGmCdzU1ZxC3jdNFeWakqhiMyJ2c5aeRR4NEe1zBsulyHo15sdEZkbuoRQRKTIKchFRIqcglxEpMgpyM+T41iiyQyOsyCmwItIEVGQn6eJKYXxdDbfpYiInEJBfp40pVBECpXmyJ0nTSkUkUKlHrmISJFTkIuIFDkFuYhIkVOQi4gUOQW5iEiRW1BBrot6RGQ+WlBBrot6RGQ+WlBBrot6RGQ+WlBXuOiiHhGZjxZUj1xEZD6a7VJvf22M2W2M2W6MedQYU5Gjui6aTmiKyEIz2x7508Dl1to1wF7g/tmXNDs6oSkiC82sgtxa+5S1NjP+cDPQPPuSZkcnNEVkocnlGPlngJ+c7ZPGmPuMMVuMMVt6e3tzeNhTTZzQdLnMJTuGiEghOWeQG2OeMcbsmObfxin7fAnIAA+frR1r7SZrbae1trO2tnZWRWscXETkl845F89ae+tMnzfG/BZwF3CLtXZOknViHLy5slTTCUVkwZtVChpj7gD+BHi/tTaWm5LOznEs8XQWv9ulcXARkXGzHSP/BhACnjbGbDPGPJCDms5qoieezDoaBxcRGTerHrm1dmmuCjkfmpEiInKmohpg1iX2IiJn0iX6IiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRU5BLiJS5BTkIiJFzszR7VFOPagxvcDhHDdbA/TluM1cUn0Xr5B
rA9U3G4VcGxRefW3W2jPuOpiXIL8UjDFbrLWd+a7jbFTfxSvk2kD1zUYh1waFX98EDa2IiBQ5BbmISJGbT0G+Kd8FnIPqu3iFXBuovtko5Nqg8OsD5tEYuYjIQjWfeuQiIguSglxEpMjNqyA3xvy1MWa3MWa7MeZRY0xFvmuayhjzMWPMO8YYxxhTEFOajDF3GGP2GGP2GWO+mO96pjLGfNMY02OM2ZHvWqZjjGkxxjxvjNk1/nv9Qr5rmmCMKTHGvGaMeWu8tj/Pd03TMca4jTFvGmMey3ctpzPGHDLGvD2++tmWfNczk3kV5MDTwOXW2jXAXuD+PNdzuh3APcCL+S4Exv6IgL8DPgisBj5pjFmd36pO8S3gjnwXMYMM8EfW2lXAtcDvFdDPLwncbK1dC6wD7jDGXJvfkqb1BWBXvouYwU3W2nWFPpd8XgW5tfYpa21m/OFmoDmf9ZzOWrvLWrsn33VMcTWwz1p7wFqbAr4HbMxzTZOstS8CA/mu42ystcettW+MfzzKWCA15beqMXZMZPyhd/xfQc1sMMY0A3cCD+a7lmI3r4L8NJ8BfpLvIgpcE3B0yuMuCiSIio0xph1YD7ya51ImjQ9bbAN6gKettQVT27i/Bf4YcPJcx9lY4CljzFZjzH35LmYmRbcApjHmGaBhmk99yVr7w/F9vsTY296H57K28WOfs74CYqbZVlC9tmJgjCkDvg/8W2vtSL7rmWCtzQLrxs8VPWqMudxaWxDnG4wxdwE91tqtxpgb81zO2Vxnre02xtQBTxtjdo+/Syw4RRfk1tpbZ/q8Mea3gLuAW2weJsmfq74C0wW0THncDHTnqZaiZIzxMhbiD1trH8l3PdOx1g4ZY15g7HxDQQQ5cB3wEWPMh4ASIGyM+Y619tfzXNcka233+P89xphHGRuKLMggn1dDK8aYO4A/AT5irY3lu54i8DqwzBjTYYzxAfcCP8pzTUXDGGOAfwR2WWv/Jt/1TGWMqZ2YtWWMCQC3ArvzWtQU1tr7rbXN1tp2xp53zxVSiBtjgsaY0MTHwG0UzovgGeZVkAPfAEKMvQ3aZox5IN8FTWWMudsY0wW8B3jcGPNkPusZPzH8eeBJxk7U/bO19p181jSVMea7wCvACmNMlzHms/mu6TTXAb8B3Dz+fNs23sMsBI3A88aY7Yy9YD9trS24KX4FrB542RjzFvAa8Li19qd5rumsdIm+iEiRm289chGRBUdBLiJS5BTkIiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRe7/A4JcVhLWP4FuAAAAAElFTkSuQmCC","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["plt.scatter(frc_api['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","frc_bulk['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","s=1, alpha=0.1)"]}],"metadata":{"kernelspec":{"display_name":"Python 3.10.5 ('pudl-dev')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.5"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"adf7f0b18919f80aa48732f306897016c591ffeed72c8be7c2b511e7f25e6b58"}}},"nbformat":4,"nbformat_minor":2} +{"cells":[{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[],"source":["%load_ext autoreload"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["%autoreload 2"]},{"attachments":{},"cell_type":"markdown","metadata":{},"source":["import pudl\n","import pandas as pd\n","import numpy as np\n","import sqlalchemy as sa\n","import pudl.output.eia923 as eia\n","from pudl.workspace.setup import PudlPaths\n","pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["api_df = eia.get_fuel_cost_avg_eiaapi(eia.FUEL_COST_CATEGORIES_EIAAPI) # coal, oil, gas"]},{"cell_type":"code","execution_count":43,"metadata":{},"outputs":[],"source":["bulk_df = eia.get_fuel_cost_avg_bulk_elec(pudl_engine=pudl_engine)"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
statereport_datebulk_agg_fuel_cost_per_mmbtufuel_type_code_pudl
0AK2012-01-010.0coal
1AK2012-02-010.0coal
2AK2012-03-010.0coal
3AK2012-04-010.0coal
4AK2012-05-010.0coal
\n","
"],"text/plain":[" state report_date bulk_agg_fuel_cost_per_mmbtu fuel_type_code_pudl\n","0 AK 2012-01-01 0.0 coal\n","1 AK 2012-02-01 0.0 coal\n","2 AK 2012-03-01 0.0 coal\n","3 AK 2012-04-01 0.0 coal\n","4 AK 2012-05-01 0.0 coal"]},"execution_count":12,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.head()"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
report_datefuel_cost_per_unitstatefuel_type_code_pudl
02022-07-0151.83AKcoal
12022-06-0159.16AKcoal
22022-05-0153.04AKcoal
32022-04-0151.69AKcoal
42022-03-0158.55AKcoal
\n","
"],"text/plain":[" report_date fuel_cost_per_unit state fuel_type_code_pudl\n","0 2022-07-01 51.83 AK coal\n","1 2022-06-01 59.16 AK coal\n","2 2022-05-01 53.04 AK coal\n","3 2022-04-01 51.69 AK coal\n","4 2022-03-01 58.55 AK coal"]},"execution_count":13,"metadata":{},"output_type":"execute_result"}],"source":["api_df.drop(columns=['name', 'series_id', 'units']).head()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/plain":["((18501, 4), (30804, 7))"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["bulk_df.shape, api_df.shape"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 18501 entries, 0 to 18500\n","Data columns (total 4 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 state 18501 non-null object \n"," 1 report_date 18501 non-null datetime64[ns]\n"," 2 bulk_agg_fuel_cost_per_mmbtu 18501 non-null float64 \n"," 3 fuel_type_code_pudl 18501 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(2)\n","memory usage: 578.3+ KB\n"]}],"source":["bulk_df.info()"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Int64Index: 30804 entries, 0 to 10499\n","Data columns (total 7 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 report_date 30804 non-null datetime64[ns]\n"," 1 fuel_cost_per_unit 18615 non-null float64 \n"," 2 state 30804 non-null object \n"," 3 units 30804 non-null object \n"," 4 series_id 30804 non-null object \n"," 5 name 30804 non-null object \n"," 6 fuel_type_code_pudl 30804 non-null object \n","dtypes: datetime64[ns](1), float64(1), object(5)\n","memory usage: 1.9+ MB\n"]}],"source":["api_df.info()"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[],"source":["keep_cols = ['state', 'report_date',\n","'fuel_cost_per_mmbtu', 
'fuel_type_code_pudl', 'fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["frc_api = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['fuel_cost_per_unit']]"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["frc_bulk = eia.fuel_receipts_costs_eia923(\n"," pudl_engine=pudl_engine,\n"," fill=True\n",")[keep_cols + ['bulk_agg_fuel_cost_per_mmbtu']]"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["((560374, 6), (560374, 6))"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api.shape, frc_bulk.shape"]},{"cell_type":"code","execution_count":26,"metadata":{},"outputs":[],"source":["mismatch = frc_api['fuel_cost_from_eiaapi'] ^ frc_bulk['fuel_cost_from_eiaapi']"]},{"cell_type":"code","execution_count":27,"metadata":{},"outputs":[{"data":{"text/plain":["sum 0.0\n","mean 0.0\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":27,"metadata":{},"output_type":"execute_result"}],"source":["mismatch.agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":21,"metadata":{},"output_type":"execute_result"}],"source":["frc_api['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":28,"metadata":{},"outputs":[{"data":{"text/plain":["136530"]},"execution_count":28,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].sum()"]},{"cell_type":"code","execution_count":30,"metadata":{},"outputs":[],"source":["diff = frc_api['fuel_cost_per_mmbtu'].sub(frc_bulk['fuel_cost_per_mmbtu'])"]},{"cell_type":"code","execution_count":32,"metadata":{},"outputs":[{"data":{"text/plain":["sum 186016.00000\n","mean 0.33195\n","Name: fuel_cost_per_mmbtu, dtype: 
float64"]},"execution_count":32,"metadata":{},"output_type":"execute_result"}],"source":["diff.ne(0).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":35,"metadata":{},"outputs":[{"data":{"text/plain":["sum 133687.000000\n","mean 0.238567\n","Name: fuel_cost_per_mmbtu, dtype: float64"]},"execution_count":35,"metadata":{},"output_type":"execute_result"}],"source":["diff.abs().gt(1e-3).agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":34,"metadata":{},"outputs":[{"data":{"text/plain":["sum 136530.000000\n","mean 0.243641\n","Name: fuel_cost_from_eiaapi, dtype: float64"]},"execution_count":34,"metadata":{},"output_type":"execute_result"}],"source":["frc_bulk['fuel_cost_from_eiaapi'].agg(['sum', 'mean'])"]},{"cell_type":"code","execution_count":37,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":37,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVcElEQVR4nO3db4xc53me8euObCusY9ZSZW1pkihllHIjibEdbVgVRouNlURMZJhCURU0VItqVbAQFFcGWMRk8qHoBwJEW6e12lgtEbuiUDUqkVgVEVtJZDbTIoD+mHLlMBStirUYhSEjxm5Ta21A0apPP8yRONwdzs5Sy5kdnusHDObMc86Zeefl8J533jlzNlWFJKkdfmjcDZAkjY6hL0ktYuhLUosY+pLUIoa+JLXIO8bdgMVcddVVtWHDhnNq3//+93n3u989ngZNAPtnMPtnMPtnsEnpn2efffY7VfW++fUVH/obNmzg8OHD59Q6nQ4zMzPjadAEsH8Gs38Gs38Gm5T+SfKH/epO70hSixj6ktQihr4ktciioZ/kg0me67l8L8lnklyZ5IkkLzbXV/TsszvJ8SQvJLmlp35jkiPNuvuT5GI9MUnSQouGflW9UFUfrqoPAzcCPwAeBXYBh6pqI3CouU2S64BtwPXAFuALSS5r7u4BYAewsblsWdZnI0kaaKnTOzcD/6uq/hDYCuxv6vuB25rlrcAjVfVaVb0EHAc2J1kDrK6qJ6t7lreHevaRJI3AUg/Z3Ab8WrM8VVWnAarqdJKrm/pa4KmefU42tdeb5fn1BZLsoPuJgKmpKTqdzjnrZ2dnF9R0lv0zmP0zmP0z2KT3z9Chn+RdwCeA3Ytt2qdWA+oLi1X7gH0A09PTNf+Y2Ek5TnZc7J/B7J/B7J/BJr1/ljK987PAN6rqleb2K82UDc31maZ+Eljfs9864FRTX9enLkkakaWE/ic5O7UDcBDY3ixvBx7rqW9LcnmSa+h+YftMMxX0apKbmqN27uzZR5poG3Z95a2LtJINNb2T5C8APw38o57yXuBAkruBl4HbAarqaJIDwPPAHHBvVb3R7HMP8CCw
Cni8uUiSRmSo0K+qHwB/aV7tu3SP5um3/R5gT5/6YeCGpTdTkrQc/EWuJLXIij/LprRSOX+vSeRIX5JaxJG+tMx6PwGc2HvrGFsiLeRIX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE0JekFjH0JalFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWmSo0E/y3iS/nuRbSY4l+RtJrkzyRJIXm+srerbfneR4kheS3NJTvzHJkWbd/UlyMZ6UJKm/YUf6nwd+q6r+GvAh4BiwCzhUVRuBQ81tklwHbAOuB7YAX0hyWXM/DwA7gI3NZcsyPQ9J0hAWDf0kq4G/BXwRoKr+vKr+DNgK7G822w/c1ixvBR6pqteq6iXgOLA5yRpgdVU9WVUFPNSzjyRpBIb5w+gfAP4U+A9JPgQ8C9wHTFXVaYCqOp3k6mb7tcBTPfufbGqvN8vz6wsk2UH3EwFTU1N0Op1z1s/Ozi6o6Sz7Z7Dl6p+dm+YW3WYS/x18/Qw26f0zTOi/A/hx4NNV9XSSz9NM5ZxHv3n6GlBfWKzaB+wDmJ6erpmZmXPWdzod5td0lv0z2HL1z127vrLoNifuePuPM2q+fgab9P4ZZk7/JHCyqp5ubv863TeBV5opG5rrMz3br+/Zfx1wqqmv61OXJI3IoqFfVX8C/FGSDzalm4HngYPA9qa2HXisWT4IbEtyeZJr6H5h+0wzFfRqkpuao3bu7NlHuiRt2PWVty7SSjDM9A7Ap4GHk7wL+Dbw9+m+YRxIcjfwMnA7QFUdTXKA7hvDHHBvVb3R3M89wIPAKuDx5iJJGpGhQr+qngOm+6y6+Tzb7wH29KkfBm5YQvukS0bvaP/E3lvH2BK1mb/IlaQWMfQlqUUMfUlqEUNfklrE0JekFhn2kE1J4PH2mniO9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklrE4/SlMfCMmxoXR/qS1CKO9KVF+CtcXUoc6UtSixj6ktQihr4ktYihL0ktYuhLUosMFfpJTiQ5kuS5JIeb2pVJnkjyYnN9Rc/2u5McT/JCklt66jc293M8yf1JsvxPSZJ0Pks5ZPMnq+o7Pbd3AYeqam+SXc3tzya5DtgGXA+8H/hakmur6g3gAWAH8BTwVWAL8PgyPA9pWXmYpi5Vb2d6Zyuwv1neD9zWU3+kql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERh2pF/A7yQp4N9X1T5gqqpOA1TV6SRXN9uupTuSf9PJpvZ6szy/vkCSHXQ/ETA1NUWn0zln/ezs7IKazrJ/Bhumf3ZumhtNY2DF/Vv5+hls0vtn2ND/aFWdaoL9iSTfGrBtv3n6GlBfWOy+qewDmJ6erpmZmXPWdzod5td0lv0z2DD9c9cIp3dO3DEzsscahq+fwSa9f4aa3qmqU831GeBRYDPwSjNlQ3N9ptn8JLC+Z/d1wKmmvq5PXZI0IouGfpJ3J3nPm8vAzwB/ABwEtjebbQcea5YPAtuSXJ7kGmAj8EwzFfRqkpuao3bu7NlHkjQCw0zvTAGPNkdXvgP4T1X1W0m+DhxIcjfwMnA7QFUdTXIAeB6YA+5tjtwBuAd4EFhF96gdj9yRpBFaNPSr6tvAh/rUvwvcfJ599gB7+tQPAzcsvZnSxedhmmoDf5ErSS3i+fSlMfOvaGmUHOlLUosY+pLUIoa+JLWIoS9JLeIXudIK4pe6utgMfekS4puGFuP0jiS1iKEvSS1i6EtSixj6ktQihr4ktYihL0ktYuhLUosY+pLUIoa+JLWIv8iVJpC/vNWFMvTVaiv5TyQa7LoYDH1pwq3kNy6tPM7pS1KLDB36SS5L8j+S/GZz+8okTyR5sbm+omfb3UmOJ3khyS099RuTHGnW3Z8ky/t0JEmDLGV65z7gGLC6ub0LOFRVe5Psam5/Nsl1wDbgeuD9wNeSXFtVbwAPADuAp4CvAluAx5flmUiX
MKdwtFyGGuknWQfcCvxqT3krsL9Z3g/c1lN/pKpeq6qXgOPA5iRrgNVV9WRVFfBQzz6SpBEYdqT/r4FfAN7TU5uqqtMAVXU6ydVNfS3dkfybTja115vl+fUFkuyg+4mAqakpOp3OOetnZ2cX1HSW/TNYb//s3DQ33sZcRBf6GvD1M9ik98+ioZ/k48CZqno2ycwQ99lvnr4G1BcWq/YB+wCmp6drZubch+10Osyv6Sz7Z7De/rnrEp42OXHHzAXt5+tnsEnvn2FG+h8FPpHk54AfBlYn+Y/AK0nWNKP8NcCZZvuTwPqe/dcBp5r6uj51SdKILDqnX1W7q2pdVW2g+wXtf62qvwccBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0Am/nx1l7gQNJ7gZeBm4HqKqjSQ4AzwNzwL3NkTsA9wAPAqvoHrXjkTuSNEJLCv2q6gCdZvm7wM3n2W4PsKdP/TBww1IbKUlaHv4iV5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEf9colrnyB//30v6RGvSIIa+dInyD6urH6d3JKlFDH1JahFDX5JaxNCXpBYx9CWpRQx9SWoRQ1+SWsTQl6QWMfQlqUUMfUlqEUNfklpk0dBP8sNJnknyzSRHk/yzpn5lkieSvNhcX9Gzz+4kx5O8kOSWnvqNSY406+5PkovztCRJ/QxzwrXXgI9V1WySdwK/l+Rx4G8Dh6pqb5JdwC7gs0muA7YB1wPvB76W5NqqegN4ANgBPAV8FdgCPL7sz0qap/fkYzs3jbEh0pgtOtKvrtnm5jubSwFbgf1NfT9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkERjq1MpJLgOeBf4q8CtV9XSSqao6DVBVp5Nc3Wy+lu5I/k0nm9rrzfL8er/H20H3EwFTU1N0Op1z1s/Ozi6o6Sz7Z6Gdm+beWp5ade7tNljK68HXz2CT3j9DhX4zNfPhJO8FHk1yw4DN+83T14B6v8fbB+wDmJ6erpmZmXPWdzod5td0lv2z0F3nTO/M8bkj7fpTEifumBl6W18/g016/yzp6J2q+jOgQ3cu/pVmyobm+kyz2Ulgfc9u64BTTX1dn7okaUSGOXrnfc0InySrgJ8CvgUcBLY3m20HHmuWDwLbklye5BpgI/BMMxX0apKbmqN27uzZR5I0AsN8xl0D7G/m9X8IOFBVv5nkSeBAkruBl4HbAarqaJIDwPPAHHBvMz0EcA/wILCK7lE7HrkjSSO0aOhX1e8DH+lT/y5w83n22QPs6VM/DAz6PkCSdBH5i1xJahFDX5JapF3HralVen+FK6nLkb4ktYihL0ktYuhLUosY+pLUIoa+JLWIoS9JLWLoS1KLGPqS1CKGviS1iL/I1SXFX+FKgznSl6QWcaQvtUDvJ6ATe28dY0s0bo70JalFHOlLLeOov90c6UtSixj6ktQihr4ktYihL0ktsmjoJ1mf5HeTHEtyNMl9Tf3KJE8kebG5vqJnn91Jjid5IcktPfUbkxxp1t2fJBfnaUmS+hlmpD8H7KyqHwVuAu5Nch2wCzhUVRuBQ81tmnXbgOuBLcAXklzW3NcDwA5gY3PZsozPRZK0iEVDv6pOV9U3muVXgWPAWmArsL/ZbD9wW7O8FXikql6rqpeA48DmJGuA1VX1ZFUV8FDPPpKkEVjScfpJNgAfAZ4GpqrqNHTfGJJc3Wy2FniqZ7eTTe31Znl+vd/j7KD7iYCpqSk6nc4562dnZxfUdFab+2fnprlFt5laNdx2bdDvddLm188wJr1/hg79JD8C/Abwmar63oDp+H4rakB9YbFqH7APYHp6umZmZs5Z3+l0mF/TWW3un7uGOOHazk1zfO6Iv0sEOHHHzIJam18/w5j0/hnq6J0k76Qb+A9X1Zeb8ivNlA3N9ZmmfhJY37P7OuBUU1/Xpy5JGpFhjt4J8EXgWFX9cs+qg8D2Znk7
8FhPfVuSy5NcQ/cL22eaqaBXk9zU3OedPftIkkZgmM+4HwU+BRxJ8lxT+0VgL3Agyd3Ay8DtAFV1NMkB4Hm6R/7cW1VvNPvdAzwIrAIeby7S2+I59KXhLRr6VfV79J+PB7j5PPvsAfb0qR8GblhKAyVJy8df5EpSixj6ktQiHremieQ8vnRhHOlLUosY+pLUIoa+JLWIc/pSi/n3ctvHkb4ktYgjfUkD+Wng0mLoS1rAQ2IvXYa+JoZBJL19hr6koTnVM/n8IleSWsTQl6QWMfQlqUUMfUlqEb/I1YrmETuj82Zf79w0h9Fw6XKkL0ktYuhLUosY+pLUIoa+JLXIot/WJPkS8HHgTFXd0NSuBP4zsAE4Afzdqvo/zbrdwN3AG8A/rqrfbuo3Ag8Cq4CvAvdVVS3v09Gk84tb6eIaZqT/ILBlXm0XcKiqNgKHmtskuQ7YBlzf7POFJJc1+zwA7AA2Npf59ylJusgWDf2q+u/A/55X3grsb5b3A7f11B+pqteq6iXgOLA5yRpgdVU92YzuH+rZR5I0Ihd6MO5UVZ0GqKrTSa5u6muBp3q2O9nUXm+W59f7SrKD7qcCpqam6HQ656yfnZ1dUNNZk9w/3WPEL66pVaN5nEk1bP/8m4cfe2t509q/eDGbtKJM8v8vWP5fYKRPrQbU+6qqfcA+gOnp6ZqZmTlnfafTYX5NZ01y/9w1gjn9nZvm+NwRf3x0PhfSPyfumLk4jVmBJvn/F1x46L+SZE0zyl8DnGnqJ4H1PdutA0419XV96pJf3kojdKGHbB4EtjfL24HHeurbklye5Bq6X9g+00wFvZrkpiQB7uzZR5I0IsMcsvlrwAxwVZKTwD8F9gIHktwNvAzcDlBVR5McAJ4H5oB7q+qN5q7u4ewhm483F0nSCC0a+lX1yfOsuvk82+8B9vSpHwZuWFLrJEnLyl/kSlKLGPqS1CKGviS1iAcrayw8TFMaD0Nf0tvW+yZ+Yu+tY2yJFuP0jiS1iKEvSS1i6EtSixj6ktQifpGrkfGIHWn8DH1dVAa9tLIY+lp2Br20cjmnL0kt4khf0rLyh1ormyN9SWoRR/paFs7jS5PB0NcFM+i1GKd6Vh5DX4sy3KVLh3P6ktQijvQljYRTPSuDoa++nNLRxeQbwPiMPPSTbAE+D1wG/GpV7R11G9SfQa+V5Hyvx+V8k2jjm89IQz/JZcCvAD8NnAS+nuRgVT0/ynZcas73wjXENQmW+joddvu2hPhSjXqkvxk4XlXfBkjyCLAVWNGhP8yIY5gX4qDtz7duqY+xYddX2LlpjrsMfLXcxXozWez/10p/s0lVje7Bkr8DbKmqf9jc/hTw16vq5+dttwPY0dz8IPDCvLu6CvjORW7uJLN/BrN/BrN/BpuU/vkrVfW++cVRj/TTp7bgXaeq9gH7znsnyeGqml7Ohl1K7J/B7J/B7J/BJr1/Rn2c/klgfc/tdcCpEbdBklpr1KH/dWBjkmuSvAvYBhwccRskqbVGOr1TVXNJfh74bbqHbH6pqo5ewF2dd+pHgP2zGPtnMPtnsInun5F+kStJGi/PvSNJLWLoS1KLTGzoJ/l0kheSHE3yz8fdnpUoyT9JUkmuGndbVpok/yLJt5L8fpJHk7x33G1aCZJsaf5fHU+ya9ztWUmSrE/yu0mONblz37jbdCEmMvST/CTdX/L+WFVdD/zLMTdpxUmynu7pLl4ed1tWqCeAG6rqx4D/Cewec3vGruc0KT8LXAd8Msl1423VijIH7KyqHwVuAu6dxP6ZyNAH7gH2VtVrAFV1ZsztWYn+FfAL9Pnxm6Cqfqeq5pqbT9H9zUjbvXWalKr6c+DN06QIqKrTVfWNZvlV4BiwdrytWrpJDf1rgb+Z5Okk/y3JT4y7QStJkk8Af1xV3xx3WybEPwAeH3cjVoC1wB/13D7JBIbaKCTZAHwEeHrMTVmyFXs+/SRfA/5yn1W/RLfdV9D9iPUTwIEkH6gWHX+6SP/8IvAz
o23RyjOoj6rqsWabX6L7sf3hUbZthRrqNCltl+RHgN8APlNV3xt3e5ZqxYZ+Vf3U+dYluQf4chPyzyT5f3RPgvSno2rfuJ2vf5JsAq4BvpkEutMW30iyuar+ZIRNHLtBryGAJNuBjwM3t2nAMICnSVlEknfSDfyHq+rL427PhZjU6Z3/AnwMIMm1wLuYjLPeXXRVdaSqrq6qDVW1ge5/5B9vW+AvpvljPp8FPlFVPxh3e1YIT5MyQLqjqC8Cx6rql8fdngs1qaH/JeADSf6A7pdN2x2paYn+LfAe4IkkzyX5d+Nu0Lg1X2y/eZqUY8CBCzxNyqXqo8CngI81r5nnkvzcuBu1VJ6GQZJaZFJH+pKkC2DoS1KLGPqS1CKGviS1iKEvSS1i6EtSixj6ktQi/x8iTBQ1MrleqwAAAABJRU5ErkJggg==","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["diff.abs().replace(np.inf, np.nan).replace(0, np.nan).transform(np.log10).hist(bins=100)"]},{"cell_type":"code","execution_count":40,"metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"data":{"text/plain":["state True\n","report_date True\n","fuel_type_code_pudl True\n","fuel_cost_from_eiaapi True\n","dtype: bool"]},"metadata":{},"output_type":"display_data"}],"source":["# data is all in same order\n","frc_api.drop(columns=['fuel_cost_per_mmbtu', 'fuel_cost_per_unit']).eq(\n"," frc_bulk.drop(columns=['fuel_cost_per_mmbtu', 'bulk_agg_fuel_cost_per_mmbtu'])\n",").all()"]},{"cell_type":"code","execution_count":42,"metadata":{},"outputs":[{"data":{"text/plain":[""]},"execution_count":42,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAApiElEQVR4nO3deXhc133e8e+ZFYPBDPaN2LmTkriI0GJLsrVZki3ZjBQvsrPVdqLGjRM3TZtYdZs0TZ3YjZ88ceukCqv4sRWrdtNY8iLJ1i5LskVJpERRFDdxJwiS2JfZl3v6BxaDJAguGGJmgPfzPHyIubg49wdg8M6Zc8+9x1hrERGR4uXKdwEiIjI7CnIRkSKnIBcRKXIKchGRIqcgFxEpcp58HLSmpsa2t7fn49AiIkVr69atfdba2tO35yXI29vb2bJlSz4OLSJStIwxh6fbrqEVEZEipyAXESlyCnIRkSKnIBcRKXIKchGRIqcgFxEpcgpyEZEipyAXEZkjjmOJJjM4Tm5vH64gFxGZI/F0lq7BGPF0NqftKshFRC6hqb3wgNdNc2UpAa87p8dQkIuIXEJTe+EulyHo9+BymZweQ0EuIpJjc9ELn0pBLiKSY3PRC59KQS4ikmNz0QufKi+3sRURmc8meuFzdrw5O5KIiFwSCnIRkSKnIBcRKXI5GcQxxhwCRoEskLHWduaiXRERObdcjsbfZK3ty2F7IiIFwXEs8XSWgNd9SacRXiwNrYiInMOlukdKruQqyC3wlDFmqzHmvhy1KSJSEOZ6XviFytXQynXW2m5jTB3wtDFmt7X2xak7jAf8fQCtra05OqyISG5NN4wy1/PCL1ROeuTW2u7x/3uAR4Grp9lnk7W201rbWVtbm4vDiojkzMT9UaLJTEEPo0xn1kFujAkaY0ITHwO3ATtm266IyFxxHEtfJMmR/hhAQQ+jTCcX7xXqgUeNMRPt/R9r7U9z0K6IyCU1MYziOJb+aJLqMv8lv8HVpTDrILfWHgDW5qAWEZE5M9EL748maa4opbUqWLDTC8+lcEfvRUQuoXg6S38kVbS
98KkU5CKyIDiOZTSRJpHOUh30E/C6aa0uLdpe+FS6IEhE5r2JYZQd3cNsPtDPQCw1Jws+zBX1yEVk3jr9ZObimiBLa8uoKvXlu7ScUpCLyLyUyTh0DcWIJrO0VhX3ycxz0dCKiMxLA7EUO4+PEPS7Cfo982YYZTrqkYvIvDH18vqqUh+dbVVUlfrmbYBPUI9cRIpeJuPQM5JgNJ6evLze43FRFy7B45n/MTf/v0MRmdccx9I1FGPL4QHimWzRXV6fCxpaEZGiNHGTq1gqw2giw+rGMDVB/4LogZ9OQS4iRSWVytI9Eqc84OVwfwzHWporS6kp88/7sfCzWXgvXSJS1I4Nx/jJ9uMMRJMsqS1jWV1oQYc4qEcuIkVgYhgFIBzwsnJRmKqgn1DAm+fKCoOCXEQKXjSZYcexYfxeF8vqQlzdUb3gTmjOREEuIgVr6iX2fq+LporAvL6w52IpyEWkoKRSWQ4PRgCoKSuhZzTJovIAKxrC8/YS+9lSkItIwXAcy76+CI9tOwYu+NiGlsl54Qrws8tZkBtj3MAW4Ji19q5ctSsiC0c8nSXrWO64ooGAz01TeSk+n8bCzyWXPfIvALuAcA7bFJEFYGJueENZCYtry9QDv0A5mUdujGkG7gQezEV7IjL/pVJZDvSOMjiapGs4xrO7ejgRSehk5kXI1QVBfwv8MeCcbQdjzH3GmC3GmC29vb05OqyIFKvDgxG+u/kwrx/pp7LExy2r6lgUDuS7rKI06yA3xtwF9Fhrt860n7V2k7W201rbWVtbO9vDikiRmri4x7EWr8fQEC6hPOijvaZM4+EXKRdj5NcBHzHGfAgoAcLGmO9Ya389B22LyDwx9SZXA9E0DeEAH7+qjUXhgIZSZmnWQW6tvR+4H8AYcyPw7xXiIjJhIsCjqQzdQ3Gw0FxVSnmpj8oyf77Lmxc0j1xELplMxuHoYJRjgzFKPF5aqgOU+ub3smv5kNMgt9a+ALyQyzZFpPg4jmU0MbZaz87uYarK/CytD1AbKlGAXwLqkYtIzkwE+EA0SddQnJ7hJJctKqepspRQiVchfokoyEUkJ1KpLPv6IvSNJuiPpljTXM7yuhDVC3TVnrmkIBeRWYvF0jy79yTv9oxy68p6VjSEFeBzSD9lEZkVx7G8cWyIn759nDK/h5bqIPXlAYX4HFKPXEQuSiqV5chQFGstfh/ce00bKxtClAd8+S5twVGQi8gFm7jd7JM7jlMV9PL+FfW0tgR1MjNPFOQicl4yGYeBWIqqUh/JrEPWsdy6up6qUh/1ujozrxTkInJOmYzD3pMj7DoxwvVLa6kN6XazhURBLiIzSiQybDk6yJGBCJWlfkrGwzvoV3wUCv0mRGRasViat48P0RdNsuXgILesqueK5gpCJd58lyanUZCLyCkmrs589WA//++1w9SF/dy0qpHO1ipKShQZhUi/FRGZFImm+PmBXvpiCcgafrWzhbaaIEuqQ7pXeAFTkIsIjmPpGYrxf7ce5aW9PXTUBrn3qnbWtlTqwp4ioCAXWcAyGYfDg6NsPzrEvpOjvLS3j5tW1nHbFQ0sqylXiBcJBbnIApXJOOzoHuSfXjnIruMjbGit4Dfe28HtqxoIa8GHoqIgF1mAEokMrx7u55V3e0gk0mxc38ytq+tpqwypF16EFOQiC0gkmuK1I/0MRBO8dmCI2pCPT1y7mKvbqjUjpYjN+jdnjCkBXgT84+39i7X2z2bbrojkztBogp/sPMbe4yO8un+ApqpS7lzXzIbWShrLS9ULL3K5eAlOAjdbayPGGC/wsjHmJ9bazTloW0RmqWcoypcf38GLe/pwu2F9WzUfv7qFGxbXqxc+T8z6t2ittUBk/KF3/J+dbbsiMjuxWJqfHz7JP79ylJ/tHaAx7OHDVzaxcV0zHTVh9cLnkZy8HBtj3MBWYCnwd9baV6fZ5z7gPoDW1tZcHFZEzmIkkuTbr+zn+28cZSiaYUN7mM9c38ENSxvUC5+HcvI
btdZmgXXGmArgUWPM5dbaHaftswnYBNDZ2akeu8glkEpl2dM7xGPbjvPU9qP4XW5+ZUMzn35vBy3VId2pcJ7K6UuztXbIGPMCcAew4xy7i0gOjUSSfHfrIZ58+wTD8RRXdtRy+2WNXL+kjtJS3ehqPsvFrJVaID0e4gHgVuCrs65MRM7Lod4h/v65vQyPJthxfJRkFj5yZQu/874l1IdL1QtfAHLRI28Evj0+Tu4C/tla+1gO2hWRGaRSWX5x4Dj/5Yc7ODSYBeDO1VW8f2UDt61qpCJUkucKZa7kYtbKdmB9DmoRkfO0+3gf/+0HO9nRNcpQdqwH9cHlYf7zh6+gobIs3+XJHNPpa5Eiksk4vN3Vzx/8ny0cHXEAWNsY4FPXtXDnqjbKglrBfiFSkIsUib7hGN/ZfIDHt3dzdMShBPjIlVX88e1rqSkvzXd5kkcKcpECNxJJ8vg7R/nB64fZ1pXA74YbFpfzm9e3ccPiRs0LFwW5SCHrGYry5Z/s4Km3+4g70Bx28fGr2/jNaxbrZKZMUpCLFKCh0QSPvd3FT7d38cqhKF7gfe1BPnfbSjY012rZNTmFglykwAyNJvjLJ7bz4zd7iQONIcOvrm/mt29Yrl64TEtBLlIgugdG+ebL+zjUG+PV/UNkgRuXhvntG5bS2VarsXA5Kz0zRArAvpMDfO6hrbzbn8IF3LK8nKuW1HHPumbNSJFzUpCL5NFIJMmPdhzhmy8c4MBQBoCbl5XzF3ev04U9ct4U5CJ54DiWt4/18l8f3cFb3XEyQEelm09c08K9G5ZoLFwuiIJcZI7tPt7HX/14J+8cG6UvOXZ5/fXtIf7yo2tprSnPd3lShBTkInMkk3F4ds9h/v3DOxkdu7qejnLDPVe18WtXL6YqHMhvgVK0FOQic2Db0RP8yXffZM+AM7ntfR2l/OVHN9BcHc5jZTIfKMhFLqFYLM2Pdx7iy4/sZfweVwSAP/pgG/d2LtdNriQnFOQil0hX/wh/8YO3ePrdESb64YvL4G//VSdrmuvzWpvMLwpykRzrHhjlgefe4Zld/XRHx7Y1heBf37yUe9Z0qBcuOacgF8mhvSf6+eyDmzkaGXvsA25aUc6XPnyFZqTIJZOLNTtbgIeABsABNllrvz7bdkWKSc9QlAdf2sX33zhJf3xsW2MQ7v+VVdy2rFWX18sllYtnVwb4I2vtG8aYELDVGPO0tXZnDtoWKXg/2rGHP/jOvsnHFR646bIq/vADq9ULlzmRizU7jwPHxz8eNcbsApoABbnMaz1DUb7+1DYefmNocluZC773e9ewsrEmf4XJgpPT93vGmHbGFmJ+dZrP3QfcB9Da2prLw4rMuS2Hu/k333qTnvgvtzWUwDc+vV4hLnMuZ0FujCkDvg/8W2vtyOmft9ZuAjYBdHZ22lwdV2QudQ+M8qf/8jLPHPjlhT3lbvjSPUv4lSuWacEHyYucBLkxxstYiD9srX0kF22KFJKh0QRffeLnfPfNxCnbf21DBb9/61rdqVDyKhezVgzwj8Aua+3fzL4kkcKy+WAXv/kPb5E6bftXPtHGvesvz0tNIlPlokd+HfAbwNvGmG3j2/6jtfaJHLQtkjcDI3H+9pltPPTawCnbr2+Cv/zk9ZqRIgUjF7NWXgZMDmoRKRi/OHCU39m0nehp2//7ve18fN1lealJ5Gx0lYLIFF39I/zu37/EjtMS/J41ZXzxQ53UVQTzU5jIDBTkIuP+/sVX+O9PDJyx/cFPr+LWFYvzUJHI+VGQy4K3s7uXX/kfr51xMrPVDQ/+/rUsb6jOS10i50tBLgva1376LN94IXHG9r/51GLuWbMqDxWJXDgFuSxIL+47zG8+uOOM7RXAv/zhe1haXzXnNYlcLAW5LDh3ffFxzoxw+NonO/jo2tVzXo/IbCnIZd5zHMtwPMWNf/EMw9N8Pgg8+R9u0NqZUrQU5DKvjUSS/P1TW3jgtaFpP/+56/380R03k8w6OI7F5dIlEVJ
8FOQyLw2NJvjelv185clDZ93nnz6znnBJgL09o1gL7TVBgn79SUjx0bNW5pWRSJLPbXqGn/fMvN+9V9bwwPMHuPuqRfjcPtY0VRDw6s6FUpwU5DJvdPWP8L6/fglnhn2+dFcTXleAbzy9j744NJb7+fytq2iuKNWwihQtBbkUJcexxNNZekciPPzKUXyuAf7u5ciMX/PVe1ZzbDDOi3uOk0rDiho/92xoprUqqBCXoqYgl6IUT2fpGozx3VcO8a1Xj82471Wl8GsfuZw3Dg1xsDfC8voAt1/RyM2r61hSXa4Ql6KnIJei5Lbw/7a+xbdePWMxqlOsqYZfvWElo4k0jnVoLPdz95WtXNVeg8fjmqNqRS4tBbkUFcexPLFzP5//zp4Z9/MCn7qyknd607yw6wTN1SEawwGu7qhhbXOlQlzmFQW5FI2BkThf+v4L/GTPTKcz4Yt3NrK6YRE/3HKE/ScHcTeGuKYqwPVL6+ioCc1ZiE+M40/Mhpn4WEM5kmu5WrPzm8BdQI+1VmtfSU5lMg6vd53gkw+8OeN+jcD/+t1r2HpkkG/+7AAtVT4+fX0HK1tChH0BGspL5yTEHccyGk/TF03QH03RUlGKcRkGo2laq0s1V11yLlfPqG8B3wAeylF7sgA5jiWazAAQ8LqJp7NkHIev/OAZ/u/bM3/tTa3w2ds6yTqw+/gwkWSG5qpq3rO0DoOhPlwyGaBTjxP0e3AcS+9oAmugrqzkosI+k3HojyaxWYe9fVEymSzdI3E8xkUilaWsxEtzZems5qpP9PD9bhfJrKPevUzKSZBba180xrTnoi1ZmBzH0hdJcqQ/SjLjUBP08vSOQ3ztuZlnpAC0+OC6K5bQO5Iikc6QyVo+sm4RGzqqONQb57KmMDVl/snQG42n2XpkkHCJl2V1ZRwbjvPW4QFcbhc3raynLlxywfUfG4zyyBvdlJW42H5kiOuX19BRHeTYUIKmygB1oQBBv+eig3fi59MfSVFd5mMwlqK5Ur17GaNngVwyU8eIzxZgE/tkMg5dAzEqg152nhzioZ9t58l9Z94nfKoPNMMnblnL0YEETRUBhmJp1jVXsrQ2RBbLovIgtWWlVJX6AIgmM/jdLvpjSQZjSVoqSzg6FGP/yQiXt1RQXeaf3Pd8v7/RRJpYKsP+/gh7T45y+xU1LG0IcbA3QlWZn6s7qmmumN2QzkSI944mqQ2N1Rj0e3QlqkyasyA3xtwH3AfQ2to6V4eVPIomM+zvibCkroxQwDttsMfTWY70x/B7Damsw7GRKL//8MzjKKv8cM36Rq5d2sCicID9J+Mc6Y+xvqWKJbUhklmH/T0RPC4X5QHP2AtF0mF/b4TKoJeRaJr26jJKvG729AyzrCHE8rrzPwk6MQbeG01wsC9K33jA3r2hCWMcDvX1c9Oqejrbqikv9c2qFx5PZ3EcS3907BgT7yw060ammrMgt9ZuAjYBdHZ22rk6ruTZlAybuIhnYqw4msyQyTpYk+W1Q4M8/9ounjw6c3P/+qoKlrQ24XO7GIwmiSRTJNMZvB5D0smStpag38OyhhB+t4sTI3H2nYgQKnXTM5zg+GiMkNfLUDJNY0UJV7VVUXIBY82ZjEPXUIxDfaPsORmhpSrAqsYQxhiSGYeM4/Crna1c2VRBaan3on9sjmPpGU3QPZhgSV2Q1qqgxsTlrDS0Ijlzeo876PewrD40OQQQ8LonQzyezvLuyQiDsQR7Twzy1acOztj2ygrD3e9dwoamajYfGqDM76a5KsBbR4bYc2KY+ooAN/jGev0wdhJzNJ7mnWMj7D4xjN/twuNxkUw5hAJprmyroq6shHg6y/7eCEtqx941zCQWS/PSgV66BuM0hH0YwO9xcbQ/RjJtubKjktqykoseC5/684uns3QPxUlmsriM0Vi4zChX0w+/C9wI1BhjuoA/s9b+Yy7aluIxtcc9EWYTs0KiyQwBr3sykAJeN+Ggi28//w4/3JOcsd3fu6GedR2
NeN1uyoJeKoJe6kM+9hwfYX/PKGuaK7lueS1NVaV0DcUoD3gxgJO1+DywpC6IwTAQS3JlWzX1ZSVUB/1j9yC3Fs7x/tBxLMOxFM/tOcEjW4/SXhUkngzg8bg52BPB7XZzVUc1rZXBWQ15RJOZyReVoN/DsroQgEJczilXs1Y+mYt2pLhN7XHD2DDEQCyF3+PixEiC5spSvMZwcGCUbd09/Mk/vztje2HgixtXsqG9muFYlmgqTWXAy8r6MO+eHGZfb4Q1LZXcuWYRjeWlRJMZXMby8rs9jCYztFUFKfG4aa8Jsv9EjPUt1ZNj4dFkhq7BGIvKAyxrCE174jCTcTg+GOPgUITdx0fZ3zNKdamPqxdXkcg4JFOWdW0V1IZKqL3IaYtnGH9RcbnMOd8hiEzQS73kzNQe+HA0xdGhGIf7o3S2VdEQLmEomuToYIwvP/Iab/fP3FZno4ff/sBKonGDyxiqQ16ciEPXQJyDfVEqgn4+dXU7i6vLqAz5GY6n+MW+PkYTSfaeHCHs9zFY4mb38Qh3r2vmmiXVkzNSekYSVEyZ1322YZBjQ1EefGk/h3ojlPjcXNFcTmNbAGMMdeUB2quCtMyyFz7VxNi+ZqPIhVKQS05NnAw83B/h6ECcFQ1hgh4333lzN1/58TnOZALLy+D9a1twu90sqa4gay0v7D1B90CM9YuryGRcrF4UHhu+8XkYjCf5xbs9HB2KsOdEhBX1IWrKAuw9OYrX7eKmlQ0srQ1RUjL2VO8ZSfDGkUGubK2cdr54IpHh9SO9vN01jMdn2X1skFDQS2PYT9DvYV9PjJoyH1c0V9JWXZbTk48TL4QiF0rPmgXqfOZ4X4hUKsvhgQi90SSH+qN0VAXx+1w8+ta7fGzzObrf467vCHJtRw0+n5c1LVW4sHzv1QM8tv04sQRE0w7/6rqlLK0pI5rOsP3oIE+908Wzu/uw6QTloRBtdSX0jSS5eWU961sraSwfWzBiYoy+qtTHla2V084XP9I3zF/8aDsv7x0hDiytdNNcXcZQNMOBnj42hgJ84LI6slmoD5ac189tYnipqtSnKYNyySjIF6jTT0zORiqV5eUDPTzxVjelPjdL68p49cBJHnvjCPuGzv31a2ogHA5yqD9JU0WC9YuDvNXVy/M7etlyNEIGuKo5yL1XtbK0powTkThP7ejmrUODHO4bZWA0iQWODI9y3dIaPnp1G4urgpO98Inx8InvdbqeeFf/CP/moS3s6PnlRUhjtwiwNJYH+MDljWxc00TCsTy/u5f68gDt5zG9sD+aZPOBfq7uqCIc8F3QC2euX2xl/lKQL1Cnn5i8GI5j6Y3EeXlPL1sPneTYYAKfSfDQORZ6OF1vFI5Ho5T5PVQHDc9sP8pz747iABUe2NBRzh/cvoLF1RWMptI8uaObF3adJJnNUBtyc2QYhpOworaU2y6rY/Wi8lNmysz0vY5EkvxsXw+PbT00GeIlQH0prG6poqk6yLK6MLdd1khl0E8qleWWVXUsCgem/V5O74H73C7CJWPnDS70hTOXL7Yyv+nZsUDNZjx2Yjre4YEIP9vTw2v7+mgoN7xyaPSi2ktn4ZrFldRXlvDc7kF2940FamsINm5o5q51rcQTDi/vO0nY52PnsQEO9o4QjUJ5EDJpuGl5JX94+3JW1lcBZ4bgdN+r41he3NfL15/ZSzabZnV9gJV1AS5vqyaWyHJtRy0NVQFCPu/kDBKfz017TdlZv5eBWIothwdY3RieXEKuxOeh1O+hIui/oBfOXLzYysKgIJcLFk1m2Hygj837e3hjVzdvz7xIz4wM0FzjZyiRoe/IMPFkgkWlkHbgulUNrGurJ5m2PLLtGOlMls7FVbhx0ReBDHDz0mrWNlVz15omaspLJ9s9WwhmMg7HBqJ0jcTwe9xYt8MHVtXhd1tuXNVEaYmHgUiaurD/ouaFV5X6WN0YJprIEk+PXcwT8LjxuFwX/MKpk59yvvQskXOaGKt1Wzg2EmM
kkeL1g118+9W+i27TBzSEoT5cwmVNFbx5ZIhU2qG6IshlTeWsa67kssZy4lmHeDJDOODGa9wMR5J86MpGUllLVZmP333/Mhoqz+whnx6CEze4Otwf4fG3utnVPUJLVZD3Lq/ho1e1MRhL01hRSonHTSzhnNe88NNvhwuQzDo0V5RO3mYW0JRCueSMtXN/25POzk67ZcuWOT+uXJyJk4XpTJZ/2ryfJ7ac5OIGUcYY4O7Lg2SybiwustZQVurBZVxs6KiiKlhCfzRJ0Oth25Eh/G43NeU+3jw8xO7jI3zu5mW8d0ntGTNBUqksh/sjpHHwGxdDyQylXhcVJT7e7R9ld/cIjnXIZg3tNaW01QQpD/go83lJZh0Go2maKwO4XOacJxgn7mG+r2eUgM/DioYwgMa05ZIyxmy11naevl3PNplRIpFh97EhdvcN8PKObn7ybnTWbVrgkR2/bMc1vq0MePHtk4ykxrataC6hvS5Mc3mQJXUh0hnLhvZKblxaS0Xo1Jknh3qH+NpPd3P4WD/RDHjc4HW7WVThI+3yMzQaJ1Tq59ffu5jLmypwAe/2Rjg5kqSy1M+yuhChEu95BfjEHQmPDcYBF4sqApM97onhHM04kbmkIJdppVJZnt13hK89spP9kUt7rIkVOEeB0dQvt+/pTpDFw8rGKlbXlbOivnza+diZjMP/fGYPj71z6nz1Ck+WjMlwcDBOU9jFx9/TwQ1L6khj+f4bR3llXx83rqzl8kUV532jq4mTqIvKAyyr/+W9UCa+dqInfvqUR5FLSc8wAZi8rH4gnqSsxMPWw/38p+/vZGDmtR0umRo/fPqWNiq9Qa5bUkNlyH/WoB2IpQj6PFR4weUamwVjLFy3vIr26gAv7hvixuWV3L6qidJSLyeH47iN5T1Lq7l+WS21ofO7uAdOPYnqcpmz9rw140TmkoJ8gUulsrx06Bjf/0UXgaCLsK+ElqoyfvFu75yH+MoKuG51A36Pl5ryEu64rJny87iIpqrUx6dvWMb1qxoIeMbWs4ynspQHfbRVBrljbYZFoQCVIT8A1UE/t1/eRMDjJhTwXtDQx+knUc8211szTmQu6Zm2wB0divLVH+5lb3+SUjd8/KpmugZ7eGbv4JzVsLbWzY1rWvnI2iY6asI4jr2gy9o9HhcddWE66sKT207pKVebM/ZvKJ/+gp4LpZ63FAIF+QI1MR1vOJ6ivdxwuB+WVMC3NnfNaR1NZS4+d/tqrlvSODnW7HKZi1oAeaq56hGr5y2FQM/ABSqezrKje5g3D/cQCJaxoibN9r7sRbXVXApOBiIpOP3aoBqgvhreu6qGaMLg9RgWVQXJprM4eFjXWkFnS+3kfVFE5MLpr2eBSSQyHBiI0l5RyuKaID/b6fDC3j6GZ16k5xTf/sx64ilYUhPkxGiSAz2j+L1uYqkM71teR3tV2djqO46lezh+yvJumo4nknu5WurtDuDrgBt40Fr7lVy0K7kTiabY1j1MwGt46p0eNq5bhHFn+fmOI+cd4j/9wjWsbKyZHH/2u13UhQMsrS3DZ1yksdQE/Xg8Y+tjOo49ZYaHhiBELo1Z/2UZY9zA3wEfALqA140xP7LW7pxt25IbqVSWJ3ed4PHtx/n197Sycd0i/B7DV57YyzvneZ+UQ1+5c/LjqaFc7vFRPs29vU/fT0QunVz8lV0N7LPWHgAwxnwP2AgoyAtE90icnuEYd65p5OrWaozHxSv7+4jHz32V5tQAF5HClIslS5qAqWt4dY1vO4Ux5j5jzBZjzJbe3t4cHFZmksk49IwkyGQcFoUDfHBtMx9e00RZcGxe9oaWSl4+cvYxlfUehbhIschFj3y6M1dn3InLWrsJ2ARjN83KwXFlBgOx1ClrU069h/bO473c9T9fP+vXvvPnt2tetEgRyUWQdwEtUx43A905aFdm4WxrU37szx/n9fj0X6MeuEhxysXQyuvAMmNMhzHGB9wL/CgH7coFSKWyHOqLkEqNzQX
3eFzUhX95T+0DPYO0f1EhLjIfzbpHbq3NGGM+DzzJ2PTDb1pr35l1ZXJBukfiPLurh1tW1Z2xFNkDL7/KVx6bfhEIBbhI8cvJ3DBr7RPAE7loSy7OonDgjEWBI9EUf/roszyywzlj/89eA//5boW4yHygSb7zxOmLAv/Dz1/jr3585uygm+vgm/9OAS4ynyjI55lEIsN339x7RohXAd/6fCdrmuvzU5iIXDIK8nlkaDTBf3l8Kz/YNnTK9gd+awV3rFqan6JE5JJTkM8DsViaH+44yDdfOMS7A+nJ7R9cFuD+jRtorSnPY3UicqkpyIvcSCTJQ5sP8sBz+4k4UOEHr4E///hyPrR6Wb7LE5E5oCAvUpmMw+HBUR55vYtnd56gpaYEv9tw/4dXclV7o24VK7KAKMiL0JG+Yf73i4fIpuO8dSzC+1fWc8tlDaxprNQCDSILkP7qi0jPUJQfbDvGG4f7eW73ABvX1/OF21ZzbXs14TJ/vssTkTxRkBeJSDTF15/dyw/f6Ka9wsWH1zbw+zcv1YlMEVGQF7pYLM2LB0/w0zePsa8vwtqWCj7+3mZuX96kYRQRARTkBSuRyPDivuP8y5Zj7D02QPeo5c41dfzBLStoqS6bvBmWiIiCvAD1Dcf4hxf38YOtR+lNwI1Ly9l4ZQWfuKaNRVWhfJcnIgVGQV5AHMfSPRDhy49t59ndQwTccPPyKv70w6tor63Id3kiUqAU5AXCcSwnR+I8/OphfrF/CJcLNm5o4I9vv4Ky4PSLG4uIgIK8YMTTWQ70RqkIefmd97cRLPGxcU2LQlxEzklBnkexWJp3To7QXllKZZmfy5vKWVpXRnXQr5OZInLelBZ5tKtnlIc3H+Kl/X0ksw7lpT7qywMKcRG5ILNKDGPMx4wx7xhjHGNMZ66Kms8SiQzbDg7wxpF+OqoC/Nq17dywpEar1ovIRZvt0MoO4B7gH3JQy7w2NJrg+d09RJ00r+8bwONx89kblnBVR3W+SxORIjerILfW7gIwRnfam8mJwQh/9cQuth0ZoKM2zMa1TbTVB1lcFcx3aSIyD8zZyU5jzH3AfQCtra1zddi8SiQybDs6wEObD/L8O33csrqGT13fzpWLqnV5vYjkzDnTxBjzDNAwzae+ZK394fkeyFq7CdgE0NnZac+7wiJ2YCDK/37pAPtODnPbFXXc/6FVNFSWnfsLRUQuwDmD3Fp761wUMp84jiWeztJeUcrv3LCYY6NxbllWT0WoJN+licg8pPf3l0A8naVrMEZzZSnXLqvLdzkiMs/Ndvrh3caYLuA9wOPGmCdzU1ZxC3jdNFeWakqhiMyJ2c5aeRR4NEe1zBsulyHo15sdEZkbuoRQRKTIKchFRIqcglxEpMgpyM+T41iiyQyOsyCmwItIEVGQn6eJKYXxdDbfpYiInEJBfp40pVBECpXmyJ0nTSkUkUKlHrmISJFTkIuIFDkFuYhIkVOQi4gUOQW5iEiRW1BBrot6RGQ+WlBBrot6RGQ+WlBBrot6RGQ+WlBXuOiiHhGZjxZUj1xEZD6a7VJvf22M2W2M2W6MedQYU5Gjui6aTmiKyEIz2x7508Dl1to1wF7g/tmXNDs6oSkiC82sgtxa+5S1NjP+cDPQPPuSZkcnNEVkocnlGPlngJ+c7ZPGmPuMMVuMMVt6e3tzeNhTTZzQdLnMJTuGiEghOWeQG2OeMcbsmObfxin7fAnIAA+frR1r7SZrbae1trO2tnZWRWscXETkl845F89ae+tMnzfG/BZwF3CLtXZOknViHLy5slTTCUVkwZtVChpj7gD+BHi/tTaWm5LOznEs8XQWv9ulcXARkXGzHSP/BhACnjbGbDPGPJCDms5qoieezDoaBxcRGTerHrm1dmmuCjkfmpEiInKmohpg1iX2IiJn0iX6IiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRU5BLiJS5BTkIiJFzszR7VFOPagxvcDhHDdbA/TluM1cUn0Xr5B
rA9U3G4VcGxRefW3W2jPuOpiXIL8UjDFbrLWd+a7jbFTfxSvk2kD1zUYh1waFX98EDa2IiBQ5BbmISJGbT0G+Kd8FnIPqu3iFXBuovtko5Nqg8OsD5tEYuYjIQjWfeuQiIguSglxEpMjNqyA3xvy1MWa3MWa7MeZRY0xFvmuayhjzMWPMO8YYxxhTEFOajDF3GGP2GGP2GWO+mO96pjLGfNMY02OM2ZHvWqZjjGkxxjxvjNk1/nv9Qr5rmmCMKTHGvGaMeWu8tj/Pd03TMca4jTFvGmMey3ctpzPGHDLGvD2++tmWfNczk3kV5MDTwOXW2jXAXuD+PNdzuh3APcCL+S4Exv6IgL8DPgisBj5pjFmd36pO8S3gjnwXMYMM8EfW2lXAtcDvFdDPLwncbK1dC6wD7jDGXJvfkqb1BWBXvouYwU3W2nWFPpd8XgW5tfYpa21m/OFmoDmf9ZzOWrvLWrsn33VMcTWwz1p7wFqbAr4HbMxzTZOstS8CA/mu42ystcettW+MfzzKWCA15beqMXZMZPyhd/xfQc1sMMY0A3cCD+a7lmI3r4L8NJ8BfpLvIgpcE3B0yuMuCiSIio0xph1YD7ya51ImjQ9bbAN6gKettQVT27i/Bf4YcPJcx9lY4CljzFZjzH35LmYmRbcApjHmGaBhmk99yVr7w/F9vsTY296H57K28WOfs74CYqbZVlC9tmJgjCkDvg/8W2vtSL7rmWCtzQLrxs8VPWqMudxaWxDnG4wxdwE91tqtxpgb81zO2Vxnre02xtQBTxtjdo+/Syw4RRfk1tpbZ/q8Mea3gLuAW2weJsmfq74C0wW0THncDHTnqZaiZIzxMhbiD1trH8l3PdOx1g4ZY15g7HxDQQQ5cB3wEWPMh4ASIGyM+Y619tfzXNcka233+P89xphHGRuKLMggn1dDK8aYO4A/AT5irY3lu54i8DqwzBjTYYzxAfcCP8pzTUXDGGOAfwR2WWv/Jt/1TGWMqZ2YtWWMCQC3ArvzWtQU1tr7rbXN1tp2xp53zxVSiBtjgsaY0MTHwG0UzovgGeZVkAPfAEKMvQ3aZox5IN8FTWWMudsY0wW8B3jcGPNkPusZPzH8eeBJxk7U/bO19p181jSVMea7wCvACmNMlzHms/mu6TTXAb8B3Dz+fNs23sMsBI3A88aY7Yy9YD9trS24KX4FrB542RjzFvAa8Li19qd5rumsdIm+iEiRm289chGRBUdBLiJS5BTkIiJFTkEuIlLkFOQiIkVOQS4iUuQU5CIiRe7/A4JcVhLWP4FuAAAAAElFTkSuQmCC","text/plain":["
"]},"metadata":{"needs_background":"light"},"output_type":"display_data"}],"source":["plt.scatter(frc_api['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","frc_bulk['fuel_cost_per_mmbtu'].replace(0, np.nan).transform(np.log10),\n","s=1, alpha=0.1)"]}],"metadata":{"kernelspec":{"display_name":"Python 3.10.5 ('pudl-dev')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.5"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"adf7f0b18919f80aa48732f306897016c591ffeed72c8be7c2b511e7f25e6b58"}}},"nbformat":4,"nbformat_minor":2} diff --git a/notebooks/work-in-progress/state-demand.ipynb b/notebooks/work-in-progress/state-demand.ipynb index 36ef4eced3..d319093948 100644 --- a/notebooks/work-in-progress/state-demand.ipynb +++ b/notebooks/work-in-progress/state-demand.ipynb @@ -112,14 +112,14 @@ "#HARVEST_TOKEN = os.environ[\"HARVEST_TOKEN\"]\n", "#HARVEST_ACCOUNT_ID = os.environ[\"HARVEST_ACCOUNT_ID\"]\n", "\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", + "pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -187,6 +187,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -226,6 +227,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -265,6 +267,7 @@ ] }, { + "attachments": {}, 
"cell_type": "markdown", "metadata": {}, "source": [ @@ -321,6 +324,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -527,7 +531,7 @@ "source": [ "%%time\n", "import pathlib\n", - "local_dir = pathlib.Path(pudl_settings['data_dir']) / 'local'\n", + "local_dir = PudlPaths().data_dir / 'local'\n", "ventyx_path = local_dir / 'ventyx/state_level_load_2007_2018.csv'\n", "base_dir = local_dir / 'state-demand'\n", "base_dir.mkdir(parents=True, exist_ok=True)\n", diff --git a/notebooks/work-in-progress/test-validation-tests.ipynb b/notebooks/work-in-progress/test-validation-tests.ipynb index 79c68e4ebc..1d537f2017 100644 --- a/notebooks/work-in-progress/test-validation-tests.ipynb +++ b/notebooks/work-in-progress/test-validation-tests.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "advised-nothing", "metadata": {}, @@ -86,8 +87,8 @@ "outputs": [], "source": [ "# Establish connection to pudl database\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])" + "from pudl.workspace.setup import PudlPaths\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { @@ -174,6 +175,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "australian-frederick", "metadata": {}, @@ -188,7 +190,7 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_engine = sa.create_engine(pudl_settings[\"pudl_db\"])" + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { diff --git a/notebooks/work-in-progress/transform_xbrl.ipynb b/notebooks/work-in-progress/transform_xbrl.ipynb index 56c78fb20a..805a36096a 100644 --- a/notebooks/work-in-progress/transform_xbrl.ipynb +++ b/notebooks/work-in-progress/transform_xbrl.ipynb @@ -23,7 +23,7 @@ "import pudl\n", "\n", "from pudl.settings import (Ferc1Settings, Ferc1DbfSettings, Ferc1XbrlSettings)\n", - "pudl_settings = pudl.workspace.setup.get_defaults()\n", + "from pudl.workspace.setup 
import PudlPaths\n", "from pudl.transform.ferc1 import *\n", "from typing import Literal, Tuple" ] diff --git a/src/pudl/analysis/plant_parts_eia.py b/src/pudl/analysis/plant_parts_eia.py index 440cc40519..571c50e0cc 100644 --- a/src/pudl/analysis/plant_parts_eia.py +++ b/src/pudl/analysis/plant_parts_eia.py @@ -159,7 +159,8 @@ .. code-block:: python import pudl - pudl_engine = sa.create_engine(pudl.workspace.setup.get_defaults()['pudl_db']) + from pudl.workspace.setup import PudlPaths + pudl_engine = sa.create_engine(PudlPaths().pudl_db) pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine,freq='AS') Then make the table via pudl_out: diff --git a/src/pudl/analysis/service_territory.py b/src/pudl/analysis/service_territory.py index 2481a0ca62..322868ee29 100644 --- a/src/pudl/analysis/service_territory.py +++ b/src/pudl/analysis/service_territory.py @@ -18,6 +18,7 @@ from matplotlib import pyplot as plt import pudl +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -578,9 +579,7 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - pudl_settings = pudl.workspace.setup.get_defaults() - pudl_engine = sa.create_engine(pudl_settings["pudl_db"]) - + pudl_engine = sa.create_engine(PudlPaths().pudl_db) # Load the US Census DP1 county data: county_gdf = pudl.etl.defs.load_asset_value(AssetKey("county_censusdp1")) diff --git a/src/pudl/analysis/state_demand.py b/src/pudl/analysis/state_demand.py index cf973bfd97..ef633a7d9b 100644 --- a/src/pudl/analysis/state_demand.py +++ b/src/pudl/analysis/state_demand.py @@ -22,7 +22,6 @@ """ import argparse import datetime -import pathlib import sys from collections.abc import Iterable from typing import Any @@ -830,8 +829,6 @@ def main(): # --- Connect to PUDL database --- # - pudl_settings = pudl.workspace.setup.get_defaults() - # --- Read in inputs from PUDL + dagster cache --- # prediction = pudl.etl.defs.load_asset_value( AssetKey("predicted_state_hourly_demand") @@ -839,7 
+836,7 @@ def main(): # --- Export results --- # - local_dir = pathlib.Path(pudl_settings["data_dir"]) / "local" + local_dir = pudl.workspace.setup.PudlPaths().data_dir / "local" ventyx_path = local_dir / "ventyx/state_level_load_2007_2018.csv" base_dir = local_dir / "state-demand" base_dir.mkdir(parents=True, exist_ok=True) diff --git a/src/pudl/cli/etl.py b/src/pudl/cli/etl.py index 4daf490b29..620ebbc96e 100644 --- a/src/pudl/cli/etl.py +++ b/src/pudl/cli/etl.py @@ -27,6 +27,7 @@ import pudl from pudl.settings import EtlSettings +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -116,9 +117,6 @@ def main(): etl_settings = EtlSettings.from_yaml(args.settings_file) - # Set PUDL_INPUT/PUDL_OUTPUT env vars from .pudl.yml if not set already! - pudl.workspace.setup.get_defaults() - dataset_settings_config = etl_settings.datasets.dict() process_epacems = True if etl_settings.datasets.epacems is None: @@ -167,7 +165,7 @@ def main(): logger.info(f"Publishing outputs to {output_path}") fs, _, _ = fsspec.get_fs_token_paths(output_path) fs.put( - etl_settings.pudl_out, + PudlPaths().output_dir, output_path, recursive=True, ) diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index dbcfbd5178..2959daaef6 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -13,6 +13,7 @@ from pudl.helpers import EnvVar from pudl.settings import FercGenericXbrlToSqliteSettings, XbrlFormNumber from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -70,24 +71,21 @@ def get_filings(self, year: int, form: XbrlFormNumber) -> list[InstanceBuilder]: return filings -def _get_sqlite_engine( - form_number: int, output_path: Path, clobber: bool -) -> sa.engine.Engine: +def _get_sqlite_engine(form_number: int, clobber: bool) -> sa.engine.Engine: """Create SQLite engine for specified form and drop tables. 
Args: form_number: FERC form number. - output_path: path to PUDL outputs. clobber: Flag indicating whether or not to drop tables. """ # Read in the structure of the DB, if it exists logger.info( f"Dropping the old FERC Form {form_number} XBRL derived SQLite DB if it exists." ) - db_path = output_path / f"ferc{form_number}_xbrl.sqlite" + db_path = PudlPaths().sqlite_db(f"ferc{form_number}_xbrl") logger.info(f"Connecting to SQLite at {db_path}...") - sqlite_engine = sa.create_engine(f"sqlite:///{db_path}") + sqlite_engine = sa.create_engine(db_path) logger.info(f"Connected to SQLite at {db_path}!") try: # So that we can wipe it out @@ -146,7 +144,7 @@ def xbrl2sqlite(context) -> None: logger.info(f"Dataset ferc{form}_xbrl is disabled, skipping") continue - sqlite_engine = _get_sqlite_engine(form.value, output_path, clobber) + sqlite_engine = _get_sqlite_engine(form.value, clobber) convert_form( settings, diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py index 220381b514..f7cbf9af5e 100755 --- a/src/pudl/ferc_to_sqlite/cli.py +++ b/src/pudl/ferc_to_sqlite/cli.py @@ -139,9 +139,6 @@ def main(): # noqa: C901 etl_settings = EtlSettings.from_yaml(args.settings_file) - # Set PUDL_INPUT/PUDL_OUTPUT env vars from .pudl.yml if not set already! - pudl.workspace.setup.get_defaults() - ferc_to_sqlite_reconstructable_job = build_reconstructable_job( "pudl.ferc_to_sqlite.cli", "ferc_to_sqlite_job_factory", diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index f0429f1a09..5aababfdc6 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -29,7 +29,6 @@ import pudl.logging_helpers from pudl.metadata.fields import get_pudl_dtypes -from pudl.workspace.setup import get_defaults sum_na = partial(pd.Series.sum, skipna=False) """A sum function that returns NA if the Series includes any NA values. @@ -1597,14 +1596,12 @@ def post_process(self, value: str) -> str: PostProcessingError: if the value is not specified in the env var or config. 
""" if value is None: - try: - value = os.environ.get(self.env_var) - if value is None: - value = get_defaults()[self.env_var] - except KeyError: - raise PostProcessingError( - f"Config value could not be found. Set the {self.env_var} environment variable or specify a value in dagster config." - ) + value = os.environ.get(self.env_var) + if value is None: + raise PostProcessingError( + f"Config value could not be found. Set the {self.env_var}" + " environment variable or specify a value in dagster config." + ) return value diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 85a5ed0224..6e0cba18ed 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -749,9 +749,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: @io_manager( config_schema={ "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), + EnvVar("PUDL_OUTPUT"), description="Path of directory to store the database in.", default_value=None, ), @@ -796,9 +794,7 @@ def load_from_path(self, context: InputContext, path: UPath) -> dd.DataFrame: @io_manager( config_schema={ "base_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), + EnvVar("PUDL_OUTPUT"), is_required=False, default_value=None, ) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 9d8548817c..d8eaa02b54 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -43,6 +43,7 @@ from pudl.metadata.resources import FOREIGN_KEYS, RESOURCE_METADATA, eia861 from pudl.metadata.sources import SOURCES from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -947,14 +948,9 @@ def get_temporal_coverage(self, partitions: dict = None) -> str: def add_datastore_metadata(self) -> None: """Get source file metadata from the datastore.""" - pudl_settings = pudl.workspace.setup.get_defaults() - if pudl_settings["pudl_in"] is None: - local_cache_path = None - else: - 
local_cache_path = pudl_settings["data_dir"] dp_desc = Datastore( sandbox=False, - local_cache_path=local_cache_path, + local_cache_path=PudlPaths().data_dir, gcs_cache_path="gs://zenodo-cache.catalyst.coop", ).get_datapackage_descriptor(self.name) partitions = dp_desc.get_partitions() diff --git a/src/pudl/output/epacems.py b/src/pudl/output/epacems.py index 9f7f81fd31..6d509412ce 100644 --- a/src/pudl/output/epacems.py +++ b/src/pudl/output/epacems.py @@ -5,8 +5,8 @@ import dask.dataframe as dd -import pudl from pudl.settings import EpaCemsSettings +from pudl.workspace.setup import PudlPaths def year_state_filter( @@ -133,8 +133,7 @@ def epacems( columns = list(columns) if epacems_path is None: - pudl_settings = pudl.workspace.setup.get_defaults() - epacems_path = Path(pudl_settings["pudl_out"]) / "epacems" + epacems_path = PudlPaths().output_dir / "epacems" epacems = dd.read_parquet( epacems_path, diff --git a/src/pudl/settings.py b/src/pudl/settings.py index 0d54046760..84336d2dc3 100644 --- a/src/pudl/settings.py +++ b/src/pudl/settings.py @@ -220,10 +220,10 @@ def check_eia860m_date(cls, eia860m: bool) -> bool: # noqa: N805 expected_year = max(cls.data_source.working_partitions["years"]) + 1 if eia860m and (eia860m_year != expected_year): raise AssertionError( - """Attempting to integrate an eia860m year """ - f"""({eia860m_year}) from {cls.eia860m_date} not immediately following """ - f"""the eia860 years: {cls.data_source.working_partitions["years"]}. """ - """Consider switching eia860m parameter to False.""" + f"Attempting to integrate an eia860m year " + f"({eia860m_year}) from {cls.eia860m_date} not immediately following " + f"the eia860 years: {cls.data_source.working_partitions['years']}. " + f"Consider switching eia860m parameter to False."
) return eia860m @@ -355,10 +355,10 @@ def make_datasources_table(self, ds: Datastore) -> pd.DataFrame: * the ETL settings (for partitions that are used in the ETL) * the DataSource info (which is stored within the ETL settings) - The ETL settings and the datastore have different levels of nesting - and therefor - names for datasets. The nesting happens particularly with the EIA data. There - are three EIA datasets right now - eia923, eia860 and eia860m. eia860m is a monthly - update of a few tables in the larger eia860 dataset. + The ETL settings and the datastore have different levels of nesting - and + therefore names for datasets. The nesting happens particularly with the EIA + data. There are three EIA datasets right now: eia923, eia860 and eia860m. + eia860m is a monthly update of a few tables in the larger eia860 dataset. Args: ds: An initalized PUDL Datastore from which the DOI's for each raw input @@ -559,7 +559,8 @@ class FercToSqliteSettings(BaseSettings): Args: ferc1_dbf_to_sqlite_settings: Settings for converting FERC 1 DBF data to SQLite. - ferc1_xbrl_to_sqlite_settings: Settings for converting FERC 1 XBRL data to SQLite. + ferc1_xbrl_to_sqlite_settings: Settings for converting FERC 1 XBRL data to + SQLite. other_xbrl_forms: List of non-FERC1 forms to convert from XBRL to SQLite. """ @@ -629,9 +630,6 @@ class EtlSettings(BaseSettings): description: str = None version: str = None - pudl_in: str = pudl.workspace.setup.get_defaults()["pudl_in"] - pudl_out: str = pudl.workspace.setup.get_defaults()["pudl_out"] - # This is list of fsspec compatible paths to publish the output datasets to.
publish_destinations: list[str] = [] diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index e013b42daf..3af7307673 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -20,6 +20,7 @@ import pudl from pudl.workspace import resource_cache from pudl.workspace.resource_cache import PudlResourceKey +from pudl.workspace.setup import PudlPaths, set_path_overrides logger = pudl.logging_helpers.get_logger(__name__) @@ -529,23 +530,6 @@ def parse_command_line(): return parser.parse_args() -def _get_pudl_in(args: dict) -> Path: - """Figure out what pudl_in path should be used.""" - if args.pudl_in: - return Path(args.pudl_in) - else: - return Path(pudl.workspace.setup.get_defaults()["PUDL_INPUT"]) - - -def _create_datastore(args: argparse.Namespace) -> Datastore: - """Constructs datastore instance.""" - # Configure how we want to obtain raw input data: - ds_kwargs = dict(gcs_cache_path=args.gcs_cache_path, sandbox=args.sandbox) - if not args.bypass_local_cache: - ds_kwargs["local_cache_path"] = _get_pudl_in(args) - return Datastore(**ds_kwargs) - - def print_partitions(dstore: Datastore, datasets: list[str]) -> None: """Prints known partition keys and its values for each of the datasets.""" for single_ds in datasets: @@ -609,7 +593,18 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - dstore = _create_datastore(args) + if args.pudl_in: + set_path_overrides(input_dir=args.pudl_in) + + cache_path = None + if not args.bypass_local_cache: + cache_path = PudlPaths().input_dir + + dstore = Datastore( + gcs_cache_path=args.gcs_cache_path, + sandbox=args.sandbox, + local_cache_path=cache_path, + ) if args.dataset: datasets = [args.dataset] diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index b6fdc5fb44..0d37767ad8 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -5,7 +5,7 @@ import shutil from pathlib import Path -from dotenv import load_dotenv +from 
pydantic import BaseSettings import pudl.logging_helpers @@ -28,125 +28,70 @@ def set_path_overrides( os.environ["PUDL_OUTPUT"] = input_dir -def get_defaults() -> dict[str, str]: - """Derive PUDL workspace paths from env variables. +class PudlPaths(BaseSettings): + """These settings provide access to various PUDL directories. - Reads the PUDL_INPUT and PUDL_OUTPUT environment variables, and derives - all relevant paths that will be set in the config dictionary. - - Returns: - dictionary with a variety of different paths where inputs/outputs are - to be found. + It is primarily configured via PUDL_INPUT and PUDL_OUTPUT environment + variables. Other paths of relevance are derived from these. """ - load_dotenv() - - # Workaround for not having PUDL_* env vars in ReadTheDocs builds. - # - # They don't let you set env var through config files, and I'd rather - # have this in source control than go through some sort of web UI - # - # I don't like this any more than you do. - if os.getenv("READTHEDOCS"): - set_path_overrides( - input_dir="~/pudl-work/data", - output_dir="~/pudl-work/output", - ) - for env_var in ["PUDL_INPUT", "PUDL_OUTPUT"]: - if env_var not in os.environ: - raise RuntimeError(f"{env_var} environment variable must be set.") - - pudl_settings = {} - - # The only "inputs" are the datastore and example settings files: - # Convert from input string to Path and make it absolute w/ resolve() - pudl_in = pathlib.Path(os.getenv("PUDL_INPUT")).expanduser().resolve() - data_dir = pudl_in - pudl_workspace_legacy = pudl_in.parent - settings_dir = pudl_workspace_legacy / "settings" - - # Store these as strings... 
since we aren't using Paths everywhere yet: - pudl_settings["pudl_in"] = str(pudl_workspace_legacy) - pudl_settings["data_dir"] = str(data_dir) - pudl_settings["settings_dir"] = str(settings_dir) - - # Everything else goes into outputs, generally organized by type of file: - pudl_out = pathlib.Path(os.getenv("PUDL_OUTPUT")).expanduser().resolve() - pudl_settings["pudl_out"] = str(pudl_out) - - # Mirror dagster env vars for ease of use - pudl_settings["PUDL_OUTPUT"] = pudl_settings["pudl_out"] - pudl_settings["PUDL_INPUT"] = pudl_settings["data_dir"] - - ferc1_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc1.sqlite") - pudl_settings["ferc1_db"] = "sqlite:///" + str(ferc1_db_file.resolve()) - - ferc1_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc1_xbrl.sqlite") - pudl_settings["ferc1_xbrl_db"] = "sqlite:///" + str(ferc1_db_file.resolve()) - pudl_settings["ferc1_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc1_xbrl_datapackage.json" - ) - pudl_settings["ferc1_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc1_xbrl_taxonomy_metadata.json" - ) - - ferc2_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc2_xbrl.sqlite") - pudl_settings["ferc2_xbrl_db"] = "sqlite:///" + str(ferc2_db_file.resolve()) - pudl_settings["ferc2_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc2_xbrl_datapackage.json" - ) - pudl_settings["ferc2_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc2_xbrl_taxonomy_metadata.json" - ) - - ferc6_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc6_xbrl.sqlite") - pudl_settings["ferc6_xbrl_db"] = "sqlite:///" + str(ferc6_db_file.resolve()) - pudl_settings["ferc6_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc6_xbrl_datapackage.json" - ) - pudl_settings["ferc6_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc6_xbrl_taxonomy_metadata.json" - ) - - ferc60_db_file = 
pathlib.Path(pudl_settings["pudl_out"], "ferc60_xbrl.sqlite") - pudl_settings["ferc60_xbrl_db"] = "sqlite:///" + str(ferc60_db_file.resolve()) - pudl_settings["ferc60_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc60_xbrl_datapackage.json" - ) - pudl_settings["ferc60_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc60_xbrl_taxonomy_metadata.json" - ) - - ferc714_db_file = pathlib.Path(pudl_settings["pudl_out"], "ferc714_xbrl.sqlite") - pudl_settings["ferc714_xbrl_db"] = "sqlite:///" + str(ferc714_db_file.resolve()) - pudl_settings["ferc714_xbrl_datapackage"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc714_xbrl_datapackage.json" - ) - pudl_settings["ferc714_xbrl_taxonomy_metadata"] = pathlib.Path( - pudl_settings["pudl_out"], "ferc714_xbrl_taxonomy_metadata.json" - ) - - pudl_settings["pudl_db"] = "sqlite:///" + str( - pathlib.Path(pudl_settings["pudl_out"], "pudl.sqlite") - ) - - pudl_settings["censusdp1tract_db"] = "sqlite:///" + str( - pathlib.Path(pudl_settings["pudl_out"], "censusdp1tract.sqlite") - ) - - if not os.getenv("DAGSTER_HOME"): - os.environ["DAGSTER_HOME"] = str( - Path(pudl_settings["pudl_in"]) / "dagster_home" - ) - - return pudl_settings - - -def init(pudl_settings: dict[str, str], clobber=False): + + pudl_input: str + pudl_output: str + + @property + def input_dir(self) -> Path: + """Path to PUDL input directory.""" + return Path(self.pudl_input) + + @property + def output_dir(self) -> Path: + """Path to PUDL output directory.""" + return Path(self.pudl_output) + + @property + def settings_dir(self) -> Path: + """Path to directory containing settings files.""" + return self.input_dir.parent / "settings" + + @property + def data_dir(self) -> Path: + """Path to PUDL data directory.""" + # TODO(janrous): possibly deprecate this in favor of input_dir + return self.input_dir + + @property + def pudl_db(self) -> Path: + """Returns url of locally stored pudl sqlite database.""" + return 
self.sqlite_db("pudl") + + def sqlite_db(self, name: str) -> str: + """Returns url of locally stored pudl slqlite database with given name. + + The name is expected to be the name of the database without the .sqlite + suffix. E.g. pudl, ferc1 and so on. + """ + db_path = PudlPaths().output_dir / f"{name}.sqlite" + return f"sqlite:///{db_path}" + return self.output_dir / f"{name}.sqlite" + + def output_file(self, filename: str) -> Path: + """Path to file in PUDL output directory.""" + return self.output_dir / filename + + class Config: + """Pydantic configuration. Loads from .env file.""" + + env_file = ".env" + fields = { + "pudl_input": {"env": "PUDL_INPUT"}, + "pudl_output": {"env": "PUDL_OUTPUT"}, + } + + +def init(clobber=False): """Set up a new PUDL working environment based on the user settings. Args: - pudl_settings (os.PathLike): Paths to data inputs & outputs. See - get_defaults() for how to get these. clobber (bool): if True, replace existing files. If False (the default) do not replace existing files. @@ -154,22 +99,25 @@ def init(pudl_settings: dict[str, str], clobber=False): None """ # Create tmp directory - tmp_dir = pathlib.Path(pudl_settings["data_dir"], "tmp") + tmp_dir = PudlPaths().data_dir / "tmp" tmp_dir.mkdir(parents=True, exist_ok=True) # These are files that may exist in the package_data directory, but that # we do not want to deploy into a user workspace: ignore_files = ["__init__.py", ".gitignore"] + # TODO(janrous): perhaps we don't need to do this? 
# Make a settings directory in the workspace, and deploy settings files: - settings_dir = pathlib.Path(pudl_settings["settings_dir"]) + settings_dir = PudlPaths().settings_dir settings_dir.mkdir(parents=True, exist_ok=True) settings_pkg = "pudl.package_data.settings" deploy(settings_pkg, settings_dir, ignore_files, clobber=clobber) # Make output directory: - pudl_out = pathlib.Path(pudl_settings["pudl_out"]) - pudl_out.mkdir(parents=True, exist_ok=True) + PudlPaths().output_dir.mkdir(parents=True, exist_ok=True) + # TODO(rousik): it might make sense to turn this into a method of + # PudlPaths object and to move this to settings.py from this module. + # Unclear whether deployment of settings files makes much sense. def deploy(pkg_path, deploy_dir, ignore_files, clobber=False): diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index 372c674108..dcd39b3430 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -45,6 +45,7 @@ import sys import pudl +from pudl.workspace.setup import set_path_overrides logger = pudl.logging_helpers.get_logger(__name__) @@ -102,20 +103,17 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - if args.pudl_input: + if args.pudl_in: pudl_in = pathlib.Path(args.pudl_in).expanduser().resolve() if not pathlib.Path.is_dir(pudl_in): raise FileNotFoundError(f"Directory not found: {pudl_in}") - - if args.pudl_output: + set_path_overrides(input_dir=pudl_in) + if args.pudl_out: pudl_out = pathlib.Path(args.pudl_out).expanduser().resolve() if not pathlib.Path.is_dir(pudl_out): raise FileNotFoundError(f"Directory not found: {pudl_out}") - - settings = pudl.workspace.setup.get_defaults( - input_dir=args.pudl_input, output_dir=args.pudl_output - ) - pudl.workspace.setup.init(settings, clobber=args.clobber) + set_path_overrides(output_dir=pudl_out) + pudl.workspace.setup.init(clobber=args.clobber) if __name__ == "__main__": diff --git a/test/conftest.py b/test/conftest.py index
f3ff34df76..51751006ed 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -2,7 +2,6 @@ Defines useful fixtures, command line args. """ -import json import logging import os from pathlib import Path @@ -27,6 +26,7 @@ from pudl.metadata.classes import Package from pudl.output.pudltabl import PudlTabl from pudl.settings import DatasetsSettings, EtlSettings, XbrlFormNumber +from pudl.workspace.setup import PudlPaths logger = logging.getLogger(__name__) @@ -112,7 +112,7 @@ def check_foreign_keys(request): @pytest.fixture(scope="session", name="etl_settings") -def etl_parameters(request, test_dir, pudl_settings_fixture) -> EtlSettings: +def etl_parameters(request, test_dir) -> EtlSettings: """Read the ETL parameters from the test settings or proffered file.""" if request.config.getoption("--etl-settings"): etl_settings_yml = Path(request.config.getoption("--etl-settings")) @@ -271,7 +271,6 @@ def ferc1_xbrl_sql_engine(ferc_to_sqlite_xbrl_only, dataset_settings_config): @pytest.fixture(scope="session") def ferc_xbrl( - pudl_settings_fixture, live_dbs, ferc_to_sqlite_settings, pudl_datastore_fixture, @@ -292,7 +291,7 @@ def ferc_xbrl( for form in XbrlFormNumber: raw_archive, taxonomy_entry_point = datastore.get_taxonomy(year, form) - sqlite_engine = _get_sqlite_engine(form.value, pudl_settings_fixture, True) + sqlite_engine = _get_sqlite_engine(form.value, True) form_settings = ferc_to_sqlite_settings.get_xbrl_dataset_settings(form) @@ -306,12 +305,14 @@ def ferc_xbrl( requested_tables=form_settings.tables, batch_size=len(filings_subset) // step_size + 1, workers=step_size, - datapackage_path=pudl_settings_fixture[ - f"ferc{form.value}_xbrl_datapackage" - ], - metadata_path=pudl_settings_fixture[ - f"ferc{form.value}_xbrl_taxonomy_metadata" - ], + # TODO(janrous): the following should ideally be provided by some + # ferc dataset metadata object rather than encoding this in settings. 
+ datapackage_path=PudlPaths().output_file( + f"ferc{form.value}_xbrl_datapackage.json" + ), + metadata_path=PudlPaths().output_file( + f"ferc{form.value}_xbrl_taxonomy_metadata.json" + ), archive_file_path=taxonomy_entry_point, ) @@ -328,7 +329,6 @@ def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl): @pytest.fixture(scope="session") def pudl_sql_io_manager( pudl_path_setup, - pudl_settings_fixture, ferc1_engine_dbf, # Implicit dependency ferc1_engine_xbrl, # Implicit dependency live_dbs, @@ -344,10 +344,8 @@ def pudl_sql_io_manager( """ logger.info("setting up the pudl_engine fixture") if not live_dbs: - db_path = pudl_settings_fixture["pudl_db"] - # Create the database and schemas - engine = sa.create_engine(db_path) + engine = sa.create_engine(PudlPaths().pudl_db) md = Package.from_resource_ids().to_sql() md.create_all(engine) # Run the ETL and generate a new PUDL SQLite DB for testing: @@ -383,14 +381,23 @@ def pudl_tmpdir(tmp_path_factory): return tmpdir +def pytest_sessionstart(session): + """Configures input/output paths for the tests.""" + # TODO(rousik): Should we be using fixed paths instead + # of using tmpdir capabilities here? 
+ pudl.workspace.setup.set_path_overrides( + input_dir="~/pudl-work/data", + output_dir="~/pudl-work/output", + ) + logger.info(f"Starting unit tests with output path {PudlPaths().output_dir}") + pudl.workspace.setup.init() + + @pytest.fixture(scope="session") def pudl_path_setup(request, pudl_tmpdir): """Sets the necessary env variables for the input and output paths.""" if os.environ.get("GITHUB_ACTIONS", False): - pudl.workspace.setup.set_path_overrides( - input_dir="~/pudl-work/data", - output_dir="~/pudl-work/output", - ) + pudl.workspace.setup.set_path_overrides() else: if request.config.getoption("--tmp-data"): in_tmp = pudl_tmpdir / "data" @@ -406,21 +413,6 @@ def pudl_path_setup(request, pudl_tmpdir): ) -@pytest.fixture(scope="session", name="pudl_settings_fixture") -def pudl_settings_dict(pudl_path_setup): # noqa: C901 - """Determine some settings (mostly paths) for the test session.""" - logger.info("setting up the pudl_settings_fixture") - - pudl_settings = pudl.workspace.setup.get_defaults() - pudl.workspace.setup.init(pudl_settings) - - pretty_settings = json.dumps( - {str(k): str(v) for k, v in pudl_settings.items()}, indent=2 - ) - logger.info(f"pudl_settings being used: {pretty_settings}") - return pudl_settings - - @pytest.fixture(scope="session") def dataset_settings_config(request, etl_settings): """Create dagster dataset_settings resource.""" diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py index 5265db3766..67ec7dfa95 100644 --- a/test/integration/glue_test.py +++ b/test/integration/glue_test.py @@ -60,7 +60,6 @@ def glue_test_dfs( pudl_out, ferc1_engine_xbrl, ferc1_engine_dbf, - pudl_settings_fixture, etl_settings, dataset_settings_config, ) -> dict[str, pd.DataFrame]: diff --git a/test/unit/helpers_test.py b/test/unit/helpers_test.py index 42899bc227..8c6002fae5 100644 --- a/test/unit/helpers_test.py +++ b/test/unit/helpers_test.py @@ -643,15 +643,6 @@ def test_env_var(): del os.environ["_PUDL_TEST"] -def 
test_env_var_reads_defaults(mocker): - mocker.patch( - "pudl.helpers.get_defaults", - lambda: {"_PUDL_TEST": "test value default"}, - ) - env_var = EnvVar(env_var="_PUDL_TEST") - assert env_var.post_process(None) == "test value default" - - def test_env_var_missing_completely(): with pytest.raises(PostProcessingError): EnvVar(env_var="_PUDL_BOGUS").post_process(None) diff --git a/test/unit/pudl_environment_test.py b/test/unit/pudl_environment_test.py deleted file mode 100644 index 3bc874e32a..0000000000 --- a/test/unit/pudl_environment_test.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Test to see if our environment (PUDL_INPUT/OUTPUT, pudl_settings) is set up properly -in a variety of situations.""" - -import os - -import pytest - -from pudl.workspace.setup import get_defaults - - -def setup(): - if (old_output := os.getenv("PUDL_OUTPUT")) is not None: - os.environ["PUDL_OUTPUT_OLD"] = old_output - if (old_input := os.getenv("PUDL_INPUT")) is not None: - os.environ["PUDL_INPUT_OLD"] = old_input - - -def test_get_defaults_in_test_environment_no_env_vars_no_config(): - if os.getenv("PUDL_OUTPUT"): - del os.environ["PUDL_OUTPUT"] - if os.getenv("PUDL_INPUT"): - del os.environ["PUDL_INPUT"] - - with pytest.raises(RuntimeError): - get_defaults() - - -def teardown(): - if (old_output := os.getenv("PUDL_OUTPUT_OLD")) is not None: - os.environ["PUDL_OUTPUT"] = old_output - del os.environ["PUDL_OUTPUT_OLD"] - if (old_input := os.getenv("PUDL_INPUT_OLD")) is not None: - os.environ["PUDL_INPUT"] = old_input - del os.environ["PUDL_INPUT_OLD"] diff --git a/test/unit/settings_test.py b/test/unit/settings_test.py index 25e18ad146..9d4eacba6c 100644 --- a/test/unit/settings_test.py +++ b/test/unit/settings_test.py @@ -19,6 +19,7 @@ _convert_settings_to_dagster_config, ) from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths class TestGenericDatasetSettings: @@ -260,9 +261,9 @@ def test_partitions_with_json_normalize(pudl_etl_settings): ) -def 
test_partitions_for_datasource_table(pudl_settings_fixture, pudl_etl_settings): +def test_partitions_for_datasource_table(pudl_etl_settings): """Test whether or not we can make the datasource table.""" - ds = Datastore(local_cache_path=pudl_settings_fixture["data_dir"]) + ds = Datastore(local_cache_path=PudlPaths().data_dir) datasource = pudl_etl_settings.make_datasources_table(ds) datasets = pudl_etl_settings.get_datasets().keys() if datasource.empty and datasets != 0: diff --git a/test/validate/notebooks/validate_bf_eia923.ipynb b/test/validate/notebooks/validate_bf_eia923.ipynb index b29fa9c83f..750b0e926e 100644 --- a/test/validate/notebooks/validate_bf_eia923.ipynb +++ b/test/validate/notebooks/validate_bf_eia923.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -77,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -102,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -129,6 +131,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -145,6 +148,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -161,6 +165,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -177,6 +182,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -193,6 +199,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ 
@@ -209,6 +216,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -226,6 +234,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -253,6 +262,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_fbp_ferc1.ipynb b/test/validate/notebooks/validate_fbp_ferc1.ipynb index c45422e5e1..fb4fd2920d 100644 --- a/test/validate/notebooks/validate_fbp_ferc1.ipynb +++ b/test/validate/notebooks/validate_fbp_ferc1.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -79,13 +80,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -129,6 +130,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -157,6 +159,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -169,6 +172,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_frc_eia923.ipynb b/test/validate/notebooks/validate_frc_eia923.ipynb index 1fef3ce49b..e1834129b7 100644 --- a/test/validate/notebooks/validate_frc_eia923.ipynb +++ b/test/validate/notebooks/validate_frc_eia923.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true @@ -77,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = 
pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", - "pudl_settings" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db(\"ferc1\"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -102,6 +103,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "toc-hr-collapsed": false @@ -114,6 +116,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -131,6 +134,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -148,6 +152,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -165,6 +170,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -181,6 +187,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -197,6 +204,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -213,6 +221,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -229,6 +238,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -246,6 +256,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -273,6 +284,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ diff --git a/test/validate/notebooks/validate_fuel_ferc1.ipynb b/test/validate/notebooks/validate_fuel_ferc1.ipynb index bee3f3988b..cc86703c20 100644 --- a/test/validate/notebooks/validate_fuel_ferc1.ipynb +++ b/test/validate/notebooks/validate_fuel_ferc1.ipynb @@ -78,9 +78,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = 
sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_gens_eia860.ipynb b/test/validate/notebooks/validate_gens_eia860.ipynb index df59d000bc..17b2916e44 100644 --- a/test/validate/notebooks/validate_gens_eia860.ipynb +++ b/test/validate/notebooks/validate_gens_eia860.ipynb @@ -77,9 +77,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])" + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)" ] }, { diff --git a/test/validate/notebooks/validate_gf_eia923.ipynb b/test/validate/notebooks/validate_gf_eia923.ipynb index d94a9abc5d..216d07e8b8 100644 --- a/test/validate/notebooks/validate_gf_eia923.ipynb +++ b/test/validate/notebooks/validate_gf_eia923.ipynb @@ -77,9 +77,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_mcoe.ipynb b/test/validate/notebooks/validate_mcoe.ipynb index a8bc0c0883..e8c884f558 100644 --- a/test/validate/notebooks/validate_mcoe.ipynb +++ b/test/validate/notebooks/validate_mcoe.ipynb @@ -78,9 +78,9 @@ "metadata": {}, 
"outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, diff --git a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb index 055769d2ed..e435974dc0 100644 --- a/test/validate/notebooks/validate_plants_steam_ferc1.ipynb +++ b/test/validate/notebooks/validate_plants_steam_ferc1.ipynb @@ -78,9 +78,9 @@ "metadata": {}, "outputs": [], "source": [ - "pudl_settings = pudl.workspace.setup.get_defaults()\n", - "ferc1_engine = sa.create_engine(pudl_settings['ferc1_db'])\n", - "pudl_engine = sa.create_engine(pudl_settings['pudl_db'])\n", + "from pudl.workspace.setup import PudlPaths\n", + "ferc1_engine = sa.create_engine(PudlPaths().sqlite_db("ferc1"))\n", + "pudl_engine = sa.create_engine(PudlPaths().pudl_db)\n", "pudl_settings" ] }, From e5111605b37395ff2b0e11ba50450ee4f17034e9 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Thu, 6 Jul 2023 21:20:08 +0200 Subject: [PATCH 05/51] Fix typo when setting path overrides. --- src/pudl/workspace/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 0d37767ad8..a054f06efd 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -25,7 +25,7 @@ def set_path_overrides( if input_dir: os.environ["PUDL_INPUT"] = input_dir if output_dir: - os.environ["PUDL_OUTPUT"] = input_dir + os.environ["PUDL_OUTPUT"] = output_dir class PudlPaths(BaseSettings): From 617edb70b284a91b71b50175615de51de287b79f Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Thu, 6 Jul 2023 21:33:55 +0200 Subject: [PATCH 06/51] Refactor paths. 
Drop pudl_output_path config_schema field in favor of pudl_paths resource. This resource can reconfigure PudlPaths() to use alternative input/output paths if necessary, but in general, this should be controlled via PUDL_INPUT/PUDL_OUTPUT env variables. EnvVar class can now be safely removed. --- src/pudl/convert/censusdp1tract_to_sqlite.py | 13 +---- src/pudl/etl/__init__.py | 8 ++- src/pudl/etl/epacems_assets.py | 42 +++----------- src/pudl/extract/ferc.py | 13 +---- src/pudl/extract/ferc1.py | 19 +----- src/pudl/extract/xbrl.py | 12 +--- src/pudl/ferc_to_sqlite/__init__.py | 7 ++- src/pudl/helpers.py | 37 +----------- src/pudl/io_managers.py | 61 ++++---------------- src/pudl/resources.py | 49 +++++++++++++--- src/pudl/workspace/setup_cli.py | 2 +- test/unit/extract/xbrl_test.py | 2 + test/unit/helpers_test.py | 16 ----- 13 files changed, 85 insertions(+), 196 deletions(-) diff --git a/src/pudl/convert/censusdp1tract_to_sqlite.py b/src/pudl/convert/censusdp1tract_to_sqlite.py index 43567caa0c..9cefb97ea4 100644 --- a/src/pudl/convert/censusdp1tract_to_sqlite.py +++ b/src/pudl/convert/censusdp1tract_to_sqlite.py @@ -22,21 +22,14 @@ from dagster import Field, asset import pudl -from pudl.helpers import EnvVar from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @asset( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing Census database.", default_value=True ), @@ -46,7 +39,7 @@ default_value=2010, ), }, - required_resource_keys={"datastore"}, + required_resource_keys={"datastore", "pudl_paths"}, ) def censusdp1tract_to_sqlite(context): """Use GDAL's ogr2ogr utility to convert the Census DP1 GeoDB to an SQLite DB. 
@@ -82,7 +75,7 @@ def censusdp1tract_to_sqlite(context): "censusdp1tract", year=context.op_config["year"] ) extract_root = tmpdir_path / Path(zip_ref.filelist[0].filename) - out_path = Path(context.op_config["pudl_output_path"]) / "censusdp1tract.sqlite" + out_path = PudlPaths().output_dir / "censusdp1tract.sqlite" if out_path.exists(): if context.op_config["clobber"]: diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index 0bcc273d0b..02a0fa0fe3 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -17,7 +17,12 @@ ferc1_xbrl_sqlite_io_manager, pudl_sqlite_io_manager, ) -from pudl.resources import dataset_settings, datastore, ferc_to_sqlite_settings +from pudl.resources import ( + dataset_settings, + datastore, + ferc_to_sqlite_settings, + pudl_paths, +) from pudl.settings import EtlSettings from . import glue_assets # noqa: F401 @@ -66,6 +71,7 @@ default_resources = { "datastore": datastore, + "pudl_paths": pudl_paths, "pudl_sqlite_io_manager": pudl_sqlite_io_manager, "ferc1_dbf_sqlite_io_manager": ferc1_dbf_sqlite_io_manager, "ferc1_xbrl_sqlite_io_manager": ferc1_xbrl_sqlite_io_manager, diff --git a/src/pudl/etl/epacems_assets.py b/src/pudl/etl/epacems_assets.py index 938a86397a..f26248776f 100644 --- a/src/pudl/etl/epacems_assets.py +++ b/src/pudl/etl/epacems_assets.py @@ -9,17 +9,16 @@ see: https://docs.dagster.io/concepts/ops-jobs-graphs/dynamic-graphs and https://docs.dagster.io/concepts/assets/graph-backed-assets. 
""" from collections import namedtuple -from pathlib import Path import dask.dataframe as dd import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -from dagster import AssetIn, DynamicOut, DynamicOutput, Field, asset, graph_asset, op +from dagster import AssetIn, DynamicOut, DynamicOutput, asset, graph_asset, op import pudl -from pudl.helpers import EnvVar from pudl.metadata.classes import Resource +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -42,18 +41,7 @@ def get_years_from_settings(context): yield DynamicOutput(year, mapping_key=str(year)) -@op( - required_resource_keys={"datastore", "dataset_settings"}, - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, -) +@op(required_resource_keys={"datastore", "dataset_settings", "pudl_paths"}) def process_single_year( context, year, @@ -73,9 +61,7 @@ def process_single_year( epacems_settings = context.resources.dataset_settings.epacems schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() - partitioned_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems" - ) + partitioned_path = PudlPaths().output_dir / "hourly_emissions_epacems" partitioned_path.mkdir(exist_ok=True) for state in epacems_settings.states: @@ -96,17 +82,7 @@ def process_single_year( return YearPartitions(year, epacems_settings.states) -@op( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, -) +@op(required_resource_keys={"pudl_paths"}) def consolidate_partitions(context, partitions: list[YearPartitions]) -> None: """Read partitions into memory and write to a single monolithic output. 
@@ -114,12 +90,8 @@ def consolidate_partitions(context, partitions: list[YearPartitions]) -> None: context: dagster keyword that provides access to resources and config. partitions: Year and state combinations in the output database. """ - partitioned_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems" - ) - monolithic_path = ( - Path(context.op_config["pudl_output_path"]) / "hourly_emissions_epacems.parquet" - ) + partitioned_path = PudlPaths().output_dir / "hourly_emissions_epacems" + monolithic_path = PudlPaths().output_dir / "hourly_emissions_epacems.parquet" schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() with pq.ParquetWriter( diff --git a/src/pudl/extract/ferc.py b/src/pudl/extract/ferc.py index 9f46864763..c32e48a20c 100644 --- a/src/pudl/extract/ferc.py +++ b/src/pudl/extract/ferc.py @@ -7,25 +7,18 @@ from pudl.extract.ferc1 import Ferc1DbfExtractor from pudl.extract.ferc2 import Ferc2DbfExtractor from pudl.extract.ferc6 import Ferc6DbfExtractor -from pudl.helpers import EnvVar +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @op( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing ferc1 database.", default_value=False ), }, - required_resource_keys={"ferc_to_sqlite_settings", "datastore"}, + required_resource_keys={"ferc_to_sqlite_settings", "datastore", "pudl_paths"}, ) def dbf2sqlite(context) -> None: """Clone the FERC Form 1 Visual FoxPro databases into SQLite.""" @@ -43,5 +36,5 @@ def dbf2sqlite(context) -> None: datastore=context.resources.datastore, settings=context.resources.ferc_to_sqlite_settings, clobber=context.op_config["clobber"], - output_path=context.op_config["pudl_output_path"], + output_path=PudlPaths().output_dir, ).execute() diff --git a/src/pudl/extract/ferc1.py 
b/src/pudl/extract/ferc1.py index c2321f4c58..6255e9be4d 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -68,14 +68,12 @@ """ import json from itertools import chain -from pathlib import Path from typing import Any, Literal import pandas as pd import sqlalchemy as sa from dagster import ( AssetKey, - Field, SourceAsset, asset, build_init_resource_context, @@ -89,7 +87,6 @@ add_key_constraints, deduplicate_by_year, ) -from pudl.helpers import EnvVar from pudl.io_managers import ( FercDBFSQLiteIOManager, FercXBRLSQLiteIOManager, @@ -97,6 +94,7 @@ ferc1_xbrl_sqlite_io_manager, ) from pudl.settings import DatasetsSettings, FercToSqliteSettings, GenericDatasetSettings +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -373,15 +371,7 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: @asset( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, + required_resource_keys={"pudl_paths"}, ) def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]]]: """Extract the FERC 1 XBRL Taxonomy metadata we've stored as JSON. @@ -395,10 +385,7 @@ def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]] filings. If there is no instant/duration table, an empty list is returned instead. 
""" - metadata_path = ( - Path(context.op_config["pudl_output_path"]) - / "ferc1_xbrl_taxonomy_metadata.json" - ) + metadata_path = PudlPaths().output_dir / "ferc1_xbrl_taxonomy_metadata.json" with open(metadata_path) as f: xbrl_meta_all = json.load(f) diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index 2959daaef6..60f708eac9 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -10,7 +10,6 @@ from ferc_xbrl_extractor.instance import InstanceBuilder import pudl -from pudl.helpers import EnvVar from pudl.settings import FercGenericXbrlToSqliteSettings, XbrlFormNumber from pudl.workspace.datastore import Datastore from pudl.workspace.setup import PudlPaths @@ -98,13 +97,6 @@ def _get_sqlite_engine(form_number: int, clobber: bool) -> sa.engine.Engine: @op( config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), "clobber": Field( bool, description="Clobber existing ferc1 database.", default_value=False ), @@ -119,11 +111,11 @@ def _get_sqlite_engine(form_number: int, clobber: bool) -> sa.engine.Engine: default_value=50, ), }, - required_resource_keys={"ferc_to_sqlite_settings", "datastore"}, + required_resource_keys={"ferc_to_sqlite_settings", "datastore", "pudl_paths"}, ) def xbrl2sqlite(context) -> None: """Clone the FERC Form 1 XBRL Databsae to SQLite.""" - output_path = Path(context.op_config["pudl_output_path"]) + output_path = PudlPaths().output_dir clobber = context.op_config["clobber"] batch_size = context.op_config["batch_size"] workers = context.op_config["workers"] diff --git a/src/pudl/ferc_to_sqlite/__init__.py b/src/pudl/ferc_to_sqlite/__init__.py index c1a76d38d0..45fcb992e1 100644 --- a/src/pudl/ferc_to_sqlite/__init__.py +++ b/src/pudl/ferc_to_sqlite/__init__.py @@ -6,7 +6,7 @@ import pudl from pudl.extract.ferc import dbf2sqlite from pudl.extract.xbrl import xbrl2sqlite -from pudl.resources import 
datastore, ferc_to_sqlite_settings +from pudl.resources import datastore, ferc_to_sqlite_settings, pudl_paths from pudl.settings import EtlSettings logger = pudl.logging_helpers.get_logger(__name__) @@ -34,6 +34,7 @@ def ferc_to_sqlite_xbrl_only(): default_resources_defs = { "ferc_to_sqlite_settings": ferc_to_sqlite_settings, "datastore": datastore, + "pudl_paths": pudl_paths, } ferc_to_sqlite_full = ferc_to_sqlite.to_job( @@ -54,8 +55,8 @@ def ferc_to_sqlite_xbrl_only(): "resources": { "ferc_to_sqlite_settings": { "config": ferc_to_sqlite_fast_settings.dict(), - } - } + }, + }, }, ) diff --git a/src/pudl/helpers.py b/src/pudl/helpers.py index 5aababfdc6..bf190462cf 100644 --- a/src/pudl/helpers.py +++ b/src/pudl/helpers.py @@ -7,7 +7,6 @@ with cleaning and restructing dataframes. """ import itertools -import os import pathlib import re import shutil @@ -23,8 +22,7 @@ import pandas as pd import requests import sqlalchemy as sa -from dagster import AssetKey, AssetsDefinition, AssetSelection, Noneable, SourceAsset -from dagster._config.errors import PostProcessingError +from dagster import AssetKey, AssetsDefinition, AssetSelection, SourceAsset from pandas._libs.missing import NAType import pudl.logging_helpers @@ -1572,39 +1570,6 @@ def convert_df_to_excel_file(df: pd.DataFrame, **kwargs) -> pd.ExcelFile: return pd.ExcelFile(workbook) -class EnvVar(Noneable): - """A dagster config type for env vars.""" - - def __init__(self, env_var: str) -> None: - """Initialize EnvVarField.""" - super().__init__(inner_type=str) - self.env_var = env_var - - def post_process(self, value: str) -> str: - """Validate an EnvVar config value. - - Returns the value of the object environment variable if the - config value is not specified is not specified with dagster. - - Args: - value: config value to validate. - - Returns: - validated config value. - - Raises: - PostProcessingError: if the value is not specified in the env var or config. 
- """ - if value is None: - value = os.environ.get(self.env_var) - if value is None: - raise PostProcessingError( - f"Config value could not be found. Set the {self.env_var}" - " environment variable or specify a value in dagster config." - ) - return value - - def get_asset_keys( assets: list[AssetsDefinition], exclude_source_assets: bool = True ) -> set[AssetKey]: diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 6e0cba18ed..87c3776844 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -10,7 +10,6 @@ from alembic.autogenerate.api import compare_metadata from alembic.migration import MigrationContext from dagster import ( - Field, InitResourceContext, InputContext, IOManager, @@ -23,8 +22,8 @@ from upath import UPath import pudl -from pudl.helpers import EnvVar from pudl.metadata.classes import Package, Resource +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -534,21 +533,10 @@ def load_input(self, context: InputContext) -> pd.DataFrame: return df -@io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - } -) +@io_manager(required_resource_keys={"pudl_paths"}) def pudl_sqlite_io_manager(init_context) -> PudlSQLiteIOManager: """Create a SQLiteManager dagster resource for the pudl database.""" - base_dir = init_context.resource_config["pudl_output_path"] - return PudlSQLiteIOManager(base_dir=base_dir, db_name="pudl") + return PudlSQLiteIOManager(base_dir=PudlPaths().output_dir, db_name="pudl") class FercSQLiteIOManager(SQLiteIOManager): @@ -676,23 +664,11 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ).assign(sched_table_name=table_name) -@io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar( - env_var="PUDL_OUTPUT", - ), - description="Path of directory to store the database in.", - default_value=None, - ), - }, - 
required_resource_keys={"dataset_settings"}, -) +@io_manager(required_resource_keys={"dataset_settings", "pudl_paths"}) def ferc1_dbf_sqlite_io_manager(init_context) -> FercDBFSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" - base_dir = init_context.resource_config["pudl_output_path"] return FercDBFSQLiteIOManager( - base_dir=base_dir, + base_dir=PudlPaths().output_dir, db_name="ferc1", ) @@ -747,20 +723,12 @@ def load_input(self, context: InputContext) -> pd.DataFrame: @io_manager( - config_schema={ - "pudl_output_path": Field( - EnvVar("PUDL_OUTPUT"), - description="Path of directory to store the database in.", - default_value=None, - ), - }, - required_resource_keys={"dataset_settings"}, + required_resource_keys={"dataset_settings", "pudl_paths"}, ) def ferc1_xbrl_sqlite_io_manager(init_context) -> FercXBRLSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" - base_dir = init_context.resource_config["pudl_output_path"] return FercXBRLSQLiteIOManager( - base_dir=base_dir, + base_dir=PudlPaths().output_dir, db_name="ferc1_xbrl", ) @@ -791,19 +759,12 @@ def load_from_path(self, context: InputContext, path: UPath) -> dd.DataFrame: ) -@io_manager( - config_schema={ - "base_path": Field( - EnvVar("PUDL_OUTPUT"), - is_required=False, - default_value=None, - ) - } -) +@io_manager(required_resource_keys={"pudl_paths"}) def epacems_io_manager( init_context: InitResourceContext, ) -> PandasParquetIOManager: """IO Manager that writes EPA CEMS partitions to individual parquet files.""" schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() - base_path = UPath(init_context.resource_config["base_path"]) - return PandasParquetIOManager(base_path=base_path, schema=schema) + return PandasParquetIOManager( + base_path=UPath(init_context.resources.pudl_paths.output_dir), schema=schema + ) diff --git a/src/pudl/resources.py b/src/pudl/resources.py index 9597287d72..e688bae4d2 100644 --- 
a/src/pudl/resources.py +++ b/src/pudl/resources.py @@ -1,9 +1,11 @@ """Collection of Dagster resources for PUDL.""" +import os + from dagster import Field, resource -from pudl.helpers import EnvVar from pudl.settings import DatasetsSettings, FercToSqliteSettings, create_dagster_config from pudl.workspace.datastore import Datastore +from pudl.workspace.setup import PudlPaths, set_path_overrides @resource(config_schema=create_dagster_config(DatasetsSettings())) @@ -28,13 +30,41 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: @resource( config_schema={ - "local_cache_path": Field( - EnvVar( - env_var="PUDL_INPUT", - ), - description="Path to local cache of raw data.", - default_value=None, + "PUDL_INPUT": Field( + str, + description="Path to the input directory.", + default_value="", + is_required=False, + ), + "PUDL_OUTPUT": Field( + str, + description="Path to the output directory.", + default_value="", + is_required=False, ), + }, +) +def pudl_paths(init_context) -> PudlPaths: + """Dagster resource for obtaining PudlPaths instance. + + Paths can be overriden when non-empty configuration fields are set. Default values + are pulled from PUDL_INPUT and PUDL_OUTPUT env variables. 
+ """ + if init_context.resource_config["PUDL_INPUT"]: + set_path_overrides(intput_dir=init_context.resource_config["PUDL_INPUT"]) + elif not os.getenv("PUDL_INPUT"): + raise ValueError("PUDL_INPUT env variable is not set") + + if init_context.resource_config["PUDL_OUTPUT"]: + set_path_overrides(output_dir=init_context.resource_config["PUDL_OUTPUT"]) + elif not os.getenv("PUDL_OUTPUT"): + raise ValueError("PUDL_OUTPUT env variable is not set") + + return PudlPaths() + + +@resource( + config_schema={ "gcs_cache_path": Field( str, description="Load datastore resources from Google Cloud Storage.", @@ -51,6 +81,7 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: default_value=False, ), }, + required_resource_keys={"pudl_paths"}, ) def datastore(init_context) -> Datastore: """Dagster resource to interact with Zenodo archives.""" @@ -59,5 +90,7 @@ def datastore(init_context) -> Datastore: ds_kwargs["sandbox"] = init_context.resource_config["sandbox"] if init_context.resource_config["use_local_cache"]: - ds_kwargs["local_cache_path"] = init_context.resource_config["local_cache_path"] + # TODO(rousik): we could also just use PudlPaths().input_dir here, because + # it should be initialized to the right values. 
+ ds_kwargs["local_cache_path"] = init_context.resources.pudl_paths.input_dir return Datastore(**ds_kwargs) diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index dcd39b3430..fb382bd33a 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -113,7 +113,7 @@ def main(): if not pathlib.Path.is_dir(pudl_out): raise FileNotFoundError(f"Directory not found: {pudl_out}") set_path_overrides(output_dir=pudl_out) - pudl.workspace.setup.init(colbber=args.clobber) + pudl.workspace.setup.init(clobber=args.clobber) if __name__ == "__main__": diff --git a/test/unit/extract/xbrl_test.py b/test/unit/extract/xbrl_test.py index 6640713546..056e9f2f04 100644 --- a/test/unit/extract/xbrl_test.py +++ b/test/unit/extract/xbrl_test.py @@ -20,6 +20,7 @@ FercToSqliteSettings, XbrlFormNumber, ) +from pudl.workspace.setup import PudlPaths def test_ferc_xbrl_datastore_get_taxonomy(mocker): @@ -186,6 +187,7 @@ def test_xbrl2sqlite(settings, forms, mocker): resources={ "ferc_to_sqlite_settings": settings, "datastore": "datastore", + "pudl_paths": PudlPaths(), }, config={ "workers": 10, diff --git a/test/unit/helpers_test.py b/test/unit/helpers_test.py index 8c6002fae5..7a401a8f1c 100644 --- a/test/unit/helpers_test.py +++ b/test/unit/helpers_test.py @@ -1,18 +1,14 @@ """Unit tests for the :mod:`pudl.helpers` module.""" -import os - import numpy as np import pandas as pd import pytest from dagster import AssetKey -from dagster._config.errors import PostProcessingError from pandas.testing import assert_frame_equal, assert_series_equal from pandas.tseries.offsets import BYearEnd import pudl from pudl.helpers import ( - EnvVar, convert_col_to_bool, convert_df_to_excel_file, convert_to_date, @@ -636,18 +632,6 @@ def test_sql_asset_factory_missing_file(): sql_asset_factory(name="fake_view")() -def test_env_var(): - os.environ["_PUDL_TEST"] = "test value" - env_var = EnvVar(env_var="_PUDL_TEST") - assert env_var.post_process(None) == "test 
value" - del os.environ["_PUDL_TEST"] - - -def test_env_var_missing_completely(): - with pytest.raises(PostProcessingError): - EnvVar(env_var="_PUDL_BOGUS").post_process(None) - - @pytest.mark.parametrize( "df", [ From 9eed8628cc2a8c65708bf6eda56f34cc6c7eb9a9 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Fri, 7 Jul 2023 20:17:52 +0200 Subject: [PATCH 07/51] Fix path setup for unit/integration tests. --- test/conftest.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 51751006ed..3893cd86d8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -185,7 +185,7 @@ def pudl_out_orig(live_dbs, pudl_engine): @pytest.fixture(scope="session") -def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs, but only based on DBF sources.""" if not live_dbs: ferc_to_sqlite_job_factory( @@ -205,7 +205,7 @@ def ferc_to_sqlite_dbf_only(live_dbs, pudl_datastore_config, etl_settings, pudl_ @pytest.fixture(scope="session") -def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs, but only based on XBRL sources.""" if not live_dbs: ferc_to_sqlite_job_factory( @@ -225,7 +225,7 @@ def ferc_to_sqlite_xbrl_only(live_dbs, pudl_datastore_config, etl_settings, pudl @pytest.fixture(scope="session") -def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings, pudl_env): +def ferc_to_sqlite(live_dbs, pudl_datastore_config, etl_settings): """Create raw FERC 1 SQLite DBs. If we are using the test database, we initialize it from scratch first. 
If we're @@ -397,7 +397,10 @@ def pytest_sessionstart(session): def pudl_path_setup(request, pudl_tmpdir): """Sets the necessary env variables for the input and output paths.""" if os.environ.get("GITHUB_ACTIONS", False): - pudl.workspace.setup.set_path_overrides() + pudl.workspace.setup.set_path_overrides( + input_dir="~/pudl-work/data", + output_dir="~/pudl-work/output", + ) else: if request.config.getoption("--tmp-data"): in_tmp = pudl_tmpdir / "data" @@ -405,7 +408,7 @@ def pudl_path_setup(request, pudl_tmpdir): pudl.workspace.setup.set_path_overrides( input_dir=str(Path(in_tmp).resolve()), ) - if request.config.getoption("--live-dbs"): + if not request.config.getoption("--live-dbs"): out_tmp = pudl_tmpdir / "output" out_tmp.mkdir() pudl.workspace.setup.set_path_overrides( From c401f62de88554d4712587c3bd502d52d7adc05b Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Fri, 7 Jul 2023 21:45:17 +0200 Subject: [PATCH 08/51] Remove pudl_paths resource. We can access PudlPaths() instance directly and assume that env variables are always set. --- src/pudl/convert/censusdp1tract_to_sqlite.py | 2 +- src/pudl/etl/__init__.py | 8 +--- src/pudl/etl/epacems_assets.py | 4 +- src/pudl/extract/ferc.py | 2 +- src/pudl/extract/ferc1.py | 4 +- src/pudl/extract/xbrl.py | 2 +- src/pudl/ferc_to_sqlite/__init__.py | 3 +- src/pudl/io_managers.py | 12 +++--- src/pudl/resources.py | 41 +------------------- test/unit/extract/xbrl_test.py | 2 - 10 files changed, 15 insertions(+), 65 deletions(-) diff --git a/src/pudl/convert/censusdp1tract_to_sqlite.py b/src/pudl/convert/censusdp1tract_to_sqlite.py index 9cefb97ea4..66faf055ea 100644 --- a/src/pudl/convert/censusdp1tract_to_sqlite.py +++ b/src/pudl/convert/censusdp1tract_to_sqlite.py @@ -39,7 +39,7 @@ default_value=2010, ), }, - required_resource_keys={"datastore", "pudl_paths"}, + required_resource_keys={"datastore"}, ) def censusdp1tract_to_sqlite(context): """Use GDAL's ogr2ogr utility to convert the Census DP1 GeoDB to an SQLite DB. 
diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index 02a0fa0fe3..0bcc273d0b 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -17,12 +17,7 @@ ferc1_xbrl_sqlite_io_manager, pudl_sqlite_io_manager, ) -from pudl.resources import ( - dataset_settings, - datastore, - ferc_to_sqlite_settings, - pudl_paths, -) +from pudl.resources import dataset_settings, datastore, ferc_to_sqlite_settings from pudl.settings import EtlSettings from . import glue_assets # noqa: F401 @@ -71,7 +66,6 @@ default_resources = { "datastore": datastore, - "pudl_paths": pudl_paths, "pudl_sqlite_io_manager": pudl_sqlite_io_manager, "ferc1_dbf_sqlite_io_manager": ferc1_dbf_sqlite_io_manager, "ferc1_xbrl_sqlite_io_manager": ferc1_xbrl_sqlite_io_manager, diff --git a/src/pudl/etl/epacems_assets.py b/src/pudl/etl/epacems_assets.py index f26248776f..ee1264a831 100644 --- a/src/pudl/etl/epacems_assets.py +++ b/src/pudl/etl/epacems_assets.py @@ -41,7 +41,7 @@ def get_years_from_settings(context): yield DynamicOutput(year, mapping_key=str(year)) -@op(required_resource_keys={"datastore", "dataset_settings", "pudl_paths"}) +@op(required_resource_keys={"datastore", "dataset_settings"}) def process_single_year( context, year, @@ -82,7 +82,7 @@ def process_single_year( return YearPartitions(year, epacems_settings.states) -@op(required_resource_keys={"pudl_paths"}) +@op def consolidate_partitions(context, partitions: list[YearPartitions]) -> None: """Read partitions into memory and write to a single monolithic output. 
diff --git a/src/pudl/extract/ferc.py b/src/pudl/extract/ferc.py index c32e48a20c..56f1450746 100644 --- a/src/pudl/extract/ferc.py +++ b/src/pudl/extract/ferc.py @@ -18,7 +18,7 @@ bool, description="Clobber existing ferc1 database.", default_value=False ), }, - required_resource_keys={"ferc_to_sqlite_settings", "datastore", "pudl_paths"}, + required_resource_keys={"ferc_to_sqlite_settings", "datastore"}, ) def dbf2sqlite(context) -> None: """Clone the FERC Form 1 Visual FoxPro databases into SQLite.""" diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index 6255e9be4d..1761944e08 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -370,9 +370,7 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: # asset name. -@asset( - required_resource_keys={"pudl_paths"}, -) +@asset def raw_xbrl_metadata_json(context) -> dict[str, dict[str, list[dict[str, Any]]]]: """Extract the FERC 1 XBRL Taxonomy metadata we've stored as JSON. diff --git a/src/pudl/extract/xbrl.py b/src/pudl/extract/xbrl.py index 60f708eac9..e7f4e34e70 100644 --- a/src/pudl/extract/xbrl.py +++ b/src/pudl/extract/xbrl.py @@ -111,7 +111,7 @@ def _get_sqlite_engine(form_number: int, clobber: bool) -> sa.engine.Engine: default_value=50, ), }, - required_resource_keys={"ferc_to_sqlite_settings", "datastore", "pudl_paths"}, + required_resource_keys={"ferc_to_sqlite_settings", "datastore"}, ) def xbrl2sqlite(context) -> None: """Clone the FERC Form 1 XBRL Databsae to SQLite.""" diff --git a/src/pudl/ferc_to_sqlite/__init__.py b/src/pudl/ferc_to_sqlite/__init__.py index 45fcb992e1..09451a6d48 100644 --- a/src/pudl/ferc_to_sqlite/__init__.py +++ b/src/pudl/ferc_to_sqlite/__init__.py @@ -6,7 +6,7 @@ import pudl from pudl.extract.ferc import dbf2sqlite from pudl.extract.xbrl import xbrl2sqlite -from pudl.resources import datastore, ferc_to_sqlite_settings, pudl_paths +from pudl.resources import datastore, ferc_to_sqlite_settings from pudl.settings import EtlSettings logger = 
pudl.logging_helpers.get_logger(__name__) @@ -34,7 +34,6 @@ def ferc_to_sqlite_xbrl_only(): default_resources_defs = { "ferc_to_sqlite_settings": ferc_to_sqlite_settings, "datastore": datastore, - "pudl_paths": pudl_paths, } ferc_to_sqlite_full = ferc_to_sqlite.to_job( diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 87c3776844..4e7a91f494 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -533,7 +533,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: return df -@io_manager(required_resource_keys={"pudl_paths"}) +@io_manager def pudl_sqlite_io_manager(init_context) -> PudlSQLiteIOManager: """Create a SQLiteManager dagster resource for the pudl database.""" return PudlSQLiteIOManager(base_dir=PudlPaths().output_dir, db_name="pudl") @@ -664,7 +664,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ).assign(sched_table_name=table_name) -@io_manager(required_resource_keys={"dataset_settings", "pudl_paths"}) +@io_manager(required_resource_keys={"dataset_settings"}) def ferc1_dbf_sqlite_io_manager(init_context) -> FercDBFSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" return FercDBFSQLiteIOManager( @@ -722,9 +722,7 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ).assign(sched_table_name=sched_table_name) -@io_manager( - required_resource_keys={"dataset_settings", "pudl_paths"}, -) +@io_manager(required_resource_keys={"dataset_settings"}) def ferc1_xbrl_sqlite_io_manager(init_context) -> FercXBRLSQLiteIOManager: """Create a SQLiteManager dagster resource for the ferc1 dbf database.""" return FercXBRLSQLiteIOManager( @@ -759,12 +757,12 @@ def load_from_path(self, context: InputContext, path: UPath) -> dd.DataFrame: ) -@io_manager(required_resource_keys={"pudl_paths"}) +@io_manager def epacems_io_manager( init_context: InitResourceContext, ) -> PandasParquetIOManager: """IO Manager that writes EPA CEMS partitions to individual parquet files.""" 
schema = Resource.from_id("hourly_emissions_epacems").to_pyarrow() return PandasParquetIOManager( - base_path=UPath(init_context.resources.pudl_paths.output_dir), schema=schema + base_path=UPath(PudlPaths().output_dir), schema=schema ) diff --git a/src/pudl/resources.py b/src/pudl/resources.py index e688bae4d2..476e84fa54 100644 --- a/src/pudl/resources.py +++ b/src/pudl/resources.py @@ -1,11 +1,10 @@ """Collection of Dagster resources for PUDL.""" -import os from dagster import Field, resource from pudl.settings import DatasetsSettings, FercToSqliteSettings, create_dagster_config from pudl.workspace.datastore import Datastore -from pudl.workspace.setup import PudlPaths, set_path_overrides +from pudl.workspace.setup import PudlPaths @resource(config_schema=create_dagster_config(DatasetsSettings())) @@ -28,41 +27,6 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: return FercToSqliteSettings(**init_context.resource_config) -@resource( - config_schema={ - "PUDL_INPUT": Field( - str, - description="Path to the input directory.", - default_value="", - is_required=False, - ), - "PUDL_OUTPUT": Field( - str, - description="Path to the output directory.", - default_value="", - is_required=False, - ), - }, -) -def pudl_paths(init_context) -> PudlPaths: - """Dagster resource for obtaining PudlPaths instance. - - Paths can be overriden when non-empty configuration fields are set. Default values - are pulled from PUDL_INPUT and PUDL_OUTPUT env variables. 
- """ - if init_context.resource_config["PUDL_INPUT"]: - set_path_overrides(intput_dir=init_context.resource_config["PUDL_INPUT"]) - elif not os.getenv("PUDL_INPUT"): - raise ValueError("PUDL_INPUT env variable is not set") - - if init_context.resource_config["PUDL_OUTPUT"]: - set_path_overrides(output_dir=init_context.resource_config["PUDL_OUTPUT"]) - elif not os.getenv("PUDL_OUTPUT"): - raise ValueError("PUDL_OUTPUT env variable is not set") - - return PudlPaths() - - @resource( config_schema={ "gcs_cache_path": Field( @@ -81,7 +45,6 @@ def pudl_paths(init_context) -> PudlPaths: default_value=False, ), }, - required_resource_keys={"pudl_paths"}, ) def datastore(init_context) -> Datastore: """Dagster resource to interact with Zenodo archives.""" @@ -92,5 +55,5 @@ def datastore(init_context) -> Datastore: if init_context.resource_config["use_local_cache"]: # TODO(rousik): we could also just use PudlPaths().input_dir here, because # it should be initialized to the right values. - ds_kwargs["local_cache_path"] = init_context.resources.pudl_paths.input_dir + ds_kwargs["local_cache_path"] = PudlPaths().input_dir return Datastore(**ds_kwargs) diff --git a/test/unit/extract/xbrl_test.py b/test/unit/extract/xbrl_test.py index 056e9f2f04..6640713546 100644 --- a/test/unit/extract/xbrl_test.py +++ b/test/unit/extract/xbrl_test.py @@ -20,7 +20,6 @@ FercToSqliteSettings, XbrlFormNumber, ) -from pudl.workspace.setup import PudlPaths def test_ferc_xbrl_datastore_get_taxonomy(mocker): @@ -187,7 +186,6 @@ def test_xbrl2sqlite(settings, forms, mocker): resources={ "ferc_to_sqlite_settings": settings, "datastore": "datastore", - "pudl_paths": PudlPaths(), }, config={ "workers": 10, From e57a69418b6a75065c0f805ef7aa9bf16cfc6238 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Tue, 18 Jul 2023 19:54:35 +0200 Subject: [PATCH 09/51] Minor changes to PudlPaths() obj. 
--- src/pudl/workspace/setup.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index a054f06efd..46380b2678 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -5,7 +5,7 @@ import shutil from pathlib import Path -from pydantic import BaseSettings +from pydantic import BaseSettings, DirectoryPath import pudl.logging_helpers @@ -35,8 +35,8 @@ class PudlPaths(BaseSettings): variables. Other paths of relevance are derived from these. """ - pudl_input: str - pudl_output: str + pudl_input: DirectoryPath + pudl_output: DirectoryPath @property def input_dir(self) -> Path: @@ -78,15 +78,6 @@ def output_file(self, filename: str) -> Path: """Path to file in PUDL output directory.""" return self.output_dir / filename - class Config: - """Pydantic configuration. Loads from .env file.""" - - env_file = ".env" - fields = { - "pudl_input": {"env": "PUDL_INPUT"}, - "pudl_output": {"env": "PUDL_OUTPUT"}, - } - def init(clobber=False): """Set up a new PUDL working environment based on the user settings. From 26aec898bc0611bf7d4fde905c65a9e8428e568f Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Tue, 18 Jul 2023 19:56:27 +0200 Subject: [PATCH 10/51] Put back the env file. --- src/pudl/workspace/setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 46380b2678..bba5844d28 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -38,6 +38,11 @@ class PudlPaths(BaseSettings): pudl_input: DirectoryPath pudl_output: DirectoryPath + class Config: + """Pydantic config, reads from .env file.""" + + env_file = ".env" + @property def input_dir(self) -> Path: """Path to PUDL input directory.""" From da4168202dd58d3ce19a57383572d71509111480 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Tue, 1 Aug 2023 10:38:18 -0600 Subject: [PATCH 11/51] Fix up PudlPath thingies. 
--- src/pudl/workspace/datastore.py | 4 ++-- src/pudl/workspace/setup.py | 32 +++++++++++++------------- src/pudl/workspace/setup_cli.py | 6 ++--- test/conftest.py | 40 ++++++++++++--------------------- 4 files changed, 35 insertions(+), 47 deletions(-) diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 3af7307673..e57dd968aa 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -20,7 +20,7 @@ import pudl from pudl.workspace import resource_cache from pudl.workspace.resource_cache import PudlResourceKey -from pudl.workspace.setup import PudlPaths, set_path_overrides +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -594,7 +594,7 @@ def main(): ) if args.pudl_in: - set_path_overrides(input_dir=args.pudl_in) + PudlPaths.set_path_overrides(input_dir=args.pudl_in) cache_path = None if not args.bypass_local_cache: diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index 280e6c6035..c6eeb655c7 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -12,22 +12,6 @@ logger = pudl.logging_helpers.get_logger(__name__) -def set_path_overrides( - input_dir: str | None = None, - output_dir: str | None = None, -) -> None: - """Set PUDL_INPUT and/or PUDL_OUTPUT env variables. - - Args: - input_dir: if set, overrides PUDL_INPUT env variable. - output_dir: if set, overrides PUDL_OUTPUT env variable. - """ - if input_dir: - os.environ["PUDL_INPUT"] = input_dir - if output_dir: - os.environ["PUDL_OUTPUT"] = output_dir - - class PudlPaths(BaseSettings): """These settings provide access to various PUDL directories. @@ -83,6 +67,22 @@ def output_file(self, filename: str) -> Path: """Path to file in PUDL output directory.""" return self.output_dir / filename + @staticmethod + def set_path_overrides( + input_dir: str | None = None, + output_dir: str | None = None, + ) -> None: + """Set PUDL_INPUT and/or PUDL_OUTPUT env variables. 
+ + Args: + input_dir: if set, overrides PUDL_INPUT env variable. + output_dir: if set, overrides PUDL_OUTPUT env variable. + """ + if input_dir: + os.environ["PUDL_INPUT"] = input_dir + if output_dir: + os.environ["PUDL_OUTPUT"] = output_dir + def init(clobber=False): """Set up a new PUDL working environment based on the user settings. diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index fb382bd33a..49e5204764 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -45,7 +45,7 @@ import sys import pudl -from pudl.workspace.setup import set_path_overrides +from pudl.workspace.setup import PudlPaths logger = pudl.logging_helpers.get_logger(__name__) @@ -107,12 +107,12 @@ def main(): pudl_in = pathlib.Path(args.pudl_in).expanduser().resolve() if not pathlib.Path.is_dir(pudl_in): raise FileNotFoundError(f"Directory not found: {pudl_in}") - set_path_overrides(input_dir=pudl_in) + PudlPaths.set_path_overrides(input_dir=pudl_in) if args.pudl_out: pudl_out = pathlib.Path(args.pudl_out).expanduser().resolve() if not pathlib.Path.is_dir(pudl_out): raise FileNotFoundError(f"Directory not found: {pudl_out}") - set_path_overrides(output_dir=pudl_out) + PudlPaths.set_path_overrides(output_dir=pudl_out) pudl.workspace.setup.init(clobber=args.clobber) diff --git a/test/conftest.py b/test/conftest.py index 3893cd86d8..758282b175 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -328,7 +328,6 @@ def ferc1_xbrl_taxonomy_metadata(ferc1_engine_xbrl): @pytest.fixture(scope="session") def pudl_sql_io_manager( - pudl_path_setup, ferc1_engine_dbf, # Implicit dependency ferc1_engine_xbrl, # Implicit dependency live_dbs, @@ -374,46 +373,35 @@ def pudl_engine(pudl_sql_io_manager): return pudl_sql_io_manager.engine -@pytest.fixture(scope="session") -def pudl_tmpdir(tmp_path_factory): - # Base temporary directory for all other tmp dirs. 
- tmpdir = tmp_path_factory.mktemp("pudl") - return tmpdir - - -def pytest_sessionstart(session): - """Configures input/output paths for the tests.""" - # TODO(rousik): Should we be using fixed paths instead - # of using tmpdir capabilities here? - pudl.workspace.setup.set_path_overrides( - input_dir="~/pudl-work/data", - output_dir="~/pudl-work/output", - ) - logger.info(f"Starting unit tests with output path {PudlPaths().output_dir}") - pudl.workspace.setup.init() - - -@pytest.fixture(scope="session") -def pudl_path_setup(request, pudl_tmpdir): - """Sets the necessary env variables for the input and output paths.""" +@pytest.fixture(scope="session", autouse=True) +def configure_paths_for_tests(tmp_path_factory, request): + """Configures PudlPaths for tests.""" if os.environ.get("GITHUB_ACTIONS", False): - pudl.workspace.setup.set_path_overrides( + if all(evar in os.environ for evar in ["PUDL_INPUTS", "PUDL_OUTPUTS"]): + logger.info( + "Environment variables PUDL_INPUTS and PUDL_OUTPUTS are already set." + ) + return + PudlPaths.set_path_overrides( input_dir="~/pudl-work/data", output_dir="~/pudl-work/output", ) else: + pudl_tmpdir = tmp_path_factory.mktemp("pudl") if request.config.getoption("--tmp-data"): in_tmp = pudl_tmpdir / "data" in_tmp.mkdir() - pudl.workspace.setup.set_path_overrides( + PudlPaths.set_path_overrides( input_dir=str(Path(in_tmp).resolve()), ) if not request.config.getoption("--live-dbs"): out_tmp = pudl_tmpdir / "output" out_tmp.mkdir() - pudl.workspace.setup.set_path_overrides( + PudlPaths.set_path_overrides( output_dir=str(Path(out_tmp).resolve()), ) + logger.info(f"Starting unit tests with output path {PudlPaths().output_dir}") + pudl.workspace.setup.init() @pytest.fixture(scope="session") From 00cf6fd96b5fa2ba81f2d8d1518485a4dd26c280 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Tue, 1 Aug 2023 13:19:42 -0600 Subject: [PATCH 12/51] Rework initialization of directories in tests. 
--- test/conftest.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 758282b175..01c29705a4 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -376,30 +376,29 @@ def pudl_engine(pudl_sql_io_manager): @pytest.fixture(scope="session", autouse=True) def configure_paths_for_tests(tmp_path_factory, request): """Configures PudlPaths for tests.""" + gha_override_input = False + gha_override_output = False if os.environ.get("GITHUB_ACTIONS", False): - if all(evar in os.environ for evar in ["PUDL_INPUTS", "PUDL_OUTPUTS"]): - logger.info( - "Environment variables PUDL_INPUTS and PUDL_OUTPUTS are already set." - ) - return + gha_override_input = "PUDL_INPUTS" not in os.environ + gha_override_output = "PUDL_OUTPUTS" not in os.environ + logger.info( + "Running in GitHub Actions environment, using" + f" temporary input dir: {gha_override_input}, and" + f" temporary output dir: {gha_override_output}" + ) + pudl_tmpdir = tmp_path_factory.mktemp("pudl") + if gha_override_output or request.config.getoption("--tmp-data"): + in_tmp = pudl_tmpdir / "data" + in_tmp.mkdir() PudlPaths.set_path_overrides( - input_dir="~/pudl-work/data", - output_dir="~/pudl-work/output", + input_dir=str(Path(in_tmp).resolve()), + ) + if gha_override_output or not request.config.getoption("--live-dbs"): + out_tmp = pudl_tmpdir / "output" + out_tmp.mkdir() + PudlPaths.set_path_overrides( + output_dir=str(Path(out_tmp).resolve()), ) - else: - pudl_tmpdir = tmp_path_factory.mktemp("pudl") - if request.config.getoption("--tmp-data"): - in_tmp = pudl_tmpdir / "data" - in_tmp.mkdir() - PudlPaths.set_path_overrides( - input_dir=str(Path(in_tmp).resolve()), - ) - if not request.config.getoption("--live-dbs"): - out_tmp = pudl_tmpdir / "output" - out_tmp.mkdir() - PudlPaths.set_path_overrides( - output_dir=str(Path(out_tmp).resolve()), - ) logger.info(f"Starting unit tests with output path 
{PudlPaths().output_dir}") pudl.workspace.setup.init() From 1bb97be8540f77d371279ffcc555fa41af91041a Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Wed, 2 Aug 2023 14:48:46 -0600 Subject: [PATCH 13/51] Fix up some failing integration tests. --- test/integration/datasette_metadata_test.py | 10 ++++------ test/integration/epacems_test.py | 1 - test/integration/etl_test.py | 4 ++-- test/integration/glue_test.py | 1 - 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/test/integration/datasette_metadata_test.py b/test/integration/datasette_metadata_test.py index 6428abae70..dfd0f0838f 100644 --- a/test/integration/datasette_metadata_test.py +++ b/test/integration/datasette_metadata_test.py @@ -2,24 +2,22 @@ import json import logging -import os -from pathlib import Path import datasette.utils import yaml from pudl.metadata.classes import DatasetteMetadata +from pudl.workspace.setup import PudlPaths logger = logging.getLogger(__name__) -def test_datasette_metadata_to_yml(pudl_path_setup, ferc1_engine_xbrl): +def test_datasette_metadata_to_yml(ferc1_engine_xbrl): """Test the ability to export metadata as YML for use with Datasette.""" - pudl_output = Path(os.getenv("PUDL_OUTPUT")) - metadata_yml = pudl_output / "metadata.yml" + metadata_yml = PudlPaths().output_dir / "metadata.yml" logger.info(f"Writing Datasette Metadata to {metadata_yml}") - dm = DatasetteMetadata.from_data_source_ids(pudl_output) + dm = DatasetteMetadata.from_data_source_ids(PudlPaths().output_dir) dm.to_yaml(path=metadata_yml) logger.info("Parsing generated metadata using datasette utils.") diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index 5421eb70a1..cec4dc7da4 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -16,7 +16,6 @@ def epacems_year_and_state(etl_settings): @pytest.fixture(scope="session") def epacems_parquet_path( - pudl_path_setup, pudl_engine, # implicit dependency; ensures .parquet files exist ): 
"""Get path to the directory of EPA CEMS .parquet data.""" diff --git a/test/integration/etl_test.py b/test/integration/etl_test.py index 6bbc1285a6..613834f8df 100644 --- a/test/integration/etl_test.py +++ b/test/integration/etl_test.py @@ -152,7 +152,7 @@ def test_extract_eia923(self, pudl_datastore_fixture): class TestFerc1ExtractDebugFunctions: """Verify the ferc1 extraction debug functions are working properly.""" - def test_extract_dbf(self, ferc1_engine_dbf, pudl_path_setup): + def test_extract_dbf(self, ferc1_engine_dbf): """Test extract_dbf.""" years = [2020, 2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} @@ -171,7 +171,7 @@ def test_extract_dbf(self, ferc1_engine_dbf, pudl_path_setup): df.report_year < 2022 ).all(), f"Unexpected years found in table: {table_name}" - def test_extract_xbrl(self, ferc1_engine_dbf, pudl_path_setup): + def test_extract_xbrl(self, ferc1_engine_dbf): """Test extract_xbrl.""" years = [2021] # add desired years here configured_dataset_settings = {"ferc1": {"years": years}} diff --git a/test/integration/glue_test.py b/test/integration/glue_test.py index 67ec7dfa95..2afa7bc333 100644 --- a/test/integration/glue_test.py +++ b/test/integration/glue_test.py @@ -56,7 +56,6 @@ def plants_ferc1_raw(dataset_settings_config) -> pd.DataFrame: @pytest.fixture(scope="module") def glue_test_dfs( - pudl_path_setup, pudl_out, ferc1_engine_xbrl, ferc1_engine_dbf, From 69f1e0c9245e7054be3556eca7691da0af4ebd86 Mon Sep 17 00:00:00 2001 From: thinky Date: Fri, 4 Aug 2023 11:32:40 -0400 Subject: [PATCH 14/51] WIP rewrite extractor for backwards compatibility with FTP --- src/pudl/extract/epacems.py | 148 +++++++++++++++++--- src/pudl/metadata/sources.py | 2 +- src/pudl/package_data/settings/etl_fast.yml | 4 +- src/pudl/package_data/settings/etl_full.yml | 1 + src/pudl/workspace/datastore.py | 5 + 5 files changed, 135 insertions(+), 25 deletions(-) diff --git a/src/pudl/extract/epacems.py 
b/src/pudl/extract/epacems.py index c91580509f..3b50df4fd3 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -1,5 +1,12 @@ """Retrieve data from EPA CEMS hourly zipped CSVs. +Prior to August 2023, this data was retrieved from an FTP server. After August 2023, +this data is now retrieved from the CEMS API. The format of the files has changed from +monthly CSVs for each state to one CSV per state per year. The names of the columns +have also changed. This module preserves backwards compatibility for the older data to +allow reading from earlier versions of the CEMS archived data. Column name compatibility +was determined by reading the CEMS API documentation on column names. + Presently, this module is where the CEMS columns are renamed and dropped. Any columns in the IGNORE_COLS dictionary are excluded from the final output. All of these columns are calculable rates, measurement flags, or descriptors (like facility name) that can be @@ -11,11 +18,11 @@ CAMD team cleared this up. Pre-transform, the `plant_id_epa` field is a close but not perfect indicator for -`plant_id_eia`. In the raw data it's called `ORISPL_CODE` but that's not entirely -accurate. The epacamd_eia crosswalk will show that the mapping between `ORISPL_CODE` as -it appears in CEMS and the `plant_id_eia` field used in EIA data. Hense, we've called it -`plant_id_epa` until it gets transformed into `plant_id_eia` during the transform -process with help from the crosswalk. +`plant_id_eia`. In the raw data it's called `ORISPL_CODE` (FTP server) or `Facility ID` +(API) but that's not entirely accurate. The epacamd_eia crosswalk will show that the +mapping between `ORISPL_CODE` as it appears in CEMS and the `plant_id_eia` field used in +EIA data. Hense, we've called it `plant_id_epa` until it gets transformed into +`plant_id_eia` during the transform process with help from the crosswalk. 
""" from pathlib import Path from typing import NamedTuple @@ -28,8 +35,9 @@ logger = pudl.logging_helpers.get_logger(__name__) -# EPA CEMS constants ##### -RENAME_DICT = { +######################################################################################## +# EPA CEMS constants for FTP ZIP files ##### +FTP_RENAME_DICT = { "STATE": "state", "FACILITY_NAME": "plant_name", # Not reading from CSV "ORISPL_CODE": "plant_id_epa", # Not quite the same as plant_id_eia @@ -75,7 +83,7 @@ """ # Any column that exactly matches one of these won't be read -IGNORE_COLS = { +FTP_IGNORE_COLS = { "FACILITY_NAME", "SO2_RATE (lbs/mmBtu)", "SO2_RATE", @@ -91,6 +99,72 @@ } """Set: The set of EPA CEMS columns to ignore when reading data.""" +######################################################################################## +# EPA CEMS constants for API CSV files ##### + +API_RENAME_DICT = { + "State": "state", + "Facility Name": "plant_name", # Not reading from CSV + "Facility ID": "plant_id_epa", # unique facility id for internal EPA database management (ORIS code) + "Unit ID": "emissions_unit_id_epa", + "Associated Stacks": "associated_stacks", + # These op_date, op_hour, and op_time variables get converted to + # operating_date, operating_datetime and operating_time_interval in + # transform/epacems.py + "Date": "op_date", + "Hour": "op_hour", + "Operating Time": "operating_time_hours", + "Gross Load (MW)": "gross_load_mw", + "Steam Load (1000 lb/hr)": "steam_load_1000_lbs", + "SO2 Mass (lbs)": "so2_mass_lbs", + "SO2 Mass Measure Indicator": "so2_mass_measurement_code", + "SO2 Rate (lbs/mmBtu)": "so2_rate_lbs_mmbtu", # Not reading from CSV + "SO2 Rate Measure Indicator": "so2_rate_measure_flg", # Not reading from CSV + "NOx Rate (lbs/mmBtu)": "nox_rate_lbs_mmbtu", # Not reading from CSV + "NOx Rate Measure Indicator": "nox_rate_measurement_code", # Not reading from CSV + "NOx Mass (lbs)": "nox_mass_lbs", + "NOx Mass Measure Indicator": "nox_mass_measurement_code", + "CO2 
Mass (short tons)": "co2_mass_tons", + "CO2 Mass Measure Indicator": "co2_mass_measurement_code", + "CO2 Rate (short tons/mmBtu)": "co2_rate_tons_mmbtu", # Not reading from CSV + "CO2 Rate Measure Indicator": "co2_rate_measure_flg", # Not reading from CSV + "Heat Input (mmBtu)": "heat_content_mmbtu", + "Heat Input Measure Indicator": "heat_content_measure_flg", + "Primary Fuel Type": "primary_fuel_type", + "Secondary Fuel Type": "secondary_fuel_type", + "Unit Type": "unit_type", + "SO2 Controls": "so2_controls", + "NOx Controls": "nox_controls", + "PM Controls": "pm_controls", + "Hg Controls": "hg_controls", + "Program Code": "program_code", +} +"""Dict: A dictionary containing EPA CEMS column names (keys) and replacement names to +use when reading those columns into PUDL (values). + +There are some duplicate rename values because the column names change year to year. +""" + +# Any column that exactly matches one of these won't be read +API_IGNORE_COLS = { + "Facility Name", + "SO2 Rate (lbs/mmBtu)", + "SO2 Rate Measure Indicator", + "CO2 Rate (tons/mmBtu)", + "CO2 Rate Measure Indicator", + "NOx Rate (lbs/mmBtu)", + "NOX Rate Measure Indicator", + "Primary Fuel Type", + "Secondary Fuel Type", + "Unit Type", + "SO2 Controls", + "NOx Controls", + "PM Controls", + "Hg Controls", + "Program Code", +} +"""Set: The set of EPA CEMS columns to ignore when reading data.""" + class EpaCemsPartition(NamedTuple): """Represents EpaCems partition identifying unique resource file.""" @@ -110,6 +184,10 @@ def get_monthly_file(self, month: int) -> Path: """Returns the filename (without suffix) that contains the monthly data.""" return Path(f"{self.year}{self.state.lower()}{month:02}") + def get_annual_file(self) -> Path: + """Return the name of the CSV file that holds annual hourly data.""" + return Path(f"epacems-{self.year}-{self.state.lower()}.csv") + class EpaCemsDatastore: """Helper class to extract EpaCems resources from datastore. 
@@ -125,21 +203,47 @@ def __init__(self, datastore: Datastore): self.datastore = datastore def get_data_frame(self, partition: EpaCemsPartition) -> pd.DataFrame: - """Constructs dataframe holding data for a given (year, state) partition.""" + """Constructs dataframe from a zipfile for a given (year, state) partition.""" archive = self.datastore.get_zipfile_resource( "epacems", **partition.get_filters() ) - dfs = [] - for month in range(1, 13): - mf = partition.get_monthly_file(month) - with archive.open(str(mf.with_suffix(".zip")), "r") as mzip: - with ZipFile(mzip, "r").open( - str(mf.with_suffix(".csv")), "r" - ) as csv_file: - dfs.append(self._csv_to_dataframe(csv_file)) - return pd.concat(dfs, sort=True, copy=False, ignore_index=True) - - def _csv_to_dataframe(self, csv_file) -> pd.DataFrame: + + # Get names of files in zip file + files = self.datastore.get_zipfile_file_names(archive) + logger.info(files) + # If archive has one csv file in it, this is a yearly CSV (archived after 08/23) + # and this CSV does not need to be concatenated. + if len(files) == 1 and files[0].endswith(".csv"): + with archive.open(str(partition.get_annual_file()), "r") as csv_file: + df = self._csv_to_dataframe( + csv_file, ignore_cols=API_IGNORE_COLS, rename_dict=API_RENAME_DICT + ) + + # If archive has other zip files in it, these are monthly CSVs (archived before + # 08/23) which need to be concatenated. + elif len(files) > 1 and all([x.endswith(".zip") for x in files]): + dfs = [] + for month in range(1, 13): + mf = partition.get_monthly_file(month) + with archive.open(str(mf.with_suffix(".zip")), "r") as mzip: + with ZipFile(mzip, "r").open( + str(mf.with_suffix(".csv")), "r" + ) as csv_file: + dfs.append( + self._csv_to_dataframe( + csv_file, + ignore_cols=FTP_IGNORE_COLS, + rename_dict=FTP_RENAME_DICT, + ) + ) + df = pd.concat(dfs, sort=True, copy=False, ignore_index=True) + else: + raise AssertionError(f"Unexpected archive format. 
Found files: {files}.") + return df + + def _csv_to_dataframe( + self, csv_file: Path, ignore_cols: dict[str, str], rename_dict: dict[str, str] + ) -> pd.DataFrame: """Convert a CEMS csv file into a :class:`pandas.DataFrame`. Args: @@ -151,9 +255,9 @@ def _csv_to_dataframe(self, csv_file) -> pd.DataFrame: return pd.read_csv( csv_file, index_col=False, - usecols=lambda col: col not in IGNORE_COLS, + usecols=lambda col: col not in ignore_cols, low_memory=False, - ).rename(columns=RENAME_DICT) + ).rename(columns=rename_dict) def extract(year: int, state: str, ds: Datastore): diff --git a/src/pudl/metadata/sources.py b/src/pudl/metadata/sources.py index 7239f96b41..63dc0fe8b3 100644 --- a/src/pudl/metadata/sources.py +++ b/src/pudl/metadata/sources.py @@ -279,7 +279,7 @@ }, "field_namespace": "epacems", "working_partitions": { - "years": sorted(set(range(1995, 2022))), + "years": sorted(set(range(1995, 2023))), "states": sorted(EPACEMS_STATES), }, "contributors": [ diff --git a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index 67716841e3..84e8958bb4 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ -30,7 +30,7 @@ name: pudl-fast title: PUDL Fast ETL description: > FERC 1 and EIA 860/923 from 2020 (output to SQLite) plus - EPA CEMS hourly emissions data from 2020 (output to Parquet). + EPA CEMS hourly emissions data from 2020-2022 (output to Parquet). 
version: 0.1.0 datasets: ferc1: @@ -75,4 +75,4 @@ datasets: # so if you're loading CEMS data for a particular year, you should # also load the EIA 860 data for that year if possible states: [ID, ME] - years: [2019, 2020, 2021] + years: [2020, 2021, 2022] diff --git a/src/pudl/package_data/settings/etl_full.yml b/src/pudl/package_data/settings/etl_full.yml index fe5567146e..c9766b5413 100644 --- a/src/pudl/package_data/settings/etl_full.yml +++ b/src/pudl/package_data/settings/etl_full.yml @@ -297,4 +297,5 @@ datasets: 2019, 2020, 2021, + 2022, ] diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index a712db58b5..632618d8e6 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -420,6 +420,11 @@ def get_zipfile_resources( for resource_key, content in self.get_resources(dataset, **filters): yield resource_key, zipfile.ZipFile(io.BytesIO(content)) + def get_zipfile_file_names(self, zip_file: zipfile.ZipFile): + """Given a zipfile, return a list of the file names in it.""" + if zipfile.Zipfile.is_zipfile(zip_file): + return zipfile.Zipfile.namelist(zip_file) + class ParseKeyValues(argparse.Action): """Transforms k1=v1,k2=v2,... From 7184a83c79421325d2ec004e3f227107eb583a17 Mon Sep 17 00:00:00 2001 From: thinky Date: Wed, 9 Aug 2023 17:25:02 -0400 Subject: [PATCH 15/51] Update EPACEMS extraction to use API only --- src/pudl/extract/epacems.py | 111 +++--------------------------------- 1 file changed, 9 insertions(+), 102 deletions(-) diff --git a/src/pudl/extract/epacems.py b/src/pudl/extract/epacems.py index 3b50df4fd3..c6c485d14e 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -3,30 +3,24 @@ Prior to August 2023, this data was retrieved from an FTP server. After August 2023, this data is now retrieved from the CEMS API. The format of the files has changed from monthly CSVs for each state to one CSV per state per year. The names of the columns -have also changed. 
This module preserves backwards compatibility for the older data to -allow reading from earlier versions of the CEMS archived data. Column name compatibility -was determined by reading the CEMS API documentation on column names. +have also changed. Column name compatibility was determined by reading the CEMS API +documentation on column names. Presently, this module is where the CEMS columns are renamed and dropped. Any columns in the IGNORE_COLS dictionary are excluded from the final output. All of these columns are calculable rates, measurement flags, or descriptors (like facility name) that can be accessed by merging this data with the EIA860 plants entity table. We also remove the -`FACILITY_ID` field because it is internal to the EPA's business accounting database and -`UNIT_ID` field because it's a unique (calculable) identifier for plant_id and -emissions_unit_id (previously `UNITID`) groupings. It took a minute to verify the -difference between the `UNITID` and `UNIT_ID` fields, but coorespondance with the EPA's -CAMD team cleared this up. +`FACILITY_ID` field because it is internal to the EPA's business accounting database. Pre-transform, the `plant_id_epa` field is a close but not perfect indicator for -`plant_id_eia`. In the raw data it's called `ORISPL_CODE` (FTP server) or `Facility ID` -(API) but that's not entirely accurate. The epacamd_eia crosswalk will show that the -mapping between `ORISPL_CODE` as it appears in CEMS and the `plant_id_eia` field used in -EIA data. Hense, we've called it `plant_id_epa` until it gets transformed into -`plant_id_eia` during the transform process with help from the crosswalk. +`plant_id_eia`. In the raw data it's called `Facility ID` (ORISPL code) but that's not +entirely accurate. The epacamd_eia crosswalk will show that the mapping between +`Facility ID` as it appears in CEMS and the `plant_id_eia` field used in EIA data. 
+Hence, we've called it `plant_id_epa` until it gets transformed into `plant_id_eia` +during the transform process with help from the crosswalk. """ from pathlib import Path from typing import NamedTuple -from zipfile import ZipFile import pandas as pd @@ -35,70 +29,6 @@ logger = pudl.logging_helpers.get_logger(__name__) -######################################################################################## -# EPA CEMS constants for FTP ZIP files ##### -FTP_RENAME_DICT = { - "STATE": "state", - "FACILITY_NAME": "plant_name", # Not reading from CSV - "ORISPL_CODE": "plant_id_epa", # Not quite the same as plant_id_eia - "UNITID": "emissions_unit_id_epa", - # These op_date, op_hour, and op_time variables get converted to - # operating_date, operating_datetime and operating_time_interval in - # transform/epacems.py - "OP_DATE": "op_date", - "OP_HOUR": "op_hour", - "OP_TIME": "operating_time_hours", - "GLOAD (MW)": "gross_load_mw", - "GLOAD": "gross_load_mw", - "SLOAD (1000 lbs)": "steam_load_1000_lbs", - "SLOAD (1000lb/hr)": "steam_load_1000_lbs", - "SLOAD": "steam_load_1000_lbs", - "SO2_MASS (lbs)": "so2_mass_lbs", - "SO2_MASS": "so2_mass_lbs", - "SO2_MASS_MEASURE_FLG": "so2_mass_measurement_code", - "SO2_RATE (lbs/mmBtu)": "so2_rate_lbs_mmbtu", # Not reading from CSV - "SO2_RATE": "so2_rate_lbs_mmbtu", # Not reading from CSV - "SO2_RATE_MEASURE_FLG": "so2_rate_measure_flg", # Not reading from CSV - "NOX_RATE (lbs/mmBtu)": "nox_rate_lbs_mmbtu", - "NOX_RATE": "nox_rate_lbs_mmbtu", # Not reading from CSV - "NOX_RATE_MEASURE_FLG": "nox_rate_measurement_code", # Not reading from CSV - "NOX_MASS (lbs)": "nox_mass_lbs", - "NOX_MASS": "nox_mass_lbs", - "NOX_MASS_MEASURE_FLG": "nox_mass_measurement_code", - "CO2_MASS (tons)": "co2_mass_tons", - "CO2_MASS": "co2_mass_tons", - "CO2_MASS_MEASURE_FLG": "co2_mass_measurement_code", - "CO2_RATE (tons/mmBtu)": "co2_rate_tons_mmbtu", # Not reading from CSV - "CO2_RATE": "co2_rate_tons_mmbtu", # Not reading from CSV - 
"CO2_RATE_MEASURE_FLG": "co2_rate_measure_flg", # Not reading from CSV - "HEAT_INPUT (mmBtu)": "heat_content_mmbtu", - "HEAT_INPUT": "heat_content_mmbtu", - "FAC_ID": "facility_id", # unique facility id for internal EPA database management - "UNIT_ID": "unit_id_what", # unique unit id for internal EPA database management -} -"""Dict: A dictionary containing EPA CEMS column names (keys) and replacement names to -use when reading those columns into PUDL (values). - -There are some duplicate rename values because the column names change year to year. -""" - -# Any column that exactly matches one of these won't be read -FTP_IGNORE_COLS = { - "FACILITY_NAME", - "SO2_RATE (lbs/mmBtu)", - "SO2_RATE", - "SO2_RATE_MEASURE_FLG", - "CO2_RATE (tons/mmBtu)", - "CO2_RATE", - "CO2_RATE_MEASURE_FLG", - "NOX_RATE_MEASURE_FLG", - "NOX_RATE", - "NOX_RATE (lbs/mmBtu)", - "FAC_ID", - "UNIT_ID", -} -"""Set: The set of EPA CEMS columns to ignore when reading data.""" - ######################################################################################## # EPA CEMS constants for API CSV files ##### @@ -180,10 +110,6 @@ def get_filters(self): """Returns filters for retrieving given partition resource from Datastore.""" return dict(year=self.year, state=self.state.lower()) - def get_monthly_file(self, month: int) -> Path: - """Returns the filename (without suffix) that contains the monthly data.""" - return Path(f"{self.year}{self.state.lower()}{month:02}") - def get_annual_file(self) -> Path: """Return the name of the CSV file that holds annual hourly data.""" return Path(f"epacems-{self.year}-{self.state.lower()}.csv") @@ -218,28 +144,9 @@ def get_data_frame(self, partition: EpaCemsPartition) -> pd.DataFrame: df = self._csv_to_dataframe( csv_file, ignore_cols=API_IGNORE_COLS, rename_dict=API_RENAME_DICT ) - - # If archive has other zip files in it, these are monthly CSVs (archived before - # 08/23) which need to be concatenated. 
- elif len(files) > 1 and all([x.endswith(".zip") for x in files]): - dfs = [] - for month in range(1, 13): - mf = partition.get_monthly_file(month) - with archive.open(str(mf.with_suffix(".zip")), "r") as mzip: - with ZipFile(mzip, "r").open( - str(mf.with_suffix(".csv")), "r" - ) as csv_file: - dfs.append( - self._csv_to_dataframe( - csv_file, - ignore_cols=FTP_IGNORE_COLS, - rename_dict=FTP_RENAME_DICT, - ) - ) - df = pd.concat(dfs, sort=True, copy=False, ignore_index=True) + return df else: raise AssertionError(f"Unexpected archive format. Found files: {files}.") - return df def _csv_to_dataframe( self, csv_file: Path, ignore_cols: dict[str, str], rename_dict: dict[str, str] From 66b852f892b67865ea3fcb553ce513f8b597cbea Mon Sep 17 00:00:00 2001 From: thinky Date: Thu, 10 Aug 2023 10:50:56 -0400 Subject: [PATCH 16/51] First attempt at CEMS 2021-2022 integration, validation needed --- src/pudl/extract/epacems.py | 2 +- src/pudl/output/epacems.py | 2 +- src/pudl/package_data/settings/etl_fast.yml | 2 +- src/pudl/transform/epacems.py | 2 +- src/pudl/workspace/datastore.py | 5 ++--- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pudl/extract/epacems.py b/src/pudl/extract/epacems.py index c6c485d14e..a36a2b62a1 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -136,7 +136,7 @@ def get_data_frame(self, partition: EpaCemsPartition) -> pd.DataFrame: # Get names of files in zip file files = self.datastore.get_zipfile_file_names(archive) - logger.info(files) + # If archive has one csv file in it, this is a yearly CSV (archived after 08/23) # and this CSV does not need to be concatenated. 
if len(files) == 1 and files[0].endswith(".csv"): diff --git a/src/pudl/output/epacems.py b/src/pudl/output/epacems.py index 9f7f81fd31..1eb7ccf0b1 100644 --- a/src/pudl/output/epacems.py +++ b/src/pudl/output/epacems.py @@ -134,7 +134,7 @@ def epacems( if epacems_path is None: pudl_settings = pudl.workspace.setup.get_defaults() - epacems_path = Path(pudl_settings["pudl_out"]) / "epacems" + epacems_path = Path(pudl_settings["pudl_out"]) / "hourly_emissions_epacems" epacems = dd.read_parquet( epacems_path, diff --git a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index 84e8958bb4..a6e149db93 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ -75,4 +75,4 @@ datasets: # so if you're loading CEMS data for a particular year, you should # also load the EIA 860 data for that year if possible states: [ID, ME] - years: [2020, 2021, 2022] + years: [2021, 2022] diff --git a/src/pudl/transform/epacems.py b/src/pudl/transform/epacems.py index e752583547..e1c0bd656f 100644 --- a/src/pudl/transform/epacems.py +++ b/src/pudl/transform/epacems.py @@ -96,7 +96,7 @@ def convert_to_utc(df: pd.DataFrame, plant_utc_offset: pd.DataFrame) -> pd.DataF # `parse_dates=True`, is >10x faster. 
# Read the date as a datetime, so all the dates are midnight op_datetime_naive=lambda x: pd.to_datetime( - x.op_date, format=r"%m-%d-%Y", exact=True, cache=True + x.op_date, format=r"%Y-%m-%d", exact=True, cache=True ) + pd.to_timedelta(x.op_hour, unit="h") # Add the hour ).merge( diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 632618d8e6..9711c94827 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -174,7 +174,7 @@ class ZenodoFetcher: "eia923": "10.5072/zenodo.1217724", "eia_bulk_elec": "10.5072/zenodo.1103572", "epacamd_eia": "10.5072/zenodo.1199170", - "epacems": "10.5072/zenodo.672963", + "epacems": "10.5072/zenodo.1228519", "ferc1": "10.5072/zenodo.1070868", "ferc2": "10.5072/zenodo.1188447", "ferc6": "10.5072/zenodo.1098088", @@ -422,8 +422,7 @@ def get_zipfile_resources( def get_zipfile_file_names(self, zip_file: zipfile.ZipFile): """Given a zipfile, return a list of the file names in it.""" - if zipfile.Zipfile.is_zipfile(zip_file): - return zipfile.Zipfile.namelist(zip_file) + return zipfile.ZipFile.namelist(zip_file) class ParseKeyValues(argparse.Action): From 4a77ec3f2e14ec719a406466bb6986654be32dd4 Mon Sep 17 00:00:00 2001 From: thinky Date: Fri, 11 Aug 2023 08:32:46 -0400 Subject: [PATCH 17/51] Update doi and etl fast yml, fix unit test --- src/pudl/package_data/settings/etl_fast.yml | 6 +++--- src/pudl/workspace/datastore.py | 2 +- test/unit/workspace/datastore_test.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pudl/package_data/settings/etl_fast.yml b/src/pudl/package_data/settings/etl_fast.yml index 5f1250f71c..a31f85542f 100644 --- a/src/pudl/package_data/settings/etl_fast.yml +++ b/src/pudl/package_data/settings/etl_fast.yml @@ -29,8 +29,8 @@ ferc_to_sqlite_settings: name: pudl-fast title: PUDL Fast ETL description: > - FERC 1 and EIA 860/923 from 2020 (output to SQLite) plus - EPA CEMS hourly emissions data from 2020-2022 (output to Parquet). 
+ FERC 1 data from 2020 and 2021, EIA 860/923 from 2020 and 2022 (output to SQLite) plus + EPA CEMS hourly emissions data from 2020 and 2022 (output to Parquet). version: 0.1.0 datasets: ferc1: @@ -75,4 +75,4 @@ datasets: # so if you're loading CEMS data for a particular year, you should # also load the EIA 860 data for that year if possible states: [ID, ME] - years: [2021, 2022] + years: [2020, 2022] diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 9711c94827..30b819c0d4 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -189,7 +189,7 @@ class ZenodoFetcher: "eia923": "10.5281/zenodo.8172818", "eia_bulk_elec": "10.5281/zenodo.7067367", "epacamd_eia": "10.5281/zenodo.7900974", - "epacems": "10.5281/zenodo.6910058", + "epacems": "10.5281/zenodo.8235497", "ferc1": "10.5281/zenodo.7314437", "ferc2": "10.5281/zenodo.8006881", "ferc6": "10.5281/zenodo.7130141", diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 38672b44de..846f474294 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -220,8 +220,8 @@ class TestZenodoFetcher(unittest.TestCase): }, ] } - PROD_EPACEMS_DOI = "10.5281/zenodo.6910058" - PROD_EPACEMS_ZEN_ID = 6910058 # This is the last numeric part of doi + PROD_EPACEMS_DOI = "10.5281/zenodo.8235497" + PROD_EPACEMS_ZEN_ID = 8235497 # This is the last numeric part of doi def setUp(self): """Constructs mockable Zenodo fetcher based on MOCK_EPACEMS_DATAPACKAGE.""" From 856247d5b9165818de210abc317aeccf862fd911 Mon Sep 17 00:00:00 2001 From: thinky Date: Fri, 11 Aug 2023 10:37:37 -0400 Subject: [PATCH 18/51] Fix integration test to reflect changed year data expectations --- test/integration/epacems_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index cc88b2309a..1fb3ec533a 100644 --- 
a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -75,9 +75,9 @@ def test_epacems_parallel(pudl_engine, epacems_parquet_path): # monolithic outputs. df = dd.read_parquet( epacems_parquet_path, - filters=year_state_filter(years=[2019], states=["ME"]), + filters=year_state_filter(years=[2020], states=["ME"]), index=False, engine="pyarrow", split_row_groups=True, ).compute() - assert df.shape == (96_360, 16) # nosec: B101 + assert df.shape == (96_624, 16) # nosec: B101 From 8d5f3ac6fa784e133f2a43cba4ddc31e0b92fd24 Mon Sep 17 00:00:00 2001 From: thinky Date: Mon, 14 Aug 2023 14:27:02 -0400 Subject: [PATCH 19/51] Add handling for missing state-year and re-add AK + PR to processing --- src/pudl/etl/epacems_assets.py | 3 ++- src/pudl/extract/epacems.py | 11 ++++++++++- src/pudl/metadata/dfs.py | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/pudl/etl/epacems_assets.py b/src/pudl/etl/epacems_assets.py index 938a86397a..1e2b513165 100644 --- a/src/pudl/etl/epacems_assets.py +++ b/src/pudl/etl/epacems_assets.py @@ -81,7 +81,8 @@ def process_single_year( for state in epacems_settings.states: logger.info(f"Processing EPA CEMS hourly data for {year}-{state}") df = pudl.extract.epacems.extract(year=year, state=state, ds=ds) - df = pudl.transform.epacems.transform(df, epacamd_eia, plants_entity_eia) + if not df.empty: # If state-year combination has data + df = pudl.transform.epacems.transform(df, epacamd_eia, plants_entity_eia) table = pa.Table.from_pandas(df, schema=schema, preserve_index=False) # Write to a directory of partitioned parquet files diff --git a/src/pudl/extract/epacems.py b/src/pudl/extract/epacems.py index a36a2b62a1..aa2d9894e8 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -25,6 +25,7 @@ import pandas as pd import pudl.logging_helpers +from pudl.metadata.classes import Package from pudl.workspace.datastore import Datastore logger = pudl.logging_helpers.get_logger(__name__) @@ 
-180,4 +181,12 @@ def extract(year: int, state: str, ds: Datastore): ds = EpaCemsDatastore(ds) partition = EpaCemsPartition(state=state, year=year) # We have to assign the reporting year for partitioning purposes - return ds.get_data_frame(partition).assign(year=year) + try: + df = ds.get_data_frame(partition).assign(year=year) + except KeyError: # If no state-year combination found, return empty df. + logger.warning( + f"No data found for {state} in {year}. Returning empty dataframe." + ) + res = Package.from_resource_ids().get_resource("hourly_emissions_epacems") + df = res.format_df(pd.DataFrame()) + return df diff --git a/src/pudl/metadata/dfs.py b/src/pudl/metadata/dfs.py index f0d5f52686..cb58218971 100644 --- a/src/pudl/metadata/dfs.py +++ b/src/pudl/metadata/dfs.py @@ -300,7 +300,7 @@ """ subdivision_code,subdivision_name,country_code,country_name,subdivision_type,timezone_approx,state_id_fips,division_name_us_census,division_code_us_census,region_name_us_census,is_epacems_state AB,Alberta,CAN,Canada,province,America/Edmonton,,,,,0 -AK,Alaska,USA,United States of America,state,America/Anchorage,"02",Pacific Noncontiguous,PCN,West,0 +AK,Alaska,USA,United States of America,state,America/Anchorage,"02",Pacific Noncontiguous,PCN,West,1 AL,Alabama,USA,United States of America,state,America/Chicago,"01",East South Central,ESC,South,1 AR,Arkansas,USA,United States of America,state,America/Chicago,"05",West South Central,WSC,South,1 AS,American Samoa,USA,United States of America,outlying_area,Pacific/Pago_Pago,"60",,,,0 @@ -351,7 +351,7 @@ OR,Oregon,USA,United States of America,state,America/Los_Angeles,"41",Pacific Contiguous,PCC,West,1 PA,Pennsylvania,USA,United States of America,state,America/New_York,"42",Middle Atlantic,MAT,Northeast,1 PE,Prince Edwards Island,CAN,Canada,province,America/Halifax,,,,,0 -PR,Puerto Rico,USA,United States of America,outlying_area,America/Puerto_Rico,"72",,,,0 +PR,Puerto Rico,USA,United States of 
America,outlying_area,America/Puerto_Rico,"72",,,,1 QC,Quebec,CAN,Canada,province,America/Montreal,,,,,0 RI,Rhode Island,USA,United States of America,state,America/New_York,"44",New England,NEW,Northeast,1 SC,South Carolina,USA,United States of America,state,America/New_York,"45",South Atlantic,SAT,South,1 From cc6798b08bde6bfaa85b229ca46dbfaa328a9aa5 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Mon, 14 Aug 2023 11:28:09 -0800 Subject: [PATCH 20/51] Apply new naming convention to raw and intermediate assets excluding FERC 714 and Form 1 --- src/pudl/etl/__init__.py | 4 +- src/pudl/extract/eia860.py | 46 +++++++------- src/pudl/extract/eia861.py | 40 ++++++------ src/pudl/extract/eia923.py | 10 +-- src/pudl/transform/eia.py | 59 ++++++++++-------- src/pudl/transform/eia860.py | 116 +++++++++++++++++------------------ src/pudl/transform/eia861.py | 70 ++++++++++----------- src/pudl/transform/eia923.py | 52 ++++++++-------- 8 files changed, 203 insertions(+), 194 deletions(-) diff --git a/src/pudl/etl/__init__.py b/src/pudl/etl/__init__.py index b6d8924d59..939fc84769 100644 --- a/src/pudl/etl/__init__.py +++ b/src/pudl/etl/__init__.py @@ -29,11 +29,11 @@ *load_assets_from_modules([eia_bulk_elec_assets], group_name="eia_bulk_elec"), *load_assets_from_modules([epacems_assets], group_name="epacems"), *load_assets_from_modules([pudl.extract.eia860], group_name="raw_eia860"), - *load_assets_from_modules([pudl.transform.eia860], group_name="clean_eia860"), + *load_assets_from_modules([pudl.transform.eia860], group_name="_core_eia860"), *load_assets_from_modules([pudl.extract.eia861], group_name="raw_eia861"), *load_assets_from_modules([pudl.transform.eia861], group_name="clean_eia861"), *load_assets_from_modules([pudl.extract.eia923], group_name="raw_eia923"), - *load_assets_from_modules([pudl.transform.eia923], group_name="clean_eia923"), + *load_assets_from_modules([pudl.transform.eia923], group_name="_core_eia923"), *load_assets_from_modules([pudl.transform.eia],
group_name="norm_eia"), *load_assets_from_modules([pudl.extract.ferc1], group_name="raw_ferc1"), *load_assets_from_modules([pudl.transform.ferc1], group_name="norm_ferc1"), diff --git a/src/pudl/extract/eia860.py b/src/pudl/extract/eia860.py index 4e3094e600..9f6dfb3c36 100644 --- a/src/pudl/extract/eia860.py +++ b/src/pudl/extract/eia860.py @@ -69,29 +69,29 @@ def get_dtypes(page, **partition): # TODO (bendnorman): Add this information to the metadata raw_table_names = ( - "raw_boiler_cooling_eia860", - "raw_boiler_generator_assn_eia860", - "raw_boiler_info_eia860", - "raw_boiler_mercury_eia860", - "raw_boiler_nox_eia860", - "raw_boiler_particulate_eia860", - "raw_boiler_so2_eia860", - "raw_boiler_stack_flue_eia860", - "raw_cooling_equipment_eia860", - "raw_emission_control_strategies_eia860", - "raw_emissions_control_equipment_eia860", - "raw_fgd_equipment_eia860", - "raw_fgp_equipment_eia860", - "raw_generator_eia860", - "raw_generator_existing_eia860", - "raw_generator_proposed_eia860", - "raw_generator_retired_eia860", - "raw_multifuel_existing_eia860", - "raw_multifuel_retired_eia860", - "raw_ownership_eia860", - "raw_plant_eia860", - "raw_stack_flue_equipment_eia860", - "raw_utility_eia860", + "raw_eia860__boiler_cooling", + "raw_eia860__boiler_generator_assn", + "raw_eia860__boiler_info", + "raw_eia860__boiler_mercury", + "raw_eia860__boiler_nox", + "raw_eia860__boiler_particulate", + "raw_eia860__boiler_so2", + "raw_eia860__boiler_stack_flue", + "raw_eia860__cooling_equipment", + "raw_eia860__emission_control_strategies", + "raw_eia860__emissions_control_equipment", + "raw_eia860__fgd_equipment", + "raw_eia860__fgp_equipment", + "raw_eia860__generator", + "raw_eia860__generator_existing", + "raw_eia860__generator_proposed", + "raw_eia860__generator_retired", + "raw_eia860__multifuel_existing", + "raw_eia860__multifuel_retired", + "raw_eia860__ownership", + "raw_eia860__plant", + "raw_eia860__stack_flue_equipment", + "raw_eia860__utility", ) diff --git 
a/src/pudl/extract/eia861.py b/src/pudl/extract/eia861.py index 369ce067d2..9f4ae49855 100644 --- a/src/pudl/extract/eia861.py +++ b/src/pudl/extract/eia861.py @@ -74,26 +74,26 @@ def get_dtypes(page, **partition): table_name: AssetOut() for table_name in sorted( ( # is there some way to programmatically generate this list? - "raw_advanced_metering_infrastructure_eia861", - "raw_balancing_authority_eia861", - "raw_delivery_companies_eia861", - "raw_demand_response_eia861", - "raw_demand_side_management_eia861", - "raw_distributed_generation_eia861", - "raw_distribution_systems_eia861", - "raw_dynamic_pricing_eia861", - "raw_energy_efficiency_eia861", - "raw_frame_eia861", - "raw_green_pricing_eia861", - "raw_mergers_eia861", - "raw_net_metering_eia861", - "raw_non_net_metering_eia861", - "raw_operational_data_eia861", - "raw_reliability_eia861", - "raw_sales_eia861", - "raw_service_territory_eia861", - "raw_short_form_eia861", - "raw_utility_data_eia861", + "raw_eia861__advanced_metering_infrastructure", + "raw_eia861__balancing_authority", + "raw_eia861__delivery_companies", + "raw_eia861__demand_response", + "raw_eia861__demand_side_management", + "raw_eia861__distributed_generation", + "raw_eia861__distribution_systems", + "raw_eia861__dynamic_pricing", + "raw_eia861__energy_efficiency", + "raw_eia861__frame", + "raw_eia861__green_pricing", + "raw_eia861__mergers", + "raw_eia861__net_metering", + "raw_eia861__non_net_metering", + "raw_eia861__operational_data", + "raw_eia861__reliability", + "raw_eia861__sales", + "raw_eia861__service_territory", + "raw_eia861__short_form", + "raw_eia861__utility_data", ) ) }, diff --git a/src/pudl/extract/eia923.py b/src/pudl/extract/eia923.py index 09610b048f..35dfbc9779 100644 --- a/src/pudl/extract/eia923.py +++ b/src/pudl/extract/eia923.py @@ -95,11 +95,11 @@ def get_dtypes(page, **partition): # TODO (bendnorman): Add this information to the metadata eia_raw_table_names = ( - "raw_boiler_fuel_eia923", - 
"raw_fuel_receipts_costs_eia923", - "raw_generation_fuel_eia923", - "raw_generator_eia923", - "raw_stocks_eia923", + "raw_eia923__boiler_fuel", + "raw_eia923__fuel_receipts_costs", + "raw_eia923__generation_fuel", + "raw_eia923__generator", + "raw_eia923__stocks", # There's an issue with the EIA-923 archive for 2018 which prevents this table # from being extracted currently. When we update to a new DOI this problem will # probably fix itself. See comments on this issue: diff --git a/src/pudl/transform/eia.py b/src/pudl/transform/eia.py index 0bd61e8202..a551db3339 100644 --- a/src/pudl/transform/eia.py +++ b/src/pudl/transform/eia.py @@ -588,10 +588,10 @@ def harvest_entity_tables( # noqa: C901 ins={ table_name: AssetIn() for table_name in [ - "clean_boiler_generator_assn_eia860", - "clean_generation_eia923", - "clean_generators_eia860", - "clean_boiler_fuel_eia923", + "_core_eia860__boiler_generator_assn", + "_core_eia923__generation", + "_core_eia860__generators", + "_core_eia923__boiler_fuel", ] }, config_schema={ @@ -658,7 +658,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: logger.debug(f"{clean_dfs.keys()=}") # grab the generation_eia923 table, group annually, generate a new tag - gen_eia923 = clean_dfs["clean_generation_eia923"] + gen_eia923 = clean_dfs["_core_eia923__generation"] gen_eia923 = ( gen_eia923.set_index(pd.DatetimeIndex(gen_eia923.report_date)) .groupby([pd.Grouper(freq="AS"), "plant_id_eia", "generator_id"]) @@ -670,7 +670,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # compile all of the generators gens = pd.merge( gen_eia923, - clean_dfs["clean_generators_eia860"], + clean_dfs["_core_eia860__generators"], on=["plant_id_eia", "report_date", "generator_id"], how="outer", ) @@ -690,7 +690,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # background bga_compiled_1 = pd.merge( gens, - clean_dfs["clean_boiler_generator_assn_eia860"], + 
clean_dfs["_core_eia860__boiler_generator_assn"], on=["plant_id_eia", "generator_id", "report_date"], how="outer", ) @@ -707,7 +707,7 @@ def boiler_generator_assn_eia860(context, **clean_dfs) -> pd.DataFrame: # noqa: # apear in gens9 or gens8 (must uncomment-out the og_tag creation above) # bga_compiled_1[bga_compiled_1['og_tag'].isnull()] - bf_eia923 = clean_dfs["clean_boiler_fuel_eia923"].assign( + bf_eia923 = clean_dfs["_core_eia923__boiler_fuel"].assign( total_heat_content_mmbtu=lambda x: x.fuel_consumed_units * x.fuel_mmbtu_per_unit ) bf_eia923 = ( @@ -1138,22 +1138,22 @@ def harvested_entity_asset_factory( ) -> AssetsDefinition: """Create an asset definition for the harvested entity tables.""" harvestable_assets = ( - "clean_boiler_fuel_eia923", - "clean_boiler_generator_assn_eia860", - "clean_boilers_eia860", - "clean_coalmine_eia923", - "clean_fuel_receipts_costs_eia923", - "clean_generation_eia923", - "clean_generation_fuel_eia923", - "clean_generation_fuel_nuclear_eia923", - "clean_generators_eia860", - "clean_ownership_eia860", - "clean_plants_eia860", - "clean_utilities_eia860", - "clean_emissions_control_equipment_eia860", - "clean_boiler_emissions_control_equipment_assn_eia860", - "clean_boiler_cooling_assn_eia860", - "clean_boiler_stack_flue_assn_eia860", + "_core_eia923__boiler_fuel", + "_core_eia860__boiler_generator_assn", + "_core_eia860__boilers", + "_core_eia923__coalmine", + "_core_eia923__fuel_receipts_costs", + "_core_eia923__generation", + "_core_eia923__generation_fuel", + "_core_eia923__generation_fuel_nuclear", + "_core_eia860__generators", + "_core_eia860__ownership", + "_core_eia860__plants", + "_core_eia860__utilities", + "_core_eia860__emissions_control_equipment", + "_core_eia860__boiler_emissions_control_equipment_assn", + "_core_eia860__boiler_cooling_assn", + "_core_eia860__boiler_stack_flue_assn", ) @multi_asset( @@ -1203,16 +1203,21 @@ def finished_eia_asset_factory( table_name: str, io_manager_key: str | None = None ) -> 
AssetsDefinition: """An asset factory for finished EIA tables.""" - clean_table_name = "clean_" + table_name + # TODO (bendnorman): Create a more graceful function for parsing table name + table_name_parts = table_name.split("_") + dataset = table_name_parts[-1] + table_name_no_dataset = "_".join(table_name_parts[:-1]) + + _core_table_name = f"_core_{dataset}__{table_name_no_dataset}" @asset( - ins={clean_table_name: AssetIn()}, + ins={_core_table_name: AssetIn()}, name=table_name, io_manager_key=io_manager_key, ) def finished_eia_asset(**kwargs) -> pd.DataFrame: """Enforce PUDL DB schema on a cleaned EIA dataframe.""" - df = convert_cols_dtypes(kwargs[clean_table_name], data_source="eia") + df = convert_cols_dtypes(kwargs[_core_table_name], data_source="eia") res = Package.from_resource_ids().get_resource(table_name) return res.enforce_schema(df) diff --git a/src/pudl/transform/eia860.py b/src/pudl/transform/eia860.py index ce58f65cae..782041f3a1 100644 --- a/src/pudl/transform/eia860.py +++ b/src/pudl/transform/eia860.py @@ -15,7 +15,7 @@ @asset -def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__ownership(raw_eia860__ownership: pd.DataFrame) -> pd.DataFrame: """Pull and transform the ownership table. Transformations include: @@ -25,14 +25,14 @@ def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: reporting. Args: - raw_ownership_eia860: The raw ``ownership_eia860`` dataframe. + raw_eia860__ownership: The raw ``ownership_eia860`` dataframe. Returns: Cleaned ``ownership_eia860`` dataframe ready for harvesting. 
""" # Preiminary clean and get rid of unecessary 'year' column own_df = ( - raw_ownership_eia860.copy() + raw_eia860__ownership.copy() .pipe(pudl.helpers.fix_eia_na) .pipe(pudl.helpers.convert_to_date) .drop(columns=["year"]) @@ -179,11 +179,11 @@ def clean_ownership_eia860(raw_ownership_eia860: pd.DataFrame) -> pd.DataFrame: @asset -def clean_generators_eia860( - raw_generator_proposed_eia860: pd.DataFrame, - raw_generator_existing_eia860: pd.DataFrame, - raw_generator_retired_eia860: pd.DataFrame, - raw_generator_eia860: pd.DataFrame, +def _core_eia860__generators( + raw_eia860__generator_proposed: pd.DataFrame, + raw_eia860__generator_existing: pd.DataFrame, + raw_eia860__generator_retired: pd.DataFrame, + raw_eia860__generator: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the generators table. @@ -207,10 +207,10 @@ def clean_generators_eia860( clean, distinguishable categories. Args: - raw_generator_proposed_eia860: The raw ``raw_generator_proposed_eia860`` dataframe. - raw_generator_existing_eia860: The raw ``raw_generator_existing_eia860`` dataframe. - raw_generator_retired_eia860: The raw ``raw_generator_retired_eia860`` dataframe. - raw_generator_eia860: The raw ``raw_generator_eia860`` dataframe. + raw_eia860__generator_proposed: The raw ``raw_eia860__generator_proposed`` dataframe. + raw_eia860__generator_existing: The raw ``raw_eia860__generator_existing`` dataframe. + raw_eia860__generator_retired: The raw ``raw_eia860__generator_retired`` dataframe. + raw_eia860__generator: The raw ``raw_eia860__generator`` dataframe. Returns: Cleaned ``generators_eia860`` dataframe ready for harvesting. 
@@ -223,10 +223,10 @@ def clean_generators_eia860( # them all together into a single big table, with a column that indicates # which one of these tables the data came from, since they all have almost # exactly the same structure - gp_df = raw_generator_proposed_eia860 - ge_df = raw_generator_existing_eia860 - gr_df = raw_generator_retired_eia860 - g_df = raw_generator_eia860 + gp_df = raw_eia860__generator_proposed + ge_df = raw_eia860__generator_existing + gr_df = raw_eia860__generator_retired + g_df = raw_eia860__generator # the retired tab of eia860 does not have a operational_status_code column. # we still want these gens to have a code (and subsequently a # operational_status). We could do this by fillna w/ the retirement_date, but @@ -358,7 +358,7 @@ def clean_generators_eia860( @asset -def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__plants(raw_eia860__plant: pd.DataFrame) -> pd.DataFrame: """Pull and transform the plants table. Much of the static plant information is reported repeatedly, and scattered across @@ -373,14 +373,14 @@ def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: * Convert Y/N/X values to boolean True/False. Args: - raw_plant_eia860: The raw ``raw_plant_eia860`` dataframe. + raw_eia860__plant: The raw ``raw_eia860__plant`` dataframe. Returns: Cleaned ``plants_eia860`` dataframe ready for harvesting. """ # Populating the 'plants_eia860' table p_df = ( - raw_plant_eia860.pipe(pudl.helpers.fix_eia_na) + raw_eia860__plant.pipe(pudl.helpers.fix_eia_na) .astype({"zip_code": str}) .drop("iso_rto", axis="columns") ) @@ -443,8 +443,8 @@ def clean_plants_eia860(raw_plant_eia860: pd.DataFrame) -> pd.DataFrame: @asset -def clean_boiler_generator_assn_eia860( - raw_boiler_generator_assn_eia860: pd.DataFrame, +def _core_eia860__boiler_generator_assn( + raw_eia860__boiler_generator_assn: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the boilder generator association table. 
@@ -454,7 +454,7 @@ def clean_boiler_generator_assn_eia860( * Drop duplicate rows. Args: - raw_boiler_generator_assn_eia860 (df): Each entry in this dictionary of DataFrame objects + raw_eia860__boiler_generator_assn (df): Each entry in this dictionary of DataFrame objects corresponds to a page from the EIA860 form, as reported in the Excel spreadsheets they distribute. @@ -462,7 +462,7 @@ def clean_boiler_generator_assn_eia860( Cleaned ``boiler_generator_assn_eia860`` dataframe ready for harvesting. """ # Populating the 'generators_eia860' table - b_g_df = raw_boiler_generator_assn_eia860 + b_g_df = raw_eia860__boiler_generator_assn b_g_df = pudl.helpers.convert_to_date(b_g_df) b_g_df = pudl.helpers.convert_cols_dtypes(df=b_g_df, data_source="eia") @@ -478,7 +478,7 @@ def clean_boiler_generator_assn_eia860( @asset -def clean_utilities_eia860(raw_utility_eia860: pd.DataFrame) -> pd.DataFrame: +def _core_eia860__utilities(raw_eia860__utility: pd.DataFrame) -> pd.DataFrame: """Pull and transform the utilities table. Transformations include: @@ -492,13 +492,13 @@ def clean_utilities_eia860(raw_utility_eia860: pd.DataFrame) -> pd.DataFrame: * Map full spelling onto code values. Args: - raw_utility_eia860: The raw ``raw_utility_eia860`` dataframe. + raw_eia860__utility: The raw ``raw_eia860__utility`` dataframe. Returns: Cleaned ``utilities_eia860`` dataframe ready for harvesting. """ # Populating the 'utilities_eia860' table - u_df = raw_utility_eia860 + u_df = raw_eia860__utility # Replace empty strings, whitespace, and '.' fields with real NA values u_df = pudl.helpers.fix_eia_na(u_df) @@ -564,8 +564,8 @@ def _make_phone_number(col1, col2, col3): @asset -def clean_boilers_eia860( - raw_emission_control_strategies_eia860, raw_boiler_info_eia860 +def _core_eia860__boilers( + raw_eia860__emission_control_strategies, raw_eia860__boiler_info ): """Pull and transform the boilers table. @@ -579,17 +579,17 @@ def clean_boilers_eia860( reporting. 
Args: - raw_emission_control_strategies_eia860 (pandas.DataFrame): + raw_eia860__emission_control_strategies (pandas.DataFrame): DataFrame extracted from EIA forms earlier in the ETL process. - raw_boiler_info_eia860 (pandas.DataFrame): + raw_eia860__boiler_info (pandas.DataFrame): DataFrame extracted from EIA forms earlier in the ETL process. Returns: pandas.DataFrame: the transformed boilers table """ # Populating the 'boilers_eia860' table - b_df = raw_boiler_info_eia860 - ecs = raw_emission_control_strategies_eia860 + b_df = raw_eia860__boiler_info + ecs = raw_eia860__emission_control_strategies # Combine and replace empty strings, whitespace, and '.' fields with real NA values @@ -765,12 +765,12 @@ def clean_boilers_eia860( @asset -def clean_emissions_control_equipment_eia860( - raw_emissions_control_equipment_eia860: pd.DataFrame, +def _core_eia860__emissions_control_equipment( + raw_eia860__emissions_control_equipment: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the emissions control equipment table.""" # Replace empty strings, whitespace, and '.' fields with real NA values - emce_df = pudl.helpers.fix_eia_na(raw_emissions_control_equipment_eia860) + emce_df = pudl.helpers.fix_eia_na(raw_eia860__emissions_control_equipment) # Spot fix bad months emce_df["operating_month"] = emce_df["operating_month"].replace({"88": "8"}) @@ -889,26 +889,26 @@ def clean_emissions_control_equipment_eia860( @asset -def clean_boiler_emissions_control_equipment_assn_eia860( - raw_boiler_so2_eia860: pd.DataFrame, - raw_boiler_mercury_eia860: pd.DataFrame, - raw_boiler_nox_eia860: pd.DataFrame, - raw_boiler_particulate_eia860: pd.DataFrame, +def _core_eia860__boiler_emissions_control_equipment_assn( + raw_eia860__boiler_so2: pd.DataFrame, + raw_eia860__boiler_mercury: pd.DataFrame, + raw_eia860__boiler_nox: pd.DataFrame, + raw_eia860__boiler_particulate: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the emissions control <> boiler ID link tables. 
Args: - raw_boiler_so2_eia860: Raw EIA 860 boiler to SO2 emission control equipment + raw_eia860__boiler_so2: Raw EIA 860 boiler to SO2 emission control equipment association table. - raw_boiler_mercury_eia860: Raw EIA 860 boiler to mercury emission control + raw_eia860__boiler_mercury: Raw EIA 860 boiler to mercury emission control equipment association table. - raw_boiler_nox_eia860: Raw EIA 860 boiler to nox emission control equipment + raw_eia860__boiler_nox: Raw EIA 860 boiler to nox emission control equipment association table. - raw_boiler_particulate_eia860: Raw EIA 860 boiler to particulate emission + raw_eia860__boiler_particulate: Raw EIA 860 boiler to particulate emission control equipment association table. - raw_boiler_cooling_eia860: Raw EIA 860 boiler to cooling equipment association + raw_eia860__boiler_cooling: Raw EIA 860 boiler to cooling equipment association table. - raw_boiler_stack_flue_eia860: Raw EIA 860 boiler to stack flue equipment + raw_eia860__boiler_stack_flue: Raw EIA 860 boiler to stack flue equipment association table. Returns: @@ -916,10 +916,10 @@ def clean_boiler_emissions_control_equipment_assn_eia860( tables. """ raw_tables = [ - raw_boiler_so2_eia860, - raw_boiler_mercury_eia860, - raw_boiler_nox_eia860, - raw_boiler_particulate_eia860, + raw_eia860__boiler_so2, + raw_eia860__boiler_mercury, + raw_eia860__boiler_nox, + raw_eia860__boiler_particulate, ] bece_df = pd.DataFrame({}) @@ -969,20 +969,20 @@ def clean_boiler_emissions_control_equipment_assn_eia860( @asset -def clean_boiler_cooling_assn_eia860( - raw_boiler_cooling_eia860: pd.DataFrame, +def _core_eia860__boiler_cooling_assn( + raw_eia860__boiler_cooling: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the EIA 860 boiler to cooler ID table. Args: - raw_boiler_cooling_eia860: Raw EIA 860 boiler to cooler ID association table. + raw_eia860__boiler_cooling: Raw EIA 860 boiler to cooler ID association table. 
Returns: pd.DataFrame: A cleaned and normalized version of the EIA boiler to cooler ID table. """ # Replace empty strings, whitespace, and '.' fields with real NA values - bc_assn = pudl.helpers.fix_eia_na(raw_boiler_cooling_eia860) + bc_assn = pudl.helpers.fix_eia_na(raw_eia860__boiler_cooling) # Replace the report year col with a report date col for the harvesting process bc_assn = pudl.helpers.convert_to_date( df=bc_assn, year_col="report_year", date_col="report_date" @@ -994,13 +994,13 @@ def clean_boiler_cooling_assn_eia860( @asset -def clean_boiler_stack_flue_assn_eia860( - raw_boiler_stack_flue_eia860: pd.DataFrame, +def _core_eia860__boiler_stack_flue_assn( + raw_eia860__boiler_stack_flue: pd.DataFrame, ) -> pd.DataFrame: """Pull and transform the EIA 860 boiler to stack flue ID table. Args: - raw_boiler_stack_flue_eia860: Raw EIA 860 boiler to stack flue ID association + raw_eia860__boiler_stack_flue: Raw EIA 860 boiler to stack flue ID association table. Returns: @@ -1008,7 +1008,7 @@ def clean_boiler_stack_flue_assn_eia860( ID table. """ # Replace empty strings, whitespace, and '.' 
fields with real NA values - bsf_assn = pudl.helpers.fix_eia_na(raw_boiler_stack_flue_eia860) + bsf_assn = pudl.helpers.fix_eia_na(raw_eia860__boiler_stack_flue) # Replace the report year col with a report date col for the harvesting process bsf_assn = pudl.helpers.convert_to_date( df=bsf_assn, year_col="report_year", date_col="report_date" diff --git a/src/pudl/transform/eia861.py b/src/pudl/transform/eia861.py index 495984638f..57cdc9464c 100644 --- a/src/pudl/transform/eia861.py +++ b/src/pudl/transform/eia861.py @@ -1030,7 +1030,7 @@ def _harvest_associations(dfs: list[pd.DataFrame], cols: list[str]) -> pd.DataFr ############################################################################### @asset(io_manager_key="pudl_sqlite_io_manager") def service_territory_eia861( - raw_service_territory_eia861: pd.DataFrame, + raw_eia861__service_territory: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 utility service territory table. @@ -1040,12 +1040,12 @@ def service_territory_eia861( * Add field for state/county FIPS code. Args: - raw_service_territory_eia861: Raw EIA-861 utility service territory dataframe. + raw_eia861__service_territory: Raw EIA-861 utility service territory dataframe. Returns: The cleaned utility service territory dataframe. """ - df = _pre_process(raw_service_territory_eia861) + df = _pre_process(raw_eia861__service_territory) # A little WV county sandwiched between OH & PA, got miscategorized a few times: df.loc[(df.state == "OH") & (df.county == "Brooke"), "state"] = "WV" df = ( @@ -1084,7 +1084,7 @@ def service_territory_eia861( @asset def clean_balancing_authority_eia861( - raw_balancing_authority_eia861: pd.DataFrame, + raw_eia861__balancing_authority: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Balancing Authority table. 
@@ -1100,7 +1100,7 @@ def clean_balancing_authority_eia861( # * Backfill BA codes on a per BA ID basis # * Fix data entry errors df = ( - _pre_process(raw_balancing_authority_eia861) + _pre_process(raw_eia861__balancing_authority) .pipe(apply_pudl_dtypes, "eia") .set_index(["report_date", "balancing_authority_name_eia", "utility_id_eia"]) ) @@ -1142,7 +1142,7 @@ def clean_balancing_authority_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: +def sales_eia861(raw_eia861__sales: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Sales table. Transformations include: @@ -1164,7 +1164,7 @@ def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: ] # Pre-tidy clean specific to sales table - raw_sales = _pre_process(raw_sales_eia861).query( + raw_sales = _pre_process(raw_eia861__sales).query( "utility_id_eia not in (88888, 99999)" ) @@ -1226,7 +1226,7 @@ def sales_eia861(raw_sales_eia861: pd.DataFrame) -> pd.DataFrame: @asset(io_manager_key="pudl_sqlite_io_manager") def advanced_metering_infrastructure_eia861( - raw_advanced_metering_infrastructure_eia861: pd.DataFrame, + raw_eia861__advanced_metering_infrastructure: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Advanced Metering Infrastructure table. 
@@ -1246,7 +1246,7 @@ def advanced_metering_infrastructure_eia861( ########################################################################### logger.info("Tidying the EIA 861 Advanced Metering Infrastructure table.") tidy_ami, idx_cols = _tidy_class_dfs( - _pre_process(raw_advanced_metering_infrastructure_eia861), + _pre_process(raw_eia861__advanced_metering_infrastructure), df_name="Advanced Metering Infrastructure", idx_cols=idx_cols, class_list=CUSTOMER_CLASSES, @@ -1276,7 +1276,7 @@ def advanced_metering_infrastructure_eia861( ), }, ) -def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): +def demand_response_eia861(raw_eia861__demand_response: pd.DataFrame): """Transform the EIA 861 Demand Response table. Transformations include: @@ -1294,7 +1294,7 @@ def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): "report_date", ] - raw_dr = _pre_process(raw_demand_response_eia861) + raw_dr = _pre_process(raw_eia861__demand_response) # fill na BA values with 'UNK' raw_dr["balancing_authority_code_eia"] = raw_dr[ "balancing_authority_code_eia" @@ -1365,7 +1365,7 @@ def demand_response_eia861(raw_demand_response_eia861: pd.DataFrame): }, ) def demand_side_management_eia861( - raw_demand_side_management_eia861: pd.DataFrame, + raw_eia861__demand_side_management: pd.DataFrame, ): """Transform the EIA 861 Demand Side Management table. 
@@ -1427,7 +1427,7 @@ def demand_side_management_eia861( # * Drop data_status and demand_side_management cols (they don't contain anything) ########################################################################### transformed_dsm1 = ( - clean_nerc(_pre_process(raw_demand_side_management_eia861), idx_cols) + clean_nerc(_pre_process(raw_eia861__demand_side_management), idx_cols) .drop(columns=["demand_side_management", "data_status"]) .query("utility_id_eia not in [88888]") ) @@ -1531,7 +1531,7 @@ def demand_side_management_eia861( }, ) def distributed_generation_eia861( - raw_distributed_generation_eia861: pd.DataFrame, + raw_eia861__distributed_generation: pd.DataFrame, ): """Transform the EIA 861 Distributed Generation table. @@ -1596,7 +1596,7 @@ def distributed_generation_eia861( ] # Pre-tidy transform: set estimated or actual A/E values to 'Acutal'/'Estimated' - raw_dg = _pre_process(raw_distributed_generation_eia861).assign( + raw_dg = _pre_process(raw_eia861__distributed_generation).assign( estimated_or_actual_capacity_data=lambda x: ( x.estimated_or_actual_capacity_data.map(ESTIMATED_OR_ACTUAL) ), @@ -1710,14 +1710,14 @@ def distributed_generation_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") def distribution_systems_eia861( - raw_distribution_systems_eia861: pd.DataFrame, + raw_eia861__distribution_systems: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Distribution Systems table. * No additional transformations. 
""" df = ( - _pre_process(raw_distribution_systems_eia861) + _pre_process(raw_eia861__distribution_systems) .assign(short_form=lambda x: _make_yn_bool(x.short_form)) # No duplicates to speak of but take measures to check just in case .pipe( @@ -1731,7 +1731,7 @@ def distribution_systems_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataFrame: +def dynamic_pricing_eia861(raw_eia861__dynamic_pricing: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Dynamic Pricing table. Transformations include: @@ -1755,7 +1755,7 @@ def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataF ] raw_dp = _pre_process( - raw_dynamic_pricing_eia861.query("utility_id_eia not in [88888]").assign( + raw_eia861__dynamic_pricing.query("utility_id_eia not in [88888]").assign( short_form=lambda x: _make_yn_bool(x.short_form) ) ) @@ -1794,7 +1794,7 @@ def dynamic_pricing_eia861(raw_dynamic_pricing_eia861: pd.DataFrame) -> pd.DataF @asset(io_manager_key="pudl_sqlite_io_manager") def energy_efficiency_eia861( - raw_energy_efficiency_eia861: pd.DataFrame, + raw_eia861__energy_efficiency: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Energy Efficiency table. @@ -1812,7 +1812,7 @@ def energy_efficiency_eia861( ] raw_ee = ( - _pre_process(raw_energy_efficiency_eia861).assign( + _pre_process(raw_eia861__energy_efficiency).assign( short_form=lambda x: _make_yn_bool(x.short_form) ) # No duplicates to speak of but take measures to check just in case @@ -1862,7 +1862,7 @@ def energy_efficiency_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") def green_pricing_eia861( - raw_green_pricing_eia861: pd.DataFrame, + raw_eia861__green_pricing: pd.DataFrame, ) -> pd.DataFrame: """Transform the EIA 861 Green Pricing table. 
@@ -1882,7 +1882,7 @@ def green_pricing_eia861( ########################################################################### logger.info("Tidying the EIA 861 Green Pricing table.") tidy_gp, idx_cols = _tidy_class_dfs( - _pre_process(raw_green_pricing_eia861), + _pre_process(raw_eia861__green_pricing), df_name="Green Pricing", idx_cols=idx_cols, class_list=CUSTOMER_CLASSES, @@ -1905,11 +1905,11 @@ def green_pricing_eia861( @asset(io_manager_key="pudl_sqlite_io_manager") -def mergers_eia861(raw_mergers_eia861: pd.DataFrame) -> pd.DataFrame: +def mergers_eia861(raw_eia861__mergers: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Mergers table.""" # No duplicates to speak of but take measures to check just in case df = ( - _pre_process(raw_mergers_eia861) + _pre_process(raw_eia861__mergers) .pipe( _check_for_dupes, df_name="Mergers", @@ -1928,7 +1928,7 @@ def mergers_eia861(raw_mergers_eia861: pd.DataFrame) -> pd.DataFrame: "net_metering_misc_eia861": AssetOut(io_manager_key="pudl_sqlite_io_manager"), }, ) -def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): +def net_metering_eia861(raw_eia861__net_metering: pd.DataFrame): """Transform the EIA 861 Net Metering table. Transformations include: @@ -1948,7 +1948,7 @@ def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): # Pre-tidy clean specific to net_metering table raw_nm = ( - _pre_process(raw_net_metering_eia861) + _pre_process(raw_eia861__net_metering) .query("utility_id_eia not in [99999]") .assign(short_form=lambda x: _make_yn_bool(x.short_form)) ) @@ -2014,7 +2014,7 @@ def net_metering_eia861(raw_net_metering_eia861: pd.DataFrame): ), }, ) -def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): +def non_net_metering_eia861(raw_eia861__non_net_metering: pd.DataFrame): """Transform the EIA 861 Non-Net Metering table. 
Transformations include: @@ -2039,7 +2039,7 @@ def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): ] # Pre-tidy clean specific to non_net_metering table - raw_nnm = _pre_process(raw_non_net_metering_eia861).query( + raw_nnm = _pre_process(raw_eia861__non_net_metering).query( "utility_id_eia not in '99999'" ) @@ -2124,7 +2124,7 @@ def non_net_metering_eia861(raw_non_net_metering_eia861: pd.DataFrame): ), }, ) -def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): +def operational_data_eia861(raw_eia861__operational_data: pd.DataFrame): """Transform the EIA 861 Operational Data table. Transformations include: @@ -2144,7 +2144,7 @@ def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): ] # Pre-tidy clean specific to operational data table - raw_od = _pre_process(raw_operational_data_eia861) + raw_od = _pre_process(raw_eia861__operational_data) raw_od = raw_od[ (raw_od["utility_id_eia"] != 88888) & (raw_od["utility_id_eia"].notnull()) ] @@ -2207,7 +2207,7 @@ def operational_data_eia861(raw_operational_data_eia861: pd.DataFrame): @asset(io_manager_key="pudl_sqlite_io_manager") -def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: +def reliability_eia861(raw_eia861__reliability: pd.DataFrame) -> pd.DataFrame: """Transform the EIA 861 Reliability table. 
Transformations include: @@ -2227,7 +2227,7 @@ def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: # wide-to-tall by standards tidy_r, idx_cols = _tidy_class_dfs( - df=_pre_process(raw_reliability_eia861), + df=_pre_process(raw_eia861__reliability), df_name="Reliability", idx_cols=idx_cols, class_list=RELIABILITY_STANDARDS, @@ -2271,7 +2271,7 @@ def reliability_eia861(raw_reliability_eia861: pd.DataFrame) -> pd.DataFrame: "utility_data_misc_eia861": AssetOut(io_manager_key="pudl_sqlite_io_manager"), }, ) -def utility_data_eia861(raw_utility_data_eia861: pd.DataFrame): +def utility_data_eia861(raw_eia861__utility_data: pd.DataFrame): """Transform the EIA 861 Utility Data table. Transformations include: @@ -2285,7 +2285,7 @@ def utility_data_eia861(raw_utility_data_eia861: pd.DataFrame): idx_cols = ["utility_id_eia", "state", "report_date", "nerc_region"] # Pre-tidy clean specific to operational data table - raw_ud = _pre_process(raw_utility_data_eia861).query( + raw_ud = _pre_process(raw_eia861__utility_data).query( "utility_id_eia not in [88888]" ) diff --git a/src/pudl/transform/eia923.py b/src/pudl/transform/eia923.py index 106bfd39c9..35fa349782 100644 --- a/src/pudl/transform/eia923.py +++ b/src/pudl/transform/eia923.py @@ -592,11 +592,11 @@ def gen_fuel_nuclear(gen_fuel_nuke: pd.DataFrame) -> pd.DataFrame: @multi_asset( outs={ - "clean_generation_fuel_eia923": AssetOut(), - "clean_generation_fuel_nuclear_eia923": AssetOut(), + "_core_eia923__generation_fuel": AssetOut(), + "_core_eia923__generation_fuel_nuclear": AssetOut(), }, ) -def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): +def _core_eia_923__generation_fuel_eia923(raw_eia923__generation_fuel: pd.DataFrame): """Transforms the generation_fuel_eia923 table. Transformations include: @@ -613,14 +613,14 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): * Aggregate records with duplicate natural keys. 
Args: - raw_generation_fuel_eia923: The raw ``raw_generation_fuel_eia923`` dataframe. + raw_eia923__generation_fuel: The raw ``raw_eia923__generation_fuel`` dataframe. Returns: - clean_generation_fuel_eia923: Cleaned ``generation_fuel_eia923`` dataframe ready for harvesting. - clean_generation_fuel_nuclear_eia923: Cleaned ``generation_fuel_nuclear_eia923`` dataframe ready for harvesting. + _core_eia923__generation_fuel: Cleaned ``generation_fuel_eia923`` dataframe ready for harvesting. + _core_eia923__generation_fuel_nuclear: Cleaned ``generation_fuel_nuclear_eia923`` dataframe ready for harvesting. """ # This needs to be a copy of what we're passed in so we can edit it. - gen_fuel = raw_generation_fuel_eia923 + gen_fuel = raw_eia923__generation_fuel # Drop fields we're not inserting into the generation_fuel_eia923 table. cols_to_drop = [ @@ -709,8 +709,10 @@ def clean_generation_fuel_eia923(raw_generation_fuel_eia923: pd.DataFrame): gen_fuel = _aggregate_generation_fuel_duplicates(gen_fuel) return ( - Output(output_name="clean_generation_fuel_eia923", value=gen_fuel), - Output(output_name="clean_generation_fuel_nuclear_eia923", value=gen_fuel_nuke), + Output(output_name="_core_eia923__generation_fuel", value=gen_fuel), + Output( + output_name="_core_eia923__generation_fuel_nuclear", value=gen_fuel_nuke + ), ) @@ -813,7 +815,7 @@ def _aggregate_duplicate_boiler_fuel_keys(boiler_fuel_df: pd.DataFrame) -> pd.Da @asset -def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__boiler_fuel(raw_eia923__boiler_fuel: pd.DataFrame) -> pd.DataFrame: """Transforms the boiler_fuel_eia923 table. Transformations include: @@ -826,12 +828,12 @@ def clean_boiler_fuel_eia923(raw_boiler_fuel_eia923: pd.DataFrame) -> pd.DataFra * Combine year and month columns into a single date column. Args: - raw_boiler_fuel_eia923: The raw ``raw_boiler_fuel_eia923`` dataframe. + raw_eia923__boiler_fuel: The raw ``raw_eia923__boiler_fuel`` dataframe. 
Returns: Cleaned ``boiler_fuel_eia923`` dataframe ready for harvesting. """ - bf_df = raw_boiler_fuel_eia923 + bf_df = raw_eia923__boiler_fuel # Need to stop dropping fields that contain harvestable entity attributes. # See https://github.com/catalyst-cooperative/pudl/issues/509 @@ -935,7 +937,7 @@ def remove_duplicate_pks_boiler_fuel_eia923(bf: pd.DataFrame) -> pd.DataFrame: @asset -def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__generation(raw_eia923__generator: pd.DataFrame) -> pd.DataFrame: """Transforms the generation_eia923 table. Transformations include: @@ -946,13 +948,13 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: * Drop generator-date row duplicates (all have no data). Args: - raw_generator_eia923: The raw ``raw_generator_eia923`` dataframe. + raw_eia923__generator: The raw ``raw_eia923__generator`` dataframe. Returns: Cleaned ``generation_eia923`` dataframe ready for harvesting. """ gen_df = ( - raw_generator_eia923.dropna(subset=["generator_id"]) + raw_eia923__generator.dropna(subset=["generator_id"]) .drop( [ "combined_heat_power", @@ -1001,7 +1003,9 @@ def clean_generation_eia923(raw_generator_eia923: pd.DataFrame) -> pd.DataFrame: @asset -def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.DataFrame: +def _core_eia923__coalmine( + raw_eia923__fuel_receipts_costs: pd.DataFrame, +) -> pd.DataFrame: """Transforms the coalmine_eia923 table. Transformations include: @@ -1010,7 +1014,7 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da * Drop duplicates with MSHA ID. Args: - raw_fuel_receipts_costs_eia923: raw precursor to the + raw_eia923__fuel_receipts_costs: raw precursor to the :ref:`fuel_receipts_costs_eia923` table. Returns: @@ -1029,7 +1033,7 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da # Make a copy so we don't alter the FRC data frame... 
which we'll need # to use again for populating the FRC table (see below) - cmi_df = raw_fuel_receipts_costs_eia923 + cmi_df = raw_eia923__fuel_receipts_costs # Keep only the columns listed above: cmi_df = _coalmine_cleanup(cmi_df) @@ -1080,8 +1084,8 @@ def clean_coalmine_eia923(raw_fuel_receipts_costs_eia923: pd.DataFrame) -> pd.Da @asset -def clean_fuel_receipts_costs_eia923( - raw_fuel_receipts_costs_eia923: pd.DataFrame, clean_coalmine_eia923: pd.DataFrame +def _core_eia923__fuel_receipts_costs( + raw_eia923__fuel_receipts_costs: pd.DataFrame, _core_eia923__coalmine: pd.DataFrame ) -> pd.DataFrame: """Transforms the fuel_receipts_costs_eia923 dataframe. @@ -1096,13 +1100,13 @@ def clean_fuel_receipts_costs_eia923( Fuel cost is reported in cents per mmbtu. Converts cents to dollars. Args: - raw_fuel_receipts_costs_eia923: The raw ``raw_fuel_receipts_costs_eia923`` dataframe. - clean_coalmine_eia923: The cleaned pre-harvest ``coalmine_eia923`` dataframe. + raw_eia923__fuel_receipts_costs: The raw ``raw_eia923__fuel_receipts_costs`` dataframe. + _core_eia923__coalmine: The cleaned pre-harvest ``coalmine_eia923`` dataframe. Returns: Cleaned ``fuel_receipts_costs_eia923`` dataframe ready for harvesting. """ - frc_df = raw_fuel_receipts_costs_eia923 + frc_df = raw_eia923__fuel_receipts_costs # Drop fields we're not inserting into the fuel_receipts_costs_eia923 # table. @@ -1122,7 +1126,7 @@ def clean_fuel_receipts_costs_eia923( ] cmi_df = ( - clean_coalmine_eia923 + _core_eia923__coalmine # In order for the merge to work, we need to get the county_id_fips # field back into ready-to-dump form... 
so it matches the types of the # county_id_fips field that we are going to be merging on in the From 5207950f31d772bab397c5e38958cdb1c067100a Mon Sep 17 00:00:00 2001 From: thinky Date: Mon, 14 Aug 2023 18:29:58 -0400 Subject: [PATCH 21/51] Fix plant with missing timezone --- src/pudl/package_data/epacems/additional_epacems_plants.csv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pudl/package_data/epacems/additional_epacems_plants.csv b/src/pudl/package_data/epacems/additional_epacems_plants.csv index 96afc3def9..ee56e33ca2 100644 --- a/src/pudl/package_data/epacems/additional_epacems_plants.csv +++ b/src/pudl/package_data/epacems/additional_epacems_plants.csv @@ -126,6 +126,7 @@ plant_id_eia,plant_name_eia,last_date,state,latitude,longitude,fill_data_source_ 880107,SPMT Marcus Hook Industrial Complex,2017-12-31,PA,39.8076,-75.4239,EPA CAMD web query 880108,Grain Processing Corporation,2018-12-31,IN,38.6552,-87.1814,EPA CAMD web query 880109,"Pratt Paper (OH), LLC",2020-10-13,OH,40.5379994,-84.1909398,Not found in EPA CAMD Avg OH Lat -55098,Frontera Energy Center,2016-01-01,TX,26.208000,-98.399200,In CEMS in 2019 but missing from EIA since 2016 +55098,Frontera Energy Center,2016-01-01,TX,26.208,-98.3992,In CEMS in 2019 but missing from EIA since 2016 55120,SRW Cogen LP,2014-01-01,TX,30.054478,-93.757435,In CEMS in 2019 but missing from EIA since 2014 55248,Tait,2018-01-01,OH,39.727679,-84.209489,In CEMS in 2021 but missing from EIA since 2018 +880110,Holston Army Ammunition Plant,2022-09-28,TN,36.5493,-82.6342,EPA CAMD web query From 0bb15f690de610a2e09a5c2348c989e1b3a46efc Mon Sep 17 00:00:00 2001 From: bendnorman Date: Mon, 14 Aug 2023 22:31:40 -0800 Subject: [PATCH 22/51] Apply naming convention to raw eia861 and ferc714 assets --- src/pudl/extract/eia861.py | 3 ++- src/pudl/extract/ferc714.py | 30 +++++++++++++++--------------- src/pudl/transform/ferc714.py | 12 ++++++------ 3 files changed, 23 insertions(+), 22 deletions(-) diff --git 
a/src/pudl/extract/eia861.py b/src/pudl/extract/eia861.py index 9f4ae49855..ad6a08b017 100644 --- a/src/pudl/extract/eia861.py +++ b/src/pudl/extract/eia861.py @@ -113,7 +113,8 @@ def extract_eia861(context): eia861_raw_dfs = Extractor(ds).extract(year=eia_settings.eia861.years) eia861_raw_dfs = { - "raw_" + table_name: df for table_name, df in eia861_raw_dfs.items() + "raw_eia861__" + table_name.replace("_eia861", ""): df + for table_name, df in eia861_raw_dfs.items() } eia861_raw_dfs = dict(sorted(eia861_raw_dfs.items())) diff --git a/src/pudl/extract/ferc714.py b/src/pudl/extract/ferc714.py index f61c7df33d..9e6c8ac6d7 100644 --- a/src/pudl/extract/ferc714.py +++ b/src/pudl/extract/ferc714.py @@ -10,51 +10,51 @@ FERC714_FILES: OrderedDict[str, dict[str, str]] = OrderedDict( { - "id_certification_ferc714": { + "id_certification": { "name": "Part 1 Schedule 1 - Identification Certification.csv", "encoding": "iso-8859-1", }, - "gen_plants_ba_ferc714": { + "gen_plants_ba": { "name": "Part 2 Schedule 1 - Balancing Authority Generating Plants.csv", "encoding": "iso-8859-1", }, - "demand_monthly_ba_ferc714": { + "demand_monthly_ba": { "name": "Part 2 Schedule 2 - Balancing Authority Monthly Demand.csv", "encoding": "utf-8", }, - "net_energy_load_ba_ferc714": { + "net_energy_load_ba": { "name": "Part 2 Schedule 3 - Balancing Authority Net Energy for Load.csv", "encoding": "utf-8", }, - "adjacency_ba_ferc714": { + "adjacency_ba": { "name": "Part 2 Schedule 4 - Adjacent Balancing Authorities.csv", "encoding": "iso-8859-1", }, - "interchange_ba_ferc714": { + "interchange_ba": { "name": "Part 2 Schedule 5 - Balancing Authority Interchange.csv", "encoding": "iso-8859-1", }, - "lambda_hourly_ba_ferc714": { + "lambda_hourly_ba": { "name": "Part 2 Schedule 6 - Balancing Authority Hourly System Lambda.csv", "encoding": "utf-8", }, - "lambda_description_ferc714": { + "lambda_description": { "name": "Part 2 Schedule 6 - System Lambda Description.csv", "encoding": "iso-8859-1", }, 
- "description_pa_ferc714": { + "description_pa": { "name": "Part 3 Schedule 1 - Planning Area Description.csv", "encoding": "iso-8859-1", }, - "demand_forecast_pa_ferc714": { + "demand_forecast_pa": { "name": "Part 3 Schedule 2 - Planning Area Forecast Demand.csv", "encoding": "utf-8", }, - "demand_hourly_pa_ferc714": { + "demand_hourly_pa": { "name": "Part 3 Schedule 2 - Planning Area Hourly Demand.csv", "encoding": "utf-8", }, - "respondent_id_ferc714": { + "respondent_id": { "name": "Respondent IDs.csv", "encoding": "utf-8", }, @@ -64,7 +64,7 @@ @multi_asset( - outs={"raw_" + table_name: AssetOut() for table_name in FERC714_FILES}, + outs={"raw_ferc714__" + table_name: AssetOut() for table_name in FERC714_FILES}, required_resource_keys={"datastore", "dataset_settings"}, ) def extract_ferc714(context): @@ -91,12 +91,12 @@ def extract_ferc714(context): raw_dfs[table_name] = pd.read_csv( f, encoding=FERC714_FILES[table_name]["encoding"] ) - if table_name != "respondent_id_ferc714": + if table_name != "respondent_id": raw_dfs[table_name] = raw_dfs[table_name].query( "report_yr in @ferc714_settings.years" ) return ( - Output(output_name="raw_" + table_name, value=df) + Output(output_name="raw_ferc714__" + table_name, value=df) for table_name, df in raw_dfs.items() ) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 9fbba1e94b..6f0c68211c 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -369,7 +369,7 @@ def _standardize_offset_codes(df: pd.DataFrame, offset_fixes) -> pd.DataFrame: @asset(io_manager_key="pudl_sqlite_io_manager") -def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFrame: +def respondent_id_ferc714(raw_ferc714__respondent_id: pd.DataFrame) -> pd.DataFrame: """Transform the FERC 714 respondent IDs, names, and EIA utility IDs. 
Clean up FERC-714 respondent names and manually assign EIA utility IDs to a few FERC @@ -378,12 +378,12 @@ def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFra PacifiCorp). Args: - raw_respondent_id_ferc714: Raw table describing the FERC 714 Respondents. + raw_ferc714__respondent_id: Raw table describing the FERC 714 Respondents. Returns: A clean(er) version of the FERC-714 respondents table. """ - df = _pre_process(raw_respondent_id_ferc714, table_name="respondent_id_ferc714") + df = _pre_process(raw_ferc714__respondent_id, table_name="respondent_id_ferc714") df["respondent_name_ferc714"] = df.respondent_name_ferc714.str.strip() df.loc[df.eia_code == 0, "eia_code"] = pd.NA # There are a few utilities that seem mappable, but missing: @@ -394,7 +394,7 @@ def respondent_id_ferc714(raw_respondent_id_ferc714: pd.DataFrame) -> pd.DataFra @asset(io_manager_key="pudl_sqlite_io_manager") def demand_hourly_pa_ferc714( - raw_demand_hourly_pa_ferc714: pd.DataFrame, + raw_ferc714__demand_hourly_pa: pd.DataFrame, ) -> pd.DataFrame: """Transform the hourly demand time series by Planning Area. @@ -408,7 +408,7 @@ def demand_hourly_pa_ferc714( - Flip negative signs for reported demand. Args: - raw_demand_hourly_pa_ferc714: Raw table containing hourly demand time series by + raw_ferc714__demand_hourly_pa: Raw table containing hourly demand time series by Planning Area. 
Returns: @@ -416,7 +416,7 @@ def demand_hourly_pa_ferc714( """ logger.info("Converting dates into pandas Datetime types.") df = _pre_process( - raw_demand_hourly_pa_ferc714, table_name="demand_hourly_pa_ferc714" + raw_ferc714__demand_hourly_pa, table_name="demand_hourly_pa_ferc714" ) # Parse date strings From 77bab3b8ce8d731ae5df53988a3179ce841c81fb Mon Sep 17 00:00:00 2001 From: thinky Date: Tue, 15 Aug 2023 10:05:38 -0400 Subject: [PATCH 23/51] Add missing plants, test missing partitions --- .../epacems/additional_epacems_plants.csv | 1 + test/integration/epacems_test.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/pudl/package_data/epacems/additional_epacems_plants.csv b/src/pudl/package_data/epacems/additional_epacems_plants.csv index ee56e33ca2..d58803e292 100644 --- a/src/pudl/package_data/epacems/additional_epacems_plants.csv +++ b/src/pudl/package_data/epacems/additional_epacems_plants.csv @@ -130,3 +130,4 @@ plant_id_eia,plant_name_eia,last_date,state,latitude,longitude,fill_data_source_ 55120,SRW Cogen LP,2014-01-01,TX,30.054478,-93.757435,In CEMS in 2019 but missing from EIA since 2014 55248,Tait,2018-01-01,OH,39.727679,-84.209489,In CEMS in 2021 but missing from EIA since 2018 880110,Holston Army Ammunition Plant,2022-09-28,TN,36.5493,-82.6342,EPA CAMD web query +880102,"AES Puerto Rico, LP",2015-09-30,PR,17.9477,-66.154,EPA CAMD web query diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index 1fb3ec533a..789caebef8 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -3,7 +3,9 @@ import pytest from dagster import build_init_resource_context +from pudl.extract.epacems import extract from pudl.io_managers import epacems_io_manager +from pudl.metadata.classes import Package from pudl.output.epacems import epacems, year_state_filter @@ -46,6 +48,24 @@ def test_epacems_subset(epacems_year_and_state, epacems_parquet_path): assert actual.shape[0].compute() > 0 # 
nosec: B101 n rows +def test_epacems_missing_partition(caplog, pudl_datastore_fixture): + """Check that missing partitions return an empty data frame. + + Note that this should pass for both the Fast and Full ETL because the behavior + towards a missing file is identical.""" + df = extract(year=1996, state="UT", ds=pudl_datastore_fixture) + for record in caplog.records: + assert record.levelname == "WARNING" + assert ( + record.message == "No data found for UT in 1996. Returning empty dataframe." + ) + epacems_res = Package.from_resource_ids().get_resource("hourly_emissions_epacems") + expected_cols = list(epacems_res.get_field_names()) + assert df.shape[0] == 0 # Check that no rows of data are there + # Check that all columns expected of EPACEMS data are present. + assert sorted(df.columns) == sorted(expected_cols) + + def test_epacems_subset_input_validation(epacems_year_and_state, epacems_parquet_path): """Check if invalid inputs raise exceptions.""" if not epacems_year_and_state: From d0294bae52cb70a4f9620edda6c84d23d9e15a5c Mon Sep 17 00:00:00 2001 From: thinky Date: Tue, 15 Aug 2023 10:08:12 -0400 Subject: [PATCH 24/51] Update release notes --- docs/release_notes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index bd6a802217..94e4b3a816 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -71,6 +71,8 @@ Data Coverage * Updated :doc:`data_sources/eia860` to include early release data from 2022. * Updated :doc:`data_sources/eia923` to include early release data from 2022. +* Updated :doc:`data_sources/epacems3` to switch from the old FTP server to the new + CAMPD API, and to include 2022 data. * New :ref:`epacamd_eia` crosswalk version v0.3, see issue :issue:`2317` and PR :pr:`2316`. EPA's updates add manual matches and exclusions focusing on operating units with a generator ID as of 2018. 
From a6d3d9dc910da9026350e2d679fc2a303db4ca53 Mon Sep 17 00:00:00 2001 From: thinky Date: Tue, 15 Aug 2023 11:50:35 -0400 Subject: [PATCH 25/51] Fix typo in docs --- docs/release_notes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 94e4b3a816..48207381f3 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -71,7 +71,7 @@ Data Coverage * Updated :doc:`data_sources/eia860` to include early release data from 2022. * Updated :doc:`data_sources/eia923` to include early release data from 2022. -* Updated :doc:`data_sources/epacems3` to switch from the old FTP server to the new +* Updated :doc:`data_sources/epacems` to switch from the old FTP server to the new CAMPD API, and to include 2022 data. * New :ref:`epacamd_eia` crosswalk version v0.3, see issue :issue:`2317` and PR :pr:`2316`. EPA's updates add manual matches and exclusions focusing on operating From bffd3e91186230c04ad8b89d40c4ead5f7f3f700 Mon Sep 17 00:00:00 2001 From: Jan Rous Date: Tue, 15 Aug 2023 08:52:17 -0700 Subject: [PATCH 26/51] Workaround to support non-existent path in PudlPaths. 
--- src/pudl/workspace/setup.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/pudl/workspace/setup.py b/src/pudl/workspace/setup.py index c6eeb655c7..b2a6c8a519 100644 --- a/src/pudl/workspace/setup.py +++ b/src/pudl/workspace/setup.py @@ -4,14 +4,38 @@ import pathlib import shutil from pathlib import Path +from typing import Any, Union from pydantic import BaseSettings, DirectoryPath +from pydantic.validators import path_validator import pudl.logging_helpers logger = pudl.logging_helpers.get_logger(__name__) +class MissingPath(Path): + """Validates potential path that doesn't exist.""" + + @classmethod + def __get_validators__(cls) -> Any: + """Validates that path doesn't exist and is path-like.""" + yield path_validator + yield cls.validate + + @classmethod + def validate(cls, value: Path) -> Path: + """Validates that path doesn't exist.""" + if value.exists(): + raise ValueError("path exists") + + return value + + +# TODO: The following could be replaced with NewPath from pydantic v2 +PotentialDirectoryPath = Union[DirectoryPath, MissingPath] + + class PudlPaths(BaseSettings): """These settings provide access to various PUDL directories. @@ -19,8 +43,8 @@ class PudlPaths(BaseSettings): variables. Other paths of relevance are derived from these. 
""" - pudl_input: DirectoryPath - pudl_output: DirectoryPath + pudl_input: PotentialDirectoryPath + pudl_output: PotentialDirectoryPath class Config: """Pydantic config, reads from .env file.""" From e3c407c920f8717b5619afbee83e4d1369f6786a Mon Sep 17 00:00:00 2001 From: bendnorman Date: Tue, 15 Aug 2023 09:40:22 -0800 Subject: [PATCH 27/51] Convert raw ferc1 assets and fix 860 and 923 naming issues --- src/pudl/extract/eia860.py | 2 +- src/pudl/extract/eia923.py | 4 ++-- src/pudl/extract/ferc1.py | 6 ++++-- src/pudl/io_managers.py | 7 ++++++- src/pudl/transform/ferc1.py | 30 ++++++++++++++++++------------ 5 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/pudl/extract/eia860.py b/src/pudl/extract/eia860.py index 9f6dfb3c36..18a37345a6 100644 --- a/src/pudl/extract/eia860.py +++ b/src/pudl/extract/eia860.py @@ -185,7 +185,7 @@ def extract_eia860(context, eia860_raw_dfs): # create descriptive table_names eia860_raw_dfs = { - "raw_" + table_name + "_eia860": df for table_name, df in eia860_raw_dfs.items() + "raw_eia860__" + table_name: df for table_name, df in eia860_raw_dfs.items() } eia860_raw_dfs = dict(sorted(eia860_raw_dfs.items())) diff --git a/src/pudl/extract/eia923.py b/src/pudl/extract/eia923.py index 35dfbc9779..695f55ec96 100644 --- a/src/pudl/extract/eia923.py +++ b/src/pudl/extract/eia923.py @@ -129,7 +129,7 @@ def extract_eia923(context): # create descriptive table_names eia923_raw_dfs = { - "raw_" + table_name + "_eia923": df for table_name, df in eia923_raw_dfs.items() + "raw_eia923__" + table_name: df for table_name, df in eia923_raw_dfs.items() } eia923_raw_dfs = dict(sorted(eia923_raw_dfs.items())) @@ -141,5 +141,5 @@ def extract_eia923(context): # from being extracted currently. When we update to a new DOI this problem will # probably fix itself. 
See comments on this issue: # https://github.com/catalyst-cooperative/pudl/issues/2448 - if table_name != "raw_emissions_control_eia923" + if table_name != "raw_eia923__emissions_control" ) diff --git a/src/pudl/extract/ferc1.py b/src/pudl/extract/ferc1.py index c2321f4c58..87c3e87f55 100644 --- a/src/pudl/extract/ferc1.py +++ b/src/pudl/extract/ferc1.py @@ -341,7 +341,8 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: dbf_table_names = tuple(set(flattened_dbfs)) raw_ferc1_dbf_assets = [ SourceAsset( - key=AssetKey(table_name), io_manager_key="ferc1_dbf_sqlite_io_manager" + key=AssetKey(f"raw_ferc1_dbf__{table_name}"), + io_manager_key="ferc1_dbf_sqlite_io_manager", ) for table_name in dbf_table_names ] @@ -357,7 +358,8 @@ def create_raw_ferc1_assets() -> list[SourceAsset]: xbrl_table_names = tuple(set(xbrls_with_periods)) raw_ferc1_xbrl_assets = [ SourceAsset( - key=AssetKey(table_name), io_manager_key="ferc1_xbrl_sqlite_io_manager" + key=AssetKey(f"raw_ferc1_xbrl__{table_name}"), + io_manager_key="ferc1_xbrl_sqlite_io_manager", ) for table_name in xbrl_table_names ] diff --git a/src/pudl/io_managers.py b/src/pudl/io_managers.py index 85a5ed0224..ff954816db 100644 --- a/src/pudl/io_managers.py +++ b/src/pudl/io_managers.py @@ -177,7 +177,7 @@ def _get_sqlalchemy_table(self, table_name: str) -> sa.Table: sa_table = self.md.tables.get(table_name, None) if sa_table is None: raise ValueError( - f"{sa_table} not found in database metadata. Either add the table to " + f"{table_name} not found in database metadata. Either add the table to " "the metadata or use a different IO Manager." 
) return sa_table @@ -658,6 +658,8 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ferc1_settings = context.resources.dataset_settings.ferc1 table_name = self._get_table_name(context) + # Remove preceeding asset name metadata + table_name = table_name.replace("raw_ferc1_dbf__", "") # Check if the table_name exists in the self.md object _ = self._get_sqlalchemy_table(table_name) @@ -719,6 +721,9 @@ def load_input(self, context: InputContext) -> pd.DataFrame: ferc1_settings = context.resources.dataset_settings.ferc1 table_name = self._get_table_name(context) + # Remove preceeding asset name metadata + table_name = table_name.replace("raw_ferc1_xbrl__", "") + # TODO (bendnorman): Figure out a better to handle tables that # don't have duration and instant # Not every table contains both instant and duration diff --git a/src/pudl/transform/ferc1.py b/src/pudl/transform/ferc1.py index c3b186b307..ca1ab5b145 100644 --- a/src/pudl/transform/ferc1.py +++ b/src/pudl/transform/ferc1.py @@ -5261,9 +5261,15 @@ def ferc1_transform_asset_factory( dbf_tables = listify(TABLE_NAME_MAP_FERC1[table_name]["dbf"]) xbrl_tables = listify(TABLE_NAME_MAP_FERC1[table_name]["xbrl"]) - ins = {f"raw_dbf__{tn}": AssetIn(tn) for tn in dbf_tables} - ins |= {f"raw_xbrl_instant__{tn}": AssetIn(f"{tn}_instant") for tn in xbrl_tables} - ins |= {f"raw_xbrl_duration__{tn}": AssetIn(f"{tn}_duration") for tn in xbrl_tables} + ins = {f"raw_dbf__{tn}": AssetIn(f"raw_ferc1_dbf__{tn}") for tn in dbf_tables} + ins |= { + f"raw_xbrl_instant__{tn}": AssetIn(f"raw_ferc1_xbrl__{tn}_instant") + for tn in xbrl_tables + } + ins |= { + f"raw_xbrl_duration__{tn}": AssetIn(f"raw_ferc1_xbrl__{tn}_duration") + for tn in xbrl_tables + } ins["clean_xbrl_metadata_json"] = AssetIn("clean_xbrl_metadata_json") table_id = TableIdFerc1(table_name) @@ -5335,18 +5341,18 @@ def create_ferc1_transform_assets() -> list[AssetsDefinition]: @asset(io_manager_key="pudl_sqlite_io_manager") def plants_steam_ferc1( 
clean_xbrl_metadata_json: dict[str, dict[str, list[dict[str, Any]]]], - f1_steam: pd.DataFrame, - steam_electric_generating_plant_statistics_large_plants_402_duration: pd.DataFrame, - steam_electric_generating_plant_statistics_large_plants_402_instant: pd.DataFrame, + raw_ferc1_dbf__f1_steam: pd.DataFrame, + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration: pd.DataFrame, + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant: pd.DataFrame, fuel_ferc1: pd.DataFrame, ) -> pd.DataFrame: """Create the clean plants_steam_ferc1 table. Args: clean_xbrl_metadata_json: XBRL metadata json for all tables. - f1_steam: Raw f1_steam table. - steam_electric_generating_plant_statistics_large_plants_402_duration: raw XBRL duration table. - steam_electric_generating_plant_statistics_large_plants_402_instant: raw XBRL instant table. + raw_ferc1_dbf__f1_steam: Raw f1_steam table. + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration: raw XBRL duration table. + raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant: raw XBRL instant table. fuel_ferc1: Transformed fuel_ferc1 table. 
Returns: @@ -5355,9 +5361,9 @@ def plants_steam_ferc1( df = PlantsSteamFerc1TableTransformer( xbrl_metadata_json=clean_xbrl_metadata_json["plants_steam_ferc1"] ).transform( - raw_dbf=f1_steam, - raw_xbrl_instant=steam_electric_generating_plant_statistics_large_plants_402_instant, - raw_xbrl_duration=steam_electric_generating_plant_statistics_large_plants_402_duration, + raw_dbf=raw_ferc1_dbf__f1_steam, + raw_xbrl_instant=raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_instant, + raw_xbrl_duration=raw_ferc1_xbrl__steam_electric_generating_plant_statistics_large_plants_402_duration, transformed_fuel=fuel_ferc1, ) return convert_cols_dtypes(df, data_source="ferc1") From 18425e4d06bd74f45d7e0ae5aa4ee0d9467d1b43 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Thu, 17 Aug 2023 12:04:26 -0400 Subject: [PATCH 28/51] Add constraint naming convention so we can drop them later. --- src/pudl/metadata/classes.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index eb222f3d90..a0c358fef3 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -735,7 +735,10 @@ def to_sql( # noqa: C901 return sa.Column( self.name, self.to_sql_dtype(), - *[sa.CheckConstraint(check) for check in checks], + *[ + sa.CheckConstraint(check, f"{self.name}_{i}") + for i, check in enumerate(checks) + ], nullable=not self.constraints.required, unique=self.constraints.unique, comment=self.description, @@ -1866,7 +1869,15 @@ def to_sql( check_values: bool = True, ) -> sa.MetaData: """Return equivalent SQL MetaData.""" - metadata = sa.MetaData() + metadata = sa.MetaData( + naming_convention={ + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_`%(constraint_name)s`", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", + } + ) for resource in self.resources: if 
resource.create_database_schema: _ = resource.to_sql( From dc041f04260463956ba90caf4e7c06cb52726945 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Thu, 17 Aug 2023 12:05:52 -0400 Subject: [PATCH 29/51] Regenerate migrations so constraints are named --- ...558_regenerate_migrations_to_name_all_.py} | 1547 +++++++++++------ ...a6d_add_service_terrtory_ferc714_state_.py | 99 -- .../versions/28bb2b27e2cf_add_mcoe_table.py | 250 --- ..._dagsterize_net_gen_allocation_revision.py | 118 -- .../9a32db1fbe6e_rename_to_dollar_amount.py | 42 - migrations/versions/e2670d0ec0eb_.py | 24 - ...3b78_drop_amount_from_retained_earnings.py | 38 - 7 files changed, 967 insertions(+), 1151 deletions(-) rename migrations/versions/{3c458b36094e_start_over_with_new_pk_in_emissions_.py => 16948340e558_regenerate_migrations_to_name_all_.py} (79%) delete mode 100644 migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py delete mode 100644 migrations/versions/28bb2b27e2cf_add_mcoe_table.py delete mode 100644 migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py delete mode 100644 migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py delete mode 100644 migrations/versions/e2670d0ec0eb_.py delete mode 100644 migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py diff --git a/migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py b/migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py similarity index 79% rename from migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py rename to migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py index 01a3d2762a..1d2f5bb1a0 100644 --- a/migrations/versions/3c458b36094e_start_over_with_new_pk_in_emissions_.py +++ b/migrations/versions/16948340e558_regenerate_migrations_to_name_all_.py @@ -1,8 +1,8 @@ -"""Start over with new PK in emissions_control_equipment_types_eia +"""Regenerate migrations to name all unnamed constraints. 
-Revision ID: 3c458b36094e +Revision ID: 16948340e558 Revises: -Create Date: 2023-06-12 15:32:46.636042 +Create Date: 2023-08-17 12:05:15.020719 """ from alembic import op @@ -10,7 +10,7 @@ from sqlalchemy.dialects import sqlite # revision identifiers, used by Alembic. -revision = '3c458b36094e' +revision = '16948340e558' down_revision = None branch_labels = None depends_on = None @@ -22,75 +22,99 @@ def upgrade() -> None: sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_averaging_periods_eia')) ) op.create_table('balancing_authorities_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_balancing_authorities_eia')) ) op.create_table('balancing_authority_assn_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('state', sa.Text(), nullable=False, comment='Two letter US state abbreviation.'), - sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', 'utility_id_eia', 'state') + sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', 'utility_id_eia', 'state', name=op.f('pk_balancing_authority_assn_eia861')) ) op.create_table('balancing_authority_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia') + sa.PrimaryKeyConstraint('report_date', 'balancing_authority_id_eia', name=op.f('pk_balancing_authority_eia861')) ) op.create_table('boiler_generator_assn_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_generator_assn_types_eia')) ) op.create_table('boiler_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), 
sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_status_eia')) ) op.create_table('boiler_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_boiler_types_eia')) ) op.create_table('coalmine_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_coalmine_types_eia')) + ) + op.create_table('compiled_geometry_balancing_authority_eia861', + sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=False, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.PrimaryKeyConstraint('balancing_authority_id_eia', 'report_date', 'county_id_fips', 'county', name=op.f('pk_compiled_geometry_balancing_authority_eia861')) + ) + op.create_table('compiled_geometry_utility_eia861', + sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=True, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', 'county_id_fips', name=op.f('pk_compiled_geometry_utility_eia861')) ) op.create_table('contract_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + 
sa.PrimaryKeyConstraint('code', name=op.f('pk_contract_types_eia')) ) op.create_table('data_maturities', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_data_maturities')) ) op.create_table('datasources', sa.Column('datasource', sa.Enum('censusdp1tract', 'eia176', 'eia860', 'eia860m', 'eia861', 'eia923', 'eia_bulk_elec', 'eiawater', 'epacems', 'epacamd_eia', 'ferc1', 'ferc2', 'ferc6', 'ferc60', 'ferc714', 'ferceqr', 'mshamines', 'phmsagas', 'pudl'), nullable=False, comment='Code identifying a dataset available within PUDL.'), sa.Column('partitions', sa.Text(), nullable=True, comment='The data parititions used to generate this instance of the database.'), sa.Column('doi', sa.Text(), nullable=True, comment='Unique digitial object identifier of Zenodo archive.'), sa.Column('pudl_version', sa.Text(), nullable=True, comment='The version of PUDL used to generate this database.'), - sa.PrimaryKeyConstraint('datasource') + sa.PrimaryKeyConstraint('datasource', name=op.f('pk_datasources')) ) op.create_table('emission_control_equipment_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_emission_control_equipment_types_eia')) ) op.create_table('energy_sources_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), @@ -103,13 +127,13 @@ def upgrade() -> None: sa.Column('fuel_phase', sa.Enum('gas', 'liquid', 'solid'), nullable=True, 
comment='Physical phase of matter of the fuel.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_energy_sources_eia')) ) op.create_table('environmental_equipment_manufacturers_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_environmental_equipment_manufacturers_eia')) ) op.create_table('epacamd_eia_subplant_ids', sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -122,13 +146,13 @@ def upgrade() -> None: op.create_table('ferc_accounts', sa.Column('ferc_account_id', sa.Text(), nullable=False, comment="Account identifier from FERC's Uniform System of Accounts for Electric Plant. 
Includes higher level labeled categories."), sa.Column('ferc_account_description', sa.Text(), nullable=True), - sa.PrimaryKeyConstraint('ferc_account_id') + sa.PrimaryKeyConstraint('ferc_account_id', name=op.f('pk_ferc_accounts')) ) op.create_table('firing_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_firing_types_eia')) ) op.create_table('fuel_receipts_costs_aggs_eia', sa.Column('fuel_agg', sa.Text(), nullable=False, comment='Category of fuel aggregation in EIA bulk electricity data.'), @@ -138,67 +162,67 @@ def upgrade() -> None: sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('fuel_received_mmbtu', sa.Float(), nullable=True, comment='Aggregated fuel receipts, in MMBtu, in EIA bulk electricity data.'), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.PrimaryKeyConstraint('fuel_agg', 'geo_agg', 'sector_agg', 'temporal_agg', 'report_date') + sa.PrimaryKeyConstraint('fuel_agg', 'geo_agg', 'sector_agg', 'temporal_agg', 'report_date', name=op.f('pk_fuel_receipts_costs_aggs_eia')) ) op.create_table('fuel_transportation_modes_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_fuel_transportation_modes_eia')) ) 
op.create_table('fuel_types_aer_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_fuel_types_aer_eia')) ) op.create_table('mercury_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_mercury_compliance_strategies_eia')) ) op.create_table('momentary_interruptions_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_momentary_interruptions_eia')) ) op.create_table('nox_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_compliance_strategies_eia')) ) op.create_table('nox_control_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, 
comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_control_status_eia')) ) op.create_table('nox_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_nox_units_eia')) ) op.create_table('operational_status_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. 
For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_operational_status_eia')) ) op.create_table('particulate_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_particulate_compliance_strategies_eia')) ) op.create_table('particulate_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_particulate_units_eia')) ) op.create_table('plants_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -211,102 +235,109 @@ def upgrade() -> None: sa.Column('street_address', sa.Text(), nullable=True), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), sa.Column('timezone', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 
'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 
'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 
'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 
'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 
'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 
'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name'), - sa.PrimaryKeyConstraint('plant_id_eia') + sa.PrimaryKeyConstraint('plant_id_eia', name=op.f('pk_plants_entity_eia')) ) op.create_table('plants_pudl', sa.Column('plant_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('plant_name_pudl', sa.Text(), nullable=True, comment='Plant name, chosen arbitrarily from the several possible plant names available in the plant matching process. Included for human readability only.'), - sa.PrimaryKeyConstraint('plant_id_pudl') + sa.PrimaryKeyConstraint('plant_id_pudl', name=op.f('pk_plants_pudl')) ) op.create_table('political_subdivisions', sa.Column('country_code', sa.Enum('USA', 'CAN'), nullable=False, comment='Three letter ISO-3166 country code (e.g. USA or CAN).'), sa.Column('country_name', sa.Text(), nullable=True, comment='Full country name (e.g. United States of America).'), - sa.Column('subdivision_code', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. 
US state or Canadian provice abbreviations like CA or AB).'), + sa.Column('subdivision_code', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=False, comment='Two-letter ISO-3166 political subdivision code (e.g. US state or Canadian provice abbreviations like CA or AB).'), sa.Column('subdivision_name', sa.Text(), nullable=True, comment='Full name of political subdivision (e.g. US state or Canadian province names like California or Alberta.'), sa.Column('subdivision_type', sa.Text(), nullable=True, comment='ISO-3166 political subdivision type. E.g. state, province, outlying_area.'), sa.Column('timezone_approx', sa.Enum('Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 
'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Ciudad_Juarez', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 
'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Nuuk', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 
'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qostanay', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 
'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Kyiv', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 
'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kanton', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu'), nullable=True, comment='IANA timezone name of the timezone which 
encompasses the largest portion of the population in the associated geographic area.'), sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('division_name_us_census', sa.Text(), nullable=True, comment='Longer human readable name describing the US Census division.'), - sa.Column('division_code_us_census', sa.Enum('PCN', 'SAT', 'ESC', 'NEW', 'WSC', 'MAT', 'WNC', 'MTN', 'PCC', 'ENC'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), + sa.Column('division_code_us_census', sa.Enum('PCC', 'NEW', 'ESC', 'MAT', 'WSC', 'SAT', 'WNC', 'PCN', 'ENC', 'MTN'), nullable=True, comment='Three-letter US Census division code as it appears in the bulk electricity data published by the EIA. Note that EIA splits the Pacific division into distinct contiguous (CA, OR, WA) and non-contiguous (AK, HI) states. 
For reference see this US Census region and division map: https://www2.census.gov/geo/pdfs/maps-data/maps/reference/us_regdiv.pdf'), sa.Column('region_name_us_census', sa.Text(), nullable=True, comment='Human-readable name of a US Census region.'), sa.Column('is_epacems_state', sa.Boolean(), nullable=True, comment="Indicates whether the associated state reports data within the EPA's Continuous Emissions Monitoring System."), - sa.PrimaryKeyConstraint('country_code', 'subdivision_code') + sa.PrimaryKeyConstraint('country_code', 'subdivision_code', name=op.f('pk_political_subdivisions')) ) op.create_table('power_purchase_types_ferc1', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_power_purchase_types_ferc1')) + ) + op.create_table('predicted_state_hourly_demand', + sa.Column('state_id_fips', sa.Text(), nullable=False, comment='Two digit state FIPS code.'), + sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), + sa.Column('demand_mwh', sa.Float(), nullable=True), + sa.Column('scaled_demand_mwh', sa.Float(), nullable=True, comment='Estimated electricity demand scaled by the total sales within a state.'), + sa.PrimaryKeyConstraint('state_id_fips', 'utc_datetime', name=op.f('pk_predicted_state_hourly_demand')) ) op.create_table('prime_movers_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + 
sa.PrimaryKeyConstraint('code', name=op.f('pk_prime_movers_eia')) ) op.create_table('regulations_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_regulations_eia')) ) op.create_table('reporting_frequencies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_reporting_frequencies_eia')) ) op.create_table('respondent_id_ferc714', sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), sa.Column('eia_code', sa.Integer(), nullable=True), - sa.PrimaryKeyConstraint('respondent_id_ferc714') + sa.PrimaryKeyConstraint('respondent_id_ferc714', name=op.f('pk_respondent_id_ferc714')) ) op.create_table('sector_consolidated_eia', sa.Column('code', sa.Integer(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_sector_consolidated_eia')) ) op.create_table('so2_compliance_strategies_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', 
sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_so2_compliance_strategies_eia')) ) op.create_table('so2_units_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_so2_units_eia')) ) op.create_table('steam_plant_types_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_steam_plant_types_eia')) ) op.create_table('utilities_entity_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.PrimaryKeyConstraint('utility_id_eia') + sa.PrimaryKeyConstraint('utility_id_eia', name=op.f('pk_utilities_entity_eia')) ) op.create_table('utilities_pudl', sa.Column('utility_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_pudl', sa.Text(), nullable=True, comment='Utility name, chosen arbitrarily from the several possible utility names available in the utility matching process. 
Included for human readability only.'), - sa.PrimaryKeyConstraint('utility_id_pudl') + sa.PrimaryKeyConstraint('utility_id_pudl', name=op.f('pk_utilities_pudl')) ) op.create_table('utility_assn_eia861', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('state', sa.Text(), nullable=False, comment='Two letter US state abbreviation.'), - sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'state') + sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'state', name=op.f('pk_utility_assn_eia861')) ) op.create_table('wet_dry_bottom_eia', sa.Column('code', sa.Text(), nullable=False, comment='Originally reported short code.'), sa.Column('label', sa.Text(), nullable=True, comment='Longer human-readable code using snake_case'), sa.Column('description', sa.Text(), nullable=True, comment='Long human-readable description of the meaning of a code/label.'), - sa.PrimaryKeyConstraint('code') + sa.PrimaryKeyConstraint('code', name=op.f('pk_wet_dry_bottom_eia')) ) op.create_table('advanced_metering_infrastructure_eia861', sa.Column('advanced_metering_infrastructure', sa.Integer(), nullable=True), @@ -325,17 +356,17 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_advanced_metering_infrastructure_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_advanced_metering_infrastructure_eia861')) ) op.create_table('boilers_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('boiler_manufacturer', sa.Text(), nullable=True, comment='Name of boiler manufacturer.'), sa.Column('boiler_manufacturer_code', sa.Text(), nullable=True, comment='EIA short code for boiler manufacturer.'), - sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id') + sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_boilers_entity_eia_boiler_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_boilers_entity_eia_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', name=op.f('pk_boilers_entity_eia')) ) op.create_table('coalmine_eia923', sa.Column('mine_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL mine identifier.'), @@ -345,9 +376,9 @@ def 
upgrade() -> None: sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), sa.Column('mine_id_msha', sa.Integer(), nullable=True, comment='MSHA issued mine identifier.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], ), - sa.PrimaryKeyConstraint('mine_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_coalmine_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], name=op.f('fk_coalmine_eia923_mine_type_code_coalmine_types_eia')), + sa.PrimaryKeyConstraint('mine_id_pudl', name=op.f('pk_coalmine_eia923')) ) op.create_table('demand_hourly_pa_ferc714', sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), @@ -355,8 +386,8 @@ def upgrade() -> None: sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), sa.Column('timezone', sa.Enum('America/New_York', 'America/Chicago', 'America/Denver', 'America/Los_Angeles', 'America/Anchorage', 'Pacific/Honolulu'), nullable=True, comment='IANA timezone name'), sa.Column('demand_mwh', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ), - sa.PrimaryKeyConstraint('respondent_id_ferc714', 'utc_datetime') + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_demand_hourly_pa_ferc714_respondent_id_ferc714_respondent_id_ferc714')), + sa.PrimaryKeyConstraint('respondent_id_ferc714', 'utc_datetime', name=op.f('pk_demand_hourly_pa_ferc714')) ) op.create_table('demand_response_eia861', 
sa.Column('actual_peak_demand_savings_mw', sa.Float(), nullable=True), @@ -373,8 +404,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_response_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'customer_class', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_demand_response_eia861')) ) op.create_table('demand_response_water_heater_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=False, comment='EIA short code identifying a balancing authority.'), @@ -383,8 +414,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('water_heater', sa.Integer(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'report_date', 'state', 'utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_response_water_heater_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('balancing_authority_code_eia', 'report_date', 'state', 'utility_id_eia', name=op.f('pk_demand_response_water_heater_eia861')) ) op.create_table('demand_side_management_ee_dr_eia861', sa.Column('annual_indirect_program_cost', sa.Float(), nullable=True), @@ -411,7 +442,7 @@ def upgrade() -> None: sa.Column('time_responsiveness_customers', sa.Integer(), nullable=True), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_ee_dr_eia861_data_maturity_data_maturities')) ) op.create_table('demand_side_management_misc_eia861', sa.Column('energy_savings_estimates_independently_verified', sa.Boolean(), nullable=True), @@ -428,7 +459,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_misc_eia861_data_maturity_data_maturities')) ) op.create_table('demand_side_management_sales_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -438,7 +469,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_demand_side_management_sales_eia861_data_maturity_data_maturities')) ) op.create_table('denorm_emissions_control_equipment_eia860', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -461,14 +492,14 @@ def upgrade() -> None: sa.Column('emission_control_equipment_cost', sa.Float(), nullable=True, comment='The total cost to install a piece of emission control equipment.'), sa.Column('emission_control_operating_date', sa.Date(), nullable=True, comment='The date a piece of emissions control equipment began operating. 
Derived from month and year columns in the raw data.'), sa.Column('emission_control_retirement_date', sa.Date(), nullable=True, comment='The expected or actual retirement date for a piece of emissions control equipment. Derived from month and year columns in the raw data.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_emission_control_equipment_type_code_emission_control_equipment_types_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_denorm_emissions_control_equipment_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_emissions_control_equipment_eia860_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_emissions_control_equipment_eia860_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], 
['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_emissions_control_equipment_eia860_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_emissions_control_equipment_eia860_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl', name=op.f('pk_denorm_emissions_control_equipment_eia860')) ) op.create_table('denorm_fuel_receipts_costs_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -481,7 +512,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('contract_type_code', sa.Enum('S', 'C', 'NC', 'T'), nullable=True, comment='Purchase type under which receipts occurred in the reporting month. C: Contract, NC: New Contract, S: Spot Purchase, T: Tolling Agreement.'), sa.Column('contract_expiration_date', sa.Date(), nullable=True, comment='Date contract expires.Format: MMYY.'), - sa.Column('energy_source_code', sa.Text(), nullable=True, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=True, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_group_code', sa.Enum('petroleum', 'other_gas', 'petroleum_coke', 'natural_gas', 'coal'), nullable=True, comment='Fuel groups used in the Electric Power Monthly'), sa.Column('supplier_name', sa.Text(), nullable=True, comment='Company that sold the fuel to the plant or, in the case of Natural Gas, pipline owner.'), @@ -507,16 +538,16 @@ def upgrade() -> None: sa.Column('mine_state', sa.Text(), nullable=True, comment='State where the coal mine is located. Two letter abbreviation.'), sa.Column('coalmine_county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4. This is the county where the coal mine is located.'), sa.Column('mine_type_code', sa.Text(), nullable=True, comment='Type of coal mine.'), - sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], 
name=op.f('fk_denorm_fuel_receipts_costs_eia923_contract_type_code_contract_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['mine_type_code'], ['coalmine_types_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_mine_type_code_coalmine_types_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_primary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_secondary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_fuel_receipts_costs_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -539,10 +570,10 @@ def upgrade() -> None: sa.Column('mercury_content_ppm', sa.Float(), nullable=True, comment='Mercury content in parts per million (ppm) to the nearest 0.001 ppm.'), 
sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_monthly_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_fuel_combined_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -552,7 +583,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -563,15 +594,15 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_eia923_utility_id_eia_utilities_entity_eia')), + 
sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_eia923')) ) op.create_table('denorm_generation_fuel_combined_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -581,7 +612,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -590,13 +621,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], 
['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_monthly_eia923')) ) op.create_table('distributed_generation_fuel_eia861', sa.Column('estimated_or_actual_fuel_data', sa.Enum('estimated', 'actual'), nullable=True), @@ -606,7 +637,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_fuel_eia861_data_maturity_data_maturities')) ) op.create_table('distributed_generation_misc_eia861', sa.Column('backup_capacity_mw', sa.Float(), nullable=True), @@ -620,7 +651,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_misc_eia861_data_maturity_data_maturities')) ) op.create_table('distributed_generation_tech_eia861', sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), @@ -630,7 +661,7 @@ def upgrade() -> None: sa.Column('tech_class', sa.Enum('backup', 'chp_cogen', 'combustion_turbine', 'fuel_cell', 'hydro', 'internal_combustion', 'other', 'pv', 'steam', 'storage_pv', 'all_storage', 'total', 'virtual_pv', 'wind'), nullable=True), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distributed_generation_tech_eia861_data_maturity_data_maturities')) ) op.create_table('distribution_systems_eia861', sa.Column('circuits_with_voltage_optimization', sa.Integer(), nullable=True), @@ -641,7 +672,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_distribution_systems_eia861_data_maturity_data_maturities')) ) op.create_table('dynamic_pricing_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -658,7 +689,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('variable_peak_pricing', sa.Boolean(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_dynamic_pricing_eia861_data_maturity_data_maturities')) ) op.create_table('emissions_control_equipment_eia860', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -675,11 +706,11 @@ def upgrade() -> None: sa.Column('emission_control_equipment_cost', sa.Float(), nullable=True, comment='The total cost to install a piece of emission control equipment.'), sa.Column('emission_control_operating_date', sa.Date(), nullable=True, comment='The date a piece of emissions control equipment began operating. Derived from month and year columns in the raw data.'), sa.Column('emission_control_retirement_date', sa.Date(), nullable=True, comment='The expected or actual retirement date for a piece of emissions control equipment. 
Derived from month and year columns in the raw data.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_emissions_control_equipment_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['emission_control_equipment_type_code'], ['emission_control_equipment_types_eia.code'], name=op.f('fk_emissions_control_equipment_eia860_emission_control_equipment_type_code_emission_control_equipment_types_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_emissions_control_equipment_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_emissions_control_equipment_eia860_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('report_year', 'plant_id_eia', 'emission_control_id_pudl', name=op.f('pk_emissions_control_equipment_eia860')) ) op.create_table('energy_efficiency_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -699,12 +730,29 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('weighted_average_life_years', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_energy_efficiency_eia861_data_maturity_data_maturities')) + ) + op.create_table('fipsified_respondents_ferc714', + sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), + sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), + sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), + sa.Column('county', sa.Text(), nullable=True, comment='County name.'), + sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), + sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_fipsified_respondents_ferc714_respondent_id_ferc714_respondent_id_ferc714')) ) op.create_table('generation_fuel_eia923', 
sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -715,18 +763,18 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_fuel_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_generation_fuel_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generation_fuel_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_generation_fuel_eia923')) ) op.create_table('generation_fuel_nuclear_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('nuclear_unit_id', sa.Text(), nullable=False, comment='For nuclear plants only, the unit number .One digit numeric. 
Nuclear plants are the only type of plants for which data are shown explicitly at the generating unit level.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_type_code_aer', sa.Text(), nullable=True, comment='A partial aggregation of the reported fuel type codes into larger categories used by EIA in, for example, the Annual Energy Review (AER). Two or three letter alphanumeric.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), @@ -737,12 +785,12 @@ def upgrade() -> None: sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'nuclear_unit_id', 'energy_source_code', 'prime_mover_code') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_fuel_nuclear_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['fuel_type_code_aer'], ['fuel_types_aer_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_fuel_type_code_aer_fuel_types_aer_eia')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generation_fuel_nuclear_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_nuclear_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'nuclear_unit_id', 'energy_source_code', 'prime_mover_code', name=op.f('pk_generation_fuel_nuclear_eia923')) ) op.create_table('generators_entity_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -765,8 +813,8 @@ def upgrade() -> None: sa.Column('original_planned_generator_operating_date', sa.Date(), nullable=True, comment='The date the generator was originally scheduled to be operational'), 
sa.Column('operating_switch', sa.Text(), nullable=True, comment='Indicates whether the fuel switching generator can switch when operating'), sa.Column('previously_canceled', sa.Boolean(), nullable=True, comment='Indicates whether the generator was previously reported as indefinitely postponed or canceled'), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id') + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_generators_entity_eia_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', name=op.f('pk_generators_entity_eia')) ) op.create_table('green_pricing_eia861', sa.Column('customer_class', sa.Enum('commercial', 'industrial', 'direct_connection', 'other', 'residential', 'total', 'transportation'), nullable=True, comment='High level categorization of customer type.'), @@ -780,7 +828,17 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_green_pricing_eia861_data_maturity_data_maturities')) + ) + op.create_table('heat_rate_by_unit_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_heat_rate_by_unit_monthly_plant_id_eia_plants_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl', name=op.f('pk_heat_rate_by_unit_monthly')) ) op.create_table('mergers_eia861', sa.Column('entity_type', sa.Text(), nullable=True, comment='Entity type of principal owner.'), @@ -797,7 +855,7 @@ def upgrade() -> None: sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), sa.Column('zip_code_4', sa.Text(), nullable=True, comment='Four digit US Zip Code suffix.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_mergers_eia861_data_maturity_data_maturities')) ) op.create_table('net_metering_customer_fuel_class_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -813,7 +871,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_net_metering_customer_fuel_class_eia861_data_maturity_data_maturities')) ) op.create_table('net_metering_misc_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -822,7 +880,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_net_metering_misc_eia861_data_maturity_data_maturities')) ) op.create_table('non_net_metering_customer_fuel_class_eia861', sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), @@ -834,7 +892,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_non_net_metering_customer_fuel_class_eia861_data_maturity_data_maturities')) ) op.create_table('non_net_metering_misc_eia861', sa.Column('backup_capacity_mw', sa.Float(), nullable=True), @@ -846,7 +904,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_owned_capacity_mw', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_non_net_metering_misc_eia861_data_maturity_data_maturities')) ) op.create_table('operational_data_misc_eia861', sa.Column('consumed_by_facility_mwh', sa.Float(), nullable=True), @@ -877,7 +935,7 @@ def upgrade() -> None: sa.Column('wholesale_power_purchases_mwh', sa.Float(), nullable=True), sa.Column('winter_peak_demand_mw', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_operational_data_misc_eia861_data_maturity_data_maturities')) ) op.create_table('operational_data_revenue_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -887,14 +945,14 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_operational_data_revenue_eia861_data_maturity_data_maturities')) ) op.create_table('plants_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_plants_eia_plant_id_pudl_plants_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', name=op.f('pk_plants_eia')) ) op.create_table('reliability_eia861', sa.Column('caidi_w_major_event_days_minus_loss_of_service_minutes', sa.Float(), nullable=True), @@ -919,8 +977,8 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['momentary_interruption_definition'], ['momentary_interruptions_eia.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_reliability_eia861_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['momentary_interruption_definition'], ['momentary_interruptions_eia.code'], name=op.f('fk_reliability_eia861_momentary_interruption_definition_momentary_interruptions_eia')) ) op.create_table('sales_eia861', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -938,8 +996,8 @@ def upgrade() -> None: sa.Column('sales_mwh', sa.Float(), nullable=True, comment='Quantity of electricity sold in MWh.'), sa.Column('sales_revenue', sa.Float(), nullable=True, comment='Revenue from electricity sold.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'state', 'report_date', 'balancing_authority_code_eia', 'customer_class', 'business_model', 'service_type') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_sales_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('utility_id_eia', 'state', 'report_date', 'balancing_authority_code_eia', 'customer_class', 'business_model', 'service_type', name=op.f('pk_sales_eia861')) ) op.create_table('service_territory_eia861', sa.Column('county', sa.Text(), nullable=True, comment='County name.'), @@ -951,15 +1009,35 @@ def upgrade() -> None: sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'county_id_fips') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_service_territory_eia861_data_maturity_data_maturities')), + sa.PrimaryKeyConstraint('report_date', 'utility_id_eia', 'county_id_fips', name=op.f('pk_service_territory_eia861')) + ) + op.create_table('summarized_demand_ferc714', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), + sa.Column('demand_annual_mwh', sa.Float(), nullable=True), + sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), + sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), + sa.Column('population_density_km2', sa.Float(), nullable=True, comment='Average population per sq. km area of a service territory.'), + sa.Column('demand_annual_per_capita_mwh', sa.Float(), nullable=True, comment='Per-capita annual demand, averaged using Census county-level population estimates.'), + sa.Column('demand_density_mwh_km2', sa.Float(), nullable=True, comment='Annual demand per km2 of a given service territory.'), + sa.Column('eia_code', sa.Integer(), nullable=True), + sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), + sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), + sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), + sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), + sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], name=op.f('fk_summarized_demand_ferc714_respondent_id_ferc714_respondent_id_ferc714')), + sa.PrimaryKeyConstraint('respondent_id_ferc714', 'report_date', name=op.f('pk_summarized_demand_ferc714')) ) op.create_table('utilities_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_eia') + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_eia', name=op.f('pk_utilities_eia')) ) op.create_table('utilities_eia860', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -987,16 +1065,16 @@ def upgrade() -> None: sa.Column('phone_number_2', sa.Text(), nullable=True, comment='Phone number for utility contact 2.'), sa.Column('phone_extension_2', sa.Text(), nullable=True, comment='Phone extension for utility contact 2'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utilities_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_utilities_eia860_utility_id_eia_utilities_entity_eia')), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', name=op.f('pk_utilities_eia860')) ) op.create_table('utilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_name_ferc1', sa.Text(), nullable=True, comment='Name of the responding utility, as it is reported in FERC Form 1. 
For human readability only.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1') + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', name=op.f('pk_utilities_ferc1')) ) op.create_table('utility_data_misc_eia861', sa.Column('alternative_fuel_vehicle_2_activity', sa.Boolean(), nullable=True), @@ -1018,7 +1096,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('wholesale_marketing_activity', sa.Boolean(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_misc_eia861_data_maturity_data_maturities')) ) op.create_table('utility_data_nerc_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -1027,7 +1105,7 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_nerc_eia861_data_maturity_data_maturities')) ) op.create_table('utility_data_rto_eia861', sa.Column('nerc_region', sa.Enum('BASN', 'CALN', 'CALS', 'DSW', 'ASCC', 'ISONE', 'ERCOT', 'NORW', 'NYISO', 'PJM', 'ROCK', 'ECAR', 'FRCC', 'HICC', 'MAAC', 'MAIN', 'MAPP', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC', 'WSCC', 'MISO', 'ECAR_MAAC', 'MAPP_WECC', 'RFC_SERC', 'SPP_WECC', 'MRO_WECC', 'ERCOT_SPP', 'SPP_TRE', 'ERCOT_TRE', 'MISO_TRE', 'VI', 'GU', 'PR', 'AS', 'UNK'), nullable=True, comment='NERC region in which the plant is located'), @@ -1036,14 +1114,14 @@ def upgrade() -> None: sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ) + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_utility_data_rto_eia861_data_maturity_data_maturities')) ) op.create_table('utility_plant_assn', sa.Column('utility_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=False, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_pudl', 'plant_id_pudl') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_utility_plant_assn_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_utility_plant_assn_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_pudl', 'plant_id_pudl', name=op.f('pk_utility_plant_assn')) ) op.create_table('balance_sheet_assets_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1055,8 +1133,8 @@ def upgrade() -> None: sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_balance_sheet_assets_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type', name=op.f('pk_balance_sheet_assets_ferc1')) ) op.create_table('balance_sheet_liabilities_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1068,13 +1146,13 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_balance_sheet_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type', name=op.f('pk_balance_sheet_liabilities_ferc1')) ) op.create_table('boiler_fuel_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1082,10 +1160,20 @@ def upgrade() -> None: sa.Column('fuel_mmbtu_per_unit', sa.Float(), nullable=True, comment='Heat content of the fuel in millions of Btus per physical unit.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_boiler_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_boiler_fuel_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_boiler_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_boiler_fuel_eia923')) + ) + op.create_table('capacity_factor_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit 
facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_capacity_factor_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_capacity_factor_by_generator_monthly')) ) op.create_table('cash_flow_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1095,8 +1183,8 @@ def upgrade() -> None: sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_cash_flow_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type', name=op.f('pk_cash_flow_ferc1')) ) op.create_table('denorm_balance_sheet_assets_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -1110,9 +1198,9 @@ def upgrade() -> None: sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_balance_sheet_assets_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_balance_sheet_assets_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'asset_type', name=op.f('pk_denorm_balance_sheet_assets_ferc1')) ) op.create_table('denorm_balance_sheet_liabilities_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1126,9 +1214,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_balance_sheet_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_balance_sheet_liabilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'liability_type', name=op.f('pk_denorm_balance_sheet_liabilities_ferc1')) ) op.create_table('denorm_boiler_fuel_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1140,7 +1228,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -1148,13 +1236,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boiler_fuel_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_denorm_boiler_fuel_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_boiler_fuel_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_eia923')) ) op.create_table('denorm_boiler_fuel_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1166,7 +1254,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -1174,13 +1262,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boiler_fuel_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_monthly_eia923')) ) op.create_table('denorm_cash_flow_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1192,9 +1280,9 @@ def upgrade() -> None: sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_cash_flow_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_cash_flow_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'amount_type', name=op.f('pk_denorm_cash_flow_ferc1')) ) op.create_table('denorm_depreciation_amortization_summary_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1206,9 +1294,9 @@ def upgrade() -> None: sa.Column('ferc_account_label', sa.Text(), nullable=False, comment='Long FERC account identifier derived from values reported in the XBRL taxonomies. May also refer to aggregations of individual FERC accounts.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_depreciation_amortization_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_depreciation_amortization_summary_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label', name=op.f('pk_denorm_depreciation_amortization_summary_ferc1')) ) op.create_table('denorm_electric_energy_dispositions_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1219,9 +1307,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_energy_dispositions_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_energy_dispositions_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type', name=op.f('pk_denorm_electric_energy_dispositions_ferc1')) ) op.create_table('denorm_electric_energy_sources_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1232,9 +1320,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_energy_sources_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_energy_sources_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type', name=op.f('pk_denorm_electric_energy_sources_ferc1')) ) op.create_table('denorm_electric_operating_expenses_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1246,9 +1334,9 @@ def upgrade() -> None: sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_operating_expenses_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_operating_expenses_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type', name=op.f('pk_denorm_electric_operating_expenses_ferc1')) ) op.create_table('denorm_electric_operating_revenues_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1262,9 +1350,9 @@ def upgrade() -> None: sa.Column('avg_customers_per_month', sa.Float(), nullable=True, comment='Average number of customers per month.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_operating_revenues_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_operating_revenues_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type', name=op.f('pk_denorm_electric_operating_revenues_ferc1')) ) op.create_table('denorm_electric_plant_depreciation_changes_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1274,14 +1362,14 @@ def upgrade() -> None: sa.Column('depreciation_type', sa.Text(), nullable=False, comment='Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.'), sa.Column('plant_status', sa.Text(), nullable=False, comment='Utility plant financial status (in service, future, leased, total).'), sa.Column('utility_type', sa.Text(), nullable=False, comment='Listing of utility plant types. 
Examples include Electric Utility, Gas Utility, and Other Utility.'), - sa.Column('utility_plant_value', sa.Float(), nullable=True, comment='Utility plant value.'), + sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_plant_depreciation_changes_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_plant_depreciation_changes_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type', name=op.f('pk_denorm_electric_plant_depreciation_changes_ferc1')) ) 
op.create_table('denorm_electric_plant_depreciation_functional_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1296,9 +1384,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electric_plant_depreciation_functional_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electric_plant_depreciation_functional_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type', name=op.f('pk_denorm_electric_plant_depreciation_functional_ferc1')) ) op.create_table('denorm_electricity_sales_by_rate_schedule_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1314,8 +1402,8 @@ def upgrade() -> None: sa.Column('kwh_per_customer', sa.Float(), nullable=True, comment='kwh per customer.'), sa.Column('revenue_per_kwh', sa.Float(), nullable=True), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_electricity_sales_by_rate_schedule_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_electricity_sales_by_rate_schedule_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1329,12 +1417,12 @@ def upgrade() -> None: sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generation_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_denorm_generation_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_eia923')) ) op.create_table('denorm_generation_monthly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -1347,11 +1435,11 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_denorm_generation_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_denorm_generation_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_monthly_eia923')) ) op.create_table('denorm_income_statement_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1365,9 +1453,9 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_income_statement_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_income_statement_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name=op.f('pk_denorm_income_statement_ferc1')) ) op.create_table('denorm_other_regulatory_liabilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1380,8 +1468,8 @@ def upgrade() -> None: sa.Column('increase_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The increase during the reporting period of other regulatory liabilities.'), sa.Column('account_detail', sa.Text(), nullable=True, comment='Description of the account number credited from making debit adjustment to other regulatory liabilities.'), sa.Column('decrease_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The decrease during the reporting period of other regulatory liabilities.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_other_regulatory_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_other_regulatory_liabilities_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_plant_in_service_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), @@ -1398,9 +1486,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('transfers', sa.Float(), nullable=True, comment='Cost of transfers into (out of) the account.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - 
sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_plant_in_service_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plant_in_service_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label', name=op.f('pk_denorm_plant_in_service_ferc1')) ) op.create_table('denorm_purchased_power_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -1421,9 +1509,9 @@ def upgrade() -> None: sa.Column('received_mwh', sa.Float(), nullable=True, comment='Gross megawatt-hours received in power exchanges and used as the basis for settlement.'), sa.Column('tariff', sa.Text(), nullable=True, comment='FERC Rate Schedule Number or Tariff. (Note: may be incomplete if originally reported on multiple lines.)'), sa.Column('total_settlement', sa.Float(), nullable=True, comment='Sum of demand, energy, and other charges (USD). For power exchanges, the settlement amount for the net receipt of energy. 
If more energy was delivered than received, this amount is negative.'), - sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], name=op.f('fk_denorm_purchased_power_ferc1_purchase_type_code_power_purchase_types_ferc1')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_purchased_power_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_purchased_power_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_retained_earnings_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1432,15 +1520,14 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('earnings_type', sa.Text(), nullable=False, comment='Label describing types of earnings.'), - sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. 
This could be a balance or a change in value.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_retained_earnings_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_retained_earnings_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type', name=op.f('pk_denorm_retained_earnings_ferc1')) ) op.create_table('denorm_transmission_statistics_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1464,8 +1551,8 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Production expenses: Maintenance (USD).'), sa.Column('opex_rents', sa.Float(), nullable=True, comment='Production expenses: rents (USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_transmission_statistics_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_transmission_statistics_ferc1_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_utilities_eia', sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), @@ -1495,10 +1582,10 @@ def upgrade() -> None: sa.Column('phone_number_2', sa.Text(), nullable=True, comment='Phone number for utility contact 2.'), sa.Column('phone_extension_2', sa.Text(), nullable=True, comment='Phone extension for utility contact 2'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_utilities_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_utilities_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', name=op.f('pk_denorm_utilities_eia')) ) op.create_table('denorm_utility_plant_summary_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1511,9 +1598,9 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_denorm_utility_plant_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_utility_plant_summary_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type', name=op.f('pk_denorm_utility_plant_summary_ferc1')) ) op.create_table('depreciation_amortization_summary_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1523,8 +1610,8 @@ def upgrade() -> None: sa.Column('ferc_account_label', sa.Text(), nullable=False, comment='Long FERC account identifier derived from values reported in the XBRL taxonomies. May also refer to aggregations of individual FERC accounts.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_depreciation_amortization_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'ferc_account_label', name=op.f('pk_depreciation_amortization_summary_ferc1')) ) op.create_table('electric_energy_dispositions_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1533,8 +1620,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_energy_dispositions_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_disposition_type', name=op.f('pk_electric_energy_dispositions_ferc1')) ) op.create_table('electric_energy_sources_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1543,8 +1630,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('energy_mwh', sa.Float(), nullable=True, comment='Sources and uses of energy in MWh.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_energy_sources_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'energy_source_type', name=op.f('pk_electric_energy_sources_ferc1')) ) op.create_table('electric_operating_expenses_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1554,8 +1641,8 @@ def upgrade() -> None: sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_operating_expenses_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'expense_type', name=op.f('pk_electric_operating_expenses_ferc1')) ) op.create_table('electric_operating_revenues_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1567,8 +1654,8 @@ def upgrade() -> None: sa.Column('avg_customers_per_month', sa.Float(), nullable=True, comment='Average number of customers per month.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_operating_revenues_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'revenue_type', name=op.f('pk_electric_operating_revenues_ferc1')) ) op.create_table('electric_plant_depreciation_changes_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1576,13 +1663,13 @@ def upgrade() -> None: sa.Column('depreciation_type', sa.Text(), nullable=False, comment='Type of depreciation provision within FERC Account 108, including cost ofremoval, depreciation expenses, salvage, cost of retired plant, etc.'), sa.Column('plant_status', sa.Text(), nullable=False, comment='Utility plant financial status (in service, future, leased, total).'), sa.Column('utility_type', sa.Text(), nullable=False, comment='Listing of utility plant types. Examples include Electric Utility, Gas Utility, and Other Utility.'), - sa.Column('utility_plant_value', sa.Float(), nullable=True, comment='Utility plant value.'), + sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. 
format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. '359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_plant_depreciation_changes_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'depreciation_type', 'plant_status', 'utility_type', name=op.f('pk_electric_plant_depreciation_changes_ferc1')) ) op.create_table('electric_plant_depreciation_functional_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1595,8 +1682,8 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electric_plant_depreciation_functional_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'plant_function', 'plant_status', 'utility_type', name=op.f('pk_electric_plant_depreciation_functional_ferc1')) ) op.create_table('electricity_sales_by_rate_schedule_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1610,7 +1697,7 @@ def upgrade() -> None: sa.Column('kwh_per_customer', sa.Float(), nullable=True, comment='kwh per customer.'), sa.Column('revenue_per_kwh', sa.Float(), nullable=True), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_electricity_sales_by_rate_schedule_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('epacamd_eia', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -1620,15 +1707,37 @@ def upgrade() -> None: sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('boiler_id', sa.Text(), nullable=True, comment='Alphanumeric boiler ID.'), sa.Column('generator_id', sa.Text(), nullable=True, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ) + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_epacamd_eia_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_epacamd_eia_plant_id_eia_generators_entity_eia')) + ) + op.create_table('fuel_cost_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), 
nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_fuel_cost_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_fuel_cost_by_generator_monthly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_fuel_cost_by_generator_monthly_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_fuel_cost_by_generator_monthly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_fuel_cost_by_generator_monthly')) ) op.create_table('fuel_receipts_costs_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=True, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), sa.Column('contract_type_code', sa.Enum('S', 'C', 'NC', 'T'), nullable=True, comment='Purchase type under which receipts occurred in the reporting month. C: Contract, NC: New Contract, S: Spot Purchase, T: Tolling Agreement.'), sa.Column('contract_expiration_date', sa.Date(), nullable=True, comment='Date contract expires.Format: MMYY.'), - sa.Column('energy_source_code', sa.Text(), nullable=True, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=True, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_group_code', sa.Enum('petroleum', 'other_gas', 'petroleum_coke', 'natural_gas', 'coal'), nullable=True, comment='Fuel groups used in the Electric Power Monthly'), sa.Column('mine_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL mine identifier.'), @@ -1646,13 +1755,13 @@ def upgrade() -> None: sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['mine_id_pudl'], ['coalmine_eia923.mine_id_pudl'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], ) + sa.ForeignKeyConstraint(['contract_type_code'], ['contract_types_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_contract_type_code_contract_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_fuel_receipts_costs_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_energy_source_code_energy_sources_eia')), + 
sa.ForeignKeyConstraint(['mine_id_pudl'], ['coalmine_eia923.mine_id_pudl'], name=op.f('fk_fuel_receipts_costs_eia923_mine_id_pudl_coalmine_eia923')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_fuel_receipts_costs_eia923_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['primary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_primary_transportation_mode_code_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['secondary_transportation_mode_code'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_fuel_receipts_costs_eia923_secondary_transportation_mode_code_fuel_transportation_modes_eia')) ) op.create_table('generation_eia923', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1660,9 +1769,56 @@ def upgrade() -> None: sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generation_eia923_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_eia923_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_generation_eia923')) + ) + op.create_table('generation_fuel_by_generator_energy_source_monthly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_monthly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', name=op.f('pk_generation_fuel_by_generator_energy_source_monthly_eia923')) + ) + op.create_table('generation_fuel_by_generator_monthly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date 
reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_generation_fuel_by_generator_monthly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_generation_fuel_by_generator_monthly_eia923')) + ) + op.create_table('heat_rate_by_generator_monthly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_heat_rate_by_generator_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_heat_rate_by_generator_monthly_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_heat_rate_by_generator_monthly')) ) op.create_table('income_statement_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1674,8 +1830,59 @@ def upgrade() -> None: sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_income_statement_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'income_type', name=op.f('pk_income_statement_ferc1')) + ) + op.create_table('mcoe_generators_monthly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), + sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), + sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), + sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit 
level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_mcoe_generators_monthly_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_mcoe_generators_monthly_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_mcoe_generators_monthly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_mcoe_generators_monthly_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], name=op.f('fk_mcoe_generators_monthly_utility_id_eia_utilities_entity_eia')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_mcoe_generators_monthly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_generators_monthly')) + ) + op.create_table('mcoe_monthly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy 
Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_mcoe_monthly_plant_id_eia_generators_entity_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_monthly')) ) op.create_table('other_regulatory_liabilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1686,7 +1893,7 @@ def upgrade() -> None: sa.Column('increase_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The increase during the reporting period of other regulatory liabilities.'), sa.Column('account_detail', sa.Text(), nullable=True, comment='Description of the account number credited from making debit adjustment to other regulatory liabilities.'), sa.Column('decrease_in_other_regulatory_liabilities', sa.Float(), nullable=True, comment='The decrease during the reporting period of other regulatory liabilities.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_other_regulatory_liabilities_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('plant_in_service_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1701,8 +1908,8 @@ def upgrade() -> None: sa.Column('transfers', sa.Float(), nullable=True, comment='Cost of transfers into (out of) the account.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_plant_in_service_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'ferc_account_label', name=op.f('pk_plant_in_service_ferc1')) ) op.create_table('plants_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1746,21 +1953,21 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('water_source', sa.Text(), nullable=True, comment='Name of water source associated with the plant.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], ), - sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date') + sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], name=op.f('fk_plants_eia860_balancing_authority_code_eia_balancing_authorities_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_plants_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], name=op.f('fk_plants_eia860_plant_id_eia_plants_entity_eia')), + sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], name=op.f('fk_plants_eia860_reporting_frequency_code_reporting_frequencies_eia')), + sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], name=op.f('fk_plants_eia860_sector_id_eia_sector_consolidated_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_plants_eia860_utility_id_eia_utilities_eia860')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', name=op.f('pk_plants_eia860')) ) op.create_table('plants_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=False, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_plants_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_plants_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_plants_ferc1')) ) op.create_table('purchased_power_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1779,22 +1986,21 @@ def upgrade() -> None: sa.Column('energy_charges', sa.Float(), nullable=True, comment='Energy charges (USD).'), sa.Column('other_charges', sa.Float(), nullable=True, comment='Other charges, including out-of-period adjustments (USD).'), sa.Column('total_settlement', sa.Float(), nullable=True, comment='Sum of demand, energy, and other charges (USD). For power exchanges, the settlement amount for the net receipt of energy. 
If more energy was delivered than received, this amount is negative.'), - sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['purchase_type_code'], ['power_purchase_types_ferc1.code'], name=op.f('fk_purchased_power_ferc1_purchase_type_code_power_purchase_types_ferc1')), + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_purchased_power_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('retained_earnings_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('earnings_type', sa.Text(), nullable=False, comment='Label describing types of earnings.'), - sa.Column('amount', sa.Float(), nullable=True, comment='Reported amount of dollars. This could be a balance or a change in value.'), sa.Column('starting_balance', sa.Float(), nullable=True, comment='Account balance at beginning of year.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('balance', sa.Text(), nullable=True, comment='Indication of whether a column is a credit or debit, as reported in the XBRL taxonomy.'), sa.Column('ferc_account', sa.Text(), nullable=True, comment="Actual FERC Account number (e.g. 
'359.1') if available, or a PUDL assigned ID when FERC accounts have been split or combined in reporting."), sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_retained_earnings_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'earnings_type', name=op.f('pk_retained_earnings_ferc1')) ) op.create_table('transmission_statistics_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -1816,19 +2022,19 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Maintenance expenses for the transmission line.'), sa.Column('opex_rents', sa.Float(), nullable=True, comment='Rent expenses for the transmission line.'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Overall expenses for the transmission line.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_transmission_statistics_ferc1_utility_id_ferc1_utilities_ferc1')) ) op.create_table('utilities_ferc1_dbf', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_id_ferc1_dbf', sa.Integer(), nullable=False, comment='FERC-assigned respondent_id from DBF reporting years, identifying the reporting entity. Stable from year to year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1_dbf') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utilities_ferc1_dbf_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1_dbf', name=op.f('pk_utilities_ferc1_dbf')) ) op.create_table('utilities_ferc1_xbrl', sa.Column('utility_id_ferc1', sa.Integer(), nullable=True, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), sa.Column('utility_id_ferc1_xbrl', sa.Text(), nullable=False, comment='FERC-assigned entity_id from XBRL reporting years, identifying the reporting entity. Stable from year to year.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1_xbrl') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utilities_ferc1_xbrl_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1_xbrl', name=op.f('pk_utilities_ferc1_xbrl')) ) op.create_table('utility_plant_summary_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. 
This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -1839,8 +2045,8 @@ def upgrade() -> None: sa.Column('row_type_xbrl', sa.Enum('calculated_value', 'reported_value', 'correction'), nullable=True, comment='Indicates whether the value reported in the row is calculated, or uniquely reported within the table.'), sa.Column('ending_balance', sa.Float(), nullable=True, comment='Account balance at end of year.'), sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type') + sa.ForeignKeyConstraint(['utility_id_ferc1'], ['utilities_ferc1.utility_id_ferc1'], name=op.f('fk_utility_plant_summary_ferc1_utility_id_ferc1_utilities_ferc1')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'report_year', 'utility_type', 'utility_plant_asset_type', name=op.f('pk_utility_plant_summary_ferc1')) ) op.create_table('boilers_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -1934,59 +2140,59 @@ def upgrade() -> None: sa.Column('so2_control_proposed_strategy_3', sa.Text(), nullable=True, comment='Proposed strategy to comply with the most stringent sulfur dioxide regulation.'), sa.Column('standard_so2_percent_scrubbed', sa.Float(), nullable=True, comment='The percent of sulfur dioxide to be scrubbed specified by the most stringent sulfur dioxide regulation.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. 
Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], 
['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], 
), - sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], ), - sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date') + sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], 
name=op.f('fk_boilers_eia860_boiler_fuel_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], name=op.f('fk_boilers_eia860_boiler_fuel_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], name=op.f('fk_boilers_eia860_boiler_status_boiler_status_eia')), + sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], name=op.f('fk_boilers_eia860_boiler_type_boiler_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boilers_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_1_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_2_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], name=op.f('fk_boilers_eia860_firing_type_3_firing_types_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_4_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_5_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_existing_strategy_6_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_mercury_control_proposed_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_existing_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_boilers_eia860_nox_control_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_out_of_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_nox_control_planned_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], name=op.f('fk_boilers_eia860_nox_control_status_code_nox_control_status_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_1_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_2_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], 
['particulate_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_particulate_control_out_of_compliance_strategy_3_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id'], ['boilers_entity_eia.plant_id_eia', 'boilers_entity_eia.boiler_id'], name=op.f('fk_boilers_eia860_plant_id_eia_boilers_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_boilers_eia860_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_mercury_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_nox_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_particulate_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], name=op.f('fk_boilers_eia860_regulation_so2_regulations_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_existing_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], 
name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_out_of_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_boilers_eia860_so2_control_planned_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], name=op.f('fk_boilers_eia860_unit_nox_nox_units_eia')), + sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], name=op.f('fk_boilers_eia860_unit_particulate_particulate_units_eia')), + sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], name=op.f('fk_boilers_eia860_unit_so2_so2_units_eia')), + sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], name=op.f('fk_boilers_eia860_wet_dry_bottom_wet_dry_bottom_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date', name=op.f('pk_boilers_eia860')) ) op.create_table('denorm_fuel_by_plant_ferc1', sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the 
data was reported.'), @@ -2009,10 +2215,10 @@ def upgrade() -> None: sa.Column('primary_fuel_by_mmbtu', sa.Text(), nullable=True, comment='Primary fuel for plant as a percentage of heat content.'), sa.Column('waste_fraction_cost', sa.Float(), nullable=True, comment='Waste-heat cost as a percentage of overall fuel cost.'), sa.Column('waste_fraction_mmbtu', sa.Float(), nullable=True, comment='Waste-heat heat content as a percentage of overall fuel heat content (MMBtu).'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_year', 'utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_by_plant_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_fuel_by_plant_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_by_plant_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_year', 'utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_denorm_fuel_by_plant_ferc1')) ) op.create_table('denorm_fuel_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2031,10 +2237,10 @@ def upgrade() -> None: sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_units', sa.Enum('mmbtu', 'gramsU', 'kg', 'mwhth', 'kgal', 'bbl', 'klbs', 'mcf', 'gal', 'mwdth', 
'btu', 'ton'), nullable=True, comment='Reported unit of measure for fuel.'), sa.Column('record_id', sa.Text(), nullable=False, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_fuel_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_fuel_ferc1')) ) op.create_table('denorm_fuel_receipts_costs_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), @@ -2057,10 +2263,10 @@ def upgrade() -> None: sa.Column('mercury_content_ppm', sa.Float(), nullable=True, comment='Mercury content in parts per million (ppm) to the nearest 0.001 ppm.'), sa.Column('moisture_content_pct', sa.Float(), nullable=True), sa.Column('chlorine_content_ppm', sa.Float(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], 
['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ) + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_fuel_receipts_costs_yearly_eia923_utility_id_pudl_utilities_pudl')) ) op.create_table('denorm_generation_fuel_combined_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2070,7 +2276,7 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -2079,13 +2285,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_fuel_combined_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'prime_mover_code', 'energy_source_code', name=op.f('pk_denorm_generation_fuel_combined_yearly_eia923')) ) op.create_table('denorm_plants_all_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2166,10 +2372,10 @@ def upgrade() -> None: sa.Column('opex_production_before_pumping', sa.Float(), nullable=True, comment='Total production expenses before pumping (USD).'), sa.Column('opex_pumped_storage', sa.Float(), nullable=True, comment='Production expenses: pumped storage (USD).'), sa.Column('opex_pumping', sa.Float(), nullable=True, comment='Production expenses: We are here to PUMP YOU UP! 
(USD).'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_all_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_all_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_all_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_all_ferc1')) ) op.create_table('denorm_plants_eia', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2226,15 +2432,15 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), sa.Column('balancing_authority_code_eia_consistent_rate', sa.Float(), nullable=True, comment='Percentage consistency of balancing authority code across entity records.'), - sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], ), - sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date') + sa.ForeignKeyConstraint(['balancing_authority_code_eia'], ['balancing_authorities_eia.code'], name=op.f('fk_denorm_plants_eia_balancing_authority_code_eia_balancing_authorities_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_plants_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_plants_eia_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['reporting_frequency_code'], ['reporting_frequencies_eia.code'], name=op.f('fk_denorm_plants_eia_reporting_frequency_code_reporting_frequencies_eia')), + sa.ForeignKeyConstraint(['sector_id_eia'], ['sector_consolidated_eia.code'], 
name=op.f('fk_denorm_plants_eia_sector_id_eia_sector_consolidated_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_plants_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', name=op.f('pk_denorm_plants_eia')) ) op.create_table('denorm_plants_hydro_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2279,10 +2485,10 @@ def upgrade() -> None: sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('plant_type', sa.Text(), nullable=True), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_hydro_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_hydro_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_hydro_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_hydro_ferc1')) ) op.create_table('denorm_plants_pumped_storage_ferc1', 
sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2331,10 +2537,10 @@ def upgrade() -> None: sa.Column('plant_hours_connected_while_generating', sa.Float(), nullable=True, comment='Hours the plant was connected to load while generating in the report year.'), sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_pumped_storage_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_pumped_storage_ferc1')) ) op.create_table('denorm_plants_small_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2359,10 +2565,10 @@ def upgrade() -> None: sa.Column('opex_total_nonfuel', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('peak_demand_mw', sa.Float(), nullable=True, comment='Net peak demand for 60 minutes. 
Note: in some cases peak demand for other time periods may have been reported instead, if hourly peak demand was unavailable.'), sa.Column('plant_type', sa.Text(), nullable=True), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_small_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_small_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_small_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_small_ferc1')) ) op.create_table('denorm_plants_steam_ferc1', sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), @@ -2420,10 +2626,10 @@ def upgrade() -> None: sa.Column('plant_type', sa.Text(), nullable=True), sa.Column('record_id', sa.Text(), nullable=False, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), sa.Column('water_limited_capacity_mw', sa.Float(), nullable=True, comment='Plant capacity in MW when limited by condenser water.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('record_id') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_steam_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_steam_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_steam_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('record_id', name=op.f('pk_denorm_plants_steam_ferc1')) ) op.create_table('denorm_plants_utilities_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2433,11 +2639,11 @@ def upgrade() -> None: sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'utility_id_eia') + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_denorm_plants_utilities_eia_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_utilities_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_plants_utilities_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_utilities_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'utility_id_eia', name=op.f('pk_denorm_plants_utilities_eia')) ) op.create_table('denorm_plants_utilities_ferc1', sa.Column('utility_id_ferc1', sa.Integer(), nullable=False, comment='PUDL-assigned utility ID, identifying a FERC1 utility. This is an auto-incremented ID and is not expected to be stable from year to year.'), @@ -2445,10 +2651,10 @@ def upgrade() -> None: sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), sa.Column('utility_name_ferc1', sa.Text(), nullable=True, comment='Name of the responding utility, as it is reported in FERC Form 1. 
For human readability only.'), sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1') + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_plants_utilities_ferc1_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_denorm_plants_utilities_ferc1_utility_id_ferc1_plants_ferc1')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_plants_utilities_ferc1_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('utility_id_ferc1', 'plant_name_ferc1', name=op.f('pk_denorm_plants_utilities_ferc1')) ) op.create_table('fuel_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2462,7 +2668,7 @@ def upgrade() -> None: sa.Column('fuel_cost_per_unit_burned', sa.Float(), nullable=True, comment='Average cost of fuel consumed in the report year per reported fuel unit (USD).'), sa.Column('fuel_cost_per_unit_delivered', sa.Float(), nullable=True, comment='Average cost of fuel delivered in the report year per reported fuel unit (USD).'), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_fuel_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('generators_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2534,31 +2740,41 @@ def upgrade() -> None: sa.Column('reactive_power_output_mvar', sa.Float(), nullable=True, comment='Reactive Power Output (MVAr)'), sa.Column('ferc_qualifying_facility', sa.Boolean(), nullable=True, comment='Indicatates whether or not a generator is a qualifying FERC cogeneation facility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], ), - 
sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_generators_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_1_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_generators_eia860_energy_source_2_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], 
name=op.f('fk_generators_eia860_energy_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_5_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_energy_source_code_6_energy_sources_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_generators_eia860_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_planned_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generators_eia860_planned_new_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], name=op.f('fk_generators_eia860_plant_id_eia_generators_entity_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_generators_eia860_plant_id_eia_plants_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generators_eia860_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], 
name=op.f('fk_generators_eia860_startup_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_generators_eia860_startup_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_generators_eia860_utility_id_eia_utilities_eia860')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_generators_eia860')) + ) + op.create_table('heat_rate_by_unit_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], name=op.f('fk_heat_rate_by_unit_yearly_plant_id_eia_plants_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl', name=op.f('pk_heat_rate_by_unit_yearly')) ) op.create_table('plants_hydro_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2566,7 +2782,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), sa.Column('project_num', sa.Integer(), nullable=True, comment='FERC Licensed Project Number.'), - sa.Column('plant_type', sa.Enum('hydro', 'run_of_river', 'run_of_river_with_storage', 'storage', 'na_category'), nullable=True), + sa.Column('plant_type', sa.Enum('storage', 'run_of_river', 'run_of_river_with_storage', 'na_category', 'hydro'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). 
Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2598,7 +2814,7 @@ def upgrade() -> None: sa.Column('opex_misc_plant', sa.Float(), nullable=True, comment='Production expenses: maintenance of miscellaneous hydraulic plant (USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_hydro_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_pumped_storage_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2642,7 +2858,7 @@ def upgrade() -> None: sa.Column('opex_pumping', sa.Float(), nullable=True, comment='Production expenses: We are here to PUMP YOU UP! 
(USD).'), sa.Column('opex_total', sa.Float(), nullable=True, comment='Total production expenses, excluding fuel (USD).'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_pumped_storage_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_small_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2662,7 +2878,7 @@ def upgrade() -> None: sa.Column('opex_maintenance', sa.Float(), nullable=True, comment='Production expenses: Maintenance (USD).'), sa.Column('fuel_type', sa.Text(), nullable=True), sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_small_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('plants_steam_ferc1', sa.Column('record_id', sa.Text(), nullable=True, comment='Identifier indicating original FERC Form 1 source record. format: {table_name}_{report_year}_{report_prd}_{respondent_id}_{spplmnt_num}_{row_number}. 
Unique within FERC Form 1 DB tables which are not row-mapped.'), @@ -2670,7 +2886,7 @@ def upgrade() -> None: sa.Column('report_year', sa.Integer(), nullable=True, comment='Four-digit year in which the data was reported.'), sa.Column('plant_id_ferc1', sa.Integer(), nullable=True, comment='Algorithmically assigned PUDL FERC Plant ID. WARNING: NOT STABLE BETWEEN PUDL DB INITIALIZATIONS.'), sa.Column('plant_name_ferc1', sa.Text(), nullable=True, comment='Name of the plant, as reported to FERC. This is a freeform string, not guaranteed to be consistent across references to the same plant.'), - sa.Column('plant_type', sa.Enum('combustion_turbine', 'internal_combustion', 'solar_thermal', 'nuclear', 'wind', 'combined_cycle', 'steam', 'geothermal', 'na_category', 'photovoltaic'), nullable=True), + sa.Column('plant_type', sa.Enum('wind', 'photovoltaic', 'nuclear', 'na_category', 'geothermal', 'combined_cycle', 'combustion_turbine', 'steam', 'solar_thermal', 'internal_combustion'), nullable=True), sa.Column('construction_type', sa.Enum('conventional', 'outdoor', 'semioutdoor'), nullable=True, comment="Type of plant construction ('outdoor', 'semioutdoor', or 'conventional'). 
Categorized by PUDL based on our best guess of intended value in FERC1 freeform strings."), sa.Column('construction_year', sa.Integer(), nullable=True, comment="Year the plant's oldest still operational unit was built."), sa.Column('installation_year', sa.Integer(), nullable=True, comment="Year the plant's most recently built unit was installed."), @@ -2705,7 +2921,7 @@ def upgrade() -> None: sa.Column('opex_production_total', sa.Float(), nullable=True, comment='Total operating expenses.'), sa.Column('opex_per_mwh', sa.Float(), nullable=True, comment='Total production expenses (USD per MWh generated).'), sa.Column('asset_retirement_cost', sa.Float(), nullable=True, comment='Asset retirement cost (USD).'), - sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], ) + sa.ForeignKeyConstraint(['utility_id_ferc1', 'plant_name_ferc1'], ['plants_ferc1.utility_id_ferc1', 'plants_ferc1.plant_name_ferc1'], name=op.f('fk_plants_steam_ferc1_utility_id_ferc1_plants_ferc1')) ) op.create_table('boiler_cooling_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2713,9 +2929,9 @@ def upgrade() -> None: sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('cooling_id_eia', sa.Text(), nullable=False, comment='The cooling system identification number reported to EIA.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'cooling_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_cooling_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_cooling_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'cooling_id_eia', name=op.f('pk_boiler_cooling_assn_eia860')) ) op.create_table('boiler_emissions_control_equipment_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2724,9 +2940,9 @@ def upgrade() -> None: sa.Column('emission_control_id_type', sa.Text(), nullable=False, comment='The type of emissions control id: so2, nox, particulate, or mercury.'), sa.Column('emission_control_id_eia', sa.Text(), nullable=False, comment="The emission control ID used to collect so2, nox, particulate, and mercury emissions data. This column should be used in conjunction with emissions_control_type as it's not guaranteed to be unique."), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. 
PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'emission_control_id_type', 'emission_control_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_emissions_control_equipment_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_emissions_control_equipment_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'emission_control_id_type', 'emission_control_id_eia', name=op.f('pk_boiler_emissions_control_equipment_assn_eia860')) ) op.create_table('boiler_generator_assn_eia860', sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), @@ -2739,12 +2955,12 @@ def upgrade() -> None: sa.Column('steam_plant_type_code', sa.Integer(), nullable=True, comment='Code that describes types of steam plants from EIA 860. See steam_plant_types_eia table for more details.'), sa.Column('bga_source', sa.Text(), nullable=True, comment='The source from where the unit_id_pudl is compiled. 
The unit_id_pudl comes directly from EIA 860, or string association (which looks at all the boilers and generators that are not associated with a unit and tries to find a matching string in the respective collection of boilers or generator), or from a unit connection (where the unit_id_eia is employed to find additional boiler generator connections).'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['boiler_generator_assn_type_code'], ['boiler_generator_assn_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['steam_plant_type_code'], ['steam_plant_types_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'generator_id', 'boiler_id') + sa.ForeignKeyConstraint(['boiler_generator_assn_type_code'], ['boiler_generator_assn_types_eia.code'], name=op.f('fk_boiler_generator_assn_eia860_boiler_generator_assn_type_code_boiler_generator_assn_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_boiler_generator_assn_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_generator_assn_eia860_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 
'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_boiler_generator_assn_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['steam_plant_type_code'], ['steam_plant_types_eia.code'], name=op.f('fk_boiler_generator_assn_eia860_steam_plant_type_code_steam_plant_types_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'report_date', 'generator_id', 'boiler_id', name=op.f('pk_boiler_generator_assn_eia860')) ) op.create_table('boiler_stack_flue_assn_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2754,8 +2970,18 @@ def upgrade() -> None: sa.Column('flue_id_eia', sa.Text(), nullable=True, comment='The flue identification value reported to EIA. The flue is a duct, pipe, or opening that transports exhast gases through the stack. This field was reported in conjunction with stack_id_eia until 2013 when stack_flue_id_eia took their place.'), sa.Column('stack_flue_id_eia', sa.Text(), nullable=True, comment='The stack or flue identification value reported to EIA. This denotes the place where emissions from the combusion process are released into the atmosphere. Prior to 2013, this was reported as `stack_id_eia` and `flue_id_eia`.'), sa.Column('stack_flue_id_pudl', sa.Text(), nullable=False, comment='A stack and/or flue identification value created by PUDL for use as part of the primary key for the stack flue equipment and boiler association tables. For 2013 and onward, this value is equal to the value for stack_flue_id_eia. 
Prior to 2013, this value is equal to the value for stack_id_eia and the value for flue_id_eia seperated by an underscore or just the stack_flue_eia in cases where flue_id_eia is NA.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'stack_flue_id_pudl') + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_boiler_stack_flue_assn_eia860_plant_id_eia_boilers_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'boiler_id', 'stack_flue_id_pudl', name=op.f('pk_boiler_stack_flue_assn_eia860')) + ) + op.create_table('capacity_factor_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_capacity_factor_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_capacity_factor_by_generator_yearly')) ) op.create_table('denorm_boiler_fuel_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2767,7 +2993,7 @@ def upgrade() -> None: sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), sa.Column('boiler_id', sa.Text(), nullable=False, comment='Alphanumeric boiler ID.'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='The fuel code associated with the fuel receipt. Two or three character alphanumeric.'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), sa.Column('fuel_consumed_units', sa.Float(), nullable=True, comment='Consumption of the fuel type in physical unit. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), @@ -2775,13 +3001,13 @@ def upgrade() -> None: sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), sa.Column('sulfur_content_pct', sa.Float(), nullable=True, comment='Sulfur content percentage by weight to the nearest 0.01 percent.'), sa.Column('ash_content_pct', sa.Float(), nullable=True, comment='Ash content percentage by weight to the nearest 0.1 percent.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date') + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 
'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boiler_fuel_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'energy_source_code', 'prime_mover_code', 'report_date', name=op.f('pk_denorm_boiler_fuel_yearly_eia923')) ) op.create_table('denorm_boilers_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2891,62 +3117,62 @@ def upgrade() -> None: sa.Column('waste_heat_input_mmbtu_per_hour', sa.Float(), nullable=True, comment='Design waste-heat input rate at maximum continuous steam flow where a waste-heat boiler is a boiler that receives all or a substantial portion of its energy input from the noncumbustible exhaust gases of a separate fuel-burning process (MMBtu per hour).'), sa.Column('wet_dry_bottom', sa.Text(), nullable=True, comment='Wet or Dry Bottom where Wet Bottom is defined as slag tanks that are installed at furnace throat to contain and remove molten ash from the furnace, and Dry Bottom is defined as having no slag tanks at furnace throat area, throat area is clear, and bottom ash drops through throat to bottom ash water hoppers.'), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip 
Code.'), - sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], ), - sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], ), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], ), - 
sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['nox_control_status_code'], ['nox_control_status_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], ), - 
sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], ), - sa.ForeignKeyConstraint(['unit_nox'], ['nox_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], ), - sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date') + sa.ForeignKeyConstraint(['boiler_fuel_code_1'], ['energy_sources_eia.code'], 
name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_fuel_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_fuel_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['boiler_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['boiler_status'], ['boiler_status_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_status_boiler_status_eia')), + sa.ForeignKeyConstraint(['boiler_type'], ['boiler_types_eia.code'], name=op.f('fk_denorm_boilers_eia_boiler_type_boiler_types_eia')), + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_boilers_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['firing_type_1'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_1_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_2'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_2_firing_types_eia')), + sa.ForeignKeyConstraint(['firing_type_3'], ['firing_types_eia.code'], name=op.f('fk_denorm_boilers_eia_firing_type_3_firing_types_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_2'], ['mercury_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_4'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_4_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_5'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_5_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_existing_strategy_6'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_existing_strategy_6_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_1'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_1_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_2'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_2_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['mercury_control_proposed_strategy_3'], ['mercury_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_mercury_control_proposed_strategy_3_mercury_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_existing_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_existing_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_manufacturer_code'], ['environmental_equipment_manufacturers_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_manufacturer_code_environmental_equipment_manufacturers_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_out_of_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_out_of_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_1'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_1_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_2'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_2_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_planned_caaa_compliance_strategy_3'], ['nox_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_planned_caaa_compliance_strategy_3_nox_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['nox_control_status_code'], 
['nox_control_status_eia.code'], name=op.f('fk_denorm_boilers_eia_nox_control_status_code_nox_control_status_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_1'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_1_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_2'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_2_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['particulate_control_out_of_compliance_strategy_3'], ['particulate_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_particulate_control_out_of_compliance_strategy_3_particulate_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'boiler_id', 'report_date'], ['boilers_eia860.plant_id_eia', 'boilers_eia860.boiler_id', 'boilers_eia860.report_date'], name=op.f('fk_denorm_boilers_eia_plant_id_eia_boilers_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_boilers_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['regulation_mercury'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_mercury_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_nox'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_nox_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_particulate'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_particulate_regulations_eia')), + sa.ForeignKeyConstraint(['regulation_so2'], ['regulations_eia.code'], name=op.f('fk_denorm_boilers_eia_regulation_so2_regulations_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], 
name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_existing_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_existing_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_out_of_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_out_of_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_1'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_1_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_2'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_2_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['so2_control_planned_caaa_compliance_strategy_3'], ['so2_compliance_strategies_eia.code'], name=op.f('fk_denorm_boilers_eia_so2_control_planned_caaa_compliance_strategy_3_so2_compliance_strategies_eia')), + sa.ForeignKeyConstraint(['unit_nox'], 
['nox_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_nox_nox_units_eia')), + sa.ForeignKeyConstraint(['unit_particulate'], ['particulate_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_particulate_particulate_units_eia')), + sa.ForeignKeyConstraint(['unit_so2'], ['so2_units_eia.code'], name=op.f('fk_denorm_boilers_eia_unit_so2_so2_units_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_boilers_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_boilers_eia_utility_id_pudl_utilities_pudl')), + sa.ForeignKeyConstraint(['wet_dry_bottom'], ['wet_dry_bottom_eia.code'], name=op.f('fk_denorm_boilers_eia_wet_dry_bottom_wet_dry_bottom_eia')), + sa.PrimaryKeyConstraint('plant_id_eia', 'boiler_id', 'report_date', name=op.f('pk_denorm_boilers_eia')) ) op.create_table('denorm_generation_yearly_eia923', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -2959,11 +3185,11 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_generation_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generation_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generation_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generation_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generation_yearly_eia923')) ) op.create_table('denorm_generators_eia', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3068,32 +3294,32 @@ def upgrade() -> None: sa.Column('winter_capacity_mw', sa.Float(), nullable=True, comment='The net winter capacity.'), 
sa.Column('winter_estimated_capability_mw', sa.Float(), nullable=True, comment='EIA estimated winter capacity (in MWh).'), sa.Column('zip_code', sa.Text(), nullable=True, comment='Five digit US Zip Code.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], ), - sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], 
['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_generators_eia_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['energy_source_1_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_1_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_1_transport_3_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_1'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_1_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_2'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_2_fuel_transportation_modes_eia')), + sa.ForeignKeyConstraint(['energy_source_2_transport_3'], ['fuel_transportation_modes_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_2_transport_3_fuel_transportation_modes_eia')), + 
sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_5'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_5_energy_sources_eia')), + sa.ForeignKeyConstraint(['energy_source_code_6'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_energy_source_code_6_energy_sources_eia')), + sa.ForeignKeyConstraint(['operational_status_code'], ['operational_status_eia.code'], name=op.f('fk_denorm_generators_eia_operational_status_code_operational_status_eia')), + sa.ForeignKeyConstraint(['planned_energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_planned_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['planned_new_prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_denorm_generators_eia_planned_new_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_generators_eia_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_generators_eia_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], 
name=op.f('fk_denorm_generators_eia_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['startup_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_2'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_2_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_3'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_3_energy_sources_eia')), + sa.ForeignKeyConstraint(['startup_source_code_4'], ['energy_sources_eia.code'], name=op.f('fk_denorm_generators_eia_startup_source_code_4_energy_sources_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_generators_eia_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_generators_eia_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('plant_id_eia', 'generator_id', 'report_date', name=op.f('pk_denorm_generators_eia')) ) op.create_table('denorm_ownership_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3106,19 +3332,158 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), sa.Column('owner_utility_id_eia', sa.Integer(), nullable=False, comment="EIA-assigned owner's identification number."), sa.Column('owner_name', sa.Text(), nullable=True, comment='Name of owner.'), - sa.Column('owner_state', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. 
Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_denorm_ownership_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_denorm_ownership_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_denorm_ownership_eia860_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_denorm_ownership_eia860_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_denorm_ownership_eia860_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia', name=op.f('pk_denorm_ownership_eia860')) + ) + op.create_table('fuel_cost_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, 
comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_fuel_cost_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_fuel_cost_by_generator_yearly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_fuel_cost_by_generator_yearly_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_fuel_cost_by_generator_yearly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_fuel_cost_by_generator_yearly')) + ) + op.create_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), + sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=False, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_owner_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 'utility_id_eia', 'ownership_record_type', name=op.f('pk_generation_fuel_by_generator_energy_source_owner_yearly_eia923')) + ) + op.create_table('generation_fuel_by_generator_energy_source_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), + sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), + sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_energy_source_code_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_generation_fuel_by_generator_energy_source_yearly_eia923_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 
name=op.f('pk_generation_fuel_by_generator_energy_source_yearly_eia923')) + ) + op.create_table('generation_fuel_by_generator_yearly_eia923', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), + sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_generation_fuel_by_generator_yearly_eia923_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_generation_fuel_by_generator_yearly_eia923')) + ) + op.create_table('heat_rate_by_generator_yearly', + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_heat_rate_by_generator_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_heat_rate_by_generator_yearly_prime_mover_code_prime_movers_eia')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_heat_rate_by_generator_yearly')) + ) + op.create_table('mcoe_generators_yearly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), + sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), + sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), + sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), + sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), + sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), + sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), + sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), + sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), + sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), + sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. 
For generators this is based on which tab the generator was listed in in EIA 860.'), + sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), + sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), + sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], name=op.f('fk_mcoe_generators_yearly_energy_source_code_1_energy_sources_eia')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_mcoe_generators_yearly_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], name=op.f('fk_mcoe_generators_yearly_plant_id_pudl_plants_pudl')), + sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], name=op.f('fk_mcoe_generators_yearly_prime_mover_code_prime_movers_eia')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_mcoe_generators_yearly_utility_id_eia_utilities_eia860')), + sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], name=op.f('fk_mcoe_generators_yearly_utility_id_pudl_utilities_pudl')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_generators_yearly')) + ) + op.create_table('mcoe_yearly', + sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), + sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), + sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), + sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), + sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), + sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), + sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), + sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), + sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), + sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), + sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), + sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_mcoe_yearly_plant_id_eia_generators_eia860')), + sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', name=op.f('pk_mcoe_yearly')) ) op.create_table('ownership_eia860', sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), @@ -3127,17 +3492,17 @@ def upgrade() -> None: sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), sa.Column('owner_utility_id_eia', sa.Integer(), nullable=False, comment="EIA-assigned owner's identification number."), sa.Column('owner_name', sa.Text(), nullable=True, comment='Name of owner.'), - sa.Column('owner_state', sa.Enum('VA', 'VI', 'SK', 'PR', 'MT', 'DC', 'VT', 'KY', 'NY', 'KS', 'ME', 'ON', 'WY', 'NU', 'MN', 'WV', 'AL', 'LA', 'MP', 'QC', 'SC', 'SD', 'NE', 'TN', 'NM', 'YT', 'OH', 'AB', 'MO', 'NC', 'NS', 'CA', 'MD', 'NB', 'FL', 'BC', 'ID', 'IN', 'NL', 'GA', 'OK', 'UT', 'IA', 'MI', 'NH', 'NV', 'IL', 'NJ', 'AZ', 'MS', 'PE', 'MA', 'AK', 'CO', 'RI', 'GU', 'ND', 'PA', 'TX', 'HI', 'WI', 'NT', 'OR', 'DE', 'WA', 'MB', 'AS', 'AR', 'CT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), + sa.Column('owner_state', sa.Enum('GU', 'WI', 'AZ', 'RI', 'MO', 'SC', 'KS', 'PE', 'UT', 'PA', 'ME', 'NH', 'SD', 'LA', 'NJ', 'DC', 'FL', 'IL', 'MS', 'ND', 'BC', 'NL', 'NU', 'OH', 'CT', 'NC', 'VI', 'SK', 'NY', 'NV', 'NE', 'IA', 'AK', 'HI', 'ON', 'TN', 'VA', 'AR', 'DE', 'AS', 'NS', 'GA', 'TX', 'MA', 'OR', 'QC', 'MT', 'WV', 'IN', 'MP', 'MB', 'MI', 'KY', 'ID', 'WY', 'NM', 'NB', 'AL', 'MD', 'AB', 'WA', 'YT', 'CA', 
'CO', 'MN', 'OK', 'PR', 'NT', 'VT'), nullable=True, comment='Two letter ISO-3166 political subdivision code.'), sa.Column('owner_city', sa.Text(), nullable=True, comment='City of owner.'), sa.Column('owner_country', sa.Enum('USA', 'CAN'), nullable=True, comment='Three letter ISO-3166 country code.'), sa.Column('owner_street_address', sa.Text(), nullable=True, comment='Steet address of owner.'), sa.Column('owner_zip_code', sa.Text(), nullable=True, comment='Zip code of owner.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership.'), + sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), sa.Column('data_maturity', sa.Text(), nullable=True, comment='Level of maturity of the data record. Some data sources report less-than-final data. PUDL sometimes includes this data, but use at your own risk.'), - sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia') + sa.ForeignKeyConstraint(['data_maturity'], ['data_maturities.code'], name=op.f('fk_ownership_eia860_data_maturity_data_maturities')), + sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], name=op.f('fk_ownership_eia860_plant_id_eia_generators_eia860')), + sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], name=op.f('fk_ownership_eia860_utility_id_eia_utilities_eia860')), + 
sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'owner_utility_id_eia', name=op.f('pk_ownership_eia860')) ) # ### end Alembic commands ### @@ -3145,11 +3510,19 @@ def upgrade() -> None: def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### op.drop_table('ownership_eia860') + op.drop_table('mcoe_yearly') + op.drop_table('mcoe_generators_yearly') + op.drop_table('heat_rate_by_generator_yearly') + op.drop_table('generation_fuel_by_generator_yearly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_yearly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923') + op.drop_table('fuel_cost_by_generator_yearly') op.drop_table('denorm_ownership_eia860') op.drop_table('denorm_generators_eia') op.drop_table('denorm_generation_yearly_eia923') op.drop_table('denorm_boilers_eia') op.drop_table('denorm_boiler_fuel_yearly_eia923') + op.drop_table('capacity_factor_by_generator_yearly') op.drop_table('boiler_stack_flue_assn_eia860') op.drop_table('boiler_generator_assn_eia860') op.drop_table('boiler_emissions_control_equipment_assn_eia860') @@ -3158,6 +3531,7 @@ def downgrade() -> None: op.drop_table('plants_small_ferc1') op.drop_table('plants_pumped_storage_ferc1') op.drop_table('plants_hydro_ferc1') + op.drop_table('heat_rate_by_unit_yearly') op.drop_table('generators_eia860') op.drop_table('fuel_ferc1') op.drop_table('denorm_plants_utilities_ferc1') @@ -3183,9 +3557,15 @@ def downgrade() -> None: op.drop_table('plants_eia860') op.drop_table('plant_in_service_ferc1') op.drop_table('other_regulatory_liabilities_ferc1') + op.drop_table('mcoe_monthly') + op.drop_table('mcoe_generators_monthly') op.drop_table('income_statement_ferc1') + op.drop_table('heat_rate_by_generator_monthly') + op.drop_table('generation_fuel_by_generator_monthly_eia923') + op.drop_table('generation_fuel_by_generator_energy_source_monthly_eia923') op.drop_table('generation_eia923') 
op.drop_table('fuel_receipts_costs_eia923') + op.drop_table('fuel_cost_by_generator_monthly') op.drop_table('epacamd_eia') op.drop_table('electricity_sales_by_rate_schedule_ferc1') op.drop_table('electric_plant_depreciation_functional_ferc1') @@ -3219,6 +3599,7 @@ def downgrade() -> None: op.drop_table('denorm_balance_sheet_liabilities_ferc1') op.drop_table('denorm_balance_sheet_assets_ferc1') op.drop_table('cash_flow_ferc1') + op.drop_table('capacity_factor_by_generator_monthly') op.drop_table('boiler_fuel_eia923') op.drop_table('balance_sheet_liabilities_ferc1') op.drop_table('balance_sheet_assets_ferc1') @@ -3229,6 +3610,7 @@ def downgrade() -> None: op.drop_table('utilities_ferc1') op.drop_table('utilities_eia860') op.drop_table('utilities_eia') + op.drop_table('summarized_demand_ferc714') op.drop_table('service_territory_eia861') op.drop_table('sales_eia861') op.drop_table('reliability_eia861') @@ -3240,10 +3622,12 @@ def downgrade() -> None: op.drop_table('net_metering_misc_eia861') op.drop_table('net_metering_customer_fuel_class_eia861') op.drop_table('mergers_eia861') + op.drop_table('heat_rate_by_unit_monthly') op.drop_table('green_pricing_eia861') op.drop_table('generators_entity_eia') op.drop_table('generation_fuel_nuclear_eia923') op.drop_table('generation_fuel_eia923') + op.drop_table('fipsified_respondents_ferc714') op.drop_table('energy_efficiency_eia861') op.drop_table('emissions_control_equipment_eia860') op.drop_table('dynamic_pricing_eia861') @@ -3277,6 +3661,7 @@ def downgrade() -> None: op.drop_table('reporting_frequencies_eia') op.drop_table('regulations_eia') op.drop_table('prime_movers_eia') + op.drop_table('predicted_state_hourly_demand') op.drop_table('power_purchase_types_ferc1') op.drop_table('political_subdivisions') op.drop_table('plants_pudl') @@ -3301,6 +3686,8 @@ def downgrade() -> None: op.drop_table('datasources') op.drop_table('data_maturities') op.drop_table('contract_types_eia') + 
op.drop_table('compiled_geometry_utility_eia861') + op.drop_table('compiled_geometry_balancing_authority_eia861') op.drop_table('coalmine_types_eia') op.drop_table('boiler_types_eia') op.drop_table('boiler_status_eia') diff --git a/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py b/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py deleted file mode 100644 index f3b0a370cd..0000000000 --- a/migrations/versions/1ec25c296a6d_add_service_terrtory_ferc714_state_.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Add service terrtory, FERC714, state demand assets - -Revision ID: 1ec25c296a6d -Revises: 88d9201ae4c4 -Create Date: 2023-06-16 09:33:08.254754 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import sqlite - -# revision identifiers, used by Alembic. -revision = '1ec25c296a6d' -down_revision = 'e608f95a3b78' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('compiled_geometry_balancing_authority_eia861', - sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=False, comment='EIA balancing authority ID. This is often (but not always!) 
the same as the utility ID associated with the same legal entity.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=False, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.PrimaryKeyConstraint('balancing_authority_id_eia', 'report_date', 'county_id_fips', 'county') - ) - op.create_table('compiled_geometry_utility_eia861', - sa.Column('county_id_fips', sa.Text(), nullable=False, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.Column('county_name_census', sa.Text(), nullable=True, comment='County name as specified in Census DP1 Data.'), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=True, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.PrimaryKeyConstraint('utility_id_eia', 'report_date', 'county_id_fips') - ) - op.create_table('predicted_state_hourly_demand', - sa.Column('state_id_fips', sa.Text(), nullable=False, comment='Two digit state FIPS code.'), - sa.Column('utc_datetime', sqlite.DATETIME(), nullable=False), - sa.Column('demand_mwh', sa.Float(), nullable=True), - sa.Column('scaled_demand_mwh', sa.Float(), nullable=True, comment='Estimated electricity demand scaled by the total sales within a state.'), - sa.PrimaryKeyConstraint('state_id_fips', 'utc_datetime') - ) - op.create_table('fipsified_respondents_ferc714', 
- sa.Column('eia_code', sa.Integer(), nullable=True), - sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), - sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), - sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), - sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('state', sa.Text(), nullable=True, comment='Two letter US state abbreviation.'), - sa.Column('county', sa.Text(), nullable=True, comment='County name.'), - sa.Column('state_id_fips', sa.Text(), nullable=True, comment='Two digit state FIPS code.'), - sa.Column('county_id_fips', sa.Text(), nullable=True, comment='County ID from the Federal Information Processing Standard Publication 6-4.'), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ) - ) - op.create_table('summarized_demand_ferc714', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False), - sa.Column('demand_annual_mwh', sa.Float(), nullable=True), - sa.Column('population', sa.Float(), nullable=True, comment='County population, sourced from Census DP1 data.'), - sa.Column('area_km2', sa.Float(), nullable=True, comment='County area in km2.'), - 
sa.Column('population_density_km2', sa.Float(), nullable=True, comment='Average population per sq. km area of a service territory.'), - sa.Column('demand_annual_per_capita_mwh', sa.Float(), nullable=True, comment='Per-capita annual demand, averaged using Census county-level population estimates.'), - sa.Column('demand_density_mwh_km2', sa.Float(), nullable=True, comment='Annual demand per km2 of a given service territory.'), - sa.Column('eia_code', sa.Integer(), nullable=True), - sa.Column('respondent_type', sa.Enum('utility', 'balancing_authority'), nullable=True), - sa.Column('respondent_name_ferc714', sa.Text(), nullable=True), - sa.Column('balancing_authority_id_eia', sa.Integer(), nullable=True, comment='EIA balancing authority ID. This is often (but not always!) the same as the utility ID associated with the same legal entity.'), - sa.Column('balancing_authority_code_eia', sa.Text(), nullable=True, comment='EIA short code identifying a balancing authority.'), - sa.Column('balancing_authority_name_eia', sa.Text(), nullable=True, comment='Name of the balancing authority.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['respondent_id_ferc714.respondent_id_ferc714'], ), - sa.PrimaryKeyConstraint('respondent_id_ferc714', 'report_date') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('summarized_demand_ferc714') - op.drop_table('fipsified_respondents_ferc714') - op.drop_table('predicted_state_hourly_demand') - op.drop_table('compiled_geometry_utility_eia861') - op.drop_table('compiled_geometry_balancing_authority_eia861') - # ### end Alembic commands ### diff --git a/migrations/versions/28bb2b27e2cf_add_mcoe_table.py b/migrations/versions/28bb2b27e2cf_add_mcoe_table.py deleted file mode 100644 index 5ebcbcf22e..0000000000 --- a/migrations/versions/28bb2b27e2cf_add_mcoe_table.py +++ /dev/null @@ -1,250 +0,0 @@ -"""add mcoe table - -Revision ID: 28bb2b27e2cf -Revises: e2670d0ec0eb -Create Date: 2023-08-11 10:12:15.950150 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '28bb2b27e2cf' -down_revision = 'e2670d0ec0eb' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('heat_rate_by_unit_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), - sa.ForeignKeyConstraint(['plant_id_eia'], ['plants_entity_eia.plant_id_eia'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl') - ) - op.create_table('capacity_factor_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('fuel_cost_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_generator_monthly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_generators_monthly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_monthly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. 
Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_unit_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=False, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. 
Coming from MCOE calculation.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'report_date'], ['plants_eia860.plant_id_eia', 'plants_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'unit_id_pudl') - ) - op.create_table('capacity_factor_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('fuel_cost_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. 
Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. 
Average fuel cost per MWh of heat content in nominal USD.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('heat_rate_by_generator_yearly', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('fuel_type_count', sa.Integer(), nullable=True, comment='A count of how many different simple energy sources there are associated with a generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_generators_yearly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. 
May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('technology_description', sa.Text(), nullable=True, comment='High level description of the technology used by the generator to produce electricity.'), - sa.Column('energy_source_code_1', sa.Text(), nullable=True, comment='The code representing the most predominant type of energy that fuels the generator.'), - sa.Column('prime_mover_code', sa.Text(), nullable=True, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('generator_operating_date', sa.Date(), nullable=True, comment='Date the generator began commercial operation.'), - sa.Column('generator_retirement_date', sa.Date(), nullable=True, comment='Date of the scheduled or effected retirement of the generator.'), - sa.Column('operational_status', sa.Text(), nullable=True, comment='The operating status of the asset. For generators this is based on which tab the generator was listed in in EIA 860.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('fuel_type_code_pudl', sa.Enum('coal', 'gas', 'hydro', 'nuclear', 'oil', 'other', 'solar', 'waste', 'wind'), nullable=True, comment='Simplified fuel type code used in PUDL'), - sa.Column('planned_generator_retirement_date', sa.Date(), nullable=True, comment='Planned effective date of the scheduled retirement of the generator.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit 
level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['energy_source_code_1'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('mcoe_yearly', - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. 
WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('capacity_factor', sa.Float(), nullable=True, comment='Fraction of potential generation that was actually reported for a plant part.'), - sa.Column('fuel_cost_from_eiaapi', sa.Boolean(), nullable=True, comment='Indicates whether the fuel cost was derived from the EIA API.'), - sa.Column('fuel_cost_per_mmbtu', sa.Float(), nullable=True, comment='Average fuel cost per mmBTU of heat content in nominal USD.'), - sa.Column('fuel_cost_per_mwh', sa.Float(), nullable=True, comment='Derived from MCOE, a unit level value. Average fuel cost per MWh of heat content in nominal USD.'), - sa.Column('heat_rate_mmbtu_mwh', sa.Float(), nullable=True, comment='Fuel content per unit of electricity generated. Coming from MCOE calculation.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('total_fuel_cost', sa.Float(), nullable=True, comment='Total annual reported fuel costs for the plant part. Includes costs from all fuels.'), - sa.Column('total_mmbtu', sa.Float(), nullable=True, comment='Total annual heat content of fuel consumed by a plant part record in the plant parts list.'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('mcoe_yearly') - op.drop_table('mcoe_generators_yearly') - op.drop_table('heat_rate_by_generator_yearly') - op.drop_table('fuel_cost_by_generator_yearly') - op.drop_table('capacity_factor_by_generator_yearly') - op.drop_table('heat_rate_by_unit_yearly') - op.drop_table('mcoe_monthly') - op.drop_table('mcoe_generators_monthly') - op.drop_table('heat_rate_by_generator_monthly') - op.drop_table('fuel_cost_by_generator_monthly') - op.drop_table('capacity_factor_by_generator_monthly') - op.drop_table('heat_rate_by_unit_monthly') - # ### end Alembic commands ### diff --git a/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py b/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py deleted file mode 100644 index 0a11ee7605..0000000000 --- a/migrations/versions/8c0a49eb9098_dagsterize_net_gen_allocation_revision.py +++ /dev/null @@ -1,118 +0,0 @@ -"""dagsterize net gen allocation revision - -Revision ID: 8c0a49eb9098 -Revises: e608f95a3b78 -Create Date: 2023-06-23 11:00:51.387245 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '8c0a49eb9098' -down_revision = 'e608f95a3b78' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('generation_fuel_by_generator_energy_source_monthly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. 
CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. fuel type) associated with the record.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code') - ) - op.create_table('generation_fuel_by_generator_monthly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, 
comment='A manually assigned PUDL plant ID. May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id'], ['generators_entity_eia.plant_id_eia', 'generators_entity_eia.generator_id'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia'], ['utilities_entity_eia.utility_id_eia'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - op.create_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=False, comment='The EIA Utility Identification number.'), - sa.Column('ownership_record_type', sa.Enum('owned', 'total'), nullable=False, comment='Whether each generator record is for one owner or represents a total of all ownerships.'), - sa.Column('fraction_owned', sa.Float(), nullable=True, comment='Proportion of generator ownership attributable to this utility.'), - sa.Column('capacity_mw', sa.Float(), nullable=True, comment='Total installed (nameplate) capacity, in megawatts.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code', 'utility_id_eia', 'ownership_record_type') - ) - op.create_table('generation_fuel_by_generator_energy_source_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('prime_mover_code', sa.Text(), nullable=False, comment='Code for the type of prime mover (e.g. CT, CG)'), - sa.Column('energy_source_code', sa.Text(), nullable=False, comment='A 2-3 letter code indicating the energy source (e.g. 
fuel type) associated with the record.'), - sa.Column('energy_source_code_num', sa.Enum('energy_source_code_1', 'energy_source_code_2', 'energy_source_code_3', 'energy_source_code_4', 'energy_source_code_5', 'energy_source_code_6', 'energy_source_code_7', 'energy_source_code_8'), nullable=True, comment='Name of the energy_source_code_N column that this energy source code was reported in for the generator referenced in the same record.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.ForeignKeyConstraint(['energy_source_code'], ['energy_sources_eia.code'], ), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['prime_mover_code'], ['prime_movers_eia.code'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id', 'prime_mover_code', 'energy_source_code') - ) - op.create_table('generation_fuel_by_generator_yearly_eia923', - sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'), - sa.Column('plant_id_eia', sa.Integer(), nullable=False, comment='The unique six-digit facility identification number, also called an ORISPL, assigned by the Energy Information Administration.'), - sa.Column('plant_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL plant ID. 
May not be constant over time.'), - sa.Column('plant_name_eia', sa.Text(), nullable=True, comment='Plant name.'), - sa.Column('utility_id_eia', sa.Integer(), nullable=True, comment='The EIA Utility Identification number.'), - sa.Column('utility_id_pudl', sa.Integer(), nullable=True, comment='A manually assigned PUDL utility ID. May not be stable over time.'), - sa.Column('utility_name_eia', sa.Text(), nullable=True, comment='The name of the utility.'), - sa.Column('generator_id', sa.Text(), nullable=False, comment='Generator ID is usually numeric, but sometimes includes letters. Make sure you treat it as a string!'), - sa.Column('unit_id_pudl', sa.Integer(), nullable=True, comment='Dynamically assigned PUDL unit id. WARNING: This ID is not guaranteed to be static long term as the input data and algorithm may evolve over time.'), - sa.Column('fuel_consumed_for_electricity_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel to produce electricity, in physical unit, year to date.'), - sa.Column('fuel_consumed_mmbtu', sa.Float(), nullable=True, comment='Total consumption of fuel in physical unit, year to date. 
Note: this is the total quantity consumed for both electricity and, in the case of combined heat and power plants, process steam production.'), - sa.Column('net_generation_mwh', sa.Float(), nullable=True, comment='Net electricity generation for the specified period in megawatt-hours (MWh).'), - sa.ForeignKeyConstraint(['plant_id_eia', 'generator_id', 'report_date'], ['generators_eia860.plant_id_eia', 'generators_eia860.generator_id', 'generators_eia860.report_date'], ), - sa.ForeignKeyConstraint(['plant_id_pudl'], ['plants_pudl.plant_id_pudl'], ), - sa.ForeignKeyConstraint(['utility_id_eia', 'report_date'], ['utilities_eia860.utility_id_eia', 'utilities_eia860.report_date'], ), - sa.ForeignKeyConstraint(['utility_id_pudl'], ['utilities_pudl.utility_id_pudl'], ), - sa.PrimaryKeyConstraint('report_date', 'plant_id_eia', 'generator_id') - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('generation_fuel_by_generator_yearly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_yearly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_owner_yearly_eia923') - op.drop_table('generation_fuel_by_generator_monthly_eia923') - op.drop_table('generation_fuel_by_generator_energy_source_monthly_eia923') - # ### end Alembic commands ### diff --git a/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py b/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py deleted file mode 100644 index 3cd7c9ff86..0000000000 --- a/migrations/versions/9a32db1fbe6e_rename_to_dollar_amount.py +++ /dev/null @@ -1,42 +0,0 @@ -"""rename to dollar amount - -Revision ID: 9a32db1fbe6e -Revises: 88d9201ae4c4 -Create Date: 2023-06-13 15:11:44.266717 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = '9a32db1fbe6e' -down_revision = '1ec25c296a6d' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('denorm_electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.')) - batch_op.drop_column('utility_plant_value') - - with op.batch_alter_table('electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('dollar_value', sa.Float(), nullable=True, comment='Dollar value of reported income, expense, asset, or liability.')) - batch_op.drop_column('utility_plant_value') - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('utility_plant_value', sa.FLOAT(), nullable=True)) - batch_op.drop_column('dollar_value') - - with op.batch_alter_table('denorm_electric_plant_depreciation_changes_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('utility_plant_value', sa.FLOAT(), nullable=True)) - batch_op.drop_column('dollar_value') - - # ### end Alembic commands ### diff --git a/migrations/versions/e2670d0ec0eb_.py b/migrations/versions/e2670d0ec0eb_.py deleted file mode 100644 index cbb81d1df5..0000000000 --- a/migrations/versions/e2670d0ec0eb_.py +++ /dev/null @@ -1,24 +0,0 @@ -"""empty message - -Revision ID: e2670d0ec0eb -Revises: 8c0a49eb9098, 9a32db1fbe6e -Create Date: 2023-07-10 10:58:58.580305 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'e2670d0ec0eb' -down_revision = ('8c0a49eb9098', '9a32db1fbe6e') -branch_labels = None -depends_on = None - - -def upgrade() -> None: - pass - - -def downgrade() -> None: - pass diff --git a/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py b/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py deleted file mode 100644 index 1538ee596f..0000000000 --- a/migrations/versions/e608f95a3b78_drop_amount_from_retained_earnings.py +++ /dev/null @@ -1,38 +0,0 @@ -"""drop amount from retained earnings - -Revision ID: e608f95a3b78 -Revises: 3c458b36094e -Create Date: 2023-06-19 09:04:49.591285 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'e608f95a3b78' -down_revision = '3c458b36094e' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('denorm_retained_earnings_ferc1', schema=None) as batch_op: - batch_op.drop_column('amount') - - with op.batch_alter_table('retained_earnings_ferc1', schema=None) as batch_op: - batch_op.drop_column('amount') - - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - with op.batch_alter_table('retained_earnings_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('amount', sa.FLOAT(), nullable=True)) - - with op.batch_alter_table('denorm_retained_earnings_ferc1', schema=None) as batch_op: - batch_op.add_column(sa.Column('amount', sa.FLOAT(), nullable=True)) - - # ### end Alembic commands ### From 882d63ce6fc2dfac348e8b276e060ae0939dd3ca Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Thu, 17 Aug 2023 12:48:09 -0400 Subject: [PATCH 30/51] Use a hash instead of the list index so the check constraint naming is consistent --- src/pudl/metadata/classes.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index a0c358fef3..b7fd2f34ec 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -735,10 +735,7 @@ def to_sql( # noqa: C901 return sa.Column( self.name, self.to_sql_dtype(), - *[ - sa.CheckConstraint(check, f"{self.name}_{i}") - for i, check in enumerate(checks) - ], + *[sa.CheckConstraint(check, hash(check)) for check in checks], nullable=not self.constraints.required, unique=self.constraints.unique, comment=self.description, From 6d5a5f4f3c3b8a96152d5792e9cfe711dc6a5114 Mon Sep 17 00:00:00 2001 From: bendnorman Date: Thu, 17 Aug 2023 09:45:02 -0800 Subject: [PATCH 31/51] Fix pudl_setup pudl_in and pudl_out args --- src/pudl/workspace/setup_cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pudl/workspace/setup_cli.py b/src/pudl/workspace/setup_cli.py index 49e5204764..6dea0a7dbd 100644 --- a/src/pudl/workspace/setup_cli.py +++ b/src/pudl/workspace/setup_cli.py @@ -56,17 +56,17 @@ def initialize_parser(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument( - "--pudl_input", + "--pudl_in", "-i", type=str, - dest="pudl_input", + dest="pudl_in", help="""Directory where the PUDL input data should be located.""", ) 
parser.add_argument( - "--pudl_output", + "--pudl_out", "-o", type=str, - dest="pudl_output", + dest="pudl_out", help="""Directory where the PUDL outputs, notebooks, and example settings files should be located.""", ) From f7f1e383e56d14c0875d7af15ca0135a0a52fa93 Mon Sep 17 00:00:00 2001 From: Dazhong Xia Date: Thu, 17 Aug 2023 16:29:46 -0400 Subject: [PATCH 32/51] Add name keyword to CheckConstraint call for explicit-ness --- src/pudl/metadata/classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index b7fd2f34ec..4404065763 100644 --- a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -735,7 +735,7 @@ def to_sql( # noqa: C901 return sa.Column( self.name, self.to_sql_dtype(), - *[sa.CheckConstraint(check, hash(check)) for check in checks], + *[sa.CheckConstraint(check, name=hash(check)) for check in checks], nullable=not self.constraints.required, unique=self.constraints.unique, comment=self.description, From 4f0d7f490dc894c2dacf58e261b12723bea14777 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 18 Aug 2023 11:38:19 -0600 Subject: [PATCH 33/51] Include 2022 in EIA data coverage in README. --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index d1443dfa36..f328e7a7f9 100644 --- a/README.rst +++ b/README.rst @@ -64,10 +64,10 @@ What data is available? PUDL currently integrates data from: -* `EIA Form 860 `__: 2001-2021 -* `EIA Form 860m `__: 2022-06 -* `EIA Form 861 `__: 2001-2021 -* `EIA Form 923 `__: 2001-2021 +* `EIA Form 860 `__: 2001-2022 +* `EIA Form 860m `__: 2023-06 +* `EIA Form 861 `__: 2001-2022 +* `EIA Form 923 `__: 2001-2022 * `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2021 * `FERC Form 1 `__: 1994-2021 * `FERC Form 714 `__: 2006-2020 @@ -75,7 +75,7 @@ PUDL currently integrates data from: Thanks to support from the `Alfred P. 
Sloan Foundation Energy & Environment Program `__, from -2021 to 2023 we will be integrating the following data as well: +2021 to 2024 we will be integrating the following data as well: * `EIA Form 176 `__ (The Annual Report of Natural Gas Supply and Disposition) From 5de498e75e53193a3715b046dc654d140e27a4e8 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 18 Aug 2023 19:42:41 -0600 Subject: [PATCH 34/51] Allow a mix of Zenodo sandbox & production DOIs Okay I did this off the clock since it has been driving me a little bit nuts. Historically we've required that all Zenodo DOIs in the datastore come either from the Sandbox or the Production server, which makes testing a single new archive on its own a hassle, and adds complexity across the whole application with switches for sandbox vs. not-sandbox data sources. This commit removes this requirement, and allows a mix of sandbox and production DOIs to be used in development. I also removed some very sparse documentation about how to create an archive in the Datastore by hand, which I think was very old and probably no longer supported and certainly not being tested, since it seemed likely to confuse and frustrate anyone who actually tried to do it. There's a unit test which checks that all DOIs are production, rather than sandbox to make it difficult to accidentally check in code that refers to unofficial input data. 
--- README.rst | 2 +- docs/dev/datastore.rst | 39 ++---- docs/dev/testing.rst | 1 - src/pudl/cli/etl.py | 7 - src/pudl/ferc_to_sqlite/cli.py | 7 - src/pudl/metadata/classes.py | 1 - src/pudl/resources.py | 6 - src/pudl/workspace/datastore.py | 137 +++++++++----------- test/conftest.py | 7 - test/integration/zenodo_datapackage_test.py | 21 ++- test/unit/workspace/datastore_test.py | 35 +++-- 11 files changed, 99 insertions(+), 164 deletions(-) diff --git a/README.rst b/README.rst index f328e7a7f9..aa3a4f8a42 100644 --- a/README.rst +++ b/README.rst @@ -124,7 +124,7 @@ Want access to all the published data in bulk? If you're familiar with Python and `Jupyter Notebooks `__ and are willing to install Docker you can: -* `Download a PUDL data release `__ from +* `Download a PUDL data release `__ from CERN's `Zenodo `__ archiving service. * `Install Docker `__ * Run the archived image using ``docker-compose up`` diff --git a/docs/dev/datastore.rst b/docs/dev/datastore.rst index e9411537f6..eb3f2f452e 100644 --- a/docs/dev/datastore.rst +++ b/docs/dev/datastore.rst @@ -38,15 +38,17 @@ For more detailed usage information, see: $ pudl_datastore --help The downloaded data will be used by the script to populate a datastore under -the ``data`` directory in your workspace, organized by data source, form, and -date:: +the your ``$PUDL_INPUT`` directory, organized by data source, form, and DOI:: data/censusdp1tract/ data/eia860/ + data/eia860m/ data/eia861/ data/eia923/ data/epacems/ data/ferc1/ + data/ferc2/ + data/ferc60/ data/ferc714/ If the download fails to complete successfully, the script can be run repeatedly until @@ -64,28 +66,13 @@ archival and versioning of datasets. See the `documentation for information on adding datasets to the datastore. 
-Prepare the Datastore -^^^^^^^^^^^^^^^^^^^^^ +Tell PUDL about the archive +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If you have used pudl-archiver to prepare a Zenodo archive as above, you -can add support for your archive to the datastore by adding the DOI to -pudl.workspace.datastore.DOI, under "sandbox" or "production" as appropriate. - -If you want to prepare an archive for the datastore separately, the following -are required. - -#. The root path must contain a ``datapackage.json`` file that conforms to the -`frictionless datapackage spec `__ -#. Each listed resource among the ``datapackage.json`` resources must include: - - * ``path`` containing the zenodo download url for the specific file. - * ``remote_url`` with the same url as the ``path`` - * ``name`` of the file - * ``hash`` with the md5 hash of the file - * ``parts`` a set of key / value pairs defining additional attributes that - can be used to select a subset of the whole datapackage. For example, the - ``epacems`` dataset is partitioned by year and state, and - ``"parts": {"year": 2010, "state": "ca"}`` would indicate that the - resource contains data for the state of California in the year 2010. - Unpartitioned datasets like the ``ferc714`` which includes all years in - a single file, would have an empty ``"parts": {}`` +Once you have used pudl-archiver to prepare a Zenodo archive as above, you +can make the PUDL Datastore aware of it by updating the appropriate DOI in +:class:`pudl.workspace.datastore.ZenodoFetcher`. DOIs can refer to resources from the +`Zenodo sandbox server `__ for archives that are still in +testing or development (sandbox DOIs have a prefix of ``10.5072``), or the +`Zenodo production server `__ server if the archive is ready for +public use (production DOIs hae a prefix of ``10.5281``). 
diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst index 79439e468f..679074661f 100644 --- a/docs/dev/testing.rst +++ b/docs/dev/testing.rst @@ -304,7 +304,6 @@ You can always check to see what custom flags exist by running Path to a non-standard ETL settings file to use. --gcs-cache-path=GCS_CACHE_PATH If set, use this GCS path as a datastore cache layer. - --sandbox Use raw inputs from the Zenodo sandbox server. The main flexibility that these custom options provide is in selecting where the raw input data comes from and what data the tests should be run diff --git a/src/pudl/cli/etl.py b/src/pudl/cli/etl.py index 39707ae775..604c754ee8 100644 --- a/src/pudl/cli/etl.py +++ b/src/pudl/cli/etl.py @@ -45,12 +45,6 @@ def parse_command_line(argv): parser.add_argument( dest="settings_file", type=str, default="", help="path to ETL settings file." ) - parser.add_argument( - "--sandbox", - action="store_true", - default=False, - help="Use the Zenodo sandbox rather than production", - ) parser.add_argument( "--logfile", default=None, @@ -156,7 +150,6 @@ def main(): "dataset_settings": {"config": dataset_settings_config}, "datastore": { "config": { - "sandbox": args.sandbox, "gcs_cache_path": args.gcs_cache_path if args.gcs_cache_path else "", diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py index f7cbf9af5e..7a18f613cf 100755 --- a/src/pudl/ferc_to_sqlite/cli.py +++ b/src/pudl/ferc_to_sqlite/cli.py @@ -52,12 +52,6 @@ def parse_command_line(argv): fail.""", default=False, ) - parser.add_argument( - "--sandbox", - action="store_true", - default=False, - help="Use the Zenodo sandbox rather than production", - ) parser.add_argument( "-b", "--batch-size", @@ -155,7 +149,6 @@ def main(): # noqa: C901 }, "datastore": { "config": { - "sandbox": args.sandbox, "gcs_cache_path": args.gcs_cache_path if args.gcs_cache_path else "", diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py index 4404065763..43467d72ec 100644 --- 
a/src/pudl/metadata/classes.py +++ b/src/pudl/metadata/classes.py @@ -949,7 +949,6 @@ def get_temporal_coverage(self, partitions: dict = None) -> str: def add_datastore_metadata(self) -> None: """Get source file metadata from the datastore.""" dp_desc = Datastore( - sandbox=False, local_cache_path=PudlPaths().data_dir, gcs_cache_path="gs://zenodo-cache.catalyst.coop", ).get_datapackage_descriptor(self.name) diff --git a/src/pudl/resources.py b/src/pudl/resources.py index 476e84fa54..13d2a50471 100644 --- a/src/pudl/resources.py +++ b/src/pudl/resources.py @@ -39,18 +39,12 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings: description="If enabled, the local file cache for datastore will be used.", default_value=True, ), - "sandbox": Field( - bool, - description="Use the Zenodo sandbox rather than production", - default_value=False, - ), }, ) def datastore(init_context) -> Datastore: """Dagster resource to interact with Zenodo archives.""" ds_kwargs = {} ds_kwargs["gcs_cache_path"] = init_context.resource_config["gcs_cache_path"] - ds_kwargs["sandbox"] = init_context.resource_config["sandbox"] if init_context.resource_config["use_local_cache"]: # TODO(rousik): we could also just use PudlPaths().input_dir here, because diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index d0a5c2191d..78519aeffb 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -44,9 +44,9 @@ def __init__(self, datapackage_json: dict, dataset: str, doi: str): """Constructs DatapackageDescriptor. Args: - datapackage_json (dict): parsed datapackage.json describing this datapackage. - dataset (str): name of the dataset. - doi (str): DOI (aka version) of the dataset. + datapackage_json: parsed datapackage.json describing this datapackage. + dataset: The name (an identifying string) of the dataset. + doi: A versioned Digital Object Identifier for the dataset. 
""" self.datapackage_json = datapackage_json self.dataset = dataset @@ -167,54 +167,46 @@ class ZenodoFetcher: } DOI = { - "sandbox": { - "censusdp1tract": "10.5072/zenodo.674992", - "eia860": "10.5072/zenodo.1222854", - "eia860m": "10.5072/zenodo.1225517", - "eia861": "10.5072/zenodo.1229930", - "eia923": "10.5072/zenodo.1217724", - "eia_bulk_elec": "10.5072/zenodo.1103572", - "epacamd_eia": "10.5072/zenodo.1199170", - "epacems": "10.5072/zenodo.672963", - "ferc1": "10.5072/zenodo.1070868", - "ferc2": "10.5072/zenodo.1188447", - "ferc6": "10.5072/zenodo.1098088", - "ferc60": "10.5072/zenodo.1098089", - "ferc714": "10.5072/zenodo.1098302", - }, - "production": { - "censusdp1tract": "10.5281/zenodo.4127049", - "eia860": "10.5281/zenodo.8164776", - "eia860m": "10.5281/zenodo.8188017", - "eia861": "10.5281/zenodo.8231268", - "eia923": "10.5281/zenodo.8172818", - "eia_bulk_elec": "10.5281/zenodo.7067367", - "epacamd_eia": "10.5281/zenodo.7900974", - "epacems": "10.5281/zenodo.6910058", - "ferc1": "10.5281/zenodo.7314437", - "ferc2": "10.5281/zenodo.8006881", - "ferc6": "10.5281/zenodo.7130141", - "ferc60": "10.5281/zenodo.7130146", - "ferc714": "10.5281/zenodo.7139875", - }, + # Sandbox DOIs are provided for reference + "censusdp1tract": "10.5281/zenodo.4127049", + # "censusdp1tract": "10.5072/zenodo.674992", + "eia860": "10.5281/zenodo.8164776", + # "eia860": "10.5072/zenodo.1222854", + "eia860m": "10.5281/zenodo.8188017", + # "eia860m": "10.5072/zenodo.1225517", + "eia861": "10.5281/zenodo.8231268", + # "eia861": "10.5072/zenodo.1229930", + "eia923": "10.5281/zenodo.8172818", + # "eia923": "10.5072/zenodo.1217724", + "eia_bulk_elec": "10.5281/zenodo.7067367", + # "eia_bulk_elec": "10.5072/zenodo.1103572", + "epacamd_eia": "10.5281/zenodo.7900974", + # "epacamd_eia": "10.5072/zenodo.1199170", + "epacems": "10.5281/zenodo.6910058", + # "epacems": "10.5072/zenodo.672963", + "ferc1": "10.5281/zenodo.7314437", + # "ferc1": "10.5072/zenodo.1070868", + "ferc2": 
"10.5281/zenodo.8006881", + # "ferc2": "10.5072/zenodo.1188447", + "ferc6": "10.5281/zenodo.7130141", + # "ferc6": "10.5072/zenodo.1098088", + "ferc60": "10.5281/zenodo.7130146", + # "ferc60": "10.5072/zenodo.1098089", + "ferc714": "10.5281/zenodo.7139875", + # "ferc714": "10.5072/zenodo.1098302", } API_ROOT = { "sandbox": "https://sandbox.zenodo.org/api", "production": "https://zenodo.org/api", } - def __init__(self, sandbox: bool = False, timeout: float = 15.0): + def __init__(self, timeout: float = 15.0): """Constructs ZenodoFetcher instance. Args: - sandbox (bool): controls whether production or sandbox zenodo backends - and associated DOIs should be used. timeout (float): timeout (in seconds) for http requests. """ - backend = "sandbox" if sandbox else "production" - self._api_root = self.API_ROOT[backend] - self._token = self.TOKEN[backend] - self._dataset_to_doi = self.DOI[backend] + self._dataset_to_doi = self.DOI self._descriptor_cache: dict[str, DatapackageDescriptor] = {} self.timeout = timeout @@ -229,8 +221,12 @@ def __init__(self, sandbox: bool = False, timeout: float = 15.0): def _fetch_from_url(self, url: str) -> requests.Response: logger.info(f"Retrieving {url} from zenodo") + if "sandbox" in url: + token = self.TOKEN["sandbox"] + else: + token = self.TOKEN["production"] response = self.http.get( - url, params={"access_token": self._token}, timeout=self.timeout + url, params={"access_token": token}, timeout=self.timeout ) if response.status_code == requests.codes.ok: logger.debug(f"Successfully downloaded {url}") @@ -240,16 +236,24 @@ def _fetch_from_url(self, url: str) -> requests.Response: def _doi_to_url(self, doi: str) -> str: """Returns url that holds the datapackage for given doi.""" - match = re.search(r"zenodo.([\d]+)", doi) - if match is None: - raise ValueError(f"Invalid doi {doi}") + match = re.search(r"(10\.5072|10\.5281)/zenodo.([\d]+)", doi) - zen_id = int(match.groups()[0]) - return f"{self._api_root}/deposit/depositions/{zen_id}" + 
if match is None: + raise ValueError(f"Invalid Zenodo DOI: {doi}") + + doi_prefix = match.groups()[0] + zenodo_id = match.groups()[1] + if doi_prefix == "10.5072": + api_root = self.API_ROOT["sandbox"] + elif doi_prefix == "10.5281": + api_root = self.API_ROOT["production"] + else: + raise ValueError(f"Invalid Zenodo DOI: {doi}") + return f"{api_root}/deposit/depositions/{zenodo_id}" def get_descriptor(self, dataset: str) -> DatapackageDescriptor: - """Returns DatapackageDescriptor for given dataset.""" - doi = self._dataset_to_doi.get(dataset) + """Returns class:`DatapackageDescriptor` for given dataset.""" + doi = self._dataset_to_doi.get(dataset, False) if not doi: raise KeyError(f"No doi found for dataset {dataset}") if doi not in self._descriptor_cache: @@ -295,22 +299,18 @@ def __init__( self, local_cache_path: Path | None = None, gcs_cache_path: str | None = None, - sandbox: bool = False, - timeout: float = 15, + timeout: float = 15.0, ): # TODO(rousik): figure out an efficient way to configure datastore caching """Datastore manages file retrieval for PUDL datasets. Args: - local_cache_path (Path): if provided, LocalFileCache pointed at the data + local_cache_path: if provided, LocalFileCache pointed at the data subdirectory of this path will be used with this Datastore. - gcs_cache_path (str): if provided, GoogleCloudStorageCache will be used + gcs_cache_path: if provided, GoogleCloudStorageCache will be used to retrieve data files. The path is expected to have the following format: gs://bucket[/path_prefix] - sandbox (bool): if True, use sandbox zenodo backend when retrieving files, - otherwise use production. This affects which zenodo servers are contacted - as well as dois used for each dataset. - timeout (floaTR): connection timeouts (in seconds) to use when connecting + timeout: connection timeouts (in seconds) to use when connecting to Zenodo servers. 
""" self._cache = resource_cache.LayeredCache() @@ -332,7 +332,7 @@ def __init__( ) pass - self._zenodo_fetcher = ZenodoFetcher(sandbox=sandbox, timeout=timeout) + self._zenodo_fetcher = ZenodoFetcher(timeout=timeout) def get_known_datasets(self) -> list[str]: """Returns list of supported datasets.""" @@ -442,17 +442,11 @@ def __call__(self, parser, namespace, values, option_string=None): def parse_command_line(): """Collect the command line arguments.""" - prod_dois = "\n".join( - [f" - {x}" for x in ZenodoFetcher.DOI["production"].keys()] - ) - sand_dois = "\n".join([f" - {x}" for x in ZenodoFetcher.DOI["sandbox"].keys()]) + dois = "\n".join([f" - {x}" for x in ZenodoFetcher.DOI]) dataset_msg = f""" Available Production Datasets: -{prod_dois} - -Available Sandbox Datasets: -{sand_dois}""" +{dois}""" parser = argparse.ArgumentParser( description="Download and cache ETL source data from Zenodo.", @@ -463,12 +457,12 @@ def parse_command_line(): parser.add_argument( "--dataset", help="Download the specified dataset only. See below for available options. " - "The default is to download all, which may take an hour or more." - "speed.", + "The default is to download all datasets, which may take hours depending on " + "network speed.", ) parser.add_argument( "--pudl_in", - help="Override pudl_in directory, defaults to setting in ~/.pudl.yml", + help="Input directory to use, overridng the $PUDL_INPUT environment variable.", ) parser.add_argument( "--validate", @@ -476,12 +470,6 @@ def parse_command_line(): action="store_true", default=False, ) - parser.add_argument( - "--sandbox", - help="Download data from Zenodo sandbox server. 
For testing purposes only.", - action="store_true", - default=False, - ) parser.add_argument( "--loglevel", help="Set logging level (DEBUG, INFO, WARNING, ERROR, or CRITICAL).", @@ -602,7 +590,6 @@ def main(): dstore = Datastore( gcs_cache_path=args.gcs_cache_path, - sandbox=args.sandbox, local_cache_path=cache_path, ) diff --git a/test/conftest.py b/test/conftest.py index 01c29705a4..3e93a98489 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -67,12 +67,6 @@ def pytest_addoption(parser): default=False, help="If enabled, the local file cache for datastore will not be used.", ) - parser.addoption( - "--sandbox", - action="store_true", - default=False, - help="Use raw inputs from the Zenodo sandbox server.", - ) parser.addoption( "--save-unmapped-ids", action="store_true", @@ -416,7 +410,6 @@ def pudl_datastore_config(request): return { "gcs_cache_path": gcs_cache_path if gcs_cache_path else "", "use_local_cache": not request.config.getoption("--bypass-local-cache"), - "sandbox": request.config.getoption("--sandbox"), } diff --git a/test/integration/zenodo_datapackage_test.py b/test/integration/zenodo_datapackage_test.py index 9cbb7d6ba2..a0a02593f7 100644 --- a/test/integration/zenodo_datapackage_test.py +++ b/test/integration/zenodo_datapackage_test.py @@ -8,24 +8,19 @@ class TestZenodoDatapackages: - """Ensure production & sandbox Datastores point to valid datapackages.""" + """Ensure all DOIs in Datastore point to valid datapackages.""" @pytest.mark.xfail( - raises=(MaxRetryError, ConnectionError, RetryError, ResponseError) - ) - def test_sandbox_datapackages(self): - """All datasets point to valid descriptors with 1 or more resources.""" - ds = Datastore(sandbox=True) - for dataset in ds.get_known_datasets(): - desc = ds.get_datapackage_descriptor(dataset) - assert list(desc.get_resources()) - - @pytest.mark.xfail( - raises=(MaxRetryError, ConnectionError, RetryError, ResponseError) + raises=( + MaxRetryError, + ConnectionError, + RetryError, + 
ResponseError, + ) ) def test_prod_datapackages(self): """All datasets point to valid descriptors with 1 or more resources.""" - ds = Datastore(sandbox=False) + ds = Datastore() for dataset in ds.get_known_datasets(): desc = ds.get_datapackage_descriptor(dataset) assert list(desc.get_resources()) diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 38672b44de..323f21003e 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -235,35 +235,30 @@ def setUp(self): } ) - def test_sandbox_doi_format_is_correct(self): - """Verifies that sandbox ZenodoFetcher DOIs have the right format.""" - ds = datastore.ZenodoFetcher(sandbox=True) - self.assertTrue(ds.get_known_datasets()) - for dataset in ds.get_known_datasets(): - print(f"doi for {dataset} is {ds.get_doi(dataset)}") - self.assertTrue( - re.fullmatch( - r"10\.5072/zenodo\.[0-9]{5,10}", ds.get_doi(dataset) - ), # noqa: FS003 - msg=f"doi for {dataset} is {ds.get_doi(dataset)}", - ) + def test_doi_format_is_correct(self): + """Verifies ZenodoFetcher DOIs have correct format and are not sandbox DOIs. - def test_prod_doi_format_is_correct(self): - """Verifies that production ZenodoFetcher DOIs have the right format.""" - ds = datastore.ZenodoFetcher(sandbox=False) + Sandbox DOIs are only meant for use in testing and development, and should not + be checked in, thus this test will fail if a sandbox DOI with prefix 10.5072 is + identified. 
+ """ + ds = datastore.ZenodoFetcher() self.assertTrue(ds.get_known_datasets()) for dataset in ds.get_known_datasets(): + doi = ds.get_doi(dataset) + self.assertFalse( + re.fullmatch(r"10\.5072/zenodo\.[0-9]{5,10}", doi), + msg=f"Zenodo sandbox DOI found for {dataset}: {doi}", + ) self.assertTrue( - re.fullmatch( - r"10\.5281/zenodo\.[0-9]{5,10}", ds.get_doi(dataset) - ), # noqa: FS003 - msg=f"doi for {dataset} is {ds.get_doi(dataset)}", + re.fullmatch(r"10\.5281/zenodo\.[0-9]{5,10}", doi), + msg=f"Zenodo production DOI for {dataset} is {doi}", ) def test_get_known_datasets(self): """Call to get_known_datasets() produces the expected results.""" self.assertEqual( - sorted(datastore.ZenodoFetcher.DOI["production"]), + sorted(datastore.ZenodoFetcher.DOI), self.fetcher.get_known_datasets(), ) From fe8225ce0096503bcc61ebfe701bb5501a1c7beb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Aug 2023 07:11:29 +0000 Subject: [PATCH 35/51] Update dask requirement from <2023.8.1,>=2021.8 to >=2021.8,<2023.8.2 Updates the requirements on [dask](https://github.com/dask/dask) to permit the latest version. - [Changelog](https://github.com/dask/dask/blob/main/docs/release-procedure.md) - [Commits](https://github.com/dask/dask/compare/2021.08.0...2023.8.1) --- updated-dependencies: - dependency-name: dask dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0740f55d4b..790a5c4660 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "coloredlogs>=14.0,<15.1", # Dagster requires 14.0 "dagster-webserver>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11 "dagster>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11 - "dask>=2021.8,<2023.8.1", + "dask>=2021.8,<2023.8.2", "datapackage>=1.11,<1.16", # Transition datastore to use frictionless. "fsspec>=2021.7,<2023.6.1", # For caching datastore on GCS "gcsfs>=2021.7,<2023.6.1", # For caching datastore on GCS From 5e51d5ef3fe935e19b4063dc7b9d5dda65a67f96 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Aug 2023 07:11:54 +0000 Subject: [PATCH 36/51] Update tox requirement from <4.7,>=4 to >=4,<4.10 Updates the requirements on [tox](https://github.com/tox-dev/tox) to permit the latest version. - [Release notes](https://github.com/tox-dev/tox/releases) - [Changelog](https://github.com/tox-dev/tox/blob/main/docs/changelog.rst) - [Commits](https://github.com/tox-dev/tox/compare/4.0.0...4.9.0) --- updated-dependencies: - dependency-name: tox dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0740f55d4b..36f67a526e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,7 +153,7 @@ test = [ "pytest>=6.2,<7.5", "responses>=0.14,<0.24", "rstcheck[sphinx]>=5.0,<6.2", - "tox>=4.0,<4.9", + "tox>=4.0,<4.10", ] datasette = [ "datasette>=0.60,<0.65", From 1a8685481f92c8589a4b3060d942fd9d6ffe5455 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Aug 2023 07:12:33 +0000 Subject: [PATCH 37/51] Update sphinx requirement from !=5.1.0,<7.2,>=4 to >=4,!=5.1.0,<7.3 Updates the requirements on [sphinx](https://github.com/sphinx-doc/sphinx) to permit the latest version. - [Release notes](https://github.com/sphinx-doc/sphinx/releases) - [Changelog](https://github.com/sphinx-doc/sphinx/blob/master/CHANGES) - [Commits](https://github.com/sphinx-doc/sphinx/compare/v4.0.0...v7.2.2) --- updated-dependencies: - dependency-name: sphinx dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0740f55d4b..2cafd848a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -128,7 +128,7 @@ doc = [ "sphinx-autoapi>=1.8,<2.2", "sphinx-issues>=1.2,<3.1", "sphinx-reredirects", - "sphinx>=4,!=5.1.0,<7.2", + "sphinx>=4,!=5.1.0,<7.3", "sphinxcontrib_bibtex>=2.4,<2.6", ] test = [ From 0c3c0502bbe4f05e1b0598c39936634e3cb75ce4 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 21 Aug 2023 10:27:26 -0600 Subject: [PATCH 38/51] Integrate some Pydantic validation into ZenodoFetcher --- src/pudl/workspace/datastore.py | 123 ++++++++++++++------------ test/unit/workspace/datastore_test.py | 5 +- 2 files changed, 68 insertions(+), 60 deletions(-) diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 78519aeffb..0c0895328a 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -9,11 +9,12 @@ from collections import defaultdict from collections.abc import Iterator from pathlib import Path -from typing import Any +from typing import Any, Self import datapackage import requests from google.auth.exceptions import DefaultCredentialsError +from pydantic import BaseModel, HttpUrl, confloat, constr from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry @@ -29,6 +30,7 @@ # long as we stick to read-only keys. PUDL_YML = Path.home() / ".pudl.yml" +ZenodoDOI = constr(regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)") class ChecksumMismatch(ValueError): @@ -154,19 +156,13 @@ def get_json_string(self) -> str: return json.dumps(self.datapackage_json, sort_keys=True, indent=4) -class ZenodoFetcher: +class ZenodoFetcher(BaseModel): """API for fetching datapackage descriptors and resource contents from zenodo.""" - # Zenodo tokens recorded here should have read-only access to our archives. 
- # Including them here is correct in order to allow public use of this tool, so - # long as we stick to read-only keys. - TOKEN = { - # Read-only personal access tokens for pudl@catalyst.coop: - "sandbox": "qyPC29wGPaflUUVAv1oGw99ytwBqwEEdwi4NuUrpwc3xUcEwbmuB4emwysco", - "production": "KXcG5s9TqeuPh1Ukt5QYbzhCElp9LxuqAuiwdqHP0WS4qGIQiydHn6FBtdJ5", - } - - DOI = { + _descriptor_cache: dict[str, DatapackageDescriptor] = {} + http: requests.Session = requests.Session() + timeout: confloat(gt=0.0, allow_inf_nan=False) = 15.0 + zenodo_dois: dict[str, ZenodoDOI] = { # Sandbox DOIs are provided for reference "censusdp1tract": "10.5281/zenodo.4127049", # "censusdp1tract": "10.5072/zenodo.674992", @@ -195,47 +191,45 @@ class ZenodoFetcher: "ferc714": "10.5281/zenodo.7139875", # "ferc714": "10.5072/zenodo.1098302", } - API_ROOT = { - "sandbox": "https://sandbox.zenodo.org/api", - "production": "https://zenodo.org/api", - } - def __init__(self, timeout: float = 15.0): - """Constructs ZenodoFetcher instance. + class Config: + """Allow arbitrary types -- required for requests.Session.""" - Args: - timeout (float): timeout (in seconds) for http requests. 
- """ - self._dataset_to_doi = self.DOI - self._descriptor_cache: dict[str, DatapackageDescriptor] = {} + arbitrary_types_allowed = True + + def __init__(self: Self, **data): + """Constructs ZenodoFetcher instance.""" + super().__init__(**data) - self.timeout = timeout retries = Retry( backoff_factor=2, total=3, status_forcelist=[429, 500, 502, 503, 504] ) adapter = HTTPAdapter(max_retries=retries) - self.http = requests.Session() self.http.mount("http://", adapter) self.http.mount("https://", adapter) + for dataset in self.zenodo_dois: + try: + ZenodoDOI.validate(self.zenodo_dois[dataset]) + except Exception: + raise ValueError( + f"Invalid Zenodo DOI for {dataset}: {self.zenodo_dois[dataset]}" + ) - def _fetch_from_url(self, url: str) -> requests.Response: - logger.info(f"Retrieving {url} from zenodo") + def _get_token(self: Self, url: HttpUrl) -> str: + """Return the appropriate read-only Zenodo personal access token. + + These tokens are associated with the pudl@catalyst.coop Zenodo account, which + owns all of the Catalyst raw data archives. 
+ """ if "sandbox" in url: - token = self.TOKEN["sandbox"] - else: - token = self.TOKEN["production"] - response = self.http.get( - url, params={"access_token": token}, timeout=self.timeout - ) - if response.status_code == requests.codes.ok: - logger.debug(f"Successfully downloaded {url}") - return response + token = "qyPC29wGPaflUUVAv1oGw99ytwBqwEEdwi4NuUrpwc3xUcEwbmuB4emwysco" # nosec: B105 else: - raise ValueError(f"Could not download {url}: {response.text}") + token = "KXcG5s9TqeuPh1Ukt5QYbzhCElp9LxuqAuiwdqHP0WS4qGIQiydHn6FBtdJ5" # nosec: B105 + return token - def _doi_to_url(self, doi: str) -> str: - """Returns url that holds the datapackage for given doi.""" + def _get_url(self: Self, doi: ZenodoDOI) -> HttpUrl: + """Construct a Zenodo depsition URL based on its Zenodo DOI.""" match = re.search(r"(10\.5072|10\.5281)/zenodo.([\d]+)", doi) if match is None: @@ -244,20 +238,29 @@ def _doi_to_url(self, doi: str) -> str: doi_prefix = match.groups()[0] zenodo_id = match.groups()[1] if doi_prefix == "10.5072": - api_root = self.API_ROOT["sandbox"] + api_root = "https://sandbox.zenodo.org/api" elif doi_prefix == "10.5281": - api_root = self.API_ROOT["production"] + api_root = "https://zenodo.org/api" else: raise ValueError(f"Invalid Zenodo DOI: {doi}") return f"{api_root}/deposit/depositions/{zenodo_id}" - def get_descriptor(self, dataset: str) -> DatapackageDescriptor: + def _fetch_from_url(self: Self, url: HttpUrl) -> requests.Response: + logger.info(f"Retrieving {url} from zenodo") + response = self.http.get( + url, params={"access_token": self._get_token(url)}, timeout=self.timeout + ) + if response.status_code == requests.codes.ok: + logger.debug(f"Successfully downloaded {url}") + return response + else: + raise ValueError(f"Could not download {url}: {response.text}") + + def get_descriptor(self: Self, dataset: str) -> DatapackageDescriptor: """Returns class:`DatapackageDescriptor` for given dataset.""" - doi = self._dataset_to_doi.get(dataset, False) - if not 
doi: - raise KeyError(f"No doi found for dataset {dataset}") + doi = self.get_doi(dataset) if doi not in self._descriptor_cache: - dpkg = self._fetch_from_url(self._doi_to_url(doi)) + dpkg = self._fetch_from_url(self._get_url(doi)) for f in dpkg.json()["files"]: if f["filename"] == "datapackage.json": resp = self._fetch_from_url(f["links"]["download"]) @@ -271,15 +274,19 @@ def get_descriptor(self, dataset: str) -> DatapackageDescriptor: ) return self._descriptor_cache[doi] - def get_resource_key(self, dataset: str, name: str) -> PudlResourceKey: - """Returns PudlResourceKey for given resource.""" - return PudlResourceKey(dataset, self._dataset_to_doi[dataset], name) + def get_resource_key(self: Self, dataset: str, name: str) -> PudlResourceKey: + """Returns :class:`PudlResourceKey` for given resource.""" + return PudlResourceKey(dataset, self.get_doi(dataset), name) - def get_doi(self, dataset: str) -> str: + def get_doi(self: Self, dataset: str) -> ZenodoDOI: """Returns DOI for given dataset.""" - return self._dataset_to_doi[dataset] + try: + doi = self.zenodo_dois[dataset] + except KeyError: + raise KeyError(f"No Zenodo DOI found for datast {dataset}.") + return doi - def get_resource(self, res: PudlResourceKey) -> bytes: + def get_resource(self: Self, res: PudlResourceKey) -> bytes: """Given resource key, retrieve contents of the file from zenodo.""" desc = self.get_descriptor(res.dataset) url = desc.get_resource_path(res.name) @@ -287,9 +294,9 @@ def get_resource(self, res: PudlResourceKey) -> bytes: desc.validate_checksum(res.name, content) return content - def get_known_datasets(self) -> list[str]: + def get_known_datasets(self: Self) -> list[str]: """Returns list of supported datasets.""" - return sorted(self._dataset_to_doi) + return sorted(self.zenodo_dois) class Datastore: @@ -442,11 +449,13 @@ def __call__(self, parser, namespace, values, option_string=None): def parse_command_line(): """Collect the command line arguments.""" - dois = "\n".join([f" - 
{x}" for x in ZenodoFetcher.DOI]) + known_datasets = "\n".join( + [f" - {x}" for x in ZenodoFetcher().get_known_datasets()] + ) dataset_msg = f""" -Available Production Datasets: -{dois}""" +Available Datasets: +{known_datasets}""" parser = argparse.ArgumentParser( description="Download and cache ETL source data from Zenodo.", diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 323f21003e..7c530584df 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -189,8 +189,7 @@ def __init__( self, descriptors: dict[str, datastore.DatapackageDescriptor], **kwargs ): """Construct a test-friendly ZenodoFetcher with descriptors pre-loaded.""" - super().__init__(**kwargs) - self._descriptor_cache = dict(descriptors) + super().__init__(**kwargs, _descriptor_cache=descriptors) class TestZenodoFetcher(unittest.TestCase): @@ -258,7 +257,7 @@ def test_doi_format_is_correct(self): def test_get_known_datasets(self): """Call to get_known_datasets() produces the expected results.""" self.assertEqual( - sorted(datastore.ZenodoFetcher.DOI), + sorted(datastore.ZenodoFetcher().zenodo_dois), self.fetcher.get_known_datasets(), ) From c4b13fc86aed7a36d5c4796b472d559a22b1bc76 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 21 Aug 2023 13:51:19 -0600 Subject: [PATCH 39/51] Update allowed tox versions. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 79a5ca3b18..caf97cac01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,7 +119,7 @@ dev = [ "isort>=5.0,<5.13", "jedi>=0.18,<0.20", "lxml>=4.6,<4.10", - "tox>=4,<4.7", + "tox>=4,<4.11", "twine>=3.3,<4.1", ] doc = [ @@ -153,7 +153,7 @@ test = [ "pytest>=6.2,<7.5", "responses>=0.14,<0.24", "rstcheck[sphinx]>=5.0,<6.2", - "tox>=4.0,<4.10", + "tox>=4.0,<4.11", ] datasette = [ "datasette>=0.60,<0.65", From f894ed6d912a429836e7839c07348ca60f1cfb6c Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 21 Aug 2023 16:12:23 -0600 Subject: [PATCH 40/51] Make sure that epacems caplog actually has records before looping. --- src/pudl/extract/epacems.py | 4 ++-- test/integration/epacems_test.py | 19 ++++++++++++------- tox.ini | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/pudl/extract/epacems.py b/src/pudl/extract/epacems.py index aa2d9894e8..c656eb8aeb 100644 --- a/src/pudl/extract/epacems.py +++ b/src/pudl/extract/epacems.py @@ -25,7 +25,7 @@ import pandas as pd import pudl.logging_helpers -from pudl.metadata.classes import Package +from pudl.metadata.classes import Resource from pudl.workspace.datastore import Datastore logger = pudl.logging_helpers.get_logger(__name__) @@ -187,6 +187,6 @@ def extract(year: int, state: str, ds: Datastore): logger.warning( f"No data found for {state} in {year}. Returning empty dataframe." 
) - res = Package.from_resource_ids().get_resource("hourly_emissions_epacems") + res = Resource.from_id("hourly_emissions_epacems") df = res.format_df(pd.DataFrame()) return df diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index cb4f362db9..26a7b8d16c 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -1,11 +1,13 @@ """Tests for pudl/output/epacems.py loading functions.""" +import logging + import dask.dataframe as dd import pytest from dagster import build_init_resource_context from pudl.extract.epacems import extract from pudl.io_managers import epacems_io_manager -from pudl.metadata.classes import Package +from pudl.metadata.classes import Resource from pudl.output.epacems import epacems, year_state_filter @@ -53,12 +55,15 @@ def test_epacems_missing_partition(caplog, pudl_datastore_fixture): Note that this should pass for both the Fast and Full ETL because the behavior towards a missing file is identical.""" df = extract(year=1996, state="UT", ds=pudl_datastore_fixture) - for record in caplog.records: - assert record.levelname == "WARNING" - assert ( - record.message == "No data found for UT in 1996. Returning empty dataframe." - ) - epacems_res = Package.from_resource_ids().get_resource("hourly_emissions_epacems") + with caplog.at_level(logging.DEBUG): + assert len(caplog.records) == 1 + for record in caplog.records: + assert record.levelname == "WARNING" + assert ( + record.message + == "No data found for UT in 1996. Returning empty dataframe." + ) + epacems_res = Resource.from_id("hourly_emissions_epacems") expected_cols = list(epacems_res.get_field_names()) assert df.shape[0] == 0 # Check that no rows of data are there # Check that all columns expected of EPACEMS data are present. 
diff --git a/tox.ini b/tox.ini index 8112828de2..5d4b75ada0 100644 --- a/tox.ini +++ b/tox.ini @@ -295,7 +295,7 @@ addopts = --verbose --pdbcls=IPython.terminal.debugger:TerminalPdb log_format = %(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s log_date_format= %Y-%m-%d %H:%M:%S log_cli = true -log_cli_level = debug +log_cli_level = DEBUG doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS filterwarnings = ignore:distutils Version classes are deprecated:DeprecationWarning From 63f63d50d5bf48f8154c17c5d1225a2f8aecd8dc Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 21 Aug 2023 18:34:42 -0600 Subject: [PATCH 41/51] Remove caplog test. --- test/integration/epacems_test.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/test/integration/epacems_test.py b/test/integration/epacems_test.py index 26a7b8d16c..09d06ae1ae 100644 --- a/test/integration/epacems_test.py +++ b/test/integration/epacems_test.py @@ -1,6 +1,4 @@ """Tests for pudl/output/epacems.py loading functions.""" -import logging - import dask.dataframe as dd import pytest from dagster import build_init_resource_context @@ -49,20 +47,12 @@ def test_epacems_subset(epacems_year_and_state, epacems_parquet_path): assert actual.shape[0].compute() > 0 # nosec: B101 n rows -def test_epacems_missing_partition(caplog, pudl_datastore_fixture): +def test_epacems_missing_partition(pudl_datastore_fixture): """Check that missing partitions return an empty data frame. Note that this should pass for both the Fast and Full ETL because the behavior towards a missing file is identical.""" df = extract(year=1996, state="UT", ds=pudl_datastore_fixture) - with caplog.at_level(logging.DEBUG): - assert len(caplog.records) == 1 - for record in caplog.records: - assert record.levelname == "WARNING" - assert ( - record.message - == "No data found for UT in 1996. Returning empty dataframe." 
- ) epacems_res = Resource.from_id("hourly_emissions_epacems") expected_cols = list(epacems_res.get_field_names()) assert df.shape[0] == 0 # Check that no rows of data are there From 77aa2f451a6eab107292a200c64694cc4261c3d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Aug 2023 04:23:04 +0000 Subject: [PATCH 42/51] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-prettier: v3.0.1 → v3.0.2](https://github.com/pre-commit/mirrors-prettier/compare/v3.0.1...v3.0.2) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 447b95a8dd..047ca0c5cb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: - id: rm-unneeded-f-str - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.1 + rev: v3.0.2 hooks: - id: prettier types_or: [yaml] From 4674bf37920568253faaad5982958d8f3c524ea6 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 22 Aug 2023 09:13:55 -0600 Subject: [PATCH 43/51] Update docs to reflect availability of 2022 EPA CEMS data. 
--- README.rst | 2 +- docs/data_access.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index f328e7a7f9..7215491ed7 100644 --- a/README.rst +++ b/README.rst @@ -68,7 +68,7 @@ PUDL currently integrates data from: * `EIA Form 860m `__: 2023-06 * `EIA Form 861 `__: 2001-2022 * `EIA Form 923 `__: 2001-2022 -* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2021 +* `EPA Continuous Emissions Monitoring System (CEMS) `__: 1995-2022 * `FERC Form 1 `__: 1994-2021 * `FERC Form 714 `__: 2006-2020 * `US Census Demographic Profile 1 Geodatabase `__: 2010 diff --git a/docs/data_access.rst b/docs/data_access.rst index 26b7c92545..92befc2829 100644 --- a/docs/data_access.rst +++ b/docs/data_access.rst @@ -83,7 +83,7 @@ AWS CLI, or programmatically via the S3 API. They can also be downloaded directl HTTPS using the following links: * `PUDL SQLite DB `__ -* `EPA CEMS Hourly Emissions Parquet (1995-2021) `__ +* `EPA CEMS Hourly Emissions Parquet (1995-2022) `__ * `Census DP1 SQLite DB (2010) `__ * Raw FERC Form 1: From a8675fb117ffa89b8be08257d2eb1f92fa603b4c Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 22 Aug 2023 22:46:37 -0600 Subject: [PATCH 44/51] Bump max allowed Tox version. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 79a5ca3b18..caf97cac01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,7 +119,7 @@ dev = [ "isort>=5.0,<5.13", "jedi>=0.18,<0.20", "lxml>=4.6,<4.10", - "tox>=4,<4.7", + "tox>=4,<4.11", "twine>=3.3,<4.1", ] doc = [ @@ -153,7 +153,7 @@ test = [ "pytest>=6.2,<7.5", "responses>=0.14,<0.24", "rstcheck[sphinx]>=5.0,<6.2", - "tox>=4.0,<4.10", + "tox>=4.0,<4.11", ] datasette = [ "datasette>=0.60,<0.65", From c62fc1c98b7e23055a63df3acb0c54b4ab8dc7c5 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 25 Aug 2023 08:52:24 -0600 Subject: [PATCH 45/51] Stop importing urllib3 Retry from deprecated location --- src/pudl/workspace/datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 224c3e9979..f3454cad57 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -15,7 +15,7 @@ import requests from google.auth.exceptions import DefaultCredentialsError from requests.adapters import HTTPAdapter -from requests.packages.urllib3.util.retry import Retry +from urllib3.util.retry import Retry import pudl from pudl.workspace import resource_cache From d840c1dd922eec5272445b1fc5a3c8e4c65f7a16 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 25 Aug 2023 20:25:12 -0600 Subject: [PATCH 46/51] Create a ZenodoDoiSettings Pydantic BaseSettings class. 
--- src/pudl/workspace/datastore.py | 136 +++++++++++++------------- test/unit/workspace/datastore_test.py | 7 +- 2 files changed, 72 insertions(+), 71 deletions(-) diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 062cc471f5..1ff7c7ce5b 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -14,7 +14,7 @@ import datapackage import requests from google.auth.exceptions import DefaultCredentialsError -from pydantic import BaseModel, HttpUrl, confloat, constr +from pydantic import BaseSettings, HttpUrl, constr from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry @@ -25,12 +25,8 @@ logger = pudl.logging_helpers.get_logger(__name__) -# The Zenodo tokens recorded here should have read-only access to our archives. -# Including them here is correct in order to allow public use of this tool, so -# long as we stick to read-only keys. - PUDL_YML = Path.home() / ".pudl.yml" -ZenodoDOI = constr(regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)") +ZenodoDoi = constr(regex=r"(10\.5072|10\.5281)/zenodo.([\d]+)") class ChecksumMismatch(ValueError): @@ -98,12 +94,12 @@ def _matches(self, res: dict, **filters: Any): ) def get_resources( - self, name: str = None, **filters: Any + self: Self, name: str = None, **filters: Any ) -> Iterator[PudlResourceKey]: """Returns series of PudlResourceKey identifiers for matching resources. Args: - name (str): if specified, find resource(s) with this name. + name: if specified, find resource(s) with this name. filters (dict): if specified, find resoure(s) matching these key=value constraints. The constraints are matched against the 'parts' field of the resource entry in the datapackage.json. 
@@ -156,65 +152,81 @@ def get_json_string(self) -> str: return json.dumps(self.datapackage_json, sort_keys=True, indent=4) -class ZenodoFetcher(BaseModel): - """API for fetching datapackage descriptors and resource contents from zenodo.""" - - _descriptor_cache: dict[str, DatapackageDescriptor] = {} - http: requests.Session = requests.Session() - timeout: confloat(gt=0.0, allow_inf_nan=False) = 15.0 - zenodo_dois: dict[str, ZenodoDOI] = { - # Sandbox DOIs are provided for reference - "censusdp1tract": "10.5281/zenodo.4127049", - # "censusdp1tract": "10.5072/zenodo.674992", - "eia860": "10.5281/zenodo.8164776", - # "eia860": "10.5072/zenodo.1222854", - "eia860m": "10.5281/zenodo.8188017", - # "eia860m": "10.5072/zenodo.1225517", - "eia861": "10.5281/zenodo.8231268", - # "eia861": "10.5072/zenodo.1229930", - "eia923": "10.5281/zenodo.8172818", - # "eia923": "10.5072/zenodo.1217724", - "eia_bulk_elec": "10.5281/zenodo.7067367", - # "eia_bulk_elec": "10.5072/zenodo.1103572", - "epacamd_eia": "10.5281/zenodo.7900974", - # "epacamd_eia": "10.5072/zenodo.1199170", - "epacems": "10.5281/zenodo.8235497", - # "epacems": "10.5072/zenodo.1228519", - "ferc1": "10.5281/zenodo.7314437", - # "ferc1": "10.5072/zenodo.1070868", - "ferc2": "10.5281/zenodo.8006881", - # "ferc2": "10.5072/zenodo.1188447", - "ferc6": "10.5281/zenodo.7130141", - # "ferc6": "10.5072/zenodo.1098088", - "ferc60": "10.5281/zenodo.7130146", - # "ferc60": "10.5072/zenodo.1098089", - "ferc714": "10.5281/zenodo.7139875", - # "ferc714": "10.5072/zenodo.1098302", - } +class ZenodoDoiSettings(BaseSettings): + """Digital Object Identifiers pointing to currently used Zenodo archives.""" + + # Sandbox DOIs are provided for reference + censusdp1tract: ZenodoDoi = "10.5281/zenodo.4127049" + # censusdp1tract: ZenodoDoi = "10.5072/zenodo.674992" + eia860: ZenodoDoi = "10.5281/zenodo.8164776" + # eia860: ZenodoDoi = "10.5072/zenodo.1222854" + eia860m: ZenodoDoi = "10.5281/zenodo.8188017" + # eia860m: ZenodoDoi = 
"10.5072/zenodo.1225517" + eia861: ZenodoDoi = "10.5281/zenodo.8231268" + # eia861: ZenodoDoi = "10.5072/zenodo.1229930" + eia923: ZenodoDoi = "10.5281/zenodo.8172818" + # eia923: ZenodoDoi = "10.5072/zenodo.1217724" + eia_bulk_elec: ZenodoDoi = "10.5281/zenodo.7067367" + # eia_bulk_elec: ZenodoDoi = "10.5072/zenodo.1103572" + epacamd_eia: ZenodoDoi = "10.5281/zenodo.7900974" + # epacamd_eia: ZenodoDoi = "10.5072/zenodo.1199170" + epacems: ZenodoDoi = "10.5281/zenodo.8235497" + # epacems": ZenodoDoi = "10.5072/zenodo.1228519" + ferc1: ZenodoDoi = "10.5281/zenodo.7314437" + # ferc1: ZenodoDoi = 10.5072/zenodo.1070868" + ferc2: ZenodoDoi = "10.5281/zenodo.8006881" + # ferc2: ZenodoDoi = "10.5072/zenodo.1188447" + ferc6: ZenodoDoi = "10.5281/zenodo.7130141" + # ferc6: ZenodoDoi = "10.5072/zenodo.1098088" + ferc60: ZenodoDoi = "10.5281/zenodo.7130146" + # ferc60: ZenodoDoi = "10.5072/zenodo.1098089" + ferc714: ZenodoDoi = "10.5281/zenodo.7139875" + # ferc714: ZenodoDoi = "10.5072/zenodo.1098302" class Config: - """Allow arbitrary types -- required for requests.Session.""" + """Pydantic config, reads from .env file.""" + + env_prefix = "pudl_zenodo_doi_" + env_file = ".env" + + +class ZenodoFetcher: + """API for fetching datapackage descriptors and resource contents from zenodo.""" - arbitrary_types_allowed = True + _descriptor_cache: dict[str, DatapackageDescriptor] + zenodo_dois: ZenodoDoiSettings + timeout: float + http: requests.Session - def __init__(self: Self, **data): + def __init__( + self: Self, zenodo_dois: ZenodoDoiSettings | None = None, timeout: float = 15.0 + ): """Constructs ZenodoFetcher instance.""" - super().__init__(**data) + if not zenodo_dois: + self.zenodo_dois = ZenodoDoiSettings() + + self.timeout = timeout retries = Retry( backoff_factor=2, total=3, status_forcelist=[429, 500, 502, 503, 504] ) adapter = HTTPAdapter(max_retries=retries) - + self.http = requests.Session() self.http.mount("http://", adapter) self.http.mount("https://", adapter) - 
for dataset in self.zenodo_dois: - try: - ZenodoDOI.validate(self.zenodo_dois[dataset]) - except Exception: - raise ValueError( - f"Invalid Zenodo DOI for {dataset}: {self.zenodo_dois[dataset]}" - ) + self._descriptor_cache = {} + + def get_doi(self: Self, dataset: str) -> ZenodoDoi: + """Returns DOI for given dataset.""" + try: + doi = self.zenodo_dois.__getattribute__(dataset) + except AttributeError: + raise AttributeError(f"No Zenodo DOI found for dataset {dataset}.") + return doi + + def get_known_datasets(self: Self) -> list[str]: + """Returns list of supported datasets.""" + return [name for name, doi in sorted(self.zenodo_dois)] def _get_token(self: Self, url: HttpUrl) -> str: """Return the appropriate read-only Zenodo personal access token. @@ -228,7 +240,7 @@ def _get_token(self: Self, url: HttpUrl) -> str: token = "KXcG5s9TqeuPh1Ukt5QYbzhCElp9LxuqAuiwdqHP0WS4qGIQiydHn6FBtdJ5" # nosec: B105 return token - def _get_url(self: Self, doi: ZenodoDOI) -> HttpUrl: + def _get_url(self: Self, doi: ZenodoDoi) -> HttpUrl: """Construct a Zenodo depsition URL based on its Zenodo DOI.""" match = re.search(r"(10\.5072|10\.5281)/zenodo.([\d]+)", doi) @@ -278,14 +290,6 @@ def get_resource_key(self: Self, dataset: str, name: str) -> PudlResourceKey: """Returns :class:`PudlResourceKey` for given resource.""" return PudlResourceKey(dataset, self.get_doi(dataset), name) - def get_doi(self: Self, dataset: str) -> ZenodoDOI: - """Returns DOI for given dataset.""" - try: - doi = self.zenodo_dois[dataset] - except KeyError: - raise KeyError(f"No Zenodo DOI found for datast {dataset}.") - return doi - def get_resource(self: Self, res: PudlResourceKey) -> bytes: """Given resource key, retrieve contents of the file from zenodo.""" desc = self.get_descriptor(res.dataset) @@ -294,10 +298,6 @@ def get_resource(self: Self, res: PudlResourceKey) -> bytes: desc.validate_checksum(res.name, content) return content - def get_known_datasets(self: Self) -> list[str]: - """Returns list of 
supported datasets.""" - return sorted(self.zenodo_dois) - class Datastore: """Handle connections and downloading of Zenodo Source archives.""" diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index 8cf22be99e..f3982e5459 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -189,7 +189,8 @@ def __init__( self, descriptors: dict[str, datastore.DatapackageDescriptor], **kwargs ): """Construct a test-friendly ZenodoFetcher with descriptors pre-loaded.""" - super().__init__(**kwargs, _descriptor_cache=descriptors) + super().__init__(**kwargs) + self._descriptor_cache = descriptors class TestZenodoFetcher(unittest.TestCase): @@ -257,7 +258,7 @@ def test_doi_format_is_correct(self): def test_get_known_datasets(self): """Call to get_known_datasets() produces the expected results.""" self.assertEqual( - sorted(datastore.ZenodoFetcher().zenodo_dois), + sorted(name for name, doi in datastore.ZenodoFetcher().zenodo_dois), self.fetcher.get_known_datasets(), ) @@ -296,7 +297,7 @@ def test_get_resource_key(self): def test_get_resource_key_for_unknown_dataset_fails(self): """When get_resource_key() is called for unknown dataset it throws KeyError.""" self.assertRaises( - KeyError, self.fetcher.get_resource_key, "unknown", "blob.zip" + AttributeError, self.fetcher.get_resource_key, "unknown", "blob.zip" ) @responses.activate From 2d081f0dc70b201a6ca31108b758df4104037fbb Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 25 Aug 2023 22:30:47 -0600 Subject: [PATCH 47/51] Update Zenodo DOI test to work better with new ZenodoDoiSettings --- test/unit/workspace/datastore_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index f3982e5459..d13d4754fd 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -242,10 +242,13 @@ def 
test_doi_format_is_correct(self): be checked in, thus this test will fail if a sandbox DOI with prefix 10.5072 is identified. """ - ds = datastore.ZenodoFetcher() - self.assertTrue(ds.get_known_datasets()) - for dataset in ds.get_known_datasets(): - doi = ds.get_doi(dataset) + zf = datastore.ZenodoFetcher() + self.assertTrue(zf.get_known_datasets()) + for dataset, doi in zf.zenodo_dois: + self.assertTrue( + zf.get_doi(dataset) == doi, + msg=f"Zenodo DOI for {dataset} matches result of get_doi()", + ) self.assertFalse( re.fullmatch(r"10\.5072/zenodo\.[0-9]{5,10}", doi), msg=f"Zenodo sandbox DOI found for {dataset}: {doi}", From a09368ad1c28e9872f0ef5309e26a36d4170767c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Aug 2023 07:34:33 +0000 Subject: [PATCH 48/51] Update pyarrow requirement from <12.1,>=5 to >=5,<13.1 Updates the requirements on [pyarrow](https://github.com/apache/arrow) to permit the latest version. - [Commits](https://github.com/apache/arrow/compare/apache-arrow-5.0.0...go/v13.0.0) --- updated-dependencies: - dependency-name: pyarrow dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index caf97cac01..8bbee1474b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "networkx>=2.2,<3.2", "numpy>=1.18.5,!=1.23.0,<1.26", "pandas>=1.4,<1.5.4", - "pyarrow>=5,<12.1", + "pyarrow>=5,<13.1", "pydantic[email]>=1.7,<2", "python-dotenv>=0.21,<1.1", "python-snappy>=0.6,<0.7", From d42493f02392c53b443aae0f3e042e37b0fe32d0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Aug 2023 07:35:30 +0000 Subject: [PATCH 49/51] Update sphinxcontrib-bibtex requirement from <2.6,>=2.4 to >=2.4,<2.7 Updates the requirements on [sphinxcontrib-bibtex](https://github.com/mcmtroffaes/sphinxcontrib-bibtex) to permit the latest version. - [Changelog](https://github.com/mcmtroffaes/sphinxcontrib-bibtex/blob/develop/CHANGELOG.rst) - [Commits](https://github.com/mcmtroffaes/sphinxcontrib-bibtex/compare/2.4.0...2.6.1) --- updated-dependencies: - dependency-name: sphinxcontrib-bibtex dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index caf97cac01..0b500cc56a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,7 +129,7 @@ doc = [ "sphinx-issues>=1.2,<3.1", "sphinx-reredirects", "sphinx>=4,!=5.1.0,<7.3", - "sphinxcontrib_bibtex>=2.4,<2.6", + "sphinxcontrib_bibtex>=2.4,<2.7", ] test = [ "bandit>=1.6,<1.8", From b5889ced26fe66c7e95947d9e4c68030bb52cb2f Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 29 Aug 2023 12:58:40 -0400 Subject: [PATCH 50/51] Update docs/dev/datastore.rst Co-authored-by: Dazhong Xia --- docs/dev/datastore.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dev/datastore.rst b/docs/dev/datastore.rst index eb3f2f452e..31e1e6f5b0 100644 --- a/docs/dev/datastore.rst +++ b/docs/dev/datastore.rst @@ -38,7 +38,7 @@ For more detailed usage information, see: $ pudl_datastore --help The downloaded data will be used by the script to populate a datastore under -the your ``$PUDL_INPUT`` directory, organized by data source, form, and DOI:: +your ``$PUDL_INPUT`` directory, organized by data source, form, and DOI:: data/censusdp1tract/ data/eia860/ From 6ac9d9fd10c40dbe41ac10aa2e5295fd131a639e Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Tue, 29 Aug 2023 11:52:52 -0600 Subject: [PATCH 51/51] Remove deprecated pudl_datastore --pudl_in option and unused get_resource_key() method --- src/pudl/workspace/datastore.py | 11 ----------- test/unit/workspace/datastore_test.py | 17 ++++------------- 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/src/pudl/workspace/datastore.py b/src/pudl/workspace/datastore.py index 1ff7c7ce5b..230cdb505c 100644 --- a/src/pudl/workspace/datastore.py +++ b/src/pudl/workspace/datastore.py @@ -286,10 +286,6 @@ def get_descriptor(self: Self, dataset: str) -> DatapackageDescriptor: ) return self._descriptor_cache[doi] - def get_resource_key(self: Self, dataset: str, 
name: str) -> PudlResourceKey: - """Returns :class:`PudlResourceKey` for given resource.""" - return PudlResourceKey(dataset, self.get_doi(dataset), name) - def get_resource(self: Self, res: PudlResourceKey) -> bytes: """Given resource key, retrieve contents of the file from zenodo.""" desc = self.get_descriptor(res.dataset) @@ -473,10 +469,6 @@ def parse_command_line(): "The default is to download all datasets, which may take hours depending on " "network speed.", ) - parser.add_argument( - "--pudl_in", - help="Input directory to use, overridng the $PUDL_INPUT environment variable.", - ) parser.add_argument( "--validate", help="Validate locally cached datapackages, but don't download anything.", @@ -594,9 +586,6 @@ def main(): logfile=args.logfile, loglevel=args.loglevel ) - if args.pudl_in: - PudlPaths.set_path_overrides(input_dir=args.pudl_in) - cache_path = None if not args.bypass_local_cache: cache_path = PudlPaths().input_dir diff --git a/test/unit/workspace/datastore_test.py b/test/unit/workspace/datastore_test.py index d13d4754fd..df389b6fbf 100644 --- a/test/unit/workspace/datastore_test.py +++ b/test/unit/workspace/datastore_test.py @@ -265,6 +265,10 @@ def test_get_known_datasets(self): self.fetcher.get_known_datasets(), ) + def test_get_unknown_dataset(self): + """Ensure that we get a failure when attempting to access an invalid dataset.""" + self.assertRaises(AttributeError, self.fetcher.get_doi, "unknown") + def test_doi_of_prod_epacems_matches(self): """Most of the tests assume specific DOI for production epacems dataset. 
@@ -290,19 +294,6 @@ def test_get_descriptor_http_calls(self): self.assertEqual(self.MOCK_EPACEMS_DATAPACKAGE, desc.datapackage_json) # self.assertTrue(responses.assert_call_count("http://localhost/my/datapackage.json", 1)) - def test_get_resource_key(self): - """Tests normal operation of get_resource_key().""" - self.assertEqual( - PudlResourceKey("epacems", self.PROD_EPACEMS_DOI, "blob.zip"), - self.fetcher.get_resource_key("epacems", "blob.zip"), - ) - - def test_get_resource_key_for_unknown_dataset_fails(self): - """When get_resource_key() is called for unknown dataset it throws KeyError.""" - self.assertRaises( - AttributeError, self.fetcher.get_resource_key, "unknown", "blob.zip" - ) - @responses.activate def test_get_resource(self): """Test that get_resource() calls expected http request and returns content."""