diff --git a/notebooks/01-pudl-data-access.ipynb b/notebooks/01-pudl-data-access.ipynb new file mode 100644 index 0000000..7b235da --- /dev/null +++ b/notebooks/01-pudl-data-access.ipynb @@ -0,0 +1 @@ +{"cells":[{"source":"\"Kaggle\"","metadata":{},"cell_type":"markdown"},{"cell_type":"code","execution_count":1,"id":"c2d5b356","metadata":{"_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","execution":{"iopub.execute_input":"2023-10-09T18:06:51.144636Z","iopub.status.busy":"2023-10-09T18:06:51.144299Z","iopub.status.idle":"2023-10-09T18:06:52.155943Z","shell.execute_reply":"2023-10-09T18:06:52.15498Z"},"papermill":{"duration":1.019939,"end_time":"2023-10-09T18:06:52.158256","exception":false,"start_time":"2023-10-09T18:06:51.138317","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["Python version: 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]\n","np.__version__='1.23.5'\n","pd.__version__='2.0.3'\n","sa.__version__='2.0.17'\n","/kaggle/input/public-utility-data-liberation-project-pudl/censusdp1tract.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc1.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc1_xbrl.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc1_xbrl_datapackage.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc1_xbrl_taxonomy_metadata.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc2.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc2_xbrl.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc2_xbrl_datapackage.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc2_xbrl_taxonomy_metadata.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc6.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc60.sqlite\n","/kaggle/input/
public-utility-data-liberation-project-pudl/ferc60_xbrl.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc60_xbrl_datapackage.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc60_xbrl_taxonomy_metadata.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc6_xbrl.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc6_xbrl_datapackage.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc714_xbrl.sqlite\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc714_xbrl_datapackage.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/ferc714_xbrl_taxonomy_metadata.json\n","/kaggle/input/public-utility-data-liberation-project-pudl/hourly_emissions_epacems.parquet\n","/kaggle/input/public-utility-data-liberation-project-pudl/pudl.sqlite\n"]}],"source":["import sys\n","\n","print(f\"Python version: {sys.version}\")\n","# This Python 3 environment comes with many helpful analytics libraries installed\n","# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n","# For example, here's several helpful packages to load\n","\n","import numpy as np # linear algebra\n","import pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n","import sqlalchemy as sa\n","\n","print(f\"{np.__version__=}\")\n","print(f\"{pd.__version__=}\")\n","print(f\"{sa.__version__=}\")\n","\n","# Input data files are available in the read-only \"../input/\" directory\n","# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n","\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in sorted(filenames):\n"," print(os.path.join(dirname, filename))\n","\n","\n","# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n","# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session\n","import pathlib\n","pudl_path = pathlib.Path(\"/kaggle/input/public-utility-data-liberation-project-pudl\")"]},{"cell_type":"markdown","id":"6aa2921d","metadata":{"papermill":{"duration":0.003519,"end_time":"2023-10-09T18:06:52.165882","exception":false,"start_time":"2023-10-09T18:06:52.162363","status":"completed"},"tags":[]},"source":["# Connect to the PUDL Database"]},{"cell_type":"code","execution_count":2,"id":"b9fc2f05","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:06:52.175116Z","iopub.status.busy":"2023-10-09T18:06:52.174681Z","iopub.status.idle":"2023-10-09T18:06:52.210668Z","shell.execute_reply":"2023-10-09T18:06:52.209569Z"},"papermill":{"duration":0.043303,"end_time":"2023-10-09T18:06:52.212945","exception":false,"start_time":"2023-10-09T18:06:52.169642","status":"completed"},"tags":[]},"outputs":[],"source":["pudl_engine = sa.create_engine(f\"sqlite:///{pudl_path}/pudl.sqlite\")"]},{"cell_type":"markdown","id":"ae683a45","metadata":{"papermill":{"duration":0.003762,"end_time":"2023-10-09T18:06:52.220907","exception":false,"start_time":"2023-10-09T18:06:52.217145","status":"completed"},"tags":[]},"source":["# Read and Inspect EIA-923 Fuel Receipts and 
Costs Data"]},{"cell_type":"code","execution_count":3,"id":"d7da8e30","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:06:52.23035Z","iopub.status.busy":"2023-10-09T18:06:52.229976Z","iopub.status.idle":"2023-10-09T18:07:09.510506Z","shell.execute_reply":"2023-10-09T18:07:09.509258Z"},"papermill":{"duration":17.288052,"end_time":"2023-10-09T18:07:09.512902","exception":false,"start_time":"2023-10-09T18:06:52.22485","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 631975 entries, 0 to 631974\n","Data columns (total 36 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 report_date 631975 non-null datetime64[ns]\n"," 1 plant_id_eia 631975 non-null Int64 \n"," 2 plant_id_pudl 631975 non-null Int64 \n"," 3 plant_name_eia 631975 non-null string \n"," 4 utility_id_eia 631975 non-null Int64 \n"," 5 utility_id_pudl 631975 non-null Int64 \n"," 6 utility_name_eia 631975 non-null string \n"," 7 state 631975 non-null string \n"," 8 contract_type_code 631737 non-null string \n"," 9 contract_expiration_date 269723 non-null datetime64[ns]\n"," 10 energy_source_code 631975 non-null string \n"," 11 fuel_type_code_pudl 631975 non-null string \n"," 12 fuel_group_code 631974 non-null string \n"," 13 supplier_name 631972 non-null string \n"," 14 fuel_received_units 631975 non-null Int64 \n"," 15 fuel_mmbtu_per_unit 631975 non-null float64 \n"," 16 fuel_cost_per_mmbtu 585181 non-null float64 \n"," 17 bulk_agg_fuel_cost_per_mmbtu 400204 non-null float64 \n"," 18 fuel_consumed_mmbtu 631975 non-null float64 \n"," 19 total_fuel_cost 585181 non-null float64 \n"," 20 fuel_cost_from_eiaapi 631975 non-null boolean \n"," 21 sulfur_content_pct 631975 non-null float64 \n"," 22 ash_content_pct 631975 non-null float64 \n"," 23 mercury_content_ppm 346521 non-null float64 \n"," 24 primary_transportation_mode_code 573833 non-null string \n"," 25 secondary_transportation_mode_code 32353 
non-null string \n"," 26 natural_gas_transport_code 363501 non-null string \n"," 27 natural_gas_delivery_contract_type_code 187182 non-null string \n"," 28 moisture_content_pct 95329 non-null float64 \n"," 29 chlorine_content_ppm 95329 non-null Int64 \n"," 30 data_maturity 631975 non-null string \n"," 31 mine_id_msha 195180 non-null Int64 \n"," 32 mine_name 216680 non-null string \n"," 33 mine_state 216680 non-null string \n"," 34 coalmine_county_id_fips 177070 non-null string \n"," 35 mine_type_code 216604 non-null string \n","dtypes: Int64(7), boolean(1), datetime64[ns](2), float64(9), string(17)\n","memory usage: 174.2 MB\n","CPU times: user 14.8 s, sys: 2.06 s, total: 16.8 s\n","Wall time: 17.3 s\n"]}],"source":["%%time\n","frc = pd.read_sql(\"denorm_fuel_receipts_costs_eia923\", pudl_engine).convert_dtypes(convert_floating=False)\n","frc.info()"]},{"cell_type":"code","execution_count":4,"id":"ade2eb90","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:09.522759Z","iopub.status.busy":"2023-10-09T18:07:09.522432Z","iopub.status.idle":"2023-10-09T18:07:09.583635Z","shell.execute_reply":"2023-10-09T18:07:09.582521Z"},"papermill":{"duration":0.068691,"end_time":"2023-10-09T18:07:09.585884","exception":false,"start_time":"2023-10-09T18:07:09.517193","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," 
\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
report_dateplant_id_eiaplant_id_pudlplant_name_eiautility_id_eiautility_id_pudlutility_name_eiastatecontract_type_codecontract_expiration_date...natural_gas_transport_codenatural_gas_delivery_contract_type_codemoisture_content_pctchlorine_content_ppmdata_maturitymine_id_mshamine_namemine_statecoalmine_county_id_fipsmine_type_code
873502009-06-0115641806Vienna Ops198302702Vienna Operations IncMDSNaT...<NA><NA>NaN<NA>final<NA><NA><NA><NA><NA>
1455302010-06-0112188253Fair3258838Central Iowa Power CoopIASNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
4291512017-06-011702311Dan E Karn425481Consumers Energy CoMIC2018-03-01...<NA><NA>26.240final4800977black thunderWY56005S
2180662011-09-0116799Cecil Lynch814106Entergy Arkansas LLCARSNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
3940222016-06-016031316Killen4922317Dayton Power & Light CoOHC2016-12-01...<NA><NA>6.240final1103203white oak mine 1IL17065U
2484522012-04-013948382Mitchell140064386Ohio Power CoWVC2013-12-01...<NA><NA>NaN<NA>final4601368fancoWV54045P
6203712022-09-01607206Fredonia15500277Puget Sound Energy IncWAC2022-10-01...firmfirmNaN<NA>provisional<NA><NA><NA><NA><NA>
809242009-04-01547804206Abbott Univ of IL Urbana Champ195283585University of IllinoisILC2009-06-01...<NA><NA>NaN<NA>final1102408gateway mineIL17157U
5798652021-07-016258642Wilson7140123Georgia Power CoGAC2022-04-01...<NA><NA>NaN<NA>final<NA><NA><NA><NA><NA>
3303832014-07-0115521797CP Crane Power, LLC581772948Raven Power Holdings LLCMDSNaT...<NA><NA>26.720final4800977black thunderWY56005S
996922009-08-017721577Theodore19518Alabama Power CoALSNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
1632272010-09-0175512817SCA165343069Sacramento Municipal Util DistCASNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
4465632017-10-01558184655Frederickson Power LP566131810Frederickson Power LPWASNaT...firmfirmNaN<NA>final<NA><NA><NA><NA><NA>
2025202011-05-01553804497Union Power Station547963563Union Power Partners LPARSNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
5310542020-02-01551534383Guadalupe570451944Guadalupe Power Partners LPTXTNaT...firmfirmNaN<NA>final<NA><NA><NA><NA><NA>
1861022011-02-013484417Nichols17718302Southwestern Public Service CoTXSNaT...firm<NA>NaN<NA>final<NA><NA><NA><NA><NA>
4177452017-02-0155835511Rocky Mountain Energy Center15466272Public Service Co of ColoradoCOSNaT...interruptibleinterruptibleNaN<NA>final<NA><NA><NA><NA><NA>
5662832021-02-01565655075J Lamar Stall17698301Southwestern Electric Power CoLASNaT...firmfirmNaN<NA>final<NA><NA><NA><NA><NA>
5373622020-05-01126594H. Wilson Sundt Generating Station24211330Tucson Electric Power CoAZSNaT...firmfirmNaN<NA>final<NA><NA><NA><NA><NA>
134932008-03-01104953242Rumford Cogen547842602NewPage CorporationMEC2008-12-01...<NA><NA>NaN<NA>final<NA><NA><NA><NA><NA>
\n","

20 rows × 36 columns

\n","
"],"text/plain":[" report_date plant_id_eia plant_id_pudl \\\n","87350 2009-06-01 1564 1806 \n","145530 2010-06-01 1218 8253 \n","429151 2017-06-01 1702 311 \n","218066 2011-09-01 167 99 \n","394022 2016-06-01 6031 316 \n","248452 2012-04-01 3948 382 \n","620371 2022-09-01 607 206 \n","80924 2009-04-01 54780 4206 \n","579865 2021-07-01 6258 642 \n","330383 2014-07-01 1552 1797 \n","99692 2009-08-01 7721 577 \n","163227 2010-09-01 7551 2817 \n","446563 2017-10-01 55818 4655 \n","202520 2011-05-01 55380 4497 \n","531054 2020-02-01 55153 4383 \n","186102 2011-02-01 3484 417 \n","417745 2017-02-01 55835 511 \n","566283 2021-02-01 56565 5075 \n","537362 2020-05-01 126 594 \n","13493 2008-03-01 10495 3242 \n","\n"," plant_name_eia utility_id_eia utility_id_pudl \\\n","87350 Vienna Ops 19830 2702 \n","145530 Fair 3258 838 \n","429151 Dan E Karn 4254 81 \n","218066 Cecil Lynch 814 106 \n","394022 Killen 4922 317 \n","248452 Mitchell 14006 4386 \n","620371 Fredonia 15500 277 \n","80924 Abbott Univ of IL Urbana Champ 19528 3585 \n","579865 Wilson 7140 123 \n","330383 CP Crane Power, LLC 58177 2948 \n","99692 Theodore 195 18 \n","163227 SCA 16534 3069 \n","446563 Frederickson Power LP 56613 1810 \n","202520 Union Power Station 54796 3563 \n","531054 Guadalupe 57045 1944 \n","186102 Nichols 17718 302 \n","417745 Rocky Mountain Energy Center 15466 272 \n","566283 J Lamar Stall 17698 301 \n","537362 H. 
Wilson Sundt Generating Station 24211 330 \n","13493 Rumford Cogen 54784 2602 \n","\n"," utility_name_eia state contract_type_code \\\n","87350 Vienna Operations Inc MD S \n","145530 Central Iowa Power Coop IA S \n","429151 Consumers Energy Co MI C \n","218066 Entergy Arkansas LLC AR S \n","394022 Dayton Power & Light Co OH C \n","248452 Ohio Power Co WV C \n","620371 Puget Sound Energy Inc WA C \n","80924 University of Illinois IL C \n","579865 Georgia Power Co GA C \n","330383 Raven Power Holdings LLC MD S \n","99692 Alabama Power Co AL S \n","163227 Sacramento Municipal Util Dist CA S \n","446563 Frederickson Power LP WA S \n","202520 Union Power Partners LP AR S \n","531054 Guadalupe Power Partners LP TX T \n","186102 Southwestern Public Service Co TX S \n","417745 Public Service Co of Colorado CO S \n","566283 Southwestern Electric Power Co LA S \n","537362 Tucson Electric Power Co AZ S \n","13493 NewPage Corporation ME C \n","\n"," contract_expiration_date ... natural_gas_transport_code \\\n","87350 NaT ... \n","145530 NaT ... firm \n","429151 2018-03-01 ... \n","218066 NaT ... firm \n","394022 2016-12-01 ... \n","248452 2013-12-01 ... \n","620371 2022-10-01 ... firm \n","80924 2009-06-01 ... \n","579865 2022-04-01 ... \n","330383 NaT ... \n","99692 NaT ... firm \n","163227 NaT ... firm \n","446563 NaT ... firm \n","202520 NaT ... firm \n","531054 NaT ... firm \n","186102 NaT ... firm \n","417745 NaT ... interruptible \n","566283 NaT ... firm \n","537362 NaT ... firm \n","13493 2008-12-01 ... 
\n","\n"," natural_gas_delivery_contract_type_code moisture_content_pct \\\n","87350 NaN \n","145530 NaN \n","429151 26.24 \n","218066 NaN \n","394022 6.24 \n","248452 NaN \n","620371 firm NaN \n","80924 NaN \n","579865 NaN \n","330383 26.72 \n","99692 NaN \n","163227 NaN \n","446563 firm NaN \n","202520 NaN \n","531054 firm NaN \n","186102 NaN \n","417745 interruptible NaN \n","566283 firm NaN \n","537362 firm NaN \n","13493 NaN \n","\n"," chlorine_content_ppm data_maturity mine_id_msha mine_name \\\n","87350 final \n","145530 final \n","429151 0 final 4800977 black thunder \n","218066 final \n","394022 0 final 1103203 white oak mine 1 \n","248452 final 4601368 fanco \n","620371 provisional \n","80924 final 1102408 gateway mine \n","579865 final \n","330383 0 final 4800977 black thunder \n","99692 final \n","163227 final \n","446563 final \n","202520 final \n","531054 final \n","186102 final \n","417745 final \n","566283 final \n","537362 final \n","13493 final \n","\n"," mine_state coalmine_county_id_fips mine_type_code \n","87350 \n","145530 \n","429151 WY 56005 S \n","218066 \n","394022 IL 17065 U \n","248452 WV 54045 P \n","620371 \n","80924 IL 17157 U \n","579865 \n","330383 WY 56005 S \n","99692 \n","163227 \n","446563 \n","202520 \n","531054 \n","186102 \n","417745 \n","566283 \n","537362 \n","13493 \n","\n","[20 rows x 36 columns]"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["frc.sample(20)"]},{"cell_type":"markdown","id":"f9f9ab46","metadata":{"papermill":{"duration":0.004698,"end_time":"2023-10-09T18:07:09.595657","exception":false,"start_time":"2023-10-09T18:07:09.590959","status":"completed"},"tags":[]},"source":["# Read EPA Hourly Emissions Data from Apache Parquet\n","* The full hourly emissions time series for thousands of US power plants covering 1995-2022 contains almost a billion records.\n","* The data is stored in a single [Apache Parquet file](https://parquet.apache.org/) with row-groups defined by year and 
state.\n","* This compressed columnar format enables very efficient queries with appropriate tooling, including [Dask](https://www.dask.org/) and [PyArrow](https://arrow.apache.org/docs/python/index.html).\n","* Reading the entire dataset into memory at once will probably exceed the available RAM.\n","* The `filters` argument used below takes conditions in [Disjunctive Normal Form](https://blog.datasyndrome.com/python-and-parquet-performance-e71da65269ce): the conditions within each inner list are combined with AND, and the inner lists are combined with OR.\n","* Using Dask's lazy evaluation and the filter criteria, we can minimize the data read from disk and limit memory usage.\n","* The Dask project has lots of [tutorials and documentation](https://www.dask.org/get-started) if you want to learn more.\n","* Other tools like [DuckDB](https://duckdb.org/docs/data/parquet/overview.html) ([Python API](https://duckdb.org/docs/api/python/overview)) also provide good Parquet support."]},{"cell_type":"markdown","id":"728ba72f","metadata":{"papermill":{"duration":0.004686,"end_time":"2023-10-09T18:07:09.60531","exception":false,"start_time":"2023-10-09T18:07:09.600624","status":"completed"},"tags":[]},"source":["## Using Dask to selectively read Parquet data"]},{"cell_type":"code","execution_count":5,"id":"1b84dad1","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:09.616462Z","iopub.status.busy":"2023-10-09T18:07:09.616078Z","iopub.status.idle":"2023-10-09T18:07:11.855018Z","shell.execute_reply":"2023-10-09T18:07:11.853817Z"},"papermill":{"duration":2.247096,"end_time":"2023-10-09T18:07:11.857201","exception":false,"start_time":"2023-10-09T18:07:09.610105","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/dask/dataframe/io/parquet/core.py:1516: UserWarning: Sorted columns detected: ['year', 'state']\n","Use the `index` argument to set a sorted column as your index to create a DataFrame collection with known `divisions`.\n"," warnings.warn(\n"]},{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 
1662192 entries, 0 to 1662191\n","Data columns (total 16 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 plant_id_eia 1662192 non-null int32[pyarrow] \n"," 1 plant_id_epa 1662192 non-null int32[pyarrow] \n"," 2 emissions_unit_id_epa 1662192 non-null string \n"," 3 operating_datetime_utc 1662192 non-null timestamp[ms, tz=UTC][pyarrow] \n"," 4 year 1662192 non-null int32[pyarrow] \n"," 5 state 1662192 non-null dictionary[pyarrow]\n"," 6 operating_time_hours 1662192 non-null float[pyarrow] \n"," 7 gross_load_mw 856134 non-null float[pyarrow] \n"," 8 heat_content_mmbtu 858087 non-null float[pyarrow] \n"," 9 steam_load_1000_lbs 1953 non-null float[pyarrow] \n"," 10 so2_mass_lbs 858087 non-null float[pyarrow] \n"," 11 so2_mass_measurement_code 858087 non-null dictionary[pyarrow]\n"," 12 nox_mass_lbs 858087 non-null float[pyarrow] \n"," 13 nox_mass_measurement_code 858087 non-null dictionary[pyarrow]\n"," 14 co2_mass_tons 858087 non-null float[pyarrow] \n"," 15 co2_mass_measurement_code 858087 non-null dictionary[pyarrow]\n","dtypes: dictionary[pyarrow](4), float[pyarrow](7), int32[pyarrow](3), string(1), timestamp[ms, tz=UTC][pyarrow](1)\n","memory usage: 114.6 MB\n","CPU times: user 1.66 s, sys: 603 ms, total: 2.27 s\n","Wall time: 2.23 s\n"]}],"source":["%%time \n","from dask import dataframe as dd\n","# In a DNF filter, the inner lists of conditions are combined with AND\n","# while the outer list of conditions are combined with OR\n","# So this filter will get all 2019 and 2020 records for CO and WY:\n","state_year_filters = [\n"," [('year', '=', 2019), ('state', '=', 'CO')],\n"," [('year', '=', 2019), ('state', '=', 'WY')],\n"," [('year', '=', 2020), ('state', '=', 'CO')],\n"," [('year', '=', 2020), ('state', '=', 'WY')],\n","]\n","co_wy_cems = dd.read_parquet(\n"," f\"{pudl_path}/hourly_emissions_epacems.parquet\",\n"," engine=\"pyarrow\",\n"," dtype_backend=\"pyarrow\",\n"," 
filters=state_year_filters,\n",").compute()\n","co_wy_cems.info()"]},{"cell_type":"code","execution_count":6,"id":"1199fed8","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:11.869253Z","iopub.status.busy":"2023-10-09T18:07:11.868516Z","iopub.status.idle":"2023-10-09T18:07:11.983765Z","shell.execute_reply":"2023-10-09T18:07:11.982705Z"},"papermill":{"duration":0.123343,"end_time":"2023-10-09T18:07:11.98569","exception":false,"start_time":"2023-10-09T18:07:11.862347","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
plant_id_eiaplant_id_epaemissions_unit_id_epaoperating_datetime_utcyearstateoperating_time_hoursgross_load_mwheat_content_mmbtusteam_load_1000_lbsso2_mass_lbsso2_mass_measurement_codenox_mass_lbsnox_mass_measurement_codeco2_mass_tonsco2_mass_measurement_code
2788401068210682GT52020-04-02 15:00:00+00:002020CO1.032.0402.5<NA>0.242Measured41.860001Calculated23.9Measured
13944655699856998CT022019-07-05 00:00:00+00:002019CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
105455767616761C2019-09-15 04:00:00+00:002019CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
6628404162416232019-12-27 15:00:00+00:002019WY0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
6849346204620422019-07-06 05:00:00+00:002019WY1.0415.04396.600098<NA>397.799988Measured580.351013Calculated461.100006Measured
3348505512755127CT12020-08-18 09:00:00+00:002020CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
13815885699856998CT012019-01-14 11:00:00+00:002019CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
6416104162416212019-07-26 01:00:00+00:002019WY1.0164.01818.599976<NA>315.100006Measured345.533997Calculated190.699997Measured
22293267616761D2020-11-21 03:00:00+00:002020CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
2832961068210682GT52020-10-05 07:00:00+00:002020CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
\n","
"],"text/plain":[" plant_id_eia plant_id_epa emissions_unit_id_epa \\\n","278840 10682 10682 GT5 \n","1394465 56998 56998 CT02 \n","1054557 6761 6761 C \n","662840 4162 4162 3 \n","684934 6204 6204 2 \n","334850 55127 55127 CT1 \n","1381588 56998 56998 CT01 \n","641610 4162 4162 1 \n","222932 6761 6761 D \n","283296 10682 10682 GT5 \n","\n"," operating_datetime_utc year state operating_time_hours \\\n","278840 2020-04-02 15:00:00+00:00 2020 CO 1.0 \n","1394465 2019-07-05 00:00:00+00:00 2019 CO 0.0 \n","1054557 2019-09-15 04:00:00+00:00 2019 CO 0.0 \n","662840 2019-12-27 15:00:00+00:00 2019 WY 0.0 \n","684934 2019-07-06 05:00:00+00:00 2019 WY 1.0 \n","334850 2020-08-18 09:00:00+00:00 2020 CO 0.0 \n","1381588 2019-01-14 11:00:00+00:00 2019 CO 0.0 \n","641610 2019-07-26 01:00:00+00:00 2019 WY 1.0 \n","222932 2020-11-21 03:00:00+00:00 2020 CO 0.0 \n","283296 2020-10-05 07:00:00+00:00 2020 CO 0.0 \n","\n"," gross_load_mw heat_content_mmbtu steam_load_1000_lbs so2_mass_lbs \\\n","278840 32.0 402.5 0.242 \n","1394465 \n","1054557 \n","662840 \n","684934 415.0 4396.600098 397.799988 \n","334850 \n","1381588 \n","641610 164.0 1818.599976 315.100006 \n","222932 \n","283296 \n","\n"," so2_mass_measurement_code nox_mass_lbs nox_mass_measurement_code \\\n","278840 Measured 41.860001 Calculated \n","1394465 \n","1054557 \n","662840 \n","684934 Measured 580.351013 Calculated \n","334850 \n","1381588 \n","641610 Measured 345.533997 Calculated \n","222932 \n","283296 \n","\n"," co2_mass_tons co2_mass_measurement_code \n","278840 23.9 Measured \n","1394465 \n","1054557 \n","662840 \n","684934 461.100006 Measured \n","334850 \n","1381588 \n","641610 190.699997 Measured \n","222932 \n","283296 
"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["co_wy_cems.sample(10)"]},{"cell_type":"markdown","id":"04049803","metadata":{"papermill":{"duration":0.005157,"end_time":"2023-10-09T18:07:11.996503","exception":false,"start_time":"2023-10-09T18:07:11.991346","status":"completed"},"tags":[]},"source":["## Read all Colorado Emissions Data"]},{"cell_type":"code","execution_count":7,"id":"ae1881f0","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:12.00897Z","iopub.status.busy":"2023-10-09T18:07:12.008622Z","iopub.status.idle":"2023-10-09T18:07:13.857795Z","shell.execute_reply":"2023-10-09T18:07:13.857034Z"},"papermill":{"duration":1.858175,"end_time":"2023-10-09T18:07:13.859952","exception":false,"start_time":"2023-10-09T18:07:12.001777","status":"completed"},"tags":[]},"outputs":[{"name":"stderr","output_type":"stream","text":["/opt/conda/lib/python3.10/site-packages/dask/dataframe/io/parquet/core.py:1516: UserWarning: Sorted columns detected: ['state']\n","Use the `index` argument to set a sorted column as your index to create a DataFrame collection with known `divisions`.\n"," warnings.warn(\n"]},{"name":"stdout","output_type":"stream","text":["\n","RangeIndex: 13631472 entries, 0 to 13631471\n","Data columns (total 16 columns):\n"," # Column Dtype \n","--- ------ ----- \n"," 0 plant_id_eia int32[pyarrow] \n"," 1 plant_id_epa int32[pyarrow] \n"," 2 emissions_unit_id_epa string \n"," 3 operating_datetime_utc timestamp[ms, tz=UTC][pyarrow] \n"," 4 year int32[pyarrow] \n"," 5 state dictionary[pyarrow]\n"," 6 operating_time_hours float[pyarrow] \n"," 7 gross_load_mw float[pyarrow] \n"," 8 heat_content_mmbtu float[pyarrow] \n"," 9 steam_load_1000_lbs float[pyarrow] \n"," 10 so2_mass_lbs float[pyarrow] \n"," 11 so2_mass_measurement_code dictionary[pyarrow]\n"," 12 nox_mass_lbs float[pyarrow] \n"," 13 nox_mass_measurement_code dictionary[pyarrow]\n"," 14 co2_mass_tons float[pyarrow] \n"," 15 co2_mass_measurement_code 
dictionary[pyarrow]\n","dtypes: dictionary[pyarrow](4), float[pyarrow](7), int32[pyarrow](3), string(1), timestamp[ms, tz=UTC][pyarrow](1)\n","memory usage: 934.8 MB\n","CPU times: user 2.33 s, sys: 1.29 s, total: 3.61 s\n","Wall time: 1.84 s\n"]}],"source":["%%time\n","colorado_cems = dd.read_parquet(\n"," f\"{pudl_path}/hourly_emissions_epacems.parquet\",\n"," engine=\"pyarrow\",\n"," dtype_backend=\"pyarrow\",\n"," filters=[(\"state\", \"=\", \"CO\")],\n",").compute()\n","colorado_cems.info()"]},{"cell_type":"code","execution_count":8,"id":"24c39e19","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:13.874336Z","iopub.status.busy":"2023-10-09T18:07:13.873465Z","iopub.status.idle":"2023-10-09T18:07:15.455576Z","shell.execute_reply":"2023-10-09T18:07:15.454404Z"},"papermill":{"duration":1.591965,"end_time":"2023-10-09T18:07:15.457847","exception":false,"start_time":"2023-10-09T18:07:13.865882","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
plant_id_eiaplant_id_epaemissions_unit_id_epaoperating_datetime_utcyearstateoperating_time_hoursgross_load_mwheat_content_mmbtusteam_load_1000_lbsso2_mass_lbsso2_mass_measurement_codenox_mass_lbsnox_mass_measurement_codeco2_mass_tonsco2_mass_measurement_code
1300237746946952017-03-10 00:00:00+00:002017CO1.0129.0941.599976<NA>0.6Measured11.299Calculated56.0Measured
628569049249262006-04-25 01:00:00+00:002006CO1.083.0927.400024<NA>628.0Measured324.589996Calculated95.099998Measured
12489335520755207CT82005-09-23 04:00:00+00:002005CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
13474233554535545362017-01-19 16:00:00+00:002017CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
1262920267616761F2012-03-15 01:00:00+00:002012CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
1324111767616761C2017-06-10 12:00:00+00:002017CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
414349852752712008-02-28 01:00:00+00:002008CO1.0106.01159.900024<NA>344.200012Substitute461.640015Calculated119.0Measured and Substitute
1011827967616761D2014-03-06 06:00:00+00:002014CO0.0<NA><NA><NA><NA><NA><NA><NA><NA><NA>
859781146946941999-10-20 10:00:00+00:001999CO1.0362.03076.800049<NA>1945.5Measured984.575989Calculated315.700012Measured
1145251747047022002-06-27 12:00:00+00:002002CO1.0208.01874.900024<NA>1108.699951Measured883.078003Calculated192.399994Measured
\n","
"],"text/plain":[" plant_id_eia plant_id_epa emissions_unit_id_epa \\\n","13002377 469 469 5 \n","6285690 492 492 6 \n","1248933 55207 55207 CT8 \n","13474233 55453 55453 6 \n","12629202 6761 6761 F \n","13241117 6761 6761 C \n","4143498 527 527 1 \n","10118279 6761 6761 D \n","8597811 469 469 4 \n","11452517 470 470 2 \n","\n"," operating_datetime_utc year state operating_time_hours \\\n","13002377 2017-03-10 00:00:00+00:00 2017 CO 1.0 \n","6285690 2006-04-25 01:00:00+00:00 2006 CO 1.0 \n","1248933 2005-09-23 04:00:00+00:00 2005 CO 0.0 \n","13474233 2017-01-19 16:00:00+00:00 2017 CO 0.0 \n","12629202 2012-03-15 01:00:00+00:00 2012 CO 0.0 \n","13241117 2017-06-10 12:00:00+00:00 2017 CO 0.0 \n","4143498 2008-02-28 01:00:00+00:00 2008 CO 1.0 \n","10118279 2014-03-06 06:00:00+00:00 2014 CO 0.0 \n","8597811 1999-10-20 10:00:00+00:00 1999 CO 1.0 \n","11452517 2002-06-27 12:00:00+00:00 2002 CO 1.0 \n","\n"," gross_load_mw heat_content_mmbtu steam_load_1000_lbs \\\n","13002377 129.0 941.599976 \n","6285690 83.0 927.400024 \n","1248933 \n","13474233 \n","12629202 \n","13241117 \n","4143498 106.0 1159.900024 \n","10118279 \n","8597811 362.0 3076.800049 \n","11452517 208.0 1874.900024 \n","\n"," so2_mass_lbs so2_mass_measurement_code nox_mass_lbs \\\n","13002377 0.6 Measured 11.299 \n","6285690 628.0 Measured 324.589996 \n","1248933 \n","13474233 \n","12629202 \n","13241117 \n","4143498 344.200012 Substitute 461.640015 \n","10118279 \n","8597811 1945.5 Measured 984.575989 \n","11452517 1108.699951 Measured 883.078003 \n","\n"," nox_mass_measurement_code co2_mass_tons co2_mass_measurement_code \n","13002377 Calculated 56.0 Measured \n","6285690 Calculated 95.099998 Measured \n","1248933 \n","13474233 \n","12629202 \n","13241117 \n","4143498 Calculated 119.0 Measured and Substitute \n","10118279 \n","8597811 Calculated 315.700012 Measured \n","11452517 Calculated 192.399994 Measured 
"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["colorado_cems.sample(10)"]},{"cell_type":"markdown","id":"80ee2335","metadata":{"papermill":{"duration":0.005817,"end_time":"2023-10-09T18:07:15.469807","exception":false,"start_time":"2023-10-09T18:07:15.46399","status":"completed"},"tags":[]},"source":["## Visualize Hourly Power Plant Operations\n","* Let's find a particular power plant and look at its long-term operations.\n","* Say we want to investigate [Xcel Energy's troubled Comanche coal plant](https://coloradosun.com/?s=comanche%20pueblo) in Pueblo, CO?\n","* The EPA CEMS data only has contains the EIA Plant ID, not its name or any ownership information.\n","* The PUDL database links these IDs to much more extensive EIA data.\n","* We can look for the Comanche plant in the PUDL DB and use that information to select the appropriate EPA CEMS data to plot."]},{"cell_type":"code","execution_count":9,"id":"027c7aa6","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:15.483808Z","iopub.status.busy":"2023-10-09T18:07:15.482983Z","iopub.status.idle":"2023-10-09T18:07:23.226918Z","shell.execute_reply":"2023-10-09T18:07:23.225931Z"},"papermill":{"duration":7.753083,"end_time":"2023-10-09T18:07:23.228851","exception":false,"start_time":"2023-10-09T18:07:15.475768","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["CPU times: user 6.92 s, sys: 319 ms, total: 7.24 s\n","Wall time: 7.72 s\n"]},{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
plant_id_eiaplant_name_eiacitycountylatitudelongitudestatestreet_addresszip_codetimezone...transmission_distribution_owner_idtransmission_distribution_owner_nametransmission_distribution_owner_stateutility_id_eiawater_sourcedata_maturityplant_id_pudlutility_name_eiautility_id_pudlbalancing_authority_code_eia_consistent_rate
650937922North PlantBrooklynPoweshiek41.7394-92.4394IA1250 Center St.52211America/Chicago...3258Central Iowa Power Cooperative<NA>2287Municipalityfinal2929City of Brooklyn - (IA)9311.0
7610910641Cambria CogenEbensburgCambria40.474167-78.7014PA243 Rubisch Road15931America/New_York...<NA><NA><NA>2884Wilmore Reservoirfinal3303Cambria CoGen Co7731.0
19077163613DowneyDowneyLos Angeles33.919368-118.1289CA9333 Imperial Hwy90242America/Los_Angeles...17609Southern California Edison CoCA61344<NA>final14384Advanced Microgrid Solutions62281.0
7636610670DeepwaterPasadenaHarris29.716552-95.22736TX701 Light Company Road77506America/Chicago...<NA><NA><NA>156Riverfinal8104AES Deepwater Inc38441.0
7490710539TheresaTheresaJefferson44.21742-75.79517NYBridge Street13694America/New_York...13573Niagara Mohawk Power Corp.NY34688Indian Riverfinal3258CHI Energy Inc8721.0
151891058Sixth StreetCedar RapidsLinn41.984768-91.66861IA509 6th Street NE52402America/Chicago...<NA><NA><NA>9162River/Lakefinal1220Interstate Power and Light92841.0
561916768SikestonSikestonScott36.8791-89.6209MO1551 West Wakefield St63801America/Chicago...17177City of SikestonMO17177Wellsfinal2667City of Sikeston - (MO)11921.0
584257279O'Shaughnessy HydroDublinDelaware40.153328-83.12672OH5959 Glick Rd43017America/New_York...<NA><NA><NA>4065O'Shaughnessy Reservoirfinal2729City of Columbus - (OH)9591.0
19505764663WLWPCF Cogeneration FacilityKeizerMarion45.00812-123.0529OR5915 Windsor Island Rd N97303America/Los_Angeles...15248Portland General Electric CoOR64262<NA>final15787City of Salem Public Works90911.0
15971859129CSD 2- Freedom HighOakleyContra Costa37.969722-121.7194CA1050 Neroly Road95037America/Los_Angeles...14328Pacific Gas & Electric CoCA59622<NA>final7001Ahana Renewables, LLC52771.0
\n","

10 rows × 54 columns

\n","
"],"text/plain":[" plant_id_eia plant_name_eia city \\\n","65093 7922 North Plant Brooklyn \n","76109 10641 Cambria Cogen Ebensburg \n","190771 63613 Downey Downey \n","76366 10670 Deepwater Pasadena \n","74907 10539 Theresa Theresa \n","15189 1058 Sixth Street Cedar Rapids \n","56191 6768 Sikeston Sikeston \n","58425 7279 O'Shaughnessy Hydro Dublin \n","195057 64663 WLWPCF Cogeneration Facility Keizer \n","159718 59129 CSD 2- Freedom High Oakley \n","\n"," county latitude longitude state street_address \\\n","65093 Poweshiek 41.7394 -92.4394 IA 1250 Center St. \n","76109 Cambria 40.474167 -78.7014 PA 243 Rubisch Road \n","190771 Los Angeles 33.919368 -118.1289 CA 9333 Imperial Hwy \n","76366 Harris 29.716552 -95.22736 TX 701 Light Company Road \n","74907 Jefferson 44.21742 -75.79517 NY Bridge Street \n","15189 Linn 41.984768 -91.66861 IA 509 6th Street NE \n","56191 Scott 36.8791 -89.6209 MO 1551 West Wakefield St \n","58425 Delaware 40.153328 -83.12672 OH 5959 Glick Rd \n","195057 Marion 45.00812 -123.0529 OR 5915 Windsor Island Rd N \n","159718 Contra Costa 37.969722 -121.7194 CA 1050 Neroly Road \n","\n"," zip_code timezone ... transmission_distribution_owner_id \\\n","65093 52211 America/Chicago ... 3258 \n","76109 15931 America/New_York ... \n","190771 90242 America/Los_Angeles ... 17609 \n","76366 77506 America/Chicago ... \n","74907 13694 America/New_York ... 13573 \n","15189 52402 America/Chicago ... \n","56191 63801 America/Chicago ... 17177 \n","58425 43017 America/New_York ... \n","195057 97303 America/Los_Angeles ... 15248 \n","159718 95037 America/Los_Angeles ... 14328 \n","\n"," transmission_distribution_owner_name \\\n","65093 Central Iowa Power Cooperative \n","76109 \n","190771 Southern California Edison Co \n","76366 \n","74907 Niagara Mohawk Power Corp. 
\n","15189 \n","56191 City of Sikeston \n","58425 \n","195057 Portland General Electric Co \n","159718 Pacific Gas & Electric Co \n","\n"," transmission_distribution_owner_state utility_id_eia \\\n","65093 2287 \n","76109 2884 \n","190771 CA 61344 \n","76366 156 \n","74907 NY 34688 \n","15189 9162 \n","56191 MO 17177 \n","58425 4065 \n","195057 OR 64262 \n","159718 CA 59622 \n","\n"," water_source data_maturity plant_id_pudl \\\n","65093 Municipality final 2929 \n","76109 Wilmore Reservoir final 3303 \n","190771 final 14384 \n","76366 River final 8104 \n","74907 Indian River final 3258 \n","15189 River/Lake final 1220 \n","56191 Wells final 2667 \n","58425 O'Shaughnessy Reservoir final 2729 \n","195057 final 15787 \n","159718 final 7001 \n","\n"," utility_name_eia utility_id_pudl \\\n","65093 City of Brooklyn - (IA) 931 \n","76109 Cambria CoGen Co 773 \n","190771 Advanced Microgrid Solutions 6228 \n","76366 AES Deepwater Inc 3844 \n","74907 CHI Energy Inc 872 \n","15189 Interstate Power and Light 9284 \n","56191 City of Sikeston - (MO) 1192 \n","58425 City of Columbus - (OH) 959 \n","195057 City of Salem Public Works 9091 \n","159718 Ahana Renewables, LLC 5277 \n","\n"," balancing_authority_code_eia_consistent_rate \n","65093 1.0 \n","76109 1.0 \n","190771 1.0 \n","76366 1.0 \n","74907 1.0 \n","15189 1.0 \n","56191 1.0 \n","58425 1.0 \n","195057 1.0 \n","159718 1.0 \n","\n","[10 rows x 54 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["%%time\n","plants_eia = pd.read_sql(\"denorm_plants_eia\", pudl_engine).convert_dtypes()\n","plants_eia.sample(10)"]},{"cell_type":"markdown","id":"d9385571","metadata":{"papermill":{"duration":0.007516,"end_time":"2023-10-09T18:07:23.24291","exception":false,"start_time":"2023-10-09T18:07:23.235394","status":"completed"},"tags":[]},"source":["By selecting a few informative columns and records with \"Comanche\" in the plant name, we find that the coal plant we're looking for has 
`plant_id_eia==470`"]},{"cell_type":"code","execution_count":10,"id":"9c5df574","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:23.258277Z","iopub.status.busy":"2023-10-09T18:07:23.257667Z","iopub.status.idle":"2023-10-09T18:07:23.423503Z","shell.execute_reply":"2023-10-09T18:07:23.422758Z"},"papermill":{"duration":0.175561,"end_time":"2023-10-09T18:07:23.425244","exception":false,"start_time":"2023-10-09T18:07:23.249683","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
plant_id_eiaplant_name_eiautility_name_eiacitystatelatitudelongitude
7460470ComanchePublic Service Co of ColoradoPuebloCO38.2081-104.5747
507756145Comanche PeakLuminant Generation Company LLCGlen RoseTX32.298365-97.78552
507886145Comanche PeakTXU Generation Co LPGlen RoseTX32.298365-97.78552
673878059ComanchePublic Service Co of OklahomaLawtonOK34.5431-98.3244
16457059656Comanche SolarNovatus EnergyPuebloCO38.205278-104.5667
16457559656Comanche SolarComanche LLCPuebloCO38.205278-104.5667
\n","
"],"text/plain":[" plant_id_eia plant_name_eia utility_name_eia \\\n","7460 470 Comanche Public Service Co of Colorado \n","50775 6145 Comanche Peak Luminant Generation Company LLC \n","50788 6145 Comanche Peak TXU Generation Co LP \n","67387 8059 Comanche Public Service Co of Oklahoma \n","164570 59656 Comanche Solar Novatus Energy \n","164575 59656 Comanche Solar Comanche LLC \n","\n"," city state latitude longitude \n","7460 Pueblo CO 38.2081 -104.5747 \n","50775 Glen Rose TX 32.298365 -97.78552 \n","50788 Glen Rose TX 32.298365 -97.78552 \n","67387 Lawton OK 34.5431 -98.3244 \n","164570 Pueblo CO 38.205278 -104.5667 \n","164575 Pueblo CO 38.205278 -104.5667 "]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["plants_eia.loc[\n"," plants_eia.plant_name_eia.str.contains(\"comanche\", case=False),\n"," [\n"," \"plant_id_eia\",\n"," \"plant_name_eia\",\n"," \"utility_name_eia\",\n"," \"city\",\n"," \"state\",\n"," \"latitude\",\n"," \"longitude\",\n"," ]\n","].drop_duplicates()"]},{"cell_type":"code","execution_count":11,"id":"ee57813f","metadata":{"execution":{"iopub.execute_input":"2023-10-09T18:07:23.440274Z","iopub.status.busy":"2023-10-09T18:07:23.439682Z","iopub.status.idle":"2023-10-09T18:07:24.569329Z","shell.execute_reply":"2023-10-09T18:07:24.568223Z"},"papermill":{"duration":1.139773,"end_time":"2023-10-09T18:07:24.571565","exception":false,"start_time":"2023-10-09T18:07:23.431792","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","Index: 569760 entries, 26280 to 13044551\n","Data columns (total 16 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 plant_id_eia 569760 non-null int32[pyarrow] \n"," 1 plant_id_epa 569760 non-null int32[pyarrow] \n"," 2 emissions_unit_id_epa 569760 non-null string \n"," 3 operating_datetime_utc 569760 non-null timestamp[ms, tz=UTC][pyarrow] \n"," 4 year 569760 non-null int32[pyarrow] \n"," 5 state 569760 
non-null dictionary[pyarrow]\n"," 6 operating_time_hours 569476 non-null float[pyarrow] \n"," 7 gross_load_mw 483648 non-null float[pyarrow] \n"," 8 heat_content_mmbtu 483648 non-null float[pyarrow] \n"," 9 steam_load_1000_lbs 0 non-null float[pyarrow] \n"," 10 so2_mass_lbs 483648 non-null float[pyarrow] \n"," 11 so2_mass_measurement_code 483648 non-null dictionary[pyarrow]\n"," 12 nox_mass_lbs 483514 non-null float[pyarrow] \n"," 13 nox_mass_measurement_code 479666 non-null dictionary[pyarrow]\n"," 14 co2_mass_tons 483648 non-null float[pyarrow] \n"," 15 co2_mass_measurement_code 483648 non-null dictionary[pyarrow]\n","dtypes: dictionary[pyarrow](4), float[pyarrow](7), int32[pyarrow](3), string(1), timestamp[ms, tz=UTC][pyarrow](1)\n","memory usage: 42.9 MB\n"]}],"source":["comanche_cems = colorado_cems[colorado_cems.plant_id_eia==470]\n","comanche_cems.info()"]},{"cell_type":"code","execution_count":null,"id":"1e7c2c4d","metadata":{"papermill":{"duration":0.006522,"end_time":"2023-10-09T18:07:24.585096","exception":false,"start_time":"2023-10-09T18:07:24.578574","status":"completed"},"tags":[]},"outputs":[],"source":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.12"},"papermill":{"default_parameters":{},"duration":37.807388,"end_time":"2023-10-09T18:07:25.915154","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-10-09T18:06:48.107766","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file