Skip to content

Commit

Permalink
run black linter
Browse files Browse the repository at this point in the history
  • Loading branch information
Miki Verma committed Sep 24, 2024
1 parent 8773353 commit c09fd40
Show file tree
Hide file tree
Showing 17 changed files with 1,752 additions and 1,109 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Run various lint checks, like flake8 and black, to make sure
# our code remains in good shape, avoids common bugs, and follows
# common coding conventions.
name: lint

on:
  push:
    branches-ignore:
      - main

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
      #----------------------------------------------
      #  check-out repo and set-up python
      #----------------------------------------------
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string (e.g. "3.10" would parse as 3.1).
          python-version: "3.12"
      #----------------------------------------------
      #  load pip cache if cache exists
      #----------------------------------------------
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip
          restore-keys: ${{ runner.os }}-pip
      #----------------------------------------------
      #  run isort
      #----------------------------------------------
      - run: python -m pip install isort
      - run: |
          isort --profile black ./src ./tests
      #----------------------------------------------
      #  run black
      #----------------------------------------------
      # Quoted: unquoted [jupyter] can be glob-expanded by the shell.
      - run: python -m pip install "black[jupyter]"
      - run: |
          black ./src ./tests --diff
          black ./src ./tests --check
209 changes: 151 additions & 58 deletions deprecated/Yearly_model/Full_data_creation.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# Databricks notebook source
# MAGIC %md # Creating the Full Dataset used for modeling.
# MAGIC
# MAGIC ### Goal
# MAGIC Join the ResStock outputs to weather and metadata, and apply the necessary aggregation to energy consumption.
# MAGIC
# MAGIC ### Process
# MAGIC Please create the weather_data tables for the required aggregation, and the metadata with upgrades table, if you have not already done so, before running this notebook.
# MAGIC
# MAGIC ##### Inputs:
# MAGIC - `building_model.resstock_outputs_hourly`: ResStock hourly output table
# MAGIC - `building_model.weather_data_yearly`: Contains yearly weather data. Can also use monthly or daily; we just need to change the suffix
# MAGIC - `building_model.metadata_w_upgrades`: Contains metadata with upgrades.
# MAGIC
# MAGIC ##### Outputs:
# MAGIC - `building_model.resstock_yearly_with_metadata_weather_upgrades`: Contains the final table used for modeling. Can also have a daily or monthly version as well.

# COMMAND ----------

import pyspark.sql.functions as F
from pyspark.sql.functions import avg, col

# Raise the shuffle partition count for the joins/aggregations over the
# large hourly ResStock table.
spark.conf.set("spark.sql.shuffle.partitions", 1536)


# COMMAND ----------

# Source tables: hourly ResStock outputs, yearly weather, metadata w/ upgrades.
# NOTE(review): to build the monthly/daily dataset, point the weather path at
# the matching suffix (e.g. building_model.weather_data_monthly).
resstock_path = "building_model.resstock_outputs_hourly"
weather_data_full_path = "building_model.weather_data_yearly"
metadata_path = "building_model.metadata_w_upgrades"

resstock = spark.table(resstock_path)
metadata = spark.table(metadata_path)
Expand All @@ -40,75 +41,161 @@

# Define end uses by fuel type; these name the ResStock output columns that
# will be totalled per fuel and then dropped.

# Electric heating: fans/pumps, heat-pump backup, and primary heating.
heating_electric = [
    "out_electricity_heating_fans_pumps_energy_consumption_kwh",
    "out_electricity_heating_hp_bkup_energy_consumption_kwh",
    "out_electricity_heating_energy_consumption_kwh",
]

# Electric cooling: fans/pumps and primary cooling.
cooling_electric = [
    "out_electricity_cooling_fans_pumps_energy_consumption_kwh",
    "out_electricity_cooling_energy_consumption_kwh",
]

# Fossil-fuel heating: heat-pump backup plus primary heating per fuel.
heating_nat_gas = [
    "out_natural_gas_heating_hp_bkup_energy_consumption_kwh",
    "out_natural_gas_heating_energy_consumption_kwh",
]

heating_fuel_oil = [
    "out_fuel_oil_heating_hp_bkup_energy_consumption_kwh",
    "out_fuel_oil_heating_energy_consumption_kwh",
]

heating_propane = [
    "out_propane_heating_hp_bkup_energy_consumption_kwh",
    "out_propane_heating_energy_consumption_kwh",
]

# COMMAND ----------

# Collapse each fuel's per-end-use columns into a single "_total" column.
# NOTE: `sum` here is the Python builtin applied to pyspark Column objects
# (pyspark.sql.functions.sum is only imported further down the notebook).
resstock = (
    resstock.withColumn(
        "out_electricity_heating_total",
        sum(resstock[c] for c in heating_electric),
    )
    .withColumn(
        "out_electricity_cooling_total",
        sum(resstock[c] for c in cooling_electric),
    )
    .withColumn(
        "out_natural_gas_heating_total",
        sum(resstock[c] for c in heating_nat_gas),
    )
    .withColumn(
        "out_fuel_oil_heating_total",
        sum(resstock[c] for c in heating_fuel_oil),
    )
    .withColumn(
        "out_propane_heating_total",
        sum(resstock[c] for c in heating_propane),
    )
)

# The per-end-use component columns are now redundant; keep only the totals.
drop_list = (
    heating_electric
    + cooling_electric
    + heating_fuel_oil
    + heating_nat_gas
    + heating_propane
)
resstock = resstock.drop(*drop_list)

# COMMAND ----------

from pyspark.sql.functions import sum


def Create_full_data(resstock, metadata, weather, aggregation_level, table_write_path):
    """Aggregate ResStock outputs, join metadata and weather, and persist.

    Sums every energy-output column of ``resstock`` at the requested time
    grain, joins the (broadcast) metadata on building/upgrade keys and the
    (broadcast) weather on county plus the matching time keys, then writes
    the result with ``saveAsTable``.

    Parameters
    ----------
    resstock : pyspark DataFrame
        Hourly ResStock outputs keyed by building_id/upgrade_id plus time
        columns (month, day, hour, weekday, timestamp).
    metadata : pyspark DataFrame
        Building metadata keyed by building_id and upgrade_id.
    weather : pyspark DataFrame
        Weather data keyed by county_geoid plus the time keys matching
        ``aggregation_level`` (e.g. month for "monthly").
    aggregation_level : str
        One of "yearly", "monthly", or "daily".
    table_write_path : str
        Fully qualified name of the output table.

    Raises
    ------
    ValueError
        If ``aggregation_level`` is not a supported grain. (Previously an
        unknown level silently wrote nothing.)
    """
    # Grouping keys and weather-join keys per supported aggregation grain.
    # The pipeline is otherwise identical for every grain, so one code path
    # replaces the three near-duplicate branches.
    group_keys = {
        "yearly": ["building_id", "upgrade_id"],
        "monthly": ["building_id", "month", "upgrade_id"],
        "daily": ["building_id", "day", "month", "upgrade_id"],
    }
    weather_keys = {
        "yearly": ["county_geoid"],
        "monthly": ["county_geoid", "month"],
        "daily": ["county_geoid", "day", "month"],
    }
    if aggregation_level not in group_keys:
        raise ValueError(
            f"aggregation_level must be one of {sorted(group_keys)}, "
            f"got {aggregation_level!r}"
        )

    # Identifier/time columns are keys, not energy outputs: never summed.
    non_energy_cols = [
        "building_id",
        "month",
        "upgrade_id",
        "day",
        "hour",
        "weekday",
        "timestamp",
    ]

    aggregated = resstock.groupBy(*group_keys[aggregation_level]).agg(
        *[
            sum(c).alias("sum_" + c)
            for c in resstock.columns
            if c not in non_energy_cols
        ]
    )

    # Metadata and weather are small relative to the outputs, so broadcast
    # both sides of the joins.
    with_metadata = aggregated.join(
        broadcast(metadata), on=["building_id", "upgrade_id"]
    )
    with_weather = with_metadata.join(
        broadcast(weather), on=weather_keys[aggregation_level]
    )

    with_weather.write.saveAsTable(table_write_path)

Expand All @@ -120,4 +207,10 @@ def Create_full_data(resstock, metadata, weather, aggregation_level, table_write

# Build the yearly modeling table; switch aggregation_level (and the weather
# table suffix above) for the monthly/daily variants.
table_write_path = "building_model.resstock_yearly_with_metadata_weather_upgrades"

Create_full_data(
    resstock=resstock,
    metadata=metadata,
    weather=weather,
    aggregation_level="yearly",
    table_write_path=table_write_path,
)
Loading

0 comments on commit c09fd40

Please sign in to comment.