Skip to content

Commit

Permalink
run black linter
Browse files Browse the repository at this point in the history
  • Loading branch information
Miki Verma committed Sep 24, 2024
1 parent 8773353 commit c09fd40
Show file tree
Hide file tree
Showing 17 changed files with 1,752 additions and 1,109 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Run various lint checks, like flake8 and black, to make sure
# our code remains in good shape, avoids common bugs, and follows
# common coding conventions.
name: lint

on:
  push:
    branches-ignore:
      - main

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
      #----------------------------------------------
      #  check-out repo and set-up python
      #----------------------------------------------
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string (e.g. "3.10" would parse as 3.1).
          python-version: "3.12"
      #----------------------------------------------
      #  load pip cache if cache exists
      #----------------------------------------------
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip
          restore-keys: ${{ runner.os }}-pip
      #----------------------------------------------
      #  run isort
      #----------------------------------------------
      - run: python -m pip install isort
      - run: |
          isort --profile black ./src ./tests
      #----------------------------------------------
      #  run black
      #----------------------------------------------
      # Quoted: unquoted [jupyter] can be glob-expanded by the shell.
      - run: python -m pip install "black[jupyter]"
      - run: |
          black ./src ./tests --diff
          black ./src ./tests --check
209 changes: 151 additions & 58 deletions deprecated/Yearly_model/Full_data_creation.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# Databricks notebook source
# MAGIC %md # Creating the Full Dataset used for modeling.
# MAGIC
# MAGIC ### Goal
# MAGIC Join the ResStock outputs to weather and metadata, and apply the necessary aggregation to energy consumption.
# MAGIC
# MAGIC ### Process
# MAGIC Please create the weather_data tables for the required aggregation, and the metadata with upgrades table, if you have not already done so, before running this notebook.
# MAGIC
# MAGIC ##### Inputs:
# MAGIC - `building_model.resstock_outputs_hourly`: ResStock hourly output table
# MAGIC - `building_model.weather_data_yearly`: Contains yearly weather data. Can also use monthly or daily; we just need to change the suffix
# MAGIC - `building_model.metadata_w_upgrades`: Contains metadata with upgrades.
# MAGIC
# MAGIC ##### Outputs:
# MAGIC - `building_model.resstock_yearly_with_metadata_weather_upgrades`: Contains the final table used for modeling. Can also have a daily or monthly version as well.

# COMMAND ----------

import pyspark.sql.functions as F
from pyspark.sql.functions import avg, col

# Raise the shuffle partition count for the joins/aggregations over the
# large hourly ResStock table.
spark.conf.set("spark.sql.shuffle.partitions", 1536)


# COMMAND ----------

# Source tables: hourly ResStock outputs, yearly weather, metadata w/ upgrades.
# NOTE(review): to build the monthly/daily dataset, point the weather path at
# the matching suffix (e.g. building_model.weather_data_monthly).
resstock_path = "building_model.resstock_outputs_hourly"
weather_data_full_path = "building_model.weather_data_yearly"
metadata_path = "building_model.metadata_w_upgrades"

resstock = spark.table(resstock_path)
metadata = spark.table(metadata_path)
Expand All @@ -40,75 +41,161 @@

# Define end uses by fuel type; these name the ResStock output columns that
# will be totalled per fuel and then dropped.

# Electric heating: fans/pumps, heat-pump backup, and primary heating.
heating_electric = [
    "out_electricity_heating_fans_pumps_energy_consumption_kwh",
    "out_electricity_heating_hp_bkup_energy_consumption_kwh",
    "out_electricity_heating_energy_consumption_kwh",
]

# Electric cooling: fans/pumps and primary cooling.
cooling_electric = [
    "out_electricity_cooling_fans_pumps_energy_consumption_kwh",
    "out_electricity_cooling_energy_consumption_kwh",
]

# Fossil-fuel heating: heat-pump backup plus primary heating per fuel.
heating_nat_gas = [
    "out_natural_gas_heating_hp_bkup_energy_consumption_kwh",
    "out_natural_gas_heating_energy_consumption_kwh",
]

heating_fuel_oil = [
    "out_fuel_oil_heating_hp_bkup_energy_consumption_kwh",
    "out_fuel_oil_heating_energy_consumption_kwh",
]

heating_propane = [
    "out_propane_heating_hp_bkup_energy_consumption_kwh",
    "out_propane_heating_energy_consumption_kwh",
]

# COMMAND ----------

# Collapse each fuel's per-end-use columns into a single "_total" column.
# NOTE: `sum` here is the Python builtin applied to pyspark Column objects
# (pyspark.sql.functions.sum is only imported further down the notebook).
resstock = (
    resstock.withColumn(
        "out_electricity_heating_total",
        sum(resstock[c] for c in heating_electric),
    )
    .withColumn(
        "out_electricity_cooling_total",
        sum(resstock[c] for c in cooling_electric),
    )
    .withColumn(
        "out_natural_gas_heating_total",
        sum(resstock[c] for c in heating_nat_gas),
    )
    .withColumn(
        "out_fuel_oil_heating_total",
        sum(resstock[c] for c in heating_fuel_oil),
    )
    .withColumn(
        "out_propane_heating_total",
        sum(resstock[c] for c in heating_propane),
    )
)

# The per-end-use component columns are now redundant; keep only the totals.
drop_list = (
    heating_electric
    + cooling_electric
    + heating_fuel_oil
    + heating_nat_gas
    + heating_propane
)
resstock = resstock.drop(*drop_list)

# COMMAND ----------

from pyspark.sql.functions import sum


def Create_full_data(resstock, metadata, weather, aggregation_level, table_write_path):
    """Aggregate ResStock outputs, join metadata and weather, and persist.

    Sums every energy-output column of ``resstock`` at the requested time
    grain, joins the (broadcast) metadata on building/upgrade keys and the
    (broadcast) weather on county plus the matching time keys, then writes
    the result with ``saveAsTable``.

    Parameters
    ----------
    resstock : pyspark DataFrame
        Hourly ResStock outputs keyed by building_id/upgrade_id plus time
        columns (month, day, hour, weekday, timestamp).
    metadata : pyspark DataFrame
        Building metadata keyed by building_id and upgrade_id.
    weather : pyspark DataFrame
        Weather data keyed by county_geoid plus the time keys matching
        ``aggregation_level`` (e.g. month for "monthly").
    aggregation_level : str
        One of "yearly", "monthly", or "daily".
    table_write_path : str
        Fully qualified name of the output table.

    Raises
    ------
    ValueError
        If ``aggregation_level`` is not a supported grain. (Previously an
        unknown level silently wrote nothing.)
    """
    # Grouping keys and weather-join keys per supported aggregation grain.
    # The pipeline is otherwise identical for every grain, so one code path
    # replaces the three near-duplicate branches.
    group_keys = {
        "yearly": ["building_id", "upgrade_id"],
        "monthly": ["building_id", "month", "upgrade_id"],
        "daily": ["building_id", "day", "month", "upgrade_id"],
    }
    weather_keys = {
        "yearly": ["county_geoid"],
        "monthly": ["county_geoid", "month"],
        "daily": ["county_geoid", "day", "month"],
    }
    if aggregation_level not in group_keys:
        raise ValueError(
            f"aggregation_level must be one of {sorted(group_keys)}, "
            f"got {aggregation_level!r}"
        )

    # Identifier/time columns are keys, not energy outputs: never summed.
    non_energy_cols = [
        "building_id",
        "month",
        "upgrade_id",
        "day",
        "hour",
        "weekday",
        "timestamp",
    ]

    aggregated = resstock.groupBy(*group_keys[aggregation_level]).agg(
        *[
            sum(c).alias("sum_" + c)
            for c in resstock.columns
            if c not in non_energy_cols
        ]
    )

    # Metadata and weather are small relative to the outputs, so broadcast
    # both sides of the joins.
    with_metadata = aggregated.join(
        broadcast(metadata), on=["building_id", "upgrade_id"]
    )
    with_weather = with_metadata.join(
        broadcast(weather), on=weather_keys[aggregation_level]
    )

    with_weather.write.saveAsTable(table_write_path)

Expand All @@ -120,4 +207,10 @@ def Create_full_data(resstock, metadata, weather, aggregation_level, table_write

# Build the yearly modeling table; switch aggregation_level (and the weather
# table suffix above) for the monthly/daily variants.
table_write_path = "building_model.resstock_yearly_with_metadata_weather_upgrades"

Create_full_data(
    resstock=resstock,
    metadata=metadata,
    weather=weather,
    aggregation_level="yearly",
    table_write_path=table_write_path,
)
Loading

0 comments on commit c09fd40

Please sign in to comment.