Skip to content

Commit

Permalink
remove redundant upgrade id creation
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikivee committed Oct 3, 2024
1 parent 8be22ea commit 529bf88
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 13 deletions.
9 changes: 1 addition & 8 deletions scripts/build_feature_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,7 @@
import pyspark.sql.functions as F
from databricks.feature_engineering import FeatureEngineeringClient
from pyspark.sql import DataFrame
# from pyspark.sql.column import Column
from pyspark.sql.types import (
#IntegerType,
#DoubleType,
StringType,
#StructType,
#StructField,
)
from pyspark.sql.types import StringType

from src.dmutils import sumo, data_cleaning

Expand Down
7 changes: 2 additions & 5 deletions src/dmutils/sumo.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,6 @@ def transform_building_features(building_metadata_table_name) -> DataFrame:
"""
building_metadata_features = (
spark.read.table("ml.surrogate_model.building_metadata")
# add upgrade id for baseline
.withColumn("upgrade_id", F.lit(0.0))
# -- filter to occupied sf homes with modeled fuels and without shared HVAC systems -- #
# sf homes only
.where(
Expand Down Expand Up @@ -838,9 +836,8 @@ def transform_building_features(building_metadata_table_name) -> DataFrame:
)
# subset to all possible features of interest
.select(
# primary keys
# primary key
"building_id",
F.col("upgrade_id").cast("double"),
# foreign key
"weather_file_city",
# structure
Expand Down Expand Up @@ -1009,7 +1006,7 @@ def apply_upgrades(baseline_building_features: DataFrame, upgrade_id: int) -> Da
raise ValueError(f"Upgrade id={upgrade_id} is not yet supported")

upgrade_building_features = (
baseline_building_features.withColumn("upgrade_id", F.lit(upgrade_id))
baseline_building_features.withColumn("upgrade_id", F.lit(upgrade_id).cast('double'))
.withColumn("has_heat_pump_dryer", F.lit(False))
.withColumn("has_induction_range", F.lit(False))
)
Expand Down

0 comments on commit 529bf88

Please sign in to comment.