Skip to content

Commit

Permalink
Merge pull request #2893 from catalyst-cooperative/nightly-build-2023…
Browse files Browse the repository at this point in the history
…-09-26

Merge dev into main for 2023-09-26
  • Loading branch information
zaneselvans authored Sep 26, 2023
2 parents 780fbc8 + f31087b commit 51c1da6
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 218 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ repos:
# Formatters: hooks that re-write Python & documentation files
####################################################################################
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.290
rev: v0.0.291
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ license = { file = "LICENSE.txt" }
dependencies = [
"addfips>=0.4,<0.5",
"alembic>=1.10.3,<1.13",
"astroid>=2,<3",
"catalystcoop.dbfread>=3.0,<3.1",
"catalystcoop.ferc-xbrl-extractor==0.8.3",
"coloredlogs>=14.0,<15.1", # Dagster requires 14.0
"dagster-webserver>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11
"dagster>=1.4,<1.5", # 1.2.2 is first version to support Python 3.11
"dask>=2021.8,<2023.9.2",
"dask>=2021.8,<2023.9.3",
"datapackage>=1.11,<1.16", # Transition datastore to use frictionless.
"fsspec>=2021.7,<2023.6.1", # For caching datastore on GCS
"fsspec>=2021.7,<2023.9.3", # For caching datastore on GCS
"geopandas>=0.13,<0.15",
"gcsfs>=2021.7,<2023.9.2", # For caching datastore on GCS
"grpcio<1.56.0",
Expand Down
172 changes: 13 additions & 159 deletions src/pudl/output/ferc1.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class CalculationToleranceFerc1(BaseModel):
intertable_calculation_errors=0.20,
),
"balance_sheet_assets_ferc1": CalculationToleranceFerc1(
intertable_calculation_errors=0.85,
intertable_calculation_errors=0.65,
),
"balance_sheet_liabilities_ferc1": CalculationToleranceFerc1(
intertable_calculation_errors=0.07,
Expand Down Expand Up @@ -1369,10 +1369,6 @@ def boom(self: Self, tables_to_explode: dict[str, pd.DataFrame]) -> pd.DataFrame
exploded = (
self.initial_explosion_concatenation(tables_to_explode)
.pipe(self.generate_intertable_calculations)
.pipe(
self.reconcile_intertable_calculations,
self.calculation_tolerance.intertable_calculation_errors,
)
.pipe(self.calculation_forest.leafy_data, value_col=self.value_col)
)
# Identify which columns should be kept in the output...
Expand Down Expand Up @@ -1469,147 +1465,20 @@ def generate_intertable_calculations(
f"{self.root_table}: Reconcile inter-table calculations: "
f"{list(calculations_intertable.xbrl_factoid.unique())}."
)
# compile the lists of columns we are going to use later
calc_component_idx = ["table_name", "xbrl_factoid"] + self.other_dimensions
# Merge the reported data and the calculation component metadata to enable
# validation of calculated values. Here the data table exploded is supplying the
# values associated with individual calculation components, and the table_name
# and xbrl_factoid to which we aggregate are coming from the calculation
# components table. After merging we use the weights to adjust the reported
# values so they can be summed directly. This gives us aggregated calculated
# values that can later be compared to the higher level reported values.

# the validation is one_to_many in all instances except for the xbrl_factoid
# construction_work_in_progress in the balance_sheet_assets_ferc1 explosion.
# this may be a problem in the calculations that we should track down in #2717
validate = (
"one_to_many"
if self.root_table != "balance_sheet_assets_ferc1"
else "many_to_many"
)
# we are going to merge the data onto the calc components with the _parent
# column names, so the groupby after the merge needs a set of by cols with the
# _parent suffix
meta_idx = [col for col in list(NodeId._fields) if col in self.exploded_pks]
gby_parent = [
f"{col}_parent" if col in meta_idx else col for col in self.exploded_pks
]
calc_df = (
pd.merge(
calculations_intertable,
exploded,
validate=validate,
on=calc_component_idx,
)
# apply the weight from the calc to convey the sign before summing.
.assign(calculated_amount=lambda x: x[self.value_col] * x.weight)
.groupby(gby_parent, as_index=False, dropna=False)[["calculated_amount"]]
.sum(min_count=1)
)
# remove the _parent suffix so we can merge these calculated values back onto
# the data using the original pks
calc_df.columns = calc_df.columns.str.removesuffix("_parent")
calculated_df = pd.merge(
exploded,
calc_df,
on=self.exploded_pks,
how="outer",
validate="1:1",
indicator=True,
)

assert calculated_df[
(calculated_df._merge == "right_only")
& (calculated_df[self.value_col].notnull())
].empty

calculated_df = calculated_df.drop(columns=["_merge"])
# Force value_col to be a float to prevent any hijinks with calculating differences.
calculated_df[self.value_col] = calculated_df[self.value_col].astype(float)

return calculated_df

def reconcile_intertable_calculations(
self: Self, calculated_df: pd.DataFrame, calculation_tolerance: float = 0.05
):
"""Ensure inter-table calculated values match reported values within a tolerance.
In addition to checking whether all reported "calculated" values match the output
of our repaired calculations, this function adds a correction record to the
dataframe that is included in the calculations so that after the fact the
calculations match exactly. This is only done when the fraction of records that
don't match within the tolerances of :func:`numpy.isclose` is below a set
threshold.
Note that only calculations which are off by a significant amount result in the
creation of a correction record. Many calculations are off from the reported values
by exactly one dollar, presumably due to rounding errors. These records typically
do not fail the :func:`numpy.isclose()` test and so are not corrected.
Args:
calculated_df: table with calculated fields
calculation_tolerance: What proportion (0-1) of calculated values are
allowed to be incorrect without raising an AssertionError.
"""
if "calculated_amount" not in calculated_df.columns:
return calculated_df

# Data types were very messy here, including pandas Float64 for the
# calculated_amount columns which did not work with the np.isclose(). Not sure
# why these are cropping up.
calculated_df = calculated_df.convert_dtypes(convert_floating=False).astype(
{self.value_col: "float64", "calculated_amount": "float64"}
calc_idx = [col for col in list(NodeId._fields) if col in self.exploded_pks]
calculated_df = pudl.transform.ferc1.calculate_values_from_components(
calculation_components=calculations_intertable,
data=exploded,
calc_idx=calc_idx,
value_col=self.value_col,
)
calculated_df = calculated_df.assign(
abs_diff=lambda x: abs(x[self.value_col] - x.calculated_amount),
rel_diff=lambda x: np.where(
(x[self.value_col] != 0.0),
abs(x.abs_diff / x[self.value_col]),
np.nan,
),
calculated_df = pudl.transform.ferc1.check_calculation_metrics(
calculated_df=calculated_df,
value_col=self.value_col,
calculation_tolerance=self.calculation_tolerance.intertable_calculation_errors,
table_name=self.root_table,
add_corrections=True,
)
off_df = calculated_df[
~np.isclose(calculated_df.calculated_amount, calculated_df[self.value_col])
& (calculated_df["abs_diff"].notnull())
]
calculated_values = calculated_df[(calculated_df.abs_diff.notnull())]
if calculated_values.empty:
# Will only occur if all reported values are NaN when calculated values
# exist, or vice versa.
logger.warning(
"Warning: No calculated values have a corresponding reported value in the table."
)
off_ratio = np.nan
else:
off_ratio = len(off_df) / len(calculated_values)
if off_ratio > calculation_tolerance:
raise AssertionError(
f"Calculations in {self.root_table} are off by {off_ratio:.2%}. Expected tolerance "
f"of {calculation_tolerance:.1%}."
)

# We'll only get here if the proportion of calculations that are off is acceptable
if off_ratio > 0 or np.isnan(off_ratio):
logger.info(
f"{self.root_table}: has {len(off_df)} ({off_ratio:.02%}) records whose "
"calculations don't match. Adding correction records to make calculations "
"match reported values."
)
corrections = off_df.copy()

corrections[self.value_col] = (
corrections[self.value_col].fillna(0.0)
- corrections["calculated_amount"]
)
corrections["original_factoid"] = corrections["xbrl_factoid"]
corrections["xbrl_factoid"] = corrections["xbrl_factoid"] + "_correction"
corrections["row_type_xbrl"] = "correction"
corrections["is_within_table_calc"] = False
corrections["record_id"] = pd.NA

calculated_df = pd.concat(
[calculated_df, corrections], axis="index"
).reset_index(drop=True)
return calculated_df


Expand Down Expand Up @@ -1688,21 +1557,6 @@ def unique_associations(cls, v: pd.DataFrame, values) -> pd.DataFrame:
assert not v.duplicated(subset=pks, keep=False).any()
return v

@validator("exploded_calcs")
def single_valued_weights(cls, v: pd.DataFrame, values) -> pd.DataFrame:
"""Ensure that every calculation component has a uniquely specified weight."""
multi_valued_weights = (
v.groupby(values["calc_cols"], dropna=False)["weight"]
.transform("nunique")
.gt(1)
)
if multi_valued_weights.any():
logger.warning(
f"Found {sum(multi_valued_weights)} calculations with conflicting "
"weights."
)
return v

@validator("exploded_calcs")
def calcs_have_required_cols(cls, v: pd.DataFrame, values) -> pd.DataFrame:
"""Ensure exploded calculations include all required columns."""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
table_name_parent,xbrl_factoid_parent,table_name,xbrl_factoid,weight,utility_type,plant_function,plant_status
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,abandonment_of_leases,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,1.0,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,1.0,total,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,amortization_of_other_utility_plant_utility_plant_in_service,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,amortization_of_plant_acquisition_adjustment,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_in_service,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_leased_to_others,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_and_amortization_utility_plant_held_for_future_use,,,,
balance_sheet_assets_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_utility_plant_in_service,,,,
balance_sheet_assets_ferc1,construction_work_in_progress,utility_plant_summary_ferc1,construction_work_in_progress,1.0,,,
balance_sheet_assets_ferc1,construction_work_in_progress,utility_plant_summary_ferc1,construction_work_in_progress,1.0,total,,
balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,derivative_instrument_assets_hedges_long_term,,,,
balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,derivative_instrument_assets_long_term,,,,
balance_sheet_assets_ferc1,current_and_accrued_assets,balance_sheet_assets_ferc1,less_derivative_instrument_assets_hedges_long_term,-1.0,,,
Expand Down Expand Up @@ -138,7 +138,7 @@ retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retaine
retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings_previous_year,1.0,,,
balance_sheet_liabilities_ferc1,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,unappropriated_undistributed_subsidiary_earnings,1.0,,,
utility_plant_summary_ferc1,accumulated_provision_for_depreciation_amortization_and_depletion_of_plant_utility,utility_plant_summary_ferc1,depreciation_amortization_and_depletion_utility_plant_in_service,,,,
utility_plant_summary_ferc1,depreciation_utility_plant_in_service,electric_plant_depreciation_functional_ferc1,accumulated_depreciation,1.0,electric,total,total
utility_plant_summary_ferc1,depreciation_utility_plant_in_service,electric_plant_depreciation_functional_ferc1,accumulated_depreciation,1.0,electric,total,in_service
utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_classified,,,,
utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_classified_and_property_under_capital_leases,1.0,,,
utility_plant_summary_ferc1,utility_plant_in_service_classified_and_unclassified,utility_plant_summary_ferc1,utility_plant_in_service_property_under_capital_leases,,,,
Expand Down
8 changes: 4 additions & 4 deletions src/pudl/package_data/ferc1/xbrl_factoid_rate_base_tags.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
xbrl_factoid_rmi,xbrl_factoid_rmi_renamed,xbrl_factoid,table_name,in_rate_base,utility_type,plant_function,plant_status,notes
accrued_utility_revenues,accrued_utility_revenues,accrued_utility_revenues,balance_sheet_assets_ferc1,yes,,,,"At the option of the utility, the estimated amount accrued for service rendered, but not billed at the end of any accounting period, may be included herein. In case accruals are made for unbilled revenues, they shall be made likewise for unbilled expenses, such as for the purchase of energy."
advances_for_gas,advances_for_gas,advances_for_gas,balance_sheet_assets_ferc1,yes,,,,"4/13/21 - changed from ""non-electric"" to ""mixed"" - could be for gas power plants."
allowances,allowances,allowances,electric_operating_expenses_ferc1,yes,,,,
allowances,allowances,allowance_inventory_and_withheld,balance_sheet_assets_ferc1,yes,,,,
cash,cash,cash,balance_sheet_assets_ferc1,yes,,,,
customer_accounts_receivable,customer_accounts_receivable,customer_accounts_receivable,balance_sheet_assets_ferc1,yes,,,,"service provided & billed/collected, asset earned on."
derivative_instrument_assets,derivative_instrument_assets,derivative_instrument_assets,balance_sheet_assets_ferc1,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
Expand Down Expand Up @@ -115,10 +115,10 @@ unappropriated_undistributed_subsidiary_earnings,unappropriated_undistributed_su
unappropriated_undistributed_subsidiary_earnings,unappropriated_undistributed_subsidiary_earnings,unappropriated_undistributed_subsidiary_earnings,retained_earnings_ferc1,no,,,,
accounts_receivable_from_assoc_companies,accounts_receivable_from_assoc_companies,notes_receivable_from_associated_companies,balance_sheet_assets_ferc1,no,,,,Don't believe associated companies are involved in utility services?
accum_prov_for_uncollectible_acct_credit,accum_prov_for_uncollectible_acct_credit,accumulated_provision_for_uncollectible_accounts_credit,,yes,,,,Losses on accounts receivable which may become collectible.
cash_and_working_funds_(non_major_only),cash_and_working_funds_(non_major_only),cash_and_working_funds,,yes,,,,
derivative_instrument_assets___hedges,derivative_instrument_assets___hedges,derivative_instrument_assets_hedges,,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
cash_and_working_funds_(non_major_only),cash_and_working_funds_(non_major_only),cash_and_working_funds,balance_sheet_assets_ferc1,yes,,,,
derivative_instrument_assets___hedges,derivative_instrument_assets___hedges,derivative_instrument_assets_hedges,balance_sheet_assets_ferc1,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
gas_stored_underground___current,gas_stored_underground___current,gas_stored_current,balance_sheet_assets_ferc1,yes,,,,
long_term_portion_of_derivative_instrument_assets,long_term_portion_of_derivative_instrument_assets,derivative_instrument_assets_hedges_long_term,,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
long_term_portion_of_derivative_instrument_assets,long_term_portion_of_derivative_instrument_assets,derivative_instrument_assets_hedges_long_term,balance_sheet_assets_ferc1,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
long_term_portion_of_derivative_instrument_assets___hedges,long_term_portion_of_derivative_instrument_assets___hedges,derivative_instrument_assets_hedges_long_term,balance_sheet_assets_ferc1,yes,,,,Ryan/David spoke - believe hedges to be part of rate base - capital put up by customers for their benefit?
residuals_(elec)_and_extracted_products,residuals_(elec)_and_extracted_products,residuals,balance_sheet_assets_ferc1,yes,,,,
working_fund,working_fund,working_funds,balance_sheet_assets_ferc1,yes,,,,
Expand Down
Loading

0 comments on commit 51c1da6

Please sign in to comment.