From 046c5be6979a1cf0ba6141316c9f4ed32a50e919 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Fri, 29 Nov 2024 10:42:28 +0100 Subject: [PATCH 1/2] clean_up_code --- hierarchicalforecast/_modidx.py | 2 - hierarchicalforecast/probabilistic_methods.py | 5 +- hierarchicalforecast/utils.py | 91 ++--- nbs/src/core.ipynb | 27 +- nbs/src/probabilistic_methods.ipynb | 5 +- nbs/src/utils.ipynb | 319 ++++-------------- 6 files changed, 140 insertions(+), 309 deletions(-) diff --git a/hierarchicalforecast/_modidx.py b/hierarchicalforecast/_modidx.py index 122f24d5..0e27fa2d 100644 --- a/hierarchicalforecast/_modidx.py +++ b/hierarchicalforecast/_modidx.py @@ -202,8 +202,6 @@ 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.aggregate': ( 'src/utils.html#aggregate', 'hierarchicalforecast/utils.py'), - 'hierarchicalforecast.utils.cov2corr': ( 'src/utils.html#cov2corr', - 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.is_strictly_hierarchical': ( 'src/utils.html#is_strictly_hierarchical', 'hierarchicalforecast/utils.py'), 'hierarchicalforecast.utils.level_to_outputs': ( 'src/utils.html#level_to_outputs', diff --git a/hierarchicalforecast/probabilistic_methods.py b/hierarchicalforecast/probabilistic_methods.py index c5cf844d..d5a90c8d 100644 --- a/hierarchicalforecast/probabilistic_methods.py +++ b/hierarchicalforecast/probabilistic_methods.py @@ -11,7 +11,7 @@ from scipy.stats import norm from sklearn.preprocessing import OneHotEncoder -from .utils import is_strictly_hierarchical, cov2corr +from .utils import is_strictly_hierarchical # %% ../nbs/src/probabilistic_methods.ipynb 6 class Normality: @@ -64,7 +64,8 @@ def __init__( # Base Normality Errors assume independence/diagonal covariance # TODO: replace bilinearity with elementwise row multiplication - R1 = cov2corr(self.W) + std_ = np.sqrt(np.diag(self.W)) + R1 = self.W / np.outer(std_, std_) Wh = [np.diag(sigma) @ R1 @ np.diag(sigma).T for sigma in self.sigmah.T] # Reconciled covariances across forecast horizon diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index aabeaa8d..2688559b 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -15,7 +15,7 @@ from narwhals.typing import Frame, FrameT from numba import njit, prange from sklearn.preprocessing import OneHotEncoder -from typing import Dict, List, Optional, Iterable, Union, Sequence +from typing import Dict, List, Optional, Union, Sequence # %% ../nbs/src/utils.ipynb 6 # Global variables @@ -44,7 +44,7 @@ def __exit__(self, exc_type, exc_value, traceback): ) # %% ../nbs/src/utils.ipynb 8 -def is_strictly_hierarchical(S: np.ndarray, tags: Dict[str, np.ndarray]): +def is_strictly_hierarchical(S: np.ndarray, tags: Dict[str, np.ndarray]) -> bool: # main idea: # if S represents a strictly hierarchical structure # the number of paths before the bottom level @@ -60,25 +60,10 @@ def is_strictly_hierarchical(S: np.ndarray, tags: Dict[str, np.ndarray]): nodes = levels_.popitem()[1].size return paths == nodes -# %% ../nbs/src/utils.ipynb 9 -def cov2corr(cov, return_std=False): - """convert covariance matrix to correlation matrix - **Parameters:**
- `cov`: array_like, 2d covariance matrix.
- `return_std`: bool=False, if True returned std.
- **Returns:**
- `corr`: ndarray (subclass) correlation matrix - """ - cov = np.asanyarray(cov) - std_ = np.sqrt(np.diag(cov)) - corr = cov / np.outer(std_, std_) - if return_std: - return corr, std_ - else: - return corr - -# %% ../nbs/src/utils.ipynb 11 -def _to_upper_hierarchy(bottom_split, bottom_values, upper_key): +# %% ../nbs/src/utils.ipynb 10 +def _to_upper_hierarchy( + bottom_split: List[str], bottom_values: str, upper_key: str +) -> List[str]: upper_split = upper_key.split("/") upper_idxs = [bottom_split.index(i) for i in upper_split] @@ -88,7 +73,7 @@ def join_upper(bottom_value): return [join_upper(val) for val in bottom_values] -# %% ../nbs/src/utils.ipynb 14 +# %% ../nbs/src/utils.ipynb 11 def aggregate( df: Frame, spec: List[List[str]], @@ -139,16 +124,18 @@ def aggregate( raise ValueError("Sparse output is only supported for Pandas DataFrames.") for col in df_nw.columns: - assert ( - not df_nw[col].is_null().any() - ), f"Column {col} contains null values. Make sure no column in the DataFrame contains null values." + if df_nw[col].is_null().any(): + raise ValueError( + f"Column {col} contains null values. Make sure no column in the DataFrame contains null values." + ) # Check whether all columns in the spec are in the df aggregation_cols_in_spec = list( dict.fromkeys([col for cols in spec for col in cols]) ) for col in aggregation_cols_in_spec: - assert col in df_nw.columns, f"Column {col} in spec not present in df" + if col not in df_nw.columns: + raise ValueError(f"Column {col} in spec not present in df") # Prepare the aggregation dictionary agg_dict = dict( @@ -247,7 +234,7 @@ def aggregate( return Y_df, S_df, tags -# %% ../nbs/src/utils.ipynb 30 +# %% ../nbs/src/utils.ipynb 25 class HierarchicalPlot: """Hierarchical Plot @@ -532,9 +519,9 @@ def plot_hierarchical_predictions_gap( plt.grid() plt.show() -# %% ../nbs/src/utils.ipynb 51 +# %% ../nbs/src/utils.ipynb 46 # convert levels to output quantile names -def level_to_outputs(level: Iterable[int]): +def level_to_outputs(level: List[int]) -> tuple[List[float], List[str]]: """Converts list of levels into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
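[Editor's note: patch 1 above removes the `cov2corr` helper and inlines the covariance-to-correlation conversion in `probabilistic_methods.py`. A minimal sketch of the equivalence, assuming a covariance matrix `W` with strictly positive variances on its diagonal:]

```python
import numpy as np

# Illustrative covariance matrix with strictly positive variances.
W = np.array([[4.0, 1.2],
              [1.2, 9.0]])

# Inlined replacement for the removed cov2corr helper:
# corr = cov / outer(std, std), with std the square roots of the diagonal.
std_ = np.sqrt(np.diag(W))
R1 = W / np.outer(std_, std_)

# A correlation matrix has a unit diagonal and off-diagonal entries in [-1, 1].
assert np.allclose(np.diag(R1), 1.0)
assert np.all(np.abs(R1) <= 1.0)
```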
@@ -558,7 +545,7 @@ def level_to_outputs(level: Iterable[int]): # convert quantiles to output quantile names -def quantiles_to_outputs(quantiles: Iterable[float]): +def quantiles_to_outputs(quantiles: List[float]) -> tuple[List[float], List[str]]: """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
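[Editor's note: `level_to_outputs` and `quantiles_to_outputs` map confidence levels and quantiles to the output-column suffixes used by StatsForecast and NeuralForecast. A small sketch of the symmetric-interval arithmetic behind `level_to_outputs`, with illustrative values; the exact suffix strings are those exercised in the notebook tests below:]

```python
# For a central prediction interval at confidence `level` (in [0, 100]),
# the lower and upper tail quantiles are symmetric around the median.
def interval_quantiles(level: int) -> tuple[float, float]:
    return (50 - level / 2) / 100, (50 + level / 2) / 100

assert interval_quantiles(80) == (0.1, 0.9)    # -> '-lo-80' / '-hi-80' columns
assert interval_quantiles(90) == (0.05, 0.95)  # -> '-lo-90' / '-hi-90' columns
```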
@@ -577,7 +564,7 @@ def quantiles_to_outputs(quantiles: Iterable[float]): output_names.append("-median") return quantiles, output_names -# %% ../nbs/src/utils.ipynb 52 +# %% ../nbs/src/utils.ipynb 47 # given input array of sample forecasts and inptut quantiles/levels, # output a Pandas Dataframe with columns of quantile predictions def samples_to_quantiles_df( @@ -586,10 +573,11 @@ def samples_to_quantiles_df( dates: List[str], quantiles: Optional[List[float]] = None, level: Optional[List[int]] = None, - model_name: Optional[str] = "model", + model_name: str = "model", id_col: str = "unique_id", time_col: str = "ds", -): + backend: str = "pandas", +) -> tuple[List[float], FrameT]: """Transform Random Samples into HierarchicalForecast input. Auxiliary function to create compatible HierarchicalForecast input `Y_hat_df` dataframe. @@ -602,26 +590,35 @@ def samples_to_quantiles_df( `model_name`: string. Name of forecasting model.
     `id_col` : str='unique_id', column that identifies each series.
     `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
+ `backend` : str='pandas', backend to use for the output dataframe, either 'pandas' or 'polars'.

     **Returns:**
     `quantiles`: float list in [0., 1.]. Quantiles to estimate from the y distribution.
- `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id. + `Y_hat_df`: DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id. """ # Get the shape of the array n_series, n_samples, horizon = samples.shape - assert n_series == len(unique_ids) - assert horizon == len(dates) - assert (quantiles is not None) ^ ( - level is not None - ) # check exactly one of quantiles/levels has been input + if n_series != len(unique_ids): + raise ValueError( + f"Number of unique_ids ({len(unique_ids)}) must match the number of series ({n_series})." + ) + if horizon != len(dates): + raise ValueError( + f"Number of dates ({len(dates)}) must match third dimension of samples array ({horizon})." + ) + if not ((quantiles is None) ^ (level is None)): + raise ValueError("Either quantiles or level must be provided, but not both.") + + namespace = sys.modules.get(backend, None) + if namespace is None: + raise ValueError(f"DataFrame backend {backend} not installed.") # create initial dictionary forecasts_mean = np.mean(samples, axis=1).flatten() unique_ids = np.repeat(unique_ids, horizon) ds = np.tile(dates, n_series) - data = pd.DataFrame({id_col: unique_ids, time_col: ds, model_name: forecasts_mean}) # create quantiles and quantile names if level is not None: @@ -642,11 +639,15 @@ def samples_to_quantiles_df( ) # [Q,H,N] -> [N,H,Q] forecasts_quantiles = forecasts_quantiles.reshape(-1, len(_quantiles)) - df = pd.DataFrame(data=forecasts_quantiles, columns=col_names) + df_nw = nw.from_dict( + {id_col: unique_ids, time_col: ds, model_name: forecasts_mean}, + native_namespace=namespace, + ) + df_nw = df_nw.with_columns(**dict(zip(col_names, forecasts_quantiles.T))) - return _quantiles, pd.concat([data, df], axis=1).set_index(id_col) + return _quantiles, df_nw.to_native() -# %% ../nbs/src/utils.ipynb 59 +# %% ../nbs/src/utils.ipynb 55 # Masked empirical covariance matrix @njit( "Array(float64, 2, 'F')(Array(float64, 2, 'C'), Array(bool, 2, 'C'))", @@ -685,7 +686,7 @@ def _ma_cov(residuals: np.ndarray, not_nan_mask: np.ndarray): return W -# %% ../nbs/src/utils.ipynb 60 +# %% ../nbs/src/utils.ipynb 56 # Shrunk covariance matrix using the Schafer-Strimmer method @@ -836,7 +837,7 @@ def _shrunk_covariance_schaferstrimmer_with_nans( return W -# %% ../nbs/src/utils.ipynb 62 +# %% ../nbs/src/utils.ipynb 58 # Lasso cyclic coordinate descent @njit( "Array(float64, 1, 'C')(Array(float64, 2, 'C'), Array(float64, 1, 'C'), float64, int64, float64)", diff --git a/nbs/src/core.ipynb b/nbs/src/core.ipynb index af26b382..83c5e33d 100644 --- a/nbs/src/core.ipynb +++ b/nbs/src/core.ipynb @@ -69,7 +69,7 @@ "import os\n", "\n", "from fastcore.test import test_close, test_eq, test_fail\n", - "from nbdev.showdoc import add_docs, show_doc\n", + "from nbdev.showdoc import show_doc\n", "import pandas as pd" ] }, @@ -1282,7 +1282,7 @@ "#| hide\n", "from statsforecast import StatsForecast\n", "from statsforecast.utils import generate_series\n", - "from statsforecast.models import RandomWalkWithDrift, AutoETS" + "from statsforecast.models import RandomWalkWithDrift" ] }, { @@ -1294,7 +1294,7 @@ "#| hide\n", "# test unbalanced dataset\n", "max_tenure = 24\n", - "dates = pd.date_range(start='2019-01-31', freq='M', periods=max_tenure)\n", + "dates = pd.date_range(start='2019-01-31', freq='ME', periods=max_tenure)\n", "cohort_tenure = [24, 23, 22, 21]\n", "\n", "ts_list = []\n", @@ -1302,7 +1302,7 @@ "# Create ts for each cohort\n", "for i in 
range(len(cohort_tenure)):\n", " ts_list.append(\n", - " generate_series(n_series=1, freq='M', min_length=cohort_tenure[i], max_length=cohort_tenure[i]).reset_index() \\\n", + " generate_series(n_series=1, freq='ME', min_length=cohort_tenure[i], max_length=cohort_tenure[i]).reset_index() \\\n", " .assign(ult=i) \\\n", " .assign(ds=dates[-cohort_tenure[i]:]) \\\n", " .drop(columns=['unique_id'])\n", @@ -1328,7 +1328,7 @@ " models=[\n", " RandomWalkWithDrift(),\n", " ],\n", - " freq='M',\n", + " freq='ME',\n", " n_jobs=1,\n", ")\n", "\n", @@ -1343,8 +1343,8 @@ "fitted_df = fcst.forecast_fitted_values()\n", "\n", "fcst_df = hrec.reconcile(\n", - " Y_hat_df=fcst_df.reset_index(),\n", - " Y_df=fitted_df.reset_index(),\n", + " Y_hat_df=fcst_df,\n", + " Y_df=fitted_df,\n", " S=S_df,\n", " tags=tags,\n", ")" @@ -1380,15 +1380,18 @@ " ]\n", ")\n", "\n", - "fcst_df = fcst.forecast(df=train_df, h=12, fitted=True)\n", + "fcst_df_pl = fcst.forecast(df=train_df, h=12, fitted=True)\n", "fitted_df = fcst.forecast_fitted_values()\n", "\n", - "fcst_df = hrec.reconcile(\n", - " Y_hat_df=fcst_df,\n", + "fcst_df_pl = hrec.reconcile(\n", + " Y_hat_df=fcst_df_pl,\n", " Y_df=fitted_df,\n", " S=S_df_pl,\n", " tags=tags,\n", - ")" + ")\n", + "\n", + "# Test equivalence\n", + "pd.testing.assert_frame_equal(fcst_df, fcst_df_pl.to_pandas())" ] }, { @@ -1744,7 +1747,7 @@ "import pandas as pd\n", "\n", "from statsforecast.core import StatsForecast\n", - "from statsforecast.models import AutoETS, Naive\n", + "from statsforecast.models import AutoETS\n", "\n", "from hierarchicalforecast.utils import aggregate\n", "from hierarchicalforecast.core import HierarchicalReconciliation\n", diff --git a/nbs/src/probabilistic_methods.ipynb b/nbs/src/probabilistic_methods.ipynb index d280f893..f91e770a 100644 --- a/nbs/src/probabilistic_methods.ipynb +++ b/nbs/src/probabilistic_methods.ipynb @@ -42,7 +42,7 @@ "from scipy.stats import norm\n", "from sklearn.preprocessing import OneHotEncoder\n", "\n", - "from hierarchicalforecast.utils import is_strictly_hierarchical, cov2corr" + "from hierarchicalforecast.utils import is_strictly_hierarchical" ] }, { @@ -120,7 +120,8 @@ "\n", " # Base Normality Errors assume independence/diagonal covariance\n", " # TODO: replace bilinearity with elementwise row multiplication\n", - " R1 = cov2corr(self.W)\n", + " std_ = np.sqrt(np.diag(self.W))\n", + " R1 = self.W / np.outer(std_, std_) \n", " Wh = [np.diag(sigma) @ R1 @ np.diag(sigma).T for sigma in self.sigmah.T]\n", "\n", " # Reconciled covariances across forecast horizon\n", diff --git a/nbs/src/utils.ipynb b/nbs/src/utils.ipynb index e226694c..479b6bc0 100644 --- a/nbs/src/utils.ipynb +++ b/nbs/src/utils.ipynb @@ -50,7 +50,7 @@ "from narwhals.typing import Frame, FrameT\n", "from numba import njit, prange\n", "from sklearn.preprocessing import OneHotEncoder\n", - "from typing import Dict, List, Optional, Iterable, Union, Sequence" + "from typing import Dict, List, Optional, Union, Sequence" ] }, { @@ -62,8 +62,7 @@ "source": [ "#| hide\n", "import os\n", - "import warnings\n", - "from nbdev.showdoc import add_docs, show_doc\n", + "from nbdev.showdoc import show_doc\n", "from fastcore.test import test_eq, test_close, test_fail\n", "from statsforecast.utils import generate_series" ] @@ -126,7 +125,7 @@ "source": [ "#| exporti\n", "def is_strictly_hierarchical(S: np.ndarray, \n", - " tags: Dict[str, np.ndarray]):\n", + " tags: Dict[str, np.ndarray]) -> bool:\n", " # main idea:\n", " # if S represents a strictly hierarchical structure\n", " # the 
number of paths before the bottom level\n", @@ -143,31 +142,6 @@ " return paths == nodes" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "da433b2e", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "def cov2corr(cov, return_std=False):\n", - " \"\"\" convert covariance matrix to correlation matrix\n", - " **Parameters:**
\n", - " `cov`: array_like, 2d covariance matrix.
\n", - " `return_std`: bool=False, if True returned std.
\n", - " **Returns:**
\n", - " `corr`: ndarray (subclass) correlation matrix\n", - " \"\"\"\n", - " cov = np.asanyarray(cov)\n", - " std_ = np.sqrt(np.diag(cov))\n", - " corr = cov / np.outer(std_, std_)\n", - " if return_std:\n", - " return corr, std_\n", - " else:\n", - " return corr" - ] - }, { "cell_type": "markdown", "id": "3a1f4267", @@ -184,7 +158,7 @@ "outputs": [], "source": [ "#| exporti\n", - "def _to_upper_hierarchy(bottom_split, bottom_values, upper_key):\n", + "def _to_upper_hierarchy(bottom_split: List[str], bottom_values: str, upper_key: str) -> List[str]:\n", " upper_split = upper_key.split('/')\n", " upper_idxs = [bottom_split.index(i) for i in upper_split]\n", "\n", @@ -195,135 +169,6 @@ " return [join_upper(val) for val in bottom_values]" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9fdc577", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "import warnings" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6be82d73", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "def aggregate_old(\n", - " df: pd.DataFrame,\n", - " spec: List[List[str]],\n", - " exog_vars: Optional[Dict[str, Union[str, List[str]]]] = None,\n", - " is_balanced: bool = False,\n", - " sparse_s: bool = False,\n", - "):\n", - " \"\"\"Utils Aggregation Function.\n", - " Aggregates bottom level series contained in the pandas DataFrame `df` according\n", - " to levels defined in the `spec` list.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pandas DataFrame\n", - " Dataframe with columns `['ds', 'y']` and columns to aggregate.\n", - " spec : list of list of str\n", - " List of levels. Each element of the list should contain a list of columns of `df` to aggregate.\n", - " exog_vars: dictionary of string keys & values that can either be a list of strings or a single string\n", - " keys correspond to column names and the values represent the aggregation(s) that will be applied to each column. Accepted values are those from Pandas aggregation Functions, check the Pandas docs for guidance\n", - " is_balanced : bool (default=False)\n", - " Deprecated.\n", - " sparse_s : bool (default=False)\n", - " Return `S_df` as a sparse dataframe.\n", - "\n", - " Returns\n", - " -------\n", - " Y_df : pandas DataFrame\n", - " Hierarchically structured series.\n", - " S_df : pandas DataFrame\n", - " Summing dataframe.\n", - " tags : dict\n", - " Aggregation indices.\n", - " \"\"\"\n", - " # Checks\n", - " if df.isnull().values.any():\n", - " raise ValueError('`df` contains null values')\n", - " if is_balanced:\n", - " warnings.warn(\n", - " \"`is_balanced` is deprecated and will be removed in a future version. 
\"\n", - " \"Don't set this argument to suppress this warning.\",\n", - " category=DeprecationWarning,\n", - " )\n", - "\n", - " \n", - " # compute aggregations and tags\n", - " spec = sorted(spec, key=len)\n", - " bottom = spec[-1]\n", - " aggs = []\n", - " tags = {}\n", - " # Prepare the aggregation dictionary\n", - " agg_dict = {\n", - " \"y\": (\"y\", \"sum\")\n", - " }\n", - "\n", - " # Check if exog_vars are present in df & add to the aggregation dictionary if it is not None\n", - " if exog_vars is not None:\n", - " missing_vars = [var for var in exog_vars.keys() if var not in df.columns]\n", - " if missing_vars:\n", - " raise ValueError(f\"The following exogenous variables are not present in the DataFrame: {', '.join(missing_vars)}\") \n", - " else:\n", - " # Update agg_dict to handle multiple aggregations for each exog_vars key\n", - " for key, agg_func in exog_vars.items():\n", - " # Ensure agg_func is a list\n", - " if isinstance(agg_func, str): # If it's a single string, convert to list\n", - " agg_func = [agg_func]\n", - " elif not isinstance(agg_func, list): # Raise an error if it's neither\n", - " raise ValueError(f\"Aggregation functions for '{key}' must be a string or a list of strings.\")\n", - " \n", - " for func in agg_func:\n", - " agg_dict[f\"{key}_{func}\"] = (key, func) # Update the agg_dict with the new naming structure\n", - "\n", - " # Perform the aggregation\n", - " for levels in spec:\n", - " agg = df.groupby(levels + ['ds'], observed=True).agg(**agg_dict)\n", - " if not agg.index.is_monotonic_increasing:\n", - " agg = agg.sort_index()\n", - " agg = agg.reset_index('ds')\n", - " group = agg.index.get_level_values(0)\n", - " if not pd.api.types.is_string_dtype(group.dtype):\n", - " group = group.astype(str)\n", - " for level in levels[1:]:\n", - " group = group + '/' + agg.index.get_level_values(level).str.replace('/', '_')\n", - " agg.index = group\n", - " agg.index.name = 'unique_id'\n", - " tags['/'.join(levels)] = group.unique().values\n", - " aggs.append(agg)\n", - " Y_df = pd.concat(aggs)\n", - "\n", - " # construct S\n", - " bottom_key = '/'.join(bottom)\n", - " bottom_levels = tags[bottom_key]\n", - " S = np.empty((len(bottom_levels), len(spec)), dtype=object)\n", - " for j, levels in enumerate(spec[:-1]):\n", - " S[:, j] = _to_upper_hierarchy(bottom, bottom_levels, '/'.join(levels))\n", - " S[:, -1] = tags[bottom_key]\n", - "\n", - " categories = list(tags.values())\n", - " try:\n", - " encoder = OneHotEncoder(categories=categories, sparse_output=sparse_s, dtype=np.float64)\n", - " except TypeError: # sklearn < 1.2\n", - " encoder = OneHotEncoder(categories=categories, sparse=sparse_s, dtype=np.float64) \n", - " S = encoder.fit_transform(S).T\n", - " if sparse_s:\n", - " df_constructor = pd.DataFrame.sparse.from_spmatrix\n", - " else:\n", - " df_constructor = pd.DataFrame\n", - " S_df = df_constructor(S, index=np.hstack(categories), columns=bottom_levels)\n", - " return Y_df, S_df, tags" - ] - }, { "cell_type": "code", "execution_count": null, @@ -382,12 +227,14 @@ " raise ValueError(\"Sparse output is only supported for Pandas DataFrames.\")\n", " \n", " for col in df_nw.columns:\n", - " assert not df_nw[col].is_null().any(), f\"Column {col} contains null values. Make sure no column in the DataFrame contains null values.\"\n", + " if df_nw[col].is_null().any():\n", + " raise ValueError(f\"Column {col} contains null values. 
Make sure no column in the DataFrame contains null values.\")\n", "\n", " # Check whether all columns in the spec are in the df\n", " aggregation_cols_in_spec = list(dict.fromkeys([col for cols in spec for col in cols]))\n", " for col in aggregation_cols_in_spec:\n", - " assert col in df_nw.columns, f\"Column {col} in spec not present in df\"\n", + " if col not in df_nw.columns:\n", + " raise ValueError(f\"Column {col} in spec not present in df\")\n", "\n", " # Prepare the aggregation dictionary \n", " agg_dict = dict(zip(target_cols, tuple(zip(target_cols, len(target_cols)*[\"sum\"]))))\n", @@ -569,23 +416,6 @@ " test_eq(tags[tag], tags_f[tag]) " ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "db5a1398", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# test against old aggregation function\n", - "Y_df_old, S_df_old, tags_old = aggregate_old(df, spec)\n", - "\n", - "test_eq(Y_df_old.reset_index(), Y_df)\n", - "test_eq(S_df_old.reset_index(names=\"unique_id\"), S_df) \n", - "for tag in tags:\n", - " test_eq(tags[tag], tags_old[tag])" - ] - }, { "cell_type": "code", "execution_count": null, @@ -633,10 +463,7 @@ " ['pen', 'ult'],\n", "]\n", "\n", - "hier_df, S_df, tags = aggregate(df=df, spec=hier_levels)\n", - "hier_df_old, S_df_old, _ = aggregate_old(df=df, spec=hier_levels)\n", - "test_eq(S_df, S_df_old.reset_index(names=\"unique_id\"))\n", - "test_eq(hier_df, hier_df_old.reset_index(names=\"unique_id\"))" + "hier_df, S_df, tags = aggregate(df=df, spec=hier_levels)" ] }, { @@ -676,11 +503,6 @@ "test_eq(hier_df[\"unique_id\"].unique(), S_df[\"unique_id\"])\n", "test_eq(len(tags), len(hiers_strictly)) \n", "\n", - "# Test old vs new\n", - "hier_df_old, S_df_old, tags_old = aggregate_old(df=df, spec=hiers_strictly)\n", - "test_eq(hier_df, hier_df_old.reset_index())\n", - "test_eq(S_df, S_df_old.reset_index(names=\"unique_id\"))\n", - "\n", "# grouped structure\n", "hiers_grouped = [['Country'],\n", " ['Country', 'State'], \n", @@ -696,14 +518,7 @@ "test_eq(hier_df[\"unique_id\"].nunique(), 425)\n", "test_eq(S_df.shape, (425, 305))\n", "test_eq(hier_df[\"unique_id\"].unique(), S_df[\"unique_id\"])\n", - "test_eq(len(tags), len(hiers_grouped))\n", - "\n", - "# Test old vs new - equivalent up to a different sorting, tbd if this is fine\n", - "hier_df_old, S_df_old, tags_old = aggregate_old(df=df, spec=hiers_grouped)\n", - "test_eq(hier_df.sort_values(by=[\"unique_id\", \"ds\"], ignore_index=True), \n", - " hier_df_old.reset_index().sort_values(by=[\"unique_id\", \"ds\"], ignore_index=True))\n", - "test_eq(S_df.sort_values(by=\"unique_id\", ignore_index=True)[S_df.columns], \n", - " S_df_old.reset_index(names=\"unique_id\").sort_values(by=\"unique_id\", ignore_index=True)[S_df.columns])" + "test_eq(len(tags), len(hiers_grouped))" ] }, { @@ -777,41 +592,6 @@ ")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4b3828f-bbcc-4116-a969-49c78c33bf72", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# Test equality of aggregation and aggregation_before\n", - "for name, spec in zip(['strict', 'grouped'], [hiers_strictly, hiers_grouped]):\n", - " with CodeTimer(f'{name} aggregation before'):\n", - " Y_df_old, S_df_old, tags_old = aggregate_old(df=df, spec=spec)\n", - " \n", - " with CodeTimer(f'{name} aggregation now'):\n", - " Y_df, S_df, tags = aggregate(df=df, spec=spec)\n", - "\n", - " Y_df = Y_df.sort_values(by=[\"unique_id\", \"ds\"], ignore_index=True)\n", - " Y_df_old = 
Y_df_old.reset_index().sort_values(by=[\"unique_id\", \"ds\"], ignore_index=True)\n", - "\n", - " S_df = S_df.sort_values(by=\"unique_id\", ignore_index=True)[S_df.columns]\n", - " S_df_old = S_df_old.reset_index(names=\"unique_id\").sort_values(by=\"unique_id\", ignore_index=True)[S_df.columns]\n", - " \n", - " np.testing.assert_allclose(\n", - " Y_df['y'].values,\n", - " Y_df_old['y'].values,\n", - " )\n", - " np.testing.assert_equal(S_df.values, S_df_old.values)\n", - " \n", - " test_eq(S_df.columns, S_df_old.columns)\n", - " test_eq(S_df.index, S_df_old.index)\n", - " \n", - " test_eq(Y_df.columns, Y_df_old.columns)\n", - " test_eq(Y_df.index, Y_df_old.index)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1421,7 +1201,7 @@ "#| exporti\n", "\n", "# convert levels to output quantile names\n", - "def level_to_outputs(level:Iterable[int]):\n", + "def level_to_outputs(level: List[int]) -> tuple[List[float], List[str]]:\n", " \"\"\" Converts list of levels into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -1444,7 +1224,7 @@ " return quantiles, output_names\n", "\n", "# convert quantiles to output quantile names\n", - "def quantiles_to_outputs(quantiles:Iterable[float]):\n", + "def quantiles_to_outputs(quantiles: List[float]) -> tuple[List[float], List[str]]:\n", " \"\"\"Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -1480,10 +1260,11 @@ " dates: List[str], \n", " quantiles: Optional[List[float]] = None,\n", " level: Optional[List[int]] = None, \n", - " model_name: Optional[str] = \"model\",\n", + " model_name: str = \"model\",\n", " id_col: str = 'unique_id',\n", " time_col: str = 'ds',\n", - " ):\n", + " backend: str = 'pandas',\n", + " ) -> tuple[List[float], FrameT]:\n", " \"\"\" Transform Random Samples into HierarchicalForecast input.\n", " Auxiliary function to create compatible HierarchicalForecast input `Y_hat_df` dataframe.\n", "\n", @@ -1496,24 +1277,35 @@ " `model_name`: string. Name of forecasting model.
\n", " `id_col` : str='unique_id', column that identifies each serie.
\n", " `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
\n", + " `backend` : str='pandas', backend to use for the output dataframe, either 'pandas' or 'polars'.
\n", "\n", " **Returns:**
\n", " `quantiles`: float list in [0., 1.]. quantiles to estimate from y distribution .
\n", - " `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.\n", + " `Y_hat_df`: DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.\n", " \"\"\"\n", " \n", " # Get the shape of the array\n", " n_series, n_samples, horizon = samples.shape\n", "\n", - " assert n_series == len(unique_ids)\n", - " assert horizon == len(dates)\n", - " assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input\n", + " if n_series != len(unique_ids):\n", + " raise ValueError(\n", + " f\"Number of unique_ids ({len(unique_ids)}) must match the number of series ({n_series}).\"\n", + " )\n", + " if horizon != len(dates):\n", + " raise ValueError(\n", + " f\"Number of dates ({len(dates)}) must match third dimension of samples array ({horizon}).\"\n", + " )\n", + " if not ((quantiles is None) ^ (level is None)):\n", + " raise ValueError(\"Either quantiles or level must be provided, but not both.\")\n", + "\n", + " namespace = sys.modules.get(backend, None)\n", + " if namespace is None:\n", + " raise ValueError(f\"DataFrame backend {backend} not installed.\")\n", "\n", " #create initial dictionary\n", " forecasts_mean = np.mean(samples, axis=1).flatten()\n", " unique_ids = np.repeat(unique_ids, horizon)\n", " ds = np.tile(dates, n_series)\n", - " data = pd.DataFrame({id_col:unique_ids, time_col:ds, model_name:forecasts_mean})\n", "\n", " #create quantiles and quantile names\n", " if level is not None:\n", @@ -1529,11 +1321,11 @@ "\n", " forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q]\n", " forecasts_quantiles = forecasts_quantiles.reshape(-1,len(_quantiles))\n", - "\n", - " df = pd.DataFrame(data=forecasts_quantiles, \n", - " columns=col_names)\n", " \n", - " return _quantiles, pd.concat([data,df], axis=1).set_index(id_col)" + " df_nw = nw.from_dict({id_col:unique_ids, time_col:ds, model_name:forecasts_mean}, native_namespace=namespace)\n", + " df_nw = df_nw.with_columns(**dict(zip(col_names, forecasts_quantiles.T))) \n", + "\n", + " return _quantiles, df_nw.to_native()" ] }, { @@ -1604,10 +1396,45 @@ ")\n", "test_eq(\n", " ret_df_1.columns,\n", - " ['ds', 'model', 'model-median', 'model-lo-90', 'model-lo-50', 'model-lo-10', 'model-hi-10', 'model-hi-50', 'model-hi-90']\n", + " ['unique_id', 'ds', 'model', 'model-median', 'model-lo-90', 'model-lo-50', 'model-lo-10', 'model-hi-10', 'model-hi-50', 'model-hi-90']\n", + ")\n", + "test_eq(\n", + " ret_df_1[\"unique_id\"].values,\n", + " ['id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1',\n", + " 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2',\n", + " 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3']\n", + ")\n", + "test_eq(\n", + " ret_quantiles_1, ret_quantiles_2\n", + ")\n", + "test_eq(\n", + " ret_df_1[\"unique_id\"], ret_df_2[\"unique_id\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6eeb27e", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# polars\n", + "\n", + "ret_quantiles_1, ret_df_1 = samples_to_quantiles_df(samples, unique_ids, dates, level=level, backend='polars')\n", + "ret_quantiles_2, ret_df_2 = samples_to_quantiles_df(samples, unique_ids, dates, quantiles=quantiles, backend='polars')\n", + "\n", + "test_eq(\n", + " ret_quantiles_1,\n", + " quantiles\n", + ")\n", + "test_eq(\n", + " ret_df_1.columns,\n", + " ['unique_id', 'ds', 'model', 
'model-median', 'model-lo-90', 'model-lo-50', 'model-lo-10', 'model-hi-10', 'model-hi-50', 'model-hi-90']\n", ")\n", "test_eq(\n", - " ret_df_1.index,\n", + " list(ret_df_1[\"unique_id\"]),\n", " ['id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1', 'id1',\n", " 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2', 'id2',\n", " 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3', 'id3']\n", @@ -1616,7 +1443,7 @@ " ret_quantiles_1, ret_quantiles_2\n", ")\n", "test_eq(\n", - " ret_df_1.index, ret_df_2.index\n", + " ret_df_1[\"unique_id\"], ret_df_2[\"unique_id\"]\n", ")" ] }, From 18551086270e2aa57913aa6d4d5610acfae7bc9d Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Mon, 2 Dec 2024 16:16:04 +0100 Subject: [PATCH 2/2] jose_comments --- hierarchicalforecast/core.py | 18 ++--- hierarchicalforecast/evaluation.py | 24 +++---- hierarchicalforecast/methods.py | 56 ++++++++-------- hierarchicalforecast/probabilistic_methods.py | 4 +- hierarchicalforecast/utils.py | 66 ++++++++++--------- nbs/src/core.ipynb | 18 ++--- nbs/src/evaluation.ipynb | 17 ++--- nbs/src/methods.ipynb | 56 ++++++++-------- nbs/src/probabilistic_methods.ipynb | 4 +- nbs/src/utils.ipynb | 66 ++++++++++--------- 10 files changed, 171 insertions(+), 158 deletions(-) diff --git a/hierarchicalforecast/core.py b/hierarchicalforecast/core.py index b8d3edba..468ce7bc 100644 --- a/hierarchicalforecast/core.py +++ b/hierarchicalforecast/core.py @@ -14,7 +14,7 @@ from narwhals.typing import Frame, FrameT from scipy.stats import norm from scipy import sparse -from typing import Dict, List, Optional +from typing import Optional import narwhals as nw import numpy as np @@ -104,7 +104,7 @@ class HierarchicalReconciliation: [Rob J. Hyndman and George Athanasopoulos (2018). 
\"Forecasting principles and practice, Hierarchical and Grouped Series\".](https://otexts.com/fpp3/hierarchical.html) """ - def __init__(self, reconcilers: List[HReconciler]): + def __init__(self, reconcilers: list[HReconciler]): self.reconcilers = reconcilers self.orig_reconcilers = copy.deepcopy(reconcilers) # TODO: elegant solution self.insample = any([method.insample for method in reconcilers]) @@ -114,13 +114,13 @@ def _prepare_fit( Y_hat_nw: Frame, S_nw: Frame, Y_nw: Optional[Frame], - tags: Dict[str, np.ndarray], - level: Optional[List[int]] = None, + tags: dict[str, np.ndarray], + level: Optional[list[int]] = None, intervals_method: str = "normality", id_col: str = "unique_id", time_col: str = "ds", target_col: str = "y", - ) -> tuple[FrameT, FrameT, FrameT, List[str]]: + ) -> tuple[FrameT, FrameT, FrameT, list[str]]: """ Performs preliminary wrangling and protections """ @@ -267,9 +267,9 @@ def reconcile( self, Y_hat_df: Frame, S: Frame, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], Y_df: Optional[Frame] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: str = "normality", num_samples: int = -1, seed: int = 0, @@ -505,9 +505,9 @@ def bootstrap_reconcile( self, Y_hat_df: Frame, S_df: Frame, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], Y_df: Optional[Frame] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: str = "normality", num_samples: int = -1, num_seeds: int = 1, diff --git a/hierarchicalforecast/evaluation.py b/hierarchicalforecast/evaluation.py index 26963981..94b022d8 100644 --- a/hierarchicalforecast/evaluation.py +++ b/hierarchicalforecast/evaluation.py @@ -10,7 +10,7 @@ from inspect import signature from narwhals.typing import Frame, FrameT from scipy.stats import multivariate_normal -from typing import Callable, Dict, List, Optional, Union +from typing import Callable, Optional, Union # %% ../nbs/src/evaluation.ipynb 7 def _metric_protections( @@ -349,14 +349,14 @@ class HierarchicalEvaluation: **References:**
""" - def __init__(self, evaluators: List[Callable]): + def __init__(self, evaluators: list[Callable]): self.evaluators = evaluators def evaluate( self, Y_hat_df: Frame, Y_test_df: Frame, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], Y_df: Optional[Frame] = None, benchmark: Optional[str] = None, id_col: str = "unique_id", @@ -458,16 +458,16 @@ def evaluate( evaluation_index_np[i_level * len(fn_names) + i_fn, 1] = fn_name evaluation_np = evaluation_np.reshape(-1, len(model_names)) - evaluation_index_dict = { - "level": evaluation_index_np[:, 0], - "metric": evaluation_index_np[:, 1], - } - evaluation_index_nw = nw.from_dict( - evaluation_index_dict, native_namespace=native_namespace + evaluation_nw = nw.from_dict( + { + **{ + "level": evaluation_index_np[:, 0], + "metric": evaluation_index_np[:, 1], + }, + **dict(zip(model_names, evaluation_np.T)), + }, + native_namespace=native_namespace, ) - evaluation_dict = dict(zip(model_names, evaluation_np.T)) - evaluation_nw = evaluation_index_nw.with_columns(**evaluation_dict) - evaluation_nw = evaluation_nw[["level", "metric"] + model_names] evaluation = evaluation_nw.to_native() diff --git a/hierarchicalforecast/methods.py b/hierarchicalforecast/methods.py index 7c5a0dd5..a94c6f27 100644 --- a/hierarchicalforecast/methods.py +++ b/hierarchicalforecast/methods.py @@ -9,7 +9,7 @@ from collections import OrderedDict from concurrent.futures import ThreadPoolExecutor from copy import deepcopy -from typing import Dict, List, Optional, Union +from typing import Optional, Union import numpy as np from quadprog import solve_qp @@ -81,7 +81,7 @@ def _reconcile( P: np.ndarray, y_hat: np.ndarray, SP: np.ndarray = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, sampler: Optional[Union[Normality, PERMBU, Bootstrap]] = None, ): @@ -101,7 +101,7 @@ def _reconcile( return res def predict( - self, S: np.ndarray, y_hat: np.ndarray, level: Optional[List[int]] = None + self, S: np.ndarray, y_hat: np.ndarray, level: Optional[list[int]] = None ): """Predict using reconciler. @@ -194,7 +194,7 @@ def fit( intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): """Bottom Up Fit Method. @@ -235,11 +235,11 @@ def fit_predict( y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): """BottomUp Reconciliation Method. 
@@ -300,7 +300,7 @@ def _get_PW_matrices(self, S, idx_bottom): # %% ../nbs/src/methods.ipynb 27 def _get_child_nodes( - S: Union[np.ndarray, sparse.csr_matrix], tags: Dict[str, np.ndarray] + S: Union[np.ndarray, sparse.csr_matrix], tags: dict[str, np.ndarray] ): if isinstance(S, sparse.spmatrix): S = S.toarray() @@ -324,8 +324,8 @@ def _get_child_nodes( def _reconcile_fcst_proportions( S: np.ndarray, y_hat: np.ndarray, - tags: Dict[str, np.ndarray], - nodes: Dict[str, Dict[int, np.ndarray]], + tags: dict[str, np.ndarray], + nodes: dict[str, dict[int, np.ndarray]], idx_top: int, ): reconciled = np.zeros_like(y_hat) @@ -369,7 +369,7 @@ def _get_PW_matrices( S: np.ndarray, y_hat: np.ndarray, y_insample: np.ndarray, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): n_hiers, n_bottom = S.shape @@ -416,7 +416,7 @@ def fit( intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, idx_bottom: Optional[np.ndarray] = None, ): """TopDown Fit Method. @@ -458,12 +458,12 @@ def fit_predict( self, S: np.ndarray, y_hat: np.ndarray, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], idx_bottom: np.ndarray = None, y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, @@ -540,7 +540,7 @@ def _get_PW_matrices( S: sparse.csr_matrix, y_hat: np.ndarray, y_insample: np.ndarray, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): # Check if the data structure is strictly hierarchical. if tags is not None and not is_strictly_hierarchical(S, tags): @@ -625,9 +625,9 @@ def fit_predict( self, S: np.ndarray, y_hat: np.ndarray, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], y_insample: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, ): """Middle Out Reconciliation Method. @@ -727,11 +727,11 @@ def fit_predict( self, S: np.ndarray, y_hat: np.ndarray, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], y_insample: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, - ) -> Dict[str, np.ndarray]: + ) -> dict[str, np.ndarray]: # Check if the data structure is strictly hierarchical. if not is_strictly_hierarchical(S, tags): raise ValueError( @@ -849,7 +849,7 @@ def _get_PW_matrices( y_hat: np.ndarray, y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, - idx_bottom: Optional[List[int]] = None, + idx_bottom: Optional[list[int]] = None, ): # shape residuals_insample (n_hiers, obs) res_methods = ["wls_var", "mint_cov", "mint_shrink"] @@ -954,7 +954,7 @@ def fit( intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, idx_bottom: Optional[np.ndarray] = None, ): """MinTrace Fit Method. 
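[Editor's note: the typing changes in this commit swap `typing.Dict`/`typing.List` for the built-in generics `dict`/`list` (PEP 585), available from Python 3.9. A before/after sketch with a hypothetical signature in the style of these reconcilers:]

```python
from typing import Optional

import numpy as np

# Before: from typing import Dict, List
# def fit(tags: Dict[str, np.ndarray], level: Optional[List[int]] = None) -> List[str]: ...

# After: built-in types are directly subscriptable on Python >= 3.9.
def fit(tags: dict[str, np.ndarray], level: Optional[list[int]] = None) -> list[str]:
    return sorted(tags)
```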
@@ -1056,11 +1056,11 @@ def fit_predict( y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): """MinTrace Reconciliation Method. @@ -1130,7 +1130,7 @@ def _get_PW_matrices( y_hat: np.ndarray, y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, - idx_bottom: Optional[List[int]] = None, + idx_bottom: Optional[list[int]] = None, ): # shape residuals_insample (n_hiers, obs) res_methods = ["wls_var", "mint_cov", "mint_shrink"] @@ -1224,7 +1224,7 @@ def fit( intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, idx_bottom: Optional[np.ndarray] = None, ): # Clip the base forecasts if required to align them with their use in practice. @@ -1414,7 +1414,7 @@ def fit( intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, idx_bottom: Optional[np.ndarray] = None, ): """ERM Fit Method. @@ -1463,11 +1463,11 @@ def fit_predict( y_insample: Optional[np.ndarray] = None, y_hat_insample: Optional[np.ndarray] = None, sigmah: Optional[np.ndarray] = None, - level: Optional[List[int]] = None, + level: Optional[list[int]] = None, intervals_method: Optional[str] = None, num_samples: Optional[int] = None, seed: Optional[int] = None, - tags: Optional[Dict[str, np.ndarray]] = None, + tags: Optional[dict[str, np.ndarray]] = None, ): """ERM Reconciliation Method. 
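[Editor's note: the reconciler signatures above are typically driven through `HierarchicalReconciliation`; a hedged usage sketch, assuming `Y_hat_df`, `Y_df`, `S_df` and `tags` were produced by `aggregate` and a base forecaster as in the notebook tests above:]

```python
from hierarchicalforecast.core import HierarchicalReconciliation
from hierarchicalforecast.methods import BottomUp, MinTrace

# BottomUp needs no insample data; MinTrace's residual-based methods use Y_df.
hrec = HierarchicalReconciliation(reconcilers=[BottomUp(), MinTrace(method="mint_shrink")])
Y_rec_df = hrec.reconcile(
    Y_hat_df=Y_hat_df,  # base forecasts, one column per model (assumed to exist)
    Y_df=Y_df,          # insample values (assumed to exist)
    S=S_df,
    tags=tags,
)
```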
diff --git a/hierarchicalforecast/probabilistic_methods.py b/hierarchicalforecast/probabilistic_methods.py index d5a90c8d..f5982d57 100644 --- a/hierarchicalforecast/probabilistic_methods.py +++ b/hierarchicalforecast/probabilistic_methods.py @@ -5,7 +5,7 @@ # %% ../nbs/src/probabilistic_methods.ipynb 3 import warnings -from typing import Dict, Optional +from typing import Optional import numpy as np from scipy.stats import norm @@ -263,7 +263,7 @@ class PERMBU: def __init__( self, S: np.ndarray, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], y_hat: np.ndarray, y_insample: np.ndarray, y_hat_insample: np.ndarray, diff --git a/hierarchicalforecast/utils.py b/hierarchicalforecast/utils.py index 2688559b..96a34ad6 100644 --- a/hierarchicalforecast/utils.py +++ b/hierarchicalforecast/utils.py @@ -15,7 +15,7 @@ from narwhals.typing import Frame, FrameT from numba import njit, prange from sklearn.preprocessing import OneHotEncoder -from typing import Dict, List, Optional, Union, Sequence +from typing import Optional, Union, Sequence # %% ../nbs/src/utils.ipynb 6 # Global variables @@ -44,7 +44,7 @@ def __exit__(self, exc_type, exc_value, traceback): ) # %% ../nbs/src/utils.ipynb 8 -def is_strictly_hierarchical(S: np.ndarray, tags: Dict[str, np.ndarray]) -> bool: +def is_strictly_hierarchical(S: np.ndarray, tags: dict[str, np.ndarray]) -> bool: # main idea: # if S represents a strictly hierarchical structure # the number of paths before the bottom level @@ -62,8 +62,8 @@ def is_strictly_hierarchical(S: np.ndarray, tags: Dict[str, np.ndarray]) -> bool # %% ../nbs/src/utils.ipynb 10 def _to_upper_hierarchy( - bottom_split: List[str], bottom_values: str, upper_key: str -) -> List[str]: + bottom_split: list[str], bottom_values: str, upper_key: str +) -> list[str]: upper_split = upper_key.split("/") upper_idxs = [bottom_split.index(i) for i in upper_split] @@ -76,12 +76,12 @@ def join_upper(bottom_value): # %% ../nbs/src/utils.ipynb 11 def aggregate( df: Frame, - spec: List[List[str]], - exog_vars: Optional[Dict[str, Union[str, List[str]]]] = None, + spec: list[list[str]], + exog_vars: Optional[dict[str, Union[str, list[str]]]] = None, sparse_s: bool = False, id_col: str = "unique_id", time_col: str = "ds", - target_cols: List[str] = ["y"], + target_cols: list[str] = ["y"], ) -> tuple[FrameT, FrameT, dict]: """Utils Aggregation Function. Aggregates bottom level series contained in the DataFrame `df` according @@ -92,7 +92,7 @@ def aggregate( df : DataFrame Dataframe with columns `[time_col, *target_cols]`, columns to aggregate and optionally exog_vars. spec : list of list of str - List of levels. Each element of the list should contain a list of columns of `df` to aggregate. + list of levels. Each element of the list should contain a list of columns of `df` to aggregate. exog_vars: dictionary of string keys & values that can either be a list of strings or a single string keys correspond to column names and the values represent the aggregation(s) that will be applied to each column. Accepted values are those from Pandas or Polars aggregation Functions, check the respective docs for guidance is_balanced : bool (default=False) @@ -104,7 +104,7 @@ def aggregate( time_col : str (default='ds') Column that identifies each timestep, its values can be timestamps or integers. target_cols : (default=['y']) - List of columns that contains the targets to aggregate. + list of columns that contains the targets to aggregate. 
Returns ------- @@ -221,9 +221,13 @@ def aggregate( S_dum = encoder.fit_transform(S) if not sparse_s: - S_nw = nw.from_dict({id_col: category_list}, native_namespace=native_namespace) - S_dict = dict(zip(tags[level_name], S_dum)) - S_nw = S_nw.with_columns(**S_dict) + S_nw = nw.from_dict( + { + **{id_col: category_list}, + **dict(zip(tags[level_name], S_dum)), + }, + native_namespace=native_namespace, + ) S_nw = nw.maybe_reset_index(S_nw) S_df = S_nw.to_native() else: @@ -252,7 +256,7 @@ class HierarchicalPlot: def __init__( self, S: Frame, - tags: Dict[str, np.ndarray], + tags: dict[str, np.ndarray], S_id_col: str = "unique_id", ): @@ -277,8 +281,8 @@ def plot_series( self, series: str, Y_df: Frame, - models: Optional[List[str]] = None, - level: Optional[List[int]] = None, + models: Optional[list[str]] = None, + level: Optional[list[int]] = None, id_col: str = "unique_id", time_col: str = "ds", target_col: str = "y", @@ -289,7 +293,7 @@ def plot_series( `series`: str, string identifying the `'unique_id'` any-level series to plot.
         `Y_df`: DataFrame, hierarchically structured series ($\mathbf{y}_{[a,b]}$).
                 It contains columns `['unique_id', 'ds', 'y']`, it may have `'models'`.
-        `models`: List[str], string identifying filtering model columns.
+        `models`: list[str], string identifying filtering model columns.
         `level`: float list 0-100, confidence levels for prediction intervals available in `Y_df`.
         `id_col` : str='unique_id', column that identifies each series.
         `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
@@ -356,8 +360,8 @@ def plot_hierarchically_linked_series( self, bottom_series: str, Y_df: Frame, - models: Optional[List[str]] = None, - level: Optional[List[int]] = None, + models: Optional[list[str]] = None, + level: Optional[list[int]] = None, id_col: str = "unique_id", time_col: str = "ds", target_col: str = "y", @@ -368,7 +372,7 @@ def plot_hierarchically_linked_series( `bottom_series`: str, string identifying the `'unique_id'` bottom-level series to plot.
         `Y_df`: DataFrame, hierarchically structured series ($\mathbf{y}_{[a,b]}$).
                 It contains columns ['unique_id', 'ds', 'y'] and models.
-        `models`: List[str], string identifying filtering model columns.
+        `models`: list[str], string identifying filtering model columns.
         `level`: float list 0-100, confidence levels for prediction intervals available in `Y_df`.
         `id_col` : str='unique_id', column that identifies each series.
         `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
@@ -451,7 +455,7 @@ def plot_hierarchically_linked_series( def plot_hierarchical_predictions_gap( self, Y_df: Frame, - models: Optional[List[str]] = None, + models: Optional[list[str]] = None, xlabel: Optional[str] = None, ylabel: Optional[str] = None, id_col: str = "unique_id", @@ -463,7 +467,7 @@ def plot_hierarchical_predictions_gap( **Parameters:**
         `Y_df`: DataFrame, hierarchically structured series ($\mathbf{y}_{[a,b]}$).
                 It contains columns ['unique_id', 'ds', 'y'] and models.
-        `models`: List[str], string identifying filtering model columns.
+        `models`: list[str], string identifying filtering model columns.
         `xlabel`: str, string for the plot's x axis label.
         `ylabel`: str, string for the plot's y axis label.
         `id_col` : str='unique_id', column that identifies each series.
@@ -521,7 +525,7 @@ def plot_hierarchical_predictions_gap( # %% ../nbs/src/utils.ipynb 46 # convert levels to output quantile names -def level_to_outputs(level: List[int]) -> tuple[List[float], List[str]]: +def level_to_outputs(level: list[int]) -> tuple[list[float], list[str]]: """Converts list of levels into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
@@ -545,7 +549,7 @@ def level_to_outputs(level: List[int]) -> tuple[List[float], List[str]]: # convert quantiles to output quantile names -def quantiles_to_outputs(quantiles: List[float]) -> tuple[List[float], List[str]]: +def quantiles_to_outputs(quantiles: list[float]) -> tuple[list[float], list[str]]: """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods. **Parameters:**
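[Editor's note: a usage sketch for `samples_to_quantiles_df`, whose hunk follows — the `backend` argument added in patch 1 selects the output dataframe library; the values below are illustrative:]

```python
import numpy as np
from hierarchicalforecast.utils import samples_to_quantiles_df

rng = np.random.default_rng(0)
samples = rng.normal(size=(2, 100, 3))  # [n_series, n_samples, horizon]
unique_ids = ["id1", "id2"]
dates = ["2023-01-01", "2023-01-02", "2023-01-03"]

quantiles, Y_hat_df = samples_to_quantiles_df(
    samples, unique_ids, dates, level=[80, 90], backend="pandas"
)
# Columns follow the 'model', 'model-median', 'model-lo-90', ... naming
# exercised in the notebook tests above.
print(quantiles)
print(Y_hat_df.columns.tolist())
```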
@@ -570,21 +574,21 @@ def quantiles_to_outputs(quantiles: List[float]) -> tuple[List[float], List[str] def samples_to_quantiles_df( samples: np.ndarray, unique_ids: Sequence[str], - dates: List[str], - quantiles: Optional[List[float]] = None, - level: Optional[List[int]] = None, + dates: list[str], + quantiles: Optional[list[float]] = None, + level: Optional[list[int]] = None, model_name: str = "model", id_col: str = "unique_id", time_col: str = "ds", backend: str = "pandas", -) -> tuple[List[float], FrameT]: +) -> tuple[list[float], FrameT]: """Transform Random Samples into HierarchicalForecast input. Auxiliary function to create compatible HierarchicalForecast input `Y_hat_df` dataframe. **Parameters:**
`samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
`unique_ids`: string list. Unique identifiers for each time series.
- `dates`: datetime list. List of forecast dates.
+ `dates`: datetime list. list of forecast dates.
`quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
`level`: int list in [0,100]. Probability levels for prediction intervals.
`model_name`: string. Name of forecasting model.
@@ -640,10 +644,12 @@ def samples_to_quantiles_df( forecasts_quantiles = forecasts_quantiles.reshape(-1, len(_quantiles)) df_nw = nw.from_dict( - {id_col: unique_ids, time_col: ds, model_name: forecasts_mean}, + { + **{id_col: unique_ids, time_col: ds, model_name: forecasts_mean}, + **dict(zip(col_names, forecasts_quantiles.T)), + }, native_namespace=namespace, ) - df_nw = df_nw.with_columns(**dict(zip(col_names, forecasts_quantiles.T))) return _quantiles, df_nw.to_native() diff --git a/nbs/src/core.ipynb b/nbs/src/core.ipynb index 83c5e33d..bc85a813 100644 --- a/nbs/src/core.ipynb +++ b/nbs/src/core.ipynb @@ -53,7 +53,7 @@ "from narwhals.typing import Frame, FrameT\n", "from scipy.stats import norm\n", "from scipy import sparse\n", - "from typing import Dict, List, Optional\n", + "from typing import Optional\n", "\n", "import narwhals as nw\n", "import numpy as np" @@ -221,7 +221,7 @@ " [Rob J. Hyndman and George Athanasopoulos (2018). \\\"Forecasting principles and practice, Hierarchical and Grouped Series\\\".](https://otexts.com/fpp3/hierarchical.html)\n", " \"\"\"\n", " def __init__(self,\n", - " reconcilers: List[HReconciler]):\n", + " reconcilers: list[HReconciler]):\n", " self.reconcilers = reconcilers\n", " self.orig_reconcilers = copy.deepcopy(reconcilers) # TODO: elegant solution\n", " self.insample = any([method.insample for method in reconcilers])\n", @@ -230,13 +230,13 @@ " Y_hat_nw: Frame,\n", " S_nw: Frame,\n", " Y_nw: Optional[Frame],\n", - " tags: Dict[str, np.ndarray],\n", - " level: Optional[List[int]] = None,\n", + " tags: dict[str, np.ndarray],\n", + " level: Optional[list[int]] = None,\n", " intervals_method: str = 'normality',\n", " id_col: str = \"unique_id\",\n", " time_col: str = \"ds\", \n", " target_col: str = \"y\", \n", - " ) -> tuple[FrameT, FrameT, FrameT, List[str]]:\n", + " ) -> tuple[FrameT, FrameT, FrameT, list[str]]:\n", " \"\"\"\n", " Performs preliminary wrangling and protections\n", " \"\"\"\n", @@ -354,9 +354,9 @@ " def reconcile(self, \n", " Y_hat_df: Frame,\n", " S: Frame,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " Y_df: Optional[Frame] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: str = 'normality',\n", " num_samples: int = -1,\n", " seed: int = 0,\n", @@ -539,9 +539,9 @@ " def bootstrap_reconcile(self,\n", " Y_hat_df: Frame,\n", " S_df: Frame,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " Y_df: Optional[Frame] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: str = 'normality',\n", " num_samples: int = -1,\n", " num_seeds: int = 1,\n", diff --git a/nbs/src/evaluation.ipynb b/nbs/src/evaluation.ipynb index d440b281..882042d7 100644 --- a/nbs/src/evaluation.ipynb +++ b/nbs/src/evaluation.ipynb @@ -40,7 +40,7 @@ "from inspect import signature\n", "from narwhals.typing import Frame, FrameT\n", "from scipy.stats import multivariate_normal\n", - "from typing import Callable, Dict, List, Optional, Union" + "from typing import Callable, Optional, Union" ] }, { @@ -540,13 +540,13 @@ " **References:**
\n", " \"\"\"\n", " def __init__(self, \n", - " evaluators: List[Callable]):\n", + " evaluators: list[Callable]):\n", " self.evaluators = evaluators\n", "\n", " def evaluate(self, \n", " Y_hat_df: Frame,\n", " Y_test_df: Frame,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " Y_df: Optional[Frame] = None,\n", " benchmark: Optional[str] = None,\n", " id_col: str = \"unique_id\",\n", @@ -626,11 +626,12 @@ " evaluation_index_np[i_level * len(fn_names) + i_fn, 1] = fn_name\n", "\n", " evaluation_np = evaluation_np.reshape(-1, len(model_names))\n", - " evaluation_index_dict = {\"level\": evaluation_index_np[:, 0], \"metric\": evaluation_index_np[:, 1]}\n", - " evaluation_index_nw = nw.from_dict(evaluation_index_dict, native_namespace=native_namespace)\n", - " evaluation_dict = dict(zip(model_names, evaluation_np.T))\n", - " evaluation_nw = evaluation_index_nw.with_columns(**evaluation_dict)\n", - " evaluation_nw = evaluation_nw[[\"level\", \"metric\"] + model_names]\n", + " evaluation_nw = nw.from_dict(\n", + " {\n", + " **{\"level\": evaluation_index_np[:, 0], \"metric\": evaluation_index_np[:, 1]},\n", + " **dict(zip(model_names, evaluation_np.T))\n", + " }, \n", + " native_namespace=native_namespace)\n", "\n", " evaluation = evaluation_nw.to_native()\n", "\n", diff --git a/nbs/src/methods.ipynb b/nbs/src/methods.ipynb index 2ba0ba01..1a34d8f4 100644 --- a/nbs/src/methods.ipynb +++ b/nbs/src/methods.ipynb @@ -41,7 +41,7 @@ "from collections import OrderedDict\n", "from concurrent.futures import ThreadPoolExecutor\n", "from copy import deepcopy\n", - "from typing import Dict, List, Optional, Union\n", + "from typing import Optional, Union\n", "\n", "import numpy as np\n", "from quadprog import solve_qp\n", @@ -122,7 +122,7 @@ " P: np.ndarray,\n", " y_hat: np.ndarray,\n", " SP: np.ndarray = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " sampler: Optional[Union[Normality, PERMBU, Bootstrap]] = None):\n", "\n", " # Mean reconciliation\n", @@ -142,7 +142,7 @@ " def predict(self,\n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " level: Optional[List[int]] = None):\n", + " level: Optional[list[int]] = None):\n", " \"\"\"Predict using reconciler.\n", "\n", " Predict using fitted mean and probabilistic reconcilers.\n", @@ -248,7 +248,7 @@ " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None, \n", - " tags: Optional[Dict[str, np.ndarray]] = None):\n", + " tags: Optional[dict[str, np.ndarray]] = None):\n", " \"\"\"Bottom Up Fit Method.\n", "\n", " **Parameters:**
\n", @@ -285,11 +285,11 @@ " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None,\n", - " tags: Optional[Dict[str, np.ndarray]] = None):\n", + " tags: Optional[dict[str, np.ndarray]] = None):\n", " \"\"\"BottomUp Reconciliation Method.\n", "\n", " **Parameters:**
\n", @@ -585,7 +585,7 @@ "source": [ "#| exporti\n", "def _get_child_nodes(\n", - " S: Union[np.ndarray, sparse.csr_matrix], tags: Dict[str, np.ndarray]\n", + " S: Union[np.ndarray, sparse.csr_matrix], tags: dict[str, np.ndarray]\n", "):\n", " if isinstance(S, sparse.spmatrix):\n", " S = S.toarray()\n", @@ -614,8 +614,8 @@ "source": [ "#| exporti\n", "def _reconcile_fcst_proportions(S: np.ndarray, y_hat: np.ndarray,\n", - " tags: Dict[str, np.ndarray],\n", - " nodes: Dict[str, Dict[int, np.ndarray]],\n", + " tags: dict[str, np.ndarray],\n", + " nodes: dict[str, dict[int, np.ndarray]],\n", " idx_top: int):\n", " reconciled = np.zeros_like(y_hat)\n", " reconciled[idx_top] = y_hat[idx_top]\n", @@ -664,7 +664,7 @@ " S: np.ndarray,\n", " y_hat: np.ndarray,\n", " y_insample: np.ndarray,\n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " ):\n", "\n", " n_hiers, n_bottom = S.shape\n", @@ -708,7 +708,7 @@ " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None, \n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " idx_bottom: Optional[np.ndarray] = None):\n", " \"\"\"TopDown Fit Method.\n", "\n", @@ -745,12 +745,12 @@ " def fit_predict(self,\n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " idx_bottom: np.ndarray = None,\n", " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None):\n", @@ -867,7 +867,7 @@ " S: sparse.csr_matrix,\n", " y_hat: np.ndarray,\n", " y_insample: np.ndarray,\n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " ):\n", " # Check if the data structure is strictly hierarchical.\n", " if tags is not None and not is_strictly_hierarchical(S, tags):\n", @@ -1114,9 +1114,9 @@ " def fit_predict(self, \n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " y_insample: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None):\n", " \"\"\"Middle Out Reconciliation Method.\n", "\n", @@ -1265,11 +1265,11 @@ " self,\n", " S: np.ndarray,\n", " y_hat: np.ndarray,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " y_insample: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None,\n", - " ) -> Dict[str, np.ndarray]:\n", + " ) -> dict[str, np.ndarray]:\n", " # Check if the data structure is strictly hierarchical.\n", " if not is_strictly_hierarchical(S, tags):\n", " raise ValueError(\n", @@ -1548,7 +1548,7 @@ " y_hat: np.ndarray,\n", " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", - " idx_bottom: Optional[List[int]] = None,):\n", + " idx_bottom: Optional[list[int]] = None,):\n", " # shape residuals_insample (n_hiers, obs)\n", " res_methods = ['wls_var', 'mint_cov', 'mint_shrink']\n", " if self.method in res_methods and y_insample is None and 
y_hat_insample is None:\n", @@ -1623,7 +1623,7 @@ " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None, \n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " idx_bottom: Optional[np.ndarray] = None):\n", " \"\"\"MinTrace Fit Method.\n", "\n", @@ -1710,11 +1710,11 @@ " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None, \n", - " tags: Optional[Dict[str, np.ndarray]] = None):\n", + " tags: Optional[dict[str, np.ndarray]] = None):\n", " \"\"\"MinTrace Reconciliation Method.\n", "\n", " **Parameters:**
\n", @@ -1828,7 +1828,7 @@ " y_hat: np.ndarray,\n", " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", - " idx_bottom: Optional[List[int]] = None,\n", + " idx_bottom: Optional[list[int]] = None,\n", " ):\n", " # shape residuals_insample (n_hiers, obs)\n", " res_methods = [\"wls_var\", \"mint_cov\", \"mint_shrink\"]\n", @@ -1917,7 +1917,7 @@ " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None, \n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " idx_bottom: Optional[np.ndarray] = None):\n", " # Clip the base forecasts if required to align them with their use in practice.\n", " if self.nonnegative:\n", @@ -2360,7 +2360,7 @@ " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None,\n", - " tags: Optional[Dict[str, np.ndarray]] = None,\n", + " tags: Optional[dict[str, np.ndarray]] = None,\n", " idx_bottom: Optional[np.ndarray] = None):\n", " \"\"\"ERM Fit Method.\n", "\n", @@ -2403,11 +2403,11 @@ " y_insample: Optional[np.ndarray] = None,\n", " y_hat_insample: Optional[np.ndarray] = None,\n", " sigmah: Optional[np.ndarray] = None,\n", - " level: Optional[List[int]] = None,\n", + " level: Optional[list[int]] = None,\n", " intervals_method: Optional[str] = None,\n", " num_samples: Optional[int] = None,\n", " seed: Optional[int] = None,\n", - " tags: Optional[Dict[str, np.ndarray]] = None):\n", + " tags: Optional[dict[str, np.ndarray]] = None):\n", " \"\"\"ERM Reconciliation Method.\n", "\n", " **Parameters:**
\n", diff --git a/nbs/src/probabilistic_methods.ipynb b/nbs/src/probabilistic_methods.ipynb index f91e770a..b9dc2f2d 100644 --- a/nbs/src/probabilistic_methods.ipynb +++ b/nbs/src/probabilistic_methods.ipynb @@ -36,7 +36,7 @@ "source": [ "#| export\n", "import warnings\n", - "from typing import Dict, Optional\n", + "from typing import Optional\n", "\n", "import numpy as np\n", "from scipy.stats import norm\n", @@ -372,7 +372,7 @@ " \"\"\"\n", " def __init__(self,\n", " S: np.ndarray,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " y_hat: np.ndarray,\n", " y_insample: np.ndarray,\n", " y_hat_insample: np.ndarray,\n", diff --git a/nbs/src/utils.ipynb b/nbs/src/utils.ipynb index 479b6bc0..72b5ef24 100644 --- a/nbs/src/utils.ipynb +++ b/nbs/src/utils.ipynb @@ -50,7 +50,7 @@ "from narwhals.typing import Frame, FrameT\n", "from numba import njit, prange\n", "from sklearn.preprocessing import OneHotEncoder\n", - "from typing import Dict, List, Optional, Union, Sequence" + "from typing import Optional, Union, Sequence" ] }, { @@ -125,7 +125,7 @@ "source": [ "#| exporti\n", "def is_strictly_hierarchical(S: np.ndarray, \n", - " tags: Dict[str, np.ndarray]) -> bool:\n", + " tags: dict[str, np.ndarray]) -> bool:\n", " # main idea:\n", " # if S represents a strictly hierarchical structure\n", " # the number of paths before the bottom level\n", @@ -158,7 +158,7 @@ "outputs": [], "source": [ "#| exporti\n", - "def _to_upper_hierarchy(bottom_split: List[str], bottom_values: str, upper_key: str) -> List[str]:\n", + "def _to_upper_hierarchy(bottom_split: list[str], bottom_values: str, upper_key: str) -> list[str]:\n", " upper_split = upper_key.split('/')\n", " upper_idxs = [bottom_split.index(i) for i in upper_split]\n", "\n", @@ -179,12 +179,12 @@ "#| export\n", "def aggregate(\n", " df: Frame,\n", - " spec: List[List[str]],\n", - " exog_vars: Optional[Dict[str, Union[str, List[str]]]] = None,\n", + " spec: list[list[str]],\n", + " exog_vars: Optional[dict[str, Union[str, list[str]]]] = None,\n", " sparse_s: bool = False,\n", " id_col: str = \"unique_id\",\n", " time_col: str = \"ds\", \n", - " target_cols: List[str] = [\"y\"], \n", + " target_cols: list[str] = [\"y\"], \n", ") -> tuple[FrameT, FrameT, dict]:\n", " \"\"\"Utils Aggregation Function.\n", " Aggregates bottom level series contained in the DataFrame `df` according\n", @@ -195,7 +195,7 @@ " df : DataFrame\n", " Dataframe with columns `[time_col, *target_cols]`, columns to aggregate and optionally exog_vars.\n", " spec : list of list of str\n", - " List of levels. Each element of the list should contain a list of columns of `df` to aggregate.\n", + " list of levels. Each element of the list should contain a list of columns of `df` to aggregate.\n", " exog_vars: dictionary of string keys & values that can either be a list of strings or a single string\n", " keys correspond to column names and the values represent the aggregation(s) that will be applied to each column. Accepted values are those from Pandas or Polars aggregation Functions, check the respective docs for guidance\n", " is_balanced : bool (default=False)\n", @@ -207,7 +207,7 @@ " time_col : str (default='ds')\n", " Column that identifies each timestep, its values can be timestamps or integers.\n", " target_cols : (default=['y'])\n", - " List of columns that contains the targets to aggregate. \n", + " list of columns that contains the targets to aggregate. 
\n", "\n", " Returns\n", " -------\n", @@ -301,10 +301,11 @@ " S_dum = encoder.fit_transform(S)\n", " \n", " if not sparse_s:\n", - " S_nw = nw.from_dict({id_col: category_list}, \n", - " native_namespace=native_namespace)\n", - " S_dict = dict(zip(tags[level_name], S_dum))\n", - " S_nw = S_nw.with_columns(**S_dict)\n", + " S_nw = nw.from_dict({\n", + " **{id_col: category_list},\n", + " **dict(zip(tags[level_name], S_dum)),\n", + " }, \n", + " native_namespace=native_namespace)\n", " S_nw = nw.maybe_reset_index(S_nw)\n", " S_df = S_nw.to_native()\n", " else:\n", @@ -663,7 +664,7 @@ " \"\"\"\n", " def __init__(self,\n", " S: Frame,\n", - " tags: Dict[str, np.ndarray],\n", + " tags: dict[str, np.ndarray],\n", " S_id_col: str = \"unique_id\",\n", " ):\n", "\n", @@ -687,8 +688,8 @@ " def plot_series(self,\n", " series: str,\n", " Y_df: Frame,\n", - " models: Optional[List[str]] = None,\n", - " level: Optional[List[int]] = None,\n", + " models: Optional[list[str]] = None,\n", + " level: Optional[list[int]] = None,\n", " id_col: str = \"unique_id\",\n", " time_col: str = \"ds\",\n", " target_col: str = \"y\",\n", @@ -699,7 +700,7 @@ " `series`: str, string identifying the `'unique_id'` any-level series to plot.
\n", " `Y_df`: DataFrame, hierarchically structured series ($\\mathbf{y}_{[a,b]}$). \n", " It contains columns `['unique_id', 'ds', 'y']`, it may have `'models'`.
\n", - " `models`: List[str], string identifying filtering model columns.
\n", + " `models`: list[str], string identifying filtering model columns.
\n", " `level`: float list 0-100, confidence levels for prediction intervals available in `Y_df`.
\n", " `id_col` : str='unique_id', column that identifies each serie.
\n", " `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
\n", @@ -751,8 +752,8 @@ " def plot_hierarchically_linked_series(self,\n", " bottom_series: str,\n", " Y_df: Frame,\n", - " models: Optional[List[str]] = None,\n", - " level: Optional[List[int]] = None,\n", + " models: Optional[list[str]] = None,\n", + " level: Optional[list[int]] = None,\n", " id_col: str = \"unique_id\",\n", " time_col: str = \"ds\",\n", " target_col: str = \"y\", \n", @@ -763,7 +764,7 @@ " `bottom_series`: str, string identifying the `'unique_id'` bottom-level series to plot.
\n", " `Y_df`: DataFrame, hierarchically structured series ($\\mathbf{y}_{[a,b]}$). \n", " It contains columns ['unique_id', 'ds', 'y'] and models.
\n", - " `models`: List[str], string identifying filtering model columns.
\n", + " `models`: list[str], string identifying filtering model columns.
\n", " `level`: float list 0-100, confidence levels for prediction intervals available in `Y_df`.
\n", " `id_col` : str='unique_id', column that identifies each serie.
\n", " `time_col` : str='ds', column that identifies each timestep, its values can be timestamps or integers.
\n", @@ -823,7 +824,7 @@ "\n", " def plot_hierarchical_predictions_gap(self,\n", " Y_df: Frame,\n", - " models: Optional[List[str]] = None,\n", + " models: Optional[list[str]] = None,\n", " xlabel: Optional[str] = None,\n", " ylabel: Optional[str] = None,\n", " id_col: str = \"unique_id\",\n", @@ -835,7 +836,7 @@ " **Parameters:**
\n", " `Y_df`: DataFrame, hierarchically structured series ($\\mathbf{y}_{[a,b]}$). \n", " It contains columns ['unique_id', 'ds', 'y'] and models.
\n", - " `models`: List[str], string identifying filtering model columns.
\n", + " `models`: list[str], string identifying filtering model columns.
\n", " `xlabel`: str, string for the plot's x axis label.
\n", " `ylabel`: str, string for the plot's y axis label.
\n", " `id_col` : str='unique_id', column that identifies each serie.
\n", @@ -1201,7 +1202,7 @@ "#| exporti\n", "\n", "# convert levels to output quantile names\n", - "def level_to_outputs(level: List[int]) -> tuple[List[float], List[str]]:\n", + "def level_to_outputs(level: list[int]) -> tuple[list[float], list[str]]:\n", " \"\"\" Converts list of levels into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -1224,7 +1225,7 @@ " return quantiles, output_names\n", "\n", "# convert quantiles to output quantile names\n", - "def quantiles_to_outputs(quantiles: List[float]) -> tuple[List[float], List[str]]:\n", + "def quantiles_to_outputs(quantiles: list[float]) -> tuple[list[float], list[str]]:\n", " \"\"\"Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.\n", "\n", " **Parameters:**
\n", @@ -1257,21 +1258,21 @@ "# output a Pandas Dataframe with columns of quantile predictions\n", "def samples_to_quantiles_df(samples: np.ndarray, \n", " unique_ids: Sequence[str], \n", - " dates: List[str], \n", - " quantiles: Optional[List[float]] = None,\n", - " level: Optional[List[int]] = None, \n", + " dates: list[str], \n", + " quantiles: Optional[list[float]] = None,\n", + " level: Optional[list[int]] = None, \n", " model_name: str = \"model\",\n", " id_col: str = 'unique_id',\n", " time_col: str = 'ds',\n", " backend: str = 'pandas',\n", - " ) -> tuple[List[float], FrameT]:\n", + " ) -> tuple[list[float], FrameT]:\n", " \"\"\" Transform Random Samples into HierarchicalForecast input.\n", " Auxiliary function to create compatible HierarchicalForecast input `Y_hat_df` dataframe.\n", "\n", " **Parameters:**
\n", " `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].
\n", " `unique_ids`: string list. Unique identifiers for each time series.
\n", - " `dates`: datetime list. List of forecast dates.
\n", + " `dates`: datetime list. list of forecast dates.
\n", " `quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.
\n", " `level`: int list in [0,100]. Probability levels for prediction intervals.
\n", " `model_name`: string. Name of forecasting model.
\n", @@ -1322,8 +1323,13 @@ " forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,H,N] -> [N,H,Q]\n", " forecasts_quantiles = forecasts_quantiles.reshape(-1,len(_quantiles))\n", " \n", - " df_nw = nw.from_dict({id_col:unique_ids, time_col:ds, model_name:forecasts_mean}, native_namespace=namespace)\n", - " df_nw = df_nw.with_columns(**dict(zip(col_names, forecasts_quantiles.T))) \n", + " df_nw = nw.from_dict(\n", + " {\n", + " **{id_col: unique_ids, time_col: ds, model_name: forecasts_mean},\n", + " **dict(zip(col_names, forecasts_quantiles.T)),\n", + " },\n", + " native_namespace=namespace,\n", + " )\n", "\n", " return _quantiles, df_nw.to_native()" ]