Skip to content

Commit

Permalink
set refit=False and results_ as dict in AutoMLForecast (#341)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmoralez authored May 9, 2024
1 parent dd52bfc commit 3eb6290
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 78 deletions.
27 changes: 24 additions & 3 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,12 @@ jobs:
- name: Run all tests
run: nbdev_test --n_workers 0 --do_print --timing --flags 'polars core'

run-local-tests:
runs-on: ${{ matrix.os }}
run-macos-tests:
runs-on: macos-13
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
os: [macos-13, windows-latest]
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Clone repo
Expand All @@ -90,6 +89,28 @@ jobs:
- name: Run local tests
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_glob "*distributed*" --flags 'polars core'

run-windows-tests:
runs-on: windows-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Clone repo
uses: actions/checkout@v3

- name: Set up environment
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install the library
run: pip install uv && uv pip install ".[dev]" --system

- name: Run local tests
run: nbdev_test --n_workers 0 --do_print --timing --skip_file_glob "*distributed*" --flags 'polars core'

check-deps:
runs-on: ubuntu-latest
steps:
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- matplotlib
- nbformat
- numba
- optuna
- pandas
- pip
- prophet
Expand All @@ -19,7 +20,6 @@ dependencies:
- s3fs
- scikit-learn
- shap
- snappy<1.2.0
- statsmodels
- window-ops
- py-xgboost-cpu
Expand Down
2 changes: 1 addition & 1 deletion local_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ dependencies:
- nbformat
- nomkl
- numba
- optuna
- pandas
- pip
- prophet
- pyarrow
- scikit-learn
- shap
- snappy<1.2.0
- statsmodels
- window-ops
- py-xgboost-cpu
Expand Down
2 changes: 1 addition & 1 deletion mlforecast/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = "0.12.1"
__version__ = "0.13.0"
__all__ = ['MLForecast']
from mlforecast.forecast import MLForecast
14 changes: 10 additions & 4 deletions mlforecast/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def random_forest_space(trial: optuna.Trial):
"min_samples_split": trial.suggest_int("min_child_samples", 1, 100),
"max_features": trial.suggest_float("max_features", 0.5, 1.0),
"criterion": trial.suggest_categorical(
"criterion", ["squared_error", "poisson"]
"criterion", ["squared_error", "absolute_error"]
),
}

Expand Down Expand Up @@ -434,6 +434,7 @@ def fit(
n_windows: int,
h: int,
num_samples: int,
refit: Union[bool, int] = False,
loss: Optional[Callable[[DataFrame, DataFrame], float]] = None,
id_col: str = "unique_id",
time_col: str = "ds",
Expand All @@ -454,6 +455,10 @@ def fit(
Forecast horizon.
num_samples : int
Number of trials to run
refit : bool or int (default=False)
Retrain model for each cross-validation window.
If False, the models are trained at the beginning and then used to predict each window.
If True, the models are retrained on every window.
If positive int, the models are retrained every `refit` windows.
loss : callable, optional (default=None)
Function that takes the validation and train dataframes and produces a float.
If `None` will use the average SMAPE across series.
Expand All @@ -466,7 +471,7 @@ def fit(
study_kwargs : dict, optional (default=None)
Keyword arguments to be passed to the optuna.Study constructor.
optimize_kwargs : dict, optional (default=None)
Keyword arguments to be passed to the Study.optimize method.
Keyword arguments to be passed to the optuna.Study.optimize method.
Returns
-------
Expand Down Expand Up @@ -498,7 +503,7 @@ def loss(df, train_df): # noqa: ARG001
if optimize_kwargs is None:
optimize_kwargs = {}

self.results_ = []
self.results_ = {}
self.models_ = {}
for name, auto_model in self.models.items():

Expand All @@ -520,13 +525,14 @@ def config_fn(trial: optuna.Trial) -> Dict[str, Any]:
freq=self.freq,
n_windows=n_windows,
h=h,
refit=refit,
id_col=id_col,
time_col=time_col,
target_col=target_col,
)
study = optuna.create_study(direction="minimize", **study_kwargs)
study.optimize(objective, n_trials=num_samples, **optimize_kwargs)
self.results_.append(study)
self.results_[name] = study
best_config = study.best_trial.user_attrs["config"]
best_model = clone(auto_model.model)
best_model.set_params(**best_config["model_params"])
Expand Down
39 changes: 25 additions & 14 deletions mlforecast/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

# %% ../nbs/optimization.ipynb 3
import copy
from typing import Any, Callable, Dict, Optional
from typing import Any, Callable, Dict, Optional, Union

import numpy as np
import optuna
Expand All @@ -29,6 +29,7 @@ def mlforecast_objective(
freq: Freq,
n_windows: int,
h: int,
refit: Union[bool, int] = False,
id_col: str = "unique_id",
time_col: str = "ds",
target_col: str = "y",
Expand All @@ -52,6 +53,10 @@ def mlforecast_objective(
Number of windows to evaluate.
h : int
Forecast horizon.
refit : bool or int (default=False)
Retrain model for each cross-validation window.
If False, the models are trained at the beginning and then used to predict each window.
If True, the models are retrained on every window.
If positive int, the models are retrained every `refit` windows.
id_col : str (default='unique_id')
Column that identifies each series.
time_col : str (default='ds')
Expand Down Expand Up @@ -88,19 +93,21 @@ def objective(trial: optuna.Trial) -> float:
model_params["cat_features"] = config["mlf_fit_params"]["static_features"]
model_copy.set_params(**config["model_params"])
metrics = []
mlf = MLForecast(
models={"model": model_copy},
freq=freq,
**config["mlf_init_params"],
)
for i, (_, train, valid) in enumerate(splits):
mlf = MLForecast(
models={"model": model_copy},
freq=freq,
**config["mlf_init_params"],
)
mlf.fit(
train,
id_col=id_col,
time_col=time_col,
target_col=target_col,
**config["mlf_fit_params"],
)
should_fit = i == 0 or (refit > 0 and i % refit == 0)
if should_fit:
mlf.fit(
train,
id_col=id_col,
time_col=time_col,
target_col=target_col,
**config["mlf_fit_params"],
)
static = [c for c in mlf.ts.static_features_.columns if c != id_col]
dynamic = [
c
Expand All @@ -113,7 +120,11 @@ def objective(trial: optuna.Trial) -> float:
)
else:
X_df = None
preds = mlf.predict(h=h, X_df=X_df)
preds = mlf.predict(
h=h,
X_df=X_df,
new_df=None if should_fit else train,
)
result = ufp.join(
valid[[id_col, time_col, target_col]],
preds,
Expand Down
Loading

0 comments on commit 3eb6290

Please sign in to comment.