diff --git a/DESCRIPTION b/DESCRIPTION index 9354060..953f327 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: finnts Title: Microsoft Finance Time Series Forecasting Framework -Version: 0.4.0.9005 +Version: 0.4.0.9006 Authors@R: c(person(given = "Mike", family = "Tokic", diff --git a/NEWS.md b/NEWS.md index ee1b746..f29b361 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# finnts 0.4.0.9005 (DEVELOPMENT VERSION) +# finnts 0.4.0.9006 (DEVELOPMENT VERSION) ## Improvements @@ -8,6 +8,7 @@ - Always save the most accurate model average, regardless if selected as best model. This allows for improved scaling with large data sets. - Automatically condense large forecasts (+10k time series) into smaller amount of files to make it easier to read forecast outputs - Improved weighted MAPE calculation across all time series +- Changed default for box_cox argument in `prep_data()` to FALSE ## Bug Fixes diff --git a/R/ensemble_models.R b/R/ensemble_models.R index e39b527..5fab7ae 100644 --- a/R/ensemble_models.R +++ b/R/ensemble_models.R @@ -366,6 +366,7 @@ ensemble_models <- function(run_info, parallel_over = "everything" ) ) %>% + base::suppressMessages() %>% base::suppressWarnings() best_param <- tune::select_best(tune_results, metric = "rmse") @@ -397,7 +398,9 @@ ensemble_models <- function(run_info, pkgs = inner_packages, parallel_over = "everything" ) - ) + ) %>% + base::suppressMessages() %>% + base::suppressWarnings() final_fcst <- tune::collect_predictions(refit_tbl) %>% dplyr::rename( diff --git a/R/multistep_cubist.R b/R/multistep_cubist.R index e03c047..08f849a 100644 --- a/R/multistep_cubist.R +++ b/R/multistep_cubist.R @@ -469,8 +469,8 @@ cubist_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_final <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) if (!is.null(xreg_tbl)) { diff --git a/R/multistep_glmnet.R b/R/multistep_glmnet.R index b67e821..1c290f9 100644 --- a/R/multistep_glmnet.R +++ b/R/multistep_glmnet.R @@ -457,8 +457,8 @@ glmnet_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_final <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) if (!is.null(xreg_tbl)) { diff --git a/R/multistep_mars.R b/R/multistep_mars.R index 68899d9..de13e59 100644 --- a/R/multistep_mars.R +++ b/R/multistep_mars.R @@ -480,8 +480,8 @@ mars_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_final <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) if (!is.null(xreg_tbl)) { diff --git a/R/multistep_svm_poly.R b/R/multistep_svm_poly.R index 4b57a8b..55e6e08 100644 --- a/R/multistep_svm_poly.R +++ b/R/multistep_svm_poly.R @@ -506,8 +506,8 @@ svm_poly_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_final <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) if (!is.null(xreg_tbl)) { diff --git a/R/multistep_svm_rbf.R b/R/multistep_svm_rbf.R index d748091..01e925a 100644 --- a/R/multistep_svm_rbf.R +++ b/R/multistep_svm_rbf.R @@ -486,8 +486,8 @@ svm_rbf_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_final <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) if (!is.null(xreg_tbl)) { diff --git a/R/multistep_xgboost.R b/R/multistep_xgboost.R index 9ce5332..2edc9c5 100644 --- a/R/multistep_xgboost.R +++ b/R/multistep_xgboost.R @@ -568,8 +568,8 @@ xgboost_multistep_predict_impl <- function(object, new_data, ...) { xreg_tbl_temp <- xreg_tbl %>% dplyr::filter( - Run_Number >= start_val, - Run_Number <= lag_number + Run_Number >= as.numeric(start_val), + Run_Number <= as.numeric(lag_number) ) xreg_tbl_final <- xreg_tbl_temp %>% diff --git a/R/prep_data.R b/R/prep_data.R index 92108af..b9b9efa 100644 --- a/R/prep_data.R +++ b/R/prep_data.R @@ -83,7 +83,7 @@ prep_data <- function(run_info, fiscal_year_start = 1, clean_missing_values = TRUE, clean_outliers = FALSE, - box_cox = TRUE, + box_cox = FALSE, stationary = TRUE, forecast_approach = "bottoms_up", parallel_processing = NULL, diff --git a/R/prep_models.R b/R/prep_models.R index 5602820..c848a12 100644 --- a/R/prep_models.R +++ b/R/prep_models.R @@ -804,7 +804,9 @@ model_hyperparameters <- function(run_info, # update logging file log_df <- log_df %>% - dplyr::mutate(num_hyperparameters = num_hyperparameters) + dplyr::mutate( + num_hyperparameters = num_hyperparameters + ) write_data( x = log_df, diff --git a/R/read_write_data.R b/R/read_write_data.R index 2e43572..b0587a5 100644 --- a/R/read_write_data.R +++ b/R/read_write_data.R @@ -407,10 +407,17 @@ write_data <- function(x, write_data_type <- function(x, path, type) { + if (type == "csv") { + if (nrow(x) == 1) { + type <- "log" + } + } + switch(type, rds = saveRDS(x, path), parquet = arrow::write_parquet(x, path), csv = vroom::vroom_write(x, path, delim = ",", progress = FALSE), + log = utils::write.csv(x, path, row.names = FALSE), qs = qs::qsave(x, path) ) } diff --git a/R/train_models.R b/R/train_models.R index d8f406e..35b9802 100644 --- a/R/train_models.R +++ b/R/train_models.R @@ -516,7 +516,9 @@ train_models <- function(run_info, parallel_over = "everything" ) ) %>% - tune::collect_predictions() + tune::collect_predictions() %>% + base::suppressMessages() %>% + base::suppressWarnings() # finalize forecast final_fcst <- refit_tbl %>% @@ -534,6 +536,11 @@ train_models <- function(run_info, dplyr::mutate(Hyperparameter_ID = hyperparameter_id) %>% dplyr::select(-.row, -.config) + # check for future forecast + if (as.numeric(min(unique(final_fcst$Train_Test_ID))) != 1) { + stop("model is missing future forecast") + } + # undo differencing transformation if (stationary & model %in% list_multivariate_models()) { if (combo_hash == "All-Data") { diff --git a/man/prep_data.Rd b/man/prep_data.Rd index d802a05..a70731a 100644 --- a/man/prep_data.Rd +++ b/man/prep_data.Rd @@ -18,7 +18,7 @@ prep_data( fiscal_year_start = 1, clean_missing_values = TRUE, clean_outliers = FALSE, - box_cox = TRUE, + box_cox = FALSE, stationary = TRUE, forecast_approach = "bottoms_up", parallel_processing = NULL,