Merge pull request #16 from LAMPSPUC/allow_level_rm
allow for the removal of the level component
andreramosfdc authored Aug 4, 2024
2 parents 1223c5c + 8b0f65d commit 832b745
Showing 8 changed files with 172 additions and 80 deletions.
29 changes: 18 additions & 11 deletions src/StateSpaceLearning.jl
@@ -9,21 +9,21 @@ include("estimation_procedure/default_estimation_procedure.jl")
include("utils.jl")
include("datasets.jl")

const DEFAULT_COMPONENTS_PARAMETERS = ["stochastic_level", "trend", "stochastic_trend", "seasonal", "stochastic_seasonal", "freq_seasonal"]
const DEFAULT_COMPONENTS_PARAMETERS = ["level", "stochastic_level", "trend", "stochastic_trend", "seasonal", "stochastic_seasonal", "freq_seasonal"]

export fit_model, forecast

"""
fit_model(y::Vector{Fl};
model_input::Dict = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true,
"seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12,
"outlier" => true, "ζ_ω_threshold" => 12),
model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes,
"get_variances" => get_variances),
estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05,
"penalize_exogenous" => true, "penalize_initial_states" => true),
estimation_function::Function = default_estimation_procedure,
Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl
model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true,
"seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12,
"outlier" => true, "ζ_ω_threshold" => 12),
model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes,
"get_variances" => get_variances),
estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05,
"penalize_exogenous" => true, "penalize_initial_states" => true),
estimation_function::Function = default_estimation_procedure,
Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl
Fits the StateSpaceLearning model using specified parameters and estimation procedures.
@@ -40,7 +40,7 @@ fit_model(y::Vector{Fl};
"""
function fit_model(y::Vector{Fl};
model_input::Dict = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true,
model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true,
"seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12,
"outlier" => true, "ζ_ω_threshold" => 12),
model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes,
@@ -57,8 +57,15 @@ function fit_model(y::Vector{Fl};
@assert all([key in keys(model_input) for key in DEFAULT_COMPONENTS_PARAMETERS]) "The default components model must have all the necessary parameters $(DEFAULT_COMPONENTS_PARAMETERS)"
end

@assert !has_intercept(Exogenous_X) "Exogenous matrix must not have an intercept column"

X = model_functions["create_X"](model_input, Exogenous_X)

if has_intercept(X)
@assert allequal(X[:, 1]) "Intercept column must be the first column"
@assert !has_intercept(X[:, 2:end]) "Matrix must not have more than one intercept column"
end

estimation_y, Estimation_X, valid_indexes = handle_missing_values(X, y)

components_indexes = model_functions["get_components_indexes"](Exogenous_X, model_input)
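
Not part of the diff: a hypothetical usage sketch of the new "level" flag (argument names follow the docstring above; the forecast call is assumed from the export list):

    using StateSpaceLearning
    y = randn(144)                                   # toy series
    output = fit_model(y;
        model_input = Dict("level" => false, "stochastic_level" => true,
                           "trend" => true, "stochastic_trend" => true,
                           "seasonal" => true, "stochastic_seasonal" => true,
                           "freq_seasonal" => 12, "outlier" => true,
                           "ζ_ω_threshold" => 12))   # drop the deterministic level, keep the rest
    prediction = forecast(output, 12)                # exported above; exact signature assumed
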
99 changes: 66 additions & 33 deletions src/estimation_procedure/default_estimation_procedure.jl
@@ -52,71 +52,71 @@ function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_inde
end

"""
get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl},
get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl},
information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
Calculates the information criteria along the regularization path of a GLMNet model and returns coefficients and residuals of the best model based on the selected information criteria.
# Arguments
- `model::GLMNetPath`: Fitted GLMNetPath model object.
- `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `estimation_y::Vector{Fl}`: Vector of response values for estimation.
- `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `Lasso_y::Vector{Fl}`: Vector of response values for estimation.
- `information_criteria::String`: Information Criteria method for hyperparameter selection.
- `intercept::Bool`: Flag for intercept inclusion in the model (default: true).
# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model.
"""
function get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
path_size = length(model.lambda)
T = size(Estimation_X, 1)
T = size(Lasso_X, 1)
K = count(i->i != 0, model.betas; dims = 1)'

method_vec = Vector{Float64}(undef, path_size)
for i in 1:path_size
fit = Estimation_X*model.betas[:, i] .+ model.a0[i]
ϵ = estimation_y - fit
fit = Lasso_X*model.betas[:, i] .+ model.a0[i]
ϵ = Lasso_y - fit

method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria)
end

best_model_idx = argmin(method_vec)
coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx]
fit = intercept ? hcat(ones(T), Estimation_X)*coefs : Estimation_X*coefs
ϵ = estimation_y - fit
fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs
ϵ = Lasso_y - fit
return coefs, ϵ
end
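
The best model along the path is picked via get_information; as a rough sketch of the kind of criterion assumed here (the exact formula lives elsewhere in the package and may differ):

    # Assumed Gaussian-likelihood information criterion, not the package's exact code.
    function information_criterion(T::Int, K::Int, ϵ::Vector{Float64}; criteria::String = "aic")
        σ² = sum(ϵ .^ 2) / T                     # MLE of the residual variance
        ll = -0.5 * T * (log(2π * σ²) + 1)       # Gaussian log-likelihood at that variance
        return criteria == "bic" ? K * log(T) - 2ll : 2K - 2ll
    end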

"""
fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64;
fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64;
information_criteria::String = "aic",
penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1),
penalty_factor::Vector{Float64}=ones(size(Lasso_X,2) - 1),
intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
Fits a GLMNet model to the provided data and returns coefficients and residuals based on selected criteria.
# Arguments
- `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `estimation_y::Vector{Fl}`: Vector of response values for estimation.
- `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation.
- `Lasso_y::Vector{Fl}`: Vector of response values for estimation.
- `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1).
- `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic).
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)).
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Lasso_X, 2) - 1)).
- `intercept::Bool`: Flag for intercept inclusion in the model (default: true).
# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model.
"""
function fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
model = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001)
return get_path_information_criteria(model, Estimation_X, estimation_y, information_criteria; intercept = intercept)
function fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64}=ones(size(Lasso_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
model = glmnet(Lasso_X, Lasso_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Lasso_X, 2), lambda_min_ratio=0.001)
return get_path_information_criteria(model, Lasso_X, Lasso_y, information_criteria; intercept = intercept)
end
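
A hypothetical toy call, not part of the diff (GLMNet.jl backs the glmnet call above; penalty_factor needs one entry per predictor when passed explicitly):

    X = randn(100, 5)
    y = X * [1.5, 0.0, -2.0, 0.0, 0.5] + 0.1 * randn(100)
    coefs, resid = fit_glmnet(X, y, 0.5; information_criteria = "bic",
                              penalty_factor = ones(5), intercept = true)
    length(coefs)   # 6: the intercept followed by one coefficient per column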

"""
fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String,
penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}};
penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64};
rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
Fits a Lasso regression model to the provided data and returns coefficients and residuals based on selected criteria.
@@ -127,23 +127,41 @@ end
- `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic).
- `penalize_exogenous::Bool`: Flag for selecting exogenous variables. When false, the penalty factor for these variables will be set to 0.
- `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components.
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)).
- `intercept::Bool`: Flag for intercept inclusion in the model (default: true).
- `penalty_factor::Vector{Float64}`: Penalty factors for each predictor.
- `rm_average::Bool`: Flag indicating whether the intercept should be computed as the average of the time series (default: false).
# Returns
- `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted Lasso model.
"""
function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}; penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}
function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64}; rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl}

outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, components_indexes)
penalty_factor[outlier_duplicate_columns] .= Inf

!penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : nothing
mean_y = mean(estimation_y); Lasso_y = intercept ? estimation_y : estimation_y .- mean_y
hasintercept = has_intercept(Estimation_X)
if hasintercept
!penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : nothing
Lasso_X = Estimation_X[:, 2:end]
else
!penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"]] .= 0 : nothing
Lasso_X = Estimation_X
@assert !rm_average "Intercept must be included in the model if rm_average is set to true"
end

if rm_average
mean_y = mean(estimation_y)
Lasso_y = estimation_y .- mean_y
else
Lasso_y = estimation_y
end

coefs, ϵ = fit_glmnet(Estimation_X[:, 2:end], Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = intercept)
return !intercept ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)
if hasintercept
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average)
else
coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false)
end
return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ)

end
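
When rm_average is set, the response is de-meaned, glmnet runs without its own intercept, and the sample mean is reattached as the first (level) coefficient. A minimal illustration of that bookkeeping, with a hypothetical second-stage result:

    y       = [10.0, 11.0, 9.0, 10.5]
    mean_y  = sum(y) / length(y)            # 10.125
    Lasso_y = y .- mean_y                   # what fit_glmnet sees when rm_average = true
    stage_coefs = [0.3, -0.1]               # hypothetical coefficients returned without an intercept
    coefs = vcat(mean_y, stage_coefs)       # mirrors vcat(mean_y, coefs) in the return above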

@@ -175,22 +175,37 @@ function default_estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Ve

@assert 0 <= α <= 1 "α must be in [0, 1]"

penalty_factor = ones(size(Estimation_X, 2) - 1); penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0
coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor = penalty_factor, intercept = false)
hasintercept = has_intercept(Estimation_X)

if hasintercept
penalty_factor = ones(size(Estimation_X, 2) - 1)
penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0
coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = true)
else
penalty_factor = ones(size(Estimation_X, 2))
penalty_factor[components_indexes["initial_states"][2:end]] .= 0
coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false)
end

#AdaLasso per component
penalty_factor = zeros(size(Estimation_X, 2) - 1)
ts_penalty_factor = hasintercept ? zeros(size(Estimation_X, 2) - 1) : zeros(size(Estimation_X, 2))
for key in keys(components_indexes)
if key != "initial_states" && key != "μ1"
component = components_indexes[key]
if key != "Exogenous_X" && key != "o" && !(key in ["ν1", "γ1"])
κ = count(i -> i != 0, coefs[component]) < 1 ? 0 : std(coefs[component])
penalty_factor[component .- 1] .= (1 /+ ϵ))
hasintercept ? ts_penalty_factor[component .- 1] .= (1 /+ ϵ)) : ts_penalty_factor[component] .= (1 /+ ϵ))
else
penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ))
hasintercept ? ts_penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ)) : ts_penalty_factor[component] = (1 ./ (abs.(coefs[component]) .+ ϵ))
end
end
end
!penalize_initial_states ? penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing
return fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor=penalty_factor)

if hasintercept
!penalize_initial_states ? ts_penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing
else
!penalize_initial_states ? ts_penalty_factor[components_indexes["initial_states"][2:end]] .= 0 : nothing
end

return fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false)
end
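
The second stage applies AdaLasso weights: components other than Exogenous_X, o, ν1 and γ1 share a single weight 1 / (κ + ϵ) built from the spread of their first-stage coefficients, while the remaining columns are weighted individually by 1 ./ (abs.(coefs) .+ ϵ). A small hypothetical illustration:

    using Statistics
    ϵ = 0.05
    stage1 = [0.8, 0.0, -0.4]                        # made-up first-stage coefficients of one component
    κ = count(!iszero, stage1) < 1 ? 0 : std(stage1)
    shared_weight = 1 / (κ + ϵ)                      # one weight broadcast over the whole component
    individual_weights = 1 ./ (abs.(stage1) .+ ϵ)    # per-column weights for the other groups
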
24 changes: 16 additions & 8 deletions src/models/default_model.jl
@@ -116,24 +116,26 @@ function create_ω(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, ζ_ω_thr
end

"""
create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, trend::Bool, seasonal::Bool)::Matrix
create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, level::Bool, trend::Bool, seasonal::Bool)::Matrix
Creates an initial states matrix based on the input parameters.
# Arguments
- `T::Int64`: Length of the original time series.
- `freq_seasonal::Int64`: Seasonal period.
- `steps_ahead::Int64`: Number of steps ahead.
- `level::Bool`: Flag for considering level component.
- `trend::Bool`: Flag for considering trend component.
- `seasonal::Bool`: Flag for considering seasonal component.
# Returns
- `Matrix`: Initial states matrix constructed based on the input parameters.
"""
function create_initial_states_Matrix(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, trend::Bool, seasonal::Bool)::Matrix
function create_initial_states_Matrix(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, level::Bool, trend::Bool, seasonal::Bool)::Matrix

initial_states_matrix = ones(T+steps_ahead, 1)
initial_states_matrix = zeros(T+steps_ahead, 0)
level ? initial_states_matrix = hcat(initial_states_matrix, ones(T+steps_ahead, 1)) : nothing
trend ? initial_states_matrix = hcat(initial_states_matrix, vcat([0], collect(1:T+steps_ahead-1))) : nothing

if seasonal
Expand Down Expand Up @@ -172,7 +174,7 @@ function create_X(model_input::Dict, Exogenous_X::Matrix{Fl},
ω_matrix = model_input["stochastic_seasonal"] ? create_ω(T, model_input["freq_seasonal"], steps_ahead, ζ_ω_threshold) : zeros(T+steps_ahead, 0)
o_matrix = outlier ? create_o_matrix(T, steps_ahead) : zeros(T+steps_ahead, 0)

initial_states_matrix = create_initial_states_Matrix(T, model_input["freq_seasonal"], steps_ahead, model_input["trend"], model_input["seasonal"])
initial_states_matrix = create_initial_states_Matrix(T, model_input["freq_seasonal"], steps_ahead, model_input["level"], model_input["trend"], model_input["seasonal"])
return hcat(initial_states_matrix, ξ_matrix, ζ_matrix, ω_matrix, o_matrix, vcat(Exogenous_X, Exogenous_Forecast))

end
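
A standalone sketch of the deterministic block assembled above (level and trend only, seasonal omitted) shows that turning the level off simply drops the column of ones:

    T, steps_ahead = 5, 2
    level, trend = true, false
    M = zeros(T + steps_ahead, 0)
    level && (M = hcat(M, ones(T + steps_ahead, 1)))
    trend && (M = hcat(M, vcat([0], collect(1:T + steps_ahead - 1))))
    size(M)   # (7, 1) here; (7, 0) if level is also false
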
@@ -194,9 +196,16 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic

outlier = model_input["outlier"]; ζ_ω_threshold = model_input["ζ_ω_threshold"]; T = size(Exogenous_X, 1)

μ1_indexes = [1]
initial_states_indexes = [1]
FINAL_INDEX = 1
FINAL_INDEX = 0

if model_input["level"]
μ1_indexes = [1]
initial_states_indexes = [1]
FINAL_INDEX += length(μ1_indexes)
else
μ1_indexes = Int64[]
initial_states_indexes = Int64[]
end

if model_input["trend"]
ν1_indexes = [2]
@@ -214,7 +223,6 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic
γ1_indexes = Int64[]
end


if model_input["stochastic_level"]
ξ_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+ξ_size(T))
FINAL_INDEX += length(ξ_indexes)
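
Each stochastic block above claims collect(FINAL_INDEX+1:FINAL_INDEX+block_size) and advances the running offset, so removing a component shifts every later block to the left. A generic illustration with made-up names and sizes, not the package's code:

    function block_indexes(blocks)
        final_index = 0
        indexes = Dict{String, Vector{Int64}}()
        for (name, active, len) in blocks
            if active
                indexes[name] = collect(final_index + 1:final_index + len)
                final_index += len
            else
                indexes[name] = Int64[]
            end
        end
        return indexes
    end
    block_indexes([("ξ", true, 10), ("ζ", false, 8), ("ω", true, 12)])
    # Dict("ξ" => [1, ..., 10], "ζ" => Int64[], "ω" => [11, ..., 22])
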
15 changes: 15 additions & 0 deletions src/utils.jl
@@ -105,4 +105,19 @@ function handle_missing_values(X::Matrix{Tl}, y::Vector{Fl})::Tuple{Vector{Fl},
valid_indexes = setdiff(1:length(y), invalid_indexes)

return y[valid_indexes], X[valid_indexes, :], valid_indexes
end

"""
has_intercept(X::Matrix{Tl})::Bool where Tl
Checks if the input matrix has a constant column (intercept).
# Arguments
- `X::Matrix{Tl}`: Input matrix.
# Returns
- `Bool`: True if the input matrix has a constant column, false otherwise.
"""
function has_intercept(X::Matrix{Tl})::Bool where Tl
return any([all(X[:, i] .== 1) for i in 1:size(X, 2)])
end
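
A quick check of the helper as committed (note it looks specifically for a column of ones, not an arbitrary constant column):

    has_intercept([1.0 2.0; 1.0 3.0])   # true: first column is all ones
    has_intercept([0.5 2.0; 0.5 3.0])   # false: constant, but not a column of ones
    has_intercept(randn(4, 2))          # false (almost surely)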