From 8b0f65d81702e96d95e972dbdaabd969cff04df8 Mon Sep 17 00:00:00 2001 From: andre_ramos Date: Sat, 3 Aug 2024 22:02:07 -0400 Subject: [PATCH] allow for the removal of level component --- src/StateSpaceLearning.jl | 29 +++--- .../default_estimation_procedure.jl | 99 ++++++++++++------- src/models/default_model.jl | 24 +++-- src/utils.jl | 15 +++ test/StateSpaceLearning.jl | 4 +- .../default_estimation_procedure.jl | 29 ++++-- test/models/default_model.jl | 38 ++++--- test/utils.jl | 14 ++- 8 files changed, 172 insertions(+), 80 deletions(-) diff --git a/src/StateSpaceLearning.jl b/src/StateSpaceLearning.jl index ccbacba..bac448a 100644 --- a/src/StateSpaceLearning.jl +++ b/src/StateSpaceLearning.jl @@ -9,21 +9,21 @@ include("estimation_procedure/default_estimation_procedure.jl") include("utils.jl") include("datasets.jl") -const DEFAULT_COMPONENTS_PARAMETERS = ["stochastic_level", "trend", "stochastic_trend", "seasonal", "stochastic_seasonal", "freq_seasonal"] +const DEFAULT_COMPONENTS_PARAMETERS = ["level", "stochastic_level", "trend", "stochastic_trend", "seasonal", "stochastic_seasonal", "freq_seasonal"] export fit_model, forecast """ fit_model(y::Vector{Fl}; - model_input::Dict = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, - "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, - "outlier" => true, "ζ_ω_threshold" => 12), - model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, - "get_variances" => get_variances), - estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, - "penalize_exogenous" => true, "penalize_initial_states" => true), - estimation_function::Function = default_estimation_procedure, - Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl + model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, + "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, + "outlier" => true, "ζ_ω_threshold" => 12), + model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, + "get_variances" => get_variances), + estimation_input::Dict = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, + "penalize_exogenous" => true, "penalize_initial_states" => true), + estimation_function::Function = default_estimation_procedure, + Exogenous_X::Matrix{Fl} = zeros(length(y), 0))::Output where Fl Fits the StateSpaceLearning model using specified parameters and estimation procedures.
@@ -40,7 +40,7 @@ fit_model(y::Vector{Fl}; """ function fit_model(y::Vector{Fl}; - model_input::Dict = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, + model_input::Dict = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, "ζ_ω_threshold" => 12), model_functions::Dict = Dict("create_X" => create_X, "get_components_indexes" => get_components_indexes, @@ -57,8 +57,15 @@ function fit_model(y::Vector{Fl}; @assert all([key in keys(model_input) for key in DEFAULT_COMPONENTS_PARAMETERS]) "The default components model must have all the necessary parameters $(DEFAULT_COMPONENTS_PARAMETERS)" end + @assert !has_intercept(Exogenous_X) "Exogenous matrix must not have an intercept column" + X = model_functions["create_X"](model_input, Exogenous_X) + if has_intercept(X) + @assert allequal(X[:, 1]) "Intercept column must be the first column" + @assert !has_intercept(X[:, 2:end]) "Matrix must not have more than one intercept column" + end + estimation_y, Estimation_X, valid_indexes = handle_missing_values(X, y) components_indexes = model_functions["get_components_indexes"](Exogenous_X, model_input) diff --git a/src/estimation_procedure/default_estimation_procedure.jl b/src/estimation_procedure/default_estimation_procedure.jl index a25cc19..3f6e37f 100644 --- a/src/estimation_procedure/default_estimation_procedure.jl +++ b/src/estimation_procedure/default_estimation_procedure.jl @@ -52,15 +52,15 @@ function get_outlier_duplicate_columns(Estimation_X::Matrix{Tl}, components_inde end """ - get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, + get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} Calculates the information criteria along the regularization path of a GLMNet model and returns coefficients and residuals of the best model based on the selected information criteria. # Arguments - `model::GLMNetPath`: Fitted GLMNetPath model object. - - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. - - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `Lasso_y::Vector{Fl}`: Vector of response values for estimation. - `information_criteria::String`: Information Criteria method for hyperparameter selection. - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). @@ -68,55 +68,55 @@ end - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. 
""" -function get_path_information_criteria(model::GLMNetPath, Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} +function get_path_information_criteria(model::GLMNetPath, Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, information_criteria::String; intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} path_size = length(model.lambda) - T = size(Estimation_X, 1) + T = size(Lasso_X, 1) K = count(i->i != 0, model.betas; dims = 1)' method_vec = Vector{Float64}(undef, path_size) for i in 1:path_size - fit = Estimation_X*model.betas[:, i] .+ model.a0[i] - ϵ = estimation_y - fit + fit = Lasso_X*model.betas[:, i] .+ model.a0[i] + ϵ = Lasso_y - fit method_vec[i] = get_information(T, K[i], ϵ; information_criteria = information_criteria) end best_model_idx = argmin(method_vec) coefs = intercept ? vcat(model.a0[best_model_idx], model.betas[:, best_model_idx]) : model.betas[:, best_model_idx] - fit = intercept ? hcat(ones(T), Estimation_X)*coefs : Estimation_X*coefs - ϵ = estimation_y - fit + fit = intercept ? hcat(ones(T), Lasso_X)*coefs : Lasso_X*coefs + ϵ = Lasso_y - fit return coefs, ϵ end """ - fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64; + fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", - penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), + penalty_factor::Vector{Float64}=ones(size(Lasso_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} Fits a GLMNet model to the provided data and returns coefficients and residuals based on selected criteria. # Arguments - - `Estimation_X::Matrix{Tl}`: Matrix of predictors for estimation. - - `estimation_y::Vector{Fl}`: Vector of response values for estimation. + - `Lasso_X::Matrix{Tl}`: Matrix of predictors for estimation. + - `Lasso_y::Vector{Fl}`: Vector of response values for estimation. - `α::Float64`: Elastic net control factor between ridge (α=0) and lasso (α=1) (default: 0.1). - `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic). - - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)). + - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Lasso_X, 2) - 1)). - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). # Returns - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the best model. 
""" -function fit_glmnet(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} - model = glmnet(Estimation_X, estimation_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Estimation_X, 2), lambda_min_ratio=0.001) - return get_path_information_criteria(model, Estimation_X, estimation_y, information_criteria; intercept = intercept) +function fit_glmnet(Lasso_X::Matrix{Tl}, Lasso_y::Vector{Fl}, α::Float64; information_criteria::String = "aic", penalty_factor::Vector{Float64}=ones(size(Lasso_X,2) - 1), intercept::Bool = intercept)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + model = glmnet(Lasso_X, Lasso_y, alpha = α, penalty_factor = penalty_factor, intercept = intercept, dfmax=size(Lasso_X, 2), lambda_min_ratio=0.001) + return get_path_information_criteria(model, Lasso_X, Lasso_y, information_criteria; intercept = intercept) end """ fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, - penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}; - penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} + penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64}; + rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} Fits a Lasso regression model to the provided data and returns coefficients and residuals based on selected criteria. @@ -127,23 +127,41 @@ end - `information_criteria::String`: Information Criteria method for hyperparameter selection (default: aic). - `penalize_exogenous::Bool`: Flag for selecting exogenous variables. When false the penalty factor for these variables will be set to 0. - `components_indexes::Dict{String, Vector{Int64}}`: Dictionary containing indexes for different components. - - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor (default: ones(size(Estimation_X, 2) - 1)). - - `intercept::Bool`: Flag for intercept inclusion in the model (default: true). + - `penalty_factor::Vector{Float64}`: Penalty factors for each predictor. + - `rm_average::Bool`: Flag to consider if the intercept will be calculated is the average of the time series (default: false). # Returns - `Tuple{Vector{Float64}, Vector{Float64}}`: Tuple containing coefficients and residuals of the fitted Lasso model. """ -function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}; penalty_factor::Vector{Float64}=ones(size(Estimation_X,2) - 1), intercept::Bool = true)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} +function fit_lasso(Estimation_X::Matrix{Tl}, estimation_y::Vector{Fl}, α::Float64, information_criteria::String, penalize_exogenous::Bool, components_indexes::Dict{String, Vector{Int64}}, penalty_factor::Vector{Float64}; rm_average::Bool = false)::Tuple{Vector{Float64}, Vector{Float64}} where {Tl, Fl} outlier_duplicate_columns = get_outlier_duplicate_columns(Estimation_X, components_indexes) penalty_factor[outlier_duplicate_columns] .= Inf - !penalize_exogenous ? 
penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : nothing - mean_y = mean(estimation_y); Lasso_y = intercept ? estimation_y : estimation_y .- mean_y + hasintercept = has_intercept(Estimation_X) + if hasintercept + !penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"] .- 1] .= 0 : nothing + Lasso_X = Estimation_X[:, 2:end] + else + !penalize_exogenous ? penalty_factor[components_indexes["Exogenous_X"]] .= 0 : nothing + Lasso_X = Estimation_X + @assert !rm_average "Intercept must be included in the model if rm_average is set to true" + end + + if rm_average + mean_y = mean(estimation_y) + Lasso_y = estimation_y .- mean_y + else + Lasso_y = estimation_y + end - coefs, ϵ = fit_glmnet(Estimation_X[:, 2:end], Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = intercept) - return !intercept ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ) + if hasintercept + coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = !rm_average) + else + coefs, ϵ = fit_glmnet(Lasso_X, Lasso_y, α; information_criteria=information_criteria, penalty_factor=penalty_factor, intercept = false) + end + return rm_average ? (vcat(mean_y, coefs), ϵ) : (coefs, ϵ) end @@ -175,22 +193,37 @@ function default_estimation_procedure(Estimation_X::Matrix{Tl}, estimation_y::Ve @assert 0 <= α <= 1 "α must be in [0, 1]" - penalty_factor = ones(size(Estimation_X, 2) - 1); penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 - coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor = penalty_factor, intercept = false) + hasintercept = has_intercept(Estimation_X) + + if hasintercept + penalty_factor = ones(size(Estimation_X, 2) - 1) + penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 + coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = true) + else + penalty_factor = ones(size(Estimation_X, 2)) + penalty_factor[components_indexes["initial_states"][2:end]] .= 0 + coefs, _ = fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, penalty_factor; rm_average = false) + end #AdaLasso per component - penalty_factor = zeros(size(Estimation_X, 2) - 1) + ts_penalty_factor = hasintercept ? zeros(size(Estimation_X, 2) - 1) : zeros(size(Estimation_X, 2)) for key in keys(components_indexes) if key != "initial_states" && key != "μ1" component = components_indexes[key] if key != "Exogenous_X" && key != "o" && !(key in ["ν1", "γ1"]) κ = count(i -> i != 0, coefs[component]) < 1 ? 0 : std(coefs[component]) - penalty_factor[component .- 1] .= (1 / (κ + ϵ)) + hasintercept ? ts_penalty_factor[component .- 1] .= (1 / (κ + ϵ)) : ts_penalty_factor[component] .= (1 / (κ + ϵ)) else - penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ)) + hasintercept ? ts_penalty_factor[component .- 1] = (1 ./ (abs.(coefs[component]) .+ ϵ)) : ts_penalty_factor[component] = (1 ./ (abs.(coefs[component]) .+ ϵ)) end end end - !penalize_initial_states ? penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing - return fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes; penalty_factor=penalty_factor) + + if hasintercept + !penalize_initial_states ? 
ts_penalty_factor[components_indexes["initial_states"][2:end] .- 1] .= 0 : nothing + else + !penalize_initial_states ? ts_penalty_factor[components_indexes["initial_states"][2:end]] .= 0 : nothing + end + + return fit_lasso(Estimation_X, estimation_y, α, information_criteria, penalize_exogenous, components_indexes, ts_penalty_factor; rm_average = false) end diff --git a/src/models/default_model.jl b/src/models/default_model.jl index 33be145..f1f5939 100644 --- a/src/models/default_model.jl +++ b/src/models/default_model.jl @@ -116,7 +116,7 @@ function create_ω(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, ζ_ω_thr end """ - create_initial_states_Matrix(T::Int64, s::Int64, steps_ahead::Int64, trend::Bool, seasonal::Bool)::Matrix + create_initial_states_Matrix(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, level::Bool, trend::Bool, seasonal::Bool)::Matrix Creates an initial states matrix based on the input parameters. @@ -124,6 +124,7 @@ end - `T::Int64`: Length of the original time series. - `freq_seasonal::Int64`: Seasonal period. - `steps_ahead::Int64`: Number of steps ahead. + - `level::Bool`: Flag for considering level component. - `trend::Bool`: Flag for considering trend component. - `seasonal::Bool`: Flag for considering seasonal component. @@ -131,9 +132,10 @@ end - `Matrix`: Initial states matrix constructed based on the input parameters. """ -function create_initial_states_Matrix(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, trend::Bool, seasonal::Bool)::Matrix +function create_initial_states_Matrix(T::Int64, freq_seasonal::Int64, steps_ahead::Int64, level::Bool, trend::Bool, seasonal::Bool)::Matrix - initial_states_matrix = ones(T+steps_ahead, 1) + initial_states_matrix = zeros(T+steps_ahead, 0) + level ? initial_states_matrix = hcat(initial_states_matrix, ones(T+steps_ahead, 1)) : nothing trend ? initial_states_matrix = hcat(initial_states_matrix, vcat([0], collect(1:T+steps_ahead-1))) : nothing if seasonal @@ -172,7 +174,7 @@ function create_X(model_input::Dict, Exogenous_X::Matrix{Fl}, ω_matrix = model_input["stochastic_seasonal"] ? create_ω(T, model_input["freq_seasonal"], steps_ahead, ζ_ω_threshold) : zeros(T+steps_ahead, 0) o_matrix = outlier ?
create_o_matrix(T, steps_ahead) : zeros(T+steps_ahead, 0) - initial_states_matrix = create_initial_states_Matrix(T, model_input["freq_seasonal"], steps_ahead, model_input["trend"], model_input["seasonal"]) + initial_states_matrix = create_initial_states_Matrix(T, model_input["freq_seasonal"], steps_ahead, model_input["level"], model_input["trend"], model_input["seasonal"]) return hcat(initial_states_matrix, ξ_matrix, ζ_matrix, ω_matrix, o_matrix, vcat(Exogenous_X, Exogenous_Forecast)) end @@ -194,9 +196,16 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic outlier = model_input["outlier"]; ζ_ω_threshold = model_input["ζ_ω_threshold"]; T = size(Exogenous_X, 1) - μ1_indexes = [1] - initial_states_indexes = [1] - FINAL_INDEX = 1 + FINAL_INDEX = 0 + + if model_input["level"] + μ1_indexes = [1] + initial_states_indexes = [1] + FINAL_INDEX += length(μ1_indexes) + else + μ1_indexes = Int64[] + initial_states_indexes = Int64[] + end if model_input["trend"] ν1_indexes = [2] @@ -214,7 +223,6 @@ function get_components_indexes(Exogenous_X::Matrix{Fl}, model_input::Dict)::Dic γ1_indexes = Int64[] end - if model_input["stochastic_level"] ξ_indexes = collect(FINAL_INDEX+1:FINAL_INDEX+ξ_size(T)) FINAL_INDEX += length(ξ_indexes) diff --git a/src/utils.jl b/src/utils.jl index 448b03d..00dc661 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -105,4 +105,19 @@ function handle_missing_values(X::Matrix{Tl}, y::Vector{Fl})::Tuple{Vector{Fl}, valid_indexes = setdiff(1:length(y), invalid_indexes) return y[valid_indexes], X[valid_indexes, :], valid_indexes +end + +""" +has_intercept(X::Matrix{Tl})::Bool where Tl + + Checks if the input matrix has a column of ones (intercept). + + # Arguments + - `X::Matrix{Tl}`: Input matrix. + + # Returns + - `Bool`: True if the input matrix has a column of ones, false otherwise.
+""" +function has_intercept(X::Matrix{Tl})::Bool where Tl + return any([all(X[:, i] .== 1) for i in 1:size(X, 2)]) end \ No newline at end of file diff --git a/test/StateSpaceLearning.jl b/test/StateSpaceLearning.jl index fb79e68..5ffdd38 100644 --- a/test/StateSpaceLearning.jl +++ b/test/StateSpaceLearning.jl @@ -13,10 +13,10 @@ @test all(isnan.(output2.ϵ[10:20])) @test !all(isnan.(output2.fitted[10:20])) - output3 = StateSpaceLearning.fit_model(y1; model_input = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, "ζ_ω_threshold" => 1)) + output3 = StateSpaceLearning.fit_model(y1; model_input = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 12, "outlier" => true, "ζ_ω_threshold" => 1)) @test length(output3.coefs) - 22 == length(output1.coefs) - @test_throws AssertionError StateSpaceLearning.fit_model(y1; model_input = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 1000, "outlier" => true, "ζ_ω_threshold" => 1)) + @test_throws AssertionError StateSpaceLearning.fit_model(y1; model_input = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 1000, "outlier" => true, "ζ_ω_threshold" => 1)) @test_throws AssertionError StateSpaceLearning.fit_model(y1; estimation_input = Dict("α" => -0.1, "information_criteria" => "aic", "ψ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) @test_throws AssertionError StateSpaceLearning.fit_model(y1; estimation_input = Dict("α" => 1.1, "information_criteria" => "aic", "ψ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true)) diff --git a/test/estimation_procedure/default_estimation_procedure.jl b/test/estimation_procedure/default_estimation_procedure.jl index b4d972d..62eb2d4 100644 --- a/test/estimation_procedure/default_estimation_procedure.jl +++ b/test/estimation_procedure/default_estimation_procedure.jl @@ -30,29 +30,36 @@ end @testset "Function: fit_lasso" begin Random.seed!(1234) Exogenous_X = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural_w_level = Dict("level" => false, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) components_indexes = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural) + components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural_w_level) Estimation_X = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X) + Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural_w_level, Exogenous_X) estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10) - coefs1, ϵ1 = 
StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes; intercept = true) + coefs1, ϵ1 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) @test length(coefs1) == 43 @test length(ϵ1) == 10 - coefs2, ϵ2 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes; intercept = false) + coefs1, ϵ1 = StateSpaceLearning.fit_lasso(Estimation_X2, estimation_y, 0.1, "aic", true, components_indexes2, ones(size(Estimation_X2, 2)); rm_average = false) + @test length(coefs1) == 42 + @test length(ϵ1) == 10 + + coefs2, ϵ2 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) @test coefs2[1] == mean(estimation_y) @test length(coefs2) == 43 @test length(ϵ2) == 10 - coefs3, ϵ3 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", false, components_indexes; intercept = true) + coefs3, ϵ3 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", false, components_indexes, ones(size(Estimation_X, 2) - 1); rm_average = true) @test coefs3[components_indexes["o"][4]] == 0 @test all(coefs3[components_indexes["Exogenous_X"]] .!= 0) @test length(coefs3) == 43 @test length(ϵ3) == 10 - coefs4, ϵ4 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes; penalty_factor = vcat(ones(1), ones(size(Estimation_X,2) - 2).*Inf), intercept = true) + coefs4, ϵ4 = StateSpaceLearning.fit_lasso(Estimation_X, estimation_y, 0.1, "aic", true, components_indexes, vcat(ones(1), ones(size(Estimation_X,2) - 2).*Inf); rm_average = true) @test all(coefs4[3:end] .== 0) @test length(coefs4) == 43 @test length(ϵ4) == 10 @@ -61,11 +68,14 @@ end @testset "Function: default_estimation_procedure" begin Random.seed!(1234) Exogenous_X = hcat(rand(10, 3), vcat(ones(3), zeros(1), ones(6))) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level" => true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural_w_level = Dict("level" => false, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) components_indexes = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural) + components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X, Basic_Structural_w_level) Estimation_X = StateSpaceLearning.create_X(Basic_Structural, Exogenous_X) + Estimation_X2 = StateSpaceLearning.create_X(Basic_Structural_w_level, Exogenous_X) estimation_y = Estimation_X*rand(size(Estimation_X, 2)) + rand(10).*5 @@ -74,6 +84,11 @@ end @test length(coefs1) == 43 @test length(ϵ1) == 10 + estimation_input1 = Dict("α" => 0.1, "information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => true) + coefs1, ϵ1 = StateSpaceLearning.default_estimation_procedure(Estimation_X2, estimation_y, components_indexes2, estimation_input1) + @test length(coefs1) == 42 + @test length(ϵ1) == 10 + estimation_input2 = Dict("α" => 0.1, 
"information_criteria" => "aic", "ϵ" => 0.05, "penalize_exogenous" => true, "penalize_initial_states" => false) coefs2, ϵ2 = StateSpaceLearning.default_estimation_procedure(Estimation_X, estimation_y, components_indexes, estimation_input2) @test length(coefs2) == 43 @@ -97,7 +112,7 @@ end Exogenous_X1 = hcat(rand(10, 3), vcat(zeros(3), ones(1), zeros(6))) Exogenous_X2 = rand(10, 3) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) components_indexes1 = StateSpaceLearning.get_components_indexes(Exogenous_X1, Basic_Structural) components_indexes2 = StateSpaceLearning.get_components_indexes(Exogenous_X2, Basic_Structural) diff --git a/test/models/default_model.jl b/test/models/default_model.jl index a6dbfd6..1c7beca 100644 --- a/test/models/default_model.jl +++ b/test/models/default_model.jl @@ -75,8 +75,8 @@ end @testset "Initial State Matrix" begin - X1 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true) - X2 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true) + X1 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, true) + X2 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, true) @test X1 == [1.0 0.0 1.0 0.0; 1.0 1.0 0.0 1.0; @@ -92,8 +92,8 @@ end 1.0 5.0 0.0 1.0; 1.0 6.0 1.0 0.0] - X3 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, false) - X4 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, false) + X3 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, true, false) + X4 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, true, false) @test X3 == [1.0 0.0; 1.0 1.0; @@ -109,11 +109,17 @@ end 1.0 5.0; 1.0 6.0] - X5 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, false, false) - X6 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, false, false) + X5 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, true, false, false) + X6 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, true, false, false) @test X5 == ones(5, 1) @test X6 == ones(7, 1) + + X7 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 0, false, true, false) + X8 = StateSpaceLearning.create_initial_states_Matrix(5, 2, 2, false, true, false) + + @test X7 == [0.0; 1.0; 2.0; 3.0; 4.0][:, :] + @test X8 == [0.0; 1.0; 2.0; 3.0; 4.0; 5.0; 6.0][:, :] end @testset "Create X matrix" begin @@ -134,10 +140,10 @@ end size_vec1=[(5, 22), (5, 18), (7, 18), (5, 17), (5, 13), (7, 13), (5, 12), (5, 12), (7, 12), (5, 7), (5, 7), (7, 7), (5, 16), (5, 14), (7, 14), (5, 11), (5, 9), (7, 9), (5, 13), (5, 11), (7, 11), (5, 8), (5, 6), (7, 6)] size_vec2=[(5, 19), (5, 15), (7, 15), (5, 14), (5, 10), (7, 10), (5, 9), (5, 9), (7, 9), (5, 4), (5, 4), (7, 4), (5, 13), (5, 11), (7, 11), (5, 8), (5, 6), (7, 6), (5, 10), (5, 8), (7, 8), (5, 5), (5, 3), (7, 3)] counter = 1 - for model_input in [Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, 
"stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), - Dict("stochastic_level" => false, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12)] + for model_input in [Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), + Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), + Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12), + Dict("level"=> true, "stochastic_level" => false, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 12)] for param in param_combination model_input["outlier"] = param[1] model_input["ζ_ω_threshold"] = param[2] @@ -158,9 +164,9 @@ end Exogenous_X1 = rand(10, 3) Exogenous_X2 = zeros(10, 0) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) parameter_combination = [ [Basic_Structural, true, Exogenous_X1], [Local_Level, true, Exogenous_X1], @@ -195,9 +201,9 @@ end @testset "Function: get_variances" begin Exogenous_X2 = zeros(10, 0) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("stochastic_level" 
=> true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) parameter_combination = [ [Basic_Structural, true, Exogenous_X2, ["ξ", "ζ", "ω", "ε"]], diff --git a/test/utils.jl b/test/utils.jl index bca2c3d..f93046f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -2,9 +2,9 @@ Exogenous_X1 = rand(10, 3) Exogenous_X2 = zeros(10, 0) - Basic_Structural = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Level = Dict("stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) - Local_Linear_Trend = Dict("stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Basic_Structural = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => true, "stochastic_seasonal" => true, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Level = Dict("level"=> true, "stochastic_level" => true, "trend" => false, "stochastic_trend" => false, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) + Local_Linear_Trend = Dict("level"=> true, "stochastic_level" => true, "trend" => true, "stochastic_trend" => true, "seasonal" => false, "stochastic_seasonal" => false, "freq_seasonal" => 2, "outlier" => true, "ζ_ω_threshold" => 0) parameter_combination = [ [Basic_Structural, true, Exogenous_X1], [Local_Level, true, Exogenous_X1], @@ -49,4 +49,12 @@ end ϵ2, fitted2 = StateSpaceLearning.get_fit_and_residuals(estimation_ϵ2, coefs, X, valid_indexes2, T) @test !all(isnan.(ϵ2[valid_indexes2])) @test !all(isnan.(fitted2)) +end + +@testset "Function: has_intercept" begin + X = rand(10, 3) + @test !StateSpaceLearning.has_intercept(X) + + X = [ones(10) rand(10, 2)] + @test StateSpaceLearning.has_intercept(X) end \ No newline at end of file