From 5c6f1ea2255d421cbad1457ae844208a062ee8d5 Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Fri, 6 Dec 2024 14:28:08 +0000 Subject: [PATCH 1/4] Add VI for linear regression --- Project.toml | 2 + src/CRRao.jl | 4 +- src/bayesian/getter.jl | 17 +- src/bayesian/linear_regression.jl | 180 ++++++++++++++++---- src/bayesian/logistic_regression.jl | 2 +- src/bayesian/negativebinomial_regression.jl | 2 +- src/bayesian/poisson_regression.jl | 2 +- src/fitmodel.jl | 35 +++- src/print.jl | 9 +- test/numerical/bayesian/LinearRegression.jl | 14 +- 10 files changed, 213 insertions(+), 54 deletions(-) diff --git a/Project.toml b/Project.toml index 782217c..fd2dcec 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["xKDR Forum, Sourish Das"] version = "0.1.1" [deps] +AdvancedVI = "b5ca4192-6429-45e5-a2d9-87aec30a685c" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" @@ -19,6 +20,7 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0" [compat] +AdvancedVI = "0.2.11" DataFrames = "1" Distributions = "0.25" Documenter = "0.27, 1" diff --git a/src/CRRao.jl b/src/CRRao.jl index 59c062d..19b6c49 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -2,7 +2,7 @@ module CRRao using DataFrames, GLM, Turing, StatsModels, StatsBase using StatsBase, Distributions, LinearAlgebra -using Optim, NLSolversBase, Random, HypothesisTests +using Optim, NLSolversBase, Random, HypothesisTests, AdvancedVI import StatsBase: coef, coeftable, r2, adjr2, loglikelihood, aic, bic, predict, residuals, cooksdistance, fit import HypothesisTests: pvalue @@ -396,7 +396,7 @@ export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegressi export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue -export FrequentistRegression, BayesianRegression +export FrequentistRegression, BayesianRegressionMCMC, BayesianRegressionVI include("random_number_generator.jl") include("general_stats.jl") diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index 5e748d3..b02bddf 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -1,4 +1,4 @@ -function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegressionMCMC{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) params = get_params(container.chain[prediction_chain_start:end,:,:]) @@ -11,7 +11,16 @@ function predict(container::BayesianRegression{:LinearRegression}, newdata::Data return vec(mean(predictions, dims=2)) end -function predict(container::BayesianRegression{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegressionVI{:LinearRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000) + X = modelmatrix(container.formula, newdata) + + W = rand(CRRao_rng, container.dist, number_of_samples) + W = W[union(container.symbol_to_range[:β]...), :] + predictions = X * W + return vec(mean(predictions, dims=2)) +end + +function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = 
modelmatrix(container.formula, newdata) params = get_params(container.chain[prediction_chain_start:end,:,:]) @@ -24,7 +33,7 @@ function predict(container::BayesianRegression{:LogisticRegression}, newdata::Da return vec(mean(container.link.link_function.(z), dims=2)) end -function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) params = get_params(container.chain[prediction_chain_start:end,:,:]) @@ -37,7 +46,7 @@ function predict(container::BayesianRegression{:NegativeBinomialRegression}, new return vec(mean(exp.(z), dims=2)) end -function predict(container::BayesianRegression{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegressionMCMC{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) params = get_params(container.chain[prediction_chain_start:end,:,:]) diff --git a/src/bayesian/linear_regression.jl b/src/bayesian/linear_regression.jl index 010b148..d50dc3e 100644 --- a/src/bayesian/linear_regression.jl +++ b/src/bayesian/linear_regression.jl @@ -1,4 +1,4 @@ -function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64) +function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64) formula = apply_schema(formula, schema(formula, data),RegressionModel) y, X = modelcols(formula, data) @@ -6,12 +6,33 @@ function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegression(:LinearRegression, chain, formula) + return BayesianRegressionMCMC(:LinearRegression, chain, formula) +end + +function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64) + formula = apply_schema(formula, schema(formula, data),RegressionModel) + y, X = modelcols(formula, data) + + if max_iter < 500 + @warn "Max iterations should generally be atleast 500." + end + model = turingModel(X, y) + dist = vi(model, ADVI(100, max_iter)) + _, symbol_to_range = bijector(model, Val(true)) + return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range) end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Ridge, + use_vi::Bool = false, + h::Float64 = 0.01, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a Ridge prior. @@ -77,8 +98,9 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, + use_vi::Bool = false, h::Float64 = 0.01, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 
20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -97,12 +119,24 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Laplace, + use_vi::Bool = false, + h::Float64 = 0.01, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a Laplace prior. @@ -166,8 +200,9 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, + use_vi::Bool = false, h::Float64 = 0.01, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -185,12 +220,23 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Cauchy, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a Cauchy prior. @@ -253,7 +299,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, - sim_size::Int64 = 1000 + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -268,12 +315,24 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, h::Float64 = 2.0, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_TDist, + h::Float64 = 2.0, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a t(ν) distributed prior. @@ -340,8 +399,9 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, + use_vi::Bool = false, h::Float64 = 2.0, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -359,13 +419,24 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_HorseShoe, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 
20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a HorseShoe prior. @@ -425,7 +496,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_HorseShoe, - sim_size::Int64 = 1000 + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -446,12 +518,25 @@ function fit( y ~ MvNormal( X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss,alpha_prior_mean::Float64 = 0.0, beta_prior_mean::Float64, sim_size::Int64 = 1000, h::Float64 = 0.1) +fit( + formula::FormulaTerm, + ata::DataFrame, + modelClass::LinearRegression, + prior::Prior_Gauss, + alpha_prior_mean::Float64, + beta_prior_mean::Vector{Float64}, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a Gaussian prior with user specific prior mean for α and β. User doesnot have @@ -497,13 +582,14 @@ Quantiles ``` """ function fit( - formula::FormulaTerm - , data::DataFrame - , modelClass::LinearRegression - , prior::Prior_Gauss - , alpha_prior_mean::Float64 - , beta_prior_mean::Vector{Float64} - , sim_size::Int64 = 1000 + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Gauss, + alpha_prior_mean::Float64, + beta_prior_mean::Vector{Float64}, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -529,13 +615,28 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Gauss, + alpha_prior_mean::Float64, + alpha_prior_sd::Float64, + beta_prior_mean::Vector{Float64}, + beta_prior_sd::Vector{Float64}, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 20000 : 1000, +) ``` Fit a Bayesian Linear Regression model on the input data with a Gaussian prior with user specific prior mean and sd for α and β. @@ -580,15 +681,16 @@ Quantiles ``` """ function fit( - formula::FormulaTerm - , data::DataFrame - , modelClass::LinearRegression - , prior::Prior_Gauss - , alpha_prior_mean::Float64 - , alpha_prior_sd::Float64 - , beta_prior_mean::Vector{Float64} - , beta_prior_sd::Vector{Float64} - , sim_size::Int64 = 1000 + formula::FormulaTerm, + data::DataFrame, + modelClass::LinearRegression, + prior::Prior_Gauss, + alpha_prior_mean::Float64, + alpha_prior_sd::Float64, + beta_prior_mean::Vector{Float64}, + beta_prior_sd::Vector{Float64}, + use_vi::Bool = false, + sim_size::Int64 = use_vi ? 
20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -616,5 +718,9 @@ function fit( y ~ MvNormal(X * β, σ) end - return linear_reg(formula, data, LinearRegression, sim_size) + if use_vi + return linear_reg_vi(formula, data, LinearRegression, sim_size) + else + return linear_reg_mcmc(formula, data, LinearRegression, sim_size) + end end \ No newline at end of file diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index 4c7d83a..d2f122f 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -6,7 +6,7 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegression(:LogisticRegression, chain, formula, Link) + return BayesianRegressionMCMC(:LogisticRegression, chain, formula, Link) end """ diff --git a/src/bayesian/negativebinomial_regression.jl b/src/bayesian/negativebinomial_regression.jl index e2f8a47..7e220c7 100644 --- a/src/bayesian/negativebinomial_regression.jl +++ b/src/bayesian/negativebinomial_regression.jl @@ -11,7 +11,7 @@ function negativebinomial_reg( @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegression(:NegativeBinomialRegression, chain, formula) + return BayesianRegressionMCMC(:NegativeBinomialRegression, chain, formula) end """ diff --git a/src/bayesian/poisson_regression.jl b/src/bayesian/poisson_regression.jl index 0653ea2..1bec20e 100644 --- a/src/bayesian/poisson_regression.jl +++ b/src/bayesian/poisson_regression.jl @@ -6,7 +6,7 @@ function poisson_reg(formula::FormulaTerm, data::DataFrame, turingModel::Functio @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegression(:PoissonRegression, chain, formula) + return BayesianRegressionMCMC(:PoissonRegression, chain, formula) end """ diff --git a/src/fitmodel.jl b/src/fitmodel.jl index 046fa98..df45575 100644 --- a/src/fitmodel.jl +++ b/src/fitmodel.jl @@ -22,12 +22,12 @@ FrequentistRegression(RegressionType::Symbol, model, formula, link = GLM.Identit """ ```julia -BayesianRegression{RegressionType} +BayesianRegressionMCMC{RegressionType} ``` -Type to represent bayesian regression models returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models. `RegressionType` is a `Symbol` representing the model class. +Type to represent bayesian regression models (using MCMC) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using MCMC. `RegressionType` is a `Symbol` representing the model class. """ -struct BayesianRegression{RegressionType} <: RegressionModel +struct BayesianRegressionMCMC{RegressionType} <: RegressionModel chain formula::FormulaTerm link @@ -35,12 +35,35 @@ end """ ```julia -BayesianRegression(::Symbol, chain) +BayesianRegressionMCMC(::Symbol, chain) ``` -Constructor for `BayesianRegression`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. +Constructor for `BayesianRegressionMCMC`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. 
""" -BayesianRegression(RegressionType::Symbol, chain, formula, link = Identity()) = BayesianRegression{RegressionType}(chain, formula, link) +BayesianRegressionMCMC(RegressionType::Symbol, chain, formula, link = Identity()) = BayesianRegressionMCMC{RegressionType}(chain, formula, link) + +""" +```julia +BayesianRegressionVI{RegressionType} +``` + +Type to represent bayesian regression models (using VI) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using VI. `RegressionType` is a `Symbol` representing the model class. +""" +struct BayesianRegressionVI{RegressionType} <: RegressionModel + dist + formula::FormulaTerm + symbol_to_range + link +end + +""" +```julia +BayesianRegressionVI(::Symbol, dist) +``` + +Constructor for `BayesianRegressionVI`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. +""" +BayesianRegressionVI(RegressionType::Symbol, dist, formula, symbol_to_range, link = Identity()) = BayesianRegressionVI{RegressionType}(dist, formula, symbol_to_range, link) # Print Messages include("print.jl") diff --git a/src/print.jl b/src/print.jl index 5faa261..2b57ed7 100644 --- a/src/print.jl +++ b/src/print.jl @@ -27,9 +27,16 @@ function Base.show(io::IO, container::FrequentistRegression{:PoissonRegression}) end # Bayesian Models -function Base.show(io::IO, container::BayesianRegression) +function Base.show(io::IO, container::BayesianRegressionMCMC) println(io, "Formula: ", container.formula) println(io, "Link: ", container.link) print(io, "Chain: ") show(io, MIME("text/plain"), container.chain) end + +function Base.show(io::IO, container::BayesianRegressionVI) + println(io, "Formula: ", container.formula) + println(io, "Link: ", container.link) + print(io, "Distribution: ") + show(io, MIME("text/plain"), container.dist) +end \ No newline at end of file diff --git a/test/numerical/bayesian/LinearRegression.jl b/test/numerical/bayesian/LinearRegression.jl index 3930434..f1285dc 100644 --- a/test/numerical/bayesian/LinearRegression.jl +++ b/test/numerical/bayesian/LinearRegression.jl @@ -9,15 +9,27 @@ tests = [ ] for (prior, test_mean) in tests + # MCMC CRRao.set_rng(StableRNG(123)) model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior) prediction = predict(model, mtcars) @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + + # VI + CRRao.set_rng(StableRNG(123)) + model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior, true) + prediction = predict(model, mtcars) + @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) end gauss_test = 20.0796026428345 CRRao.set_rng(StableRNG(123)) -model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], 1000) +model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0]) prediction = predict(model, mtcars) @test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) + +CRRao.set_rng(StableRNG(123)) +model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], true) +prediction = predict(model, mtcars) +@test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) \ No newline at end of file From 
792f7793cac6745921dcd22542f0e403b01b521b Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 09:51:49 +0000 Subject: [PATCH 2/4] Store the samples instead of the chain/distribution --- src/CRRao.jl | 2 +- src/bayesian/getter.jl | 55 +++++---------------- src/bayesian/linear_regression.jl | 14 +++++- src/bayesian/logistic_regression.jl | 9 +++- src/bayesian/negativebinomial_regression.jl | 9 +++- src/bayesian/poisson_regression.jl | 9 +++- src/fitmodel.jl | 35 +++---------- src/print.jl | 13 ++--- 8 files changed, 57 insertions(+), 89 deletions(-) diff --git a/src/CRRao.jl b/src/CRRao.jl index 19b6c49..27d046b 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -396,7 +396,7 @@ export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegressi export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue -export FrequentistRegression, BayesianRegressionMCMC, BayesianRegressionVI +export FrequentistRegression, BayesianRegression include("random_number_generator.jl") include("general_stats.jl") diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index b02bddf..15f37f4 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -1,60 +1,27 @@ -function predict(container::BayesianRegressionMCMC{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #predictions = params.α' .+ X * W' - predictions = X * W' - return vec(mean(predictions, dims=2)) -end - -function predict(container::BayesianRegressionVI{:LinearRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000) - X = modelmatrix(container.formula, newdata) - - W = rand(CRRao_rng, container.dist, number_of_samples) - W = W[union(container.symbol_to_range[:β]...), :] + W = container.samples[:, prediction_chain_start:end] predictions = X * W return vec(mean(predictions, dims=2)) end -function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(container.link.link_function.(z), dims=2)) end -function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(exp.(z), dims=2)) end -function 
predict(container::BayesianRegressionMCMC{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) - - params = get_params(container.chain[prediction_chain_start:end,:,:]) - W = params.β - if isa(W, Tuple) - W = reduce(hcat, W) - end - #z = params.α' .+ X * W' - z = X * W' + W = container.samples[:, prediction_chain_start:end] + z = X * W return vec(mean(exp.(z), dims=2)) end \ No newline at end of file diff --git a/src/bayesian/linear_regression.jl b/src/bayesian/linear_regression.jl index d50dc3e..ffe7e85 100644 --- a/src/bayesian/linear_regression.jl +++ b/src/bayesian/linear_regression.jl @@ -6,7 +6,14 @@ function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Fun @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:LinearRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:LinearRegression, samples, formula) end function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64) @@ -16,10 +23,13 @@ function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Funct if max_iter < 500 @warn "Max iterations should generally be atleast 500." end + model = turingModel(X, y) dist = vi(model, ADVI(100, max_iter)) + samples = rand(CRRao_rng, dist, max_iter) _, symbol_to_range = bijector(model, Val(true)) - return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range) + samples = samples[union(symbol_to_range[:β]...), :] + return BayesianRegression(:LinearRegression, samples, formula) end """ diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index d2f122f..682717c 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -6,7 +6,14 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:LogisticRegression, chain, formula, Link) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:LogisticRegression, samples, formula, Link) end """ diff --git a/src/bayesian/negativebinomial_regression.jl b/src/bayesian/negativebinomial_regression.jl index 7e220c7..7070cee 100644 --- a/src/bayesian/negativebinomial_regression.jl +++ b/src/bayesian/negativebinomial_regression.jl @@ -11,7 +11,14 @@ function negativebinomial_reg( @warn "Simulation size should generally be atleast 500." 
end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:NegativeBinomialRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:NegativeBinomialRegression, samples, formula) end """ diff --git a/src/bayesian/poisson_regression.jl b/src/bayesian/poisson_regression.jl index 1bec20e..3f175cd 100644 --- a/src/bayesian/poisson_regression.jl +++ b/src/bayesian/poisson_regression.jl @@ -6,7 +6,14 @@ function poisson_reg(formula::FormulaTerm, data::DataFrame, turingModel::Functio @warn "Simulation size should generally be atleast 500." end chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - return BayesianRegressionMCMC(:PoissonRegression, chain, formula) + params = get_params(chain[:,:,:]) + samples = params.β + if isa(samples, Tuple) + samples = reduce(hcat, samples) + end + samples = samples' + + return BayesianRegression(:PoissonRegression, samples, formula) end """ diff --git a/src/fitmodel.jl b/src/fitmodel.jl index df45575..ec7b7a5 100644 --- a/src/fitmodel.jl +++ b/src/fitmodel.jl @@ -22,48 +22,25 @@ FrequentistRegression(RegressionType::Symbol, model, formula, link = GLM.Identit """ ```julia -BayesianRegressionMCMC{RegressionType} +BayesianRegression{RegressionType} ``` Type to represent bayesian regression models (using MCMC) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using MCMC. `RegressionType` is a `Symbol` representing the model class. """ -struct BayesianRegressionMCMC{RegressionType} <: RegressionModel - chain +struct BayesianRegression{RegressionType} <: RegressionModel + samples formula::FormulaTerm link end """ ```julia -BayesianRegressionMCMC(::Symbol, chain) +BayesianRegression(::Symbol, samples) ``` -Constructor for `BayesianRegressionMCMC`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. +Constructor for `BayesianRegression`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. """ -BayesianRegressionMCMC(RegressionType::Symbol, chain, formula, link = Identity()) = BayesianRegressionMCMC{RegressionType}(chain, formula, link) - -""" -```julia -BayesianRegressionVI{RegressionType} -``` - -Type to represent bayesian regression models (using VI) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using VI. `RegressionType` is a `Symbol` representing the model class. -""" -struct BayesianRegressionVI{RegressionType} <: RegressionModel - dist - formula::FormulaTerm - symbol_to_range - link -end - -""" -```julia -BayesianRegressionVI(::Symbol, dist) -``` - -Constructor for `BayesianRegressionVI`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container. 
-""" -BayesianRegressionVI(RegressionType::Symbol, dist, formula, symbol_to_range, link = Identity()) = BayesianRegressionVI{RegressionType}(dist, formula, symbol_to_range, link) +BayesianRegression(RegressionType::Symbol, samples, formula, link = Identity()) = BayesianRegression{RegressionType}(samples, formula, link) # Print Messages include("print.jl") diff --git a/src/print.jl b/src/print.jl index 2b57ed7..f216e5b 100644 --- a/src/print.jl +++ b/src/print.jl @@ -27,16 +27,9 @@ function Base.show(io::IO, container::FrequentistRegression{:PoissonRegression}) end # Bayesian Models -function Base.show(io::IO, container::BayesianRegressionMCMC) +function Base.show(io::IO, container::BayesianRegression) println(io, "Formula: ", container.formula) println(io, "Link: ", container.link) - print(io, "Chain: ") - show(io, MIME("text/plain"), container.chain) -end - -function Base.show(io::IO, container::BayesianRegressionVI) - println(io, "Formula: ", container.formula) - println(io, "Link: ", container.link) - print(io, "Distribution: ") - show(io, MIME("text/plain"), container.dist) + print(io, "Samples: ") + show(io, MIME("text/plain"), container.samples) end \ No newline at end of file From 84631dbdb43a6238617848d83be34ebc30228936 Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 18:43:34 +0530 Subject: [PATCH 3/4] Use common bayesian struct --- src/CRRao.jl | 40 +++++++- src/bayesian/getter.jl | 4 +- src/bayesian/linear_regression.jl | 104 ++++++-------------- test/numerical/bayesian/LinearRegression.jl | 24 +++-- 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/src/CRRao.jl b/src/CRRao.jl index 27d046b..e795faf 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -392,9 +392,47 @@ end Cauchit() = Cauchit(Cauchit_Link) +""" +```julia +BayesianAlgorithm +``` + +Abstract type representing bayesian algorithms which are used to dispatch to appropriate calls. +""" +abstract type BayesianAlgorithm end + +""" +```julia +MCMC <: BayesianAlgorithm +``` + +A type representing MCMC algorithms. +""" +struct MCMC <: BayesianAlgorithm + sim_size::Int64 + prediction_chain_start::Int64 +end + +MCMC() = MCMC(1000, 200) + +""" +```julia +VI <: BayesianAlgorithm +``` + +A type representing variational inference algorithms. 
+""" +struct VI <: BayesianAlgorithm + distribution_sample_count::Int64 + vi_max_iters::Int64 + vi_samples_per_step::Int64 +end + +VI() = VI(1000, 10000, 100) + export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression, Boot_Residual export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss -export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit +export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, MCMC, VI export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue export FrequentistRegression, BayesianRegression diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index 15f37f4..86a5b50 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -1,6 +1,6 @@ -function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame) X = modelmatrix(container.formula, newdata) - W = container.samples[:, prediction_chain_start:end] + W = container.samples predictions = X * W return vec(mean(predictions, dims=2)) end diff --git a/src/bayesian/linear_regression.jl b/src/bayesian/linear_regression.jl index ffe7e85..959bf7a 100644 --- a/src/bayesian/linear_regression.jl +++ b/src/bayesian/linear_regression.jl @@ -1,12 +1,12 @@ -function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64) - formula = apply_schema(formula, schema(formula, data),RegressionModel) +function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, algorithm::MCMC) + formula = apply_schema(formula, schema(formula, data), RegressionModel) y, X = modelcols(formula, data) - if sim_size < 500 + if algorithm.sim_size < 500 @warn "Simulation size should generally be atleast 500." end - chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) - params = get_params(chain[:,:,:]) + chain = sample(CRRao_rng, turingModel(X, y), NUTS(), algorithm.sim_size) + params = get_params(chain[algorithm.prediction_chain_start:end,:,:]) samples = params.β if isa(samples, Tuple) samples = reduce(hcat, samples) @@ -16,17 +16,17 @@ function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Fun return BayesianRegression(:LinearRegression, samples, formula) end -function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64) - formula = apply_schema(formula, schema(formula, data),RegressionModel) +function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, algorithm::VI) + formula = apply_schema(formula, schema(formula, data), RegressionModel) y, X = modelcols(formula, data) - if max_iter < 500 + if algorithm.vi_max_iters < 500 @warn "Max iterations should generally be atleast 500." end model = turingModel(X, y) - dist = vi(model, ADVI(100, max_iter)) - samples = rand(CRRao_rng, dist, max_iter) + dist = vi(model, ADVI(algorithm.vi_samples_per_step, algorithm.vi_max_iters)) + samples = rand(CRRao_rng, dist, algorithm.distribution_sample_count) _, symbol_to_range = bijector(model, Val(true)) samples = samples[union(symbol_to_range[:β]...), :] return BayesianRegression(:LinearRegression, samples, formula) @@ -39,9 +39,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 
20000 : 1000, ) ``` @@ -108,9 +107,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -129,11 +127,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -143,9 +137,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000, ) ``` @@ -210,9 +203,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -230,11 +222,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -244,8 +232,7 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -309,8 +296,7 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -325,11 +311,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -339,9 +321,8 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, ) ``` @@ -409,9 +390,8 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -429,11 +409,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end @@ -444,8 +420,7 @@ fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_HorseShoe, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -506,8 +481,7 @@ function fit( data::DataFrame, modelClass::LinearRegression, prior::Prior_HorseShoe, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 
20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -528,11 +502,7 @@ function fit( y ~ MvNormal( X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end """ @@ -544,8 +514,7 @@ fit( prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -598,8 +567,7 @@ function fit( prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -625,11 +593,7 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end @@ -644,8 +608,7 @@ fit( alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000, + algorithm::BayesianAlgorithm = MCMC(), ) ``` @@ -699,8 +662,7 @@ function fit( alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, - use_vi::Bool = false, - sim_size::Int64 = use_vi ? 20000 : 1000 + algorithm::BayesianAlgorithm = MCMC(), ) @model LinearRegression(X, y) = begin p = size(X, 2) @@ -728,9 +690,5 @@ function fit( y ~ MvNormal(X * β, σ) end - if use_vi - return linear_reg_vi(formula, data, LinearRegression, sim_size) - else - return linear_reg_mcmc(formula, data, LinearRegression, sim_size) - end + return linear_reg(formula, data, LinearRegression, algorithm) end \ No newline at end of file diff --git a/test/numerical/bayesian/LinearRegression.jl b/test/numerical/bayesian/LinearRegression.jl index f1285dc..339e3f3 100644 --- a/test/numerical/bayesian/LinearRegression.jl +++ b/test/numerical/bayesian/LinearRegression.jl @@ -12,24 +12,28 @@ for (prior, test_mean) in tests # MCMC CRRao.set_rng(StableRNG(123)) model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior) - prediction = predict(model, mtcars) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + mcmc_prediction = predict(model, mtcars) + @test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= test_mean && test_mean <= mean(mcmc_prediction) + 2 * std(mcmc_prediction) # VI CRRao.set_rng(StableRNG(123)) - model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior, true) - prediction = predict(model, mtcars) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), prior, VI()) + vi_prediction = predict(model, mtcars) + @test mean(vi_prediction) - 2 * std(vi_prediction) <= test_mean && test_mean <= mean(vi_prediction) + 2 * std(vi_prediction) + + @test maximum(abs.(mcmc_prediction - vi_prediction)) <= 5.0 end gauss_test = 20.0796026428345 CRRao.set_rng(StableRNG(123)) model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0]) -prediction = predict(model, 
mtcars) -@test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) +mcmc_prediction = predict(model, mtcars) +@test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= gauss_test && gauss_test <= mean(mcmc_prediction) + 2 * std(mcmc_prediction) CRRao.set_rng(StableRNG(123)) -model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], true) -prediction = predict(model, mtcars) -@test mean(prediction) - 2 * std(prediction) <= gauss_test && gauss_test <= mean(prediction) + 2 * std(prediction) \ No newline at end of file +model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Gauss(), 30.0, [0.0,-3.0,1.0], VI()) +vi_prediction = predict(model, mtcars) +@test mean(vi_prediction) - 2 * std(vi_prediction) <= gauss_test && gauss_test <= mean(vi_prediction) + 2 * std(vi_prediction) + +@test maximum(abs.(mcmc_prediction - vi_prediction)) <= 5.0 \ No newline at end of file From f1947a3cfeda72bdbcb6449f827b2dcf86da78e5 Mon Sep 17 00:00:00 2001 From: ShouvikGhosh2048 Date: Mon, 9 Dec 2024 14:08:30 +0000 Subject: [PATCH 4/4] Update docs --- docs/src/api/bayesian_regression.md | 22 +++++++++++++++------- src/CRRao.jl | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/src/api/bayesian_regression.md b/docs/src/api/bayesian_regression.md index 018096c..c9e9ad2 100644 --- a/docs/src/api/bayesian_regression.md +++ b/docs/src/api/bayesian_regression.md @@ -4,34 +4,42 @@ BayesianRegression ``` +## Bayesian Algorithms + +```@docs +BayesianAlgorithm +MCMC +VI +``` + ## Linear Regression ### Linear Regression with User Specific Gaussian Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, sim_size::Int64 = 1000) -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, beta_prior_mean::Vector{Float64}, algorithm::BayesianAlgorithm = MCMC()) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, algorithm::BayesianAlgorithm = MCMC()) ``` ### Linear Regression with Ridge Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01) ``` ### Linear Regression with Laplace Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, h::Float64 = 0.01, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.01) ``` ### Linear Regression with Cauchy Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, algorithm::BayesianAlgorithm = 
MCMC()) ``` ### Linear Regression with T-distributed Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, h::Float64 = 2.0, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 2.0) ``` ### Linear Regression with Horse Shoe Prior ```@docs -fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,sim_size::Int64 = 1000) +fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,algorithm::BayesianAlgorithm = MCMC()) ``` ## Logistic Regression diff --git a/src/CRRao.jl b/src/CRRao.jl index e795faf..4f54037 100644 --- a/src/CRRao.jl +++ b/src/CRRao.jl @@ -432,7 +432,7 @@ VI() = VI(1000, 10000, 100) export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression, Boot_Residual export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss -export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, MCMC, VI +export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit, BayesianAlgorithm, MCMC, VI export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue export FrequentistRegression, BayesianRegression
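
Below is a minimal usage sketch, not part of the patch series itself, illustrating the API as it stands after PATCH 4/4: the `algorithm::BayesianAlgorithm` argument dispatches `fit` to either NUTS sampling (`MCMC()`) or ADVI (`VI()`), and both paths return the common `BayesianRegression` container consumed by `predict`. Loading `mtcars` through RDatasets and seeding with `StableRNG(123)` mirror the test suite and are assumptions here, not something these commits add.

```julia
# Usage sketch against the final state of this patch series (assumptions noted above).
using CRRao, RDatasets, StableRNGs, StatsModels

mtcars = dataset("datasets", "mtcars")   # assumed data source; the tests just reference `mtcars`

# MCMC (NUTS) fit: MCMC() == MCMC(1000, 200), i.e. sim_size = 1000 draws,
# keeping draws from index 200 onward for prediction.
CRRao.set_rng(StableRNG(123))
mcmc_model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Ridge(), MCMC())

# Variational inference fit: VI() == VI(1000, 10000, 100), i.e. 1000 draws from the
# fitted ADVI distribution, at most 10000 ADVI iterations, 100 samples per step.
CRRao.set_rng(StableRNG(123))
vi_model = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Prior_Ridge(), VI())

# Both fits return a BayesianRegression{:LinearRegression}; predict averages X * β
# over the stored coefficient samples.
mcmc_pred = predict(mcmc_model, mtcars)
vi_pred = predict(vi_model, mtcars)
```

Because the MCMC/VI split sits behind `BayesianAlgorithm` dispatch, the other regression families could adopt the VI path later without another change to the `fit` signature.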