Add VI for linear regression
ShouvikGhosh2048 committed Dec 6, 2024
1 parent 5f9e94f commit 5c6f1ea
Showing 10 changed files with 213 additions and 54 deletions.
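The headline change is a new `use_vi` flag on the linear-regression `fit` methods: with `use_vi = false` (the default) fitting still runs NUTS MCMC, while `use_vi = true` routes it through ADVI via the new AdvancedVI dependency, reusing `sim_size` as the iteration budget (default 20000 in that case). A minimal usage sketch — the data frame and column names below are placeholders, not part of this commit:

```julia
# Illustrative only: `df` and its columns are made-up stand-ins.
using CRRao, DataFrames, StatsModels

df = DataFrame(x = randn(50))
df.y = 2.0 .* df.x .+ 0.5 .* randn(50)

# Existing behaviour: NUTS MCMC with sim_size posterior draws (default 1000).
m_mcmc = fit(@formula(y ~ x), df, LinearRegression(), Prior_Ridge())

# New behaviour: ADVI; sim_size becomes the iteration budget (default 20000).
m_vi = fit(@formula(y ~ x), df, LinearRegression(), Prior_Ridge(), true)

# predict dispatches on the container type: the MCMC container discards a
# burn-in prefix of the chain, while the VI container draws fresh samples
# (1000 by default) from the fitted variational distribution.
predict(m_vi, df)
```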
2 changes: 2 additions & 0 deletions Project.toml
@@ -4,6 +4,7 @@ authors = ["xKDR Forum, Sourish Das"]
version = "0.1.1"

[deps]
AdvancedVI = "b5ca4192-6429-45e5-a2d9-87aec30a685c"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
@@ -19,6 +20,7 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
Turing = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"

[compat]
AdvancedVI = "0.2.11"
DataFrames = "1"
Distributions = "0.25"
Documenter = "0.27, 1"
4 changes: 2 additions & 2 deletions src/CRRao.jl
@@ -2,7 +2,7 @@ module CRRao

using DataFrames, GLM, Turing, StatsModels, StatsBase
using StatsBase, Distributions, LinearAlgebra
using Optim, NLSolversBase, Random, HypothesisTests
using Optim, NLSolversBase, Random, HypothesisTests, AdvancedVI
import StatsBase: coef, coeftable, r2, adjr2, loglikelihood, aic, bic, predict, residuals, cooksdistance, fit
import HypothesisTests: pvalue

@@ -396,7 +396,7 @@ export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegressi
export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss
export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit
export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue
export FrequentistRegression, BayesianRegression
export FrequentistRegression, BayesianRegressionMCMC, BayesianRegressionVI

include("random_number_generator.jl")
include("general_stats.jl")
17 changes: 13 additions & 4 deletions src/bayesian/getter.jl
@@ -1,4 +1,4 @@
function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegressionMCMC{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
@@ -11,7 +11,16 @@ function predict(container::BayesianRegression{:LinearRegression}, newdata::Data
return vec(mean(predictions, dims=2))
end

function predict(container::BayesianRegression{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegressionVI{:LinearRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000)
X = modelmatrix(container.formula, newdata)

W = rand(CRRao_rng, container.dist, number_of_samples)
W = W[union(container.symbol_to_range[]...), :]
predictions = X * W
return vec(mean(predictions, dims=2))
end

function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
@@ -24,7 +33,7 @@ function predict(container::BayesianRegression{:LogisticRegression}, newdata::Da
return vec(mean(container.link.link_function.(z), dims=2))
end

function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
@@ -37,7 +46,7 @@ function predict(container::BayesianRegression{:NegativeBinomialRegression}, new
return vec(mean(exp.(z), dims=2))
end

function predict(container::BayesianRegression{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegressionMCMC{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
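The new `predict` method for the VI container replaces chain indexing with direct sampling from the fitted variational distribution: draw `number_of_samples` flattened parameter vectors, keep only the rows that `symbol_to_range` maps to the regression coefficients, and average `X * W` over the draws. A self-contained illustration of that computation — the distribution `q` and the index range below are stand-ins for `container.dist` and `container.symbol_to_range`, not CRRao's actual objects:

```julia
# Stand-alone sketch of the VI prediction path; q and the range 2:4 are placeholders.
using Distributions, LinearAlgebra, Statistics, Random

rng = MersenneTwister(1)
X = [ones(5) randn(rng, 5, 2)]                 # model matrix for new data (5 × 3)

# Pretend variational posterior over the flattened parameters [σ, β₁, β₂, β₃].
q = MvNormal(zeros(4), Diagonal(fill(0.1, 4)))

W = rand(rng, q, 1_000)                        # 4 × 1000 matrix, one draw per column
W = W[2:4, :]                                  # keep only the coefficient rows
predictions = X * W                            # 5 × 1000 draws of Xβ
vec(mean(predictions, dims = 2))               # posterior-predictive mean per row
```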
180 changes: 143 additions & 37 deletions src/bayesian/linear_regression.jl
@@ -1,17 +1,38 @@
function linear_reg(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64)
function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Function, sim_size::Int64)
formula = apply_schema(formula, schema(formula, data), RegressionModel)
y, X = modelcols(formula, data)

if sim_size < 500
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
return BayesianRegression(:LinearRegression, chain, formula)
return BayesianRegressionMCMC(:LinearRegression, chain, formula)
end

function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64)
formula = apply_schema(formula, schema(formula, data), RegressionModel)
y, X = modelcols(formula, data)

if max_iter < 500
@warn "Max iterations should generally be atleast 500."
end
model = turingModel(X, y)
dist = vi(model, ADVI(100, max_iter))
_, symbol_to_range = bijector(model, Val(true))
return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range)
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Ridge, h::Float64 = 0.01, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Ridge,
use_vi::Bool = false,
h::Float64 = 0.01,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a Ridge prior.
@@ -77,8 +98,9 @@ function fit(
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Ridge,
use_vi::Bool = false,
h::Float64 = 0.01,
sim_size::Int64 = 1000
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -97,12 +119,24 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Laplace, h::Float64 = 0.01, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Laplace,
use_vi::Bool = false,
h::Float64 = 0.01,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a Laplace prior.
@@ -166,8 +200,9 @@ function fit(
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Laplace,
use_vi::Bool = false,
h::Float64 = 0.01,
sim_size::Int64 = 1000
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -185,12 +220,23 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Cauchy, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Cauchy,
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a Cauchy prior.
@@ -253,7 +299,8 @@ function fit(
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Cauchy,
sim_size::Int64 = 1000
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -268,12 +315,24 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_TDist, h::Float64 = 2.0, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_TDist,
h::Float64 = 2.0,
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a t(ν) distributed prior.
@@ -340,8 +399,9 @@ function fit(
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_TDist,
use_vi::Bool = false,
h::Float64 = 2.0,
sim_size::Int64 = 1000
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -359,13 +419,24 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end


"""
```julia
fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Prior_HorseShoe,sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_HorseShoe,
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a HorseShoe prior.
@@ -425,7 +496,8 @@ function fit(
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_HorseShoe,
sim_size::Int64 = 1000
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -446,12 +518,25 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end

"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss,alpha_prior_mean::Float64 = 0.0, beta_prior_mean::Float64, sim_size::Int64 = 1000, h::Float64 = 0.1)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Gauss,
alpha_prior_mean::Float64,
beta_prior_mean::Vector{Float64},
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a Gaussian prior with user-specific prior mean for α and β. The user does not have
@@ -497,13 +582,14 @@ Quantiles
```
"""
function fit(
formula::FormulaTerm
, data::DataFrame
, modelClass::LinearRegression
, prior::Prior_Gauss
, alpha_prior_mean::Float64
, beta_prior_mean::Vector{Float64}
, sim_size::Int64 = 1000
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Gauss,
alpha_prior_mean::Float64,
beta_prior_mean::Vector{Float64},
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
@@ -529,13 +615,28 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end


"""
```julia
fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, prior::Prior_Gauss, alpha_prior_mean::Float64, alpha_prior_sd::Float64, beta_prior_mean::Vector{Float64}, beta_prior_sd::Vector{Float64}, sim_size::Int64 = 1000)
fit(
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Gauss,
alpha_prior_mean::Float64,
alpha_prior_sd::Float64,
beta_prior_mean::Vector{Float64},
beta_prior_sd::Vector{Float64},
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000,
)
```
Fit a Bayesian Linear Regression model on the input data with a Gaussian prior with user-specific prior mean and sd for α and β.
@@ -580,15 +681,16 @@ Quantiles
```
"""
function fit(
formula::FormulaTerm
, data::DataFrame
, modelClass::LinearRegression
, prior::Prior_Gauss
, alpha_prior_mean::Float64
, alpha_prior_sd::Float64
, beta_prior_mean::Vector{Float64}
, beta_prior_sd::Vector{Float64}
, sim_size::Int64 = 1000
formula::FormulaTerm,
data::DataFrame,
modelClass::LinearRegression,
prior::Prior_Gauss,
alpha_prior_mean::Float64,
alpha_prior_sd::Float64,
beta_prior_mean::Vector{Float64},
beta_prior_sd::Vector{Float64},
use_vi::Bool = false,
sim_size::Int64 = use_vi ? 20000 : 1000
)
@model LinearRegression(X, y) = begin
p = size(X, 2)
Expand Down Expand Up @@ -616,5 +718,9 @@ function fit(
y ~ MvNormal(X * β, σ)
end

return linear_reg(formula, data, LinearRegression, sim_size)
if use_vi
return linear_reg_vi(formula, data, LinearRegression, sim_size)
else
return linear_reg_mcmc(formula, data, LinearRegression, sim_size)
end
end
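The new `linear_reg_vi` helper leans on two Turing calls: `vi(model, ADVI(100, max_iter))`, where `ADVI`'s first argument is the number of Monte Carlo samples per gradient step and the second is the maximum number of optimisation steps, and `bijector(model, Val(true))`, whose second return value maps each model symbol to its index range in a flattened variational sample — this is what `predict` later uses to pull out the coefficients. A toy sketch of those two calls, assuming the package versions pinned in Project.toml (AdvancedVI 0.2); the model and data below are illustrative, not CRRao code:

```julia
# Toy model mirroring the shape of the Turing models in this file;
# all names and data below are illustrative.
using Turing

@model function toy_lm(X, y)
    p = size(X, 2)
    σ ~ truncated(Normal(0, 1), 0, Inf)
    β ~ filldist(Normal(0, 1), p)
    y ~ MvNormal(X * β, σ)   # scalar σ as isotropic std, matching the models above
end

X = [ones(30) randn(30)]
y = X * [1.0, 2.0] .+ 0.1 .* randn(30)
model = toy_lm(X, y)

# ADVI(n_mc, n_iter): 100 Monte Carlo samples per gradient estimate,
# up to n_iter optimisation steps; returns a mean-field variational posterior.
q = vi(model, ADVI(100, 5_000))

# Val(true) additionally returns the symbol-to-range mapping (e.g. for :β and :σ)
# used to locate β inside a flattened sample drawn from q.
_, sym2range = bijector(model, Val(true))
```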