Skip to content

Commit

Permalink
Store the samples instead of the chain/distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
ShouvikGhosh2048 committed Dec 9, 2024
1 parent 5c6f1ea commit 792f779
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 89 deletions.
2 changes: 1 addition & 1 deletion src/CRRao.jl
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegressi
export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss
export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit
export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue
export FrequentistRegression, BayesianRegressionMCMC, BayesianRegressionVI
export FrequentistRegression, BayesianRegression

include("random_number_generator.jl")
include("general_stats.jl")
Expand Down
55 changes: 11 additions & 44 deletions src/bayesian/getter.jl
Original file line number Diff line number Diff line change
@@ -1,60 +1,27 @@
function predict(container::BayesianRegressionMCMC{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegression{:LinearRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
W = params.β
if isa(W, Tuple)
W = reduce(hcat, W)
end
#predictions = params.α' .+ X * W'
predictions = X * W'
return vec(mean(predictions, dims=2))
end

function predict(container::BayesianRegressionVI{:LinearRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000)
X = modelmatrix(container.formula, newdata)

W = rand(CRRao_rng, container.dist, number_of_samples)
W = W[union(container.symbol_to_range[]...), :]
W = container.samples[:, prediction_chain_start:end]
predictions = X * W
return vec(mean(predictions, dims=2))
end

function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegression{:LogisticRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
W = params.β
if isa(W, Tuple)
W = reduce(hcat, W)
end
#z = params.α' .+ X * W'
z = X * W'
W = container.samples[:, prediction_chain_start:end]
z = X * W
return vec(mean(container.link.link_function.(z), dims=2))
end

function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
W = params.β
if isa(W, Tuple)
W = reduce(hcat, W)
end
#z = params.α' .+ X * W'
z = X * W'
W = container.samples[:, prediction_chain_start:end]
z = X * W
return vec(mean(exp.(z), dims=2))
end

function predict(container::BayesianRegressionMCMC{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
function predict(container::BayesianRegression{:PoissonRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200)
X = modelmatrix(container.formula, newdata)

params = get_params(container.chain[prediction_chain_start:end,:,:])
W = params.β
if isa(W, Tuple)
W = reduce(hcat, W)
end
#z = params.α' .+ X * W'
z = X * W'
W = container.samples[:, prediction_chain_start:end]
z = X * W
return vec(mean(exp.(z), dims=2))
end
14 changes: 12 additions & 2 deletions src/bayesian/linear_regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ function linear_reg_mcmc(formula::FormulaTerm, data::DataFrame, turingModel::Fun
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
return BayesianRegressionMCMC(:LinearRegression, chain, formula)
params = get_params(chain[:,:,:])
samples = params.β
if isa(samples, Tuple)
samples = reduce(hcat, samples)
end
samples = samples'

return BayesianRegression(:LinearRegression, samples, formula)
end

function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Function, max_iter::Int64)
Expand All @@ -16,10 +23,13 @@ function linear_reg_vi(formula::FormulaTerm, data::DataFrame, turingModel::Funct
if max_iter < 500
@warn "Max iterations should generally be atleast 500."
end

model = turingModel(X, y)
dist = vi(model, ADVI(100, max_iter))
samples = rand(CRRao_rng, dist, max_iter)
_, symbol_to_range = bijector(model, Val(true))
return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range)
samples = samples[union(symbol_to_range[]...), :]
return BayesianRegression(:LinearRegression, samples, formula)
end

"""
Expand Down
9 changes: 8 additions & 1 deletion src/bayesian/logistic_regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
return BayesianRegressionMCMC(:LogisticRegression, chain, formula, Link)
params = get_params(chain[:,:,:])
samples = params.β
if isa(samples, Tuple)
samples = reduce(hcat, samples)
end
samples = samples'

return BayesianRegression(:LogisticRegression, samples, formula, Link)
end

"""
Expand Down
9 changes: 8 additions & 1 deletion src/bayesian/negativebinomial_regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@ function negativebinomial_reg(
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
return BayesianRegressionMCMC(:NegativeBinomialRegression, chain, formula)
params = get_params(chain[:,:,:])
samples = params.β
if isa(samples, Tuple)
samples = reduce(hcat, samples)
end
samples = samples'

return BayesianRegression(:NegativeBinomialRegression, samples, formula)
end

"""
Expand Down
9 changes: 8 additions & 1 deletion src/bayesian/poisson_regression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@ function poisson_reg(formula::FormulaTerm, data::DataFrame, turingModel::Functio
@warn "Simulation size should generally be atleast 500."
end
chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size)
return BayesianRegressionMCMC(:PoissonRegression, chain, formula)
params = get_params(chain[:,:,:])
samples = params.β
if isa(samples, Tuple)
samples = reduce(hcat, samples)
end
samples = samples'

return BayesianRegression(:PoissonRegression, samples, formula)
end

"""
Expand Down
35 changes: 6 additions & 29 deletions src/fitmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,48 +22,25 @@ FrequentistRegression(RegressionType::Symbol, model, formula, link = GLM.Identit

"""
```julia
BayesianRegressionMCMC{RegressionType}
BayesianRegression{RegressionType}
```
Type to represent bayesian regression models (using MCMC) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using MCMC. `RegressionType` is a `Symbol` representing the model class.
"""
struct BayesianRegressionMCMC{RegressionType} <: RegressionModel
chain
struct BayesianRegression{RegressionType} <: RegressionModel
samples
formula::FormulaTerm
link
end

"""
```julia
BayesianRegressionMCMC(::Symbol, chain)
BayesianRegression(::Symbol, samples)
```
Constructor for `BayesianRegressionMCMC`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container.
Constructor for `BayesianRegression`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container.
"""
BayesianRegressionMCMC(RegressionType::Symbol, chain, formula, link = Identity()) = BayesianRegressionMCMC{RegressionType}(chain, formula, link)

"""
```julia
BayesianRegressionVI{RegressionType}
```
Type to represent bayesian regression models (using VI) returned by `fit` functions. This type is used internally by the package to represent all bayesian regression models using VI. `RegressionType` is a `Symbol` representing the model class.
"""
struct BayesianRegressionVI{RegressionType} <: RegressionModel
dist
formula::FormulaTerm
symbol_to_range
link
end

"""
```julia
BayesianRegressionVI(::Symbol, dist)
```
Constructor for `BayesianRegressionVI`. `model` can be any regression model. Used by `fit` functions to return a bayesian regression model container.
"""
BayesianRegressionVI(RegressionType::Symbol, dist, formula, symbol_to_range, link = Identity()) = BayesianRegressionVI{RegressionType}(dist, formula, symbol_to_range, link)
BayesianRegression(RegressionType::Symbol, samples, formula, link = Identity()) = BayesianRegression{RegressionType}(samples, formula, link)

# Print Messages
include("print.jl")
Expand Down
13 changes: 3 additions & 10 deletions src/print.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,9 @@ function Base.show(io::IO, container::FrequentistRegression{:PoissonRegression})
end

# Bayesian Models
function Base.show(io::IO, container::BayesianRegressionMCMC)
function Base.show(io::IO, container::BayesianRegression)
println(io, "Formula: ", container.formula)
println(io, "Link: ", container.link)
print(io, "Chain: ")
show(io, MIME("text/plain"), container.chain)
end

function Base.show(io::IO, container::BayesianRegressionVI)
println(io, "Formula: ", container.formula)
println(io, "Link: ", container.link)
print(io, "Distribution: ")
show(io, MIME("text/plain"), container.dist)
print(io, "Samples: ")
show(io, MIME("text/plain"), container.samples)
end

0 comments on commit 792f779

Please sign in to comment.