diff --git a/src/CRRao.jl b/src/CRRao.jl
index 02203ce..59c062d 100644
--- a/src/CRRao.jl
+++ b/src/CRRao.jl
@@ -94,6 +94,14 @@ where
 """
 struct PoissonRegression end
 
+"""
+```julia
+Boot_Residual
+```
+Type representing Residual Bootstrap.
+"""
+struct Boot_Residual end
+
 """
 ```julia
 Prior_Gauss
@@ -384,7 +392,7 @@ end
 
 Cauchit() = Cauchit(Cauchit_Link)
 
-export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression
+export LinearRegression, LogisticRegression, PoissonRegression, NegBinomRegression, Boot_Residual
 export Prior_Ridge, Prior_Laplace, Prior_Cauchy, Prior_TDist, Prior_HorseShoe, Prior_Gauss
 export CRRaoLink, Logit, Probit, Cloglog, Cauchit, fit
 export coef, coeftable, r2, adjr2, loglikelihood, aic, bic, sigma, predict, residuals, cooksdistance, BPTest, pvalue
diff --git a/src/frequentist/linear_regression.jl b/src/frequentist/linear_regression.jl
index 074b5e3..4304841 100644
--- a/src/frequentist/linear_regression.jl
+++ b/src/frequentist/linear_regression.jl
@@ -97,3 +97,92 @@ function fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression
     model = lm(formula, data; kwargs...)
     return FrequentistRegression(:LinearRegression, model, formula)
 end
+
+"""
+```julia
+fit(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression, bootstrap::Boot_Residual, sim_size::Int64 = 1000)
+```
+Fit a Bootstrap Regression model on the input data. Uses the [lm](https://juliastats.org/GLM.jl/stable/api/#GLM.lm) method from the [GLM](https://github.com/JuliaStats/GLM.jl) package under the hood. Returns an object of type `DataFrame`.
+
+# Example
+```julia-repl
+julia> using CRRao, RDatasets, StableRNGs, StatsModels
+julia> df = dataset("datasets", "mtcars")
+32×12 DataFrame
+ Row │ Model              MPG      Cyl    Disp     HP     DRat     WT       QSec     VS     AM     Gear   Carb
+     │ String31           Float64  Int64  Float64  Int64  Float64  Float64  Float64  Int64  Int64  Int64  Int64
+─────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────
+   1 │ Mazda RX4             21.0      6    160.0    110     3.9     2.62     16.46      0      1      4      4
+   2 │ Mazda RX4 Wag         21.0      6    160.0    110     3.9     2.875    17.02      0      1      4      4
+   3 │ Datsun 710            22.8      4    108.0     93     3.85    2.32     18.61      1      1      4      1
+   4 │ Hornet 4 Drive        21.4      6    258.0    110     3.08    3.215    19.44      1      0      3      1
+   5 │ Hornet Sportabout     18.7      8    360.0    175     3.15    3.44     17.02      0      0      3      2
+   6 │ Valiant               18.1      6    225.0    105     2.76    3.46     20.22      1      0      3      1
+  ⋮  │         ⋮             ⋮       ⋮       ⋮       ⋮       ⋮        ⋮        ⋮       ⋮      ⋮      ⋮      ⋮
+  27 │ Porsche 914-2         26.0      4    120.3     91     4.43    2.14     16.7       0      1      5      2
+  28 │ Lotus Europa          30.4      4     95.1    113     3.77    1.513    16.9       1      1      5      2
+  29 │ Ford Pantera L        15.8      8    351.0    264     4.22    3.17     14.5       0      1      5      4
+  30 │ Ferrari Dino          19.7      6    145.0    175     3.62    2.77     15.5       0      1      5      6
+  31 │ Maserati Bora         15.0      8    301.0    335     3.54    3.57     14.6       0      1      5      8
+  32 │ Volvo 142E            21.4      4    121.0    109     4.11    2.78     18.6       1      1      4      2
+                                                                                            20 rows omitted
+julia> CRRao.set_rng(StableRNG(123))
+StableRNGs.LehmerRNG(state=0x000000000000000000000000000000f7)
+julia> container = fit(@formula(MPG ~ HP + WT + Gear), df, LinearRegression(), Boot_Residual())
+4×5 DataFrame
+ Row │ Predictor    Coef        Std Error   Lower 5%    Upper 95%
+     │ String       Float64     Float64     Float64     Float64
+─────┼─────────────────────────────────────────────────────────────
+   1 │ (Intercept)  32.1309      4.57528    24.8024     39.9568
+   2 │ HP           -0.0364971   0.00962225  -0.0519917  -0.0201571
+   3 │ WT           -3.22576     0.834607   -4.61517    -1.80358
+   4 │ Gear          1.00012     0.842335   -0.429382    2.35324
+```
+"""
+function fit(
+    formula::FormulaTerm,
+    data::DataFrame,
+    modelClass::LinearRegression,
+    bootstrap::Boot_Residual,
+    sim_size::Int64 = 1000)
+
+    formula = apply_schema(formula, schema(formula, data), RegressionModel)
+    y, X = modelcols(formula, data)
+
+    model = lm(formula, data)
+    res = coeftable(model)
+    res = DataFrame(res)
+    e = residuals(model)
+
+    β_hat = coef(model)
+    p = length(β_hat)
+    n = size(X)[1]
+
+    ## This line is inefficient
+    A = vcov(model)/dispersion(model.model,true)
+
+    ## Once the Mousum's code is merged we will revert to following line
+    # A = GLM.inv(model)
+
+    β_star_matrix = zeros(sim_size,p)
+
+    for b in 1:sim_size
+        e_resample = rand(CRRao_rng, e, n)
+        β_star = β_hat+A*X'e_resample
+        β_star_matrix[b,:] = β_star
+    end
+
+    bootstrap_coef_table = zeros(p,4)
+    bootstrap_coef_table[:,1] = mean(β_star_matrix, dims=1)
+    bootstrap_coef_table[:,2] = std(β_star_matrix, dims=1)
+    for j in 1:p
+        bootstrap_coef_table[j,3] = quantile(β_star_matrix[:,j], 0.05)
+        bootstrap_coef_table[j,4] = quantile(β_star_matrix[:,j], 0.95)
+    end
+    col_names = ["Coef", "Std Error", "Lower 5%", "Upper 95%"]
+    bootstrap_coeftable = DataFrame(bootstrap_coef_table, col_names)
+    row_names = res[!,1]
+    bootstrap_coeftable = insertcols!(bootstrap_coeftable, 1, :Predictor =>row_names)
+
+    return bootstrap_coeftable
+end
diff --git a/test/numerical/frequentist/LinearRegression.jl b/test/numerical/frequentist/LinearRegression.jl
index 29043ac..b315ff3 100644
--- a/test/numerical/frequentist/LinearRegression.jl
+++ b/test/numerical/frequentist/LinearRegression.jl
@@ -19,4 +19,8 @@ for (test_formula, test_aic, test_bic, test_pvalue) in tests
         bp_test = BPTest(crrao_model, mtcars)
         @test isapprox(pvalue(bp_test), test_pvalue)
     end
-end
\ No newline at end of file
+end
+
+CRRao.set_rng(StableRNG(123))
+container = fit(@formula(MPG ~ HP + WT + Gear), mtcars, LinearRegression(), Boot_Residual())
+@test isapprox(container.Coef, [32.13093064426382, -0.036497081866302794, -3.2257634531003574, 1.0001169309164597])