add weights to lsqfit, for bootstrap

Gkreindler · Dec 25, 2023 · e8fad9b · e8fad9b
1 parent 46dd3fa
commit e8fad9b
Show file tree

Hide file tree

Showing 13 changed files with 10,909 additions and 10,690 deletions.
diff --git a/Project.toml b/Project.toml
@@ -19,6 +19,7 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
 Optim = "429524aa-4258-5aef-a3af-852621145aeb"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RegressionTables = "d519eb52-b820-54da-95a6-98e1306fdade"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"

diff --git a/examples/example_ols_2step.jl b/examples/example_ols_2step.jl
@@ -15,7 +15,6 @@ using Optim # need for NewtonTrustRegion()
 
 using Random
 
-
 # load data, originally from: https://www.kaggle.com/datasets/uciml/autompg-dataset/?select=auto-mpg.csv 
     df = CSV.read("examples/auto-mpg.csv", DataFrame)
     df[!, :constant] .= 1.0
@@ -54,40 +53,45 @@ end
                     write_iter=true,
                     clean_iter=true,
                     overwrite=true,
+                    optim_opts=(show_trace=false,), # additional options for LsqFit in a NamedTuple
                     trace=1)
 
     # estimate model
     myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)
 
 # ### using Optim.jl
-#     # estimation options
-#     myopts = GMMTools.GMMOptions(
-#                     path="C:/git-repos/GMMTools.jl/examples/temp/", 
-#                     optim_algo=LBFGS(), 
-#                     optim_autodiff=:forward,
-#                     write_iter=true,
-#                     clean_iter=true,
-#                     overwrite=true,
-#                     trace=1)
-
-#     # estimate model
-#     myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)
+    # estimation options
+    myopts = GMMTools.GMMOptions(
+                    path="C:/git-repos/GMMTools.jl/examples/temp/", 
+                    optim_algo=LBFGS(), 
+                    optim_autodiff=:forward,
+                    write_iter=true,
+                    clean_iter=true,
+                    overwrite=true,
+                    trace=1)
+
+    # estimate model
+    myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)
 
 # compute asymptotic variance-covariance matrix and save in myfit.vcov
     vcov_simple(df, ols_moments_fn, myfit)
-0
+
 # print table with results
     regtable(myfit)
 
-fdsfd
+
 
 # compute Bayesian (weighted) bootstrap inference and save in myfit.vcov
-    myopts.trace = 0
+    myopts.trace = 1
     vcov_bboot(df, ols_moments_fn, theta0, myfit, nboot=500, opts=myopts)
     GMMTools.regtable(myfit) # print table with new bootstrap SEs -- very similar to asymptotic SEs in this case. Nice!
 
+    sdfsd
+    # using Plots
+    # histogram(myfit.vcov[:boot_fits].all_theta_hat[:, 1])
 
 # bootstrap with weightes drawn at the level of clusters defined by the variable df.cylinders
+    myopts.trace = 0
     vcov_bboot(df, ols_moments_fn, theta0, myfit, boot_weights=:cluster, cluster_var=:cylinders, nboot=500, opts=myopts)
     myfit.vcov
 

diff --git a/examples/example_ols_parallel.jl b/examples/example_ols_parallel.jl
@@ -0,0 +1,113 @@
+using Distributed
+
+using Pkg
+Pkg.activate(".")
+Pkg.resolve()
+Pkg.instantiate()
+
+rmprocs(workers())
+display(workers())
+addprocs(2)
+display(workers())
+
+@everywhere using Pkg
+@everywhere Pkg.activate(".")
+
+@everywhere begin
+
+    using Revise
+    using LinearAlgebra # for identity matrix "I"
+    using CSV
+    using DataFrames
+    using FixedEffectModels # for benchmarking
+    using RegressionTables
+
+    using GMMTools
+    using Optim # need for NewtonTrustRegion()
+
+    using Random
+end
+
+# load data, originally from: https://www.kaggle.com/datasets/uciml/autompg-dataset/?select=auto-mpg.csv 
+    df = CSV.read("examples/auto-mpg.csv", DataFrame)
+    df[!, :constant] .= 1.0
+
+# Run plain OLS for comparison
+    r = reg(df, term(:mpg) ~ term(:acceleration))
+    regtable(r)
+
+# define moments for OLS regression
+# residuals orthogonal to the constant and to the variable (acceleration)
+# this must always return a Matrix (rows = observations, columns = moments)
+@everywhere function ols_moments_fn(data, theta)
+
+    resids = @. data.mpg - theta[1] - theta[2] * data.acceleration
+
+    # n by 2 matrix of moments
+    moms = hcat(resids, resids .* data.acceleration)
+
+    return moms
+end
+
+# initial parameter guess
+    Random.seed!(123)
+    theta0 = randn(20,2)
+
+### using Optim.jl
+    # estimation options
+    myopts = GMMTools.GMMOptions(
+                    path="C:/git-repos/GMMTools.jl/examples/temp/", 
+                    optimizer=:lsqfit,
+                    optim_algo_bounds=true,
+                    lower_bound=[-Inf, -Inf],
+                    upper_bound=[Inf, Inf],
+                    optim_autodiff=:forward,
+                    write_iter=true,
+                    clean_iter=true,
+                    overwrite=true,
+                    optim_opts=(show_trace=false,), # additional options for LsqFit in a NamedTuple
+                    trace=1)
+
+    # estimate model
+    myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts, run_parallel=true)
+
+    fsdfd
+
+# ### using Optim.jl
+    # estimation options
+    myopts = GMMTools.GMMOptions(
+                    path="C:/git-repos/GMMTools.jl/examples/temp/", 
+                    optim_algo=LBFGS(), 
+                    optim_autodiff=:forward,
+                    write_iter=true,
+                    clean_iter=true,
+                    overwrite=true,
+                    trace=1)
+
+    # estimate model
+    myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)
+
+# compute asymptotic variance-covariance matrix and save in myfit.vcov
+    vcov_simple(df, ols_moments_fn, myfit)
+
+# print table with results
+    regtable(myfit)
+
+
+
+# compute Bayesian (weighted) bootstrap inference and save in myfit.vcov
+    myopts.trace = 1
+    vcov_bboot(df, ols_moments_fn, theta0, myfit, nboot=500, opts=myopts)
+    GMMTools.regtable(myfit) # print table with new bootstrap SEs -- very similar to asymptotic SEs in this case. Nice!
+
+    sdfsd
+    # using Plots
+    # histogram(myfit.vcov[:boot_fits].all_theta_hat[:, 1])
+
+# bootstrap with weightes drawn at the level of clusters defined by the variable df.cylinders
+    myopts.trace = 0
+    vcov_bboot(df, ols_moments_fn, theta0, myfit, boot_weights=:cluster, cluster_var=:cylinders, nboot=500, opts=myopts)
+    myfit.vcov
+
+    GMMTools.regtable(myfit)
+