Skip to content

Commit

Permalink
add weights to lsqfit, for bootstrap
Browse files Browse the repository at this point in the history
  • Loading branch information
Gkreindler committed Dec 25, 2023
1 parent 46dd3fa commit e8fad9b
Show file tree
Hide file tree
Showing 13 changed files with 10,909 additions and 10,690 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RegressionTables = "d519eb52-b820-54da-95a6-98e1306fdade"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
Expand Down
36 changes: 20 additions & 16 deletions examples/example_ols_2step.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ using Optim # need for NewtonTrustRegion()

using Random


# load data, originally from: https://www.kaggle.com/datasets/uciml/autompg-dataset/?select=auto-mpg.csv
df = CSV.read("examples/auto-mpg.csv", DataFrame)
df[!, :constant] .= 1.0
Expand Down Expand Up @@ -54,40 +53,45 @@ end
write_iter=true,
clean_iter=true,
overwrite=true,
optim_opts=(show_trace=false,), # additional options for LsqFit in a NamedTuple
trace=1)

# estimate model
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)

# ### using Optim.jl
# # estimation options
# myopts = GMMTools.GMMOptions(
# path="C:/git-repos/GMMTools.jl/examples/temp/",
# optim_algo=LBFGS(),
# optim_autodiff=:forward,
# write_iter=true,
# clean_iter=true,
# overwrite=true,
# trace=1)

# # estimate model
# myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)
# estimation options
myopts = GMMTools.GMMOptions(
path="C:/git-repos/GMMTools.jl/examples/temp/",
optim_algo=LBFGS(),
optim_autodiff=:forward,
write_iter=true,
clean_iter=true,
overwrite=true,
trace=1)

# estimate model
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)

# compute asymptotic variance-covariance matrix and save in myfit.vcov
vcov_simple(df, ols_moments_fn, myfit)
0

# print table with results
regtable(myfit)

fdsfd


# compute Bayesian (weighted) bootstrap inference and save in myfit.vcov
myopts.trace = 0
myopts.trace = 1
vcov_bboot(df, ols_moments_fn, theta0, myfit, nboot=500, opts=myopts)
GMMTools.regtable(myfit) # print table with new bootstrap SEs -- very similar to asymptotic SEs in this case. Nice!

sdfsd
# using Plots
# histogram(myfit.vcov[:boot_fits].all_theta_hat[:, 1])

# bootstrap with weights drawn at the level of clusters defined by the variable df.cylinders
myopts.trace = 0
vcov_bboot(df, ols_moments_fn, theta0, myfit, boot_weights=:cluster, cluster_var=:cylinders, nboot=500, opts=myopts)
myfit.vcov

Expand Down
113 changes: 113 additions & 0 deletions examples/example_ols_parallel.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Distributed supplies the worker-process tools used below
# (workers, rmprocs, addprocs, @everywhere).
using Distributed

# Activate the project found in the current working directory on the main
# process, then resolve and install its dependencies.
using Pkg
Pkg.activate(".")
Pkg.resolve()
Pkg.instantiate()

# Reset the worker pool: remove the processes currently reported by `workers()`,
# then add two new worker processes. The worker ids are displayed before and
# after so the change is visible.
rmprocs(workers())
display(workers())
addprocs(2)
display(workers())

# Activate the same project on every process (main + workers).
@everywhere using Pkg
@everywhere Pkg.activate(".")

# Load the packages on every process so the code run on the workers can use them.
@everywhere begin

using Revise
using LinearAlgebra # for identity matrix "I"
using CSV
using DataFrames
using FixedEffectModels # for benchmarking
using RegressionTables

using GMMTools
using Optim # need for NewtonTrustRegion()

using Random
end

# load data, originally from: https://www.kaggle.com/datasets/uciml/autompg-dataset/?select=auto-mpg.csv
# (the path is relative to the current working directory)
df = CSV.read("examples/auto-mpg.csv", DataFrame)
# add a column named :constant filled with 1.0
df[!, :constant] .= 1.0

# Run plain OLS (mpg on acceleration) for comparison with the GMM estimates below
r = reg(df, term(:mpg) ~ term(:acceleration))
regtable(r)

# Moment function for the OLS regression of mpg on acceleration.
# Moment conditions: the residual is orthogonal to the constant and to the
# regressor (acceleration).
# Always returns a Matrix (rows = observations, columns = moments).
# Defined with @everywhere so that every process has the function.
@everywhere function ols_moments_fn(data, theta)
    # theta[1] is the intercept, theta[2] the slope on acceleration
    intercept, slope = theta[1], theta[2]

    # regression residual for each observation
    errs = data.mpg .- intercept .- slope .* data.acceleration

    # n by 2 matrix: [residual, residual * acceleration]
    return hcat(errs, errs .* data.acceleration)
end

# initial parameter guesses: a 20 x 2 matrix of random draws (seeded for reproducibility)
# NOTE(review): presumably each row is one starting point for the 2 parameters,
# so the model is estimated from 20 initial conditions -- confirm against GMMTools.fit
Random.seed!(123)
theta0 = randn(20,2)

### using LsqFit.jl (selected below with optimizer=:lsqfit)
# estimation options
# NOTE(review): `path` is an absolute, machine-specific folder; change it to a
# directory that exists on your machine before running
myopts = GMMTools.GMMOptions(
    path="C:/git-repos/GMMTools.jl/examples/temp/",
    optimizer=:lsqfit,
    optim_algo_bounds=true,
    lower_bound=[-Inf, -Inf],
    upper_bound=[Inf, Inf],
    optim_autodiff=:forward,
    write_iter=true,
    clean_iter=true,
    overwrite=true,
    optim_opts=(show_trace=false,), # additional options for LsqFit in a NamedTuple
    trace=1)

# estimate model; run_parallel=true requests parallel execution
# (presumably over the worker processes added at the top of the script)
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts, run_parallel=true)

### using Optim.jl (LBFGS algorithm)
# estimation options -- this replaces the `myopts` defined earlier in the script
# NOTE(review): `path` is an absolute, machine-specific folder -- adjust before running
myopts = GMMTools.GMMOptions(
path="C:/git-repos/GMMTools.jl/examples/temp/",
optim_algo=LBFGS(),
optim_autodiff=:forward,
write_iter=true,
clean_iter=true,
overwrite=true,
trace=1)

# estimate model (no run_parallel keyword here); this replaces the earlier `myfit`
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)

# compute asymptotic variance-covariance matrix and save in myfit.vcov
vcov_simple(df, ols_moments_fn, myfit)

# print table with results
regtable(myfit)



# compute Bayesian (weighted) bootstrap inference and save in myfit.vcov
myopts.trace = 1
vcov_bboot(df, ols_moments_fn, theta0, myfit, nboot=500, opts=myopts)
GMMTools.regtable(myfit) # print table with new bootstrap SEs -- very similar to asymptotic SEs in this case. Nice!

# optional: histogram of the bootstrap estimates of the first parameter
# using Plots
# histogram(myfit.vcov[:boot_fits].all_theta_hat[:, 1])

# bootstrap with weights drawn at the level of clusters defined by the variable df.cylinders
myopts.trace = 0
vcov_bboot(df, ols_moments_fn, theta0, myfit, boot_weights=:cluster, cluster_var=:cylinders, nboot=500, opts=myopts)
myfit.vcov

# print table with the cluster-bootstrap results
GMMTools.regtable(myfit)

Loading

0 comments on commit e8fad9b

Please sign in to comment.