Skip to content

Commit

Permalink
read/write changes, document errors, minor regtable improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel E Kreindler authored and Gabriel E Kreindler committed Dec 31, 2023
1 parent a45841c commit 9ad96dd
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 42 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,23 @@ myfit = GMMTools.fit(MYDATA, mom_fn, theta0, mode=:twostep, opts=myopts)
```

## Writing and reading results
`GMMTools.fit(...)` saves the estimation results in two files: `fit.csv` contains a table with one row per set of initial conditions, and `fit.json` contains several estimation parameters and results. `GMMTools.write(myfit::GMMFit, opts::GMMOptions, filename)` is the lower level function to write `GMMFit` objects to files.
`GMMTools.fit(...)` saves the estimation results in two files: `fit.csv` contains a table with one row per set of initial conditions, and `fit.json` contains several estimation parameters and results. `GMMTools.write(myfit::GMMFit, opts::GMMOptions; subpath="fit")` is the lower level function to write `GMMFit` objects to files.

`GMMTools.vcov_simple(...)` saves a `vcov.json` file that includes, among other objects, the variance-covariance matrix `myfit.vcov.V`. `GMMTools.vcov_bboot(...)` also saves two files `vcov_boot_fits_df.csv` (all individual runs for all bootstrap runs) and `vcov_boot_weights.csv` (rows are bootstrap runs, columns are data observations). The lower level function is `GMMTools.write(myvcov::GMMvcov, opts::GMMOptions)`.
`GMMTools.vcov_simple(...)` saves a `vcov.json` file that includes, among other objects, the variance-covariance matrix `myfit.vcov.V`. `GMMTools.vcov_bboot(...)` also saves two files `vcov_boot_fits_df.csv` (all individual runs for all bootstrap runs) and `vcov_boot_weights.csv` (rows are bootstrap runs, columns are data observations). The lower level function is `GMMTools.write(myvcov::GMMvcov, opts::GMMOptions; subpath="vcov")`.

`GMMTools.read_fit(full_path; subpath="fit", show_trace=false)` reads estimation results and loads them into a `GMMFit` object. `GMMTools.read_vcov(full_path; subpath="vcov", show_trace=false)` reads vcov results and loads them into a `GMMvcov` object. Note that `GMMTools.read_fit()` attempts to also read the vcov from the same folder. Otherwise, read the vcov separately and attach it using
```julia
myfit = GMMTools.read_fit(mypath1)
myfit.vcov = GMMTools.read_vcov(mypath2)
```

## Capturing errors
By default, any error during optimization stops the entire estimation (or inference) command.

Set the `GMMOptions` field `throw_errors=false` to capture these errors and write them to file, but not interrupt the rest of the estimation procedure.
- when using multiple initial conditions, all iterations that error are recorded in `myfit.fits_df` with `errored=true` and `obj_value=Inf`. If all iterations error, we have `myfit.errored=true` and several other fields are recorded as `missing`
- for bootstrap results, similar rules apply. Note that inference objects (SEs, vcov, etc.) are computed dropping the bootstrap runs that errored. `@warn` messages should be displayed to remind the user that this is happening. It is the user's responsibility to ensure this behavior is ok for their use case.

`GMMTools.read_fit(opts::GMMOptions; filepath="")` reads estimation results and loads them into a `GMMFit` object. `GMMTools.read_vcov(opts::GMMOptions; filepath="")` reads vcov results and loads them into a `GMMvcov` object.

# Package To-do list

Expand Down
24 changes: 12 additions & 12 deletions examples/example_ols_2step.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,9 @@ end
# estimate model
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts)

myfit.theta_hat
myfit.fits_df
# ols_moments_fn(df, [0.06364138660614915, 0.2604202723600453])
sdfd

myopts.path *= "step1/"
myfit2 = GMMTools.read_fit(myopts)
# read fit from file
mypath = "C:/git-repos/GMMTools.jl/examples/temp/step2/"
myfit2 = GMMTools.read_fit(mypath)

### using Optim.jl
# estimation options
Expand All @@ -78,11 +74,16 @@ sdfd
# estimate model
myfit = GMMTools.fit(df, ols_moments_fn, theta0, mode=:twostep, opts=myopts);


# compute asymptotic variance-covariance matrix and save in myfit.vcov
vcov_simple(df, ols_moments_fn, myfit)
vcov_simple(df, ols_moments_fn, myfit, opts=myopts)

GMMTools.write(myfit.vcov, myopts.path)

GMMTools.write(myfit.vcov, myopts)
# read vcov from file
mypath = "C:/git-repos/GMMTools.jl/examples/temp/"
myvcov2 = GMMTools.read_vcov(mypath)
myfit.vcov = myvcov2

# print table with results
regtable(myfit) |> display
Expand All @@ -102,7 +103,7 @@ sdfd


# read vcov with bootstrap from file
myfit.vcov = GMMTools.read_vcov(myopts)
myfit.vcov = GMMTools.read_vcov(myopts.path);
regtable(myfit) |> display

# using Plots
Expand All @@ -113,5 +114,4 @@ sdfd
# vcov_bboot(df, ols_moments_fn, theta0, myfit, boot_weights=:cluster, cluster_var=:cylinders, nboot=500, opts=myopts)
# myfit.vcov

# GMMTools.regtable(myfit)

# GMMTools.regtable(myfit)
41 changes: 41 additions & 0 deletions examples/example_regtable.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Example script: print a plain OLS regression and a previously saved GMM fit
# side by side in a single RegressionTables.jl table.
#
# Prerequisites (see the NOTE comments below):
#   * run from the repository root, so that "." is the project to activate and
#     "examples/auto-mpg.csv" resolves;
#   * GMM results (fit + vcov) must already have been written to disk,
#     presumably by examples/example_ols_2step.jl -- TODO confirm.

# activate the project in the current working directory and make sure all of its
# dependencies are resolved and installed
using Pkg
Pkg.activate(".")
Pkg.resolve()
Pkg.instantiate()

using Revise
using CSV
using DataFrames
using FixedEffectModels # for benchmarking
using RegressionTables

using GMMTools

# load data, originally from: https://www.kaggle.com/datasets/uciml/autompg-dataset/?select=auto-mpg.csv
df = CSV.read("examples/auto-mpg.csv", DataFrame)
# add a column of ones (not referenced again in this script)
df[!, :constant] .= 1.0


# Run plain OLS for comparison
reg_ols = reg(df, term(:mpg) ~ term(:acceleration))
regtable(reg_ols)

# read fit from file
# NOTE: hard-coded absolute path from the author's machine -- point it to the folder
# that contains the saved fit files (fit.json / fit.csv) before running.
mypath = "C:/git-repos/GMMTools.jl/examples/temp/step2/"
myfit = GMMTools.read_fit(mypath)

# read vcov from file
# NOTE: the vcov is read from the parent folder (not step2/) and attached to the fit
# explicitly; same caveat about the hard-coded path applies.
mypath = "C:/git-repos/GMMTools.jl/examples/temp/"
myfit.vcov = GMMTools.read_vcov(mypath)

# rename the GMM parameters to the OLS coefficient labels used below
# (presumably so that both models share the same rows in the table -- TODO confirm)
myfit.theta_names = ["(Intercept)", "acceleration"]

# print table with results (combine both OLS and GMM results)
# want to use a new label for the estimator: doesn't work
# RegressionTables.label_distribution(render::AbstractRenderType, d::GMMTools.GMMFit) = "GMM"
# default: print_estimator_section = false

# mylabels = Dict("theta_1" => "(Intercept)", "theta_2" => "acceleration")
# ASCII-rendered table with confidence intervals (instead of the default statistic)
# printed below each coefficient
regtable(reg_ols, myfit, render = AsciiTable(), below_statistic = ConfInt) |> display

13 changes: 6 additions & 7 deletions src/functions_estimation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ Base.@kwdef mutable struct GMMOptions
path::String = "" # path to save results
write_iter::Bool = false # write to file each result (each initial run)
clean_iter::Bool = false # delete individual run files at the end of the estimation
overwrite::Bool = false # overwrite existing results file and individual run files
throw_errors::Bool = false # throw optimization errors (if false, save them to file but continue with the other runs)
overwrite::Bool = true # overwrite existing results file and individual run files
throw_errors::Bool = true # throw optimization errors (if false, save them to file but continue with the other runs)

optimizer::Symbol = :optim # optimizer backend: :optim or :lsqfit (LM)
optim_algo = LBFGS() # Optim.jl algorithm
Expand All @@ -14,7 +14,6 @@ Base.@kwdef mutable struct GMMOptions
theta_lower = nothing # nothing or vector of lower bounds
theta_upper = nothing # nothing or vector of upper bounds


theta_names::Union{Vector{String}, Nothing} = nothing # names of parameters

trace::Integer = 0
Expand Down Expand Up @@ -54,7 +53,7 @@ Base.@kwdef mutable struct GMMFit
time_it_took::Union{Float64, Missing}

# results from multiple initial conditions (DataFrame)
fits_df = nothing # TODO: switch to PrettyTables.jl and OrderedDict
fits_df = nothing
idx = nothing # aware of which iteration number this is

# variance covariance matrix
Expand Down Expand Up @@ -251,7 +250,7 @@ function fit_twostep(
fit_step2 = deepcopy(fit_step1)
fit_step2.mode = :twostep2
# save results to file?
(opts.path != "") && write(fit_step2, opts)
(opts.path != "") && write(fit_step2, opts.path)

return fit_step2
end
Expand Down Expand Up @@ -358,7 +357,7 @@ function fit_onestep(
stats_at_theta_hat(best_model_fit, data, mom_fn)

# save results to file?
(opts.path != "") && write(best_model_fit, opts)
(opts.path != "") && write(best_model_fit, opts.path)

# delete all intermediate files with individual iteration results
opts.clean_iter && clean_iter(opts)
Expand Down Expand Up @@ -408,7 +407,7 @@ function fit_onerun(
# write intermediate results to file
if opts.write_iter
(opts.trace > 0) && println(" Done and done writing to file.")
write(model_fit, opts, subpath="__iter__/results_" * string(idx)) # this does not contain moms_hat (good, saves space)
write(model_fit, opts.path, subpath="__iter__/results_" * string(idx)) # this does not contain moms_hat (good, saves space)
else
(opts.trace > 0) && println(" Done. ")
end
Expand Down
30 changes: 27 additions & 3 deletions src/functions_inference.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,24 @@ function jacobian(data, mom_fn::Function, myfit::GMMFit)

end

function vcov_simple(data, mom_fn::Function, myfit::GMMFit)
function vcov_simple(
data,
mom_fn::Function,
myfit::GMMFit;
opts::GMMOptions=default_gmm_opts()
)

# cannot compute if estimation errored
myfit.errored && error("Cannot compute vcov_simple because estimation errored.")

# try to read from file
myvcov = read_fit(opts.path)
if !isnothing(myvcov) && myvcov.method == :simple
# TODO: use opts.overwrite option to error if vcov already exists but is not :simple
myfit.vcov = myvcov
return
end

# jacobian
J = jacobian(data, mom_fn, myfit)

Expand All @@ -75,6 +88,9 @@ function vcov_simple(data, mom_fn::Function, myfit::GMMFit)
Σ = Σ,
ses = sqrt.(diag(V)))

# save results to file?
(opts.path != "") && write(myfit.vcov, opts.path)

return
end

Expand Down Expand Up @@ -259,6 +275,14 @@ function vcov_bboot(
run_parallel=true,
opts::GMMOptions=default_gmm_opts())

# try to read from file
myvcov = read_fit(opts.path)
if !isnothing(myvcov) && myvcov.method == :bayesian_bootstrap
# TODO: use opts.overwrite option to error if vcov already exists but is not bayesian_bootstrap
myfit.vcov = myvcov
return
end

# copy options so we can modify them (trace and path)
opts = deepcopy(opts)

Expand Down Expand Up @@ -324,7 +348,7 @@ function vcov_bboot(
)

# save results to file?
(opts.path != "") && write(myfit.vcov, opts)
(opts.path != "") && write(myfit.vcov, opts.path)

# delete all intermediate files with individual iteration results
if opts.clean_iter
Expand All @@ -337,7 +361,7 @@ function vcov_bboot(
end
end

return boot_fits
return
end

function bboot(
Expand Down
8 changes: 4 additions & 4 deletions src/functions_regtable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ has_fe(m::GMMRegModel) = false
# RegressionTables.replace_name(x::Tuple{Vararg{Any}}, a::Dict{String, String}, b::Dict{String, String}) = [RegressionTables.replace_name(x[i], a, b) for i=1:length(x)]
# RegressionTables.formula(m::GMMModel) = term(m.responsename) ~ sum(term.(String.(m.coefnames)))

RegressionTables._responsename(x::GMMRegModel) = string(responsename(x))
RegressionTables._coefnames(x::GMMRegModel) = string.(coefnames(x))
RegressionTables._responsename(x::GMMRegModel) = RegressionTables.CoefName(string(responsename(x)))
RegressionTables._coefnames(x::GMMRegModel) = RegressionTables.CoefName.(string.(coefnames(x)))

StatsAPI.coef(m::GMMRegModel) = m.coef
StatsAPI.coefnames(m::GMMRegModel) = m.coefnames
Expand Down Expand Up @@ -109,7 +109,7 @@ function GMMRegModel(r::GMMFit)
fe=DataFrame(),
fekeys=[],
coefnames=r.theta_names,
responsename="a",
responsename="", # no column header
# formula::FormulaTerm # Original formula
# formula_schema::FormulaTerm # Schema for predict
contrasts=Dict(),
Expand All @@ -133,7 +133,7 @@ function RegressionTables.regtable(rrs::Vararg{Union{RegressionModel, GMMFit}};

rrs_converted = [isa(r, GMMFit) ? GMMRegModel(r) : r for r=rrs]

return RegressionTables.regtable(rrs_converted..., kwargs...)
return RegressionTables.regtable(rrs_converted...; kwargs...)
end


Expand Down
31 changes: 18 additions & 13 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,12 @@ end
Write GMMFit object to files: JSON for most fields + CSV for fits_df table
All paths should exist.
"""
function write(myfit::GMMFit, opts::GMMOptions; subpath="fit")
function write(myfit::GMMFit, full_path; subpath="fit")

# paths
if opts.path == ""
if full_path == ""
return
end
full_path = opts.path
end
(full_path[end] == '/') && (full_path *= '/') # ? platform issues?
full_path *= subpath

Expand Down Expand Up @@ -87,12 +86,11 @@ end

### Variance-covariance results

function write(myvcov::GMMvcov, opts::GMMOptions; subpath="vcov")
function write(myvcov::GMMvcov, full_path; subpath="vcov")

if opts.path == ""
if full_path == ""
return
end
full_path = opts.path
(full_path[end] == '/') && (full_path *= '/') # ? platform issues?
full_path *= subpath

Expand Down Expand Up @@ -196,11 +194,10 @@ function dict2fit(myfit_dict)
end

"""
filepath should not include the extension (.csv or .json)
Example: fit object saved under "C:/temp/fit.json" and "C:/temp/fit.csv". Then call `read_fit("C:/temp/")`.
"""
function read_fit(opts::GMMOptions; subpath="fit", show_trace=false)
function read_fit(full_path; subpath="fit", show_trace=false)

full_path = opts.path
(full_path[end] == '/') && (full_path *= '/') # ? platform issues?
full_path *= subpath

Expand All @@ -225,13 +222,19 @@ function read_fit(opts::GMMOptions; subpath="fit", show_trace=false)
myfit.moms_hat = readdlm(full_path * "_moms_hat.csv", ',', Float64)
end

@info "Read fit from file from " * full_path

# try to automatically read vcov object (myfit.vcov = nothing if this fails)
myfit.vcov = read_vcov(full_path, show_trace=show_trace)

return myfit
end


function read_vcov(opts::GMMOptions; subpath="vcov", show_trace=false)
"""
Example: vcov object saved under "C:/temp/vcov.json". Then call `read_vcov("C:/temp/")`.
"""
function read_vcov(full_path; subpath="vcov", show_trace=false)

full_path = opts.path
(full_path[end] == '/') && (full_path *= '/') # ? platform issues?
full_path *= subpath

Expand Down Expand Up @@ -286,5 +289,7 @@ function read_vcov(opts::GMMOptions; subpath="vcov", show_trace=false)
boot_moms_hat_df = boot_moms_hat_df)
end

@info "Read vcov type [" * string(myvcov.method) * "] from file from " * full_path

return myvcov
end

0 comments on commit 9ad96dd

Please sign in to comment.