From eae4ae9a1f626ddc627a172b7ef9ca75ab894ba8 Mon Sep 17 00:00:00 2001 From: Thore Kockerols Date: Mon, 23 Sep 2024 15:43:42 +0000 Subject: [PATCH] download us data from FRED --- test/download_data.jl | 134 +++++++++++++++++++++++++++++++ test/test_sw07_estimation_2nd.jl | 69 +++++++++------- 2 files changed, 172 insertions(+), 31 deletions(-) create mode 100644 test/download_data.jl diff --git a/test/download_data.jl b/test/download_data.jl new file mode 100644 index 00000000..f81e2498 --- /dev/null +++ b/test/download_data.jl @@ -0,0 +1,134 @@ +using Downloads +using CSV +using DataFrames +import Dates +using AxisKeys + +function download_fred_data(series_names::Vector{String}; + wide_format::Bool=false, + quarterly_only::Bool=true)::DataFrame + all_data = DataFrame() + + for series_name in series_names + # Construct the URL for the FRED series data + url = "https://fred.stlouisfed.org/series/$series_name/downloaddata/$series_name.csv" + + # Download the CSV data from the URL + http_response = Downloads.download(url) + + # Convert the downloaded data into a DataFrame + data = CSV.read(http_response, DataFrame) + + # Add a column for the series name + data[!, :series_name] .= series_name + + # Append to the final DataFrame + all_data = vcat(all_data, data) + end + + # Sort the DataFrame by date + sort!(all_data, [:series_name, :DATE]) + + if quarterly_only + # Filter to include only quarterly data (1st of Jan, Apr, Jul, Oct) + quarterly_dates = filter(row -> Dates.month(row.DATE) in [1, 4, 7, 10], all_data) + all_data = quarterly_dates + end + + if wide_format + # Pivot the DataFrame to wide format + # all_data_wide = unstack(all_data, :DATE, :series_name, :VALUE) + all_data_wide = unstack(all_data, :series_name, :DATE, :VALUE) + return all_data_wide + else + return all_data + end +end + +# https://users.cla.umn.edu/~erm/data/sr409/sw_orig/readme.pdf +# https://fredaccount.stlouisfed.org/public/datalist/803 +# https://www.wiwi.uni-bonn.de/bgsepapers/boncrc/CRCTR224_2022_356.pdf p37 +# https://www.ecb.europa.eu/pub/pdf/scpwps/ecbwp722.pdf p48 + +series = ["GDPC1","GDPC96","CE16OV","HOANBS","HOABS","AWHNONAG","FEDFUNDS","PRS85006152","PRS84006152","FPI","B007RG3Q086SBEA","GDPDEF","PRS85006023","PRS84006023","COMPRNFB","COMPNFB","PCECC96","CNP16OV"] +# Example usage: +df = download_fred_data(series, wide_format = false, quarterly_only = true) + + +all_data_wide = unstack(df, :DATE, :series_name, :VALUE) +sort!(all_data_wide, :DATE) + + +all_data_wide.interest_rate = @. ((all_data_wide.FEDFUNDS / 100 + 1) ^.25 - 1) * 100 + +all_data_wide.real_GDP_per_capita = all_data_wide.GDPC1 ./ all_data_wide.CNP16OV +all_data_wide.real_GDP_per_capita_growth = [missing; diff(all_data_wide.real_GDP_per_capita) ./ all_data_wide.real_GDP_per_capita[1:end-1] * 100] + +all_data_wide.inflation = [missing; diff(all_data_wide.GDPDEF) ./ all_data_wide.GDPDEF[1:end-1] * 100] + +all_data_wide.real_investment_per_capita = all_data_wide.FPI ./ all_data_wide.B007RG3Q086SBEA ./ all_data_wide.CNP16OV +all_data_wide.real_investment_per_capita_growth = [missing; diff(all_data_wide.real_investment_per_capita) ./ all_data_wide.real_investment_per_capita[1:end-1] * 100] + +all_data_wide.real_consumption_per_capita = all_data_wide.PCECC96 ./ all_data_wide.CNP16OV +all_data_wide.real_consumption_per_capita_growth = [missing; diff(all_data_wide.real_consumption_per_capita) ./ all_data_wide.real_consumption_per_capita[1:end-1] * 100] + +all_data_wide.hours_worked = log.(all_data_wide.PRS85006023 .* all_data_wide.CE16OV ./ all_data_wide.CNP16OV) .* 100 .- 415 # 415 is about the average for the original sample + +all_data_wide.real_wage_per_capita_growth = @. ((all_data_wide.PRS85006152 / 100 + 1) ^.25 - 1) * 100 + + +subset_data_wide = all_data_wide[:,[:DATE, + :real_GDP_per_capita_growth, + :real_consumption_per_capita_growth, + :real_investment_per_capita_growth, + :hours_worked, + :inflation, + :real_wage_per_capita_growth, + :interest_rate + ]] + +complete_subset_data_wide = subset_data_wide[completecases(subset_data_wide),:] + +CSV.write("us_data.csv", complete_subset_data_wide) + +data = KeyedArray(Float64.(Matrix(complete_subset_data_wide[:, Not(:DATE)])'), Variable = Symbol.(names(complete_subset_data_wide)[2:end]), Time = complete_subset_data_wide[:, :DATE]) + + +# declare observables as written in model +observables = [:dy, :dc, :dinve, :labobs, :pinfobs, :dwobs, :robs] # note that :dw was renamed to :dwobs in linear model in order to avoid confusion with nonlinear model + +data = rekey(data, :Variable => observables) + +# all_data_wide.hours_worked = all_data_wide.PRS84006023 .* all_data_wide.CE16OV ./ all_data_wide.CNP16OV +# all_data_wide.hours_worked = all_data_wide.AWHNONAG .* all_data_wide.CE16OV ./ all_data_wide.CNP16OV +# all_data_wide.real_wage_index_per_capita = all_data_wide.COMPRNFB +# all_data_wide.real_wage_per_capita = all_data_wide.COMPNFB ./ all_data_wide.GDPDEF +# all_data_wide.real_wage_index_per_capita = all_data_wide.CE16OV .* all_data_wide.COMPRNFB ./ all_data_wide.CNP16OV +# all_data_wide.real_wage_per_capita_growth = [missing; diff(all_data_wide.real_wage_per_capita) ./ all_data_wide.real_wage_per_capita[2:end] * 100] + +# using StatsPlots +# plot(all_data_wide.real_wage_per_capita_growth[1:230]) +# plot(all_data_wide.hours_worked[1:230] .- 415) +# plot((all_data_wide.hours_worked) .- 415) +# plot(all_data_wide.interest_rate) +# plot(all_data_wide.real_GDP_per_capita_growth[1:230]) +# plot(all_data_wide.real_investment) +# plot(all_data_wide.real_consumption_per_capita_growth[1:230]) +# plot(all_data_wide.inflation) + +# (all_data_wide.hours_worked[5:230] |> sum) / 226 +# (all_data_wide.hours_worked[5:end-1] |> sum) / 306 + +# dy[0] = ctrend + 100 * (y[0] / y[-1] - 1) # fine + +# dc[0] = ctrend + 100 * (c[0] / c[-1] - 1) # fine + +# dinve[0] = ctrend + 100 * (inve[0] / inve[-1] - 1) # fine + +# pinfobs[0] = 100 * (pinf[0] - 1) # fine except for some data issues somewhere in the middle + +# robs[0] = 100 * (r[0] - 1) # fine + +# dwobs[0] = ctrend + 100 * (w[0] / w[-1] - 1) + +# labobs[0] = constelab + 100 * (lab[0] / lab[ss] - 1) # fine, except for constant diff --git a/test/test_sw07_estimation_2nd.jl b/test/test_sw07_estimation_2nd.jl index 2dca212f..70b151c5 100644 --- a/test/test_sw07_estimation_2nd.jl +++ b/test/test_sw07_estimation_2nd.jl @@ -18,15 +18,16 @@ println("Threads used: ", Threads.nthreads()) println("BLAS threads used: ", BLAS.get_num_threads()) # smpler = ENV["sampler"] # "pigeons" # -# smple = ENV["sample"] # "original" # +smple = ENV["sample"] # "original" # # mdl = ENV["model"] # "linear" # fltr = ENV["filter"] # "kalman" # -algo = ENV["algorithm"] # "kalman" # +algo = ENV["algorithm"] # "first_order" # # chns = Meta.parse(ENV["chains"]) # "4" # # scns = Meta.parse(ENV["scans"]) # "4" # smpls = Meta.parse(ENV["samples"]) # "4" # # println("Sampler: $smpler") +println("Estimation Sample: $smple") println("Samples: $smpls") # println("Model: $mdl") # println("Chains: $chns") @@ -35,25 +36,35 @@ println("Algorithm: $algo") # println("Scans: $scns") println(pwd()) -# load data -dat = CSV.read("./Github/MacroModelling.jl/test/data/usmodel.csv", DataFrame) +if smple == "original" + # load data + dat = CSV.read("./Github/MacroModelling.jl/test/data/usmodel.csv", DataFrame) -# load data -data = KeyedArray(Array(dat)',Variable = Symbol.(strip.(names(dat))), Time = 1:size(dat)[1]) + # load data + data = KeyedArray(Array(dat)',Variable = Symbol.(strip.(names(dat))), Time = 1:size(dat)[1]) -# declare observables as written in csv file -observables_old = [:dy, :dc, :dinve, :labobs, :pinfobs, :dw, :robs] # note that :dw was renamed to :dwobs in linear model in order to avoid confusion with nonlinear model + # declare observables as written in csv file + observables_old = [:dy, :dc, :dinve, :labobs, :pinfobs, :dw, :robs] # note that :dw was renamed to :dwobs in linear model in order to avoid confusion with nonlinear model -# Subsample -# subset observables in data -sample_idx = 47:230 # 1960Q1-2004Q4 + # Subsample + # subset observables in data + sample_idx = 47:230 # 1960Q1-2004Q4 -data = data(observables_old, sample_idx) + data = data(observables_old, sample_idx) -# declare observables as written in model -observables = [:dy, :dc, :dinve, :labobs, :pinfobs, :dwobs, :robs] # note that :dw was renamed to :dwobs in linear model in order to avoid confusion with nonlinear model + # declare observables as written in model + observables = [:dy, :dc, :dinve, :labobs, :pinfobs, :dwobs, :robs] # note that :dw was renamed to :dwobs in linear model in order to avoid confusion with nonlinear model -data = rekey(data, :Variable => observables) + data = rekey(data, :Variable => observables) +elseif smple == "full" # 1954Q4 - 2024Q2 + include("download_data.jl") +elseif smple == "no_pandemic" # 1954Q4 - 2020Q1 + include("download_data.jl") + data = data[:,<(Dates.Date("2020-04-01"))] +elseif smple == "update" # 1960Q1 - 2024Q2 + include("download_data.jl") + data = data[:,>(Dates.Date("1959-10-01"))] +end # Handling distributions with varying parameters using arraydist @@ -130,14 +141,14 @@ SW07_loglikelihood_middle = SW07_loglikelihood_function(data[:,100:end], Smets_W SW07_loglikelihood = SW07_loglikelihood_function(data, Smets_Wouters_2007, observables, fixed_parameters, Symbol(algo), Symbol(fltr)) -modeSW2007 = Turing.maximum_a_posteriori(SW07_loglikelihood_short, - Optim.SimulatedAnnealing()) +# modeSW2007 = Turing.maximum_a_posteriori(SW07_loglikelihood_short, +# Optim.SimulatedAnnealing()) -println("Mode loglikelihood (Simulated Annealing): $(modeSW2007.lp)") +# println("Mode loglikelihood (Simulated Annealing): $(modeSW2007.lp)") modeSW2007 = Turing.maximum_a_posteriori(SW07_loglikelihood_short, - Optim.NelderMead(), - initial_params = modeSW2007.values) + Optim.NelderMead())#, + # initial_params = modeSW2007.values) println("Mode loglikelihood (short sample): $(modeSW2007.lp)") @@ -174,17 +185,14 @@ samps = @time Turing.sample(SW07_loglikelihood, progress = true, initial_params = init_params) -varnames = [:z_ea, :z_eb, :z_eg, :z_eqs, :z_em, :z_epinf, :z_ew, :crhoa, :crhob, :crhog, :crhoqs, :crhoms, :crhopinf, :crhow, :cmap, :cmaw, :csadjcost, :csigma, :chabb, :cprobw, :csigl, :cprobp, :cindw, :cindp, :czcap, :cfc, :crpi, :crr, :cry, :crdy, :constepinf, :constebeta, :constelab, :ctrend, :cgy, :calfa] - -nms = names(samps) -nms[1:length(varnames)] .= varnames - -samps_renamed = MCMCChains.Chains(samps.value, nms) - println(samps) println("Mean variable values: $(mean(samps).nt.mean)") -dir_name = "sw07_$(algo)_$(smpls)_samples_$(fltr)_filter" +varnames = [:z_ea, :z_eb, :z_eg, :z_eqs, :z_em, :z_epinf, :z_ew, :crhoa, :crhob, :crhog, :crhoqs, :crhoms, :crhopinf, :crhow, :cmap, :cmaw, :csadjcost, :csigma, :chabb, :cprobw, :csigl, :cprobp, :cindw, :cindp, :czcap, :cfc, :crpi, :crr, :cry, :crdy, :constepinf, :constebeta, :constelab, :ctrend, :cgy, :calfa] +nms = copy(names(samps)) +samps = replacenames(samps, Dict(nms[1:length(varnames)] .=> varnames)) + +dir_name = "sw07_$(algo)_$(smpls)_samples_$(fltr)_filter_$(smple)_sample" if !isdir(dir_name) mkdir(dir_name) end @@ -195,11 +203,10 @@ println("Current working directory: ", pwd()) dt = Dates.format(Dates.now(), "yyyy-mm-dd_HH") serialize("samples_$(dt)h.jls", samps) -my_plot = StatsPlots.plot(samps_renamed) +my_plot = StatsPlots.plot(samps) StatsPlots.savefig(my_plot, "samples_$(dt)h.png") StatsPlots.savefig(my_plot, "../samples_latest.png") #Base.show(samps) #println(Base.show(samps)) -Base.show(stdout, MIME"text/plain"(), samps) -Base.show(stdout, MIME"text/plain"(), samps_renamed) \ No newline at end of file +Base.show(stdout, MIME"text/plain"(), samps) \ No newline at end of file