From 2f96b9be7e93b473fa609d424f722256ed5ec193 Mon Sep 17 00:00:00 2001
From: Thore Kockerols
Date: Tue, 29 Oct 2024 17:32:19 +0000
Subject: [PATCH] reshuffled training loop

---
 test/neural_net_solution.jl | 165 +++++++++++++++++-------------------
 1 file changed, 79 insertions(+), 86 deletions(-)

diff --git a/test/neural_net_solution.jl b/test/neural_net_solution.jl
index 4dc5ba68..c99e090d 100644
--- a/test/neural_net_solution.jl
+++ b/test/neural_net_solution.jl
@@ -4,123 +4,116 @@ using Flux
 using ParameterSchedulers
 using Optim
 using FluxOptTools
+using StatsPlots
+
 using LinearAlgebra
+BLAS.set_num_threads(Threads.nthreads())
 
 include("../models/Smets_Wouters_2007.jl")
 
+
+normalise = true # if false, the first layers use asinh and tanh to cope with unnormalised inputs
+recurrent = false # recurrent layers carry useful state across periods, but that state has to be reset between sequences
+
 n_shocks = length(get_shocks(Smets_Wouters_2007))
 
 n_vars = length(get_variables(Smets_Wouters_2007))
 
-n_hidden = 64
-
-neural_net = Chain( Dense(n_vars + n_shocks, n_hidden, asinh),
-                    Dense(n_hidden, n_hidden, asinh),
-                    Dense(n_hidden, n_hidden, tanh),
-                    # Dense(n_hidden, n_hidden, celu),
-                    # Dense(n_hidden, n_hidden, celu),
-                    # Dense(n_hidden, n_hidden, celu),
-                    Dense(n_hidden, n_hidden, celu),
-                    Dense(n_hidden, n_hidden, celu),
-                    Dense(n_hidden, n_hidden, celu),
-                    Dense(n_hidden, n_vars))
-
-# n_hidden = 64
-
-# neural_net = Chain( Dense(n_vars + n_shocks, n_hidden, asinh),
-#                     Flux.LSTM(n_hidden, n_hidden ÷ 2),
-#                     Flux.GRU(n_hidden ÷ 2, n_hidden ÷ 2), # optional
-#                     Dense(n_hidden ÷ 2, n_hidden ÷ 2, celu),
-#                     Dense(n_hidden ÷ 2, n_hidden, celu),
-#                     Dense(n_hidden, n_hidden, celu), # optional
-#                     Dense(n_hidden, n_vars))
-
-s = ParameterSchedulers.Stateful(CosAnneal(.001, 1e-6, 1000))
-# s = ParameterSchedulers.Stateful(SinDecay2(.001, 1e-6, 500))
+n_hidden = n_vars * 2
+
+if recurrent
+    neural_net = Chain( Dense(n_vars + n_shocks, n_hidden, asinh),
+                        Flux.LSTM(n_hidden, n_hidden ÷ 2),
+                        Flux.GRU(n_hidden ÷ 2, n_hidden ÷ 2), # optional
+                        Dense(n_hidden ÷ 2, n_hidden ÷ 2, celu),
+                        Dense(n_hidden ÷ 2, n_hidden, celu),
+                        Dense(n_hidden, n_hidden, celu), # optional
+                        Dense(n_hidden, n_vars))
+else
+    if normalise
+        neural_net = Chain( Dense(n_vars + n_shocks, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_vars))
+    else
+        neural_net = Chain( Dense(n_vars + n_shocks, n_hidden, asinh),
+                            Dense(n_hidden, n_hidden, asinh),
+                            Dense(n_hidden, n_hidden, tanh),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_hidden, celu),
+                            Dense(n_hidden, n_vars))
+    end
+end
 
-# optim = Flux.setup(Flux.AdamW(0.001, (0.9, 0.999), 0.01), neural_net) # will store optimiser momentum, etc.
+optim = Flux.setup(Flux.Adam(), neural_net)
+# optim = Flux.setup(Flux.AdamW(.001,(.9,.999),.01), neural_net)
 
-optim = Flux.setup(Flux.Adam(), neural_net) # will store optimiser momentum, etc.
+nn_params = sum(length.(Flux.params(neural_net)))
 
-normalise = false
+n_batches = 10
+n_simul = n_batches * nn_params ÷ (n_vars * 10)
+n_burnin = 500
+scheduler_period = 15000
 
-for i in 1:10
-    n_simul = 10000
-    n_burnin = 500
+s = ParameterSchedulers.Stateful(CosAnneal(.001, 1e-8, scheduler_period))
+# s = ParameterSchedulers.Stateful(SinDecay2(.001, 1e-6, 500))
 
-    shcks = randn(n_shocks, n_burnin + n_simul)
+shcks = randn(n_shocks, n_burnin + n_simul)
 
-    sims = get_irf(Smets_Wouters_2007, shocks = shcks, periods = 0, levels = true)
+sims = get_irf(Smets_Wouters_2007, shocks = shcks, periods = 0, levels = true)
 
-    if normalise
-        normalised_sims = Flux.normalise(collect(sims[:,n_burnin:end,1]), dims=1)
+if normalise
+    mn = get_mean(Smets_Wouters_2007, derivatives = false)
+
+    stddev = get_std(Smets_Wouters_2007, derivatives = false)
+
+    normalised_sims = collect((sims[:,n_burnin:end,1] .- mn) ./ stddev)
 
-        sim_slices = Float32.(vcat(normalised_sims[:,1:end - 1], shcks[:,n_burnin + 1:n_burnin + n_simul]))
+    inputs = Float32.(vcat(normalised_sims[:,1:end - 1], shcks[:,n_burnin + 1:n_burnin + n_simul]))
 
-        out_slices = Float32.(normalised_sims[:,2:end])
-    else
-        sim_slices = Float32.(vcat(collect(sims[:,n_burnin:n_burnin + n_simul - 1,1]), shcks[:,n_burnin + 1:n_burnin + n_simul]))
-
-        out_slices = Float32.(collect(sims[:,n_burnin+1:n_burnin + n_simul,1]))
-    end
+    outputs = Float32.(normalised_sims[:,2:end])
+else
+    inputs = Float32.(vcat(collect(sims[:,n_burnin:n_burnin + n_simul - 1,1]), shcks[:,n_burnin + 1:n_burnin + n_simul]))
+
+    outputs = Float32.(collect(sims[:,n_burnin+1:n_burnin + n_simul,1]))
+end
 
-    # Training loop, using the whole data set 1000 times:
-    losses = []
+train_loader = Flux.DataLoader((outputs, inputs), batchsize = n_simul ÷ n_batches, shuffle = true)
 
-    for epoch in 1:2000
+losses = []
+# Training loop
+for epoch in 1:scheduler_period
+    for (out, inp) in train_loader
         lss, grads = Flux.withgradient(neural_net) do nn
-            sqrt(Flux.mse(nn(sim_slices), out_slices))
+            sqrt(Flux.mse(out, nn(inp)))
         end
 
-        Flux.adjust!(optim, ParameterSchedulers.next!(s))
-
         Flux.update!(optim, neural_net, grads[1])
 
         push!(losses, lss)  # logging, outside gradient context
-
-        if epoch % 100 == 0 println("Epoch: $epoch; Loss: $lss; Opt state: $(optim.layers[1].weight.rule)") end
     end
-end
-
-
-sqrt(sum(abs2, normalised_out_slices - neural_net(normalised_sim_slices)) / (n_simul * n_vars)) # RMSE
-sqrt(Flux.mse(neural_net(normalised_sim_slices), normalised_out_slices))
-
-pars = Flux.params(neural_net)
-lossfun, gradfun, fg!, p0 = optfuns(loss, pars)
-res = Optim.optimize(Optim.only_fg!(fg!), p0, Optim.Options(iterations=100, show_trace=true))
-# end
-
-
-
-sim_slices = Float32.(vcat(collect(sims[:,n_burnin:n_burnin + n_simul - 1,1]), shcks[:,n_burnin + 1:n_burnin + n_simul]))
-
-out_slices = Float32.(collect(sims[:,n_burnin+1:n_burnin + n_simul,1]))
+    Flux.adjust!(optim, ParameterSchedulers.next!(s))
 
-maximum((normalised_out_slices - neural_net(normalised_sim_slices))[:,1])
-
-
-maximum(neural_net(sim_slices[:,1]))
-maximum(out_slices[:,1])
+    if epoch % 100 == 0 println("Epoch: $epoch; Loss: $(sum(losses[end-99:end])/100); Opt state: $(optim.layers[1].weight.rule)") end
+end
 
-    n_simul = 1000
-    n_burnin = 500
+plot(losses[500:end], yaxis=:log)
 
-    shcks = randn(n_shocks, n_burnin + n_simul)
+norm((outputs - neural_net(inputs)) .* stddev) / norm(outputs .* stddev .+ mn)
 
-    sims = get_irf(Smets_Wouters_2007, shocks = shcks, periods = 0, levels = true)
+norm(outputs - neural_net(inputs)) / norm(outputs)
 
-    if normalise
-        normalised_sims = Flux.normalise(collect(sims[:,n_burnin:end,1]), dims=1)
+maximum((outputs[:,1] .* stddev - neural_net(inputs[:,1]) .* stddev))
 
-        sim_slices = Float32.(vcat(normalised_sims[:,1:end - 1], shcks[:,n_burnin + 1:n_burnin + n_simul]))
-
-        out_slices = Float32.(normalised_sims[:,2:end])
-    else
-        sim_slices = Float32.(vcat(collect(sims[:,n_burnin:n_burnin + n_simul - 1,1]), shcks[:,n_burnin + 1:n_burnin + n_simul]))
-
-        out_slices = Float32.(collect(sims[:,n_burnin+1:n_burnin + n_simul,1]))
-    end
-norm(out_slices - neural_net(sim_slices)) / norm(out_slices)
\ No newline at end of file
+# does it converge to a steady state
+stt = Float32.(zero(outputs[:,1]))
+shck = zeros(Float32,n_shocks)
+for i in 1:100000
+    global stt = neural_net(vcat(stt, shck))
+end
\ No newline at end of file
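
A possible follow-up to the closing loop of the new script, sketched here rather than taken from the patch: the loop only iterates the network under zero shocks and never records whether the iteration settles. The sketch reuses neural_net, n_vars, n_shocks, stddev and mn from the script and assumes the normalised setup (normalise = true); since inputs are scaled by the model mean and standard deviation, a fixed point near the zero vector means the network's implied steady state coincides with the model mean, and stt .* stddev measures the remaining gap in levels.

# Sketch only: check whether the deterministic iteration of the trained network
# converges, and how far its fixed point is from the model mean (in levels).
# Assumes neural_net, n_vars, n_shocks, stddev and mn are defined as above.
using LinearAlgebra

stt  = zeros(Float32, n_vars)      # the normalised mean corresponds to the zero vector
shck = zeros(Float32, n_shocks)    # zero shocks -> deterministic transition

gap = Inf
for i in 1:100_000
    global stt, gap
    stt_new = neural_net(vcat(stt, shck))
    gap = norm(stt_new - stt)      # distance between successive iterates
    stt = stt_new
    gap < 1e-10 && break           # stop once the iteration has settled
end

println("iteration gap: $gap; distance of fixed point from model mean (levels): $(norm(stt .* stddev))")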