diff --git a/Project.toml b/Project.toml index 3add5de..b91294d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PointBasedValueIteration" uuid = "835c131e-675f-4498-8e2c-c054c75556e1" authors = ["Dominik Straub and Tomáš Omasta "] -version = "0.2.0" +version = "0.2.1" [deps] BeliefUpdaters = "8bb6e9a1-7d73-552c-a44a-e5dc5634aac4" diff --git a/src/pbvi.jl b/src/pbvi.jl index 8add4b8..80bfb71 100644 --- a/src/pbvi.jl +++ b/src/pbvi.jl @@ -35,21 +35,6 @@ end ==(a::AlphaVec, b::AlphaVec) = (a.alpha,a.action) == (b.alpha, b.action) Base.hash(a::AlphaVec, h::UInt) = hash(a.alpha, hash(a.action, h)) -convert(::Type{Array{Float64, 1}}, d::BoolDistribution, pomdp) = [1 - d.p, d.p] -convert(::Type{Array{Float64, 1}}, d::DiscreteUniform, pomdp) = [pdf(d, stateindex(pomdp, s)) for s in states(pomdp)] -convert(::Type{Array{Float64, 1}}, d::SparseCat, pomdp) = d.probs - -convert(::Type{Array{Float64, 1}}, d::InStageDistribution{DiscreteUniform}, m::FixedHorizonPOMDPWrapper) = vec([pdf(d, s) for s in states(m)]) - -function convert(::Type{Array{Float64, 1}}, d::InStageDistribution{BoolDistribution}, m::FixedHorizonPOMDPWrapper) - if stage(d) == 1 - append!([1 - d.d.p[1], d.d.p[1]], zeros(length(states(m)) - 2)) - else - append!(append!(zeros((stage(d) - 1) * length(stage_states(m, 1))), [1 - d.d.p[1], d.d.p[1]]), zeros((horizon(m) - stage(d) + 1) * length(stage_states(m, 1)))) - end -end - - function _argmax(f, X) return X[argmax(map(f, X))] end @@ -197,9 +182,9 @@ function solve(solver::PBVISolver, pomdp::POMDP) Γ = [fill(α_init, length(S)) for a in A] #init belief, if given distribution, convert to vector - init = convert(Array{Float64, 1}, initialstate(pomdp), pomdp) - B = [DiscreteBelief(pomdp, init)] - Bs = Set([init]) + init = initialize_belief(DiscreteUpdater(pomdp), initialstate(pomdp)) + B = [init] + Bs = Set([init.b]) if solver.verbose println("Running PBVI solver on $(typeof(pomdp)) problem with following settings:\n max_iterations = $(solver.max_iterations), ϵ = $(solver.ϵ), verbose = $(solver.verbose)\n+----------------------------------------------------------+") end diff --git a/test/runtests.jl b/test/runtests.jl index 45889a7..a5a9302 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,50 +3,11 @@ using POMDPModels using POMDPs using SARSOP using BeliefUpdaters -using POMDPModelTools: Deterministic -using POMDPSimulators: RolloutSimulator +using POMDPModelTools +using POMDPSimulators using FiniteHorizonPOMDPs - using PointBasedValueIteration -@testset "Convert test" begin - @testset "Infinite Horizon POMDP tests" begin - tigerPOMDP = TigerPOMDP() - babyPOMDP = BabyPOMDP() - minihallwayPOMDP = MiniHallway() - - @test convert(Array{Float64, 1}, initialstate(tigerPOMDP), tigerPOMDP) == [0.5, 0.5] - @test convert(Array{Float64, 1}, initialstate(babyPOMDP), babyPOMDP) == [1., 0.] - @test convert(Array{Float64, 1}, initialstate(minihallwayPOMDP), minihallwayPOMDP) == append!(fill(1/12, 12), zeros(1)) - end - - @testset "Finite Horizon POMDP tests" begin - @testset "Finite Horizon POMDP initial state convert tests" begin - tigerPOMDP = fixhorizon(TigerPOMDP(), 1) - babyPOMDP = fixhorizon(BabyPOMDP(), 1) - minihallwayPOMDP = fixhorizon(MiniHallway(), 1) - - @test convert(Array{Float64, 1}, initialstate(tigerPOMDP), tigerPOMDP) == [0.5, 0.5, 0., 0.] - @test convert(Array{Float64, 1}, initialstate(babyPOMDP), babyPOMDP) == [1., 0., 0., 0.] - @test convert(Array{Float64, 1}, initialstate(minihallwayPOMDP), minihallwayPOMDP) == append!(fill(1/12, 12), zeros(14)) - end - - @testset "Finite Horizon POMDP other than initial stage distribution tests" begin - tigerPOMDP = fixhorizon(TigerPOMDP(), 2) - babyPOMDP = fixhorizon(BabyPOMDP(), 2) - minihallwayPOMDP = fixhorizon(MiniHallway(), 2) - - tigerbelief = FiniteHorizonPOMDPs.InStageDistribution(FiniteHorizonPOMDPs.distribution(initialstate(tigerPOMDP)), 2) - babybelief = FiniteHorizonPOMDPs.InStageDistribution(FiniteHorizonPOMDPs.distribution(initialstate(babyPOMDP)), 2) - minihallwaybelief = FiniteHorizonPOMDPs.InStageDistribution(FiniteHorizonPOMDPs.distribution(initialstate(minihallwayPOMDP)), 2) - - @test convert(Array{Float64, 1}, tigerbelief, tigerPOMDP) == [0., 0., 0.5, 0.5, 0., 0.] - @test convert(Array{Float64, 1}, babybelief, babyPOMDP) == [0., 0., 1., 0., 0., 0.] - @test convert(Array{Float64, 1}, minihallwaybelief, minihallwayPOMDP) == append!(append!(zeros(13), fill(1/12, 12)), zeros(14)) - end - end -end - @testset "Comparison with SARSOP" begin pomdps = [TigerPOMDP(), BabyPOMDP(), MiniHallway()]