diff --git a/.github/workflows/DependaBot.yml b/.github/workflows/DependaBot.yml new file mode 100644 index 00000000..ff6499d6 --- /dev/null +++ b/.github/workflows/DependaBot.yml @@ -0,0 +1,7 @@ +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" \ No newline at end of file diff --git a/.github/workflows/docs-benchmark.yml b/.github/workflows/docs-benchmark.yml deleted file mode 100644 index 07435238..00000000 --- a/.github/workflows/docs-benchmark.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: docs-benchmark -on: - push: - tags: '*' - workflow_dispatch: -concurrency: - # Skip intermediate builds: always. - # Cancel intermediate builds: only if it is a pull request build. - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} -jobs: - docs: - name: Documentation - runs-on: ubuntu-latest - permissions: - contents: write - statuses: write - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - - uses: julia-actions/julia-buildpkg@v1 - - run: | - julia --threads=1 --project=benchmark -e ' - using Pkg - Pkg.instantiate() - include("benchmark/run_benchmarks.jl") - include("benchmark/process_benchmarks.jl")' - - uses: julia-actions/julia-docdeploy@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - run: | - julia --project=docs -e ' - using Documenter: DocMeta, doctest - using HiddenMarkovModels - DocMeta.setdocmeta!(HiddenMarkovModels, :DocTestSetup, :(using HiddenMarkovModels); recursive=true) - doctest(HiddenMarkovModels)' diff --git a/.github/workflows/docs-nobenchmark.yml b/.github/workflows/docs.yml similarity index 97% rename from .github/workflows/docs-nobenchmark.yml rename to .github/workflows/docs.yml index 1696c2b0..f6087980 100644 --- 
a/.github/workflows/docs-nobenchmark.yml +++ b/.github/workflows/docs.yml @@ -1,4 +1,4 @@ -name: docs-nobenchmark +name: docs on: push: branches: diff --git a/Project.toml b/Project.toml index 690b4dc2..e7aac87c 100644 --- a/Project.toml +++ b/Project.toml @@ -32,7 +32,6 @@ DocStringExtensions = "0.9" LinearAlgebra = "1.6" PrecompileTools = "1.1" Random = "1.6" -RequiredInterfaces = "0.1.3" Requires = "1.3" SimpleUnPack = "1.1" StatsAPI = "1.6" @@ -57,9 +56,8 @@ SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "BenchmarkTools", "Distributions", "Documenter", "FiniteDifferences", "ForwardDiff", "HMMBase", "JET", "JuliaFormatter", "LinearAlgebra", "LogarithmicNumbers", "Pkg", "Random", "SimpleUnPack", "SparseArrays", "StaticArrays", "Statistics", "Suppressor", "Test", "Zygote"] +test = ["Aqua", "BenchmarkTools", "Distributions", "Documenter", "FiniteDifferences", "ForwardDiff", "HMMBase", "JET", "JuliaFormatter", "LinearAlgebra", "LogarithmicNumbers", "Pkg", "Random", "SimpleUnPack", "SparseArrays", "StaticArrays", "Statistics", "Test", "Zygote"] diff --git a/benchmark/HMMBenchmark/src/HMMBenchmark.jl b/benchmark/HMMBenchmark/src/HMMBenchmark.jl index c7920b8b..43ee9c7e 100644 --- a/benchmark/HMMBenchmark/src/HMMBenchmark.jl +++ b/benchmark/HMMBenchmark/src/HMMBenchmark.jl @@ -12,7 +12,7 @@ using BenchmarkTools using HiddenMarkovModels using SimpleUnPack -export define_suite +export define_suite, run_suite, parse_results include("hmms.jl") include("hmmbase.jl") diff --git a/benchmark/HMMBenchmark/src/hmmbase.jl b/benchmark/HMMBenchmark/src/hmmbase.jl index 33f72cce..a41d55bb 100644 --- a/benchmark/HMMBenchmark/src/hmmbase.jl +++ 
b/benchmark/HMMBenchmark/src/hmmbase.jl @@ -21,30 +21,37 @@ end function benchmarkables_hmmbase(; algos, N, D, T, K, I) rand_model_hmmbase(; N, D) if D == 1 - obs_mat = randn(K * T) + obs_mats = [randn(T) for k in 1:K] else - obs_mat = randn(K * T, D) + obs_mats = [randn(T, D) for k in 1:K] end + obs_mats_concat = randn(K * T, D) benchs = Dict() if "logdensity" in algos - benchs["logdensity"] = @benchmarkable HMMBase.forward(model, $obs_mat) setup = ( - model = rand_model_hmmbase(; N=$N, D=$D) - ) + benchs["logdensity"] = @benchmarkable begin + for k in 1:($K) + HMMBase.forward(model, $obs_mats[k]) + end + end setup = (model = rand_model_hmmbase(; N=$N, D=$D)) end if "viterbi" in algos - benchs["viterbi"] = @benchmarkable HMMBase.viterbi(model, $obs_mat) setup = ( - model = rand_model_hmmbase(; N=$N, D=$D) - ) + benchs["viterbi"] = @benchmarkable begin + for k in 1:($K) + HMMBase.viterbi(model, $obs_mats[k]) + end + end setup = (model = rand_model_hmmbase(; N=$N, D=$D)) end if "forward_backward" in algos - benchs["forward_backward"] = @benchmarkable HMMBase.posteriors(model, $obs_mat) setup = ( - model = rand_model_hmmbase(; N=$N, D=$D) - ) + benchs["forward_backward"] = @benchmarkable begin + for k in 1:($K) + HMMBase.posteriors(model, $obs_mats[k]) + end + end setup = (model = rand_model_hmmbase(; N=$N, D=$D)) end if "baum_welch" in algos - benchs["baum_welch"] = @benchmarkable HMMBase.fit_mle( - model, $obs_mat; maxiter=$I, tol=-Inf - ) setup = (model = rand_model_hmmbase(; N=$N, D=$D)) + benchs["baum_welch"] = @benchmarkable begin + HMMBase.fit_mle(model, $obs_mats_concat; maxiter=$I, tol=-Inf) + end setup = (model = rand_model_hmmbase(; N=$N, D=$D)) end return benchs end diff --git a/benchmark/HMMBenchmark/src/hmms.jl b/benchmark/HMMBenchmark/src/hmms.jl index 997fad09..f30ec528 100644 --- a/benchmark/HMMBenchmark/src/hmms.jl +++ b/benchmark/HMMBenchmark/src/hmms.jl @@ -26,24 +26,32 @@ function benchmarkables_hmms(; algos, N, D, T, K, I) end benchs = Dict() if 
"logdensity" in algos - benchs["logdensity"] = @benchmarkable HiddenMarkovModels.logdensityof( - model, $obs_seqs, $K - ) setup = (model = rand_model_hmms(; N=$N, D=$D)) + benchs["logdensity"] = @benchmarkable begin + for k in 1:($K) + HiddenMarkovModels.logdensityof(model, $obs_seqs[k]) + end + end setup = (model = rand_model_hmms(; N=$N, D=$D)) end if "viterbi" in algos - benchs["viterbi"] = @benchmarkable HiddenMarkovModels.viterbi(model, $obs_seqs, $K) setup = ( - model = rand_model_hmms(; N=$N, D=$D) - ) + benchs["viterbi"] = @benchmarkable begin + for k in 1:($K) + HiddenMarkovModels.viterbi(model, $obs_seqs[k]) + end + end setup = (model = rand_model_hmms(; N=$N, D=$D)) end if "forward_backward" in algos - benchs["forward_backward"] = @benchmarkable HiddenMarkovModels.forward_backward( - model, $obs_seqs, $K - ) setup = (model = rand_model_hmms(; N=$N, D=$D)) + benchs["forward_backward"] = @benchmarkable begin + for k in 1:($K) + HiddenMarkovModels.forward_backward(model, $obs_seqs[k]) + end + end setup = (model = rand_model_hmms(; N=$N, D=$D)) end if "baum_welch" in algos - benchs["baum_welch"] = @benchmarkable HiddenMarkovModels.baum_welch( - model, $obs_seqs, $K; max_iterations=$I, atol=-Inf - ) setup = (model = rand_model_hmms(; N=$N, D=$D)) + benchs["baum_welch"] = @benchmarkable begin + HiddenMarkovModels.baum_welch( + model, $obs_seqs, $K; max_iterations=$I, atol=-Inf + ) + end setup = (model = rand_model_hmms(; N=$N, D=$D)) end return benchs end diff --git a/benchmark/HMMBenchmark/src/suite.jl b/benchmark/HMMBenchmark/src/suite.jl index 8855528d..ce270cbb 100644 --- a/benchmark/HMMBenchmark/src/suite.jl +++ b/benchmark/HMMBenchmark/src/suite.jl @@ -12,7 +12,7 @@ function benchmarkables_by_implem(; implem, algos, kwargs...) 
end end -function define_suite(; implems, algos, N_vals, D_vals, T_vals, K_vals, I) +function define_suite(; implems, algos, N_vals, D_vals, T_vals, K_vals, I_vals) SUITE = BenchmarkGroup() if ("HMMBase.jl" in implems) && any(>(1), K_vals) @warn "HMMBase.jl doesn't support multiple observation sequences, concatenating instead." @@ -24,7 +24,7 @@ function define_suite(; implems, algos, N_vals, D_vals, T_vals, K_vals, I) SUITE[implem][algo] = BenchmarkGroup() SUITE[implem][algo][(1, 1, 2, 1, 1)] = bench end - for N in N_vals, D in D_vals, T in T_vals, K in K_vals + for (N, D, T, K, I) in zip(N_vals, D_vals, T_vals, K_vals, I_vals) bench_tup = benchmarkables_by_implem(; implem, algos, N, D, T, K, I) for (algo, bench) in pairs(bench_tup) SUITE[implem][algo][(N, D, T, K, I)] = bench diff --git a/benchmark/Manifest.toml b/benchmark/Manifest.toml index 990ce6dd..b8bbd193 100644 --- a/benchmark/Manifest.toml +++ b/benchmark/Manifest.toml @@ -696,15 +696,15 @@ version = "10.42.0+0" [[deps.PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "f6f85a2edb9c356b829934ad3caed2ad0ebbfc99" +git-tree-sha1 = "66b2fcd977db5329aa35cac121e5b94dd6472198" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.29" +version = "0.11.28" [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] -git-tree-sha1 = "a935806434c9d4c506ba941871b327b96d41f2bf" +git-tree-sha1 = "716e24b21538abc91f6205fd1d8363f39b442851" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "2.8.0" +version = "2.7.2" [[deps.Pipe]] git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 561cabc4..7463429e 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -4,11 +4,11 @@ using BenchmarkTools SUITE = define_suite(; implems=("HMMs.jl",), algos=("logdensity", "viterbi", "forward_backward", "baum_welch"), - N_vals=2:2:20, - D_vals=10, - T_vals=100, - K_vals=10, - I=10, + N_vals=4:4:20, + 
D_vals=fill(10, 5), + T_vals=fill(100, 5), + K_vals=fill(10, 5), + I_vals=fill(10, 5), ) BenchmarkTools.save(joinpath(@__DIR__, "tune.json"), BenchmarkTools.params(SUITE)); diff --git a/docs/Project.toml b/docs/Project.toml index ea2baba9..68e4008f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -4,7 +4,6 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" HiddenMarkovModels = "84ca31d5-effc-45e0-bfda-5a68cd981f47" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -RequiredInterfaces = "97f35ef4-7bc5-4ec1-a41a-dcc69c7308c6" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" diff --git a/docs/make.jl b/docs/make.jl index 0e56ebc3..f41a7c15 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -22,21 +22,18 @@ open(joinpath(joinpath(@__DIR__, "src"), "index.md"), "w") do io end end -benchmarks_done = false - pages = [ "Home" => "index.md", - "Essentials" => ["Background" => "background.md", "API reference" => "api.md"], + "Essentials" => [ + "Background" => "background.md", + "API reference" => "api.md", + "Competitors" => "competitors.md", + ], "Tutorials" => [ "Built-in HMM" => "builtin.md", "Custom HMM" => "custom.md", "Debugging" => "debugging.md", ], - "Alternatives" => if benchmarks_done - ["Features" => "features.md", "Benchmarks" => "benchmarks.md"] - else - ["Features" => "features.md"] - end, "Advanced" => ["Formulas" => "formulas.md", "Roadmap" => "roadmap.md"], ] diff --git a/docs/src/api.md b/docs/src/api.md index c027e18c..16354d42 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -38,21 +38,40 @@ fit! ## Misc ```@docs -check_hmm rand_prob_vec rand_trans_mat +HiddenMarkovModels.fit_element_from_sequence! 
+HiddenMarkovModels.LightDiagNormal ``` -## Internals +## In-place algorithms (internals) + +### Storage types ```@docs HiddenMarkovModels.ForwardStorage HiddenMarkovModels.ViterbiStorage HiddenMarkovModels.ForwardBackwardStorage HiddenMarkovModels.BaumWelchStorage -HiddenMarkovModels.fit_element_from_sequence! -HiddenMarkovModels.LightDiagNormal -HiddenMarkovModels.PermutedHMM +``` + +### Initializing storage + +```@docs +HiddenMarkovModels.initialize_forward +HiddenMarkovModels.initialize_viterbi +HiddenMarkovModels.initialize_forward_backward +HiddenMarkovModels.initialize_baum_welch +HiddenMarkovModels.initialize_logL_evolution +``` + +### Modifying storage + +```@docs +HiddenMarkovModels.forward! +HiddenMarkovModels.viterbi! +HiddenMarkovModels.forward_backward! +HiddenMarkovModels.baum_welch! ``` ## Notations @@ -82,7 +101,7 @@ HiddenMarkovModels.PermutedHMM - `β`: scaled backward variables - `γ`: state marginals - `ξ`: transition marginals -- `logL`: posterior loglikelihood of a sequence of observations +- `logL`: loglikelihood of a sequence of observations ## Index diff --git a/docs/src/builtin.md b/docs/src/builtin.md index afa429f1..0c6a1a2b 100644 --- a/docs/src/builtin.md +++ b/docs/src/builtin.md @@ -1,4 +1,4 @@ -# Tutorial - built-in HMM +# Built-in HMM ```@example tuto using Distributions @@ -78,7 +78,7 @@ perm = sortperm(1:3, by=i->d_est[i].μ) ``` ```@example tuto -hmm_est = HiddenMarkovModels.PermutedHMM(hmm_est, perm) +hmm_est = HiddenMarkovModels.permute(hmm_est, perm) ``` Evaluating errors: diff --git a/docs/src/features.md b/docs/src/competitors.md similarity index 98% rename from docs/src/features.md rename to docs/src/competitors.md index 63473b4c..b9c42674 100644 --- a/docs/src/features.md +++ b/docs/src/competitors.md @@ -1,4 +1,4 @@ -# Alternatives - features +# Competitors We compare features among the following Julia packages: diff --git a/docs/src/custom.md b/docs/src/custom.md index c21154f1..6375e38a 100644 --- a/docs/src/custom.md 
+++ b/docs/src/custom.md @@ -1,4 +1,4 @@ -# Tutorial - custom HMM +# Custom HMM ```@example tuto using Distributions diff --git a/ext/HiddenMarkovModelsChainRulesCoreExt.jl b/ext/HiddenMarkovModelsChainRulesCoreExt.jl index a0bfd1b1..2a6221bb 100644 --- a/ext/HiddenMarkovModelsChainRulesCoreExt.jl +++ b/ext/HiddenMarkovModelsChainRulesCoreExt.jl @@ -1,34 +1,38 @@ module HiddenMarkovModelsChainRulesCoreExt -using ChainRulesCore: - ChainRulesCore, NoTangent, ZeroTangent, RuleConfig, rrule_via_ad, @not_implemented +using ChainRulesCore: ChainRulesCore, NoTangent, RuleConfig, rrule_via_ad using DensityInterface: logdensityof using HiddenMarkovModels import HiddenMarkovModels as HMMs using SimpleUnPack -function _params_and_loglikelihoods(hmm::AbstractHMM, obs_seq) +function obs_logdensities_matrix(hmm::AbstractHMM, obs_seq::Vector) + d = obs_distributions(hmm) + logB = reduce(hcat, logdensityof.(d, (obs_seq[t],)) for t in 1:length(obs_seq)) + return logB +end + +function _params_and_loglikelihoods(hmm::AbstractHMM, obs_seq::Vector) p = initialization(hmm) A = transition_matrix(hmm) - d = obs_distributions(hmm) - logB = reduce(hcat, logdensityof.(d, (obs,)) for obs in obs_seq) + logB = obs_logdensities_matrix(hmm, obs_seq) return p, A, logB end function ChainRulesCore.rrule( - rc::RuleConfig, ::typeof(logdensityof), hmm::AbstractHMM, obs_seq + rc::RuleConfig, ::typeof(logdensityof), hmm::AbstractHMM, obs_seq::Vector ) (p, A, logB), pullback = rrule_via_ad(rc, _params_and_loglikelihoods, hmm, obs_seq) - fb = HMMs.initialize_forward_backward(hmm, obs_seq) - HMMs.forward_backward!(fb, hmm, obs_seq) - @unpack α, β, γ, c, B̃β = fb + storage = HMMs.initialize_forward_backward(hmm, obs_seq) + HMMs.forward_backward!(storage, hmm, obs_seq) + @unpack logL, α, β, γ, c, Bβ = storage T = length(obs_seq) function logdensityof_hmm_pullback(ΔlogL) - Δp = ΔlogL .* B̃β[:, 1] - ΔA = ΔlogL .* α[:, 1] .* B̃β[:, 2]' + Δp = ΔlogL .* Bβ[:, 1] + ΔA = ΔlogL .* α[:, 1] .* Bβ[:, 2]' @views for t 
in 2:(T - 1) - ΔA .+= ΔlogL .* α[:, t] .* B̃β[:, t + 1]' + ΔA .+= ΔlogL .* α[:, t] .* Bβ[:, t + 1]' end ΔlogB = ΔlogL .* γ @@ -37,7 +41,7 @@ function ChainRulesCore.rrule( return Δlogdensityof, Δhmm, Δobs_seq end - return fb.logL[], logdensityof_hmm_pullback + return logL[], logdensityof_hmm_pullback end end diff --git a/ext/HiddenMarkovModelsHMMBaseExt.jl b/ext/HiddenMarkovModelsHMMBaseExt.jl index 9488107f..84f8d597 100644 --- a/ext/HiddenMarkovModelsHMMBaseExt.jl +++ b/ext/HiddenMarkovModelsHMMBaseExt.jl @@ -11,9 +11,9 @@ function HiddenMarkovModels.HMM(hmm_base::HMMBase.HMM) end function HMMBase.HMM(hmm::HiddenMarkovModels.HMM) - a = deepcopy(initialization(hmm)) - A = deepcopy(transition_matrix(hmm)) - B = deepcopy(obs_distributions(hmm)) + a = deepcopy(hmm.init) + A = deepcopy(hmm.trans) + B = deepcopy(hmm.dists) return HMMBase.HMM(a, A, B) end diff --git a/src/HiddenMarkovModels.jl b/src/HiddenMarkovModels.jl index 8b1f8d67..715efb9d 100644 --- a/src/HiddenMarkovModels.jl +++ b/src/HiddenMarkovModels.jl @@ -2,17 +2,12 @@ HiddenMarkovModels A Julia package for HMM modeling, simulation, inference and learning. - -# Exports - -$(EXPORTS) """ module HiddenMarkovModels using Base: RefValue using Base.Threads: @threads -using DensityInterface: - DensityInterface, DensityKind, HasDensity, NoDensity, densityof, logdensityof +using DensityInterface: DensityInterface, DensityKind, HasDensity, NoDensity, logdensityof using Distributions: Distributions, Categorical, @@ -21,12 +16,12 @@ using Distributions: MultivariateDistribution, MatrixDistribution using DocStringExtensions -using LinearAlgebra: Diagonal, dot, mul! +using LinearAlgebra: Diagonal, axpy!, dot, ldiv!, lmul!, mul! 
using PrecompileTools: @compile_workload, @setup_workload using Random: Random, AbstractRNG, default_rng using Requires: @require using SimpleUnPack: @unpack -using SparseArrays: SparseMatrixCSC, nzrange, nnz +using SparseArrays: AbstractSparseArray, SparseMatrixCSC, nnz, nonzeros, nzrange using StatsAPI: StatsAPI, fit, fit! export AbstractHiddenMarkovModel, AbstractHMM @@ -38,32 +33,27 @@ export fit! export check_hmm include("types/abstract_hmm.jl") -include("types/permuted_hmm.jl") +include("types/hmm.jl") +include("utils/linalg.jl") include("utils/check.jl") -include("utils/probvec.jl") -include("utils/transmat.jl") +include("utils/probvec_transmat.jl") include("utils/fit.jl") include("utils/lightdiagnormal.jl") -include("utils/mul.jl") include("inference/forward.jl") include("inference/viterbi.jl") include("inference/forward_backward.jl") include("inference/baum_welch.jl") -include("types/hmm.jl") - -include("utils/Test.jl") +include("utils/HMMTest.jl") if !isdefined(Base, :get_extension) + include("../ext/HiddenMarkovModelsChainRulesCoreExt.jl") function __init__() @require HMMBase = "b2b3ca75-8444-5ffa-85e6-af70e2b64fe7" include( "../ext/HiddenMarkovModelsHMMBaseExt.jl" ) - @require ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" include( - "../ext/HiddenMarkovModelsChainRulesCoreExt.jl" - ) end end diff --git a/src/inference/baum_welch.jl b/src/inference/baum_welch.jl index c4ef69e8..e8c867ec 100644 --- a/src/inference/baum_welch.jl +++ b/src/inference/baum_welch.jl @@ -22,10 +22,19 @@ struct BaumWelchStorage{R,M<:AbstractMatrix{R}} limits::Vector{Int} end +function check_nb_seqs(obs_seqs::Vector{<:Vector}, nb_seqs::Integer) + if nb_seqs != length(obs_seqs) + throw(ArgumentError("Incoherent sizes provided: `nb_seqs != length(obs_seqs)`")) + end +end + +""" + initialize_baum_welch(hmm, obs_seqs, nb_seqs) +""" function initialize_baum_welch( hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer ) - check_lengths(obs_seqs, nb_seqs) + 
check_nb_seqs(obs_seqs, nb_seqs) N, T_concat = length(hmm), sum(length, obs_seqs) A = transition_matrix(hmm) R = eltype(hmm, obs_seqs[1][1]) @@ -36,10 +45,13 @@ function initialize_baum_welch( return BaumWelchStorage(init_count, trans_count, state_marginals_concat, limits) end +""" + initialize_logL_evolution(hmm, obs_seqs, nb_seqs; max_iterations) +""" function initialize_logL_evolution( hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer; max_iterations::Integer ) - check_lengths(obs_seqs, nb_seqs) + check_nb_seqs(obs_seqs, nb_seqs) R = eltype(hmm, obs_seqs[1][1]) logL_evolution = R[] sizehint!(logL_evolution, max_iterations) @@ -47,17 +59,17 @@ function initialize_logL_evolution( end function update_sufficient_statistics!( - bw::BaumWelchStorage{R}, fbs::Vector{<:ForwardBackwardStorage} + bw_storage::BaumWelchStorage{R}, fb_storages::Vector{<:ForwardBackwardStorage} ) where {R} - @unpack init_count, trans_count, state_marginals_concat, limits = bw + @unpack init_count, trans_count, state_marginals_concat, limits = bw_storage init_count .= zero(R) trans_count .= zero(R) state_marginals_concat .= zero(R) - for k in eachindex(fbs) - @unpack γ, ξ, B̃β = fbs[k] - init_count .+= view(γ, :, 1) + for k in eachindex(fb_storages) # TODO: ThreadsX? 
+ @unpack γ, ξ = fb_storages[k] + init_count .+= @view γ[:, 1] for t in eachindex(ξ) - trans_count .+= ξ[t] + mynonzeros(trans_count) .+= mynonzeros(ξ[t]) end state_marginals_concat[:, (limits[k] + 1):limits[k + 1]] .= γ end @@ -79,15 +91,23 @@ function baum_welch_has_converged( return false end -function StatsAPI.fit!(hmm::AbstractHMM, bw::BaumWelchStorage, obs_seqs_concat::Vector) - return fit!( - hmm, bw.init_count, bw.trans_count, obs_seqs_concat, bw.state_marginals_concat - ) +function StatsAPI.fit!( + hmm::AbstractHMM, bw_storage::BaumWelchStorage, obs_seqs_concat::Vector +) + @unpack init_count, trans_count, state_marginals_concat = bw_storage + return fit!(hmm, init_count, trans_count, obs_seqs_concat, state_marginals_concat) end +""" + baum_welch!( + fb_storages, bw_storage, logL_evolution, + hmm, obs_seqs, obs_seqs_concat; + atol, max_iterations, loglikelihood_increasing + ) +""" function baum_welch!( - fbs::Vector{<:ForwardBackwardStorage}, - bw::BaumWelchStorage, + fb_storages::Vector{<:ForwardBackwardStorage}, + bw_storage::BaumWelchStorage, logL_evolution::Vector, hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, @@ -97,12 +117,12 @@ function baum_welch!( loglikelihood_increasing::Bool, ) for _ in 1:max_iterations - @threads for k in eachindex(obs_seqs, fbs) - forward_backward!(fbs[k], hmm, obs_seqs[k]) + @threads for k in eachindex(obs_seqs, fb_storages) + forward_backward!(fb_storages[k], hmm, obs_seqs[k]) end - update_sufficient_statistics!(bw, fbs) - push!(logL_evolution, sum(fb.logL[] for fb in fbs)) - fit!(hmm, bw, obs_seqs_concat) + update_sufficient_statistics!(bw_storage, fb_storages) + push!(logL_evolution, sum(fb.logL[] for fb in fb_storages)) + fit!(hmm, bw_storage, obs_seqs_concat) check_hmm(hmm) if baum_welch_has_converged(logL_evolution; atol, loglikelihood_increasing) break @@ -133,7 +153,7 @@ function baum_welch( max_iterations=100, loglikelihood_increasing=true, ) - check_lengths(obs_seqs, nb_seqs) + check_nb_seqs(obs_seqs, nb_seqs) hmm 
= deepcopy(hmm_init) fbs = [initialize_forward_backward(hmm, obs_seqs[k]) for k in eachindex(obs_seqs)] bw = initialize_baum_welch(hmm, obs_seqs, nb_seqs) diff --git a/src/inference/forward.jl b/src/inference/forward.jl index 02f1a3d3..9e71144f 100644 --- a/src/inference/forward.jl +++ b/src/inference/forward.jl @@ -7,7 +7,7 @@ This storage is relative to a single sequence. # Fields -The only fields useful outside of the algorithm are `αₜ` and `logL`. +The only fields useful outside of the algorithm are `α` and `logL`, the rest does not belong to the public API. $(TYPEDFIELDS) """ @@ -22,6 +22,9 @@ struct ForwardStorage{R} α_next::Vector{R} end +""" + initialize_forward(hmm, obs_seq) +""" function initialize_forward(hmm::AbstractHMM, obs_seq::Vector) N = length(hmm) R = eltype(hmm, obs_seq[1]) @@ -30,88 +33,66 @@ function initialize_forward(hmm::AbstractHMM, obs_seq::Vector) logb = Vector{R}(undef, N) α = Vector{R}(undef, N) α_next = Vector{R}(undef, N) - f = ForwardStorage(logL, logb, α, α_next) - return f + storage = ForwardStorage(logL, logb, α, α_next) + return storage end -function forward!(f::ForwardStorage, hmm::AbstractHMM, obs_seq::Vector) +""" + forward!(storage, hmm, obs_seq) +""" +function forward!(storage::ForwardStorage, hmm::AbstractHMM, obs_seq::Vector) T = length(obs_seq) p = initialization(hmm) A = transition_matrix(hmm) - d = obs_distributions(hmm) - @unpack logL, logb, α, α_next = f + @unpack logL, logb, α, α_next = storage - logb .= logdensityof.(d, (obs_seq[1],)) + obs_logdensities!(logb, hmm, obs_seq[1]) + check_right_finite(logb) logm = maximum(logb) α .= p .* exp.(logb .- logm) c = inv(sum(α)) α .*= c + check_finite(α) logL[] = -log(c) + logm for t in 1:(T - 1) - logb .= logdensityof.(d, (obs_seq[t + 1],)) + obs_logdensities!(logb, hmm, obs_seq[t + 1]) + check_right_finite(logb) logm = maximum(logb) mul!(α_next, A', α) α_next .*= exp.(logb .- logm) c = inv(sum(α_next)) α_next .*= c α .= α_next + check_finite(α) logL[] += -log(c) + logm end 
return nothing end -function forward!( - fs::Vector{<:ForwardStorage}, - hmm::AbstractHMM, - obs_seqs::Vector{<:Vector}, - nb_seqs::Integer, -) - check_lengths(obs_seqs, nb_seqs) - @threads for k in eachindex(fs, obs_seqs) - forward!(fs[k], hmm, obs_seqs[k]) - end - return nothing -end - """ forward(hmm, obs_seq) - forward(hmm, obs_seqs, nb_seqs) -Run the forward algorithm to infer the current state of an HMM. +Run the forward algorithm to infer the current state of `hmm` after sequence `obs_seq`. -When applied on a single sequence, this function returns a tuple `(α, logL)` where +This function returns a tuple `(α, logL)` where - `α[i]` is the posterior probability of state `i` at the end of the sequence - `logL` is the loglikelihood of the sequence - -When applied on multiple sequences, this function returns a vector of tuples. """ -function forward(hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer) - check_lengths(obs_seqs, nb_seqs) - fs = [initialize_forward(hmm, obs_seqs[k]) for k in eachindex(obs_seqs)] - forward!(fs, hmm, obs_seqs, nb_seqs) - return [(f.α, f.logL[]) for f in fs] -end - function forward(hmm::AbstractHMM, obs_seq::Vector) - return only(forward(hmm, [obs_seq], 1)) + storage = initialize_forward(hmm, obs_seq) + forward!(storage, hmm, obs_seq) + return storage.α, storage.logL[] end """ logdensityof(hmm, obs_seq) - logdensityof(hmm, obs_seqs, nb_seqs) -Run the forward algorithm to compute the posterior loglikelihood of observations for an HMM. +Run the forward algorithm to compute the posterior loglikelihood of sequence `obs_seq` for `hmm`. -Whether it is applied on one or multiple sequences, this function returns a number. +This function returns a number. 
""" -function DensityInterface.logdensityof( - hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer -) - logαs_and_logLs = forward(hmm, obs_seqs, nb_seqs) - return sum(last, logαs_and_logLs) -end - function DensityInterface.logdensityof(hmm::AbstractHMM, obs_seq::Vector) - return logdensityof(hmm, [obs_seq], 1) + _, logL = forward(hmm, obs_seq) + return logL end diff --git a/src/inference/forward_backward.jl b/src/inference/forward_backward.jl index 3aefcdbf..5cfda512 100644 --- a/src/inference/forward_backward.jl +++ b/src/inference/forward_backward.jl @@ -7,7 +7,7 @@ This storage is relative to a single sequence. # Fields -The only fields useful outside of the algorithm are `γ`, `ξ` and `logL`. +The only fields useful outside of the algorithm are `γ`, `logL`, `init_count` and `trans_count`, the rest does not belong to the public API. $(TYPEDFIELDS) """ @@ -20,7 +20,7 @@ struct ForwardBackwardStorage{R,M<:AbstractMatrix{R}} β::Matrix{R} "posterior state marginals `γ[i,t] = ℙ(X[t]=i | Y[1:T])`" γ::Matrix{R} - "posterior transition marginals `ξ[t][i,j] = ℙ(X[t:t+1]=(i,j) | Y[1:T])`" + "posterior transition marginals `ξ[t][i,j] = ℙ(X[t]=i, X[t+1]=j | Y[1:T])`" ξ::Vector{M} "forward message inverse normalizations `c[t] = 1 / sum(α[:,t])`" c::Vector{R} @@ -28,15 +28,15 @@ struct ForwardBackwardStorage{R,M<:AbstractMatrix{R}} logB::Matrix{R} "maximum of the observation loglikelihoods `logm[t] = maximum(logB[:, t])`" logm::Vector{R} - "numerically stabilized observation likelihoods `B̃[i,t] = exp.(logB[i,t] - logm[t])`" - B̃::Matrix{R} - "product `B̃β[i,t] = B̃[i,t] * β[i,t]`" - B̃β::Matrix{R} + "numerically stabilized observation likelihoods `B[i,t] = exp.(logB[i,t] - logm[t])`" + B::Matrix{R} + "product `Bβ[i,t] = B[i,t] * β[i,t]`" + Bβ::Matrix{R} end -Base.eltype(::ForwardBackwardStorage{R}) where {R} = R -duration(fb::ForwardBackwardStorage) = size(fb.α, 2) - +""" + initialize_forward_backward(hmm, obs_seq) +""" function 
initialize_forward_backward(hmm::AbstractHMM, obs_seq::Vector) N, T = length(hmm), length(obs_seq) A = transition_matrix(hmm) @@ -54,116 +54,82 @@ function initialize_forward_backward(hmm::AbstractHMM, obs_seq::Vector) c = Vector{R}(undef, T) logB = Matrix{R}(undef, N, T) logm = Vector{R}(undef, T) - B̃ = Matrix{R}(undef, N, T) - B̃β = Matrix{R}(undef, N, T) - - return ForwardBackwardStorage{R,M}(logL, α, β, γ, ξ, c, logB, logm, B̃, B̃β) + B = Matrix{R}(undef, N, T) + Bβ = Matrix{R}(undef, N, T) + return ForwardBackwardStorage{R,M}(logL, α, β, γ, ξ, c, logB, logm, B, Bβ) end -function update_likelihoods!(fb::ForwardBackwardStorage, hmm::AbstractHMM, obs_seq::Vector) - d = obs_distributions(hmm) - @unpack logB, logm, B̃ = fb - N, T = length(hmm), duration(fb) +""" + forward_backward!(storage, hmm, obs_seq) +""" +function forward_backward!( + storage::ForwardBackwardStorage, hmm::AbstractHMM, obs_seq::Vector +) + p = initialization(hmm) + A = transition_matrix(hmm) + T = length(obs_seq) + @unpack logL, α, β, c, γ, ξ, logB, logm, B, Bβ = storage - for t in 1:T - logB[:, t] .= logdensityof.(d, (obs_seq[t],)) + # Observation loglikelihoods + for (logb, obs) in zip(eachcol(logB), obs_seq) + obs_logdensities!(logb, hmm, obs) end - check_no_nan(logB) + check_right_finite(logB) maximum!(logm', logB) - B̃ .= exp.(logB .- logm') - return nothing -end - -function forward!(fb::ForwardBackwardStorage, hmm::AbstractHMM) - p = initialization(hmm) - A = transition_matrix(hmm) - @unpack α, c, B̃ = fb - N, T = length(hmm), duration(fb) + B .= exp.(logB .- logm') + # Forward @views begin - α[:, 1] .= p .* B̃[:, 1] + α[:, 1] .= p .* B[:, 1] c[1] = inv(sum(α[:, 1])) - α[:, 1] .*= c[1] + lmul!(c[1], α[:, 1]) end @views for t in 1:(T - 1) mul!(α[:, t + 1], A', α[:, t]) - α[:, t + 1] .*= B̃[:, t + 1] + α[:, t + 1] .*= B[:, t + 1] c[t + 1] = inv(sum(α[:, t + 1])) - α[:, t + 1] .*= c[t + 1] + lmul!(c[t + 1], α[:, t + 1]) end - fb.logL[] = -sum(log, fb.c) + sum(fb.logm) - return nothing -end 
- -function backward!(fb::ForwardBackwardStorage{R}, hmm::AbstractHMM) where {R} - A = transition_matrix(hmm) - @unpack β, c, B̃, B̃β = fb - N, T = length(hmm), duration(fb) + # Backward β[:, T] .= c[T] @views for t in (T - 1):-1:1 - B̃β[:, t + 1] .= B̃[:, t + 1] .* β[:, t + 1] - mul!(β[:, t], A, B̃β[:, t + 1]) - β[:, t] .*= c[t] + Bβ[:, t + 1] .= B[:, t + 1] .* β[:, t + 1] + mul!(β[:, t], A, Bβ[:, t + 1]) + lmul!(c[t], β[:, t]) end - @views B̃β[:, 1] .= B̃[:, 1] .* β[:, 1] - return nothing -end - -function marginals!(fb::ForwardBackwardStorage, hmm::AbstractHMM) - A = transition_matrix(hmm) - @unpack α, β, c, B̃β, γ, ξ = fb - N, T = length(hmm), duration(fb) + @views Bβ[:, 1] .= B[:, 1] .* β[:, 1] + # Marginals γ .= α .* β ./ c' - check_no_nan(γ) - @views for t in 1:(T - 1) - mul_rows_cols!(ξ[t], α[:, t], A, B̃β[:, t + 1]) + check_finite(γ) + for t in 1:(T - 1) + mul_rows_cols!(ξ[t], view(α, :, t), A, view(Bβ, :, t + 1)) end - return nothing -end -function forward_backward!(fb::ForwardBackwardStorage, hmm::AbstractHMM, obs_seq::Vector) - update_likelihoods!(fb, hmm, obs_seq) - forward!(fb, hmm) - backward!(fb, hmm) - marginals!(fb, hmm) - return nothing -end + # Loglikelihood + logL[] = -sum(log, c) + sum(logm) -function forward_backward!( - fbs::Vector{<:ForwardBackwardStorage}, - hmm::AbstractHMM, - obs_seqs::Vector{<:Vector}, - nb_seqs::Integer, -) - check_lengths(obs_seqs, nb_seqs) - @threads for k in eachindex(fbs, obs_seqs) - forward_backward!(fbs[k], hmm, obs_seqs[k]) - end return nothing end """ forward_backward(hmm, obs_seq) - forward_backward(hmm, obs_seqs, nb_seqs) -Run the forward-backward algorithm to infer the posterior state and transition marginals of an HMM. +Run the forward-backward algorithm to infer the posterior state and transition marginals of `hmm` on the sequence `obs_seq`. 
-When applied on a single sequence, this function returns a tuple `(γ, ξ, logL)` where +This function returns a tuple `(γ, ξ, logL)` where -- `γ` is a matrix containing the posterior state marginals `γ[i, t]` +- `γ` is a matrix containing the posterior state marginals `γ[i,t]` +- `ξ` is a vector of matrices containing the posterior transition marginals `ξ[t][i,j]` - `logL` is the loglikelihood of the sequence -WHen applied on multiple sequences, it returns a vector of tuples. -""" -function forward_backward(hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer) - check_lengths(obs_seqs, nb_seqs) - fbs = [initialize_forward_backward(hmm, obs_seqs[k]) for k in eachindex(obs_seqs)] - forward_backward!(fbs, hmm, obs_seqs, nb_seqs) - return [(fb.γ, fb.logL[]) for fb in fbs] -end +# See also +- [`ForwardBackwardStorage`](@ref) +""" function forward_backward(hmm::AbstractHMM, obs_seq::Vector) - return only(forward_backward(hmm, [obs_seq], 1)) + storage = initialize_forward_backward(hmm, obs_seq) + forward_backward!(storage, hmm, obs_seq) + return storage.γ, storage.ξ, storage.logL[] end diff --git a/src/inference/viterbi.jl b/src/inference/viterbi.jl index 6a65f92c..13322f43 100644 --- a/src/inference/viterbi.jl +++ b/src/inference/viterbi.jl @@ -7,7 +7,7 @@ This storage is relative to a single sequence. # Fields -The only field useful outside of the algorithm is `q`. +The only field useful outside of the algorithm is `q`, the rest does not belong to the public API. 
$(TYPEDFIELDS) """ @@ -18,14 +18,17 @@ struct ViterbiStorage{R} δ::Vector{R} "same as `δ` but for the previous time step" δ_prev::Vector{R} - "temporary variable used to store products `δ_prev .* A[:, j]`" - δA::Vector{R} "penultimate state maximizing the path score" ψ::Matrix{Int} "most likely state at each time `q[t] = argmaxᵢ ℙ(X[t]=i | Y[1:T])`" q::Vector{Int} + "scratch storage space" + scratch::Vector{R} end +""" + initialize_viterbi(hmm, obs_seq) +""" function initialize_viterbi(hmm::AbstractHMM, obs_seq::Vector) T, N = length(obs_seq), length(hmm) R = eltype(hmm, obs_seq[1]) @@ -33,33 +36,39 @@ function initialize_viterbi(hmm::AbstractHMM, obs_seq::Vector) logb = Vector{R}(undef, N) δ = Vector{R}(undef, N) δ_prev = Vector{R}(undef, N) - δA = Vector{R}(undef, N) ψ = Matrix{Int}(undef, N, T) q = Vector{Int}(undef, T) - return ViterbiStorage(logb, δ, δ_prev, δA, ψ, q) + scratch = Vector{R}(undef, N) + return ViterbiStorage(logb, δ, δ_prev, ψ, q, scratch) end -function viterbi!(v::ViterbiStorage, hmm::AbstractHMM, obs_seq::Vector) +""" + viterbi!(storage, hmm, obs_seq) +""" +function viterbi!(storage::ViterbiStorage, hmm::AbstractHMM, obs_seq::Vector) N, T = length(hmm), length(obs_seq) p = initialization(hmm) A = transition_matrix(hmm) - d = obs_distributions(hmm) - @unpack logb, δ, δ_prev, δA, ψ, q = v + @unpack logb, δ, δ_prev, ψ, q, scratch = storage - logb .= logdensityof.(d, (obs_seq[1],)) + obs_logdensities!(logb, hmm, obs_seq[1]) + check_right_finite(logb) logm = maximum(logb) δ .= p .* exp.(logb .- logm) + check_finite(δ) δ_prev .= δ @views ψ[:, 1] .= zero(eltype(ψ)) for t in 2:T - logb .= logdensityof.(d, (obs_seq[t],)) + obs_logdensities!(logb, hmm, obs_seq[t]) + check_right_finite(logb) logm = maximum(logb) for j in 1:N - @views δA .= δ_prev .* A[:, j] - i_max = argmax(δA) + @views scratch .= δ_prev .* A[:, j] + i_max = argmax(scratch) ψ[j, t] = i_max - δ[j] = δA[i_max] * exp(logb[j] - logm) + δ[j] = scratch[i_max] * exp(logb[j] - logm) end + 
check_finite(δ) δ_prev .= δ end q[T] = argmax(δ) @@ -69,35 +78,15 @@ function viterbi!(v::ViterbiStorage, hmm::AbstractHMM, obs_seq::Vector) return nothing end -function viterbi!( - vs::Vector{<:ViterbiStorage}, - hmm::AbstractHMM, - obs_seqs::Vector{<:Vector}, - nb_seqs::Integer, -) - check_lengths(obs_seqs, nb_seqs) - @threads for k in eachindex(vs, obs_seqs) - viterbi!(vs[k], hmm, obs_seqs[k]) - end - return nothing -end - """ viterbi(hmm, obs_seq) - viterbi(hmm, obs_seqs, nb_seqs) -Apply the Viterbi algorithm to infer the most likely state sequence of an HMM. +Apply the Viterbi algorithm to infer the most likely state sequence corresponding to `obs_seq` for `hmm`. -When applied on a single sequence, this function returns a vector of integers. -When applied on multiple sequences, it returns a vector of vectors of integers. +This function returns a vector of integers. """ -function viterbi(hmm::AbstractHMM, obs_seqs::Vector{<:Vector}, nb_seqs::Integer) - check_lengths(obs_seqs, nb_seqs) - vs = [initialize_viterbi(hmm, obs_seqs[k]) for k in eachindex(obs_seqs)] - viterbi!(vs, hmm, obs_seqs, nb_seqs) - return [v.q for v in vs] -end - function viterbi(hmm::AbstractHMM, obs_seq::Vector) - return only(viterbi(hmm, [obs_seq], 1)) + storage = initialize_viterbi(hmm, obs_seq) + viterbi!(storage, hmm, obs_seq) + return storage.q end diff --git a/src/types/abstract_hmm.jl b/src/types/abstract_hmm.jl index 658dabbb..aa81c892 100644 --- a/src/types/abstract_hmm.jl +++ b/src/types/abstract_hmm.jl @@ -18,12 +18,12 @@ To create your own subtype of `AbstractHiddenMarkovModel`, you need to implement Any HMM object which satisfies the interface can be given as input to the following functions: -- [`rand(rng, hmm, T)`](@ref) - [`logdensityof(hmm, obs_seq)`](@ref) - [`forward(hmm, obs_seq)`](@ref) - [`viterbi(hmm, obs_seq)`](@ref) - [`forward_backward(hmm, obs_seq)`](@ref) -- [`baum_welch(hmm, obs_seq)`](@ref) (if the optional `fit!` is implemented) +- [`rand(rng, hmm, T)`](@ref) +- 
[`baum_welch(hmm, obs_seq)`](@ref) (if `fit!` is implemented)
 """
 abstract type AbstractHiddenMarkovModel end
 
@@ -49,7 +49,8 @@ Base.length
     eltype(hmm, obs)
 
 Return a type that can accommodate forward-backward computations on observations similar to `obs`.
-It is typicall a promotion between the element type of the initialization, the element type of the transition matrix, and the type of an observation logdensity evaluated at `obs`.
+
+It is typically a promotion between the element type of the initialization, the element type of the transition matrix, and the type of an observation logdensity evaluated at `obs`.
 """
 function Base.eltype(hmm::AbstractHMM, obs)
     init_type = eltype(initialization(hmm))
@@ -75,14 +76,19 @@ function transition_matrix end
 """
     obs_distributions(hmm)
 
-Return a vector of observation distributions for `hmm`.
+Return a vector of observation distributions, one for each state of `hmm`.
 
-Each element `dist` of this vector must implement
-- `rand(rng, dist)`
-- `DensityInterface.logdensityof(dist, obs)`
+These objects should support `rand(rng, dist)` and `DensityInterface.logdensityof(dist, obs)`.
""" function obs_distributions end +function obs_logdensities!(logb::AbstractVector, hmm::AbstractHMM, obs) + d = obs_distributions(hmm) + for i in eachindex(logb, d) + logb[i] = logdensityof(d[i], obs) + end +end + """ fit!(hmm, init_count, trans_count, obs_seq, state_marginals) @@ -116,19 +122,12 @@ function Base.rand(rng::AbstractRNG, hmm::AbstractHMM, T::Integer) p = initialization(hmm) A = transition_matrix(hmm) d = obs_distributions(hmm) - - first_state = rand(rng, Categorical(p; check_args=false)) state_seq = Vector{Int}(undef, T) - state_seq[1] = first_state + state_seq[1] = rand(rng, Categorical(p; check_args=false)) @views for t in 2:T state_seq[t] = rand(rng, Categorical(A[state_seq[t - 1], :]; check_args=false)) end - first_obs = rand(rng, d[state_seq[1]]) - obs_seq = Vector{typeof(first_obs)}(undef, T) - obs_seq[1] = first_obs - for t in 2:T - obs_seq[t] = rand(rng, d[state_seq[t]]) - end + obs_seq = [rand(rng, d[state_seq[t]]) for t in 1:T] return (; state_seq=state_seq, obs_seq=obs_seq) end diff --git a/src/types/hmm.jl b/src/types/hmm.jl index 2a27f0d7..6253b837 100644 --- a/src/types/hmm.jl +++ b/src/types/hmm.jl @@ -13,7 +13,7 @@ struct HiddenMarkovModel{I<:AbstractVector,T<:AbstractMatrix,D<:AbstractVector} init::I "state transition matrix" trans::T - "observation distributions" + "observation distributions (must be amenable to `logdensityof` and `rand`)" dists::D function HiddenMarkovModel(init::I, trans::T, dists::D) where {I,T,D} @@ -56,6 +56,9 @@ function StatsAPI.fit!( for i in eachindex(hmm.dists) fit_element_from_sequence!(hmm.dists, i, obs_seq, view(state_marginals, i, :)) end - check_hmm(hmm) return nothing end + +function permute(hmm::AbstractHMM, perm::Vector{Int}) + return HMM(hmm.init[perm], hmm.trans[perm, :][:, perm], hmm.dists[perm]) +end diff --git a/src/types/permuted_hmm.jl b/src/types/permuted_hmm.jl deleted file mode 100644 index 4d1f53de..00000000 --- a/src/types/permuted_hmm.jl +++ /dev/null @@ -1,29 +0,0 @@ -""" 
-$(TYPEDEF) - -Wrapper around an `AbstractHMM` that permutes its states. - -This is computationally inefficient and mostly useful for evaluation. - -# Fields - -$(TYPEDFIELDS) -""" -struct PermutedHMM{H<:AbstractHMM} <: AbstractHMM - "the old HMM" - hmm::H - "a permutation such that state `i` in the new HMM corresponds to state `perm[i]` in the old" - perm::Vector{Int} -end - -Base.length(p::PermutedHMM) = length(p.hmm) - -initialization(p::PermutedHMM) = initialization(p.hmm)[p.perm] - -function transition_matrix(p::PermutedHMM) - return transition_matrix(p.hmm)[p.perm, :][:, p.perm] -end - -function obs_distributions(p::PermutedHMM) - return obs_distributions(p.hmm)[p.perm] -end diff --git a/src/utils/Test.jl b/src/utils/HMMTest.jl similarity index 98% rename from src/utils/Test.jl rename to src/utils/HMMTest.jl index af3f3612..d218f1a5 100644 --- a/src/utils/Test.jl +++ b/src/utils/HMMTest.jl @@ -1,4 +1,4 @@ -module Test +module HMMTest using Distributions using Distributions: PDiagMat diff --git a/src/utils/check.jl b/src/utils/check.jl index a0c34838..efd4c216 100644 --- a/src/utils/check.jl +++ b/src/utils/check.jl @@ -1,73 +1,71 @@ -function check_no_nan(a) - if any(isnan, a) - throw(OverflowError("Some values are NaN")) +function check_finite(a) + if !all(isfinite, mynonzeros(a)) + throw(OverflowError("Some values are infinite or NaN")) + end +end + +function check_right_finite(a) + if !all(<(typemax(eltype(a))), mynonzeros(a)) + throw(OverflowError("Some values are positive infinite or NaN")) end end -function check_no_inf(a) - if any(isequal(typemax(eltype(a))), a) - throw(OverflowError("Some values are infinite")) +function check_no_nan(a) + if any(isnan, mynonzeros(a)) + throw(OverflowError("Some values are NaN")) end end function check_positive(a) - if any(!>(zero(eltype(a))), a) + if !all(>(zero(eltype(a))), mynonzeros(a)) throw(OverflowError("Some values are not positive")) end end function check_prob_vec(p::AbstractVector) - check_no_nan(p) - if 
!is_prob_vec(p) + check_finite(p) + if !valid_prob_vec(p) throw(ArgumentError("Invalid probability distribution.")) end end function check_trans_mat(A::AbstractMatrix) - check_no_nan(A) - if !is_trans_mat(A) + check_finite(A) + if !valid_trans_mat(A) throw(ArgumentError("Invalid transition matrix.")) end end -function check_coherent_sizes(p::AbstractVector, A::AbstractMatrix) - if size(A) != (length(p), length(p)) - throw( - DimensionMismatch( - "Probability distribution and transition matrix are incompatible." - ), - ) - end -end - -function check_dists(d) +function check_dists(d::AbstractVector) for i in eachindex(d) if DensityKind(d[i]) == NoDensity() - throw(ArgumentError("Observation is not a density")) + throw( + ArgumentError( + "Invalid observation distributions (do not satisfy DensityInterface.jl)" + ), + ) end end + return true end -""" - check_hmm(hmm::AbstractHMM) +function check_hmm_sizes(p::AbstractVector, A::AbstractMatrix, d::AbstractVector) + if !(size(A) == (length(p), length(p)) == (length(d), length(d))) + throw( + DimensionMismatch( + "Initialization, transition matrix and observation distributions have incompatible sizes.", + ), + ) + end +end -Verify that `hmm` satisfies basic assumptions. 
-""" function check_hmm(hmm::AbstractHMM) p = initialization(hmm) A = transition_matrix(hmm) d = obs_distributions(hmm) - if !all(==(length(hmm)), (length(p), size(A, 1), size(A, 2), length(d))) - throw(DimensionMismatch("Incoherent sizes")) - end + check_hmm_sizes(p, A, d) check_prob_vec(p) check_trans_mat(A) check_dists(d) return nothing end - -function check_lengths(obs_seqs::Vector{<:Vector}, nb_seqs::Integer) - if nb_seqs != length(obs_seqs) - throw(ArgumentError("nb_seqs != length(obs_seqs)")) - end -end diff --git a/src/utils/lightdiagnormal.jl b/src/utils/lightdiagnormal.jl index a2b36a31..a80c7bfd 100644 --- a/src/utils/lightdiagnormal.jl +++ b/src/utils/lightdiagnormal.jl @@ -21,7 +21,6 @@ struct LightDiagNormal{ end function LightDiagNormal(μ, σ) - check_no_nan(μ) check_positive(σ) return LightDiagNormal(μ, σ, log.(σ)) end @@ -34,22 +33,20 @@ end Base.length(dist::LightDiagNormal) = length(dist.μ) -function Base.rand(rng::AbstractRNG, dist::LightDiagNormal) - return dist.σ .* randn(rng, length(dist)) .+ dist.μ +function Base.rand(rng::AbstractRNG, dist::LightDiagNormal{T1,T2}) where {T1,T2} + T = promote_type(T1, T2) + return dist.σ .* randn(rng, T, length(dist)) .+ dist.μ end function DensityInterface.logdensityof(dist::LightDiagNormal, x) a = -sum(abs2, (x[i] - dist.μ[i]) / dist.σ[i] for i in eachindex(x, dist.μ, dist.σ)) b = -sum(dist.logσ) - check_no_nan(a) - check_no_nan(b) logd = (a / 2) + b return logd end function StatsAPI.fit!(dist::LightDiagNormal{T1,T2}, x, w) where {T1,T2} w_tot = sum(w) - check_positive(w_tot) dist.μ .= zero(T1) dist.σ .= zero(T2) for i in eachindex(x, w) @@ -60,8 +57,8 @@ function StatsAPI.fit!(dist::LightDiagNormal{T1,T2}, x, w) where {T1,T2} dist.σ .+= abs2.(x[i] .- dist.μ) .* w[i] end dist.σ ./= w_tot - dist.σ .= sqrt.(dist.σ) check_positive(dist.σ) + dist.σ .= sqrt.(dist.σ) dist.logσ .= log.(dist.σ) return nothing end diff --git a/src/utils/mul.jl b/src/utils/linalg.jl similarity index 69% rename from src/utils/mul.jl 
rename to src/utils/linalg.jl index 5b66fc9e..4783a4df 100644 --- a/src/utils/mul.jl +++ b/src/utils/linalg.jl @@ -1,3 +1,8 @@ +sum_to_one!(x) = ldiv!(sum(x), x) + +mynonzeros(x::AbstractArray) = x +mynonzeros(x::AbstractSparseArray) = nonzeros(x) + function mul_rows_cols!( B::AbstractMatrix, l::AbstractVector, A::AbstractMatrix, r::AbstractVector ) @@ -9,11 +14,11 @@ function mul_rows_cols!( B::SparseMatrixCSC, l::AbstractVector, A::SparseMatrixCSC, r::AbstractVector ) @assert size(B) == size(A) == (length(l), length(r)) - B .= A + @assert nnz(B) == nnz(A) for j in axes(B, 2) for k in nzrange(B, j) i = B.rowval[k] - B.nzval[k] *= l[i] * r[j] + B.nzval[k] = l[i] * A.nzval[k] * r[j] end end return nothing diff --git a/src/utils/probvec.jl b/src/utils/probvec.jl deleted file mode 100644 index 7925b56c..00000000 --- a/src/utils/probvec.jl +++ /dev/null @@ -1,19 +0,0 @@ -function is_prob_vec(p::AbstractVector; atol=1e-2) - return (minimum(p) >= 0) && isapprox(sum(p), 1; atol=atol) -end - -sum_to_one!(x) = x ./= sum(x) - -""" - rand_prob_vec(N) - rand_prob_vec(rng, N) - -Generate a random probability distribution of size `N`. 
-""" -function rand_prob_vec(rng::AbstractRNG, N) - p = rand(rng, N) - sum_to_one!(p) - return p -end - -rand_prob_vec(N) = rand_prob_vec(default_rng(), N) diff --git a/src/utils/transmat.jl b/src/utils/probvec_transmat.jl similarity index 52% rename from src/utils/transmat.jl rename to src/utils/probvec_transmat.jl index 1f30e5f8..9c9931b0 100644 --- a/src/utils/transmat.jl +++ b/src/utils/probvec_transmat.jl @@ -1,13 +1,17 @@ +function valid_prob_vec(p::AbstractVector; atol=1e-2) + return (minimum(p) >= 0) && isapprox(sum(p), 1; atol=atol) +end + function is_square(A::AbstractMatrix) return size(A, 1) == size(A, 2) end -function is_trans_mat(A::AbstractMatrix; atol=1e-2) +function valid_trans_mat(A::AbstractMatrix; atol=1e-2) if !is_square(A) return false else for row in eachrow(A) - if !is_prob_vec(row; atol=atol) + if !valid_prob_vec(row; atol=atol) return false end end @@ -15,6 +19,18 @@ function is_trans_mat(A::AbstractMatrix; atol=1e-2) end end +""" + rand_prob_vec(N) + rand_prob_vec(rng, N) + +Generate a random probability distribution of size `N`. 
+""" +function rand_prob_vec(rng::AbstractRNG, N) + p = rand(rng, N) + sum_to_one!(p) + return p +end + """ rand_trans_mat(N) rand_trans_mat(rng, N) @@ -27,4 +43,5 @@ function rand_trans_mat(rng::AbstractRNG, N) return A end +rand_prob_vec(N) = rand_prob_vec(default_rng(), N) rand_trans_mat(N) = rand_trans_mat(default_rng(), N) diff --git a/test/allocations.jl b/test/allocations.jl index aebcb4b5..40ba2657 100644 --- a/test/allocations.jl +++ b/test/allocations.jl @@ -1,8 +1,9 @@ using BenchmarkTools using HiddenMarkovModels -using HiddenMarkovModels.Test +using HiddenMarkovModels.HMMTest import HiddenMarkovModels as HMMs using Test +using SparseArrays function test_allocations(hmm; T) obs_seq = rand(hmm, T).obs_seq @@ -10,31 +11,38 @@ function test_allocations(hmm; T) obs_seqs = [rand(hmm, T).obs_seq for _ in 1:nb_seqs] ## Forward - f = HMMs.initialize_forward(hmm, obs_seq) - allocs = @ballocated HiddenMarkovModels.forward!($f, $hmm, $obs_seq) + f_storage = HMMs.initialize_forward(hmm, obs_seq) + HiddenMarkovModels.forward!(f_storage, hmm, obs_seq) + allocs = @allocated HiddenMarkovModels.forward!(f_storage, hmm, obs_seq) @test allocs == 0 ## Viterbi - v = HMMs.initialize_viterbi(hmm, obs_seq) - allocs = @ballocated HMMs.viterbi!($v, $hmm, $obs_seq) + v_storage = HMMs.initialize_viterbi(hmm, obs_seq) + HMMs.viterbi!(v_storage, hmm, obs_seq) + allocs = @allocated HMMs.viterbi!(v_storage, hmm, obs_seq) @test allocs == 0 ## Forward-backward - fb = HMMs.initialize_forward_backward(hmm, obs_seq) - allocs = @ballocated HMMs.forward_backward!($fb, $hmm, $obs_seq) + fb_storage = HMMs.initialize_forward_backward(hmm, obs_seq) + HMMs.forward_backward!(fb_storage, hmm, obs_seq) + allocs = @allocated HMMs.forward_backward!(fb_storage, hmm, obs_seq) @test allocs == 0 ## Baum-Welch - fbs = [HMMs.initialize_forward_backward(hmm, obs_seqs[k]) for k in eachindex(obs_seqs)] - bw = HMMs.initialize_baum_welch(hmm, obs_seqs, nb_seqs) + fb_storages = [ + 
HMMs.initialize_forward_backward(hmm, obs_seqs[k]) for k in eachindex(obs_seqs) + ] + bw_storage = HMMs.initialize_baum_welch(hmm, obs_seqs, nb_seqs) obs_seqs_concat = reduce(vcat, obs_seqs) - HMMs.forward_backward!(fbs, hmm, obs_seqs, nb_seqs) - HMMs.update_sufficient_statistics!(bw, fbs) - fit!(hmm, bw, obs_seqs_concat) - allocs = @ballocated HMMs.update_sufficient_statistics!($bw, $fbs) - @test allocs == 0 - allocs = @ballocated fit!($hmm, $bw, $obs_seqs_concat) - @test allocs == 0 + for k in eachindex(fb_storages, obs_seqs) + HMMs.forward_backward!(fb_storages[k], hmm, obs_seqs[k]) + end + HMMs.update_sufficient_statistics!(bw_storage, fb_storages) + fit!(hmm, bw_storage, obs_seqs_concat) + allocs1 = @allocated HMMs.update_sufficient_statistics!(bw_storage, fb_storages) + allocs2 = @allocated fit!(hmm, bw_storage, obs_seqs_concat) + @test allocs1 == 0 + @test allocs2 == 0 end N, D, T = 3, 2, 100 @@ -45,7 +53,7 @@ end @testset "Normal sparse" begin # see https://discourse.julialang.org/t/why-does-mul-u-a-v-allocate-when-a-is-sparse-and-u-v-are-views/105995 - @test_skip test_allocations(rand_gaussian_hmm_1d(N; sparse_trans=true); T) + test_allocations(rand_gaussian_hmm_1d(N; sparse_trans=true); T) end @testset "LightDiagNormal" begin diff --git a/test/arrays.jl b/test/arrays.jl index 316f019a..a50290c5 100644 --- a/test/arrays.jl +++ b/test/arrays.jl @@ -19,14 +19,12 @@ d_init = [Normal(i + randn(), 1.0) for i in 1:N]; hmm = HMM(p, A, d); hmm_init = HMM(p, A, d_init); - obs_seq = rand(hmm, T).obs_seq; -γ, logL = forward_backward(hmm, obs_seq); -hmm_est, logL_evolution = @inferred baum_welch(hmm_init, obs_seq); - @testset "Sparse" begin - @test typeof(hmm_est) == typeof(hmm_init) + γ, ξ, logL = @inferred forward_backward(hmm, obs_seq) + hmm_est, logL_evolution = @inferred baum_welch(hmm_init, obs_seq) + @test eltype(ξ) == typeof(transition_matrix(hmm)) @test nnz(transition_matrix(hmm_est)) <= nnz(transition_matrix(hmm)) end @@ -41,9 +39,8 @@ hmm = HMM(p, A, d); 
hmm_init = HMM(p, A, d_init); obs_seq = rand(hmm, T).obs_seq; -γ, logL = forward_backward(hmm, obs_seq); -hmm_est, logL_evolution = @inferred baum_welch(hmm_init, obs_seq); - @testset "Static" begin - @test typeof(hmm_est) == typeof(hmm_init) + γ, ξ, logL = @inferred forward_backward(hmm, obs_seq) + hmm_est, logL_evolution = @inferred baum_welch(hmm_init, obs_seq) + @test eltype(ξ) == typeof(transition_matrix(hmm)) end diff --git a/test/correctness.jl b/test/correctness.jl index 2afae698..41b7e4f2 100644 --- a/test/correctness.jl +++ b/test/correctness.jl @@ -1,63 +1,50 @@ using Distributions using HMMBase: HMMBase using HiddenMarkovModels -using HiddenMarkovModels.Test +using HiddenMarkovModels.HMMTest using SimpleUnPack using Test function test_correctness(hmm, hmm_init; T) - obs_seq1 = rand(hmm, T).obs_seq - obs_seq2 = rand(hmm, T).obs_seq - obs_mat1 = collect(reduce(hcat, obs_seq1)') - obs_mat2 = collect(reduce(hcat, obs_seq2)') - - nb_seqs = 2 - obs_seqs = [obs_seq1, obs_seq2] + obs_seq = rand(hmm, T).obs_seq + obs_mat = collect(reduce(hcat, obs_seq)') hmm_base = HMMBase.HMM(deepcopy(hmm)) hmm_init_base = HMMBase.HMM(deepcopy(hmm_init)) @testset "Logdensity" begin - logL1_base = HMMBase.forward(hmm_base, obs_mat1)[2] - logL2_base = HMMBase.forward(hmm_base, obs_mat2)[2] - logL = logdensityof(hmm, obs_seqs, nb_seqs) - @test logL ≈ logL1_base + logL2_base + logL_base = HMMBase.forward(hmm_base, obs_mat)[2] + logL = logdensityof(hmm, obs_seq) + @test logL ≈ logL_base end @testset "Forward" begin - (α1_base, logL1_base), (α2_base, logL2_base) = [ - HMMBase.forward(hmm_base, obs_mat1), HMMBase.forward(hmm_base, obs_mat2) - ] - (α1, logL1), (α2, logL2) = forward(hmm, obs_seqs, nb_seqs) - @test isapprox(α1, α1_base[end, :]) - @test isapprox(α2, α2_base[end, :]) - @test logL1 ≈ logL1_base - @test logL2 ≈ logL2_base + α_base, logL_base = HMMBase.forward(hmm_base, obs_mat) + α, logL = forward(hmm, obs_seq) + @test isapprox(α, α_base[end, :]) + @test logL ≈ logL_base end 
@testset "Viterbi" begin - q1_base = HMMBase.viterbi(hmm_base, obs_mat1) - q2_base = HMMBase.viterbi(hmm_base, obs_mat2) - q1, q2 = viterbi(hmm, obs_seqs, nb_seqs) - @test isequal(q1, q1_base) - @test isequal(q2, q2_base) + q_base = HMMBase.viterbi(hmm_base, obs_mat) + q = viterbi(hmm, obs_seq) + # Viterbi decoding can vary in case of (infrequent) ties + @test mean(q .== q_base) > 0.9 end @testset "Forward-backward" begin - γ1_base = HMMBase.posteriors(hmm_base, obs_mat1) - γ2_base = HMMBase.posteriors(hmm_base, obs_mat2) - (γ1, _), (γ2, _) = forward_backward(hmm, obs_seqs, nb_seqs) - @test isapprox(γ1, γ1_base') - @test isapprox(γ2, γ2_base') + γ_base = HMMBase.posteriors(hmm_base, obs_mat) + γ, _, _ = forward_backward(hmm, obs_seq) + @test isapprox(γ, γ_base') end @testset "Baum-Welch" begin hmm_est_base, hist_base = HMMBase.fit_mle( - hmm_init_base, obs_mat1; maxiter=10, tol=-Inf + hmm_init_base, obs_mat; maxiter=10, tol=-Inf ) logL_evolution_base = hist_base.logtots hmm_est, logL_evolution = baum_welch( - hmm_init, [obs_seq1, obs_seq1], 2; max_iterations=10, atol=-Inf + hmm_init, [obs_seq, obs_seq], 2; max_iterations=10, atol=-Inf ) @test isapprox( logL_evolution[(begin + 1):end], 2 * logL_evolution_base[begin:(end - 1)] @@ -78,7 +65,7 @@ end N, D, T = 3, 2, 100 @testset "Categorical" begin - test_correctness(rand_categorical_hmm(N, 10), rand_categorical_hmm(N, 10); T) + test_correctness(rand_categorical_hmm(N, D), rand_categorical_hmm(N, D); T) end @testset "Normal" begin diff --git a/test/misc.jl b/test/misc.jl deleted file mode 100644 index 82b31c48..00000000 --- a/test/misc.jl +++ /dev/null @@ -1,29 +0,0 @@ -using HiddenMarkovModels -using HiddenMarkovModels.Test -using HiddenMarkovModels: PermutedHMM -using Distributions -using Test - -## Permuted - -perm = [3, 1, 2] -hmm = rand_gaussian_hmm_1d(3) -hmm_perm = PermutedHMM(hmm, perm) - -p = initialization(hmm) -A = transition_matrix(hmm) -d = obs_distributions(hmm) - -p_perm = initialization(hmm_perm) 
-A_perm = transition_matrix(hmm_perm) -d_perm = obs_distributions(hmm_perm) - -@testset "PermutedHMM" begin - for i in 1:3 - @test p_perm[i] ≈ p[perm[i]] - @test d_perm[i] ≈ d[perm[i]] - end - for i in 1:3, j in 1:3 - @test A_perm[i, j] ≈ A[perm[i], perm[j]] - end -end diff --git a/test/numbers.jl b/test/numbers.jl deleted file mode 100644 index 35e906cc..00000000 --- a/test/numbers.jl +++ /dev/null @@ -1,34 +0,0 @@ -using HiddenMarkovModels -using HiddenMarkovModels: LightDiagNormal -using LinearAlgebra -using LogarithmicNumbers -using SimpleUnPack -using Test - -N, D, T = 3, 2, 1000 - -## LogarithmicNumbers - -p = ones(N) / N; -A = rand_trans_mat(N); -d = [LightDiagNormal(randn(D), ones(D)) for i in 1:N]; -d_init = [LightDiagNormal(randn(D), ones(D)) for i in 1:N]; -d_init_log = [LightDiagNormal(randn(D), LogFloat64.(ones(D))) for i in 1:N]; - -hmm = HMM(p, A, d); -obs_seq = rand(hmm, T).obs_seq; - -hmm_init1 = HMM(LogFloat64.(p), A, d_init); -hmm_est1, logL_evolution1 = @inferred baum_welch(hmm_init1, obs_seq); - -hmm_init2 = HMM(p, LogFloat64.(A), d_init); -hmm_est2, logL_evolution2 = @inferred baum_welch(hmm_init2, obs_seq); - -hmm_init3 = HMM(p, A, d_init_log); -hmm_est3, logL_evolution3 = @inferred baum_welch(hmm_init3, obs_seq); - -@testset "Logarithmic" begin - @test typeof(hmm_est1) == typeof(hmm_init1) - @test typeof(hmm_est2) == typeof(hmm_init2) - @test typeof(hmm_est3) == typeof(hmm_init3) -end diff --git a/test/runtests.jl b/test/runtests.jl index a49bbe92..315d269c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -28,10 +28,6 @@ using Test end end - @testset "Doctests" begin - Documenter.doctest(HiddenMarkovModels) - end - @testset "Correctness" begin include("correctness.jl") end @@ -40,10 +36,6 @@ using Test include("arrays.jl") end - @testset "Number types" begin - include("numbers.jl") - end - @testset "Autodiff" begin include("autodiff.jl") end @@ -52,7 +44,7 @@ using Test include("dna.jl") end - @testset "Misc" begin - 
include("misc.jl") + @testset "Doctests" begin + Documenter.doctest(HiddenMarkovModels) end end diff --git a/test/type_stability.jl b/test/type_stability.jl index 974a96c7..642ae0ef 100644 --- a/test/type_stability.jl +++ b/test/type_stability.jl @@ -1,5 +1,5 @@ using HiddenMarkovModels -using HiddenMarkovModels.Test +using HiddenMarkovModels.HMMTest import HiddenMarkovModels as HMMs using JET using SimpleUnPack