Complete revamp (#59)

* Revamp
* More changes
* Correctness and type stability
* Allocs
* BaumWelch storage
* Finish BW storage
* Fix docs
* Separate single sequence case
* Finish revamp except custom tuto
* Reactivate precompile workload and fix viterbi
* Reactivate sparse allocation test
* Fix FB output
* Skip allocations test sparse
* Fix tests for 1.6
gdalle · Nov 10, 2023 · 0b1eef1 · 0b1eef1
1 parent 145df1c
commit 0b1eef1
Show file tree

Hide file tree

Showing 50 changed files with 1,045 additions and 1,222 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,18 +1,19 @@
 name = "HiddenMarkovModels"
 uuid = "84ca31d5-effc-45e0-bfda-5a68cd981f47"
 authors = ["Guillaume Dalle"]
-version = "0.3.1"
+version = "0.4.0"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-RequiredInterfaces = "97f35ef4-7bc5-4ec1-a41a-dcc69c7308c6"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SimpleUnPack = "ce78b400-467f-4804-87d8-8f486da07d0a"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 
 [weakdeps]
@@ -27,6 +28,7 @@ HiddenMarkovModelsHMMBaseExt = "HMMBase"
 ChainRulesCore = "1.16"
 DensityInterface = "0.4"
 Distributions = "0.25"
+DocStringExtensions = "0.9"
 LinearAlgebra = "1.6"
 PrecompileTools = "1.1"
 Random = "1.6"

diff --git a/benchmark/utils/hmms.jl b/benchmark/utils/hmms.jl
@@ -1,5 +1,5 @@
 using BenchmarkTools
-using HiddenMarkovModels: HMMs
+using HiddenMarkovModels
 using SimpleUnPack
 
 function rand_params_hmms(; N, D)
@@ -15,9 +15,9 @@ function rand_model_hmms(; N, D)
     if D == 1
         dists = [Normal(μ[n, 1], σ[n, 1]) for n in 1:N]
     else
-        dists = [HMMs.LightDiagNormal(μ[n, :], σ[n, :]) for n in 1:N]
+        dists = [HiddenMarkovModels.LightDiagNormal(μ[n, :], σ[n, :]) for n in 1:N]
     end
-    model = HMMs.HMM(p, A, dists)
+    model = HiddenMarkovModels.HMM(p, A, dists)
     return model
 end
 
@@ -30,22 +30,22 @@ function benchmarkables_hmms(; algos, N, D, T, K, I)
     end
     benchs = Dict()
     if "logdensity" in algos
-        benchs["logdensity"] = @benchmarkable HMMs.logdensityof(model, $obs_seqs, $K) setup = (
-            model = rand_model_hmms(; N=$N, D=$D)
-        )
+        benchs["logdensity"] = @benchmarkable HiddenMarkovModels.logdensityof(
+            model, $obs_seqs, $K
+        ) setup = (model = rand_model_hmms(; N=$N, D=$D))
     end
     if "viterbi" in algos
-        benchs["viterbi"] = @benchmarkable HMMs.viterbi(model, $obs_seqs, $K) setup = (
+        benchs["viterbi"] = @benchmarkable HiddenMarkovModels.viterbi(model, $obs_seqs, $K) setup = (
             model = rand_model_hmms(; N=$N, D=$D)
         )
     end
     if "forward_backward" in algos
-        benchs["forward_backward"] = @benchmarkable HMMs.forward_backward(
+        benchs["forward_backward"] = @benchmarkable HiddenMarkovModels.forward_backward(
             model, $obs_seqs, $K
         ) setup = (model = rand_model_hmms(; N=$N, D=$D))
     end
     if "baum_welch" in algos
-        benchs["baum_welch"] = @benchmarkable HMMs.baum_welch(
+        benchs["baum_welch"] = @benchmarkable HiddenMarkovModels.baum_welch(
             model, $obs_seqs, $K; max_iterations=$I, atol=-Inf
         ) setup = (model = rand_model_hmms(; N=$N, D=$D))
     end

diff --git a/docs/make.jl b/docs/make.jl
@@ -31,12 +31,15 @@ pages = [
     "Home" => "index.md",
     "Essentials" => ["Background" => "background.md", "API reference" => "api.md"],
     "Tutorials" => [
-        "Built-in HMM" => "tuto_builtin.md",
-        "Custom HMM" => "tuto_custom.md",
+        "Built-in HMM" => "builtin.md",
+        "Custom HMM" => "custom.md",
         "Debugging" => "debugging.md",
     ],
-    "Alternatives" =>
-        ["Features" => "alt_features.md", "Performance" => "alt_performance.md"],
+    "Alternatives" => if benchmarks_done
+        ["Features" => "features.md", "Benchmarks" => "benchmarks.md"]
+    else
+        ["Features" => "features.md"]
+    end,
     "Advanced" => ["Formulas" => "formulas.md", "Roadmap" => "roadmap.md"],
 ]
 
@@ -54,7 +57,7 @@ makedocs(;
     format=fmt,
     pages=pages,
     plugins=[bib],
-    warnonly=!benchmarks_done,
+    pagesonly=true,
 )
 
 deploydocs(; repo="github.com/gdalle/HiddenMarkovModels.jl", devbranch="main")
diff --git a/docs/src/api.md b/docs/src/api.md
@@ -6,22 +6,10 @@ HiddenMarkovModels
 
 ## Types
 
-### Markov chains
-
-```@docs
-AbstractMarkovChain
-MarkovChain
-AbstractMC
-MC
-```
-
-### Hidden Markov Models
-
 ```@docs
 AbstractHiddenMarkovModel
 HiddenMarkovModel
 AbstractHMM
-HiddenMarkovModels.PermutedHMM
 HMM
 ```
 
@@ -30,9 +18,10 @@ HMM
 ```@docs
 rand
 length
-initial_distribution
+eltype
+initialization
 transition_matrix
-obs_distribution
+obs_distributions
 ```
 
 ## Inference
@@ -42,22 +31,28 @@ logdensityof
 forward
 viterbi
 forward_backward
+baum_welch
+fit!
 ```
 
-## Learning
+## Misc
 
 ```@docs
-fit!
-fit
-baum_welch
+check_hmm
+rand_prob_vec
+rand_trans_mat
 ```
 
 ## Internals
 
 ```@docs
-HMMs.ForwardBackwardStorage
-HMMs.fit_element_from_sequence!
-HMMs.LightDiagNormal
+HiddenMarkovModels.ForwardStorage
+HiddenMarkovModels.ViterbiStorage
+HiddenMarkovModels.ForwardBackwardStorage
+HiddenMarkovModels.BaumWelchStorage
+HiddenMarkovModels.fit_element_from_sequence!
+HiddenMarkovModels.LightDiagNormal
+HiddenMarkovModels.PermutedHMM
 ```
 
 ## Notations
@@ -71,22 +66,23 @@ HMMs.LightDiagNormal
 
 ### Models and simulations
 
-- `p` or `init`: initial_distribution (vector of state probabilities)
+- `p` or `init`: initialization (vector of state probabilities)
 - `A` or `trans`: transition_matrix (matrix of transition probabilities)
-- `dists`: observation distribution (vector of `rand`-able and `logdensityof`-able objects)
+- `d` or `dists`: observation distributions (vector of `rand`-able and `logdensityof`-able objects)
 - `state_seq`: a sequence of states (vector of integers)
 - `obs_seq`: a sequence of observations (vector of individual observations)
 - `obs_seqs`: several sequences of observations
+- `nb_seqs`: number of observation sequences
 
 ### Forward backward
 
 - `(log)b`: vector of observation (log)likelihoods by state for an individual observation
 - `(log)B`: matrix of observation (log)likelihoods by state for a sequence of observations
 - `α`: scaled forward variables
 - `β`: scaled backward variables
-- `γ`: one-state marginals
-- `ξ`: two-state marginals
-- `logL`: loglikelihood of a sequence of observations
+- `γ`: state marginals
+- `ξ`: transition marginals
+- `logL`: posterior loglikelihood of a sequence of observations
 
 ## Index
 

diff --git a/docs/src/alt_performance.md → docs/src/benchmarks.md b/docs/src/alt_performance.md → docs/src/benchmarks.md
@@ -15,10 +15,6 @@ The test case is an HMM with diagonal multivariate normal observations.
 - `K`: number of trajectories
 - `I`: number of Baum-Welch iterations
 
-!!! danger "Missing benchmarks?"
-    The benchmarks are computationally expensive and we only run them once for each new release.
-    If you don't see any plots below and the links are broken, you are probably on the dev documentation: go to the [stable documentation](https://gdalle.github.io/HiddenMarkovModels.jl/stable/) instead.
-
 ## Reproducibility
 
 These benchmarks were generated in the following environment: [`setup.txt`](./assets/benchmark/results/setup.txt).

diff --git a/docs/src/tuto_builtin.md → docs/src/builtin.md b/docs/src/tuto_builtin.md → docs/src/builtin.md
@@ -15,8 +15,8 @@ Creating a model:
 function gaussian_hmm(N; noise=0)
     p = ones(N) / N  # initial distribution
     A = rand_trans_mat(N)  # transition matrix
-    dists = [Normal(i + noise * randn(), 0.5) for i in 1:N]  # observation distributions
-    return HMM(p, A, dists)
+    d = [Normal(i + noise * randn(), 0.5) for i in 1:N]  # observation distributions
+    return HMM(p, A, d)
 end
 ```
 
@@ -29,7 +29,7 @@ transition_matrix(hmm)
 ```
 
 ```@example tuto
-[obs_distribution(hmm, i) for i in 1:N]
+obs_distributions(hmm)
 ```
 
 Simulating a sequence:
@@ -70,15 +70,15 @@ first(logL_evolution), last(logL_evolution)
 Correcting state order because we know observation means are increasing in the true model:
 
 ```@example tuto
-[obs_distribution(hmm_est, i) for i in 1:N]
+d_est = obs_distributions(hmm_est)
 ```
 
 ```@example tuto
-perm = sortperm(1:3, by=i->obs_distribution(hmm_est, i).μ)
+perm = sortperm(1:3, by=i->d_est[i].μ)
 ```
 
 ```@example tuto
-hmm_est = PermutedHMM(hmm_est, perm)
+hmm_est = HiddenMarkovModels.PermutedHMM(hmm_est, perm)
 ```
 
 Evaluating errors:

diff --git a/docs/src/custom.md b/docs/src/custom.md
@@ -0,0 +1,13 @@
+# Tutorial - custom HMM
+
+```@example tuto
+using Distributions
+using HiddenMarkovModels
+
+using Random; Random.seed!(63)
+```
+
+Here we demonstrate how to build your own HMM structure satisfying the interface.
+
+!!! danger
+    Work in progress.
diff --git a/docs/src/debugging.md b/docs/src/debugging.md
@@ -9,4 +9,4 @@ This can happen for a variety of reasons, so here are a few leads worth investig
 * Reduce the number of states (to make every one of them useful)
 * Add a prior to your transition matrix / observation distributions (to avoid degenerate behavior like zero variance in a Gaussian)
 * Pick a better initialization (to start closer to the supposed ground truth)
-* Use [LogarithmicNumbers.jl](https://github.com/cjdoris/LogarithmicNumbers.jl) in strategic places (to guarantee numerical stability). Note that these numbers don't play nicely with Distributions.jl, so you may have to roll out your own observation distribution.
+* Use [LogarithmicNumbers.jl](https://github.com/cjdoris/LogarithmicNumbers.jl) in strategic places (to guarantee numerical stability). Note that these numbers don't play nicely with Distributions.jl, so you may have to roll your own observation distribution.
diff --git a/docs/src/alt_features.md → docs/src/features.md b/docs/src/alt_features.md → docs/src/features.md