Commit

Fix leaky tests
gdalle committed Feb 22, 2024
1 parent 9657939 commit b56d1b2
Showing 10 changed files with 61 additions and 50 deletions.
34 changes: 18 additions & 16 deletions docs/src/formulas.md
@@ -4,9 +4,11 @@ Suppose we are given observations $Y_1, ..., Y_T$, with hidden states $X_1, ...,
Following [Rabiner1989](@cite), we use the following notations:

* let $\pi \in \mathbb{R}^N$ be the initial state distribution $\pi_i = \mathbb{P}(X_1 = i)$
-* let $A \in \mathbb{R}^{N \times N}$ be the transition matrix $a_{i,j} = \mathbb{P}(X_{t+1}=j | X_t = i)$
+* let $A_t \in \mathbb{R}^{N \times N}$ be the transition matrix $a_{i,j,t} = \mathbb{P}(X_{t+1}=j | X_t = i)$
* let $B \in \mathbb{R}^{N \times T}$ be the matrix of statewise observation likelihoods $b_{i,t} = \mathbb{P}(Y_t | X_t = i)$

+The conditioning on the known controls $U_{1:T}$ is implicit throughout.
+
## Vanilla forward-backward

### Recursion
@@ -33,8 +35,8 @@ and satisfy the dynamic programming equations

```math
\begin{align*}
-\alpha_{j,t+1} & = \left(\sum_{i=1}^N \alpha_{i,t} a_{i,j}\right) b_{j,t+1} \\
-\beta_{i,t} & = \sum_{j=1}^N a_{i,j} b_{j,t+1} \beta_{j,t+1}
+\alpha_{j,t+1} & = \left(\sum_{i=1}^N \alpha_{i,t} a_{i,j,t}\right) b_{j,t+1} \\
+\beta_{i,t} & = \sum_{j=1}^N a_{i,j,t} b_{j,t+1} \beta_{j,t+1}
\end{align*}
```
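As a reading aid, here is a minimal sketch of this recursion in Julia, assuming dense arrays with `A[i, j, t]` and `B[i, t]` laid out as in the notation above; this is illustrative, not the package's internal implementation.

```julia
# Unscaled forward-backward recursion, assuming dense arrays where
# A[i, j, t] = a_{i,j,t} and B[i, t] = b_{i,t} (illustrative sketch).
function forward_backward_sketch(init, A, B)
    N, T = size(B)
    α = zeros(N, T)
    β = zeros(N, T)
    α[:, 1] = init .* B[:, 1]         # α_{i,1} = π_i b_{i,1}
    for t in 1:(T - 1)
        α[:, t + 1] = (A[:, :, t]' * α[:, t]) .* B[:, t + 1]
    end
    β[:, T] .= 1                      # β_{i,T} = 1
    for t in (T - 1):-1:1
        β[:, t] = A[:, :, t] * (B[:, t + 1] .* β[:, t + 1])
    end
    return α, β
end
```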

@@ -53,7 +55,7 @@ We notice that
```math
\begin{align*}
\alpha_{i,t} \beta_{i,t} & = \mathbb{P}(Y_{1:T}, X_t=i) \\
-\alpha_{i,t} a_{i,j} b_{j,t+1} \beta_{j,t+1} & = \mathbb{P}(Y_{1:T}, X_t=i, X_{t+1}=j)
+\alpha_{i,t} a_{i,j,t} b_{j,t+1} \beta_{j,t+1} & = \mathbb{P}(Y_{1:T}, X_t=i, X_{t+1}=j)
\end{align*}
```

@@ -62,7 +64,7 @@ Thus we deduce the one-state and two-state marginals
```math
\begin{align*}
\gamma_{i,t} & = \mathbb{P}(X_t=i | Y_{1:T}) = \frac{1}{\mathcal{L}} \alpha_{i,t} \beta_{i,t} \\
-\xi_{i,j,t} & = \mathbb{P}(X_t=i, X_{t+1}=j | Y_{1:T}) = \frac{1}{\mathcal{L}} \alpha_{i,t} a_{i,j} b_{j,t+1} \beta_{j,t+1}
+\xi_{i,j,t} & = \mathbb{P}(X_t=i, X_{t+1}=j | Y_{1:T}) = \frac{1}{\mathcal{L}} \alpha_{i,t} a_{i,j,t} b_{j,t+1} \beta_{j,t+1}
\end{align*}
```
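On top of the same dense-array sketch, the marginals take only a few lines (again illustrative, with the likelihood recovered as $\mathcal{L} = \sum_i \alpha_{i,T}$):

```julia
# One-state and two-state posterior marginals from unscaled α and β.
function marginals_sketch(α, β, A, B)
    N, T = size(B)
    L = sum(α[:, T])                  # 𝓛 = P(Y_{1:T})
    γ = (α .* β) ./ L
    ξ = zeros(N, N, T - 1)
    for t in 1:(T - 1)
        ξ[:, :, t] = α[:, t] .* A[:, :, t] .* (B[:, t + 1] .* β[:, t + 1])' ./ L
    end
    return γ, ξ
end
```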

@@ -75,7 +77,7 @@ According to [Qin2000](@cite), derivatives of the likelihood can be obtained as
\frac{\partial \mathcal{L}}{\partial \pi_i} &= \beta_{i,1} b_{i,1} \\
\frac{\partial \mathcal{L}}{\partial a_{i,j}} &= \sum_{t=1}^{T-1} \alpha_{i,t} b_{j,t+1} \beta_{j,t+1} \\
\frac{\partial \mathcal{L}}{\partial b_{j,1}} &= \pi_j \beta_{j,1} \\
-\frac{\partial \mathcal{L}}{\partial b_{j,t}} &= \left(\sum_{i=1}^N \alpha_{i,t-1} a_{i,j}\right) \beta_{j,t}
+\frac{\partial \mathcal{L}}{\partial b_{j,t}} &= \left(\sum_{i=1}^N \alpha_{i,t-1} a_{i,j,t-1}\right) \beta_{j,t}
\end{align*}
```
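These identities are easy to sanity-check numerically against a finite difference of the likelihood, reusing the recursion sketch above (the step size `ε` and basis vector `e_i` are hypothetical choices of this sketch):

```julia
# Likelihood as a function of the parameters, for finite-difference checks.
function likelihood(init, A, B)
    α, _ = forward_backward_sketch(init, A, B)
    return sum(α[:, end])
end

# For small ε, compare against ∂𝓛/∂π_i = β_{i,1} b_{i,1}:
#   (likelihood(init .+ ε .* e_i, A, B) - likelihood(init, A, B)) / ε ≈ β[i, 1] * B[i, 1]
```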

@@ -98,8 +100,8 @@ and satisfy the dynamic programming equations

```math
\begin{align*}
-\hat{\alpha}_{j,t+1} & = \left(\sum_{i=1}^N \bar{\alpha}_{i,t} a_{i,j}\right) \frac{b_{j,t+1}}{m_{t+1}} & c_{t+1} & = \frac{1}{\sum_j \hat{\alpha}_{j,t+1}} & \bar{\alpha}_{j,t+1} = c_{t+1} \hat{\alpha}_{j,t+1} \\
-\hat{\beta}_{i,t} & = \sum_{j=1}^N a_{i,j} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1} & && \bar{\beta}_{j,t} = c_t \hat{\beta}_{j,t}
+\hat{\alpha}_{j,t+1} & = \left(\sum_{i=1}^N \bar{\alpha}_{i,t} a_{i,j,t}\right) \frac{b_{j,t+1}}{m_{t+1}} & c_{t+1} & = \frac{1}{\sum_j \hat{\alpha}_{j,t+1}} & \bar{\alpha}_{j,t+1} = c_{t+1} \hat{\alpha}_{j,t+1} \\
+\hat{\beta}_{i,t} & = \sum_{j=1}^N a_{i,j,t} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1} & && \bar{\beta}_{j,t} = c_t \hat{\beta}_{j,t}
\end{align*}
```
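A sketch of the scaled forward pass follows; the normalizers $m_t$ are taken here as columnwise maxima of $B$, which is one possible convention and an assumption of this sketch:

```julia
# Scaled forward pass: α̂ is renormalized into ᾱ at each step, keeping the
# values in a numerically safe range (illustrative sketch).
function scaled_forward_sketch(init, A, B)
    N, T = size(B)
    αbar = zeros(N, T)
    c = zeros(T)
    m = vec(maximum(B; dims=1))       # assumed choice of normalizers m_t
    αhat = init .* B[:, 1] ./ m[1]
    c[1] = inv(sum(αhat))
    αbar[:, 1] = c[1] .* αhat
    for t in 1:(T - 1)
        αhat = (A[:, :, t]' * αbar[:, t]) .* B[:, t + 1] ./ m[t + 1]
        c[t + 1] = inv(sum(αhat))
        αbar[:, t + 1] = c[t + 1] .* αhat
    end
    return αbar, c, m
end
```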

@@ -140,9 +142,9 @@ We can now express the marginals using scaled variables:
```math
\begin{align*}
\xi_{i,j,t} & = \frac{1}{\mathcal{L}} \alpha_{i,t} a_{i,j} b_{j,t+1} \beta_{j,t+1} \\
-&= \frac{1}{\mathcal{L}} \left(\bar{\alpha}_{i,t} \prod_{s=1}^t \frac{m_s}{c_s}\right) a_{i,j} b_{j,t+1} \left(\bar{\beta}_{j,t+1} \frac{1}{c_{t+1}} \prod_{s=t+2}^T \frac{m_s}{c_s}\right) \\
-&= \frac{1}{\mathcal{L}} \bar{\alpha}_{i,t} a_{i,j} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1} \left(\prod_{s=1}^T \frac{m_s}{c_s}\right) \\
-&= \bar{\alpha}_{i,t} a_{i,j} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1}
+&= \frac{1}{\mathcal{L}} \left(\bar{\alpha}_{i,t} \prod_{s=1}^t \frac{m_s}{c_s}\right) a_{i,j,t} b_{j,t+1} \left(\bar{\beta}_{j,t+1} \frac{1}{c_{t+1}} \prod_{s=t+2}^T \frac{m_s}{c_s}\right) \\
+&= \frac{1}{\mathcal{L}} \bar{\alpha}_{i,t} a_{i,j,t} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1} \left(\prod_{s=1}^T \frac{m_s}{c_s}\right) \\
+&= \bar{\alpha}_{i,t} a_{i,j,t} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1}
\end{align*}
```

@@ -179,10 +181,10 @@ And for the statewise observation likelihoods,

```math
\begin{align*}
-\frac{\partial \mathcal{L}}{\partial b_{j,t}} &= \left(\sum_{i=1}^N \alpha_{i,t-1} a_{i,j}\right) \beta_{j,t} \\
-&= \sum_{i=1}^N \left(\bar{\alpha}_{i,t-1} \prod_{s=1}^{t-1} \frac{m_s}{c_s}\right) a_{i,j} \left(\bar{\beta}_{j,t} \frac{1}{c_t} \prod_{s=t+1}^T \frac{m_s}{c_s} \right) \\
-&= \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j} \bar{\beta}_{j,t} \frac{1}{m_t} \left(\prod_{s=1}^T \frac{m_s}{c_s}\right) \\
-&= \mathcal{L} \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j} \bar{\beta}_{j,t} \frac{1}{m_t} \\
+\frac{\partial \mathcal{L}}{\partial b_{j,t}} &= \left(\sum_{i=1}^N \alpha_{i,t-1} a_{i,j,t-1}\right) \beta_{j,t} \\
+&= \sum_{i=1}^N \left(\bar{\alpha}_{i,t-1} \prod_{s=1}^{t-1} \frac{m_s}{c_s}\right) a_{i,j,t-1} \left(\bar{\beta}_{j,t} \frac{1}{c_t} \prod_{s=t+1}^T \frac{m_s}{c_s} \right) \\
+&= \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j,t-1} \bar{\beta}_{j,t} \frac{1}{m_t} \left(\prod_{s=1}^T \frac{m_s}{c_s}\right) \\
+&= \mathcal{L} \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j,t-1} \bar{\beta}_{j,t} \frac{1}{m_t} \\
\end{align*}
```

@@ -199,7 +201,7 @@ To sum up,
\frac{\partial \log \mathcal{L}}{\partial \pi_i} &= \frac{b_{i,1}}{m_1} \bar{\beta}_{i,1} \\
\frac{\partial \log \mathcal{L}}{\partial a_{i,j}} &= \sum_{t=1}^{T-1} \bar{\alpha}_{i,t} \frac{b_{j,t+1}}{m_{t+1}} \bar{\beta}_{j,t+1} \\
\frac{\partial \log \mathcal{L}}{\partial \log b_{j,1}} &= \pi_j \frac{b_{j,1}}{m_1} \bar{\beta}_{j,1} = \frac{\bar{\alpha}_{j,1} \bar{\beta}_{j,1}}{c_1} = \gamma_{j,1} \\
-\frac{\partial \log \mathcal{L}}{\partial \log b_{j,t}} &= \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j} \frac{b_{j,t}}{m_t} \bar{\beta}_{j,t} = \frac{\bar{\alpha}_{j,t} \bar{\beta}_{j,t}}{c_t} = \gamma_{j,t}
+\frac{\partial \log \mathcal{L}}{\partial \log b_{j,t}} &= \sum_{i=1}^N \bar{\alpha}_{i,t-1} a_{i,j,t-1} \frac{b_{j,t}}{m_t} \bar{\beta}_{j,t} = \frac{\bar{\alpha}_{j,t} \bar{\beta}_{j,t}}{c_t} = \gamma_{j,t}
\end{align*}
```
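The last two identities are the practically useful ones: once the scaled variables are available, the posterior marginals (and hence the gradients with respect to $\log b$) come from a single elementwise operation, e.g.:

```julia
# γ[j, t] = ᾱ[j, t] * β̄[j, t] / c[t], per the identity above (illustrative).
posterior_marginals(αbar, βbar, c) = (αbar .* βbar) ./ c'
```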

4 changes: 2 additions & 2 deletions examples/basics.jl
@@ -30,7 +30,7 @@ Any scalar- or vector-valued distribution from [Distributions.jl](https://github
init = [0.6, 0.4]
trans = [0.7 0.3; 0.3 0.7]
dists = [MvNormal([-0.5, -0.8], I), MvNormal([0.5, 0.8], I)]
-hmm = HMM(init, trans, dists);
+hmm = HMM(init, trans, dists)

# ## Simulation

@@ -143,7 +143,7 @@ Since it is a local optimization procedure, it requires a starting point that is

init_guess = [0.5, 0.5]
trans_guess = [0.6 0.4; 0.4 0.6]
-dists_guess = [MvNormal([-0.6, -0.7], I), MvNormal([0.6, 0.7], I)]
+dists_guess = [MvNormal([-0.4, -0.7], I), MvNormal([0.4, 0.7], I)]
hmm_guess = HMM(init_guess, trans_guess, dists_guess);

#=
2 changes: 1 addition & 1 deletion examples/interfaces.jl
@@ -105,7 +105,7 @@ If we implement `fit!`, Baum-Welch also works seamlessly.

init_guess = [0.5, 0.5]
trans_guess = [0.6 0.4; 0.4 0.6]
-dists_guess = [StuffDist(-0.5), StuffDist(+0.5)]
+dists_guess = [StuffDist(-0.7), StuffDist(+0.7)]
hmm_guess = HMM(init_guess, trans_guess, dists_guess);

#-
4 changes: 2 additions & 2 deletions examples/types.jl
@@ -51,7 +51,7 @@ Note that uncertainty on the transition parameters would throw an error because
=#

dists_guess = [Normal(-1.0 ± 0.1), Normal(1.0 ± 0.2)]
-hmm_uncertain = HMM(init, trans, dists_guess);
+hmm_uncertain = HMM(init, trans, dists_guess)

#=
Every quantity we compute with this new HMM will have propagated uncertainties around it.
@@ -98,7 +98,7 @@ trans = sparse([

init = [0.2, 0.6, 0.2]
dists = [Normal(-2.0), Normal(0.0), Normal(+2.0)]
-hmm = HMM(init, trans, dists);
+hmm = HMM(init, trans, dists)

#=
When we simulate it, the transitions outside of the nonzero coefficients simply cannot happen.
4 changes: 2 additions & 2 deletions libs/HMMTest/src/allocations.jl
@@ -46,8 +46,8 @@ function test_allocations(
)
        HMMs.forward_backward!(fb_storage, hmm, obs_seq, control_seq; seq_ends)
        allocs_bw = @ballocated fit!(
-            $hmm_guess, $fb_storage, $obs_seq, $control_seq; seq_ends=$seq_ends
-        ) evals = 1 samples = 1 setup = (hmm_guess = deepcopy($hmm))
+            hmm_guess_copy, $fb_storage, $obs_seq, $control_seq; seq_ends=$seq_ends
+        ) evals = 1 samples = 1 setup = (hmm_guess_copy = deepcopy($hmm_guess))
        @test_broken allocs_bw == 0
    end
end
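The change above is the standard pattern for benchmarking a mutating function without leaking state: bind a fresh `deepcopy` in `setup` so each evaluation mutates its own copy instead of the shared fixture. A self-contained sketch, where `Acc` and `grow!` are hypothetical stand-ins for the HMM and `fit!`:

```julia
using BenchmarkTools

mutable struct Acc
    x::Float64
end
grow!(a::Acc) = (a.x += 1.0; a)

template = Acc(0.0)
# `a` is rebound by `setup` before the evaluation, so `template` stays untouched
allocs = @ballocated grow!(a) evals = 1 samples = 1 setup = (a = deepcopy($template))
template.x == 0.0   # true: the benchmark no longer leaks into the fixture
```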
37 changes: 23 additions & 14 deletions libs/HMMTest/src/coherence.jl
@@ -1,46 +1,55 @@
infnorm(x) = maximum(abs, x)

-function are_equal_hmms(
+function test_equal_hmms(
    hmm1::AbstractHMM,
    hmm2::AbstractHMM,
    control_seq::AbstractVector;
    atol::Real,
    init::Bool,
-    test::Bool,
+    flip::Bool=false,
)
-    equal_check = true

    if init
        init1 = initialization(hmm1)
        init2 = initialization(hmm2)
-        test && @test isapprox(init1, init2; atol, norm=infnorm)
-        equal_check = equal_check && isapprox(init1, init2; atol, norm=infnorm)
+        if flip
+            @test !isapprox(init1, init2; atol, norm=infnorm)
+        else
+            @test isapprox(init1, init2; atol, norm=infnorm)
+        end
    end

    for control in control_seq
        trans1 = transition_matrix(hmm1, control)
        trans2 = transition_matrix(hmm2, control)
-        test && @test isapprox(trans1, trans2; atol, norm=infnorm)
-        equal_check = equal_check && isapprox(trans1, trans2; atol, norm=infnorm)
+        if flip
+            @test !isapprox(trans1, trans2; atol, norm=infnorm)
+        else
+            @test isapprox(trans1, trans2; atol, norm=infnorm)
+        end
    end

    for control in control_seq
        dists1 = obs_distributions(hmm1, control)
        dists2 = obs_distributions(hmm2, control)
        for (dist1, dist2) in zip(dists1, dists2)
            for field in fieldnames(typeof(dist1))
-                if startswith(string(field), "log")
+                if startswith(string(field), "log") ||
+                    contains("σ", string(field)) ||
+                    contains("Σ", string(field))
                    continue
                end
                x1 = getfield(dist1, field)
                x2 = getfield(dist2, field)
-                test && @test isapprox(x1, x2; atol, norm=infnorm)
-                equal_check = equal_check && isapprox(x1, x2; atol, norm=infnorm)
+                if flip
+                    @test !isapprox(x1, x2; atol, norm=infnorm)
+                else
+                    @test isapprox(x1, x2; atol, norm=infnorm)
+                end
            end
        end
    end

-    return equal_check
+    return nothing
end

function test_coherent_algorithms(
@@ -81,8 +90,8 @@ function test_coherent_algorithms(
        if !isnothing(hmm_guess)
            hmm_est, logL_evolution = baum_welch(hmm_guess, obs_seq, control_seq; seq_ends)
            @test all(>=(0), diff(logL_evolution))
-            @test !are_equal_hmms(hmm, hmm_guess, control_seq[1:2]; atol, init, test=false)
-            are_equal_hmms(hmm, hmm_est, control_seq[1:2]; atol, init, test=true)
+            test_equal_hmms(hmm, hmm_guess, control_seq[1:2]; atol, init, flip=true)
+            test_equal_hmms(hmm, hmm_est, control_seq[1:2]; atol, init)
        end
    end
end
10 changes: 2 additions & 8 deletions libs/HMMTest/src/hmmbase.jl
@@ -50,14 +50,8 @@ function test_identical_hmmbase(
            @test isapprox(
                logL_evolution[(begin + 1):end], 2 * logL_evolution_base[begin:(end - 1)]
            )
-            are_equal_hmms(
-                hmm_est,
-                HMM(hmm_est_base.a, hmm_est_base.A, hmm_est_base.B),
-                [nothing];
-                atol,
-                init=true,
-                test=true,
-            )
+            hmm_est_base_converted = HMM(hmm_est_base.a, hmm_est_base.A, hmm_est_base.B)
+            test_equal_hmms(hmm_est, hmm_est_base_converted, [nothing]; atol, init=true)
        end
    end
end
7 changes: 7 additions & 0 deletions src/types/hmm.jl
@@ -26,6 +26,13 @@ function Base.copy(hmm::HMM)
    return HMM(copy(hmm.init), copy(hmm.trans), copy(hmm.dists))
end

+function Base.show(io::IO, hmm::HMM)
+    return print(
+        io,
+        "Hidden Markov Model with:\n - initialization: $(hmm.init)\n - transition matrix: $(hmm.trans)\n - observation distributions: $(hmm.dists)",
+    )
+end
+
initialization(hmm::HMM) = hmm.init
transition_matrix(hmm::HMM) = hmm.trans
obs_distributions(hmm::HMM) = hmm.dists
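A hypothetical REPL interaction with the new method; the exact type annotations printed for the distributions depend on Distributions.jl's own `show`:

```julia
julia> using Distributions, HiddenMarkovModels

julia> hmm = HMM([0.6, 0.4], [0.7 0.3; 0.3 0.7], [Normal(-1.0), Normal(1.0)])
Hidden Markov Model with:
 - initialization: [0.6, 0.4]
 - transition matrix: [0.7 0.3; 0.3 0.7]
 - observation distributions: [Normal{Float64}(μ=-1.0, σ=1.0), Normal{Float64}(μ=1.0, σ=1.0)]
```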
1 change: 1 addition & 0 deletions src/utils/linalg.jl
@@ -18,6 +18,7 @@ function mul_rows_cols!(
    @argcheck size(B) == size(A) == (length(l), length(r))
    @argcheck nnz(B) == nnz(A)
    for j in axes(B, 2)
+        @argcheck nzrange(B, j) == nzrange(A, j)
        for k in nzrange(B, j)
            i = B.rowval[k]
            B.nzval[k] = l[i] * A.nzval[k] * r[j]
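For context, `mul_rows_cols!` computes `B[i, j] = l[i] * A[i, j] * r[j]` over the stored entries, and the new check enforces that `A` and `B` share the same column-by-column structure. A sketch of the equivalence under that assumption:

```julia
using LinearAlgebra, SparseArrays

A = sprand(4, 4, 0.5)
B = copy(A)                          # same sparsity structure as A
l, r = rand(4), rand(4)

# same loop as above, written standalone
for j in axes(B, 2), k in nzrange(B, j)
    i = B.rowval[k]
    B.nzval[k] = l[i] * A.nzval[k] * r[j]
end

B ≈ Diagonal(l) * A * Diagonal(r)    # true: rows scaled by l, columns by r
```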
8 changes: 3 additions & 5 deletions test/correctness.jl
@@ -18,8 +18,8 @@ T, K = 100, 200
init = [0.4, 0.6]
init_guess = [0.5, 0.5]

-trans = [0.8 0.2; 0.2 0.8]
-trans_guess = [0.7 0.3; 0.3 0.7]
+trans = [0.7 0.3; 0.3 0.7]
+trans_guess = [0.6 0.4; 0.4 0.6]

p = [[0.8, 0.2], [0.2, 0.8]]
p_guess = [[0.7, 0.3], [0.3, 0.7]]
@@ -58,9 +58,7 @@ end
    hmm_guess = HMM(init_guess, trans_guess, dists_guess)

    test_identical_hmmbase(rng, hmm, T; hmm_guess)
-    @test_skip test_coherent_algorithms(
-        rng, hmm, control_seq; seq_ends, hmm_guess, init=false
-    )
+    test_coherent_algorithms(rng, hmm, control_seq; seq_ends, hmm_guess, init=false)
    test_type_stability(rng, hmm, control_seq; seq_ends, hmm_guess)
end
