From ca9615bcda1e65abe3e38cb474b035015809173c Mon Sep 17 00:00:00 2001 From: szhan Date: Fri, 19 Apr 2024 09:02:34 +0100 Subject: [PATCH] Refactor --- lshmm/api.py | 108 ++++--- lshmm/core.py | 85 ++++++ lshmm/{forward_backward => }/fb_diploid.py | 256 ++++++---------- lshmm/{forward_backward => }/fb_haploid.py | 49 ++- lshmm/forward_backward/__init__.py | 0 lshmm/vit_diploid.py | 331 ++++++++------------- lshmm/vit_haploid.py | 155 +++++----- tests/lsbase.py | 296 ++++++++++++++++++ tests/test_API.py | 305 ++++--------------- tests/test_API_multiallelic.py | 213 ++----------- tests/test_LS_haploid_diploid.py | 105 +++---- 11 files changed, 911 insertions(+), 992 deletions(-) create mode 100644 lshmm/core.py rename lshmm/{forward_backward => }/fb_diploid.py (61%) rename lshmm/{forward_backward => }/fb_haploid.py (53%) delete mode 100644 lshmm/forward_backward/__init__.py create mode 100644 tests/lsbase.py diff --git a/lshmm/api.py b/lshmm/api.py index f917ecf..f6e7946 100644 --- a/lshmm/api.py +++ b/lshmm/api.py @@ -4,8 +4,15 @@ import numpy as np -from .forward_backward.fb_diploid import backward_ls_dip_loop, forward_ls_dip_loop -from .forward_backward.fb_haploid import backwards_ls_hap, forwards_ls_hap +from . import core +from .fb_diploid import ( + backward_ls_dip_loop, + forward_ls_dip_loop, +) +from .fb_haploid import ( + backwards_ls_hap, + forwards_ls_hap, +) from .vit_diploid import ( backwards_viterbi_dip, forwards_viterbi_dip_low_mem, @@ -18,15 +25,6 @@ path_ll_hap, ) -EQUAL_BOTH_HOM = 4 -UNEQUAL_BOTH_HOM = 0 -BOTH_HET = 7 -REF_HOM_OBS_HET = 1 -REF_HET_OBS_HOM = 2 -MISSING_INDEX = 3 - -MISSING = -1 - def check_alleles(alleles, m): """ @@ -52,7 +50,7 @@ def check_alleles(alleles, m): if isinstance(alleles[0], str): return np.int8([len(alleles) for _ in range(m)]) # Otherwise, process allele lists. - exclusion_set = np.array([MISSING]) + exclusion_set = np.array([core.MISSING]) n_alleles = np.zeros(num_sites, dtype=np.int8) for i in range(num_sites): uniq_alleles = np.unique(alleles[i]) @@ -68,7 +66,7 @@ def checks( scale_mutation_based_on_n_alleles, ): """ - Checks that the input data and parameters are valid. + Check that the input data and parameters are valid. The reference panel must be a matrix of size (m, n) or (m, n, n). The query must be a matrix of size (k, m) or (k, m, 2). @@ -77,17 +75,21 @@ def checks( n = number of samples in the reference panel (haplotypes, not individuals). k = number of samples in the query (haplotypes, not individuals). + The mutation rate can be scaled according to the set of alleles + that can be mutated to based on the number of distinct alleles at each site. + :param numpy.ndarray(dtype=int) reference_panel: Matrix of size (m, n) or (m, n, n). :param numpy.ndarray(dtype=int) query: Matrix of size (k, m) or (k, m, 2). :param numpy.ndarray(dtype=float) p_mutation: Scalar or vector of length m. :param numpy.ndarray(dtype=float) p_recombination: Scalar or vector of length m. - :param bool scale_mutation_based_on_n_alleles: Whether to scale the mutation probability to the set of alleles that can be mutated to based on the number of alleles (True) or not (False). + :param bool scale_mutation_based_on_n_alleles: Scale the mutation probability or not. :return: n, m, ploidy :rtype: tuple """ # Check reference panel if not len(reference_panel.shape) in (2, 3): - raise ValueError("Reference panel array must have 2 or 3 dimensions.") + err_msg = "Reference panel array must have 2 or 3 dimensions." 
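To make the documented shape conventions concrete, here is a minimal sketch with toy NumPy arrays (all sizes are invented for illustration; only the shapes matter):

```python
import numpy as np

m, n, k = 5, 4, 2  # sites, panel haplotypes, query haplotypes (toy values)

ref_hap = np.zeros((m, n), dtype=np.int8)     # haploid panel: sites x samples -> ploidy 1
ref_dip = np.zeros((m, n, n), dtype=np.int8)  # diploid panel: sites x samples x samples -> ploidy 2
query = np.zeros((k, m), dtype=np.int8)       # queries: samples x sites

assert query.shape[1] == ref_hap.shape[0]  # site counts must agree, else checks() raises ValueError
```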
+ raise ValueError(err_msg) if len(reference_panel.shape) == 2: m, n = reference_panel.shape @@ -97,42 +99,49 @@ def checks( ploidy = 2 if ploidy == 2 and (reference_panel.shape[1] != reference_panel.shape[2]): - raise ValueError( - "Reference_panel dimensions are incorrect, perhaps a sample x sample x variant matrix was passed. Expected sites x samples x samples." + err_msg = ( + "Reference_panel dimensions are incorrect, " + "perhaps a sample x sample x variant matrix was passed. " + "Expected sites x samples x samples." ) + raise ValueError(err_msg) # Check query sequence(s) if query.shape[1] != m: - raise ValueError( - "Number of sites in query does not match reference panel. If haploid, ensure a sites x samples matrix is passed." + err_msg = ( + "Number of sites in query does not match reference panel. " + "If haploid, ensure a sites x samples matrix is passed." ) + raise ValueError(err_msg) - # Ensure that the mutation probability is either a scalar or vector of length m + # Ensure that the mutation probability is either a scalar or vector of length m. if isinstance(p_mutation, (int, float)): if not scale_mutation_based_on_n_alleles: - warnings.warn( - "Passed a scalar probability of mutation, but not rescaling this probability of mutation conditional on the number of alleles at the site." - ) + warn_msg = "Passed a scalar mutation probability, but not rescaling it." + warnings.warn(warn_msg) elif isinstance(p_mutation, np.ndarray) and p_mutation.shape[0] == m: if scale_mutation_based_on_n_alleles: - warnings.warn( - "Passed a vector of probabilities of mutation, but rescaling each mutation probability conditional on the number of alleles at each site." - ) + warn_msg = "Passed a vector of mutation probabilities. Rescaling them." + warnings.warn(warn_msg) elif p_mutation is None: - warnings.warn( - "No mutation probability passed, setting mutation probability based on Li and Stephens 2003, equations (A2) and (A3)" + warn_msg = ( + "No mutation probability passed. " + "Setting it based on Li & Stephens (2003) equations A2 and A3." ) + warnings.warn(warn_msg) else: - raise ValueError( - f"Mutation probability is not None, a scalar, or vector of length m: {m}" + err_msg = ( + f"Mutation probability is not None, a scalar, or vector of length {m}." ) + raise ValueError(err_msg) # Ensure that the recombination probability is either a scalar or a vector of length m if not ( isinstance(p_recombination, (int, float)) or (isinstance(p_recombination, np.ndarray) and p_recombination.shape[0] == m) ): - raise ValueError(f"p_Recombination is not a scalar or vector of length m: {m}") + err_msg = f"Recombination probability is not a scalar or vector of length {m}." + raise ValueError(err_msg) return (n, m, ploidy) @@ -148,9 +157,10 @@ def set_emission_probabilities( scale_mutation_based_on_n_alleles, ): # Check alleles should go in here, and modify e before passing to the algorithm - # If alleles is not passed, we don't perform a test of alleles, but set n_alleles based on the reference_panel. + # If alleles is not passed, we don't perform a test of alleles, + # but set n_alleles based on the reference_panel. 
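For the parameter checks above, a short sketch of the accepted forms (values are placeholders; pinning the first recombination entry to zero is the convention the test suite uses):

```python
import numpy as np

m = 5  # number of sites (toy value)

p_mutation = 0.01              # scalar form ...
p_mutation = np.full(m, 0.01)  # ... or a per-site vector of length m; anything else raises

p_recombination = np.full(m, 1e-4)
p_recombination[0] = 0  # the test suite pins the first entry to zero
```

Note the warning pairing: a scalar mutation probability with `scale_mutation_based_on_n_alleles=False` triggers the first warning, a vector with `True` triggers the second.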
     if alleles is None:
-        exclusion_set = np.array([MISSING])
+        exclusion_set = np.array([core.MISSING])
         n_alleles = np.zeros(m, dtype=np.int8)
         for j in range(reference_panel.shape[0]):
             uniq_alleles = np.unique(np.append(reference_panel[j, :], query[:, j]))
@@ -159,7 +169,7 @@ def set_emission_probabilities(
         n_alleles = check_alleles(alleles, m)

     if p_mutation is None:
-        # Set the mutation probability to be the proposed mutation probability in Li and Stephens (2003).
+        # Set the mutation probability to be the proposed mutation probability in Li & Stephens (2003).
         theta_tilde = 1 / np.sum([1 / k for k in range(1, n - 1)])
         p_mutation = 0.5 * (theta_tilde / (n + theta_tilde))
@@ -172,15 +182,18 @@ def set_emission_probabilities(
         e = np.zeros((m, 2))
         if scale_mutation_based_on_n_alleles:
-            # Scale mutation based on the number of alleles - so p_mutation is probability of mutation any given one of the alleles.
+            # Scale mutation based on the number of alleles,
+            # so p_mutation is the probability of mutating to any given one of the alleles.
             # The overall mutation probability is then (n_alleles - 1) * p_mutation.
             e[:, 0] = p_mutation - p_mutation * np.equal(
                 n_alleles, np.ones(m)
             )  # Added boolean in case we're at an invariant site
             e[:, 1] = 1 - (n_alleles - 1) * p_mutation
         else:
-            # No scaling based on the number of alleles - so p_mutation is the probability of mutation to anything
-            # (summing over the states we can switch to). This means that we must rescale the probability of mutation to
+            # No scaling based on the number of alleles,
+            # so p_mutation is the probability of mutation to anything
+            # (summing over the states we can switch to).
+            # This means that we must rescale the probability of mutation to
             # a different allele by the number of alleles at the site.
             for j in range(m):
                 if n_alleles[j] == 1:  # In case we're at an invariant site
@@ -194,12 +207,12 @@ def set_emission_probabilities(
         # Evaluate emission probabilities here, using the mutation probability - this can take a scalar or vector.
         # DEV: there's a wrinkle here.
         e = np.zeros((m, 8))
-        e[:, EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
-        e[:, UNEQUAL_BOTH_HOM] = p_mutation**2
-        e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
-        e[:, REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
-        e[:, REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
-        e[:, MISSING_INDEX] = 1
+        e[:, core.EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
+        e[:, core.UNEQUAL_BOTH_HOM] = p_mutation**2
+        e[:, core.BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
+        e[:, core.REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
+        e[:, core.REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
+        e[:, core.MISSING_INDEX] = 1

     return e
@@ -233,8 +246,7 @@ def forwards(
     norm=True,
 ):
     """
-    Run the Li and Stephens forwards algorithm on haplotype or
-    unphased genotype data.
+    Run the Li & Stephens forwards algorithm on haplotype or unphased genotype data.
     """
     n, m, ploidy = checks(
         reference_panel,
@@ -281,8 +293,7 @@ def backwards(
     scale_mutation_based_on_n_alleles=True,
 ):
     """
-    Run the Li and Stephens backwards algorithm on haplotype or
-    unphased genotype data.
+    Run the Li & Stephens backwards algorithm on haplotype or unphased genotype data.
     """
     n, m, ploidy = checks(
         reference_panel,
@@ -330,8 +341,7 @@ def viterbi(
     scale_mutation_based_on_n_alleles=True,
 ):
     """
-    Run the Li and Stephens Viterbi algorithm on haplotype or
-    unphased genotype data.
+    Run the Li & Stephens Viterbi algorithm on haplotype or unphased genotype data.
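The two emission branches in set_emission_probabilities above can be recomputed standalone. This sketch (invented `mu` and `n_alleles`, not library code) builds the two-column haploid matrix, where column 0 is the mutation case and column 1 the no-mutation case:

```python
import numpy as np

m = 3
mu = np.full(m, 0.1)
n_alleles = np.array([1, 2, 4])  # an invariant, a biallelic, and a quadallelic site

# Scaled: mu is the probability of mutating to each alternative allele.
e_scaled = np.zeros((m, 2))
e_scaled[:, 0] = mu - mu * (n_alleles == 1)  # zero at invariant sites
e_scaled[:, 1] = 1 - (n_alleles - 1) * mu

# Unscaled: mu is the total mutation probability, split across the alternatives.
e_unscaled = np.zeros((m, 2))
for j in range(m):
    if n_alleles[j] == 1:
        e_unscaled[j] = [0, 1]
    else:
        e_unscaled[j] = [mu[j] / (n_alleles[j] - 1), 1 - mu[j]]
```

In both cases each row sums to one over the n_alleles possible observed alleles.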
""" n, m, ploidy = checks( reference_panel, diff --git a/lshmm/core.py b/lshmm/core.py new file mode 100644 index 0000000..30f86d7 --- /dev/null +++ b/lshmm/core.py @@ -0,0 +1,85 @@ +import numpy as np + +from lshmm import jit + + +EQUAL_BOTH_HOM = 4 +UNEQUAL_BOTH_HOM = 0 +BOTH_HET = 7 +REF_HOM_OBS_HET = 1 +REF_HET_OBS_HOM = 2 +MISSING_INDEX = 3 + +MISSING = -1 + + +""" Helper functions. """ + + +# https://github.com/numba/numba/issues/1269 +@jit.numba_njit +def np_apply_along_axis(func1d, axis, arr): + """Create Numpy-like functions for max, sum, etc.""" + assert arr.ndim == 2 + assert axis in [0, 1] + if axis == 0: + result = np.empty(arr.shape[1]) + for i in range(len(result)): + result[i] = func1d(arr[:, i]) + else: + result = np.empty(arr.shape[0]) + for i in range(len(result)): + result[i] = func1d(arr[i, :]) + return result + + +@jit.numba_njit +def np_amax(array, axis): + """Numba implementation of Numpy-vectorised max.""" + return np_apply_along_axis(np.amax, axis, array) + + +@jit.numba_njit +def np_sum(array, axis): + """Numba implementation of Numpy-vectorised sum.""" + return np_apply_along_axis(np.sum, axis, array) + + +@jit.numba_njit +def np_argmax(array, axis): + """Numba implementation of Numpy-vectorised argmax.""" + return np_apply_along_axis(np.argmax, axis, array) + + +""" Functions used across different implementations of the LS HMM. """ + + +@jit.numba_njit +def get_index_in_emission_matrix(ref_allele, query_allele): + is_allele_match = np.equal(ref_allele, query_allele) + is_query_missing = query_allele == MISSING + if is_allele_match or is_query_missing: + return 1 + return 0 + + +@jit.numba_njit +def get_index_in_emission_matrix_diploid(ref_allele, query_allele): + if query_allele == MISSING: + return MISSING_INDEX + else: + is_match = ref_allele == query_allele + is_ref_one = ref_allele == 1 + is_query_one = query_allele == 1 + return 4 * is_match + 2 * is_ref_one + is_query_one + + +@jit.numba_njit +def get_index_in_emission_matrix_diploid_G(ref_G, query_allele, n): + if query_allele == MISSING: + return MISSING_INDEX * np.ones((n, n), dtype=np.int64) + else: + is_match = ref_G == query_allele + is_ref_one = ref_G == 1 + is_query_one = query_allele == 1 + return 4 * is_match + 2 * is_ref_one + is_query_one diff --git a/lshmm/forward_backward/fb_diploid.py b/lshmm/fb_diploid.py similarity index 61% rename from lshmm/forward_backward/fb_diploid.py rename to lshmm/fb_diploid.py index 50ffe12..84c1a25 100644 --- a/lshmm/forward_backward/fb_diploid.py +++ b/lshmm/fb_diploid.py @@ -1,74 +1,29 @@ -"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples.""" +""" +Various implementations of the Li & Stephens forwards-backwards algorithm on diploid genotype data, +where the data is structured as variants x samples x samples. 
+""" + import numpy as np +from lshmm import core from lshmm import jit -EQUAL_BOTH_HOM = 4 -UNEQUAL_BOTH_HOM = 0 -BOTH_HET = 7 -REF_HOM_OBS_HET = 1 -REF_HET_OBS_HOM = 2 - -MISSING = -1 -MISSING_INDEX = 3 - - -# https://github.com/numba/numba/issues/1269 -@jit.numba_njit -def np_apply_along_axis(func1d, axis, arr): - """Create numpy-like functions for max, sum etc.""" - assert arr.ndim == 2 - assert axis in [0, 1] - if axis == 0: - result = np.empty(arr.shape[1]) - for i in range(len(result)): - result[i] = func1d(arr[:, i]) - else: - result = np.empty(arr.shape[0]) - for i in range(len(result)): - result[i] = func1d(arr[i, :]) - return result - - -@jit.numba_njit -def np_amax(array, axis): - """Numba implementation of numpy vectorised maximum.""" - return np_apply_along_axis(np.amax, axis, array) - - -@jit.numba_njit -def np_sum(array, axis): - """Numba implementation of numpy vectorised sum.""" - return np_apply_along_axis(np.sum, axis, array) - - -@jit.numba_njit -def np_argmax(array, axis): - """Numba implementation of numpy vectorised argmax.""" - return np_apply_along_axis(np.argmax, axis, array) - def forwards_ls_dip(n, m, G, s, e, r, norm=True): - """Matrix based diploid LS forward algorithm using numpy vectorisation.""" - # Initialise the forward tensor + """A matrix-based implementation using Numpy vectorisation.""" + # Initialise F = np.zeros((m, n, n)) F[0, :, :] = 1 / (n**2) c = np.ones(m) r_n = r / n - if s[0, 0] == MISSING: - index = MISSING_INDEX * np.ones( - (n, n), dtype=np.int64 - ) # We could have chosen anything here, this just implies a multiplication by a constant. - else: - index = 4 * np.equal(G[0, :, :], s[0, 0]).astype(np.int64) + 2 * ( - G[0, :, :] == 1 - ).astype(np.int64) - if s[0, 0] == 1: - index += 1 - - F[0, :, :] *= e[0, index] + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[0, :, :], + query_allele=s[0, 0], + n=n + ) + F[0, :, :] *= e[0, emission_index] if norm: c[0] = np.sum(F[0, :, :]) @@ -76,15 +31,11 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): # Forwards for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) + 2 * ( - G[l, :, :] == 1 - ).astype(np.int64) - - if s[0, l] == 1: - index += 1 + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n + ) # No change in both F[l, :, :] = (1 - r[l]) ** 2 * F[l - 1, :, :] @@ -93,11 +44,11 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): F[l, :, :] += (r_n[l]) ** 2 # One changes - sum_j = np_sum(F[l - 1, :, :], 0).repeat(n).reshape((-1, n)).T + sum_j = core.np_sum(F[l - 1, :, :], 0).repeat(n).reshape((-1, n)).T F[l, :, :] += ((1 - r[l]) * r_n[l]) * (sum_j + sum_j.T) # Emission - F[l, :, :] *= e[l, index] + F[l, :, :] *= e[l, emission_index] c[l] = np.sum(F[l, :, :]) F[l, :, :] *= 1 / c[l] @@ -105,15 +56,11 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): else: # Forwards for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) + 2 * ( - G[l, :, :] == 1 - ).astype(np.int64) - - if s[0, l] == 1: - index += 1 + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n + ) # No change in both F[l, :, :] = (1 - r[l]) ** 2 * F[l - 1, :, :] @@ -122,12 +69,12 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): F[l, :, :] += (r_n[l]) ** 
2 * np.sum(F[l - 1, :, :]) # One changes - sum_j = np_sum(F[l - 1, :, :], 0).repeat(n).reshape((-1, n)).T + sum_j = core.np_sum(F[l - 1, :, :], 0).repeat(n).reshape((-1, n)).T # sum_j2 = np_sum(F[l - 1, :, :], 1).repeat(n).reshape((-1, n)) F[l, :, :] += ((1 - r[l]) * r_n[l]) * (sum_j + sum_j.T) # Emission - F[l, :, :] *= e[l, index] + F[l, :, :] *= e[l, emission_index] ll = np.log10(np.sum(F[l, :, :])) @@ -135,39 +82,34 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): def backwards_ls_dip(n, m, G, s, e, c, r): - """Matrix based diploid LS backward algorithm using numpy vectorisation.""" - # Initialise the backward tensor - B = np.zeros((m, n, n)) - + """A matrix-based implementation using Numpy vectorisation.""" # Initialise + B = np.zeros((m, n, n)) B[m - 1, :, :] = 1 r_n = r / n # Backwards for l in range(m - 2, -1, -1): - if s[0, l + 1] == MISSING: - index = MISSING_INDEX * np.ones( - (n, n), dtype=np.int64 - ) # We could have chosen anything here, this just implies a multiplication by a constant. - else: - index = ( - 4 * np.equal(G[l + 1, :, :], s[0, l + 1]).astype(np.int64) - + 2 * (G[l + 1, :, :] == 1).astype(np.int64) - + np.int64(s[0, l + 1] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l + 1, :, :], + query_allele=s[0, l + 1], + n=n + ) # No change in both B[l, :, :] = r_n[l + 1] ** 2 * np.sum( - e[l + 1, index.reshape((n, n))] * B[l + 1, :, :] + e[l + 1, emission_index.reshape((n, n))] * B[l + 1, :, :] ) # Both change B[l, :, :] += ( - (1 - r[l + 1]) ** 2 * B[l + 1, :, :] * e[l + 1, index.reshape((n, n))] + (1 - r[l + 1]) ** 2 * B[l + 1, :, :] * e[l + 1, emission_index.reshape((n, n))] ) # One changes - sum_j = np_sum(B[l + 1, :, :] * e[l + 1, index], 0).repeat(n).reshape((-1, n)) + sum_j = ( + core.np_sum(B[l + 1, :, :] * e[l + 1, emission_index], 0).repeat(n).reshape((-1, n)) + ) B[l, :, :] += ((1 - r[l + 1]) * r_n[l + 1]) * (sum_j + sum_j.T) B[l, :, :] *= 1 / c[l + 1] @@ -177,24 +119,20 @@ def backwards_ls_dip(n, m, G, s, e, c, r): @jit.numba_njit def forward_ls_dip_starting_point(n, m, G, s, e, r): """Naive implementation of LS diploid forwards algorithm.""" - # Initialise the forward tensor + # Initialise F = np.zeros((m, n, n)) r_n = r / n + for j1 in range(n): for j2 in range(n): F[0, j1, j2] = 1 / (n**2) - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - F[0, j1, j2] *= e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], + query_allele=s[0, 0] + ) + F[0, j1, j2] *= e[0, emission_index] for l in range(1, m): - # Determine the various components F_no_change = np.zeros((n, n)) F_j1_change = np.zeros(n) F_j2_change = np.zeros(n) @@ -231,24 +169,24 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r): for j1 in range(n): for j2 in range(n): # What is the emission? 
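These nested diploid recursions all use the same three copying-state transition weights. A quick numeric check (toy `n` and `r`) that they sum to one over all n² destination pairs:

```python
import numpy as np

n = 4      # panel size (toy value)
r = 0.02   # per-site recombination probability (toy value)
r_n = r / n

no_switch = (1 - r) ** 2 + 2 * (1 - r) * r_n + r_n**2  # neither haplotype jumps
one_switch = r_n * (1 - r) + r_n**2                    # exactly one jumps (per target)
two_switch = r_n**2                                    # both jump (per target pair)

# Over all n**2 destination pairs the weights form a probability distribution.
total = no_switch + 2 * (n - 1) * one_switch + (n - 1) ** 2 * two_switch
assert np.isclose(total, 1.0)
```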
- if s[0, l] == MISSING: - F[l, j1, j2] *= e[l, MISSING_INDEX] + if s[0, l] == core.MISSING: + F[l, j1, j2] *= e[l, core.MISSING_INDEX] else: if s[0, l] == 1: # OBS is het if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, BOTH_HET] + F[l, j1, j2] *= e[l, core.BOTH_HET] else: # REF is hom - F[l, j1, j2] *= e[l, REF_HOM_OBS_HET] + F[l, j1, j2] *= e[l, core.REF_HOM_OBS_HET] else: # OBS is hom if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, REF_HET_OBS_HOM] + F[l, j1, j2] *= e[l, core.REF_HET_OBS_HOM] else: # REF is hom if G[l, j1, j2] == s[0, l]: # Equal - F[l, j1, j2] *= e[l, EQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.EQUAL_BOTH_HOM] else: # Unequal - F[l, j1, j2] *= e[l, UNEQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.UNEQUAL_BOTH_HOM] ll = np.log10(np.sum(F[l, :, :])) @@ -257,16 +195,13 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r): @jit.numba_njit def backward_ls_dip_starting_point(n, m, G, s, e, r): - """Naive implementation of LS diploid backwards algorithm.""" - # Backwards - B = np.zeros((m, n, n)) - + """A naive implementation.""" # Initialise + B = np.zeros((m, n, n)) B[m - 1, :, :] = 1 r_n = r / n for l in range(m - 2, -1, -1): - # Determine the various components B_no_change = np.zeros((n, n)) B_j1_change = np.zeros(n) B_j2_change = np.zeros(n) @@ -274,8 +209,8 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): # Evaluate the emission matrix at this site, for all pairs e_tmp = np.zeros((n, n)) - if s[0, l + 1] == MISSING: - e_tmp[:, :] = e[l + 1, MISSING_INDEX] + if s[0, l + 1] == core.MISSING: + e_tmp[:, :] = e[l + 1, core.MISSING_INDEX] else: for j1 in range(n): for j2 in range(n): @@ -283,18 +218,18 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): if s[0, l + 1] == 1: # OBS is het if G[l + 1, j1, j2] == 1: # REF is het - e_tmp[j1, j2] = e[l + 1, BOTH_HET] + e_tmp[j1, j2] = e[l + 1, core.BOTH_HET] else: # REF is hom - e_tmp[j1, j2] = e[l + 1, REF_HOM_OBS_HET] + e_tmp[j1, j2] = e[l + 1, core.REF_HOM_OBS_HET] else: # OBS is hom if G[l + 1, j1, j2] == 1: # REF is het - e_tmp[j1, j2] = e[l + 1, REF_HET_OBS_HOM] + e_tmp[j1, j2] = e[l + 1, core.REF_HET_OBS_HOM] else: # REF is hom if G[l + 1, j1, j2] == s[0, l + 1]: # Equal - e_tmp[j1, j2] = e[l + 1, EQUAL_BOTH_HOM] + e_tmp[j1, j2] = e[l + 1, core.EQUAL_BOTH_HOM] else: # Unequal - e_tmp[j1, j2] = e[l + 1, UNEQUAL_BOTH_HOM] + e_tmp[j1, j2] = e[l + 1, core.UNEQUAL_BOTH_HOM] for j1 in range(n): for j2 in range(n): @@ -335,21 +270,17 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): @jit.numba_njit def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): - """LS diploid forwards algoritm without vectorisation.""" - # Initialise the forward tensor + """LS diploid forwards algorithm without vectorisation.""" + # Initialise F = np.zeros((m, n, n)) for j1 in range(n): for j2 in range(n): F[0, j1, j2] = 1 / (n**2) - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - F[0, j1, j2] *= e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], + query_allele=s[0, 0] + ) + F[0, j1, j2] *= e[0, emission_index] r_n = r / n c = np.ones(m) @@ -358,7 +289,6 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): F[0, :, :] *= 1 / c[0] for l in range(1, m): - # Determine the various components F_no_change = np.zeros((n, n)) F_j_change = np.zeros(n) @@ -375,8 +305,8 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): for 
j2 in range(n): F[l, j1, j2] += F_no_change[j1, j2] - if s[0, l] == MISSING: - F[l, :, :] *= e[l, MISSING_INDEX] + if s[0, l] == core.MISSING: + F[l, :, :] *= e[l, core.MISSING_INDEX] else: for j1 in range(n): for j2 in range(n): @@ -384,18 +314,18 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): if s[0, l] == 1: # OBS is het if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, BOTH_HET] + F[l, j1, j2] *= e[l, core.BOTH_HET] else: # REF is hom - F[l, j1, j2] *= e[l, REF_HOM_OBS_HET] + F[l, j1, j2] *= e[l, core.REF_HOM_OBS_HET] else: # OBS is hom if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, REF_HET_OBS_HOM] + F[l, j1, j2] *= e[l, core.REF_HET_OBS_HOM] else: # REF is hom if G[l, j1, j2] == s[0, l]: # Equal - F[l, j1, j2] *= e[l, EQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.EQUAL_BOTH_HOM] else: # Unequal - F[l, j1, j2] *= e[l, UNEQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.UNEQUAL_BOTH_HOM] c[l] = np.sum(F[l, :, :]) F[l, :, :] *= 1 / c[l] @@ -404,7 +334,6 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): else: for l in range(1, m): - # Determine the various components F_no_change = np.zeros((n, n)) F_j1_change = np.zeros(n) F_j2_change = np.zeros(n) @@ -425,8 +354,8 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): for j2 in range(n): F[l, j1, j2] += F_no_change[j1, j2] - if s[0, l] == MISSING: - F[l, :, :] *= e[l, MISSING_INDEX] + if s[0, l] == core.MISSING: + F[l, :, :] *= e[l, core.MISSING_INDEX] else: for j1 in range(n): for j2 in range(n): @@ -434,18 +363,18 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): if s[0, l] == 1: # OBS is het if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, BOTH_HET] + F[l, j1, j2] *= e[l, core.BOTH_HET] else: # REF is hom - F[l, j1, j2] *= e[l, REF_HOM_OBS_HET] + F[l, j1, j2] *= e[l, core.REF_HOM_OBS_HET] else: # OBS is hom if G[l, j1, j2] == 1: # REF is het - F[l, j1, j2] *= e[l, REF_HET_OBS_HOM] + F[l, j1, j2] *= e[l, core.REF_HET_OBS_HOM] else: # REF is hom if G[l, j1, j2] == s[0, l]: # Equal - F[l, j1, j2] *= e[l, EQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.EQUAL_BOTH_HOM] else: # Unequal - F[l, j1, j2] *= e[l, UNEQUAL_BOTH_HOM] + F[l, j1, j2] *= e[l, core.UNEQUAL_BOTH_HOM] ll = np.log10(np.sum(F[l, :, :])) @@ -455,13 +384,12 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): @jit.numba_njit def backward_ls_dip_loop(n, m, G, s, e, c, r): """LS diploid backwards algoritm without vectorisation.""" - # Initialise the backward tensor + # Initialise B = np.zeros((m, n, n)) B[m - 1, :, :] = 1 r_n = r / n for l in range(m - 2, -1, -1): - # Determine the various components B_no_change = np.zeros((n, n)) B_j1_change = np.zeros(n) B_j2_change = np.zeros(n) @@ -469,8 +397,8 @@ def backward_ls_dip_loop(n, m, G, s, e, c, r): # Evaluate the emission matrix at this site, for all pairs e_tmp = np.zeros((n, n)) - if s[0, l + 1] == MISSING: - e_tmp[:, :] = e[l + 1, MISSING_INDEX] + if s[0, l + 1] == core.MISSING: + e_tmp[:, :] = e[l + 1, core.MISSING_INDEX] else: for j1 in range(n): for j2 in range(n): @@ -479,18 +407,18 @@ def backward_ls_dip_loop(n, m, G, s, e, c, r): if s[0, l + 1] == 1: # OBS is het if G[l + 1, j1, j2] == 1: # REF is het - e_tmp[j1, j2] = e[l + 1, BOTH_HET] + e_tmp[j1, j2] = e[l + 1, core.BOTH_HET] else: # REF is hom - e_tmp[j1, j2] = e[l + 1, REF_HOM_OBS_HET] + e_tmp[j1, j2] = e[l + 1, core.REF_HOM_OBS_HET] else: # OBS is hom if G[l + 1, j1, j2] == 1: # REF is het - e_tmp[j1, j2] = e[l + 1, REF_HET_OBS_HOM] + e_tmp[j1, j2] = e[l + 1, core.REF_HET_OBS_HOM] else: # REF is hom if G[l + 1, j1, 
j2] == s[0, l + 1]: # Equal - e_tmp[j1, j2] = e[l + 1, EQUAL_BOTH_HOM] + e_tmp[j1, j2] = e[l + 1, core.EQUAL_BOTH_HOM] else: # Unequal - e_tmp[j1, j2] = e[l + 1, UNEQUAL_BOTH_HOM] + e_tmp[j1, j2] = e[l + 1, core.UNEQUAL_BOTH_HOM] for j1 in range(n): for j2 in range(n): diff --git a/lshmm/forward_backward/fb_haploid.py b/lshmm/fb_haploid.py similarity index 53% rename from lshmm/forward_backward/fb_haploid.py rename to lshmm/fb_haploid.py index 69d01fc..2541e02 100644 --- a/lshmm/forward_backward/fb_haploid.py +++ b/lshmm/fb_haploid.py @@ -1,25 +1,27 @@ -"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples.""" +""" +Various implementations of the Li & Stephens forwards-backwards algorithm on haploid genotype data, +where the data is structured as variants x samples. +""" import numpy as np +from lshmm import core from lshmm import jit -MISSING = -1 - @jit.numba_njit def forwards_ls_hap(n, m, H, s, e, r, norm=True): - """Matrix based haploid LS forward algorithm using numpy vectorisation.""" - # Initialise + """A matrix-based implementation using Numpy vectorisation.""" F = np.zeros((m, n)) r_n = r / n if norm: c = np.zeros(m) for i in range(n): - F[0, i] = ( - 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] + emission_index = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] ) + F[0, i] = 1 / n * e[0, emission_index] c[0] += F[0, i] for i in range(n): @@ -29,9 +31,10 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True): for l in range(1, m): for i in range(n): F[l, i] = F[l - 1, i] * (1 - r[l]) + r_n[l] - F[l, i] *= e[ - l, np.int64(np.equal(H[l, i], s[0, l]) or s[0, l] == MISSING) - ] + emission_index = core.get_index_in_emission_matrix( + ref_allele=H[l, i], query_allele=s[0, l] + ) + F[l, i] *= e[l, emission_index] c[l] += F[l, i] for i in range(n): @@ -43,17 +46,19 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True): c = np.ones(m) for i in range(n): - F[0, i] = ( - 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] + emission_index = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] ) + F[0, i] = 1 / n * e[0, emission_index] # Forwards pass for l in range(1, m): for i in range(n): F[l, i] = F[l - 1, i] * (1 - r[l]) + np.sum(F[l - 1, :]) * r_n[l] - F[l, i] *= e[ - l, np.int64(np.equal(H[l, i], s[0, l]) or s[0, l] == MISSING) - ] + emission_index = core.get_index_in_emission_matrix( + ref_allele=H[l, i], query_allele=s[0, l] + ) + F[l, i] *= e[l, emission_index] ll = np.log10(np.sum(F[m - 1, :])) @@ -62,8 +67,7 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True): @jit.numba_njit def backwards_ls_hap(n, m, H, s, e, c, r): - """Matrix based haploid LS backward algorithm using numpy vectorisation.""" - # Initialise + """A matrix-based implementation using Numpy vectorisation.""" B = np.zeros((m, n)) for i in range(n): B[m - 1, i] = 1 @@ -74,15 +78,10 @@ def backwards_ls_hap(n, m, H, s, e, c, r): tmp_B = np.zeros(n) tmp_B_sum = 0 for i in range(n): - tmp_B[i] = ( - e[ - l + 1, - np.int64( - np.equal(H[l + 1, i], s[0, l + 1]) or s[0, l + 1] == MISSING - ), - ] - * B[l + 1, i] + emission_index = core.get_index_in_emission_matrix( + ref_allele=H[l + 1, i], query_allele=s[0, l + 1] ) + tmp_B[i] = e[l + 1, emission_index] * B[l + 1, i] tmp_B_sum += tmp_B[i] for i in range(n): B[l, i] = r_n[l + 1] * tmp_B_sum diff --git a/lshmm/forward_backward/__init__.py b/lshmm/forward_backward/__init__.py deleted file 
mode 100644 index e69de29..0000000 diff --git a/lshmm/vit_diploid.py b/lshmm/vit_diploid.py index 316b5d0..f28567d 100644 --- a/lshmm/vit_diploid.py +++ b/lshmm/vit_diploid.py @@ -1,82 +1,39 @@ -"""Collection of functions to run Viterbi algorithms on dipoid genotype data, where the data is structured as variants x samples.""" +""" +Various implementations of the Li & Stephens Viterbi algorithm on diploid genotype data, +where the data is structured as variants x samples x samples. +""" import numpy as np +from . import core from . import jit -MISSING = -1 -MISSING_INDEX = 3 - - -# https://github.com/numba/numba/issues/1269 -@jit.numba_njit -def np_apply_along_axis(func1d, axis, arr): - """Create numpy-like functions for max, sum etc.""" - assert arr.ndim == 2 - assert axis in [0, 1] - if axis == 0: - result = np.empty(arr.shape[1]) - for i in range(len(result)): - result[i] = func1d(arr[:, i]) - else: - result = np.empty(arr.shape[0]) - for i in range(len(result)): - result[i] = func1d(arr[i, :]) - return result - - -@jit.numba_njit -def np_amax(array, axis): - """Numba implementation of numpy vectorised maximum.""" - return np_apply_along_axis(np.amax, axis, array) - - -@jit.numba_njit -def np_sum(array, axis): - """Numba implementation of numpy vectorised sum.""" - return np_apply_along_axis(np.sum, axis, array) - - -@jit.numba_njit -def np_argmax(array, axis): - """Numba implementation of numpy vectorised argmax.""" - return np_apply_along_axis(np.argmax, axis, array) - @jit.numba_njit def forwards_viterbi_dip_naive(n, m, G, s, e, r): - """Naive implementation of LS diploid Viterbi algorithm.""" + """A naive implementation.""" # Initialise V = np.zeros((m, n, n)) - P = np.zeros((m, n, n)).astype(np.int64) + P = np.zeros((m, n, n), dtype=np.int64) c = np.ones(m) r_n = r / n for j1 in range(n): for j2 in range(n): - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], query_allele=s[0, 0] + ) + V[0, j1, j2] = 1 / (n**2) * e[0, emission_index] for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) for j1 in range(n): for j2 in range(n): - # Get the vector to maximise over v = np.zeros((n, n)) for k1 in range(n): for k2 in range(n): @@ -89,7 +46,7 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r): v[k1, k2] *= r_n[l] * (1 - r[l]) + r_n[l] ** 2 else: v[k1, k2] *= r_n[l] ** 2 - V[l, j1, j2] = np.amax(v) * e[l, index[j1, j2]] + V[l, j1, j2] = np.amax(v) * e[l, emission_index[j1, j2]] P[l, j1, j2] = np.argmax(v) c[l] = np.amax(V[l, :, :]) V[l, :, :] *= 1 / c[l] @@ -101,44 +58,37 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r): @jit.numba_njit def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): - """Naive implementation of LS diploid Viterbi algorithm, with reduced memory.""" + """A naive implementation with reduced memory.""" # Initialise V = np.zeros((n, n)) - V_previous = np.zeros((n, n)) - P = np.zeros((m, n, n)).astype(np.int64) + V_prev = np.zeros((n, n)) + P = np.zeros((m, n, n), dtype=np.int64) c 
= np.ones(m) r_n = r / n for j1 in range(n): for j2 in range(n): - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], query_allele=s[0, 0] + ) + V_prev[j1, j2] = 1 / (n**2) * e[0, emission_index] - # Take a look at Haploid Viterbi implementation in Jeromes code and see if we can pinch some ideas. + # Take a look at the haploid Viterbi implementation in Jerome's code, and + # see if we can pinch some ideas. # Diploid Viterbi, with smaller memory footprint. for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) + for j1 in range(n): for j2 in range(n): - # Get the vector to maximise over v = np.zeros((n, n)) for k1 in range(n): for k2 in range(n): - v[k1, k2] = V_previous[k1, k2] + v[k1, k2] = V_prev[k1, k2] if (k1 == j1) and (k2 == j2): v[k1, k2] *= ( (1 - r[l]) ** 2 + 2 * (1 - r[l]) * r_n[l] + r_n[l] ** 2 @@ -147,10 +97,10 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): v[k1, k2] *= r_n[l] * (1 - r[l]) + r_n[l] ** 2 else: v[k1, k2] *= r_n[l] ** 2 - V[j1, j2] = np.amax(v) * e[l, index[j1, j2]] + V[j1, j2] = np.amax(v) * e[l, emission_index[j1, j2]] P[l, j1, j2] = np.argmax(v) c[l] = np.amax(V) - V_previous = np.copy(V) / c[l] + V_prev = np.copy(V) / c[l] ll = np.sum(np.log10(c)) @@ -159,43 +109,35 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r): @jit.numba_njit def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): - """LS diploid Viterbi algorithm, with reduced memory.""" + """An implementation with reduced memory.""" # Initialise V = np.zeros((n, n)) - V_previous = np.zeros((n, n)) - P = np.zeros((m, n, n)).astype(np.int64) + V_prev = np.zeros((n, n)) + P = np.zeros((m, n, n), dtype=np.int64) c = np.ones(m) r_n = r / n for j1 in range(n): for j2 in range(n): - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], query_allele=s[0, 0] + ) + V_prev[j1, j2] = 1 / (n**2) * e[0, emission_index] # Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM. 
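The loop that follows replaces a full scan over predecessors with precomputed row maxima. A minimal sketch of why `max(V_rowcol_max[j1], V_rowcol_max[j2])` suffices, assuming `V_prev` stays symmetric (which the diploid recursion preserves):

```python
import numpy as np

rng = np.random.default_rng(42)
n = 5
A = rng.random((n, n))
V_prev = (A + A.T) / 2  # toy symmetric matrix; the diploid V stays symmetric

V_rowcol_max = V_prev.max(axis=0)  # what core.np_amax(V_prev, 0) computes under Numba

j1, j2 = 2, 3
# Best predecessor sharing one index with (j1, j2), found without an n*n scan:
best_single = max(V_rowcol_max[j1], V_rowcol_max[j2])
brute_force = max(V_prev[:, j1].max(), V_prev[j1, :].max(),
                  V_prev[:, j2].max(), V_prev[j2, :].max())
assert best_single == brute_force
```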
for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) - c[l] = np.amax(V_previous) - argmax = np.argmax(V_previous) + c[l] = np.amax(V_prev) + argmax = np.argmax(V_prev) - V_previous *= 1 / c[l] - V_rowcol_max = np_amax(V_previous, 0) - arg_rowcol_max = np_argmax(V_previous, 0) + V_prev *= 1 / c[l] + V_rowcol_max = core.np_amax(V_prev, 0) + arg_rowcol_max = core.np_argmax(V_prev, 0) no_switch = (1 - r[l]) ** 2 + 2 * (r_n[l] * (1 - r[l])) + r_n[l] ** 2 single_switch = r_n[l] * (1 - r[l]) + r_n[l] ** 2 @@ -215,7 +157,7 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): else: template_single_switch = arg_rowcol_max[j2] * n + j2 - V[j1, j2] = V_previous[j1, j2] * no_switch # No switch in either + V[j1, j2] = V_prev[j1, j2] * no_switch # No switch in either P[l, j1, j2] = j1_j2 # Single or double switch? @@ -231,9 +173,9 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): V[j1, j2] = double_switch P[l, j1, j2] = argmax - V[j1, j2] *= e[l, index[j1, j2]] + V[j1, j2] *= e[l, emission_index[j1, j2]] j1_j2 += 1 - V_previous = np.copy(V) + V_prev = np.copy(V) ll = np.sum(np.log10(c)) + np.log10(np.amax(V)) @@ -242,10 +184,10 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r): @jit.numba_njit def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): - """LS diploid Viterbi algorithm, with reduced memory.""" + """An implementation with reduced memory and no pointer.""" # Initialise V = np.zeros((n, n)) - V_previous = np.zeros((n, n)) + V_prev = np.zeros((n, n)) c = np.ones(m) r_n = r / n @@ -262,35 +204,27 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): for j1 in range(n): for j2 in range(n): - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], query_allele=s[0, 0] + ) + V_prev[j1, j2] = 1 / (n**2) * e[0, emission_index] # Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM. 
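Both low-memory variants linearise the state pair (j1, j2) as `j1 * n + j2` (the `j1_j2` counter and the values stored in `P`). A two-line sanity sketch of that encoding and of the `np.unravel_index` call that `get_phased_path` later uses to undo it:

```python
import numpy as np

n = 6
j1, j2 = 4, 2
flat = j1 * n + j2  # row-major linearisation of the state pair (j1, j2)
assert flat == np.ravel_multi_index((j1, j2), (n, n))
assert np.unravel_index(flat, (n, n)) == (j1, j2)  # what get_phased_path undoes
```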
for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) - c[l] = np.amax(V_previous) - argmax = np.argmax(V_previous) + c[l] = np.amax(V_prev) + argmax = np.argmax(V_prev) V_argmaxes[l - 1] = argmax # added - V_previous *= 1 / c[l] - V_rowcol_max = np_amax(V_previous, 0) + V_prev *= 1 / c[l] + V_rowcol_max = core.np_amax(V_prev, 0) V_rowcol_maxes[l - 1, :] = V_rowcol_max - arg_rowcol_max = np_argmax(V_previous, 0) + arg_rowcol_max = core.np_argmax(V_prev, 0) V_rowcol_argmaxes[l - 1, :] = arg_rowcol_max no_switch = (1 - r[l]) ** 2 + 2 * (r_n[l] * (1 - r[l])) + r_n[l] ** 2 @@ -302,7 +236,7 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): for j1 in range(n): for j2 in range(n): V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2]) - V[j1, j2] = V_previous[j1, j2] * no_switch # No switch in either + V[j1, j2] = V_prev[j1, j2] * no_switch # No switch in either # Single or double switch? single_switch_tmp = single_switch * V_single_switch @@ -317,13 +251,13 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): V[j1, j2] = double_switch recombs_double[l] = np.append(recombs_double[l], values=j1_j2) - V[j1, j2] *= e[l, index[j1, j2]] + V[j1, j2] *= e[l, emission_index[j1, j2]] j1_j2 += 1 - V_previous = np.copy(V) + V_prev = np.copy(V) - V_argmaxes[m - 1] = np.argmax(V_previous) - V_rowcol_maxes[m - 1, :] = np_amax(V_previous, 0) - V_rowcol_argmaxes[m - 1, :] = np_argmax(V_previous, 0) + V_argmaxes[m - 1] = np.argmax(V_prev) + V_rowcol_maxes[m - 1, :] = core.np_amax(V_prev, 0) + V_rowcol_argmaxes[m - 1, :] = core.np_argmax(V_prev, 0) ll = np.sum(np.log10(c)) + np.log10(np.amax(V)) return ( @@ -339,35 +273,27 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r): @jit.numba_njit def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): - """Vectorised LS diploid Viterbi algorithm using numpy.""" + """An implementation using Numpy vectorisation.""" # Initialise V = np.zeros((m, n, n)) - P = np.zeros((m, n, n)).astype(np.int64) + P = np.zeros((m, n, n), dtype=np.int64) c = np.ones(m) r_n = r / n for j1 in range(n): for j2 in range(n): - if s[0, 0] == MISSING: - index_tmp = MISSING_INDEX - else: - index_tmp = ( - 4 * np.int64(np.equal(G[0, j1, j2], s[0, 0])) - + 2 * np.int64((G[0, j1, j2] == 1)) - + np.int64(s[0, 0] == 1) - ) - V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp] + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, j1, j2], query_allele=s[0, 0] + ) + V[0, j1, j2] = 1 / (n**2) * e[0, emission_index] # Jumped the gun - vectorising. 
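All of these Viterbi variants rescale by `c[l]` and recover the log-likelihood as `np.sum(np.log10(c))` plus the log of the final maximum. A simplified sketch of that bookkeeping (the transition step here is a stand-in, not the real recursion):

```python
import numpy as np

rng = np.random.default_rng(0)
m, n = 200, 4
emis = rng.random((m, n))  # stand-in per-site emission scores (toy data)

V = emis[0].copy()
c = np.ones(m)
for l in range(1, m):
    V = V.max() * emis[l]  # stand-in for the real max-over-predecessors step
    c[l] = V.max()
    V /= c[l]              # rescale so the values never underflow

ll = np.sum(np.log10(c)) + np.log10(V.max())

# The same quantity computed naively in log space:
ll_naive = np.log10(emis[0].max()) + np.sum(np.log10(emis[1:].max(axis=1)))
assert np.isclose(ll, ll_naive)
```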
for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) for j1 in range(n): for j2 in range(n): @@ -376,7 +302,7 @@ def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): v[j1, :] += r_n[l] * (1 - r[l]) v[:, j2] += r_n[l] * (1 - r[l]) v *= V[l - 1, :, :] - V[l, j1, j2] = np.amax(v) * e[l, index[j1, j2]] + V[l, j1, j2] = np.amax(v) * e[l, emission_index[j1, j2]] P[l, j1, j2] = np.argmax(v) c[l] = np.amax(V[l, :, :]) @@ -388,7 +314,7 @@ def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r): def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r): - """Fully vectorised naive LS diploid Viterbi algorithm using numpy.""" + """Fully vectorised naive implementation using Numpy.""" char_both = np.eye(n * n).ravel().reshape((n, n, n, n)) char_col = np.tile(np.sum(np.eye(n * n).reshape((n, n, n, n)), 3), (n, 1, 1, 1)) char_row = np.copy(char_col).T @@ -396,28 +322,23 @@ def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r): # Initialise V = np.zeros((m, n, n)) - P = np.zeros((m, n, n)).astype(np.int64) + P = np.zeros((m, n, n), dtype=np.int64) c = np.ones(m) - if s[0, 0] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[0, :, :], s[0, 0]).astype(np.int64) - + 2 * (G[0, :, :] == 1).astype(np.int64) - + np.int64(s[0, 0] == 1) - ) - V[0, :, :] = 1 / (n**2) * e[0, index] + + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[0, :, :], + query_allele=s[0, 0], + n=n, + ) + V[0, :, :] = 1 / (n**2) * e[0, emission_index] r_n = r / n for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX * np.ones((n, n), dtype=np.int64) - else: - index = ( - 4 * np.equal(G[l, :, :], s[0, l]).astype(np.int64) - + 2 * (G[l, :, :] == 1).astype(np.int64) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid_G( + ref_G=G[l, :, :], + query_allele=s[0, l], + n=n, + ) v = ( (r_n[l] ** 2) + (1 - r[l]) ** 2 * char_both @@ -425,7 +346,7 @@ def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r): ) v *= V[l - 1, :, :] P[l, :, :] = np.argmax(v.reshape(n, n, -1), 2) # Have to flatten to use argmax - V[l, :, :] = v.reshape(n, n, -1)[rows, cols, P[l, :, :]] * e[l, index] + V[l, :, :] = v.reshape(n, n, -1)[rows, cols, P[l, :, :]] * e[l, emission_index] c[l] = np.amax(V[l, :, :]) V[l, :, :] *= 1 / c[l] @@ -439,8 +360,9 @@ def backwards_viterbi_dip(m, V_last, P): """Run a backwards pass to determine the most likely path.""" assert V_last.ndim == 2 assert V_last.shape[0] == V_last.shape[1] - # Initialisation - path = np.zeros(m).astype(np.int64) + + # Initialise + path = np.zeros(m, dtype=np.int64) path[m - 1] = np.argmax(V_last) # Backtrace @@ -455,8 +377,7 @@ def in_list(array, value): where = np.searchsorted(array, value) if where < array.shape[0]: return array[where] == value - else: - return False + return False @jit.numba_njit @@ -472,8 +393,9 @@ def backwards_viterbi_dip_no_pointer( """Run a backwards pass to determine the most likely path.""" assert V_last.ndim == 2 assert V_last.shape[0] == V_last.shape[1] - # Initialisation - path = np.zeros(m).astype(np.int64) + + # Initialise + path = np.zeros(m, dtype=np.int64) path[m - 1] = np.argmax(V_last) n = V_last.shape[0] @@ -496,37 +418,24 @@ 
def backwards_viterbi_dip_no_pointer( def get_phased_path(n, path): - """Obtain the phased path.""" return np.unravel_index(path, (n, n)) @jit.numba_njit def path_ll_dip(n, m, G, phased_path, s, e, r): """Evaluate log-likelihood path through a reference panel which results in sequence s.""" - if s[0, 0] == MISSING: - index = MISSING_INDEX - else: - index = ( - 4 * np.int64(np.equal(G[0, phased_path[0][0], phased_path[1][0]], s[0, 0])) - + 2 * np.int64(G[0, phased_path[0][0], phased_path[1][0]] == 1) - + np.int64(s[0, 0] == 1) - ) - log_prob_path = np.log10(1 / (n**2) * e[0, index]) + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[0, phased_path[0][0], phased_path[1][0]], query_allele=s[0, 0] + ) + log_prob_path = np.log10(1 / (n**2) * e[0, emission_index]) old_phase = np.array([phased_path[0][0], phased_path[1][0]]) r_n = r / n for l in range(1, m): - if s[0, l] == MISSING: - index = MISSING_INDEX - else: - index = ( - 4 - * np.int64( - np.equal(G[l, phased_path[0][l], phased_path[1][l]], s[0, l]) - ) - + 2 * np.int64(G[l, phased_path[0][l], phased_path[1][l]] == 1) - + np.int64(s[0, l] == 1) - ) + emission_index = core.get_index_in_emission_matrix_diploid( + ref_allele=G[l, phased_path[0][l], phased_path[1][l]], + query_allele=s[0, l], + ) current_phase = np.array([phased_path[0][l], phased_path[1][l]]) phase_diff = np.sum(~np.equal(current_phase, old_phase)) @@ -540,7 +449,7 @@ def path_ll_dip(n, m, G, phased_path, s, e, r): else: log_prob_path += np.log10(r_n[l] ** 2) - log_prob_path += np.log10(e[l, index]) + log_prob_path += np.log10(e[l, emission_index]) old_phase = current_phase return log_prob_path diff --git a/lshmm/vit_haploid.py b/lshmm/vit_haploid.py index 7fec45e..ec2ae10 100644 --- a/lshmm/vit_haploid.py +++ b/lshmm/vit_haploid.py @@ -1,57 +1,60 @@ -"""Collection of functions to run Viterbi algorithms on haploid genotype data, where the data is structured as variants x samples.""" +""" +Various implementations of the Li & Stephens Viterbi algorithm on haploid genotype data, +where the data is structured as variants x samples. +""" import numpy as np +from . import core from . 
import jit -MISSING = -1 - @jit.numba_njit def viterbi_naive_init(n, m, H, s, e, r): - """Initialise naive implementation of LS viterbi.""" + """Initialise a naive implementation.""" V = np.zeros((m, n)) - P = np.zeros((m, n)).astype(np.int64) + P = np.zeros((m, n), dtype=np.int64) r_n = r / n + for i in range(n): - V[0, i] = ( - 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] ) + V[0, i] = 1 / n * e[0, emission_idx] return V, P, r_n @jit.numba_njit def viterbi_init(n, m, H, s, e, r): - """Initialise naive, but more space memory efficient implementation of LS viterbi.""" - V_previous = np.zeros(n) + """Initialise a naive, but more memory efficient, implementation.""" + V_prev = np.zeros(n) V = np.zeros(n) - P = np.zeros((m, n)).astype(np.int64) + P = np.zeros((m, n), dtype=np.int64) r_n = r / n for i in range(n): - V_previous[i] = ( - 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] ) + V_prev[i] = 1 / n * e[0, emission_idx] - return V, V_previous, P, r_n + return V, V_prev, P, r_n @jit.numba_njit def forwards_viterbi_hap_naive(n, m, H, s, e, r): - """Naive implementation of LS haploid Viterbi algorithm.""" - # Initialise + """A naive implementation of the forward pass.""" V, P, r_n = viterbi_naive_init(n, m, H, s, e, r) for j in range(1, m): for i in range(n): - # Get the vector to maximise over v = np.zeros(n) for k in range(n): - v[k] = ( - e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] - * V[j - 1, k] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] ) + v[k] = V[j - 1, k] * e[j, emission_idx] if k == i: v[k] *= 1 - r[j] + r_n[j] else: @@ -66,8 +69,7 @@ def forwards_viterbi_hap_naive(n, m, H, s, e, r): @jit.numba_njit def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): - """Naive matrix based implementation of LS haploid forward Viterbi algorithm using numpy.""" - # Initialise + """A naive matrix-based implementation of the forward pass using Numpy.""" V, P, r_n = viterbi_naive_init(n, m, H, s, e, r) for j in range(1, m): @@ -75,7 +77,10 @@ def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): for i in range(n): v = np.copy(v_tmp) v[i] += V[j - 1, i] * (1 - r[j]) - v *= e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] + ) + v *= e[j, emission_idx] P[j, i] = np.argmax(v) V[j, i] = v[P[j, i]] @@ -86,26 +91,24 @@ def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): @jit.numba_njit def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): - """Naive implementation of LS haploid Viterbi algorithm, with reduced memory.""" - # Initialise - V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) + """A naive implementation of the forward pass with reduced memory.""" + V, V_prev, P, r_n = viterbi_init(n, m, H, s, e, r) for j in range(1, m): for i in range(n): - # Get the vector to maximise over v = np.zeros(n) for k in range(n): - v[k] = ( - e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] - * V_previous[k] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] ) + v[k] = V_prev[k] * e[j, emission_idx] if k == i: v[k] *= 1 - r[j] + r_n[j] else: v[k] *= r_n[j] P[j, i] = np.argmax(v) V[i] = v[P[j, i]] - V_previous = np.copy(V) + 
V_prev = np.copy(V) ll = np.log10(np.amax(V)) @@ -114,30 +117,27 @@ def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): @jit.numba_njit def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): - """Naive implementation of LS haploid Viterbi algorithm, with reduced memory and rescaling.""" - # Initialise - V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) + """A naive implementation of the forward pass with reduced memory and rescaling.""" + V, V_prev, P, r_n = viterbi_init(n, m, H, s, e, r) c = np.ones(m) for j in range(1, m): - c[j] = np.amax(V_previous) - V_previous *= 1 / c[j] + c[j] = np.amax(V_prev) + V_prev *= 1 / c[j] for i in range(n): - # Get the vector to maximise over v = np.zeros(n) for k in range(n): - v[k] = ( - e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] - * V_previous[k] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] ) + v[k] = V_prev[k] * e[j, emission_idx] if k == i: v[k] *= 1 - r[j] + r_n[j] else: v[k] *= r_n[j] P[j, i] = np.argmax(v) V[i] = v[P[j, i]] - - V_previous = np.copy(V) + V_prev = np.copy(V) ll = np.sum(np.log10(c)) + np.log10(np.amax(V)) @@ -146,24 +146,26 @@ def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): @jit.numba_njit def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): - """LS haploid Viterbi algorithm, with reduced memory and exploits the Markov process structure.""" - # Initialise - V, V_previous, P, r_n = viterbi_init(n, m, H, s, e, r) + """An implementation with reduced memory that exploits the Markov structure.""" + V, V_prev, P, r_n = viterbi_init(n, m, H, s, e, r) c = np.ones(m) for j in range(1, m): - argmax = np.argmax(V_previous) - c[j] = V_previous[argmax] - V_previous *= 1 / c[j] + argmax = np.argmax(V_prev) + c[j] = V_prev[argmax] + V_prev *= 1 / c[j] V = np.zeros(n) for i in range(n): - V[i] = V_previous[i] * (1 - r[j] + r_n[j]) + V[i] = V_prev[i] * (1 - r[j] + r_n[j]) P[j, i] = i if V[i] < r_n[j]: V[i] = r_n[j] P[j, i] = argmax - V[i] *= e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] - V_previous = np.copy(V) + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] + ) + V[i] *= e[j, emission_idx] + V_prev = np.copy(V) ll = np.sum(np.log10(c)) + np.log10(np.max(V)) @@ -172,12 +174,14 @@ def forwards_viterbi_hap_low_mem_rescaling(n, m, H, s, e, r): @jit.numba_njit def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): - """LS haploid Viterbi algorithm with even smaller memory footprint and exploits the Markov process structure.""" - # Initialise + """An implementation with even smaller memory footprint that exploits the Markov structure.""" V = np.zeros(n) for i in range(n): - V[i] = 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] - P = np.zeros((m, n)).astype(np.int64) + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] + ) + V[i] = 1 / n * e[0, emission_idx] + P = np.zeros((m, n), dtype=np.int64) r_n = r / n c = np.ones(m) @@ -191,7 +195,10 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): if V[i] < r_n[j]: V[i] = r_n[j] P[j, i] = argmax - V[i] *= e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[j, i], query_allele=s[0, j] + ) + V[i] *= e[j, emission_idx] ll = np.sum(np.log10(c)) + np.log10(np.max(V)) @@ -200,16 +207,23 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): 
@jit.numba_njit def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): - """LS haploid Viterbi algorithm with even smaller memory footprint and exploits the Markov process structure.""" - # Initialise + """ + An implementation with even smaller memory footprint and rescaling + that exploits the Markov structure. + """ V = np.zeros(n) for i in range(n): - V[i] = 1 / n * e[0, np.int64(np.equal(H[0, i], s[0, 0]) or s[0, 0] == MISSING)] + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[0, i], query_allele=s[0, 0] + ) + V[i] = 1 / n * e[0, emission_idx] r_n = r / n c = np.ones(m) + # This is going to be filled with the templates we can recombine to + # that have higher prob than staying where we are. recombs = [ np.zeros(shape=0, dtype=np.int64) for _ in range(m) - ] # This is going to be filled with the templates we can recombine to that have higher prob than staying where we are. + ] V_argmaxes = np.zeros(m) @@ -225,7 +239,8 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): recombs[j] = np.append( recombs[j], i ) # We add template i as a potential template to recombine to at site j. - V[i] *= e[j, np.int64(np.equal(H[j, i], s[0, j]) or s[0, j] == MISSING)] + emission_idx = core.get_index_in_emission_matrix(H[j, i], s[0, j]) + V[i] *= e[j, emission_idx] V_argmaxes[m - 1] = np.argmax(V) ll = np.sum(np.log10(c)) + np.log10(np.max(V)) @@ -237,9 +252,8 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): @jit.numba_njit def backwards_viterbi_hap(m, V_last, P): """Run a backwards pass to determine the most likely path.""" - # Initialise assert len(V_last.shape) == 1 - path = np.zeros(m).astype(np.int64) + path = np.zeros(m, dtype=np.int64) path[m - 1] = np.argmax(V_last) for j in range(m - 2, -1, -1): @@ -251,8 +265,7 @@ def backwards_viterbi_hap(m, V_last, P): @jit.numba_njit def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs): """Run a backwards pass to determine the most likely path.""" - # Initialise - path = np.zeros(m).astype(np.int64) + path = np.zeros(m, dtype=np.int64) path[m - 1] = V_argmaxes[m - 1] for j in range(m - 2, -1, -1): @@ -266,14 +279,18 @@ def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs): @jit.numba_njit def path_ll_hap(n, m, H, path, s, e, r): - """Evaluate log-likelihood path through a reference panel which results in sequence s.""" - index = np.int64(np.equal(H[0, path[0]], s[0, 0]) or s[0, 0] == MISSING) - log_prob_path = np.log10((1 / n) * e[0, index]) + """Evaluate the log-likelihood of a path through a reference panel resulting in a sequence.""" + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[0, path[0]], query_allele=s[0, 0] + ) + log_prob_path = np.log10((1 / n) * e[0, emission_idx]) old = path[0] r_n = r / n for l in range(1, m): - index = np.int64(np.equal(H[l, path[l]], s[0, l]) or s[0, l] == MISSING) + emission_idx = core.get_index_in_emission_matrix( + ref_allele=H[l, path[l]], query_allele=s[0, l] + ) current = path[l] same = old == current @@ -282,7 +299,7 @@ def path_ll_hap(n, m, H, path, s, e, r): else: log_prob_path += np.log10(r_n[l]) - log_prob_path += np.log10(e[l, index]) + log_prob_path += np.log10(e[l, emission_idx]) old = current return log_prob_path diff --git a/tests/lsbase.py b/tests/lsbase.py new file mode 100644 index 0000000..6684585 --- /dev/null +++ b/tests/lsbase.py @@ -0,0 +1,296 @@ +import itertools + +import numpy as np + +import msprime + +import lshmm.core as core + + +class LSBase: + """Base class of Li and 
Stephens tests."""
+
+    def verify(self, ts):
+        raise NotImplementedError()
+
+    def assertAllClose(self, A, B):
+        np.testing.assert_allclose(A, B, rtol=1e-9, atol=0.0)
+
+    # Helper routine
+    def get_num_alleles(self, ref_haps, query):
+        assert ref_haps.shape[0] == query.shape[1]
+        num_sites = ref_haps.shape[0]
+        num_alleles = np.zeros(num_sites, dtype=np.int8)
+        exclusion_set = np.array([core.MISSING])
+        for i in range(num_sites):
+            uniq_alleles = np.unique(np.append(ref_haps[i, :], query[:, i]))
+            num_alleles[i] = np.sum(~np.isin(uniq_alleles, exclusion_set))
+        assert np.all(num_alleles > 0), "Number of alleles cannot be zero."
+        return num_alleles
+
+    # Haploid
+    def get_examples_haploid(self, ts):
+        H = ts.genotype_matrix()
+        s = H[:, 0].reshape(1, H.shape[0])
+        H = H[:, 1:]
+        haplotypes = [s, H[:, -1].reshape(1, H.shape[0])]
+        s_miss_last = s.copy()
+        s_miss_last[0, -1] = core.MISSING
+        s_miss_mid = s.copy()
+        s_miss_mid[0, ts.num_sites // 2] = core.MISSING
+        s_miss_all = s.copy()
+        s_miss_all[0, :] = core.MISSING
+        haplotypes.append(s_miss_last)
+        haplotypes.append(s_miss_mid)
+        haplotypes.append(s_miss_all)
+        return H, haplotypes
+
+    def get_emission_matrix_haploid(
+        self, mu, m, n_alleles, scale_mutation_based_on_n_alleles
+    ):
+        e = np.zeros((m, 2))
+        if isinstance(mu, float):
+            mu = mu * np.ones(m)
+        if scale_mutation_based_on_n_alleles:
+            e[:, 0] = mu - mu * np.equal(
+                n_alleles, np.ones(m)
+            )  # Add boolean in case we're at an invariant site
+            e[:, 1] = 1 - (n_alleles - 1) * mu
+        else:
+            for j in range(m):
+                if n_alleles[j] == 1:
+                    # In case we're at an invariant site
+                    e[j, 0] = 0
+                    e[j, 1] = 1
+                else:
+                    e[j, 0] = mu[j] / (n_alleles[j] - 1)
+                    e[j, 1] = 1 - mu[j]
+        return e
+
+    def get_examples_pars_haploid(self, ts, scale_mutation=True, seed=42):
+        """Returns an iterator over combinations of examples and parameters."""
+        np.random.seed(seed)
+        H, haplotypes = self.get_examples_haploid(ts)
+        m = ts.num_sites
+        n = H.shape[1]
+        rs = [
+            np.zeros(m) + 0.01,  # Equal recombination and mutation
+            np.zeros(m) + 0.999,  # Extreme
+            np.zeros(m) + 1e-6,  # Extreme
+            np.random.rand(m),  # Random
+        ]
+        mus = [
+            np.zeros(m) + 0.01,  # Equal recombination and mutation
+            np.zeros(m) + 0.2,  # Extreme
+            np.zeros(m) + 1e-6,  # Extreme
+            np.random.rand(m) * 0.2,  # Random
+        ]
+        for s, r, mu in itertools.product(haplotypes, rs, mus):
+            r[0] = 0
+            # Must be calculated from the genotype matrix,
+            # because we can now get back mutations that
+            # result in the number of alleles being higher
+            # than the number of alleles in the reference panel.
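+            # For example, if the panel carries alleles {0, 1} at a site
+            # but the query carries a back-mutated 2 there, n_alleles is 3
+            # at that site even though the panel itself is biallelic.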
+ n_alleles = self.get_num_alleles(H, s) + e = self.get_emission_matrix_haploid( + mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation + ) + yield n, m, H, s, e, r, mu + + # Diploid + def get_examples_diploid(self, ts, seed=42): + np.random.seed(seed) + H = ts.genotype_matrix() + s = H[:, 0].reshape(1, H.shape[0]) + H[:, 1].reshape(1, H.shape[0]) + H = H[:, 2:] + genotypes = [ + s, + H[:, -1].reshape(1, H.shape[0]) + H[:, -2].reshape(1, H.shape[0]), + ] + s_miss_last = s.copy() + s_miss_last[0, -1] = core.MISSING + s_miss_mid = s.copy() + s_miss_mid[0, ts.num_sites // 2] = core.MISSING + s_miss_all = s.copy() + s_miss_all[0, :] = core.MISSING + genotypes.append(s_miss_last) + genotypes.append(s_miss_mid) + genotypes.append(s_miss_all) + m = ts.num_sites + n = H.shape[1] + G = np.zeros((m, n, n)) + for i in range(m): + G[i, :, :] = np.add.outer(H[i, :], H[i, :]) + return H, G, genotypes + + def get_emission_matrix_diploid(self, mu, m): + e = np.zeros((m, 8)) + e[:, core.EQUAL_BOTH_HOM] = (1 - mu) ** 2 + e[:, core.UNEQUAL_BOTH_HOM] = mu**2 + e[:, core.BOTH_HET] = (1 - mu) ** 2 + mu**2 + e[:, core.REF_HOM_OBS_HET] = 2 * mu * (1 - mu) + e[:, core.REF_HET_OBS_HOM] = mu * (1 - mu) + e[:, core.MISSING_INDEX] = 1 + return e + + def get_examples_pars_diploid(self, ts, seed=42): + """Returns an iterator over combinations of examples and parameters.""" + np.random.seed(seed) + H, G, genotypes = self.get_examples_diploid(ts) + m = ts.num_sites + n = H.shape[1] + rs = [ + np.zeros(m) + 0.01, # Equal recombination and mutation + np.zeros(m) + 0.999, # Extreme + np.zeros(m) + 1e-6, # Extreme + np.random.rand(m), # Random + ] + mus = [ + np.zeros(m) + 0.01, # Equal recombination and mutation + np.zeros(m) + 0.33, # Extreme + np.zeros(m) + 1e-6, # Extreme + np.random.rand(m) * 0.33, # Random + ] + for s, r, mu in itertools.product(genotypes, rs, mus): + r[0] = 0 + e = self.get_emission_matrix_diploid(mu, m) + yield n, m, G, s, e, r, mu + + def get_examples_pars_larger_diploid(self, ts, mean_r=1e-5, mean_mu=1e-5, seed=42): + """Returns an iterator over combinations of examples and parameters.""" + np.random.seed(seed) + H, G, genotypes = self.get_examples_diploid(ts) + m = H.shape[0] + n = H.shape[1] + r = mean_r * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) + r[0] = 0 + mu = mean_mu * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) + e = self.get_emission_matrix_diploid(mu, m) + for s in genotypes: + yield n, m, G, s, e, r, mu + + # Prepare simple example datasets. + def get_simple_n10_no_recombination(self, seed=42): + ts = msprime.simulate( + 10, + recombination_rate=0, + mutation_rate=0.5, + random_seed=seed, + ) + assert ts.num_sites > 3 + return ts + + def get_simple_n6(self, seed=42): + ts = msprime.simulate( + 6, + recombination_rate=2, + mutation_rate=7, + random_seed=seed, + ) + assert ts.num_sites > 5 + return ts + + def get_simple_n8(self, seed=42): + ts = msprime.simulate( + 8, + recombination_rate=2, + mutation_rate=5, + random_seed=seed, + ) + assert ts.num_sites > 5 + return ts + + def get_simple_n8_high_recombination(self, seed=42): + ts = msprime.simulate( + 8, + recombination_rate=20, + mutation_rate=5, + random_seed=seed, + ) + assert ts.num_trees > 15 + assert ts.num_sites > 5 + return ts + + def get_simple_n16(self, seed=42): + ts = msprime.simulate( + 16, + recombination_rate=2, + mutation_rate=5, + random_seed=seed, + ) + assert ts.num_sites > 5 + return ts + + # Prepare example datasets with multiallelic sites. 
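+    # These build on msprime.sim_ancestry followed by msprime.sim_mutations,
+    # so sites can end up carrying more than two alleles.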
+    def get_multiallelic_n10_no_recombination(self, seed=42):
+        ts = msprime.sim_ancestry(
+            samples=10,
+            recombination_rate=0,
+            sequence_length=10,
+            population_size=1e4,
+            random_seed=seed,
+        )
+        ts = msprime.sim_mutations(
+            ts,
+            rate=1e-5,
+            random_seed=seed,
+        )
+        assert ts.num_sites > 3
+        return ts
+
+    def get_multiallelic_n6(self, seed=42):
+        ts = msprime.sim_ancestry(
+            samples=6,
+            recombination_rate=1e-4,
+            sequence_length=40,
+            population_size=1e4,
+            random_seed=seed,
+        )
+        ts = msprime.sim_mutations(
+            ts,
+            rate=1e-3,
+            random_seed=seed,
+        )
+        assert ts.num_sites > 5
+        return ts
+
+    def get_multiallelic_n8(self, seed=42):
+        ts = msprime.sim_ancestry(
+            samples=8,
+            recombination_rate=1e-4,
+            sequence_length=20,
+            population_size=1e4,
+            random_seed=seed,
+        )
+        ts = msprime.sim_mutations(
+            ts,
+            rate=1e-4,
+            random_seed=seed,
+        )
+        assert ts.num_sites > 5
+        assert ts.num_trees > 15
+        return ts
+
+    def get_multiallelic_n16(self, seed=42):
+        ts = msprime.sim_ancestry(
+            samples=16,
+            recombination_rate=1e-2,
+            sequence_length=20,
+            population_size=1e4,
+            random_seed=seed,
+        )
+        ts = msprime.sim_mutations(
+            ts,
+            rate=1e-4,
+            random_seed=seed,
+        )
+        assert ts.num_sites > 5
+        return ts
+
+
+class FBAlgorithmBase(LSBase):
+    """Base for testing forwards-backwards algorithms."""
+
+
+class ViterbiAlgorithmBase(LSBase):
+    """Base for testing Viterbi algorithms."""
diff --git a/tests/test_API.py b/tests/test_API.py
index 129e67a..051b79d 100644
--- a/tests/test_API.py
+++ b/tests/test_API.py
@@ -1,249 +1,30 @@
-# Simulation
-import itertools
-
-# Python libraries
-import msprime
-import numpy as np
-import pytest
-import tskit
-
+from . import lsbase
 import lshmm as ls
-import lshmm.forward_backward.fb_diploid as fbd
-import lshmm.forward_backward.fb_haploid as fbh
+import lshmm.fb_diploid as fbd
+import lshmm.fb_haploid as fbh
 import lshmm.vit_diploid as vd
 import lshmm.vit_haploid as vh
 
-EQUAL_BOTH_HOM = 4
-UNEQUAL_BOTH_HOM = 0
-BOTH_HET = 7
-REF_HOM_OBS_HET = 1
-REF_HET_OBS_HOM = 2
-
-MISSING = -1
-MISSING_INDEX = 3
-
-
-class LSBase:
-    """Superclass of Li and Stephens tests."""
-
-    def example_haplotypes(self, ts, seed=42):
-        H = ts.genotype_matrix()
-        s = H[:, 0].reshape(1, H.shape[0])
-        H = H[:, 1:]
-
-        haplotypes = [s, H[:, -1].reshape(1, H.shape[0])]
-        s_tmp = s.copy()
-        s_tmp[0, -1] = MISSING
-        haplotypes.append(s_tmp)
-        s_tmp = s.copy()
-        s_tmp[0, ts.num_sites // 2] = MISSING
-        haplotypes.append(s_tmp)
-        s_tmp = s.copy()
-        s_tmp[0, :] = MISSING
-        haplotypes.append(s_tmp)
-
-        return H, haplotypes
-
-    def haplotype_emission(self, mu, m, n_alleles, scale_mutation_based_on_n_alleles):
-        # Define the emission probability matrix
-        e = np.zeros((m, 2))
-        if isinstance(mu, float):
-            mu = mu * np.ones(m)
-
-        if scale_mutation_based_on_n_alleles:
-            e[:, 0] = mu - mu * np.equal(
-                n_alleles, np.ones(m)
-            )  # Added boolean in case we're at an invariant site
-            e[:, 1] = 1 - (n_alleles - 1) * mu
-        else:
-            for j in range(m):
-                if n_alleles[j] == 1:  # In case we're at an invariant site
-                    e[j, 0] = 0
-                    e[j, 1] = 1
-                else:
-                    e[j, 0] = mu[j] / (n_alleles[j] - 1)
-                    e[j, 1] = 1 - mu[j]
-        return e
-
-    def genotype_emission(self, mu, m):
-        # Define the emission probability matrix
-        e = np.zeros((m, 8))
-        e[:, EQUAL_BOTH_HOM] = (1 - mu) ** 2
-        e[:, UNEQUAL_BOTH_HOM] = mu**2
-        e[:, BOTH_HET] = (1 - mu) ** 2 + mu**2
-        e[:, REF_HOM_OBS_HET] = 2 * mu * (1 - mu)
-        e[:, REF_HET_OBS_HOM] = mu * (1 - mu)
-        e[:, MISSING_INDEX] = 1
-
-        return e
-
-    def example_parameters_haplotypes(self, ts, seed=42, 
scale_mutation=True): - """Returns an iterator over combinations of haplotype, recombination and - mutation probabilities.""" - np.random.seed(seed) - H, haplotypes = self.example_haplotypes(ts) - n = H.shape[1] - m = ts.get_num_sites() - - def _get_num_alleles(ref_haps, query): - assert ref_haps.shape[0] == query.shape[1] - num_sites = ref_haps.shape[0] - num_alleles = np.zeros(num_sites, dtype=np.int8) - exclusion_set = np.array([MISSING]) - for i in range(num_sites): - uniq_alleles = np.unique(np.append(ref_haps[i, :], query[:, i])) - num_alleles[i] = np.sum(~np.isin(uniq_alleles, exclusion_set)) - assert np.all(num_alleles >= 0), "Number of alleles cannot be zero." - return num_alleles - - # Here we have equal mutation and recombination - r = np.zeros(m) + 0.01 - mu = np.zeros(m) + 0.01 - r[0] = 0 - - for s in haplotypes: - n_alleles = _get_num_alleles(H, s) - e = self.haplotype_emission( - mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation - ) - yield n, m, H, s, e, r, mu - - # Mixture of random and extremes - rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)] - mus = [np.zeros(m) + 0.33, np.zeros(m) + 1e-6, np.random.rand(m) * 0.33] - - for s, r, mu in itertools.product(haplotypes, rs, mus): - r[0] = 0 - n_alleles = _get_num_alleles(H, s) - e = self.haplotype_emission( - mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation - ) - yield n, m, H, s, e, r, mu - - def example_genotypes(self, ts, seed=42): - np.random.seed(seed) - H = ts.genotype_matrix() - s = H[:, 0].reshape(1, H.shape[0]) + H[:, 1].reshape(1, H.shape[0]) - H = H[:, 2:] - - genotypes = [ - s, - H[:, -1].reshape(1, H.shape[0]) + H[:, -2].reshape(1, H.shape[0]), - ] - - s_tmp = s.copy() - s_tmp[0, -1] = MISSING - genotypes.append(s_tmp) - s_tmp = s.copy() - s_tmp[0, ts.num_sites // 2] = MISSING - genotypes.append(s_tmp) - s_tmp = s.copy() - s_tmp[0, :] = MISSING - genotypes.append(s_tmp) - - m = ts.get_num_sites() - n = H.shape[1] - - G = np.zeros((m, n, n)) - for i in range(m): - G[i, :, :] = np.add.outer(H[i, :], H[i, :]) - - return H, G, genotypes - - def example_parameters_genotypes(self, ts, seed=42): - np.random.seed(seed) - H, G, genotypes = self.example_genotypes(ts) - n = H.shape[1] - m = ts.get_num_sites() - - # Here we have equal mutation and recombination - r = np.zeros(m) + 0.01 - mu = np.zeros(m) + 0.01 - r[0] = 0 - - e = self.genotype_emission(mu, m) - - for s in genotypes: - yield n, m, G, s, e, r, mu - - # Mixture of random and extremes - rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)] - mus = [np.zeros(m) + 0.33, np.zeros(m) + 1e-6, np.random.rand(m) * 0.33] - - e = self.genotype_emission(mu, m) - - for s, r, mu in itertools.product(genotypes, rs, mus): - r[0] = 0 - e = self.genotype_emission(mu, m) - yield n, m, G, s, e, r, mu - - def example_parameters_genotypes_larger( - self, ts, seed=42, mean_r=1e-5, mean_mu=1e-5 - ): - np.random.seed(seed) - H, G, genotypes = self.example_genotypes(ts) - - m = ts.get_num_sites() - n = H.shape[1] - r = mean_r * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) - r[0] = 0 - - # Error probability - mu = mean_mu * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) - - # Define the emission probability matrix - e = self.genotype_emission(mu, m) - - for s in genotypes: - yield n, m, G, s, e, r, mu - - def assertAllClose(self, A, B): - """Assert that all entries of two matrices are 'close'""" - assert np.allclose(A, B, rtol=1e-9, atol=0.0) - - # Define a bunch of very small tree-sequences for testing a collection of 
parameters on - def test_simple_n_10_no_recombination(self): - ts = msprime.simulate( - 10, recombination_rate=0, mutation_rate=0.5, random_seed=42 - ) - assert ts.num_sites > 3 +class TestMethodsHaploid(lsbase.FBAlgorithmBase): + def test_simple_n10_no_recombination(self): + ts = self.get_simple_n10_no_recombination() self.verify(ts) - def test_simple_n_6(self): - ts = msprime.simulate(6, recombination_rate=2, mutation_rate=7, random_seed=42) - assert ts.num_sites > 5 + def test_simple_n6(self): + ts = self.get_simple_n6() self.verify(ts) - def test_simple_n_8(self): - ts = msprime.simulate(8, recombination_rate=2, mutation_rate=5, random_seed=42) - assert ts.num_sites > 5 + def test_simple_n8(self): + ts = self.get_simple_n8() self.verify(ts) - def test_simple_n_8_high_recombination(self): - ts = msprime.simulate(8, recombination_rate=20, mutation_rate=5, random_seed=42) - assert ts.num_trees > 15 - assert ts.num_sites > 5 - self.verify(ts) - - def test_simple_n_16(self): - ts = msprime.simulate(16, recombination_rate=2, mutation_rate=5, random_seed=42) - assert ts.num_sites > 5 + def test_simple_n16(self): + ts = self.get_simple_n16() self.verify(ts) def verify(self, ts): - raise NotImplementedError() - - -class FBAlgorithmBase(LSBase): - """Base for forwards backwards algorithm tests.""" - - -class TestMethodsHap(FBAlgorithmBase): - """Test that we compute the sample likelihoods across all implementations.""" - - def verify(self, ts): - for n, m, H_vs, s, e_vs, r, mu in self.example_parameters_haplotypes(ts): + for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars_haploid(ts): F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r) B_vs = fbh.backwards_ls_hap(n, m, H_vs, s, e_vs, c_vs, r) F, c, ll = ls.forwards(H_vs, s, r, p_mutation=mu) @@ -256,11 +37,25 @@ def verify(self, ts): B = ls.backwards(H_vs, s, c, r, mu) -class TestMethodsDip(FBAlgorithmBase): - """Test that we compute the sample likelihoods across all implementations.""" +class TestMethodsDiploid(lsbase.FBAlgorithmBase): + def test_simple_n10_no_recombination(self): + ts = self.get_simple_n10_no_recombination() + self.verify(ts) + + def test_simple_n6(self): + ts = self.get_simple_n6() + self.verify(ts) + + def test_simple_n8(self): + ts = self.get_simple_n8() + self.verify(ts) + + def test_simple_n16(self): + ts = self.get_simple_n16() + self.verify(ts) def verify(self, ts): - for n, m, G_vs, s, e_vs, r, mu in self.example_parameters_genotypes(ts): + for n, m, G_vs, s, e_vs, r, mu in self.get_examples_pars_diploid(ts): F_vs, c_vs, ll_vs = fbd.forward_ls_dip_loop( n, m, G_vs, s, e_vs, r, norm=True ) @@ -272,15 +67,25 @@ def verify(self, ts): self.assertAllClose(ll_vs, ll) -class VitAlgorithmBase(LSBase): - """Base for viterbi algoritm tests.""" +class TestViterbiHaploid(lsbase.ViterbiAlgorithmBase): + def test_simple_n10_no_recombination(self): + ts = self.get_simple_n10_no_recombination() + self.verify(ts) + def test_simple_n6(self): + ts = self.get_simple_n6() + self.verify(ts) + + def test_simple_n8(self): + ts = self.get_simple_n8() + self.verify(ts) -class TestViterbiHap(VitAlgorithmBase): - """Test that we have the same log-likelihood across all implementations""" + def test_simple_n16(self): + ts = self.get_simple_n16() + self.verify(ts) def verify(self, ts): - for n, m, H_vs, s, e_vs, r, mu in self.example_parameters_haplotypes(ts): + for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars_haploid(ts): V_vs, P_vs, ll_vs = vh.forwards_viterbi_hap_lower_mem_rescaling( n, m, H_vs, s, e_vs, r ) @@ -291,11 
+96,25 @@ def verify(self, ts):
             self.assertAllClose(path_vs, path)
 
 
-class TestViterbiDip(VitAlgorithmBase):
-    """Test that we have the same log-likelihood across all implementations"""
+class TestViterbiDiploid(lsbase.ViterbiAlgorithmBase):
+    def test_simple_n10_no_recombination(self):
+        ts = self.get_simple_n10_no_recombination()
+        self.verify(ts)
+
+    def test_simple_n6(self):
+        ts = self.get_simple_n6()
+        self.verify(ts)
+
+    def test_simple_n8(self):
+        ts = self.get_simple_n8()
+        self.verify(ts)
+
+    def test_simple_n16(self):
+        ts = self.get_simple_n16()
+        self.verify(ts)
 
     def verify(self, ts):
-        for n, m, G_vs, s, e_vs, r, mu in self.example_parameters_genotypes(ts):
+        for n, m, G_vs, s, e_vs, r, mu in self.get_examples_pars_diploid(ts):
             V_vs, P_vs, ll_vs = vd.forwards_viterbi_dip_low_mem(n, m, G_vs, s, e_vs, r)
             path_vs = vd.backwards_viterbi_dip(m, V_vs, P_vs)
             phased_path_vs = vd.get_phased_path(n, path_vs)
diff --git a/tests/test_API_multiallelic.py b/tests/test_API_multiallelic.py
index 92f1ab3..45ea8fc 100644
--- a/tests/test_API_multiallelic.py
+++ b/tests/test_API_multiallelic.py
@@ -1,196 +1,39 @@
-# Simulation
-import itertools
-
-# Python libraries
-import msprime
-import numpy as np
-import pytest
-import tskit
-
+from . import lsbase
 import lshmm as ls
-import lshmm.forward_backward.fb_diploid as fbd
-import lshmm.forward_backward.fb_haploid as fbh
+import lshmm.fb_diploid as fbd
+import lshmm.fb_haploid as fbh
 import lshmm.vit_diploid as vd
 import lshmm.vit_haploid as vh
 
-EQUAL_BOTH_HOM = 4
-UNEQUAL_BOTH_HOM = 0
-BOTH_HET = 7
-REF_HOM_OBS_HET = 1
-REF_HET_OBS_HOM = 2
-
-MISSING = -1
-MISSING_INDEX = 3
-
-
-class LSBase:
-    """Superclass of Li and Stephens tests."""
-
-    def example_haplotypes(self, ts, num_random=10, seed=42):
-        H = ts.genotype_matrix()
-        s = H[:, 0].reshape(1, H.shape[0])
-        H = H[:, 1:]
-
-        haplotypes = [s, H[:, -1].reshape(1, H.shape[0])]
-        s_tmp = s.copy()
-        s_tmp[0, -1] = MISSING
-        haplotypes.append(s_tmp)
-        s_tmp = s.copy()
-        s_tmp[0, ts.num_sites // 2] = MISSING
-        haplotypes.append(s_tmp)
-        s_tmp = s.copy()
-        s_tmp[0, :] = MISSING
-        haplotypes.append(s_tmp)
-        return H, haplotypes
 
+class TestMethodsHaploid(lsbase.FBAlgorithmBase):
+    def test_multiallelic_n10_no_recombination(self):
+        ts = self.get_multiallelic_n10_no_recombination()
+        self.verify(ts)
 
-    def haplotype_emission(self, mu, m, n_alleles, scale_mutation_based_on_n_alleles):
-        # Define the emission probability matrix
-        e = np.zeros((m, 2))
-        if isinstance(mu, float):
-            mu = mu * np.ones(m)
+    def test_multiallelic_n6(self):
+        ts = self.get_multiallelic_n6()
+        self.verify(ts)
 
-        if scale_mutation_based_on_n_alleles:
-            e[:, 0] = mu - mu * np.equal(
-                n_alleles, np.ones(m)
-            )  # Added boolean in case we're at an invariant site
-            e[:, 1] = 1 - (n_alleles - 1) * mu
-        else:
-            for j in range(m):
-                if n_alleles[j] == 1:  # In case we're at an invariant site
-                    e[j, 0] = 0
-                    e[j, 1] = 1
-                else:
-                    e[j, 0] = mu[j] / (n_alleles[j] - 1)
-                    e[j, 1] = 1 - mu[j]
-        return e
+    def test_multiallelic_n8(self):
+        ts = self.get_multiallelic_n8()
+        self.verify(ts)
 
-    def example_parameters_haplotypes(self, ts, seed=42, scale_mutation=True):
-        """Returns an iterator over combinations of haplotype, recombination and
-        mutation probabilities."""
-        np.random.seed(seed)
-        H, haplotypes = self.example_haplotypes(ts)
-        n = H.shape[1]
-        m = ts.get_num_sites()
-
-        def _get_num_alleles(ref_haps, query):
-            assert ref_haps.shape[0] == query.shape[1]
-            num_sites = ref_haps.shape[0]
-            num_alleles = np.zeros(num_sites, dtype=np.int8)
-            exclusion_set = 
np.array([MISSING])
-            for i in range(num_sites):
-                uniq_alleles = np.unique(np.append(ref_haps[i, :], query[:, i]))
-                num_alleles[i] = np.sum(~np.isin(uniq_alleles, exclusion_set))
-            assert np.all(num_alleles >= 0), "Number of alleles cannot be zero."
-            return num_alleles
-
-        # Here we have equal mutation and recombination
-        r = np.zeros(m) + 0.01
-        mu = np.zeros(m) + 0.01
-        r[0] = 0
-
-        for s in haplotypes:
-            # Must be calculated from the genotype matrix because we can now get back mutations that
-            # result in the number of alleles being higher than the number of alleles in the reference panel.
-            n_alleles = _get_num_alleles(H, s)
-            e = self.haplotype_emission(
-                mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation
-            )
-            yield n, m, H, s, e, r, mu
-
-        # Mixture of random and extremes
-        rs = [np.zeros(m) + 0.999, np.zeros(m) + 1e-6, np.random.rand(m)]
-        mus = [np.zeros(m) + 0.2, np.zeros(m) + 1e-6, np.random.rand(m) * 0.2]
-
-        e = self.haplotype_emission(
-            mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation
-        )
-
-        for s, r, mu in itertools.product(haplotypes, rs, mus):
-            r[0] = 0
-            n_alleles = _get_num_alleles(H, s)
-            e = self.haplotype_emission(
-                mu, m, n_alleles, scale_mutation_based_on_n_alleles=scale_mutation
-            )
-            yield n, m, H, s, e, r, mu
-
-    def assertAllClose(self, A, B):
-        """Assert that all entries of two matrices are 'close'"""
-        assert np.allclose(A, B, rtol=1e-9, atol=0.0)
-
-    # Define a bunch of very small tree-sequences for testing a collection of parameters on
-    def test_simple_n_10_no_recombination(self):
-        ts = msprime.sim_ancestry(
-            samples=10,
-            recombination_rate=0,
-            random_seed=42,
-            sequence_length=10,
-            population_size=10000,
-        )
-        ts = msprime.sim_mutations(ts, rate=1e-5, random_seed=42)
-        assert ts.num_sites > 3
-        self.verify(ts)
-
-    def test_simple_n_6(self):
-        ts = msprime.sim_ancestry(
-            samples=6,
-            recombination_rate=1e-4,
-            random_seed=42,
-            sequence_length=40,
-            population_size=10000,
-        )
-        ts = msprime.sim_mutations(ts, rate=1e-3, random_seed=42)
-        assert ts.num_sites > 5
-        self.verify(ts)
-
-    def test_simple_n_8(self):
-        ts = msprime.sim_ancestry(
-            samples=8,
-            recombination_rate=1e-4,
-            random_seed=42,
-            sequence_length=20,
-            population_size=10000,
-        )
-        ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42)
-        assert ts.num_sites > 5
-        assert ts.num_trees > 15
-        self.verify(ts)
-
-    def test_simple_n_16(self):
-        ts = msprime.sim_ancestry(
-            samples=16,
-            recombination_rate=1e-2,
-            random_seed=42,
-            sequence_length=20,
-            population_size=10000,
-        )
-        ts = msprime.sim_mutations(ts, rate=1e-4, random_seed=42)
-        assert ts.num_sites > 5
-        self.verify(ts)
+    def test_multiallelic_n16(self):
+        ts = self.get_multiallelic_n16()
+        self.verify(ts)
 
     def verify(self, ts):
-        raise NotImplementedError()
-
-
-class FBAlgorithmBase(LSBase):
-    """Base for forwards backwards algorithm tests."""
-
-
-class TestMethodsHap(FBAlgorithmBase):
-    """Test that we compute the sample likelihoods across all implementations."""
-
-    def verify(self, ts):
-        for n, m, H_vs, s, e_vs, r, mu in self.example_parameters_haplotypes(ts):
+        for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars_haploid(ts):
             F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r)
             B_vs = fbh.backwards_ls_hap(n, m, H_vs, s, e_vs, c_vs, r)
             F, c, ll = ls.forwards(H_vs, s, r, p_mutation=mu)
             B = ls.backwards(H_vs, s, c, r, p_mutation=mu)
             self.assertAllClose(F, F_vs)
             self.assertAllClose(B, B_vs)
-            # print(e_vs)
             self.assertAllClose(ll_vs, ll)
 
-        for n, m, H_vs, s, e_vs, r, mu 
in self.example_parameters_haplotypes(
+        for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars_haploid(
             ts, scale_mutation=False
         ):
             F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r)
@@ -206,15 +49,25 @@ def verify(self, ts):
             self.assertAllClose(ll_vs, ll)
 
 
-class VitAlgorithmBase(LSBase):
-    """Base for viterbi algoritm tests."""
+class TestViterbiHaploid(lsbase.ViterbiAlgorithmBase):
+    def test_multiallelic_n10_no_recombination(self):
+        ts = self.get_multiallelic_n10_no_recombination()
+        self.verify(ts)
+
+    def test_multiallelic_n6(self):
+        ts = self.get_multiallelic_n6()
+        self.verify(ts)
 
+    def test_multiallelic_n8(self):
+        ts = self.get_multiallelic_n8()
+        self.verify(ts)
 
-class TestViterbiHap(VitAlgorithmBase):
-    """Test that we have the same log-likelihood across all implementations"""
+    def test_multiallelic_n16(self):
+        ts = self.get_multiallelic_n16()
+        self.verify(ts)
 
     def verify(self, ts):
-        for n, m, H_vs, s, e_vs, r, mu in self.example_parameters_haplotypes(ts):
+        for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars_haploid(ts):
             V_vs, P_vs, ll_vs = vh.forwards_viterbi_hap_lower_mem_rescaling(
                 n, m, H_vs, s, e_vs, r
             )
diff --git a/tests/test_LS_haploid_diploid.py b/tests/test_LS_haploid_diploid.py
index 9b9f7d8..086d79d 100644
--- a/tests/test_LS_haploid_diploid.py
+++ b/tests/test_LS_haploid_diploid.py
@@ -1,28 +1,18 @@
-# Simulation
 import itertools
+import pytest
 
-# Python libraries
-import msprime
 import numpy as np
-import pytest
+import numba as nb
 
-import lshmm.forward_backward.fb_diploid as fbd
-import lshmm.forward_backward.fb_haploid as fbh
+import msprime
+import tskit
+
+import lshmm.core as core
+import lshmm.fb_diploid as fbd
+import lshmm.fb_haploid as fbh
 import lshmm.vit_diploid as vd
 import lshmm.vit_haploid as vh
 
-EQUAL_BOTH_HOM = 4
-UNEQUAL_BOTH_HOM = 0
-BOTH_HET = 7
-REF_HOM_OBS_HET = 1
-REF_HET_OBS_HOM = 2
-
-MISSING = -1
-MISSING_INDEX = 3
-
-import numba as nb
-import tskit
-
 
 class LSBase:
     """Superclass of Li and Stephens tests."""
@@ -34,35 +24,31 @@ def example_haplotypes(self, ts):
 
         haplotypes = [s, H[:, -1].reshape(1, H.shape[0])]
         s_tmp = s.copy()
-        s_tmp[0, -1] = MISSING
+        s_tmp[0, -1] = core.MISSING
         haplotypes.append(s_tmp)
         s_tmp = s.copy()
-        s_tmp[0, ts.num_sites // 2] = MISSING
+        s_tmp[0, ts.num_sites // 2] = core.MISSING
         haplotypes.append(s_tmp)
         s_tmp = s.copy()
-        s_tmp[0, :] = MISSING
+        s_tmp[0, :] = core.MISSING
         haplotypes.append(s_tmp)
 
         return H, haplotypes
 
     def haplotype_emission(self, mu, m):
-        # Define the emission probability matrix
         e = np.zeros((m, 2))
-        e[:, 0] = mu  # If they match
-        e[:, 1] = 1 - mu  # If they don't match
-
+        e[:, 0] = mu
+        e[:, 1] = 1 - mu
         return e
 
     def genotype_emission(self, mu, m):
-        # Define the emission probability matrix
         e = np.zeros((m, 8))
-        e[:, EQUAL_BOTH_HOM] = (1 - mu) ** 2
-        e[:, UNEQUAL_BOTH_HOM] = mu**2
-        e[:, BOTH_HET] = 1 - mu
-        e[:, REF_HOM_OBS_HET] = 2 * mu * (1 - mu)
-        e[:, REF_HET_OBS_HOM] = mu * (1 - mu)
-        e[:, MISSING_INDEX] = 1
-
+        e[:, core.EQUAL_BOTH_HOM] = (1 - mu) ** 2
+        e[:, core.UNEQUAL_BOTH_HOM] = mu**2
+        e[:, core.BOTH_HET] = 1 - mu
+        e[:, core.REF_HOM_OBS_HET] = 2 * mu * (1 - mu)
+        e[:, core.REF_HET_OBS_HOM] = mu * (1 - mu)
+        e[:, core.MISSING_INDEX] = 1
         return e
 
     def example_parameters_haplotypes(self, ts, seed=42):
@@ -104,10 +90,8 @@ def example_parameters_haplotypes_larger(
         r = mean_r * np.ones(m) * ((np.random.rand(m) + 0.5) / 2)
         r[0] = 0
 
-        # Error probability
         mu = mean_mu * np.ones(m) * ((np.random.rand(m) + 0.5) / 2)
 
-        # Define the emission probability matrix
         e = 
self.haplotype_emission(mu, m) for s in haplotypes: @@ -124,13 +108,13 @@ def example_genotypes(self, ts, seed=42): ] s_tmp = s.copy() - s_tmp[0, -1] = MISSING + s_tmp[0, -1] = core.MISSING genotypes.append(s_tmp) s_tmp = s.copy() - s_tmp[0, ts.num_sites // 2] = MISSING + s_tmp[0, ts.num_sites // 2] = core.MISSING genotypes.append(s_tmp) s_tmp = s.copy() - s_tmp[0, :] = MISSING + s_tmp[0, :] = core.MISSING genotypes.append(s_tmp) m = ts.get_num_sites() @@ -179,46 +163,65 @@ def example_parameters_genotypes_larger( r = mean_r * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) r[0] = 0 - # Error probability mu = mean_mu * np.ones(m) * ((np.random.rand(m) + 0.5) / 2) - # Define the emission probability matrix e = self.genotype_emission(mu, m) for s in genotypes: yield n, m, G, s, e, r def assertAllClose(self, A, B): - """Assert that all entries of two matrices are 'close'""" - # assert np.allclose(A, B, rtol=1e-9, atol=0.0) assert np.allclose(A, B, rtol=1e-09, atol=1e-08) # Define a bunch of very small tree-sequences for testing a collection of parameters on def test_simple_n_10_no_recombination(self): ts = msprime.simulate( - 10, recombination_rate=0, mutation_rate=0.5, random_seed=42 + 10, + recombination_rate=0, + mutation_rate=0.5, + random_seed=42, ) assert ts.num_sites > 3 self.verify(ts) def test_simple_n_6(self): - ts = msprime.simulate(6, recombination_rate=2, mutation_rate=7, random_seed=42) + ts = msprime.simulate( + 6, + recombination_rate=2, + mutation_rate=7, + random_seed=42, + ) assert ts.num_sites > 5 self.verify(ts) def test_simple_n_8(self): - ts = msprime.simulate(8, recombination_rate=2, mutation_rate=5, random_seed=42) + ts = msprime.simulate( + 8, + recombination_rate=2, + mutation_rate=5, + random_seed=42, + ) assert ts.num_sites > 5 self.verify(ts) def test_simple_n_8_high_recombination(self): - ts = msprime.simulate(8, recombination_rate=20, mutation_rate=5, random_seed=42) + ts = msprime.simulate( + 8, + recombination_rate=20, + mutation_rate=5, + random_seed=42, + ) assert ts.num_trees > 15 assert ts.num_sites > 5 self.verify(ts) def test_simple_n_16(self): - ts = msprime.simulate(16, recombination_rate=2, mutation_rate=5, random_seed=42) + ts = msprime.simulate( + 16, + recombination_rate=2, + mutation_rate=5, + random_seed=42, + ) assert ts.num_sites > 5 self.verify(ts) @@ -245,7 +248,7 @@ class FBAlgorithmBase(LSBase): class TestNonTreeMethodsHap(FBAlgorithmBase): - """Test that we compute the sample likelihoods across all implementations.""" + """Test that the computed likelihoods are the same across all implementations.""" def verify(self, ts): for n, m, H_vs, s, e_vs, r in self.example_parameters_haplotypes(ts): @@ -281,7 +284,7 @@ def verify_larger(self, ts): class TestNonTreeMethodsDip(FBAlgorithmBase): - """Test that we compute the sample likelihoods across all implementations.""" + """Test that the computed likelihoods are the same across all implementations.""" def verify(self, ts): for n, m, G_vs, s, e_vs, r in self.example_parameters_genotypes(ts): @@ -368,7 +371,7 @@ class VitAlgorithmBase(LSBase): class TestNonTreeViterbiHap(VitAlgorithmBase): - """Test that we have the same log-likelihood across all implementations""" + """Test that the computed log-likelihoods are the same across all implementations.""" def verify(self, ts): for n, m, H_vs, s, e_vs, r in self.example_parameters_haplotypes(ts): @@ -488,7 +491,7 @@ def verify_larger(self, ts): class TestNonTreeViterbiDip(VitAlgorithmBase): - """Test that we have the same log-likelihood across all 
implementations""" + """Test that the computed log-likelihoods are the same across all implementations.""" def verify(self, ts): for n, m, G_vs, s, e_vs, r in self.example_parameters_genotypes(ts):
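
A minimal usage sketch of the refactored haploid Viterbi round trip, for anyone
reviewing the renamed modules. This is not part of the patch: the module path
lshmm.vit_haploid and the function signatures are taken from the hunks above,
the biallelic emission layout follows haplotype_emission (column 0 holds the
mismatch probability, column 1 the match probability, matching the emission
indexing in vit_haploid), and the toy panel and parameter values are invented
for illustration.

    import numpy as np

    import lshmm.vit_haploid as vh

    # Reference panel H is sites x samples; the query s is 1 x sites.
    H = np.array(
        [[0, 1, 1], [1, 0, 1], [0, 0, 1], [1, 1, 0]],
        dtype=np.int8,
    )
    s = np.array([[0, 1, 0, 1]], dtype=np.int8)
    m, n = H.shape

    # Per-site recombination and mutation probabilities,
    # with r[0] = 0 as in the test fixtures above.
    r = np.full(m, 0.01)
    r[0] = 0
    mu = np.full(m, 0.01)

    # Biallelic emission matrix: column 0 is a mismatch, column 1 a match.
    e = np.zeros((m, 2))
    e[:, 0] = mu
    e[:, 1] = 1 - mu

    V, P, ll = vh.forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r)
    path = vh.backwards_viterbi_hap(m, V, P)

    # Re-scoring the decoded path should recover the same log-likelihood.
    assert np.isclose(vh.path_ll_hap(n, m, H, path, s, e, r), ll)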