From b00c9b4ce7972a15545761ea06ad11bab241a930 Mon Sep 17 00:00:00 2001
From: Shing Zhan <shing.zhan@gmail.com>
Date: Mon, 7 Aug 2023 18:19:00 +0100
Subject: [PATCH] Add tests to check LS HMM of _tskit.lshmm compared to BEAGLE

---
 python/tests/beagle.py          | 689 ++++++++++++++++++++++++++++++++
 python/tests/beagle_numba.py    | 441 ++++++++++++++++++++
 python/tests/test_imputation.py | 628 +++++++++++++++++++++++++++++
 3 files changed, 1758 insertions(+)
 create mode 100644 python/tests/beagle.py
 create mode 100644 python/tests/beagle_numba.py
 create mode 100644 python/tests/test_imputation.py

diff --git a/python/tests/beagle.py b/python/tests/beagle.py
new file mode 100644
index 0000000000..312c34e4ba
--- /dev/null
+++ b/python/tests/beagle.py
@@ -0,0 +1,689 @@
+"""
+Implementation of the BEAGLE 4.1 algorithm to impute genotypes.
+
+This was implemented while closely consulting the BEAGLE 4.1 paper:
+Browning & Browning (2016). Genotype imputation with millions of reference samples.
+Am J Hum Genet 98:116-126. doi:10.1016/j.ajhg.2015.11.020
+
+The source code of BEAGLE 4.1 (particularly `LSHapBaum.java`) was also consulted:
+https://faculty.washington.edu/browning/beagle/b4_1.html
+
+Here are some notations used throughout this implementation:
+h = Number of reference haplotypes.
+m = Number of genotyped markers.
+x = Number of imputed markers.
+
+This implementation takes the following inputs:
+1. Panel of reference haplotypes in a matrix of size (m + x, h).
+2. One query haplotype in an array of size (m + x).
+3. Genomic site positions of all the markers in an array of size (m + x).
+
+For simplicity, we assume that:
+1. All the sites are biallelic (0/1 encoding).
+2. The genetic map is defined by equating 1 Mbp to 1 cM.
+
+In the query haplotype:
+1. The genotyped markers take values of 0 or 1.
+2. The imputed markers take -1.
+
+The forward and backward probability matrices are of size (m, h).
+The HMM state probability matrix is of size (m + 1, h).
+The interpolated haplotype probability matrix is of size (x, 2),
+The imputed alleles in the query haplotype are the maximum a posteriori (MAP) alleles.
+
+For efficiency, BEAGLE uses aggregated markers, which are clusters of markers
+within a 0.005 cM interval (default). Because the genotypes are phased,
+the alleles in the aggregated markers form distinct "allele sequences".
+Below, we do not use aggregated markers or allele sequences, which would complicate
+the implementation.
+
+Rather than trying to exactly replicating the original BEAGLE 4.1 algorithm,
+this implementation computes Equation 1 of BB2016. The functions used in an attempt
+to faithfully implement the BEAGLE algorithm are kept for documentation.
+"""
+import numpy as np
+
+import _tskit
+
+
+def convert_to_genetic_map_position(pos):
+    """
+    Convert genomic site positions (bp) to genetic map positions (cM).
+
+    In BEAGLE 4.1, when a genetic map is not specified, it is assumed that 1 Mbp = 1cM.
+
+    This trivial function is meant for documentation.
+
+    :param numpy.ndarray pos: Site positions.
+    :return: Genetic map positions.
+    :rtype: numpy.ndarray
+    """
+    assert 0 < np.min(pos)
+    return pos / 1e6
+
+
+def get_weights(genotyped_pos, imputed_pos):
+    """
+    Get weights at imputed markers in the query haplotype.
+
+    These weights are used in linear interpolation of HMM state probabilities
+    at imputed markers.
+
+    In BB2016 (see below Equation 1), a weight between genotyped markers m and m + 1
+    at imputed marker x is denoted lambda_m,x.
+
+    lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
+    where g(i) = genetic map position of marker i.
+
+    :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
+    :param numpy.ndarray imputed_pos: Site positions of imputed markers.
+    :return: Weights used in linear interpolation.
+    :rtype: numpy.ndarray
+    """
+    # Check that the two sets are mutually exclusive.
+    assert not np.any(np.isin(genotyped_pos, imputed_pos))
+    # Set min genetic distance to avoid division by zero.
+    MIN_CM_DIST = 0.005
+    m = len(genotyped_pos)
+    x = len(imputed_pos)
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    assert not np.any(np.isin(genotyped_cm, imputed_cm))
+    # Calculate weights for imputed markers.
+    weights = np.zeros(x, dtype=np.float64)
+    # Identify genotype markers k and k + 1 between imputed marker i
+    for i in np.arange(x):
+        if imputed_cm[i] < genotyped_cm[0]:
+            # Special case: imputed marker is before the first genotyped marker.
+            k = 0
+            weights[i] = 1.0
+        elif imputed_cm[i] > genotyped_cm[-1]:
+            # Special case: imputed marker is after the last genotyped marker.
+            k = m - 1
+            weights[i] = 0.0
+        else:
+            k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
+            cm_mP1_x = genotyped_cm[k + 1] - imputed_cm[i]
+            cm_mP1_m = np.max([genotyped_cm[k + 1] - genotyped_cm[k], MIN_CM_DIST])
+            weights[i] = cm_mP1_x / cm_mP1_m
+    assert 0 <= np.min(weights) and np.max(weights) <= 1, "Weights are not in [0, 1]."
+    return weights
+
+
+def get_mismatch_prob(pos, miscall_rate):
+    """
+    Set mismatch probabilities at genotyped markers.
+
+    In BEAGLE, the mismatch probability is dominated by the allelic miscall rate.
+    By default, it is set to 0.0001 and capped at 0.50.
+
+    :param numpy.ndarray pos: Site positions.
+    :param float miscall_rate: Allelic miscall rate.
+    :return: Mismatch probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert isinstance(miscall_rate, float)
+    if miscall_rate >= 0.5:
+        miscall_rate = 0.5
+    mu = np.zeros(len(pos), dtype=np.float64) + miscall_rate
+    return mu
+
+
+def get_switch_prob(pos, h, ne):
+    """
+    Set switch probabilities at genotyped markers.
+
+    Note that in BEAGLE, the default value of `ne` is set to 1e6.
+    If `h` is small and `ne` is large, the switch probability is nearly 1.
+    In such cases, it may be desirable to set `ne` to a small value
+    to avoid switching between haplotypes all the time.
+
+    :param numpy.ndarray pos: Site positions.
+    :param int h: Number of reference haplotypes.
+    :param float ne: Effective population size.
+    :return: Switch probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert pos[0] > 0, "Site positions are not 1-based."
+    assert isinstance(h, int), "Number of reference haplotypes is not an integer."
+    assert isinstance(ne, float), "Effective population size is not a float."
+    # Get genetic distance between adjacent markers.
+    cm = np.concatenate([[0], convert_to_genetic_map_position(pos)])
+    gen_dist = cm[1:] - cm[:-1]
+    assert len(gen_dist) == len(pos)
+    rho = -np.expm1(-(4 * ne * gen_dist / h))
+    return rho
+
+
+def compute_forward_probability_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement the forward algorithm.
+
+    The forward probablity matrix is of size (m, h),
+    where m is the number of genotyped markers
+    and h is the number of reference haplotypes.
+
+    `ref_h` is a panel of reference haplotypes of size m x h.
+    Here, it is subsetted to genotyped markers.
+
+    `query_h` is also subsetted to genotyped markers (length m).
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Forward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    assert np.all(
+        np.isin(ref_h, [0, 1])
+    ), "Reference haplotypes have non-biallelic values."
+    assert np.all(np.isin(query_h, [0, 1])), "Query haplotype has non-biallelic values."
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    # BEAGLE caps mismatch probabilities at 0.5.
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    fm = np.zeros((m, h), dtype=np.float64)
+    last_sum = 1.0  # Part of normalization factor
+    for i in np.arange(m):
+        # Get site-specific parameters
+        shift = rho[i] / h
+        scale = (1 - rho[i]) / last_sum
+        # Get allele at genotyped marker i on query haplotype j
+        query_a = query_h[i]
+        for j in np.arange(h):
+            # Get allele at genotyped marker i on reference haplotype j
+            ref_a = ref_h[i, j]
+            # Get emission probability
+            em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
+            if m == 0:
+                fm[i, j] = em_prob
+            else:
+                fm[i, j] = em_prob * (scale * fm[i - 1, j] + shift)
+        site_sum = np.sum(fm[i, :])
+        last_sum = site_sum / h if m == 0 else site_sum
+    return fm
+
+
+def compute_backward_probability_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement the backward algorithm.
+
+    The backward probablity matrix is of size (m, h),
+    where m is the number of genotyped markers
+    and h is the number of reference haplotypes.
+
+    `ref_h` is a panel of reference haplotypes of size m x h.
+    Here, it is subsetted to genotyped markers.
+
+    `query_h` is also subsetted to genotyped markers (length m).
+
+    In BEAGLE 4.1, the values are kept one site at a time.
+    Here, we keep the values at all the sites.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Backward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    assert np.all(
+        np.isin(ref_h, [0, 1])
+    ), "Reference haplotypes have non-biallelic values."
+    assert np.all(np.isin(query_h, [0, 1])), "Query haplotype has non-biallelic values."
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    bm = np.zeros((m, h), dtype=np.float64)
+    bm[-1, :] = 1.0 / h  # Initialise the last column
+    for i in np.arange(m - 2, -1, -1):
+        query_a = query_h[i + 1]
+        for j in np.arange(h):
+            ref_a = ref_h[i + 1, j]
+            em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
+            # Note that BEAGLE keeps the values at one site at a time.
+            bm[i, j] = bm[i + 1, j] * em_prob
+        sum_site = np.sum(bm[i, :])
+        scale = (1 - rho[i + 1]) / sum_site
+        shift = rho[i + 1] / h
+        bm[i, :] = scale * bm[i, :] + shift
+    return bm
+
+
+def _get_ref_hap_seg(a, b):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Assuming all biallelic sites, get the index of a reference haplotype segment (i)
+    defined by the alleles a and b at two adjacent genotyped markers.
+
+    #i, a, b
+    0, 0, 0
+    1, 1, 0
+    2, 0, 1
+    3, 1, 1
+
+    See https://github.com/tskit-dev/tskit/issues/2802#issuecomment-1698767169
+
+    This is a helper function for `_compute_state_probability_matrix`.
+    """
+    ref_hap_idx = a * 1 + b * 2
+    return ref_hap_idx
+
+
+def _compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Implement the HMM forward-backward algorithm to compute:
+    1. A HMM state probability matrix across genotyped markers;
+    2. "Forward haplotype probabilities"; and
+    3. "Backward haplotype probabilities".
+
+    A HMM state is an index of a reference haplotype.
+
+    The HMM state probability matrix is of size (m + 1, h),
+    where m is the number of genotyped markers in the query haplotype
+    and h is the number of reference haplotypes.
+
+    Note that the additional column at m + 1 is for interpolation of
+    imputed markers right of the last genotyped marker.
+
+    Both the forward and backward haplotype probability matrices
+    are of size (m, 2), assuming only biallelic sites.
+
+    `ref_h` is a panel of reference haplotypes of size m x h.
+    Here, it is subsetted to genotyped markers.
+
+    `query_h` is also subsetted to genotyped markers (length m).
+
+    This implementation is based on the function `setStateProbs`
+    of `LSHapBaum.java` in BEAGLE. It performs the computation
+    described in Equation 2 of BB2016.
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: HMM state probability matrix,
+        forward haplotype probability matrix, and
+        backward haplotype probability matrix.
+    :rtype: tuple
+    """
+    m = fm.shape[0]
+    h = fm.shape[1]
+    assert (m, h) == bm.shape
+    assert not np.any(fm < 0), "Forward probability matrix has negative values."
+    assert not np.any(np.isnan(fm)), "Forward probability matrix has NaN values."
+    assert not np.any(bm < 0), "Backward probability matrix has negative values."
+    assert not np.any(np.isnan(bm)), "Backward probability matrix has NaN values."
+    assert np.all(
+        np.isin(ref_h, [0, 1])
+    ), "Reference haplotypes have non-biallelic values."
+    assert np.all(np.isin(query_h, [0, 1])), "Query haplotype has non-biallelic values."
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    # m + 1 columns to allow for imputed markers right of the last genotyped marker.
+    sm = np.zeros((m + 1, h), dtype=np.float64)
+    fwd_hap_probs = np.zeros((m, 4), dtype=np.float64)
+    bwd_hap_probs = np.zeros((m, 4), dtype=np.float64)
+    for i in np.arange(m - 1, -1, -1):
+        for j in np.arange(h):
+            sm[i, j] = fm[i, j] * bm[i, j]
+            ref_hap_seg_mP1 = _get_ref_hap_seg(ref_h[i, j], ref_h[i + 1, j])
+            ref_hap_seg_m = _get_ref_hap_seg(ref_h[i - 1, j], ref_h[i, j])
+            fwd_hap_probs[i, ref_hap_seg_mP1] += sm[i, j]
+            bwd_hap_probs[i, ref_hap_seg_m] += sm[i, j]
+        sum_fwd_hap_probs = np.sum(fwd_hap_probs[i, :])
+        fwd_hap_probs[i, :] /= sum_fwd_hap_probs
+        bwd_hap_probs[i, :] /= sum_fwd_hap_probs  # Sum of forward hap probs
+    sm[m, :] = sm[m - 1, :]  # Not in BEAGLE
+    return (sm, fwd_hap_probs, bwd_hap_probs)
+
+
+def _interpolate_allele_probabilities(
+    fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
+):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Compute the interpolated allele probabilities at the imputed markers
+    of a query haplotype.
+
+    The forward and backward haplotype probability matrices are of size (m, 2),
+    where m is the number of genotyped markers.
+    Here, we assume all biallelic sites, hence the 2.
+
+    The interpolated allele probability matrix is of size (x, 2),
+    where x is the number of imputed markers.
+    Again, we assume all biallelic sites, hence the 2.
+
+    This implementation is based on Equation 2 (the rearranged one) in BB2016.
+
+    :param numpy.ndarray fwd_hap_probs: Forward haplotype probability matrix.
+    :param numpy.ndarray bwd_hap_probs: Backward haplotype probability matrix.
+    :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
+    :param numpy.ndarray imputed_pos: Site positions of imputed markers.
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    m = len(genotyped_pos)
+    x = len(imputed_pos)
+    assert (m, 2) == fwd_hap_probs.shape
+    assert (m, 2) == bwd_hap_probs.shape
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    assert m == len(genotyped_cm)
+    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    assert x == len(imputed_cm)
+    weights = get_weights(genotyped_cm, imputed_cm)
+    assert x == len(weights)
+    i_hap_probs = np.zeros((x, 2), dtype=np.float64)
+    for i in np.arange(x):
+        # Identify the genotyped markers k and k + 1 between imputed marker i
+        if imputed_cm[i] < genotyped_cm[0]:
+            k = 0
+        elif imputed_cm[i] > genotyped_cm[-1]:
+            k = m - 2
+        else:
+            k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
+        # Vectorised over the allelic states
+        i_hap_probs[i, :] = weights[i] * bwd_hap_probs[k, :]
+        i_hap_probs[i, :] += (1 - weights[i]) * fwd_hap_probs[k, :]
+    return i_hap_probs
+
+
+def compute_state_probability_matrix(fm, bm, ref_h, query_h):
+    """
+    Implement the HMM forward-backward algorithm following Equation 1 of BB2016.
+    This is simpler than faithfully reimplementing the original BEAGLE 4.1 algorithm.
+
+    A HMM state is (index of a genotyped marker, index of a reference haplotype).
+
+    The output HMM state probability matrix is of size (m + 1, h),
+    where m is the number of genotyped markers in the query haplotype
+    and h is the number of reference haplotypes.
+
+    Note that the additional column at m + 1 is for interpolation of
+    imputed markers right of the last genotyped marker.
+
+    The forward and backward probability matrices are both of size (m, h).
+
+    The reference haplotypes of size (m, h), and the query haplotype is of size m.
+    Here, they are both subsetted to genotyped markers.
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :return: HMM state probability matrix.
+    :rtype: numpy.ndarray
+    """
+    assert np.all(
+        np.isin(ref_h, [0, 1])
+    ), "Reference haplotypes have non-biallelic values."
+    assert np.all(np.isin(query_h, [0, 1])), "Query haplotype has non-biallelic values."
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert (m, h) == fm.shape
+    assert (m, h) == bm.shape
+    assert not np.any(fm < 0), "Forward probability matrix has negative values."
+    assert not np.any(np.isnan(fm)), "Forward probability matrix has NaN values."
+    assert not np.any(bm < 0), "Backward probability matrix has negative values."
+    assert not np.any(np.isnan(bm)), "Backward probability matrix has NaN values."
+    # m + 1 columns to allow for imputed markers right of the last genotyped marker.
+    sm = np.zeros((m + 1, h), dtype=np.float64)
+    sm[:-1, :] = np.multiply(fm, bm)
+    sm[m, :] = sm[m - 1, :]  # Not in BEAGLE
+    return sm
+
+
+def interpolate_allele_probabilities(
+    sm, ref_h, genotyped_pos, imputed_pos, alleles=(0, 1)
+):
+    """
+    Compute the interpolated allele probabilities at imputed markers of
+    a query haplotype following Equation 1 of BB2016.
+
+    Assuming all biallelic sites, the interpolated allele probability matrix
+    is of size (x, 2), where x is the number of imputed markers.
+
+    This function takes the output of `compute_state_probability_matrix`.
+
+    Note that this function takes:
+    1. HMM state probability matrix across genotyped markers of size (m + 1, h).
+    2. reference haplotypes subsetted to imputed markers of size (x, h).
+
+    :param numpy.ndarray sm: HMM state probability matrix at genotyped markers.
+    :param numpy.ndarray ref_h: Reference haplotypes subsetted to imputed markers.
+    :param numpy.ndarray genotyped_pos: Site positions at genotyped markers.
+    :param numpy.ndarray imputed_pos: Site positions at imputed markers.
+    :param list alleles: Distince alleles in the haplotypes.
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert not np.any(sm < 0), "HMM state probability matrix has negative values."
+    assert not np.any(np.isnan(sm)), "HMM state probability matrix has NaN values."
+    m = sm.shape[0] - 1
+    h = sm.shape[1]
+    assert np.all(
+        np.isin(ref_h, [0, 1])
+    ), "Reference haplotypes have non-biallelic values."
+    assert m == len(genotyped_pos)
+    x = len(imputed_pos)
+    assert (x, h) == ref_h.shape
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    weights = get_weights(genotyped_pos, imputed_pos)
+    assert x == len(weights)
+    p = np.zeros((x, len(alleles)), dtype=np.float64)
+    # Compute allele probabilities as per Equation 1 in BB2016.
+    for a in alleles:
+        for i in np.arange(x):
+            # Identify the genotyped markers k and k + 1 between the imputed marker i
+            # The indices k should mirror those in `get_weights`.
+            if imputed_cm[i] < genotyped_cm[0]:
+                # Special case: imputed marker is before the first genotyped marker.
+                k = 0
+            elif imputed_cm[i] > genotyped_cm[-1]:
+                # Special case: imputed marker is after the last genotyped marker.
+                k = m - 1
+            else:
+                k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
+            # Sum over the reference haplotypes where the allele is a at marker i.
+            has_a = ref_h[i, :] == a
+            p[i, a] += np.sum(weights[i] * sm[k, has_a])
+            p[i, a] += np.sum((1 - weights[i]) * sm[k + 1, has_a])
+    return p
+
+
+def get_map_alleles(allele_probs):
+    """
+    Compute the maximum a posteriori alleles at the imputed markers of a query haplotype.
+
+    Assuming all biallelic sites, the output is an array of size x,
+    where x is the number of imputed markers.
+
+    WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen.
+
+    :param numpy.ndarray allele_probs: Interpolated allele probabilities.
+    :return: Imputed alleles in the query haplotype.
+    :rtype: numpy.ndarray
+    """
+    assert not np.any(allele_probs < 0), "Allele probabilities have negative values."
+    assert not np.any(np.isnan(allele_probs)), "Allele probabilities have NaN values."
+    imputed_alleles = np.argmax(allele_probs, axis=1)
+    assert len(imputed_alleles) == allele_probs.shape[0]
+    return imputed_alleles
+
+
+def run_beagle(ref_h, query_h, pos, miscall_rate=0.0001, ne=1e6, debug=False):
+    """
+    Run a simplified version of the BEAGLE imputation algorithm
+    based on Equation 1 of BB2016.
+
+    `ref_h` and `query_h` span all genotyped and imputed marker sites.
+    So, their sizes are (m + x, h) and (m + x), respectively,
+    where m is the number of genotyped markers
+    and x is the number of imputed markers.
+
+    The genomic site positions of all the markers are in `pos`.
+
+    This produces imputed alleles and interpolated allele probabilities
+    at the imputed markers of the query haplotype.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Site positions of all the markers.
+    :param float miscall_rate: Allelic miscall rate.
+    :param int ne: Effective population size.
+    :param bool debug: Whether to print intermediate results.
+    :return: Imputed alleles and interpolated allele probabilities.
+    :rtype: list(numpy.ndarray, numpy.ndarray)
+    """
+    # Set precision for printing
+    if debug:
+        np.set_printoptions(precision=20)
+    assert ref_h.shape[0] == len(pos)
+    assert len(query_h) == len(pos)
+    # Indices of genotyped markers in the query haplotype
+    genotyped_pos_idx = np.where(query_h != -1)[0]
+    assert len(genotyped_pos_idx) > 1
+    # Indices of imputed markers in the query haplotype
+    imputed_pos_idx = np.where(query_h == -1)[0]
+    assert len(imputed_pos_idx) > 0
+    # Site positions of genotyped markers
+    genotyped_pos = pos[genotyped_pos_idx]
+    # Site positions of imputed markers
+    imputed_pos = pos[imputed_pos_idx]
+    h = ref_h.shape[1]
+    m = len(genotyped_pos)
+    x = len(imputed_pos)
+    assert m + x == len(pos)
+    # Subset the reference haplotypes to genotyped markers
+    ref_h_genotyped = ref_h[genotyped_pos_idx, :]
+    if debug:
+        print("Reference haplotypes subsetted to genotyped markers")
+        print(ref_h_genotyped)
+    assert ref_h_genotyped.shape == (m, h)
+    # Subset the query haplotype to genotyped markers
+    query_h_genotyped = query_h[genotyped_pos_idx]
+    if debug:
+        print("Query haplotype subsetted to genotyped markers")
+        print(query_h_genotyped)
+    assert len(query_h_genotyped) == m
+    # Set mismatch probabilities at genotyped markers
+    mu = get_mismatch_prob(genotyped_pos, miscall_rate=miscall_rate)
+    if debug:
+        print("Mismatch probabilities")
+        print(mu)
+    assert len(mu) == m
+    # Set switch probabilities at genotyped markers
+    rho = get_switch_prob(genotyped_pos, h, ne=ne)
+    if debug:
+        print("Switch probabilities")
+        print(rho)
+    assert len(rho) == m
+    # Compute forward probability matrix at genotyped markers
+    fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
+    if debug:
+        print("Forward probability matrix")
+        print(fm)
+    assert fm.shape == (m, h)
+    # Compute backward probability matrix at genotyped markers
+    bm = compute_backward_probability_matrix(
+        ref_h_genotyped, query_h_genotyped, rho, mu
+    )
+    if debug:
+        print("Backward probability matrix")
+        print(bm)
+    assert bm.shape == (m, h)
+    # Compute HMM state probability matrix at genotyped markers
+    sm = compute_state_probability_matrix(fm, bm, ref_h_genotyped, query_h_genotyped)
+    if debug:
+        print("HMM state probability matrix")
+        print(sm)
+    assert sm.shape == (m + 1, h)
+    # Subset the reference haplotypes to imputed markers
+    ref_h_imputed = ref_h[imputed_pos_idx, :]
+    # Interpolate allele probabilities at imputed markers
+    i_allele_probs = interpolate_allele_probabilities(
+        sm, ref_h_imputed, genotyped_pos, imputed_pos
+    )
+    if debug:
+        print("Interpolated allele probabilities")
+        print(i_allele_probs)
+    assert i_allele_probs.shape == (x, 2)
+    # Get MAP alleles at imputed markers
+    imputed_alleles = get_map_alleles(i_allele_probs)
+    if debug:
+        print("Imputed alleles")
+        print(imputed_alleles)
+    assert len(imputed_alleles) == x
+    return (imputed_alleles, i_allele_probs)
+
+
+def run_tsimpute(ref_ts, query_h, pos, mu, rho):
+    """
+    Run the simplified BEAGLE algorithm above on a tree sequence.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Site positions of all the markers.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param bool debug: Whether to print intermediate results.
+    :return: Imputed alleles and interpolated allele probabilities.
+    :rtype: list(numpy.ndarray, numpy.ndarray)
+    """
+    # Prepare marker positions.
+    genotyped_site_ids = np.where(query_h != -1)[0]
+    genotyped_pos = pos[genotyped_site_ids]
+    imputed_site_ids = np.where(query_h == -1)[0]
+    imputed_pos = pos[imputed_site_ids]
+    # Prepare reference haplotypes.
+    ref_ts_m = ref_ts.delete_sites(site_ids=imputed_site_ids)
+    ref_h_m = ref_ts_m.genotype_matrix()
+    ref_ts_x = ref_ts.delete_sites(site_ids=genotyped_site_ids)
+    ref_h_x = ref_ts_x.genotype_matrix()
+    query_h_m = query_h[genotyped_site_ids]
+    # Get forward and backward matrices from ts.
+    fm = _tskit.CompressedMatrix(ref_ts_m._ll_tree_sequence)
+    bm = _tskit.CompressedMatrix(ref_ts_m._ll_tree_sequence)
+    ls_hmm = _tskit.LsHmm(ref_ts_m._ll_tree_sequence, mu, rho, acgt_alleles=True)
+    ls_hmm.forward_matrix(query_h_m.T, fm)
+    ls_hmm.backward_matrix(query_h_m.T, fm.normalisation_factor, bm)
+    # Compute state probability matrix.
+    sm = compute_state_probability_matrix(
+        fm.decode(),
+        bm.decode(),
+        ref_h_m,
+        query_h_m,
+    )
+    # Interpolate allele probabilities.
+    i_allele_probs = interpolate_allele_probabilities(
+        sm, ref_h_x, genotyped_pos, imputed_pos
+    )
+    # Get MAP alleles at imputed markers.
+    imputed_alleles = get_map_alleles(i_allele_probs)
+    return (imputed_alleles, i_allele_probs)
diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
new file mode 100644
index 0000000000..e292a77215
--- /dev/null
+++ b/python/tests/beagle_numba.py
@@ -0,0 +1,441 @@
+"""
+Implementation of the BEAGLE 4.1 algorithm to impute genotypes.
+
+This is the numba-fied version of `beagle.py`.
+"""
+import numpy as np
+from numba import njit
+
+import _tskit
+
+
+@njit
+def convert_to_genetic_map_position(pos):
+    """
+    Convert genomic site positions (bp) to genetic map positions (cM).
+
+    In BEAGLE 4.1, when a genetic map is not specified, it is assumed that 1 Mbp = 1cM.
+
+    This trivial function is meant for documentation.
+
+    :param numpy.ndarray pos: Site positions.
+    :return: Genetic map positions.
+    :rtype: numpy.ndarray
+    """
+    assert 0 < np.min(pos), "Site positions are not 1-based."
+    return pos / 1e6
+
+
+@njit
+def get_weights(genotyped_pos, imputed_pos):
+    """
+    Get weights at imputed markers in the query haplotype.
+
+    These weights are used in linear interpolation of HMM state probabilities
+    at imputed markers.
+
+    In BB2016 (see below Equation 1), a weight between genotyped markers m and m + 1
+    at imputed marker x is denoted lambda_m,x.
+
+    lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
+    where g(i) = genetic map position of marker i.
+
+    :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
+    :param numpy.ndarray imputed_pos: Site positions of imputed markers.
+    :return: Weights used in linear interpolation.
+    :rtype: numpy.ndarray
+    """
+    m = len(genotyped_pos)
+    x = len(imputed_pos)
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    # Calculate weights for imputed markers.
+    weights = np.zeros(x, dtype=np.float64)
+    # Identify genotype markers k and k + 1 between imputed marker i
+    idx_m = m - 1
+    for idx_x in np.arange(x - 1, -1, -1):
+        assert (
+            imputed_cm[idx_x] != genotyped_cm[idx_m]
+        ), "Genotyped markers and imputed markers overlap."
+        while imputed_cm[idx_x] < genotyped_cm[idx_m] and idx_m > -1:
+            idx_m = max(idx_m - 1, -1)
+        if idx_m == m - 1:
+            # Right of the last genotyped marker
+            weights[idx_x] = 0.0
+        elif idx_m == -1:
+            # Left of the first genotyped marker
+            weights[idx_x] = 1.0
+        else:
+            # Between two genotyped markers
+            cm_mP1_x = genotyped_cm[idx_m + 1] - imputed_cm[idx_x]
+            cm_mP1_m = genotyped_cm[idx_m + 1] - genotyped_cm[idx_m]
+            # Set min genetic distance to avoid division by zero.
+            cm_mP1_m = cm_mP1_m if cm_mP1_m > 0.005 else 0.005
+            weights[idx_x] = cm_mP1_x / cm_mP1_m
+    assert 0 <= np.min(weights), "Some weights are negative."
+    assert np.max(weights) <= 1, "Some weights are greater than 1."
+    return weights
+
+
+@njit
+def get_mismatch_prob(pos, miscall_rate):
+    """
+    Set mismatch probabilities at genotyped markers.
+
+    In BEAGLE, the mismatch probability is dominated by the allelic miscall rate.
+    By default, it is set to 0.0001 and capped at 0.50.
+
+    :param numpy.ndarray pos: Site positions.
+    :param float miscall_rate: Allelic miscall rate.
+    :return: Mismatch probabilities.
+    :rtype: numpy.ndarray
+    """
+    if miscall_rate >= 0.5:
+        miscall_rate = 0.5
+    mu = np.zeros(len(pos), dtype=np.float64) + miscall_rate
+    return mu
+
+
+@njit
+def get_switch_prob(pos, h, ne):
+    """
+    Set switch probabilities at genotyped markers.
+
+    Note that in BEAGLE, the default value of `ne` is set to 1e6.
+    If `h` is small and `ne` is large, the switch probability is nearly 1.
+    In such cases, it may be desirable to set `ne` to a small value
+    to avoid switching between haplotypes all the time.
+
+    :param numpy.ndarray pos: Site positions.
+    :param int h: Number of reference haplotypes.
+    :param float ne: Effective population size.
+    :return: Switch probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert 0 < np.min(pos), "Site positions are not 1-based."
+    # Get genetic distances between adjacent markers.
+    cm = convert_to_genetic_map_position(pos)
+    d = np.zeros(len(pos), dtype=np.float64)
+    d[0] = cm[0]
+    d[1:] = cm[1:] - cm[:-1]
+    rho = -np.expm1(-(4 * ne * d / h))
+    return rho
+
+
+@njit
+def compute_forward_probability_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement the forward algorithm.
+
+    The forward probablity matrix is of size (m, h),
+    where m is the number of genotyped markers
+    and h is the number of reference haplotypes.
+
+    `ref_h` is a panel of reference haplotypes of size m x h.
+    Here, it is subsetted to genotyped markers.
+
+    `query_h` is also subsetted to genotyped markers (length m).
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Forward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    # BEAGLE caps mismatch probabilities at 0.5.
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    fm = np.zeros((m, h), dtype=np.float64)
+    last_sum = 1.0  # Part of normalization factor
+    for i in np.arange(m):
+        # Get site-specific parameters
+        shift = rho[i] / h
+        scale = (1 - rho[i]) / last_sum
+        # Get allele at genotyped marker i on query haplotype j
+        query_a = query_h[i]
+        for j in np.arange(h):
+            # Get allele at genotyped marker i on reference haplotype j
+            ref_a = ref_h[i, j]
+            # Get emission probability
+            em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
+            if m == 0:
+                fm[i, j] = em_prob
+            else:
+                fm[i, j] = em_prob * (scale * fm[i - 1, j] + shift)
+        site_sum = np.sum(fm[i, :])
+        last_sum = site_sum / h if m == 0 else site_sum
+    return fm
+
+
+@njit
+def compute_backward_probability_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement the backward algorithm.
+
+    The backward probablity matrix is of size (m, h),
+    where m is the number of genotyped markers
+    and h is the number of reference haplotypes.
+
+    `ref_h` is a panel of reference haplotypes of size m x h.
+    Here, it is subsetted to genotyped markers.
+
+    `query_h` is also subsetted to genotyped markers (length m).
+
+    In BEAGLE 4.1, the values are kept one site at a time.
+    Here, we keep the values at all the sites.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Backward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    bm = np.zeros((m, h), dtype=np.float64)
+    bm[-1, :] = 1.0 / h  # Initialise the last column
+    for i in np.arange(m - 2, -1, -1):
+        query_a = query_h[i + 1]
+        for j in np.arange(h):
+            ref_a = ref_h[i + 1, j]
+            em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
+            # Note that BEAGLE keeps the values at one site at a time.
+            bm[i, j] = bm[i + 1, j] * em_prob
+        sum_site = np.sum(bm[i, :])
+        scale = (1 - rho[i + 1]) / sum_site
+        shift = rho[i + 1] / h
+        bm[i, :] = scale * bm[i, :] + shift
+    return bm
+
+
+@njit
+def compute_state_probability_matrix(fm, bm, ref_h, query_h):
+    """
+    Implement the HMM forward-backward algorithm following Equation 1 of BB2016.
+    This is simpler than faithfully reimplementing the original BEAGLE 4.1 algorithm.
+
+    A HMM state is (index of a genotyped marker, index of a reference haplotype).
+
+    The output HMM state probability matrix is of size (m + 1, h),
+    where m is the number of genotyped markers in the query haplotype
+    and h is the number of reference haplotypes.
+
+    Note that the additional column at m + 1 is for interpolation of
+    imputed markers right of the last genotyped marker.
+
+    The forward and backward probability matrices are both of size (m, h).
+
+    The reference haplotypes of size (m, h), and the query haplotype is of size m.
+    Here, they are both subsetted to genotyped markers.
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :return: HMM state probability matrix.
+    :rtype: numpy.ndarray
+    """
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert (m, h) == fm.shape
+    assert (m, h) == bm.shape
+    # m + 1 columns to allow for imputed markers right of the last genotyped marker.
+    sm = np.zeros((m + 1, h), dtype=np.float64)
+    sm[:-1, :] = np.multiply(fm, bm)
+    sm[m, :] = sm[m - 1, :]  # Not in BEAGLE
+    return sm
+
+
+@njit
+def interpolate_allele_probabilities(
+    sm, ref_h, genotyped_pos, imputed_pos, alleles=(0, 1)
+):
+    """
+    Compute the interpolated allele probabilities at imputed markers of
+    a query haplotype following Equation 1 of BB2016.
+
+    Assuming all biallelic sites, the interpolated allele probability matrix
+    is of size (x, 2), where x is the number of imputed markers.
+
+    This function takes the output of `compute_state_probability_matrix`.
+
+    Note that this function takes:
+    1. HMM state probability matrix across genotyped markers of size (m + 1, h).
+    2. reference haplotypes subsetted to imputed markers of size (x, h).
+
+    :param numpy.ndarray sm: HMM state probability matrix at genotyped markers.
+    :param numpy.ndarray ref_h: Reference haplotypes subsetted to imputed markers.
+    :param numpy.ndarray genotyped_pos: Site positions at genotyped markers.
+    :param numpy.ndarray imputed_pos: Site positions at imputed markers.
+    :param list alleles: Distince alleles in the haplotypes.
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    m = sm.shape[0] - 1
+    h = sm.shape[1]
+    assert m == len(genotyped_pos)
+    x = len(imputed_pos)
+    assert (x, h) == ref_h.shape
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    weights = get_weights(genotyped_pos, imputed_pos)
+    assert x == len(weights)
+    p = np.zeros((x, len(alleles)), dtype=np.float64)
+    # Compute allele probabilities as per Equation 1 in BB2016.
+    for a in alleles:
+        for i in np.arange(x):
+            # Identify the genotyped markers k and k + 1 between the imputed marker i
+            # The indices k should mirror those in `get_weights`.
+            if imputed_cm[i] < genotyped_cm[0]:
+                # Special case: imputed marker is before the first genotyped marker.
+                k = 0
+            elif imputed_cm[i] > genotyped_cm[-1]:
+                # Special case: imputed marker is after the last genotyped marker.
+                k = m - 1
+            else:
+                k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
+            # Sum over the reference haplotypes where the allele is a at marker i.
+            has_a = ref_h[i, :] == a
+            p[i, a] += np.sum(weights[i] * sm[k, has_a])
+            p[i, a] += np.sum((1 - weights[i]) * sm[k + 1, has_a])
+    return p
+
+
+@njit
+def get_map_alleles(allele_probs):
+    """
+    Compute the maximum a posteriori alleles at the imputed markers of a query haplotype.
+
+    Assuming all biallelic sites, the output is an array of size x,
+    where x is the number of imputed markers.
+
+    WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen.
+
+    :param numpy.ndarray allele_probs: Interpolated allele probabilities.
+    :return: Imputed alleles in the query haplotype.
+    :rtype: numpy.ndarray
+    """
+    imputed_alleles = np.argmax(allele_probs, axis=1)
+    assert len(imputed_alleles) == allele_probs.shape[0]
+    return imputed_alleles
+
+
+@njit
+def run_beagle(ref_h, query_h, pos, miscall_rate=0.0001, ne=1e6):
+    """
+    Run a simplified version of the BEAGLE imputation algorithm
+    based on Equation 1 of BB2016.
+
+    `ref_h` and `query_h` span all genotyped and imputed marker sites.
+    So, their sizes are (m + x, h) and (m + x), respectively,
+    where m is the number of genotyped markers
+    and x is the number of imputed markers.
+
+    The genomic site positions of all the markers are in `pos`.
+
+    This produces imputed alleles and interpolated allele probabilities
+    at the imputed markers of the query haplotype.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Site positions of all the markers.
+    :param float miscall_rate: Allelic miscall rate.
+    :param int ne: Effective population size.
+    :return: Imputed alleles and interpolated allele probabilities.
+    :rtype: list(numpy.ndarray, numpy.ndarray)
+    """
+    assert ref_h.shape[0] == len(pos)
+    assert len(query_h) == len(pos)
+    # Indices of genotyped markers in the query haplotype
+    genotyped_pos_idx = np.where(query_h != -1)[0]
+    assert len(genotyped_pos_idx) > 1
+    # Indices of imputed markers in the query haplotype
+    imputed_pos_idx = np.where(query_h == -1)[0]
+    assert len(imputed_pos_idx) > 0
+    # Site positions of genotyped markers
+    genotyped_pos = pos[genotyped_pos_idx]
+    # Site positions of imputed markers
+    imputed_pos = pos[imputed_pos_idx]
+    h = ref_h.shape[1]
+    m = len(genotyped_pos)
+    x = len(imputed_pos)
+    assert m + x == len(pos)
+    # Subset the reference haplotypes to genotyped markers
+    ref_h_genotyped = ref_h[genotyped_pos_idx, :]
+    assert ref_h_genotyped.shape == (m, h)
+    # Subset the query haplotype to genotyped markers
+    query_h_genotyped = query_h[genotyped_pos_idx]
+    assert len(query_h_genotyped) == m
+    # Set mismatch probabilities at genotyped markers
+    mu = get_mismatch_prob(genotyped_pos, miscall_rate=miscall_rate)
+    assert len(mu) == m
+    # Set switch probabilities at genotyped markers
+    rho = get_switch_prob(genotyped_pos, h, ne=ne)
+    assert len(rho) == m
+    # Compute forward probability matrix at genotyped markers
+    fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
+    assert fm.shape == (m, h)
+    # Compute backward probability matrix at genotyped markers
+    bm = compute_backward_probability_matrix(
+        ref_h_genotyped, query_h_genotyped, rho, mu
+    )
+    assert bm.shape == (m, h)
+    # Compute HMM state probability matrix at genotyped markers
+    sm = compute_state_probability_matrix(fm, bm, ref_h_genotyped, query_h_genotyped)
+    assert sm.shape == (m + 1, h)
+    # Subset the reference haplotypes to imputed markers
+    ref_h_imputed = ref_h[imputed_pos_idx, :]
+    # Interpolate allele probabilities at imputed markers
+    i_allele_probs = interpolate_allele_probabilities(
+        sm, ref_h_imputed, genotyped_pos, imputed_pos
+    )
+    assert i_allele_probs.shape == (x, 2)
+    # Get MAP alleles at imputed markers
+    imputed_alleles = get_map_alleles(i_allele_probs)
+    assert len(imputed_alleles) == x
+    return (imputed_alleles, i_allele_probs)
+
+
+def run_tsimpute(ref_ts, query_h, pos, mu, rho):
+    # Prepare marker positions.
+    genotyped_site_ids = np.where(query_h != -1)[0]
+    genotyped_pos = pos[genotyped_site_ids]
+    imputed_site_ids = np.where(query_h == -1)[0]
+    imputed_pos = pos[imputed_site_ids]
+    # Prepare reference haplotypes.
+    ref_ts_m = ref_ts.delete_sites(site_ids=imputed_site_ids)
+    ref_h_m = ref_ts_m.genotype_matrix()
+    ref_ts_x = ref_ts.delete_sites(site_ids=genotyped_site_ids)
+    ref_h_x = ref_ts_x.genotype_matrix()
+    query_h_m = query_h[genotyped_site_ids]
+    # Get forward and backward matrices from ts.
+    fm = _tskit.CompressedMatrix(ref_ts_m._ll_tree_sequence)
+    bm = _tskit.CompressedMatrix(ref_ts_m._ll_tree_sequence)
+    ls_hmm = _tskit.LsHmm(ref_ts_m._ll_tree_sequence, mu, rho, acgt_alleles=True)
+    ls_hmm.forward_matrix(query_h_m.T, fm)
+    ls_hmm.backward_matrix(query_h_m.T, fm.normalisation_factor, bm)
+    # Compute state probability matrix.
+    sm = compute_state_probability_matrix(
+        fm.decode(),
+        bm.decode(),
+        ref_h_m,
+        query_h_m,
+    )
+    # Interpolate allele probabilities.
+    ap = interpolate_allele_probabilities(sm, ref_h_x, genotyped_pos, imputed_pos)
+    # Get imputed alleles.
+    ia = get_map_alleles(ap)
+    return ia
diff --git a/python/tests/test_imputation.py b/python/tests/test_imputation.py
new file mode 100644
index 0000000000..05c886812e
--- /dev/null
+++ b/python/tests/test_imputation.py
@@ -0,0 +1,628 @@
+"""
+Tests for genotype imputation (forward and Baum-Welsh algorithms).
+"""
+import io
+
+import numpy as np
+import pytest
+
+import tskit
+from tests.beagle import run_beagle
+
+
+# The following toy query haplotypes were imputed from the toy reference haplotypes
+# using BEAGLE 4.1 with Ne set to 10.
+#
+# There are 9 sites, starting from 10,000 to 90,000, with 10,000 increments.
+# The REF is A and ALT is T for all the sites in the VCF input to BEAGLE.
+#
+# Case 0:
+# Reference haplotypes and query haplotypes have 0|0 at all sites.
+# First and last reference markers are missing in the query haplotypes.
+# fmt: off
+toy_ref_0 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],   # ref_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],   # ref_0, haplotype 1
+    ]
+).T
+toy_query_0 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_0_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 1
+    ]
+)
+toy_query_0_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_0.shape == (9, 2)
+assert toy_query_0.shape == (2, 9)
+assert toy_query_0_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_0[0] == 0) == 4
+assert np.sum(toy_query_0[0] == -1) == 5
+assert np.array_equal(toy_query_0[0], toy_query_0[1])
+assert np.all(toy_query_0_beagle_imputed[0] == 0)
+assert np.all(toy_query_0_beagle_imputed[1] == 0)
+assert toy_query_0_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 1:
+# Reference haplotypes and query haplotypes have 1|1 at all sites.
+# First and last reference markers are missing in the query haplotypes.
+# fmt: off
+toy_ref_1 = np.array(
+    [
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 1
+    ]
+).T
+toy_query_1 = np.array(
+    [
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_1_beagle_imputed = np.array(
+    [
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 1
+    ]
+)
+toy_query_1_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_1.shape == (9, 2)
+assert toy_query_1.shape == (2, 9)
+assert toy_query_1_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_1[0] == 1) == 4
+assert np.sum(toy_query_1[0] == -1) == 5
+assert np.array_equal(toy_query_1[0], toy_query_1[1])
+assert np.all(toy_query_1_beagle_imputed[0] == 1)
+assert np.all(toy_query_1_beagle_imputed[1] == 1)
+assert toy_query_1_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 2:
+# Reference haplotypes and query haplotypes have 0|0 at all sites.
+# First and last reference markers are genotyped in the query haplotypes.
+# fmt: off
+toy_ref_2 = np.copy(toy_ref_0)
+toy_query_2 = np.array(
+    [
+        [0, 0, -1, 0, -1, 0, -1, 0, 0],  # query_0, haplotype 0
+        [0, 0, -1, 0, -1, 0, -1, 0, 0],  # query_0, haplotype 1
+    ]
+)
+toy_query_2_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 1
+    ]
+)
+toy_query_2_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_2.shape == (9, 2)
+assert toy_query_2.shape == (2, 9)
+assert toy_query_2_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_2[0] == 0) == 6
+assert np.sum(toy_query_2[0] == -1) == 3
+assert np.array_equal(toy_query_2[0], toy_query_2[1])
+assert np.all(toy_query_2_beagle_imputed[0] == 0)
+assert np.all(toy_query_2_beagle_imputed[1] == 0)
+assert toy_query_2_beagle_allele_probs.shape == (2, 2, 3)
+
+# Case 3:
+# Reference haplotypes and query haplotypes have 0|1 at all sites.
+# fmt: off
+toy_ref_3 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 1
+    ]
+).T
+toy_query_3 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_3_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 1
+    ]
+)
+toy_query_3_beagle_allele_probs = np.array(
+    # Imputed markers only
+    # See comment https://github.com/tskit-dev/tskit/pull/2815#issuecomment-1708178257
+    [
+        # query_0, haplotype 0
+        [
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.9999998],
+            [0.0      , 0.0      , 0.0, 0.0      , 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0      , 0.0      , 0.0, 0.0      , 0.0],
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.9999998],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_3.shape == (9, 2)
+assert toy_query_3.shape == (2, 9)
+assert toy_query_3_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_3[0] == 0) == 4
+assert np.sum(toy_query_3[1] == 1) == 4
+assert np.sum(toy_query_3[0] == -1) == np.sum(toy_query_3[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_3_beagle_imputed[0], toy_query_3_beagle_imputed[1]))
+)
+assert toy_query_3_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 4:
+# Reference haplotypes and query haplotypes have alternating 0|1 and 1|0 genotypes.
+# Query haplotypes have 0|1 at all genotyped sites.
+# fmt: off
+toy_ref_4 = np.array(
+    [
+        [0, 1, 0, 1, 0, 1, 0, 1, 0],  # ref_0, haplotype 0
+        [1, 0, 1, 0, 1, 0, 1, 0, 1],  # ref_0, haplotype 1
+    ]
+).T
+toy_query_4 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_4_beagle_imputed = np.array(
+    [
+        [1, 0, 1, 0, 1, 0, 1, 0, 1],  # query_0, haplotype 0
+        [0, 1, 0, 1, 0, 1, 0, 1, 0],  # query_0, haplotype 1
+    ]
+)
+toy_query_4_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.0      , 0.0      , 0.0, 0.0      , 0.9999998],
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.0],
+            [0.0      , 0.0      , 0.0, 0.0      , 0.9999998],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_4.shape == (9, 2)
+assert toy_query_4.shape == (2, 9)
+assert toy_query_4_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_4[0] == 0) == 4
+assert np.sum(toy_query_4[1] == 1) == 4
+assert np.sum(toy_query_4[0] == -1) == np.sum(toy_query_4[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_4_beagle_imputed[0], toy_query_4_beagle_imputed[1]))
+)
+assert toy_query_3_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 5:
+# The reference panel has two individuals. The first individual has 0|0 at all sites,
+# and the second individual has 1|1 at all sites.
+# The query haplotypes have one recombination breakpoint in the middle.
+# fmt: off
+toy_ref_5 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_1
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_1
+    ]
+).T
+toy_query_5 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 1, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 0, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_5_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 1, 1],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 0, 0],  # query_0, haplotype 1
+    ]
+)
+toy_query_5_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.9999999, 0.99999994, 0.9999546, 0.5454091, 0.09090912],
+            [0.0      , 0.0       , 0.0      , 0.4545909, 0.9090909],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0      , 0.0       , 0.0      , 0.4545909, 0.9090909],
+            [0.9999999, 0.99999994, 0.9999546, 0.5454091, 0.09090912],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_5.shape == (9, 4)
+assert toy_query_5.shape == (2, 9)
+assert toy_query_5_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_5[0] == 0) == 3
+assert np.sum(toy_query_5[0] == 1) == 1
+assert np.sum(toy_query_5[0] == -1) == 5
+assert np.sum(toy_query_5[1] == 0) == 1
+assert np.sum(toy_query_5[1] == 1) == 3
+assert np.sum(toy_query_5[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_5_beagle_imputed[0], toy_query_5_beagle_imputed[1]))
+)
+assert toy_query_5_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 6:
+# The reference panel has two individuals. The first individual has 0|0 at all sites,
+# and the second individual has 1|1 at all sites.
+# The query haplotypes have two recombination breakpoints in the middle.
+# fmt: off
+toy_ref_6 = np.copy(toy_ref_5)
+toy_query_6 = np.array(
+    [
+        [-1, 0, -1, 1, -1, 1, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 0, -1, 0, -1, 1, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_6_beagle_imputed = np.array(
+    [
+        [0, 0, 1, 1, 1, 1, 1, 0, 0],  # query_0, haplotype 0
+        [1, 1, 0, 0, 0, 0, 0, 1, 1],  # query_0, haplotype 1
+    ]
+)
+toy_query_6_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.90983605, 0.46770614, 0.025481388, 0.46838862, 0.91139066],
+            [0.09016396, 0.53229386, 0.97451866 , 0.5316114 , 0.088609315],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.09016395, 0.53229386, 0.97451866 , 0.5316114 , 0.0886093],
+            [0.90983605, 0.46770614, 0.025481388, 0.46838862, 0.91139066],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_6.shape == (9, 4)
+assert toy_query_6.shape == (2, 9)
+assert toy_query_6_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_6[0] == 0) == 2
+assert np.sum(toy_query_6[0] == 1) == 2
+assert np.sum(toy_query_6[0] == -1) == 5
+assert np.sum(toy_query_6[1] == 0) == 2
+assert np.sum(toy_query_6[1] == 1) == 2
+assert np.sum(toy_query_6[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_6_beagle_imputed[0], toy_query_6_beagle_imputed[1]))
+)
+assert toy_query_6_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 7:
+# Hand-crafted example.
+# fmt: off
+toy_ref_7 = np.array(
+    [
+        [0, 0, 0, 1, 1, 0, 0, 0, 1],    # ref_0
+        [1, 0, 0, 0, 1, 0, 0, 0, 1],    # ref_0
+        [0, 0, 1, 1, 1, 0, 0, 0, 1],    # ref_1
+        [0, 1, 0, 0, 0, 0, 0, 1, 0],    # ref_1
+        [0, 0, 0, 0, 0, 1, 1, 1, 0],    # ref_2
+        [1, 0, 0, 0, 1, 0, 0, 0, 1],    # ref_2
+        [0, 0, 1, 1, 1, 0, 0, 0, 1],    # ref_3
+        [0, 0, 0, 0, 0, 0, 0, 0, 1],    # ref_3
+        [0, 1, 0, 0, 0, 0, 0, 1, 0],    # ref_4
+        [0, 0, 0, 0, 0, 1, 1, 1, 0],    # ref_4
+    ]
+).T
+toy_query_7 = np.array(
+    [
+        [0, 0, -1, 1, -1, 1, -1, 0, 1],   # query_0, haplotype 0
+        [1, 1, -1, 0, -1, 0, -1, 1, 0],   # query_0, haplotype 1
+    ]
+)
+toy_query_7_beagle_imputed = np.array(
+    [
+        [0, 0, 1, 1, 1, 1, 0, 0, 1],    # query_0, haplotype 0
+        [1, 1, 0, 0, 0, 0, 0, 1, 0],    # query_0, haplotype 1
+    ]
+)
+toy_query_7_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.33317572, 0.0      , 0.99945664],
+            [0.66635144, 0.9991437, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9995998, 0.99971414, 0.99974835],
+            [0.0      , 0.0       , 0.0],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_7.shape == (9, 10)
+assert toy_query_7.shape == (2, 9)
+assert toy_query_7_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_7[0] == -1) == np.sum(toy_query_7[1] == -1) == 3
+assert toy_query_7_beagle_allele_probs.shape == (2, 2, 3)
+
+# Case 8:
+# Same as case 7 except the last genotyped marker is missing.
+# fmt: off
+toy_ref_8 = np.copy(toy_ref_7)
+toy_query_8 = np.array(
+    [
+        [-1, 0, -1, 1, -1, 1, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 0, -1, 0, -1, 1, -1],  # query_0, haplotype 1
+    ]
+)
+toy_query_8_beagle_imputed = np.array(
+    [
+        [0, 0, 1, 1, 1, 1, 0, 0, 1],    # query_0, haplotype 0
+        [0, 1, 0, 0, 0, 0, 0, 1, 0],    # query_0, haplotype 1
+    ]
+)
+toy_query_8_beagle_allele_probs = np.array(
+    # Imputed markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.9997735, 0.33310473, 0.0       , 0.9992305, 0.9997736],
+            [0.0      , 0.66620946, 0.99893093, 0.0      , 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9999998, 0.9999998, 0.9999998, 0.99991965, 0.0],
+            [0.0      , 0.0      , 0.0      , 0.0       , 0.9999998],
+        ],
+    ],
+    dtype=np.float64,
+)
+# fmt: on
+assert toy_ref_8.shape == (9, 10)
+assert toy_query_8.shape == (2, 9)
+assert toy_query_8_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_8[0] == -1) == 5
+assert np.sum(toy_query_8[1] == -1) == 5
+assert toy_query_8_beagle_allele_probs.shape == (2, 2, 5)
+
+
+@pytest.mark.parametrize(
+    "input_ref,input_query,expected",
+    [
+        (toy_ref_0, toy_query_0, toy_query_0_beagle_imputed),
+        (toy_ref_1, toy_query_1, toy_query_1_beagle_imputed),
+        (toy_ref_2, toy_query_2, toy_query_2_beagle_imputed),
+        (toy_ref_3, toy_query_3, toy_query_3_beagle_imputed),
+        (toy_ref_4, toy_query_4, toy_query_4_beagle_imputed),
+        (toy_ref_5, toy_query_5, toy_query_5_beagle_imputed),
+        (toy_ref_6, toy_query_6, toy_query_6_beagle_imputed),
+        (toy_ref_7, toy_query_7, toy_query_7_beagle_imputed),
+        (toy_ref_8, toy_query_8, toy_query_8_beagle_imputed),
+    ],
+)
+def test_compare_imputed_alleles(input_ref, input_query, expected):
+    """Compare with imputed alleles from BEAGLE 4.1."""
+    assert input_query.shape == expected.shape
+    pos = (np.arange(9) + 1) * 1e4
+    num_query_haps = input_query.shape[0]
+    num_imputed_markers = np.sum(input_query[0] == -1)  # x
+    imputed_alleles = np.zeros((num_query_haps, num_imputed_markers), dtype=int)
+    expected_subset = expected[:, input_query[0] == -1]  # Subsetted to imputed markers
+    assert expected_subset.shape == imputed_alleles.shape
+    for i in np.arange(num_query_haps):
+        imputed_alleles[i], _ = run_beagle(
+            input_ref, input_query[i], pos, miscall_rate=0.0001, ne=10.0
+        )
+        num_diff = np.sum(imputed_alleles[i] != expected_subset[i])
+        assert np.array_equal(num_diff == 0), f"Wrongly imputed alleles: {num_diff}"
+
+
+@pytest.mark.parametrize(
+    "input_ref,input_query,expected",
+    [
+        (toy_ref_0, toy_query_0, toy_query_0_beagle_allele_probs),
+        (toy_ref_1, toy_query_1, toy_query_1_beagle_allele_probs),
+        (toy_ref_2, toy_query_2, toy_query_2_beagle_allele_probs),
+        (toy_ref_3, toy_query_3, toy_query_3_beagle_allele_probs),
+        # (toy_ref_4, toy_query_4, toy_query_4_beagle_allele_probs),
+        # (toy_ref_5, toy_query_5, toy_query_5_beagle_allele_probs),
+        # (toy_ref_6, toy_query_6, toy_query_6_beagle_allele_probs),
+        # (toy_ref_7, toy_query_7, toy_query_7_beagle_allele_probs),
+        # (toy_ref_8, toy_query_8, toy_query_8_beagle_allele_probs),
+    ],
+)
+def test_compare_allele_probabilities(input_ref, input_query, expected):
+    """Check interpolated allele probabilities from BEAGLE 4.1."""
+    pos = (np.arange(9) + 1) * 1e4
+    num_query_haps = input_query.shape[0]
+    for i in np.arange(num_query_haps):
+        _, i_allele_probs = run_beagle(
+            input_ref, input_query[i], pos, miscall_rate=0.0001, ne=10.0
+        )
+        # Rescale probabilities before comparison
+        rescaled_probs = np.zeros_like(i_allele_probs).T
+        for j in np.arange(rescaled_probs.shape[1]):
+            rescaled_probs[:, j] = i_allele_probs[j] / np.sum(i_allele_probs[j])
+        assert np.allclose(rescaled_probs, expected[i], atol=1e-04)
+
+
+# Below is toy data set case 7 in tree sequence format.
+toy_ts_nodes_text = """\
+id      is_sample       time    population      individual      metadata
+0       1       0.000000        0       0
+1       1       0.000000        0       0
+2       1       0.000000        0       1
+3       1       0.000000        0       1
+4       1       0.000000        0       2
+5       1       0.000000        0       2
+6       1       0.000000        0       3
+7       1       0.000000        0       3
+8       1       0.000000        0       4
+9       1       0.000000        0       4
+10      0       0.009923        0       -1
+11      0       0.038603        0       -1
+12      0       0.057935        0       -1
+13      0       0.145141        0       -1
+14      0       0.238045        0       -1
+15      0       0.528344        0       -1
+16      0       0.646418        0       -1
+17      0       1.462199        0       -1
+18      0       2.836600        0       -1
+19      0       3.142225        0       -1
+20      0       4.056253        0       -1
+"""
+
+toy_ts_edges_text = """\
+left    right   parent  child   metadata
+0.000000        100000.000000   10      1
+0.000000        100000.000000   10      5
+0.000000        100000.000000   11      3
+0.000000        100000.000000   11      8
+0.000000        100000.000000   12      2
+0.000000        100000.000000   12      6
+0.000000        100000.000000   13      0
+0.000000        100000.000000   13      12
+0.000000        100000.000000   14      10
+0.000000        100000.000000   14      13
+0.000000        40443.000000    15      9
+0.000000        40443.000000    15      14
+40443.000000    100000.000000   16      4
+40443.000000    100000.000000   16      9
+0.000000        40443.000000    17      4
+0.000000        100000.000000   17      11
+40443.000000    100000.000000   17      16
+0.000000        12721.000000    18      7
+0.000000        12721.000000    18      17
+12721.000000    100000.000000   19      7
+40443.000000    100000.000000   19      14
+12721.000000    40443.000000    19      15
+0.000000        12721.000000    20      15
+12721.000000    100000.000000   20      17
+0.000000        12721.000000    20      18
+12721.000000    100000.000000   20      19
+"""
+
+toy_ts_sites_text = """\
+position        ancestral_state metadata
+10000.000000    A
+20000.000000    A
+30000.000000    A
+40000.000000    A
+50000.000000    A
+60000.000000    A
+70000.000000    A
+80000.000000    A
+90000.000000    A
+"""
+
+toy_ts_mutations_text = """\
+site    node    time    derived_state   parent  metadata
+0       10      unknown C       -1
+1       11      unknown C       -1
+2       12      unknown C       -1
+3       13      unknown C       -1
+4       14      unknown C       -1
+5       16      unknown C       -1
+6       16      unknown C       -1
+7       17      unknown C       -1
+8       19      unknown C       -1
+"""
+
+toy_ts_individuals_text = """\
+flags
+0
+0
+0
+0
+0
+"""
+
+
+def get_toy_data():
+    """Get toy example 7 in tree sequence format."""
+    ts = tskit.load_text(
+        nodes=io.StringIO(toy_ts_nodes_text),
+        edges=io.StringIO(toy_ts_edges_text),
+        sites=io.StringIO(toy_ts_sites_text),
+        mutations=io.StringIO(toy_ts_mutations_text),
+        individuals=io.StringIO(toy_ts_individuals_text),
+        strict=False,
+    )
+    return ts
+
+
+def parse_matrix(csv_text):
+    # TODO: Maybe remove.
+    # This returns a record array, which is essentially the same as a
+    # pandas dataframe, which we can access via df["m"] etc.
+    return np.lib.npyio.recfromcsv(io.StringIO(csv_text))