From dd09b276a2ec279a2d0ef654fbcfbc95a078ae82 Mon Sep 17 00:00:00 2001
From: Shing Zhan <shing.zhan@gmail.com>
Date: Mon, 7 Aug 2023 18:19:00 +0100
Subject: [PATCH] Add tests to check LS HMM of _tskit.lshmm compared to BEAGLE

---
 python/tests/beagle.py          | 653 +++++++++++++++++++++++++++++
 python/tests/beagle_numba.py    | 569 +++++++++++++++++++++++++
 python/tests/test_imputation.py | 719 ++++++++++++++++++++++++++++++++
 3 files changed, 1941 insertions(+)
 create mode 100644 python/tests/beagle.py
 create mode 100644 python/tests/beagle_numba.py
 create mode 100644 python/tests/test_imputation.py

diff --git a/python/tests/beagle.py b/python/tests/beagle.py
new file mode 100644
index 0000000000..47ebb6920b
--- /dev/null
+++ b/python/tests/beagle.py
@@ -0,0 +1,653 @@
+"""
+Implementation of the BEAGLE 4.1 algorithm to impute alleles.
+
+This was implemented while closely consulting the BEAGLE 4.1 paper:
+Browning & Browning (2016). Genotype imputation with millions of reference samples.
+Am J Hum Genet 98:116-126. doi:10.1016/j.ajhg.2015.11.020
+
+The BEAGLE 4.1 source code (particularly `LSHapBaum.java`) was also consulted:
+https://faculty.washington.edu/browning/beagle/b4_1.html
+
+These notations are used throughout this implementation:
+h = number of reference haplotypes.
+m = number of genotyped markers.
+x = number of ungenotyped markers.
+
+This implementation takes the following inputs:
+1. Panel of reference haplotypes in a matrix of size (m + x, h).
+2. One query haplotype in an array of size (m + x).
+3. Site positions of all the markers in an array of size (m + x).
+
+In the query haplotype:
+1. The genotyped markers take values of 0, 1, 2, or 3 (ACGT encoding).
+2. The ungenotyped markers take -1.
+
+The forward and backward probability matrices are of size (m, h).
+The HMM state probability matrix is of size (m + 1, h).
+The interpolated allele probability matrix is of size (x, 4),
+The imputed alleles are the maximum a posteriori (MAP) alleles.
+
+For efficiency, BEAGLE uses aggregated markers, which are clusters of markers
+within a 0.005 cM interval (default). Because the genotypes are phased,
+the alleles in the aggregated markers form distinct "allele sequences".
+Below, we do not use aggregated markers or allele sequences, which would complicate
+the implementation.
+
+Rather than trying to exactly replicating the original BEAGLE 4.1 algorithm,
+this implementation computes Equation 1 of BB2016. We keep functions used
+in a first attempt to faithfully implement the BEAGLE algorithm for documentation.
+"""
+import numpy as np
+
+
+"""
+TODO: Update use of genetic map.
+TODO: Refactor and clean up this code.
+TODO: Move useful documentation over to beagle_numba.py.
+"""
+
+
+def convert_to_genetic_map_position(pos, genetic_map_pos=None, genetic_map_cm=None):
+    """
+    Convert physical positions (bp) to genetic map positions (cM).
+
+    In BEAGLE 4.1, when a genetic map is not specified, it is assumed
+    that the recombination rate is constant (1 cM / 1 Mb).
+
+    If a genetic map is specified, then the genetic map positions are
+    either taken straight from it or interpolated from it. The genetic map
+    needs to contain physical positions and corresponding genetic positions.
+    See `PlinkGenMap.java` in the BEAGLE 4.1 source code for details.
+
+    :param numpy.ndarray pos: Physical positions (bp).
+    :param numpy.ndarray genetic_map_pos: Physical positions of genetic map.
+    :param numpy.ndarray genetic_map_cm: Genetic map positions of genetic map.
+    :return: Genetic map positions (cM).
+    :rtype: numpy.ndarray
+    """
+    if genetic_map_pos is None or genetic_map_cm is None:
+        return pos / 1e6
+    # TODO: Better way to pass the components of a genetic map.
+    base_pos = genetic_map_pos
+    gen_pos = genetic_map_cm
+    assert len(base_pos) == len(gen_pos), "Bad genetic map."
+    assert np.all(pos >= base_pos[0]) and np.all(
+        pos <= base_pos[-1]
+    ), "Physical positions outside of genetic map."
+    left_idx = np.searchsorted(base_pos, pos, side="right")
+    xMa = np.zeros(len(pos), dtype=np.float32)
+    bMa = np.zeros(len(pos), dtype=np.float32)
+    fbMfa = np.zeros(len(pos), dtype=np.float32)
+    for i in range(len(pos)):
+        xMa[i] = pos[i] - base_pos[left_idx[i] - 1]
+        bMa[i] = base_pos[left_idx[i]] - base_pos[left_idx[i] - 1]
+        fbMfa[i] = gen_pos[left_idx[i]] - gen_pos[left_idx[i] - 1]
+    return xMa / bMa * fbMfa
+
+
+def get_weights(genotyped_pos, ungenotyped_pos, genetic_map=None):
+    """
+    Get weights at ungenotyped markers in the query haplotype, which are used in
+    linear interpolation of HMM state probabilities at ungenotyped markers.
+
+    In BB2016 (see below Equation 1), a weight between genotyped markers m and m + 1
+    at ungenotyped marker x is denoted lambda_m,x.
+
+    lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
+    where g(i) = genetic map position of marker i.
+
+    :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
+    :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
+    :param msprime.RateMap genetic_map: Genetic map.
+    :return: Weights and associated marker interval start indices.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    MIN_CM_DIST = 1e-7  # See `ImpData.java` in BEAGLE 4.1 source code
+    assert len(genotyped_pos) > 1
+    assert len(ungenotyped_pos) > 0
+    # Check that the two sets are mutually exclusive.
+    m = len(genotyped_pos)
+    x = len(ungenotyped_pos)
+    assert len(np.union1d(genotyped_pos, ungenotyped_pos)) == m + x
+    if genetic_map is None:
+        genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+        ungenotyped_cm = convert_to_genetic_map_position(ungenotyped_pos)
+    else:
+        genotyped_cm = genetic_map.get_cumulative_mass(genotyped_pos)
+        ungenotyped_cm = genetic_map.get_cumulative_mass(ungenotyped_pos)
+    # Calculate weights for ungenotyped markers.
+    weights = np.zeros(x, dtype=np.float32)
+    # Identify genotype markers k and k + 1 sandwiching marker ungenotyped marker i.
+    marker_interval_start = np.zeros(x, dtype=np.int32)
+    for i in range(x):
+        if ungenotyped_pos[i] < genotyped_pos[0]:
+            # Ungenotyped marker is before first genotyped marker.
+            k = 0
+            weights[i] = 1.0
+        elif ungenotyped_pos[i] > genotyped_pos[-1]:
+            # Ungenotyped marker is after last genotyped marker.
+            k = m - 1
+            weights[i] = 0.0
+        else:
+            # Ungenotyped marker is between genotyped markers k and k + 1.
+            k = np.max(np.where(ungenotyped_pos[i] > genotyped_pos)[0])  # Inefficient
+            cm_mP1_x = genotyped_cm[k + 1] - ungenotyped_cm[i]
+            cm_mP1_m = genotyped_cm[k + 1] - genotyped_cm[k]
+            if cm_mP1_m == 0:
+                cm_mP1_m = MIN_CM_DIST
+            weights[i] = cm_mP1_x / cm_mP1_m
+        marker_interval_start[i] = k
+    assert 0 <= np.min(weights)
+    assert np.max(weights) <= 1
+    return (weights, marker_interval_start)
+
+
+def get_mismatch_prob(pos, miscall_rate):
+    """
+    Set mismatch probabilities at genotyped markers.
+
+    In BEAGLE, the mismatch probability is dominated by the allelic miscall rate.
+    By default, it is set to 0.0001 and capped at 0.50.
+
+    :param numpy.ndarray pos: Site positions.
+    :param float miscall_rate: Allelic miscall rate.
+    :return: Mismatch probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert isinstance(miscall_rate, float)
+    if miscall_rate >= 0.5:
+        # TODO: Print warning.
+        miscall_rate = 0.5
+    mu = np.zeros(len(pos), dtype=np.float32) + miscall_rate
+    return mu
+
+
+def get_switch_prob(pos, h, ne, genetic_map=None):
+    """
+    Set switch probabilities at genotyped markers.
+
+    Note that in BEAGLE, the default value of `ne` is set to 1e6.
+    If `h` is small and `ne` is large, the switch probability is nearly 1.
+    In such cases, it may be desirable to set `ne` to a small value
+    to avoid switching between haplotypes all the time.
+
+    :param numpy.ndarray pos: Site positions.
+    :param int h: Number of reference haplotypes.
+    :param float ne: Effective population size.
+    :param msprime.RateMap genetic_map: Genetic map.
+    :return: Switch probabilities.
+    :rtype: numpy.ndarray
+    """
+    assert np.min(pos) > 0
+    assert isinstance(h, int)
+    assert isinstance(ne, float)
+    # Get genetic distance between adjacent markers.
+    if genetic_map is None:
+        cm = convert_to_genetic_map_position(pos)
+    else:
+        cm = genetic_map.get_cumulative_mass(pos)
+    d = np.zeros(len(pos), dtype=np.float32)
+    d[0] = cm[0]
+    d[1:] = cm[1:] - cm[:-1]
+    assert len(d) == len(pos)
+    rho = -np.expm1(-(4 * ne * d / h))
+    assert len(d) == len(rho)
+    return rho
+
+
+def compute_forward_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement forward algorithm to compute a forward probablity matrix of size (m, h).
+
+    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    So, they are a matrix of size (m, h) and an array of size m, respectively.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Forward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    alleles = np.arange(4)  # ACGT
+    assert np.all(np.isin(ref_h, alleles))
+    assert np.all(np.isin(query_h, alleles))
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho)
+    assert np.max(rho) <= 1
+    assert 0 <= np.min(mu)
+    assert np.max(mu) <= 0.5
+    fm = np.zeros((m, h), dtype=np.float32)
+    last_sum = 1.0  # Part of normalization factor
+    for i in range(m):
+        # Get site-specific parameters.
+        shift = rho[i] / h
+        scale = (1 - rho[i]) / last_sum
+        # Get allele at genotyped marker i on query haplotype.
+        query_a = query_h[i]
+        for j in range(h):
+            # Get allele at genotyped marker i on reference haplotype j.
+            ref_a = ref_h[i, j]
+            # Get emission probability.
+            em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
+            fm[i, j] = em_prob
+            if i == 0:
+                fm[i, j] *= 1.0 / h
+            else:
+                fm[i, j] *= scale * fm[i - 1, j] + shift
+        site_sum = np.sum(fm[i, :])
+        last_sum = site_sum / h if i == 0 else site_sum
+    return fm
+
+
+def compute_backward_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement backward algorithm to compute a backward probablity matrix of size (m, h).
+
+    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    So, they are a matrix of size (m, h) and an array of size m, respectively.
+
+    In BEAGLE 4.1, the values are kept one site at a time.
+    Here, we keep the values at all the sites.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: Backward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    alleles = np.arange(4)  # ACGT
+    assert np.all(np.isin(ref_h, alleles))
+    assert np.all(np.isin(query_h, alleles))
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho)
+    assert np.max(rho) <= 1
+    assert 0 <= np.min(mu)
+    assert np.max(mu) <= 0.5
+    bm = np.zeros((m, h), dtype=np.float32)
+    bm[-1, :] = 1.0 / h  # Initialise the last column
+    for i in range(m - 2, -1, -1):
+        query_a = query_h[i + 1]
+        for j in range(h):
+            ref_a = ref_h[i + 1, j]
+            em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
+            # Note that BEAGLE keeps the values at one site at a time.
+            bm[i, j] = bm[i + 1, j] * em_prob
+        sum_site = np.sum(bm[i, :])
+        scale = (1 - rho[i + 1]) / sum_site
+        shift = rho[i + 1] / h
+        bm[i, :] = scale * bm[i, :] + shift
+    return bm
+
+
+def _get_reference_haplotype_segments(a, b):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Assuming all biallelic sites, get the index of a reference haplotype segment (i)
+    defined by the alleles a and b at two adjacent genotyped markers.
+
+    #i, a, b
+    0, 0, 0
+    1, 1, 0
+    2, 0, 1
+    3, 1, 1
+
+    See https://github.com/tskit-dev/tskit/issues/2802#issuecomment-1698767169
+
+    This is a helper function for `_compute_state_probability_matrix`.
+    """
+    ref_hap_idx = a * 1 + b * 2
+    return ref_hap_idx
+
+
+def _compute_state_prob_matrix(fm, bm, ref_h, query_h, rho, mu):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Implement the forward-backward algorithm to compute:
+    1. A HMM state probability matrix across genotyped markers.
+    2. "Forward haplotype probabilities".
+    3. "Backward haplotype probabilities".
+
+    The HMM state probability matrix is of size (m + 1, h).
+
+    Note that the additional column at m + 1 is for interpolation of
+    ungenotyped markers right of the last genotyped marker.
+
+    Both the forward and backward haplotype probability matrices
+    are of size (m, 2), assuming only biallelic sites.
+
+    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    So, they are a matrix of size (m, h) and an array of size m, respectively.
+
+    This implementation is based on the function `setStateProbs`
+    of `LSHapBaum.java` in BEAGLE. It performs the computation
+    described in Equation 2 of BB2016.
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Switch probabilities.
+    :param numpy.ndarray mu: Mismatch probabilities.
+    :return: HMM state probability matrix,
+        forward haplotype probability matrix, and
+        backward haplotype probability matrix.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray, numpy.ndarray)
+    """
+    alleles = np.arange(4)  # ACGT
+    m = fm.shape[0]
+    h = fm.shape[1]
+    assert (m, h) == bm.shape
+    assert not np.any(fm < 0)
+    assert not np.any(np.isnan(fm))
+    assert not np.any(bm < 0)
+    assert not np.any(np.isnan(bm))
+    assert np.all(np.isin(ref_h, alleles))
+    assert np.all(np.isin(query_h, alleles))
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho)
+    assert np.max(rho) <= 1
+    assert 0 <= np.min(mu)
+    assert np.max(mu) <= 0.5
+    # m + 1 columns to allow for ungenotyped markers right of the last genotyped marker.
+    sm = np.zeros((m + 1, h), dtype=np.float32)
+    fwd_hap_probs = np.zeros((m, 4), dtype=np.float32)
+    bwd_hap_probs = np.zeros((m, 4), dtype=np.float32)
+    for i in range(m - 1, -1, -1):
+        for j in range(h):
+            sm[i, j] = fm[i, j] * bm[i, j]
+            ref_hap_seg_mP1 = _get_reference_haplotype_segments(
+                ref_h[i, j], ref_h[i + 1, j]
+            )
+            ref_hap_seg_m = _get_reference_haplotype_segments(
+                ref_h[i - 1, j], ref_h[i, j]
+            )
+            fwd_hap_probs[i, ref_hap_seg_mP1] += sm[i, j]
+            bwd_hap_probs[i, ref_hap_seg_m] += sm[i, j]
+        sum_fwd_hap_probs = np.sum(fwd_hap_probs[i, :])
+        fwd_hap_probs[i, :] /= sum_fwd_hap_probs
+        bwd_hap_probs[i, :] /= sum_fwd_hap_probs  # Sum of forward hap probs
+    sm[m, :] = sm[m - 1, :]  # Not in BEAGLE
+    return (sm, fwd_hap_probs, bwd_hap_probs)
+
+
+def _interpolate_allele_prob(
+    fwd_hap_probs, bwd_hap_probs, genotyped_pos, ungenotyped_pos, genetic_map=None
+):
+    """
+    WARN: This function is part of an attempt to faithfully reimplement
+    the BEAGLE 4.1 algorithm. It is not complete and not used
+    in the current implementation. It is kept for documentation.
+
+    Compute the interpolated allele probabilities at the ungenotyped markers.
+
+    The forward and backward haplotype probability matrices are of size (m, 2).
+    The interpolated allele probability matrix is of size (x, 2).
+    We assume all biallelic sites, hence the 2.
+
+    This implementation is based on Equation 2 (the rearranged one) in BB2016.
+
+    :param numpy.ndarray fwd_hap_probs: Forward haplotype probability matrix.
+    :param numpy.ndarray bwd_hap_probs: Backward haplotype probability matrix.
+    :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
+    :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
+    :param msprime.RateMap genetic_map: Genetic map.
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    m = len(genotyped_pos)
+    x = len(ungenotyped_pos)
+    assert (m, 2) == fwd_hap_probs.shape
+    assert (m, 2) == bwd_hap_probs.shape
+    genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
+    assert m == len(genotyped_cm)
+    ungenotyped_cm = convert_to_genetic_map_position(ungenotyped_pos)
+    assert x == len(ungenotyped_cm)
+    weights, _ = get_weights(genotyped_pos, ungenotyped_pos, genetic_map=genetic_map)
+    assert x == len(weights)
+    i_hap_probs = np.zeros((x, 2), dtype=np.float32)
+    for i in range(x):
+        # Identify genotyped markers k and k + 1 sandwiching ungenotyped marker i.
+        if ungenotyped_pos[i] < genotyped_pos[0]:
+            k = 0
+        elif ungenotyped_pos[i] > genotyped_pos[-1]:
+            k = m - 2
+        else:
+            k = np.max(np.where(ungenotyped_pos[i] > genotyped_pos)[0])
+        # Vectorised over the allelic states.
+        i_hap_probs[i, :] = weights[i] * bwd_hap_probs[k, :]
+        i_hap_probs[i, :] += (1 - weights[i]) * fwd_hap_probs[k, :]
+    return i_hap_probs
+
+
+def compute_state_prob_matrix(fm, bm):
+    """
+    Compute the HMM state probability matrix, in which each element is
+    the probability of a HMM state at a genotyped marker.
+
+    Implementing this is simpler than faithfully reproducing the BEAGLE 4.1 algorithm.
+
+    The output HMM state probability matrix is of size (m + 1, h).
+
+    Note that the additional column at m + 1 is used for interpolation of
+    ungenotyped markers to the right of the last genotyped marker.
+
+    The forward and backward probability matrices are of size (m, h).
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :return: HMM state probability matrix.
+    :rtype: numpy.ndarray
+    """
+    assert fm.shape == bm.shape
+    m = fm.shape[0]
+    h = bm.shape[1]
+    assert not np.any(fm < 0)
+    assert not np.any(np.isnan(fm))
+    assert not np.any(bm < 0)
+    assert not np.any(np.isnan(bm))
+    # m + 1 columns to allow for ungenotyped markers right of the last genotyped marker.
+    sm = np.zeros((m + 1, h), dtype=np.float32)
+    sm[:-1, :] = np.multiply(fm, bm)
+    sm[m, :] = sm[m - 1, :]  # Not in BEAGLE
+    return sm
+
+
+def interpolate_allele_probabilities(
+    sm, ref_h, genotyped_pos, ungenotyped_pos, genetic_map=None
+):
+    """
+    Compute the interpolated allele probabilities at ungenotyped markers of
+    a query haplotype following Equation 1 of BB2016.
+
+    The interpolated allele probability matrix is of size (x, a),
+    where a is the number of alleles.
+
+    This function takes the output of `compute_state_prob_matrix`.
+
+    Note that this function takes:
+    1. HMM state probability matrix across genotyped markers of size (m + 1, h).
+    2. reference haplotypes subsetted to ungenotyped markers of size (x, h).
+
+    :param numpy.ndarray sm: HMM state probability matrix at genotyped markers.
+    :param numpy.ndarray ref_h: Reference haplotypes subsetted to ungenotyped markers.
+    :param numpy.ndarray genotyped_pos: Site positions at genotyped markers.
+    :param numpy.ndarray ungenotyped_pos: Site positions at ungenotyped markers.
+    :param msprime.RateMap genetic_map: Genetic map.
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    alleles = np.arange(4)  # ACGT
+    assert not np.any(sm < 0)
+    assert not np.any(np.isnan(sm))
+    m = sm.shape[0] - 1
+    h = sm.shape[1]
+    assert m == len(genotyped_pos)
+    x = len(ungenotyped_pos)
+    assert (x, h) == ref_h.shape
+    weights, marker_interval_start = get_weights(
+        genotyped_pos, ungenotyped_pos, genetic_map=genetic_map
+    )
+    assert x == len(weights)
+    assert x == len(marker_interval_start)
+    p = np.zeros((x, len(alleles)), dtype=np.float32)
+    # Compute allele probabilities as per Equation 1 in BB2016.
+    for a in alleles:
+        # Going from left to right.
+        for i in range(x):
+            # Identify genotyped markers k and k + 1 sandwiching ungenotyped marker i.
+            k = marker_interval_start[i]
+            # Sum over the reference haplotypes where the allele is a at marker i.
+            has_a = ref_h[i, :] == a
+            p[i, a] += np.sum(weights[i] * sm[k, has_a])
+            p[i, a] += np.sum((1 - weights[i]) * sm[k + 1, has_a])
+    # Rescale probabilities.
+    p_rescaled = p / np.sum(p, axis=1)[:, np.newaxis]
+    return p_rescaled
+
+
+def get_map_alleles(allele_prob):
+    """
+    Compute the maximum a posteriori alleles at ungenotyped markers of a query haplotype.
+
+    The imputed alleles is an array of size x.
+
+    WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen.
+
+    :param numpy.ndarray allele_prob: Interpolated allele probabilities.
+    :return: Imputed alleles and their associated probabilities.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    assert np.all(allele_prob >= 0)
+    assert not np.any(np.isnan(allele_prob))
+    imputed_alleles = np.argmax(allele_prob, axis=1)
+    max_allele_prob = np.max(allele_prob, axis=1)
+    assert len(imputed_alleles) == allele_prob.shape[0]
+    assert len(max_allele_prob) == allele_prob.shape[0]
+    return (imputed_alleles, max_allele_prob)
+
+
+def run_beagle(
+    ref_h, query_h, pos, miscall_rate=1e-4, ne=1e6, genetic_map=None, debug=False
+):
+    """
+    Run a simplified version of the BEAGLE 4.1 imputation algorithm
+    based on Equation 1 of BB2016.
+
+    Reference haplotypes and query haplotype are of size (m + x, h) and (m + x),
+    respectively,
+
+    The site positions of all the markers are an array of size (m + x).
+
+    This produces imputed alleles and interpolated allele probabilities
+    at the ungenotyped markers of the query haplotype.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Site positions of all the markers.
+    :param float miscall_rate: Allelic miscall rate.
+    :param int ne: Effective population size.
+    :param msprime.RateMap genetic_map: Genetic map.
+    :param bool debug: Whether to print intermediate results.
+    :return: Imputed alleles and their associated probabilities.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    # Set precision for printing.
+    if debug:
+        np.set_printoptions(precision=20)
+    assert ref_h.shape[0] == len(pos)
+    assert len(query_h) == len(pos)
+    # Indices of genotyped markers in the query haplotype.
+    genotyped_idx = np.where(query_h != -1)[0]
+    assert len(genotyped_idx) > 1
+    # Indices of ungenotyped markers in the query haplotype.
+    ungenotyped_idx = np.where(query_h == -1)[0]
+    assert len(ungenotyped_idx) > 0
+    # Site positions of genotyped markers.
+    genotyped_pos = pos[genotyped_idx]
+    # Site positions of ungenotyped markers.
+    ungenotyped_pos = pos[ungenotyped_idx]
+    h = ref_h.shape[1]
+    m = len(genotyped_pos)
+    x = len(ungenotyped_pos)
+    assert m + x == len(pos)
+    # Subset reference haplotypes to genotyped markers.
+    ref_h_genotyped = ref_h[genotyped_idx, :]
+    if debug:
+        print("Reference haplotypes subsetted to genotyped markers")
+        print(ref_h_genotyped)
+    assert (m, h) == ref_h_genotyped.shape
+    # Subset query haplotype to genotyped markers.
+    query_h_genotyped = query_h[genotyped_idx]
+    if debug:
+        print("Query haplotype subsetted to genotyped markers")
+        print(query_h_genotyped)
+    assert m == len(query_h_genotyped)
+    # Set mismatch probabilities at genotyped markers.
+    mu = get_mismatch_prob(genotyped_pos, miscall_rate=miscall_rate)
+    if debug:
+        print("Mismatch probabilities")
+        print(mu)
+    assert m == len(mu)
+    # Set switch probabilities at genotyped markers.
+    rho = get_switch_prob(genotyped_pos, h, ne=ne)
+    if debug:
+        print("Switch probabilities")
+        print(rho)
+    assert m == len(rho)
+    # Compute forward probability matrix at genotyped markers.
+    fm = compute_forward_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
+    if debug:
+        print("Forward probability matrix")
+        print(fm)
+    assert (m, h) == fm.shape
+    # Compute backward probability matrix at genotyped markers.
+    bm = compute_backward_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
+    if debug:
+        print("Backward probability matrix")
+        print(bm)
+    assert (m, h) == bm.shape
+    # Compute HMM state probability matrix at genotyped markers.
+    sm = compute_state_prob_matrix(fm, bm)
+    if debug:
+        print("HMM state probability matrix")
+        print(sm)
+    assert (m + 1, h) == sm.shape
+    # Subset the reference haplotypes to ungenotyped markers.
+    ref_h_ungenotyped = ref_h[ungenotyped_idx, :]
+    # Interpolate allele probabilities at ungenotyped markers.
+    alleles = np.arange(4)  # ACGT
+    i_allele_prob = interpolate_allele_probabilities(
+        sm, ref_h_ungenotyped, genotyped_pos, ungenotyped_pos, genetic_map=genetic_map
+    )
+    if debug:
+        print("Interpolated allele probabilities")
+        print(i_allele_prob)
+    assert (x, len(alleles)) == i_allele_prob.shape
+    # Get MAP alleles at ungenotyped markers.
+    imputed_alleles, max_allele_prob = get_map_alleles(i_allele_prob)
+    if debug:
+        print("Imputed alleles")
+        print(imputed_alleles)
+        print("Max allele probabilities")
+        print(max_allele_prob)
+    assert x == len(imputed_alleles)
+    assert x == len(max_allele_prob)
+    return (imputed_alleles, max_allele_prob)
diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
new file mode 100644
index 0000000000..e199d8cfde
--- /dev/null
+++ b/python/tests/beagle_numba.py
@@ -0,0 +1,569 @@
+"""
+Implementation of the BEAGLE algorithm to impute alleles by linear interpolation of
+state probabilities at ungenotyped markers in the HMM state probability matrix.
+
+This was implemented while closely consulting the BEAGLE 4.1 paper:
+Browning & Browning (2016). Genotype imputation with millions of reference samples.
+Am J Hum Genet 98:116-126. doi:10.1016/j.ajhg.2015.11.020
+
+The BEAGLE 4.1 source code (particularly `LSHapBaum.java`) was closely consulted:
+https://faculty.washington.edu/browning/beagle/b4_1.html
+
+These notations are used throughout:
+h = number of reference haplotypes.
+m = number of genotyped markers.
+x = number of ungenotyped markers.
+
+This implementation takes the following inputs:
+1. Panel of reference haplotypes in a matrix of size (m + x, h).
+2. One query haplotype in an array of size (m + x).
+3. Physical positions of all the markers in an array of size (m + x).
+
+In the query haplotype:
+1. The genotyped positions take values of 0, 1, 2, or 3 (ACGT encoding).
+2. The ungenotyped positions take -1.
+
+The forward and backward probability matrices are of size (m, h).
+The HMM state probability matrix is of size (m, h).
+The interpolated allele probability matrix is of size (x, 4),
+The imputed alleles are the maximum a posteriori (MAP) alleles.
+
+To improve computational efficiency, BEAGLE uses aggregated markers,
+which are clusters of markers within a 0.005 cM interval (default).
+Because the genotypes are phased, the alleles in the aggregated markers
+form distinct "allele sequences". Below, we do not use aggregated markers
+or allele sequences, which would complicate the implementation.
+
+Rather than trying to exactly replicate the original BEAGLE algorithm,
+this implementation uses Equation 1 of BB2016.
+"""
+import warnings
+from dataclasses import dataclass
+
+import numpy as np
+from numba import njit
+
+import _tskit
+import tskit
+
+
+@dataclass(frozen=True)
+class GeneticMap:
+    """
+    A genetic map having:
+    1. Physical positions (bp).
+    2. Genetic map positions (cM).
+    """
+
+    base_pos: np.ndarray
+    gen_pos: np.ndarray
+
+    def __post_init__(self):
+        assert len(self.base_pos) == len(
+            self.gen_pos
+        ), "Lists of physical positions and genetic map positions differ in length."
+        assert np.all(
+            self.base_pos[1:] > self.base_pos[:-1]
+        ), "Physical positions not in strict ascending order."
+
+
+""" TODO: Implement a class to encapsulate the data for imputation (`ImpData`). """
+
+
+def remap_alleles(a):
+    """
+    Helper function.
+
+    TODO: Move elsewhere.
+    """
+    _ALLELES_ACGT = "ACGT"
+    b = np.zeros(len(a), dtype=np.int8) - 1
+    for i in range(len(a)):
+        if a[i] in [None, ""]:
+            continue
+        elif a[i] in _ALLELES_ACGT:
+            b[i] = _ALLELES_ACGT.index(a[i])
+        else:
+            raise AssertionError(f"Unrecognised character: {a[i]}")
+    return b
+
+
+def check_data(ref_h, query_h):
+    """
+    For each site, check if the alleles in the query haplotype
+    are represented in the reference haplotypes.
+
+    Missing data (i.e. -1) are ignored.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :return: True if alleles in query are in reference.
+    :rtype: bool
+    """
+    m = ref_h.shape[0]
+    assert m == len(query_h), "Reference and query differ in length."
+    for i in range(m):
+        if query_h[i] != tskit.MISSING_DATA:
+            if query_h[i] not in ref_h[i, :]:
+                print(f"Allele {query_h[i]} at {i}th position not in reference.")
+                return False
+    return True
+
+
+def convert_to_genetic_map_positions(pos, genetic_map=None):
+    """
+    Convert physical positions (bp) to genetic map positions (cM).
+
+    In BEAGLE 4.1, when a genetic map is not specified, it is assumed
+    that the recombination rate is constant (1 cM / 1 Mb).
+
+    If a genetic map is specified, then the genetic map positions are
+    either taken straight from it or interpolated from it. The genetic map
+    needs to contain physical positions and corresponding genetic map positions.
+    See `PlinkGenMap.java` in the BEAGLE 4.1 source code for details.
+
+    :param numpy.ndarray pos: Physical positions (bp).
+    :param GeneticMap genetic_map: Genetic map.
+    :return: Genetic map positions (cM).
+    :rtype: numpy.ndarray
+    """
+    if genetic_map is None:
+        return pos / 1e6
+    assert np.all(pos >= genetic_map.base_pos[0]) and np.all(
+        pos < genetic_map.base_pos[-1]
+    ), "Physical positions outside of genetic map."
+    # Approximate genetic map distances by linear interpolation.
+    # Note np.searchsorted(a, v, side='right') returns i s.t. a[i-1] <= v < a[i].
+    right_idx = np.searchsorted(genetic_map.base_pos, pos, side="right")
+    est_cm = np.zeros(len(pos), dtype=np.float64)  # BEAGLE 4.1 uses double in Java.
+    for i in range(len(pos)):
+        a = genetic_map.base_pos[right_idx[i] - 1]
+        b = genetic_map.base_pos[right_idx[i]]
+        fa = genetic_map.gen_pos[right_idx[i] - 1]
+        fb = genetic_map.gen_pos[right_idx[i]]
+        assert pos[i] >= a, f"Query position not >= left-bound position: {pos[i]}, {a}."
+        assert (
+            fb >= fa
+        ), f"Genetic map positions not in monotonically ascending order: {fb}, {fa}."
+        est_cm[i] = fa
+        est_cm[i] += (fb - fa) * (pos[i] - a) / (b - a)
+    return est_cm
+
+
+def get_mutation_rates(pos, error_rate):
+    """
+    Set mutation rates at genotyped positions.
+
+    Mutation rates should be dominated by the allele error rate.
+
+    In BEAGLE 4.1/5.4, the default value is set to 0.0001 and capped at 0.50.
+    In IMPUTE5, the default value is also set to 0.0001.
+
+    When using `_tskit.LsHmm`, this is `mu`.
+
+    :param numpy.ndarray pos: Physical positions (bp).
+    :param float error_rate: Allele error rate.
+    :return: Per-site mutation rates.
+    :rtype: numpy.ndarray
+    """
+    MAX_ERROR_RATE = 0.5
+    if error_rate >= MAX_ERROR_RATE:
+        error_rate = MAX_ERROR_RATE
+    mu = np.zeros(len(pos), dtype=np.float64) + error_rate
+    return mu
+
+
+def get_recombination_rates(cm, h, ne):
+    """
+    Set recombination rates at genotyped positions.
+
+    Note that in BEAGLE 4.1, the default value of `ne` is set to 1e6,
+    whereas in BEAGLE 5.4, the default value of `ne` is set to 1e5.
+    In BEAGLE, this default value was optimized empirically on
+    datasets from large outbred human populations. Also, in IMPUTE5,
+    the default value of `ne` is set to 1e6.
+
+    If `h` is small and `ne` is large, the recombination rates is ~1.
+    In such cases, it may be desirable to set `ne` to a small value
+    to avoid switching between haplotypes all the time.
+
+    When using `_tskit.LsHmm`, this is `rho`.
+
+    :param numpy.ndarray cm: Genetic map positions (cM).
+    :param int h: Number of reference haplotypes.
+    :param float ne: Effective population size.
+    :return: Per-site recombination rates.
+    :rtype: numpy.ndarray
+    """
+    # Recombination rate of first site is always 0.
+    r = np.zeros(len(cm), dtype=np.float64)
+    r[1:] = np.diff(cm)
+    # TODO: In the original LS formulation, it's 4. BEAGLE 4.1 uses 0.04.
+    # It may have to do with improving numerical stability.
+    c = -0.04 * (ne / h)
+    rho = -1 * np.expm1(c * r)
+    return rho
+
+
+@njit
+def compute_forward_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement forward algorithm to compute a forward probablity matrix of size (m, h).
+
+    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    So, they are a matrix of size (m, h) and an array of size m, respectively.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Per-site recombination rates.
+    :param numpy.ndarray mu: Per-site mutation rates.
+    :return: Forward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    fm = np.zeros((m, h), dtype=np.float64)
+    last_sum = 1.0  # Part of normalization factor.
+    for i in range(m):
+        # Get site-specific parameters.
+        shift = rho[i] / h
+        scale = (1 - rho[i]) / last_sum
+        # Get allele at genotyped marker i on query haplotype.
+        query_a = query_h[i]
+        for j in range(h):
+            # Get allele at genotyped marker i on reference haplotype j.
+            ref_a = ref_h[i, j]
+            # Get emission probability.
+            em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
+            fm[i, j] = em_prob
+            if i == 0:
+                fm[i, j] *= 1.0 / h  # Prior probabilities.
+            else:
+                fm[i, j] *= scale * fm[i - 1, j] + shift
+        site_sum = np.sum(fm[i, :])
+        last_sum = site_sum / h if i == 0 else site_sum
+    return fm
+
+
+@njit
+def compute_backward_matrix(ref_h, query_h, rho, mu):
+    """
+    Implement backward algorithm to compute a backward probablity matrix of size (m, h).
+
+    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    So, they are a matrix of size (m, h) and an array of size m, respectively.
+
+    In BEAGLE 4.1, the values are kept one site at a time. Here, we keep the values
+    at all the sites.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray rho: Per-site recombination rates.
+    :param numpy.ndarray mu: Per-site mutation rates.
+    :return: Backward probability matrix.
+    :rtype: numpy.ndarray
+    """
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    bm = np.zeros((m, h), dtype=np.float64)
+    bm[-1, :] = 1.0 / h  # Initialise the last column.
+    for i in range(m - 2, -1, -1):
+        query_a = query_h[i + 1]
+        for j in range(h):
+            ref_a = ref_h[i + 1, j]
+            em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
+            bm[i, j] = bm[i + 1, j] * em_prob
+        sum_site = np.sum(bm[i, :])
+        scale = (1 - rho[i + 1]) / sum_site
+        shift = rho[i + 1] / h
+        bm[i, :] = scale * bm[i, :] + shift
+    return bm
+
+
+def compute_state_prob_matrix(fm, bm):
+    """
+    Compute the HMM state probability matrix, in which each element is
+    the probability of a HMM state at a genotyped position.
+
+    The forward and backward probability matrices are of size (m, h).
+    The output HMM state probability matrix is of size (m, h).
+
+    Implementing this is simpler than faithfully reproducing BEAGLE.
+
+    :param numpy.ndarray fm: Forward probability matrix.
+    :param numpy.ndarray bm: Backward probability matrix.
+    :return: HMM state probability matrix.
+    :rtype: numpy.ndarray
+    """
+    assert fm.shape == bm.shape, "Forward and backward matrices differ in shape."
+    sm = np.multiply(fm, bm)
+    return sm
+
+
+@njit
+def get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm):
+    """
+    Get weights for the ungenotyped positions in a query haplotype, which are used in
+    linear interpolation of HMM state probabilities at the ungenotyped positions.
+
+    In BB2016 (see below Equation 1), a weight between genotyped positions m and m + 1
+    bounding ungenotyped marker x is denoted lambda_m,x.
+
+    lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
+    where g(i) = genetic map position of marker i.
+
+    This function looks for the right-bounding position instead of the left-bounding.
+
+    :param numpy.ndarray typed_pos: Physical positions of genotyped markers (bp).
+    :param numpy.ndarray untyped_pos: Physical positions of ungenotyped markers (bp).
+    :param numpy.ndarray typed_cm: Genetic map positions of genotyped markers (cM).
+    :param numpy.ndarray untyped_cm: Genetic map positions of ungenotyped markers (cM).
+    :return: Weights for ungenotyped positions and indices of right-bounding positions.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    _MIN_CM_DIST = 1e-7  # Avoid division by zero.
+    m = len(typed_pos)
+    x = len(untyped_pos)
+    # Identify genotype positions (m - 1) and m bounding ungenotyped position i.
+    # Note np.searchsorted(a, v, side='right') returns i s.t. a[i-1] <= v < a[i].
+    right_idx = np.searchsorted(typed_pos, untyped_pos, side="right")
+    # Calculate weights for ungenotyped positions.
+    weights = np.zeros(x, dtype=np.float64)
+    for i in range(len(right_idx)):
+        r = right_idx[i]
+        if r == 0:
+            # Left of the first genotyped position.
+            weights[i] = 1.0
+        elif r == m:
+            # Right of the last genotyped position.
+            weights[i] = 0.0
+        else:
+            # Between two genotyped positions.
+            cm_m2x = typed_cm[r] - untyped_cm[i]
+            # TODO: Check if this a subtle source of error.
+            cm_m2mM1 = max(typed_cm[r] - typed_cm[r - 1], _MIN_CM_DIST)
+            weights[i] = cm_m2x / cm_m2mM1
+    return (weights, right_idx)
+
+
+@njit
+def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untyped_cm):
+    """
+    Interpolate allele probabilities at the ungenotyped positions of a query haplotype
+    following Equation 1 of BB2016.
+
+    The interpolated allele probability matrix is of size (x, a),
+    where a is the number of alleles.
+
+    Note that this function takes:
+    1. HMM state probability matrix at genotyped positions of size (m, h).
+    2. Reference haplotypes subsetted to ungenotyped positions of size (x, h).
+
+    :param numpy.ndarray sm: HMM state probability matrix at genotyped positions.
+    :param numpy.ndarray ref_h: Reference haplotypes subsetted to ungenotyped positions.
+    :param numpy.ndarray typed_pos: Physical positions of genotyped markers (bp).
+    :param numpy.ndarray untyped_pos: Physical positions of ungenotyped markers (bp).
+    :param numpy.ndarray typed_cm: Genetic map positions at genotyped markers (cM).
+    :param numpy.ndarray untyped_cm: Genetic map positions at ungenotyped markers (cM).
+    :return: Interpolated allele probabilities.
+    :rtype: numpy.ndarray
+    """
+    # TODO: Allow for biallelic site matrix. Work with `_tskit.lshmm` properly.
+    alleles = np.arange(4)  # ACGT
+    m = sm.shape[0]
+    x = ref_h.shape[0]
+    h = ref_h.shape[1]
+    weights, right_idx = get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm)
+    probs = np.zeros((x, len(alleles)), dtype=np.float64)
+    for i in range(x):
+        k = right_idx[i]
+        w = weights[i]
+        for j in range(h):
+            for a in alleles:
+                if ref_h[i, j] == a:
+                    if k == 0:
+                        assert w == 1.0, "Weight should be 1.0."
+                        probs[i, a] += sm[k, j]
+                    if k == m:
+                        # FIXME: It's unclear how BEAGLE handles this case.
+                        assert w == 0.0, "Weight should be 0.0."
+                        probs[i, a] += sm[k - 1, j]
+                    else:
+                        probs[i, a] += w * sm[k - 1, j]
+                        probs[i, a] += (1 - w) * sm[k, j]
+    # Rescale probabilities.
+    # TODO: Check if this is necessary. Could this be a subtle source of error?
+    probs_rescaled = probs / np.sum(probs, axis=1)[:, np.newaxis]
+    return probs_rescaled
+
+
+def get_map_alleles(allele_probs):
+    """
+    Compute the maximum a posteriori alleles at ungenotyped markers of a query haplotype,
+    based on posterior marginal allele probabilities.
+
+    The imputed alleles is an array of size x.
+
+    WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen.
+    TODO: Investigate how often this happens and the effect of this arbitrary choice.
+
+    :param numpy.ndarray allele_probs: Interpolated allele probabilities.
+    :return: Imputed alleles and their associated probabilities.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    imputed_alleles = np.argmax(allele_probs, axis=1)
+    max_allele_probs = np.max(allele_probs, axis=1)
+    return (imputed_alleles, max_allele_probs)
+
+
+def run_interpolation_beagle(
+    ref_h,
+    query_h,
+    pos,
+    ne=1e6,
+    error_rate=1e-4,
+    genetic_map=None,
+):
+    """
+    Perform a simplified procedure of interpolation-style imputation
+    based on Equation 1 of BB2016.
+
+    Reference haplotypes and query haplotype are of size (m + x, h) and (m + x).
+
+    The physical positions of all the markers are an array of size (m + x).
+
+    This produces imputed alleles and their interpolated probabilities
+    at the ungenotyped markers of the query haplotype.
+
+    The default values of `ne` and `error_rate` are taken from BEAGLE 4.1, not 5.4.
+    In BEAGLE 5.4, the default value of `ne` is 1e5 and `error_rate` is data-dependent.
+
+    :param numpy.ndarray ref_h: Reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Physical positions of all the markers (bp).
+    :param int ne: Effective population size.
+    :param float error_rate: Allele error rate.
+    :param GeneticMap genetic_map: Genetic map.
+    :return: Imputed alleles and their interpolated probabilities.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    warnings.warn("This function is experimental and not fully tested.", stacklevel=1)
+    warnings.warn(
+        "Check the reference and query haplotypes use the same allele encoding.",
+        stacklevel=1,
+    )
+    # Set marker indices.
+    typed_idx = np.where(query_h != -1)[0]
+    untyped_idx = np.where(query_h == -1)[0]
+    # Set physical positions of markers.
+    typed_pos = pos[typed_idx]
+    untyped_pos = pos[untyped_idx]
+    # Get genetic map positions of markers.
+    typed_cm = convert_to_genetic_map_positions(typed_pos, genetic_map)
+    untyped_cm = convert_to_genetic_map_positions(untyped_pos, genetic_map)
+    # Subset haplotypes.
+    ref_h_typed = ref_h[typed_idx, :]
+    ref_h_untyped = ref_h[untyped_idx, :]
+    query_h_typed = query_h[typed_idx]
+    # Get rates at genotyped markers.
+    h = ref_h.shape[1]
+    rho = get_recombination_rates(typed_cm, h=h, ne=ne)
+    mu = get_mutation_rates(typed_pos, error_rate=error_rate)
+    # Compute matrices at genotyped markers.
+    fm = compute_forward_matrix(ref_h_typed, query_h_typed, rho, mu)
+    bm = compute_backward_matrix(ref_h_typed, query_h_typed, rho, mu)
+    sm = compute_state_prob_matrix(fm, bm)
+    # Perform linear interpolation.
+    int_allele_probs = interpolate_allele_probs(
+        sm=sm,
+        ref_h=ref_h_untyped,
+        typed_pos=typed_pos,
+        untyped_pos=untyped_pos,
+        typed_cm=typed_cm,
+        untyped_cm=untyped_cm,
+    )
+    imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs)
+    return (imputed_alleles, max_allele_probs)
+
+
+def run_tsimpute(
+    ref_ts,
+    query_h,
+    pos,
+    rho,
+    mu,
+    precision=22,
+    genetic_map=None,
+):
+    """
+    Perform interpolation-style imputation, except that the forward and backward
+    probability matrices are computed from a tree sequence.
+
+    Reference haplotypes and query haplotype are of size (m + x, h) and (m + x).
+
+    The physical positions of all the markers are an array of size (m + x).
+
+    This produces imputed alleles and their interpolated probabilities
+    at the ungenotyped markers of the query haplotype.
+
+    TODO: Put this function elsewhere.
+    TODO: Set default precision. What should it be?
+    TODO: Allow for imputation on user-specified genomic interval.
+    TODO: Handle `acgt_alleles` properly.
+
+    :param numpy.ndarray ref_ts: Tree sequence with reference haplotypes.
+    :param numpy.ndarray query_h: One query haplotype.
+    :param numpy.ndarray pos: Physical positions of all the markers (bp).
+    :param numpy.ndarray rho: Per-site recombination rate.
+    :param numpy.ndarray mu: Per-site mutation rate.
+    :param int precision: Precision when running Li & Stephens HMM.
+    :param GeneticMap genetic_map: Genetic map.
+    :return: Imputed alleles and their interpolated probabilities.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
+    """
+    warnings.warn(
+        "Check the reference and query haplotypes use the same allele encoding.",
+        stacklevel=1,
+    )
+    # Set markers indices.
+    typed_idx = np.where(query_h != -1)[0]
+    untyped_idx = np.where(query_h == -1)[0]
+    # Set physical positions of markers.
+    typed_pos = pos[typed_idx]
+    untyped_pos = pos[untyped_idx]
+    # Get genetic map positions of markers.
+    typed_cm = convert_to_genetic_map_positions(typed_pos, genetic_map)
+    untyped_cm = convert_to_genetic_map_positions(untyped_pos, genetic_map)
+    # Get rates at genotyped markers.
+    rho = rho[typed_idx]
+    mu = mu[typed_idx]
+    # Subset haplotypes.
+    ref_ts_typed = ref_ts.delete_sites(site_ids=untyped_idx)
+    ref_ts_untyped = ref_ts.delete_sites(site_ids=typed_idx)
+    ref_h_untyped = ref_ts_untyped.genotype_matrix(alleles=tskit.ALLELES_ACGT)
+    query_h_typed = query_h[typed_idx]
+    # Get matrices from tree sequence.
+    fm = _tskit.CompressedMatrix(ref_ts_typed._ll_tree_sequence)
+    bm = _tskit.CompressedMatrix(ref_ts_typed._ll_tree_sequence)
+    # WARN: Be careful with argument `acgt_alleles`!!!
+    ls_hmm = _tskit.LsHmm(
+        ref_ts_typed._ll_tree_sequence,
+        recombination_rate=rho,
+        mutation_rate=mu,
+        acgt_alleles=True,
+        precision=precision,
+    )
+    ls_hmm.forward_matrix(query_h_typed.T, fm)
+    ls_hmm.backward_matrix(query_h_typed.T, fm.normalisation_factor, bm)
+    sm = compute_state_prob_matrix(fm.decode(), bm.decode())
+    # Perform linear interpolation.
+    int_allele_probs = interpolate_allele_probs(
+        sm=sm,
+        ref_h=ref_h_untyped,
+        typed_pos=typed_pos,
+        untyped_pos=untyped_pos,
+        typed_cm=typed_cm,
+        untyped_cm=untyped_cm,
+    )
+    imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs)
+    return (imputed_alleles, max_allele_probs)
diff --git a/python/tests/test_imputation.py b/python/tests/test_imputation.py
new file mode 100644
index 0000000000..96cdaaf3a3
--- /dev/null
+++ b/python/tests/test_imputation.py
@@ -0,0 +1,719 @@
+"""
+Tests for interpolation-style genotype imputation.
+"""
+import io
+
+import numpy as np
+import pytest
+
+import tests.beagle
+import tests.beagle_numba
+import tskit
+
+
+# The following toy query haplotypes were imputed from the toy reference haplotypes
+# using BEAGLE 4.1 with Ne set to 10.
+#
+# There are 9 sites, starting from 10,000 to 90,000, with 10,000 increments.
+# The REF is A and ALT is C for all the sites in the VCF input to BEAGLE.
+# The ancestral state is A and derived state is C for all the sites.
+# In this setup, A is encoded as 0 and C as 1 whether the allele encoding is
+# ancestral/derived (0/1) or ACGT (0123).
+#
+# Case 0:
+# Reference haplotypes and query haplotypes have 0|0 at all sites.
+# First and last reference markers are missing in the query haplotypes.
+# fmt: off
+toy_ref_0 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],   # ref_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],   # ref_0, haplotype 1
+    ], dtype=np.int32
+).T
+toy_query_0 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_0_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_0_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_0.shape == (9, 2)
+assert toy_query_0.shape == (2, 9)
+assert toy_query_0_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_0[0] == 0) == 4
+assert np.sum(toy_query_0[0] == -1) == 5
+assert np.array_equal(toy_query_0[0], toy_query_0[1])
+assert np.all(toy_query_0_beagle_imputed[0] == 0)
+assert np.all(toy_query_0_beagle_imputed[1] == 0)
+assert toy_query_0_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 1:
+# Reference haplotypes and query haplotypes have 1|1 at all sites.
+# First and last reference markers are missing in the query haplotypes.
+# fmt: off
+toy_ref_1 = np.array(
+    [
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 1
+    ], dtype=np.int32
+).T
+toy_query_1 = np.array(
+    [
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_1_beagle_imputed = np.array(
+    [
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_1_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0, 0.0, 0.0, 0.0, 0.0],
+            [1.0, 1.0, 1.0, 1.0, 1.0],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_1.shape == (9, 2)
+assert toy_query_1.shape == (2, 9)
+assert toy_query_1_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_1[0] == 1) == 4
+assert np.sum(toy_query_1[0] == -1) == 5
+assert np.array_equal(toy_query_1[0], toy_query_1[1])
+assert np.all(toy_query_1_beagle_imputed[0] == 1)
+assert np.all(toy_query_1_beagle_imputed[1] == 1)
+assert toy_query_1_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 2:
+# Reference haplotypes and query haplotypes have 0|0 at all sites.
+# First and last reference markers are genotyped in the query haplotypes.
+# fmt: off
+toy_ref_2 = np.copy(toy_ref_0)
+toy_query_2 = np.array(
+    [
+        [0, 0, -1, 0, -1, 0, -1, 0, 0],  # query_0, haplotype 0
+        [0, 0, -1, 0, -1, 0, -1, 0, 0],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_2_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_2_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [1.0, 1.0, 1.0],
+            [0.0, 0.0, 0.0],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_2.shape == (9, 2)
+assert toy_query_2.shape == (2, 9)
+assert toy_query_2_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_2[0] == 0) == 6
+assert np.sum(toy_query_2[0] == -1) == 3
+assert np.array_equal(toy_query_2[0], toy_query_2[1])
+assert np.all(toy_query_2_beagle_imputed[0] == 0)
+assert np.all(toy_query_2_beagle_imputed[1] == 0)
+assert toy_query_2_beagle_allele_probs.shape == (2, 2, 3)
+
+# Case 3:
+# Reference haplotypes and query haplotypes have 0|1 at all sites.
+# fmt: off
+toy_ref_3 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_0, haplotype 1
+    ], dtype=np.int32
+).T
+toy_query_3 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_3_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_3_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    # See comment https://github.com/tskit-dev/tskit/pull/2815#issuecomment-1708178257
+    [
+        # query_0, haplotype 0
+        [
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.9999998],
+            [0.0      , 0.0      , 0.0, 0.0      , 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0      , 0.0      , 0.0, 0.0      , 0.0],
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.9999998],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_3.shape == (9, 2)
+assert toy_query_3.shape == (2, 9)
+assert toy_query_3_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_3[0] == 0) == 4
+assert np.sum(toy_query_3[1] == 1) == 4
+assert np.sum(toy_query_3[0] == -1) == np.sum(toy_query_3[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_3_beagle_imputed[0], toy_query_3_beagle_imputed[1]))
+)
+assert toy_query_3_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 4:
+# Reference haplotypes and query haplotypes have alternating 0|1 and 1|0 genotypes.
+# Query haplotypes have 0|1 at all genotyped sites.
+# fmt: off
+toy_ref_4 = np.array(
+    [
+        [0, 1, 0, 1, 0, 1, 0, 1, 0],  # ref_0, haplotype 0
+        [1, 0, 1, 0, 1, 0, 1, 0, 1],  # ref_0, haplotype 1
+    ], dtype=np.int32
+).T
+toy_query_4 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 1, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_4_beagle_imputed = np.array(
+    [
+        [1, 0, 1, 0, 1, 0, 1, 0, 1],  # query_0, haplotype 0
+        [0, 1, 0, 1, 0, 1, 0, 1, 0],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_4_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.0      , 0.0      , 0.0, 0.0      , 0.9999998],
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9999998, 0.9999999, 1.0, 0.9999999, 0.0],
+            [0.0      , 0.0      , 0.0, 0.0      , 0.9999998],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_4.shape == (9, 2)
+assert toy_query_4.shape == (2, 9)
+assert toy_query_4_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_4[0] == 0) == 4
+assert np.sum(toy_query_4[1] == 1) == 4
+assert np.sum(toy_query_4[0] == -1) == np.sum(toy_query_4[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_4_beagle_imputed[0], toy_query_4_beagle_imputed[1]))
+)
+assert toy_query_3_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 5:
+# The reference panel has two individuals. The first individual has 0|0 at all sites,
+# and the second individual has 1|1 at all sites.
+# The query haplotypes have one recombination breakpoint in the middle.
+# fmt: off
+toy_ref_5 = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0
+        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # ref_0
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_1
+        [1, 1, 1, 1, 1, 1, 1, 1, 1],  # ref_1
+    ], dtype=np.int32
+).T
+toy_query_5 = np.array(
+    [
+        [-1, 0, -1, 0, -1, 0, -1, 1, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 1, -1, 1, -1, 0, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_5_beagle_imputed = np.array(
+    [
+        [0, 0, 0, 0, 0, 0, 0, 1, 1],  # query_0, haplotype 0
+        [1, 1, 1, 1, 1, 1, 1, 0, 0],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_5_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.9999999, 0.99999994, 0.9999546, 0.5454091, 0.09090912],
+            [0.0      , 0.0       , 0.0      , 0.4545909, 0.9090909],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.0      , 0.0       , 0.0      , 0.4545909, 0.9090909],
+            [0.9999999, 0.99999994, 0.9999546, 0.5454091, 0.09090912],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_5.shape == (9, 4)
+assert toy_query_5.shape == (2, 9)
+assert toy_query_5_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_5[0] == 0) == 3
+assert np.sum(toy_query_5[0] == 1) == 1
+assert np.sum(toy_query_5[0] == -1) == 5
+assert np.sum(toy_query_5[1] == 0) == 1
+assert np.sum(toy_query_5[1] == 1) == 3
+assert np.sum(toy_query_5[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_5_beagle_imputed[0], toy_query_5_beagle_imputed[1]))
+)
+assert toy_query_5_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 6:
+# The reference panel has two individuals. The first individual has 0|0 at all sites,
+# and the second individual has 1|1 at all sites.
+# The query haplotypes have two recombination breakpoints in the middle.
+# fmt: off
+toy_ref_6 = np.copy(toy_ref_5)
+toy_query_6 = np.array(
+    [
+        [-1, 0, -1, 1, -1, 1, -1, 0, -1],  # query_0, haplotype 0
+        [-1, 1, -1, 0, -1, 0, -1, 1, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_6_beagle_imputed = np.array(
+    [
+        [0, 0, 1, 1, 1, 1, 1, 0, 0],  # query_0, haplotype 0
+        [1, 1, 0, 0, 0, 0, 0, 1, 1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_6_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.90983605, 0.46770614, 0.025481388, 0.46838862, 0.91139066],
+            [0.09016396, 0.53229386, 0.97451866 , 0.5316114 , 0.088609315],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.09016395, 0.53229386, 0.97451866 , 0.5316114 , 0.0886093],
+            [0.90983605, 0.46770614, 0.025481388, 0.46838862, 0.91139066],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_6.shape == (9, 4)
+assert toy_query_6.shape == (2, 9)
+assert toy_query_6_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_6[0] == 0) == 2
+assert np.sum(toy_query_6[0] == 1) == 2
+assert np.sum(toy_query_6[0] == -1) == 5
+assert np.sum(toy_query_6[1] == 0) == 2
+assert np.sum(toy_query_6[1] == 1) == 2
+assert np.sum(toy_query_6[1] == -1) == 5
+assert np.all(
+    np.invert(np.equal(toy_query_6_beagle_imputed[0], toy_query_6_beagle_imputed[1]))
+)
+assert toy_query_6_beagle_allele_probs.shape == (2, 2, 5)
+
+# Case 7:
+# Hand-crafted example.
+# fmt: off
+toy_ref_7 = np.array(
+    [
+        [0, 1, 2, 0, 1, 1, 2, 3, 1],    # ref_0
+        [1, 1, 2, 3, 1, 1, 2, 3, 1],    # ref_0
+        [0, 1, 3, 0, 1, 1, 2, 3, 1],    # ref_1
+        [0, 2, 2, 3, 0, 1, 2, 0, 0],    # ref_1
+        [0, 1, 2, 3, 0, 2, 3, 0, 0],    # ref_2
+        [1, 1, 2, 3, 1, 1, 2, 3, 1],    # ref_2
+        [0, 1, 3, 0, 1, 1, 2, 3, 1],    # ref_3
+        [0, 1, 2, 3, 0, 1, 2, 3, 1],    # ref_3
+        [0, 2, 2, 3, 0, 1, 2, 0, 0],    # ref_4
+        [0, 1, 2, 3, 0, 2, 3, 0, 0],    # ref_4
+    ], dtype=np.int32
+).T
+toy_query_7 = np.array(
+    [
+        [0, 1, -1, 0, -1, 2, -1, 3, 1],   # query_0, haplotype 0
+        [1, 2, -1, 3, -1, 1, -1, 0, 0],   # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_7_beagle_imputed = np.array(
+    [
+        [0, 1, 3, 0, 1, 2, 2, 3, 1],    # query_0, haplotype 0
+        [1, 2, 2, 3, 0, 1, 2, 0, 0],    # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_7_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.33317572, 0.0      , 0.99945664],
+            [0.66635144, 0.9991437, 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9995998, 0.99971414, 0.99974835],
+            [0.0      , 0.0       , 0.0],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_7.shape == (9, 10)
+assert toy_query_7.shape == (2, 9)
+assert toy_query_7_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_7[0] == -1) == np.sum(toy_query_7[1] == -1) == 3
+assert toy_query_7_beagle_allele_probs.shape == (2, 2, 3)
+
+# Case 8:
+# Same as case 7 except the last genotyped marker is missing.
+# fmt: off
+toy_ref_8 = np.copy(toy_ref_7)
+toy_query_8 = np.array(
+    [
+        [-1, 1, -1, 0, -1, 2, -1, 3, -1],  # query_0, haplotype 0
+        [-1, 2, -1, 3, -1, 1, -1, 0, -1],  # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_8_beagle_imputed = np.array(
+    [
+        [0, 1, 3, 0, 1, 2, 2, 3, 1],    # query_0, haplotype 0
+        [0, 2, 2, 3, 0, 1, 2, 0, 0],    # query_0, haplotype 1
+    ], dtype=np.int32
+)
+toy_query_8_beagle_allele_probs = np.array(
+    # Ungenotyped markers only
+    [
+        # query_0, haplotype 0
+        [
+            [0.9997735, 0.33310473, 0.0       , 0.9992305, 0.9997736],
+            [0.0      , 0.66620946, 0.99893093, 0.0      , 0.0],
+        ],
+        # query_0, haplotype 1
+        [
+            [0.9999998, 0.9999998, 0.9999998, 0.99991965, 0.0],
+            [0.0      , 0.0      , 0.0      , 0.0       , 0.9999998],
+        ],
+    ],
+    dtype=np.float32,
+)
+# fmt: on
+assert toy_ref_8.shape == (9, 10)
+assert toy_query_8.shape == (2, 9)
+assert toy_query_8_beagle_imputed.shape == (2, 9)
+assert np.sum(toy_query_8[0] == -1) == 5
+assert np.sum(toy_query_8[1] == -1) == 5
+assert toy_query_8_beagle_allele_probs.shape == (2, 2, 5)
+
+
+@pytest.mark.parametrize(
+    "input_ref,input_query,expected",
+    [
+        (toy_ref_0, toy_query_0, toy_query_0_beagle_imputed),
+        (toy_ref_1, toy_query_1, toy_query_1_beagle_imputed),
+        (toy_ref_2, toy_query_2, toy_query_2_beagle_imputed),
+        (toy_ref_3, toy_query_3, toy_query_3_beagle_imputed),
+        (toy_ref_4, toy_query_4, toy_query_4_beagle_imputed),
+        (toy_ref_5, toy_query_5, toy_query_5_beagle_imputed),
+        (toy_ref_6, toy_query_6, toy_query_6_beagle_imputed),
+        (toy_ref_7, toy_query_7, toy_query_7_beagle_imputed),
+        (toy_ref_8, toy_query_8, toy_query_8_beagle_imputed),
+    ],
+)
+def test_beagle_vanilla(input_ref, input_query, expected):
+    """Compare imputed alleles from Python BEAGLE implementation and BEAGLE 4.1."""
+    assert input_query.shape == expected.shape
+    pos = (np.arange(9) + 1) * 1e4
+    num_query_haps = input_query.shape[0]
+    num_ungenotyped_markers = np.sum(input_query[0] == -1)
+    imputed_alleles = np.zeros(
+        (num_query_haps, num_ungenotyped_markers), dtype=np.int32
+    )
+    expected_ungenotyped = expected[:, input_query[0] == -1]
+    assert imputed_alleles.shape == expected_ungenotyped.shape
+    for i in range(num_query_haps):
+        imputed_alleles[i], _ = tests.beagle.run_beagle(
+            input_ref, input_query[i], pos, miscall_rate=1e-4, ne=10.0
+        )
+        np.testing.assert_array_equal(imputed_alleles[i], expected_ungenotyped[i])
+
+
+@pytest.mark.parametrize(
+    "input_ref,input_query",
+    [
+        (toy_ref_0, toy_query_0),
+        (toy_ref_1, toy_query_1),
+        (toy_ref_2, toy_query_2),
+        (toy_ref_3, toy_query_3),
+        (toy_ref_4, toy_query_4),
+        (toy_ref_5, toy_query_5),
+        (toy_ref_6, toy_query_6),
+        (toy_ref_7, toy_query_7),
+        (toy_ref_8, toy_query_8),
+    ],
+)
+def test_beagle_numba(input_ref, input_query):
+    """Compare imputed alleles from vanilla and numba Python BEAGLE implementations."""
+    pos = (np.arange(9) + 1) * 1e4
+    num_query_haps = input_query.shape[0]
+    for i in range(num_query_haps):
+        imputed_alleles_vanilla, _ = tests.beagle.run_beagle(
+            input_ref,
+            input_query[i],
+            pos,
+            ne=10.0,
+            miscall_rate=1e-4,
+        )
+        imputed_alleles_numba, _ = tests.beagle_numba.run_interpolation_beagle(
+            input_ref,
+            input_query[i],
+            pos,
+            ne=10.0,
+            error_rate=1e-4,
+        )
+        np.testing.assert_array_equal(imputed_alleles_vanilla, imputed_alleles_numba)
+
+
+# Below is toy data set case 7 in tree sequence format.
+toy_ts_nodes_text = """\
+id      is_sample       time    population      individual      metadata
+0       1       0.000000        0       0
+1       1       0.000000        0       0
+2       1       0.000000        0       1
+3       1       0.000000        0       1
+4       1       0.000000        0       2
+5       1       0.000000        0       2
+6       1       0.000000        0       3
+7       1       0.000000        0       3
+8       1       0.000000        0       4
+9       1       0.000000        0       4
+10      0       0.009923        0       -1
+11      0       0.038603        0       -1
+12      0       0.057935        0       -1
+13      0       0.145141        0       -1
+14      0       0.238045        0       -1
+15      0       0.528344        0       -1
+16      0       0.646418        0       -1
+17      0       1.462199        0       -1
+18      0       2.836600        0       -1
+19      0       3.142225        0       -1
+20      0       4.056253        0       -1
+"""
+
+toy_ts_edges_text = """\
+left    right   parent  child   metadata
+0.000000        100000.000000   10      1
+0.000000        100000.000000   10      5
+0.000000        100000.000000   11      3
+0.000000        100000.000000   11      8
+0.000000        100000.000000   12      2
+0.000000        100000.000000   12      6
+0.000000        100000.000000   13      0
+0.000000        100000.000000   13      12
+0.000000        100000.000000   14      10
+0.000000        100000.000000   14      13
+0.000000        40443.000000    15      9
+0.000000        40443.000000    15      14
+40443.000000    100000.000000   16      4
+40443.000000    100000.000000   16      9
+0.000000        40443.000000    17      4
+0.000000        100000.000000   17      11
+40443.000000    100000.000000   17      16
+0.000000        12721.000000    18      7
+0.000000        12721.000000    18      17
+12721.000000    100000.000000   19      7
+40443.000000    100000.000000   19      14
+12721.000000    40443.000000    19      15
+0.000000        12721.000000    20      15
+12721.000000    100000.000000   20      17
+0.000000        12721.000000    20      18
+12721.000000    100000.000000   20      19
+"""
+
+toy_ts_sites_text = """\
+position        ancestral_state metadata
+10000.000000    A
+20000.000000    C
+30000.000000    G
+40000.000000    T
+50000.000000    A
+60000.000000    C
+70000.000000    G
+80000.000000    T
+90000.000000    A
+"""
+
+toy_ts_mutations_text = """\
+site    node    time    derived_state   parent  metadata
+0       10      unknown C       -1
+1       11      unknown G       -1
+2       12      unknown T       -1
+3       13      unknown A       -1
+4       14      unknown C       -1
+5       16      unknown G       -1
+6       16      unknown T       -1
+7       17      unknown A       -1
+8       19      unknown C       -1
+"""
+
+toy_ts_individuals_text = """\
+flags
+0
+0
+0
+0
+0
+"""
+
+
+def get_toy_data():
+    """Get toy example 7 in tree sequence format."""
+    ts = tskit.load_text(
+        nodes=io.StringIO(toy_ts_nodes_text),
+        edges=io.StringIO(toy_ts_edges_text),
+        sites=io.StringIO(toy_ts_sites_text),
+        mutations=io.StringIO(toy_ts_mutations_text),
+        individuals=io.StringIO(toy_ts_individuals_text),
+        strict=False,
+    )
+    return ts
+
+
+def parse_matrix(csv_text):
+    # TODO: Maybe remove.
+    # This returns a record array, which is essentially the same as a
+    # pandas dataframe, which we can access via df["m"] etc.
+    return np.lib.npyio.recfromcsv(io.StringIO(csv_text))
+
+
+@pytest.mark.parametrize(
+    "input_query,expected",
+    [
+        (toy_query_7, toy_query_7_beagle_imputed),
+        (toy_query_8, toy_query_8_beagle_imputed),
+    ],
+)
+def test_tsimpute(input_query, expected):
+    """
+    Compare imputed alleles from tsimpute and BEAGLE 4.1.
+    """
+    toy_ref_ts = get_toy_data()  # Same for both cases
+    pos = toy_ref_ts.sites_position
+    num_query_haps = input_query.shape[0]
+    mu = np.zeros(len(pos), dtype=np.float32) + 1e-8
+    rho = np.zeros(len(pos), dtype=np.float32) + 1e-8
+    expected_subset = expected[:, input_query[0] == -1]
+    for i in range(num_query_haps):
+        imputed_alleles, _ = tests.beagle_numba.run_tsimpute(
+            toy_ref_ts,
+            input_query[i],
+            pos,
+            rho,
+            mu,
+        )
+        np.testing.assert_array_equal(imputed_alleles, expected_subset[i])
+
+
+# Tests for helper functions.
+@pytest.mark.parametrize(
+    "typed_pos,untyped_pos,expected_weights,expected_idx",
+    [
+        # All ungenotyped markers are between genotyped markers.
+        (
+            np.array([10, 20]) * 1e6,
+            np.array([15]) * 1e6,
+            np.array([0.5]),
+            np.array([0]),
+        ),
+        # Same as above, but more genotyped markers.
+        (
+            np.array([10, 20, 30, 40]) * 1e6,
+            np.array([15, 25, 35]) * 1e6,
+            np.array([0.5, 0.5, 0.5]),
+            np.array([0, 1, 2]),
+        ),
+        # Ungenotyped markers are left of the first genotyped marker.
+        (
+            np.array([10, 20]) * 1e6,
+            np.array([5, 15]) * 1e6,
+            np.array([1.0, 0.5]),
+            np.array([0, 0]),
+        ),
+        # Ungenotyped markers are right of the last genotyped marker.
+        (
+            np.array([10, 20]) * 1e6,
+            np.array([15, 25]) * 1e6,
+            np.array([0.5, 0.0]),
+            np.array([0, 1]),
+        ),
+        # Denominator below min threshold.
+        (np.array([10, 20]), np.array([15]), np.array([0.001]), np.array([0])),
+    ],
+)
+def test_get_weights(typed_pos, untyped_pos, expected_weights, expected_idx):
+    # beagle, no numba
+    actual_weights, actual_idx = tests.beagle.get_weights(typed_pos, untyped_pos)
+    np.testing.assert_allclose(actual_weights, expected_weights)
+    np.testing.assert_array_equal(actual_idx, expected_idx)
+    # beagle, numba
+    genotyped_cm = tests.beagle_numba.convert_to_genetic_map_positions(typed_pos)
+    ungenotyped_cm = tests.beagle_numba.convert_to_genetic_map_positions(untyped_pos)
+    actual_weights, actual_idx = tests.beagle_numba.get_weights(
+        typed_pos, untyped_pos, genotyped_cm, ungenotyped_cm
+    )
+    np.testing.assert_allclose(actual_weights, expected_weights)
+    np.testing.assert_array_equal(actual_idx, expected_idx)