From 39fc0228f3d4eea9416f0ddda2a164dffab95db2 Mon Sep 17 00:00:00 2001
From: Shing Zhan <shing.zhan@gmail.com>
Date: Sun, 17 Sep 2023 10:14:33 +0100
Subject: [PATCH] Return marker interval start indices

---
 python/tests/beagle.py          | 12 +++++++-----
 python/tests/beagle_numba.py    |  4 ++--
 python/tests/test_imputation.py |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/python/tests/beagle.py b/python/tests/beagle.py
index 59c691bb44..5f570a7468 100644
--- a/python/tests/beagle.py
+++ b/python/tests/beagle.py
@@ -73,8 +73,8 @@ def get_weights(genotyped_pos, ungenotyped_pos):
 
     :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
     :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
-    :return: Weights used in interpolation.
-    :rtype: numpy.ndarray
+    :return: Weights and associated marker interval start indices.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
     """
     assert len(genotyped_pos) > 1, "There are fewer than two genotyped markers."
     assert len(ungenotyped_pos) > 0, "There are no ungenotyped markers."
@@ -89,6 +89,7 @@ def get_weights(genotyped_pos, ungenotyped_pos):
     # Calculate weights for ungenotyped markers.
     weights = np.zeros(x, dtype=np.float64)
     # Identify genotype markers k and k + 1 sandwiching marker i.
+    marker_interval_start = np.zeros(x, dtype=np.int64)
     for i in np.arange(x):
         if ungenotyped_pos[i] < genotyped_pos[0]:
             # Ungenotyped marker is before the first genotyped marker.
@@ -104,9 +105,10 @@ def get_weights(genotyped_pos, ungenotyped_pos):
             cm_mP1_x = genotyped_cm[k + 1] - ungenotyped_cm[i]
             cm_mP1_m = np.max([genotyped_cm[k + 1] - genotyped_cm[k], MIN_CM_DIST])
             weights[i] = cm_mP1_x / cm_mP1_m
+        marker_interval_start[i] = k
     assert 0 <= np.min(weights), "Some weights are less than 1."
     assert np.max(weights) <= 1, "Some weights are greater than 1."
-    return weights
+    return (weights, marker_interval_start)
 
 
 def get_mismatch_prob(pos, miscall_rate):
@@ -376,7 +378,7 @@ def _interpolate_allele_probabilities(
     assert m == len(genotyped_cm)
     imputed_cm = convert_to_genetic_map_position(ungenotyped_pos)
     assert x == len(imputed_cm)
-    weights = get_weights(genotyped_cm, imputed_cm)
+    weights, _ = get_weights(genotyped_cm, imputed_cm)
     assert x == len(weights)
     i_hap_probs = np.zeros((x, 2), dtype=np.float64)
     for i in np.arange(x):
@@ -455,7 +457,7 @@ def interpolate_allele_probabilities(sm, ref_h, genotyped_pos, ungenotyped_pos):
     assert m == len(genotyped_pos)
     x = len(ungenotyped_pos)
     assert (x, h) == ref_h.shape
-    weights = get_weights(genotyped_pos, ungenotyped_pos)
+    weights, _ = get_weights(genotyped_pos, ungenotyped_pos)
     assert x == len(weights)
     p = np.zeros((x, len(alleles)), dtype=np.float64)
     # Compute allele probabilities as per Equation 1 in BB2016.
diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
index 2731a35422..0828484f66 100644
--- a/python/tests/beagle_numba.py
+++ b/python/tests/beagle_numba.py
@@ -39,8 +39,8 @@ def get_weights(genotyped_pos, ungenotyped_pos):
 
     :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
     :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
-    :return: Weights used in interpolation.
-    :rtype: numpy.ndarray
+    :return: Weights and marker interval start indices.
+    :rtype: tuple(numpy.ndarray, numpy.ndarray)
     """
     m = len(genotyped_pos)
     x = len(ungenotyped_pos)
diff --git a/python/tests/test_imputation.py b/python/tests/test_imputation.py
index 83bead3dd6..f30f9d043f 100644
--- a/python/tests/test_imputation.py
+++ b/python/tests/test_imputation.py
@@ -670,7 +670,7 @@ def test_tsimpute(input_ref, input_query):
 )
 def test_get_weights(genotyped_pos, ungenotyped_pos, expected):
     # TODO: Test indices.
-    actual = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos)
+    actual, _ = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos)
     np.testing.assert_allclose(actual, expected)
     actual, _ = tests.beagle_numba.get_weights(genotyped_pos, ungenotyped_pos)
     np.testing.assert_allclose(actual, expected)