From 39fc0228f3d4eea9416f0ddda2a164dffab95db2 Mon Sep 17 00:00:00 2001 From: Shing Zhan Date: Sun, 17 Sep 2023 10:14:33 +0100 Subject: [PATCH] Return marker interval start indices --- python/tests/beagle.py | 12 +++++++----- python/tests/beagle_numba.py | 4 ++-- python/tests/test_imputation.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/python/tests/beagle.py b/python/tests/beagle.py index 59c691bb44..5f570a7468 100644 --- a/python/tests/beagle.py +++ b/python/tests/beagle.py @@ -73,8 +73,8 @@ def get_weights(genotyped_pos, ungenotyped_pos): :param numpy.ndarray genotyped_pos: Site positions of genotyped markers. :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers. - :return: Weights used in interpolation. - :rtype: numpy.ndarray + :return: Weights and associated marker interval start indices. + :rtype: tuple(numpy.ndarray, numpy.ndarray) """ assert len(genotyped_pos) > 1, "There are fewer than two genotyped markers." assert len(ungenotyped_pos) > 0, "There are no ungenotyped markers." @@ -89,6 +89,7 @@ def get_weights(genotyped_pos, ungenotyped_pos): # Calculate weights for ungenotyped markers. weights = np.zeros(x, dtype=np.float64) # Identify genotype markers k and k + 1 sandwiching marker i. + marker_interval_start = np.zeros(x, dtype=np.int64) for i in np.arange(x): if ungenotyped_pos[i] < genotyped_pos[0]: # Ungenotyped marker is before the first genotyped marker. @@ -104,9 +105,10 @@ def get_weights(genotyped_pos, ungenotyped_pos): cm_mP1_x = genotyped_cm[k + 1] - ungenotyped_cm[i] cm_mP1_m = np.max([genotyped_cm[k + 1] - genotyped_cm[k], MIN_CM_DIST]) weights[i] = cm_mP1_x / cm_mP1_m + marker_interval_start[i] = k assert 0 <= np.min(weights), "Some weights are less than 1." assert np.max(weights) <= 1, "Some weights are greater than 1." - return weights + return (weights, marker_interval_start) def get_mismatch_prob(pos, miscall_rate): @@ -376,7 +378,7 @@ def _interpolate_allele_probabilities( assert m == len(genotyped_cm) imputed_cm = convert_to_genetic_map_position(ungenotyped_pos) assert x == len(imputed_cm) - weights = get_weights(genotyped_cm, imputed_cm) + weights, _ = get_weights(genotyped_cm, imputed_cm) assert x == len(weights) i_hap_probs = np.zeros((x, 2), dtype=np.float64) for i in np.arange(x): @@ -455,7 +457,7 @@ def interpolate_allele_probabilities(sm, ref_h, genotyped_pos, ungenotyped_pos): assert m == len(genotyped_pos) x = len(ungenotyped_pos) assert (x, h) == ref_h.shape - weights = get_weights(genotyped_pos, ungenotyped_pos) + weights, _ = get_weights(genotyped_pos, ungenotyped_pos) assert x == len(weights) p = np.zeros((x, len(alleles)), dtype=np.float64) # Compute allele probabilities as per Equation 1 in BB2016. diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index 2731a35422..0828484f66 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -39,8 +39,8 @@ def get_weights(genotyped_pos, ungenotyped_pos): :param numpy.ndarray genotyped_pos: Site positions of genotyped markers. :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers. - :return: Weights used in interpolation. - :rtype: numpy.ndarray + :return: Weights and marker interval start indices. + :rtype: tuple(numpy.ndarray, numpy.ndarray) """ m = len(genotyped_pos) x = len(ungenotyped_pos) diff --git a/python/tests/test_imputation.py b/python/tests/test_imputation.py index 83bead3dd6..f30f9d043f 100644 --- a/python/tests/test_imputation.py +++ b/python/tests/test_imputation.py @@ -670,7 +670,7 @@ def test_tsimpute(input_ref, input_query): ) def test_get_weights(genotyped_pos, ungenotyped_pos, expected): # TODO: Test indices. - actual = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos) + actual, _ = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos) np.testing.assert_allclose(actual, expected) actual, _ = tests.beagle_numba.get_weights(genotyped_pos, ungenotyped_pos) np.testing.assert_allclose(actual, expected)