Skip to content

Commit

Permalink
Return marker interval start indices
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Sep 17, 2023
1 parent 53ec5d4 commit 39fc022
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
12 changes: 7 additions & 5 deletions python/tests/beagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def get_weights(genotyped_pos, ungenotyped_pos):
:param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
:param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
:return: Weights used in interpolation.
:rtype: numpy.ndarray
:return: Weights and associated marker interval start indices.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
assert len(genotyped_pos) > 1, "There are fewer than two genotyped markers."
assert len(ungenotyped_pos) > 0, "There are no ungenotyped markers."
Expand All @@ -89,6 +89,7 @@ def get_weights(genotyped_pos, ungenotyped_pos):
# Calculate weights for ungenotyped markers.
weights = np.zeros(x, dtype=np.float64)
# Identify genotype markers k and k + 1 sandwiching marker i.
marker_interval_start = np.zeros(x, dtype=np.int64)
for i in np.arange(x):
if ungenotyped_pos[i] < genotyped_pos[0]:
# Ungenotyped marker is before the first genotyped marker.
Expand All @@ -104,9 +105,10 @@ def get_weights(genotyped_pos, ungenotyped_pos):
cm_mP1_x = genotyped_cm[k + 1] - ungenotyped_cm[i]
cm_mP1_m = np.max([genotyped_cm[k + 1] - genotyped_cm[k], MIN_CM_DIST])
weights[i] = cm_mP1_x / cm_mP1_m
marker_interval_start[i] = k
assert 0 <= np.min(weights), "Some weights are less than 1."
assert np.max(weights) <= 1, "Some weights are greater than 1."
return weights
return (weights, marker_interval_start)


def get_mismatch_prob(pos, miscall_rate):
Expand Down Expand Up @@ -376,7 +378,7 @@ def _interpolate_allele_probabilities(
assert m == len(genotyped_cm)
imputed_cm = convert_to_genetic_map_position(ungenotyped_pos)
assert x == len(imputed_cm)
weights = get_weights(genotyped_cm, imputed_cm)
weights, _ = get_weights(genotyped_cm, imputed_cm)
assert x == len(weights)
i_hap_probs = np.zeros((x, 2), dtype=np.float64)
for i in np.arange(x):
Expand Down Expand Up @@ -455,7 +457,7 @@ def interpolate_allele_probabilities(sm, ref_h, genotyped_pos, ungenotyped_pos):
assert m == len(genotyped_pos)
x = len(ungenotyped_pos)
assert (x, h) == ref_h.shape
weights = get_weights(genotyped_pos, ungenotyped_pos)
weights, _ = get_weights(genotyped_pos, ungenotyped_pos)
assert x == len(weights)
p = np.zeros((x, len(alleles)), dtype=np.float64)
# Compute allele probabilities as per Equation 1 in BB2016.
Expand Down
4 changes: 2 additions & 2 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def get_weights(genotyped_pos, ungenotyped_pos):
:param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
:param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
:return: Weights used in interpolation.
:rtype: numpy.ndarray
:return: Weights and marker interval start indices.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
m = len(genotyped_pos)
x = len(ungenotyped_pos)
Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ def test_tsimpute(input_ref, input_query):
)
def test_get_weights(genotyped_pos, ungenotyped_pos, expected):
# TODO: Test indices.
actual = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos)
actual, _ = tests.beagle.get_weights(genotyped_pos, ungenotyped_pos)
np.testing.assert_allclose(actual, expected)
actual, _ = tests.beagle_numba.get_weights(genotyped_pos, ungenotyped_pos)
np.testing.assert_allclose(actual, expected)

0 comments on commit 39fc022

Please sign in to comment.