Skip to content

Commit

Permalink
Implement MIN_CM_DIST in genetic map position calculations instead of…
Browse files Browse the repository at this point in the history
… get_weights
  • Loading branch information
szhan committed Feb 27, 2024
1 parent eabeece commit f2b2c47
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def convert_to_genetic_map_positions(pos, *, genetic_map=None):
:return: Genetic map positions (cM).
:rtype: numpy.ndarray
"""
# See 'cumPos' in 'ImputationData.java' in BEAGLE 4.1 source code.
_MIN_CM_DIST = 1e-7
if genetic_map is None:
return pos / 1e6
assert np.all(pos >= genetic_map.base_pos[0]) and np.all(
Expand All @@ -242,6 +244,9 @@ def convert_to_genetic_map_positions(pos, *, genetic_map=None):
), f"Genetic map positions not in monotonically ascending order: {fb}, {fa}."
est_cm[i] = fa
est_cm[i] += (fb - fa) * (pos[i] - a) / (b - a)
if i > 0:
if est_cm[i] - est_cm[i - 1] < _MIN_CM_DIST:
est_cm[i] = _MIN_CM_DIST
return est_cm


Expand Down Expand Up @@ -422,7 +427,6 @@ def get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm):
:return: Weights for ungenotyped positions and indices of right-bounding positions.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
_MIN_CM_DIST = 1e-7 # Avoid division by zero.
m = len(typed_pos)
x = len(untyped_pos)
# Identify genotype positions (m - 1) and m bounding ungenotyped position i.
Expand All @@ -441,8 +445,7 @@ def get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm):
else:
# Between two genotyped positions.
cm_m2x = typed_cm[k] - untyped_cm[i]
# TODO: Check if this a subtle source of error.
cm_m2mM1 = max(typed_cm[k] - typed_cm[k - 1], _MIN_CM_DIST)
cm_m2mM1 = typed_cm[k] - typed_cm[k - 1]
weights[i] = cm_m2x / cm_m2mM1
return (weights, right_idx)

Expand Down

0 comments on commit f2b2c47

Please sign in to comment.