Skip to content

Commit

Permalink
Improve docs for get_weights
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Jan 14, 2024
1 parent 6f019fe commit 3205ee9
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ def convert_to_genetic_map_positions(pos, genetic_map=None):
@njit
def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
"""
Get weights at ungenotyped markers in the query haplotype, which are used in
linear interpolation of HMM state probabilities at ungenotyped markers.
Get weights for ungenotyped markers in the query haplotype, which are used in
linear interpolation of HMM state probabilities at the ungenotyped markers.
In BB2016 (see below Equation 1), a weight between genotyped markers m and m + 1
at ungenotyped marker x is denoted lambda_m,x.
bounding ungenotyped marker x is denoted lambda_m,x.
lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
where g(i) = genetic map position of marker i.
Expand All @@ -92,17 +92,18 @@ def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
:param numpy.ndarray ungenotyped_pos: Physical positions of ungenotyped markers.
:param numpy.ndarray genotyped_cm: Genetic map positions of genotyped markers.
:param numpy.ndarray ungenotyped_cm: Genetic map positions of ungenotyped markers.
:return: Weights and marker interval start indices.
:return: Weights for ungenotyped markers and indices of left-bounding markers.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
MIN_CM_DIST = 1e-7 # Avoid division by zero.
m = len(genotyped_pos)
x = len(ungenotyped_pos)
# Calculate weights for ungenotyped markers.
weights = np.zeros(x, dtype=np.float32)
# Identify genotype markers k and k + 1 bounding ungenotyped marker i.
marker_interval_start = np.zeros(x, dtype=np.int32)
# Identify genotype markers m and m+1 bounding ungenotyped marker i.
left_idx = np.zeros(x, dtype=np.int32)
# Going from right to left.
# TODO: Refactor using np.searchsorted().
idx_m = m - 1
for idx_x in range(x - 1, -1, -1):
while idx_m > 0 and ungenotyped_pos[idx_x] < genotyped_pos[idx_m]:
Expand All @@ -118,8 +119,8 @@ def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
cm_mP1_x = genotyped_cm[idx_m + 1] - ungenotyped_cm[idx_x]
cm_mP1_m = max(genotyped_cm[idx_m + 1] - genotyped_cm[idx_m], MIN_CM_DIST)
weights[idx_x] = cm_mP1_x / cm_mP1_m
marker_interval_start[idx_x] = idx_m
return (weights, marker_interval_start)
left_idx[idx_x] = idx_m
return (weights, left_idx)


def get_mismatch_prob(pos, error_rate):
Expand Down

0 comments on commit 3205ee9

Please sign in to comment.