Skip to content

Commit

Permalink
Refactor and improve docs WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Sep 15, 2023
1 parent 2c943f5 commit 5ba6063
Showing 1 changed file with 12 additions and 17 deletions.
29 changes: 12 additions & 17 deletions python/tests/beagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,52 +345,47 @@ def _compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):


def _interpolate_allele_probabilities(
fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
fwd_hap_probs, bwd_hap_probs, genotyped_pos, ungenotyped_pos
):
"""
WARN: This function is part of an attempt to faithfully reimplement
the BEAGLE 4.1 algorithm. It is not complete and not used
in the current implementation. It is kept for documentation.
Compute the interpolated allele probabilities at the imputed markers
of a query haplotype.
Compute the interpolated allele probabilities at the ungenotyped markers.
The forward and backward haplotype probability matrices are of size (m, 2),
where m is the number of genotyped markers.
Here, we assume all biallelic sites, hence the 2.
The interpolated allele probability matrix is of size (x, 2),
where x is the number of imputed markers.
Again, we assume all biallelic sites, hence the 2.
The forward and backward haplotype probability matrices are of size (m, 2).
The interpolated allele probability matrix is of size (x, 2).
We assume all biallelic sites, hence the 2.
This implementation is based on Equation 2 (the rearranged one) in BB2016.
:param numpy.ndarray fwd_hap_probs: Forward haplotype probability matrix.
:param numpy.ndarray bwd_hap_probs: Backward haplotype probability matrix.
:param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
:param numpy.ndarray imputed_pos: Site positions of imputed markers.
:param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
:return: Interpolated allele probabilities.
:rtype: numpy.ndarray
"""
m = len(genotyped_pos)
x = len(imputed_pos)
x = len(ungenotyped_pos)
assert (m, 2) == fwd_hap_probs.shape
assert (m, 2) == bwd_hap_probs.shape
genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
assert m == len(genotyped_cm)
imputed_cm = convert_to_genetic_map_position(imputed_pos)
imputed_cm = convert_to_genetic_map_position(ungenotyped_pos)
assert x == len(imputed_cm)
weights = get_weights(genotyped_cm, imputed_cm)
assert x == len(weights)
i_hap_probs = np.zeros((x, 2), dtype=np.float64)
for i in np.arange(x):
# Identify the genotyped markers k and k + 1 between imputed marker i
if imputed_cm[i] < genotyped_cm[0]:
# Identify the genotyped markers k and k + 1 sandwiching ungenotyped marker i.
if ungenotyped_pos[i] < genotyped_pos[0]:
k = 0
elif imputed_cm[i] > genotyped_cm[-1]:
elif ungenotyped_pos[i] > genotyped_pos[-1]:
k = m - 2
else:
k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
k = np.max(np.where(ungenotyped_pos[i] > genotyped_pos)[0])
# Vectorised over the allelic states
i_hap_probs[i, :] = weights[i] * bwd_hap_probs[k, :]
i_hap_probs[i, :] += (1 - weights[i]) * fwd_hap_probs[k, :]
Expand Down

0 comments on commit 5ba6063

Please sign in to comment.