Skip to content

Commit

Permalink
Further renaming of variables
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Feb 23, 2024
1 parent b17659a commit 9bc2ea1
Showing 1 changed file with 23 additions and 23 deletions.
46 changes: 23 additions & 23 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Implementation of the BEAGLE algorithm to impute alleles by linear interpolation of
state probabilities at ungenotyped markers in the HMM state probability matrix.
state probabilities at ungenotyped markers in the hidden state probability matrix.
This was implemented while closely consulting the BEAGLE 4.1 paper:
Browning & Browning (2016). Genotype imputation with millions of reference samples.
Expand All @@ -24,8 +24,8 @@
2. The ungenotyped positions take -1.
The forward and backward probability matrices are of size (m, h).
The HMM state probability matrix is of size (m, h).
The interpolated allele probability matrix is of size (x, 4),
The state probability matrix is of size (m, h).
The imputed allele probability matrix is of size (x, 4),
The imputed alleles are the maximum a posteriori (MAP) alleles.
To improve computational efficiency, BEAGLE uses aggregated markers,
Expand Down Expand Up @@ -206,7 +206,7 @@ def get_transition_probs(cm, h, ne):


@njit
def compute_forward_matrix(ref_h, query_h, rho, mu):
def compute_forward_matrix(ref_h, query_h, tp, mp):
"""
Implement forward algorithm to compute a forward probablity matrix of size (m, h).
Expand All @@ -215,8 +215,8 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):
:param numpy.ndarray ref_h: Reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray rho: Per-site recombination rates.
:param numpy.ndarray mu: Per-site mutation rates.
:param numpy.ndarray tp: Per-site transition probabilities.
:param numpy.ndarray mp: Per-site mismatch probabilities.
:return: Forward probability matrix.
:rtype: numpy.ndarray
"""
Expand All @@ -226,15 +226,15 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):
last_sum = 1.0 # Part of normalization factor.
for i in range(m):
# Get site-specific parameters.
shift = rho[i] / h
scale = (1 - rho[i]) / last_sum
shift = tp[i] / h
scale = (1 - tp[i]) / last_sum
# Get allele at genotyped marker i on query haplotype.
query_a = query_h[i]
for j in range(h):
# Get allele at genotyped marker i on reference haplotype j.
ref_a = ref_h[i, j]
# Get emission probability.
em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
em_prob = mp[i] if query_a != ref_a else 1.0 - mp[i]
fm[i, j] = em_prob
if i == 0:
fm[i, j] *= 1.0 / h # Prior probabilities.
Expand All @@ -246,20 +246,20 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):


@njit
def compute_backward_matrix(ref_h, query_h, rho, mu):
def compute_backward_matrix(ref_h, query_h, tp, mp):
"""
Implement backward algorithm to compute a backward probablity matrix of size (m, h).
Reference haplotypes and query haplotype are subsetted to genotyped markers.
Reference haplotypes and query haplotype are subsetted to genotyped positions.
So, they are a matrix of size (m, h) and an array of size m, respectively.
In BEAGLE 4.1, the values are kept one site at a time. Here, we keep the values
at all the sites.
In BEAGLE 4.1, the values are kept one position at a time. Here, we keep the values
at all the positions.
:param numpy.ndarray ref_h: Reference haplotypes.
:param numpy.ndarray query_h: One query haplotype.
:param numpy.ndarray rho: Per-site recombination rates.
:param numpy.ndarray mu: Per-site mutation rates.
:param numpy.ndarray tp: Per-site transition probabilities.
:param numpy.ndarray mp: Per-site mismatch probabilities.
:return: Backward probability matrix.
:rtype: numpy.ndarray
"""
Expand All @@ -271,28 +271,28 @@ def compute_backward_matrix(ref_h, query_h, rho, mu):
query_a = query_h[i + 1]
for j in range(h):
ref_a = ref_h[i + 1, j]
em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
em_prob = mp[i + 1] if ref_a != query_a else 1.0 - mp[i + 1]
bm[i, j] = bm[i + 1, j] * em_prob
sum_site = np.sum(bm[i, :])
scale = (1 - rho[i + 1]) / sum_site
shift = rho[i + 1] / h
scale = (1 - tp[i + 1]) / sum_site
shift = tp[i + 1] / h
bm[i, :] = scale * bm[i, :] + shift
return bm


def compute_state_prob_matrix(fm, bm):
"""
Compute the HMM state probability matrix, in which each element is
the probability of a HMM state at a genotyped position.
Compute the hidden state probability matrix, in which each element is
the probability of an HMM state at a genotyped position.
The forward and backward probability matrices are of size (m, h).
The output HMM state probability matrix is of size (m, h).
The output hidden state probability matrix is of size (m, h).
Implementing this is simpler than faithfully reproducing BEAGLE.
Implementing this is simpler than faithfully reproducing BEAGLE 4.1.
:param numpy.ndarray fm: Forward probability matrix.
:param numpy.ndarray bm: Backward probability matrix.
:return: HMM state probability matrix.
:return: Hidden state probability matrix.
:rtype: numpy.ndarray
"""
assert fm.shape == bm.shape, "Forward and backward matrices differ in shape."
Expand Down

0 comments on commit 9bc2ea1

Please sign in to comment.