Further renaming of variables

szhan · Feb 23, 2024 · 9bc2ea1 · 9bc2ea1
1 parent b17659a
commit 9bc2ea1
Showing 1 changed file with 23 additions and 23 deletions.
diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
@@ -1,6 +1,6 @@
 """
 Implementation of the BEAGLE algorithm to impute alleles by linear interpolation of
-state probabilities at ungenotyped markers in the HMM state probability matrix.
+state probabilities at ungenotyped markers in the hidden state probability matrix.
 
 This was implemented while closely consulting the BEAGLE 4.1 paper:
 Browning & Browning (2016). Genotype imputation with millions of reference samples.
@@ -24,8 +24,8 @@
 2. The ungenotyped positions take -1.
 
 The forward and backward probability matrices are of size (m, h).
-The HMM state probability matrix is of size (m, h).
-The interpolated allele probability matrix is of size (x, 4),
+The state probability matrix is of size (m, h).
+The imputed allele probability matrix is of size (x, 4),
 The imputed alleles are the maximum a posteriori (MAP) alleles.
 
 To improve computational efficiency, BEAGLE uses aggregated markers,
@@ -206,7 +206,7 @@ def get_transition_probs(cm, h, ne):
 
 
 @njit
-def compute_forward_matrix(ref_h, query_h, rho, mu):
+def compute_forward_matrix(ref_h, query_h, tp, mp):
     """
     Implement forward algorithm to compute a forward probablity matrix of size (m, h).
 
@@ -215,8 +215,8 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):
 
     :param numpy.ndarray ref_h: Reference haplotypes.
     :param numpy.ndarray query_h: One query haplotype.
-    :param numpy.ndarray rho: Per-site recombination rates.
-    :param numpy.ndarray mu: Per-site mutation rates.
+    :param numpy.ndarray tp: Per-site transition probabilities.
+    :param numpy.ndarray mp: Per-site mismatch probabilities.
     :return: Forward probability matrix.
     :rtype: numpy.ndarray
     """
@@ -226,15 +226,15 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):
     last_sum = 1.0  # Part of normalization factor.
     for i in range(m):
         # Get site-specific parameters.
-        shift = rho[i] / h
-        scale = (1 - rho[i]) / last_sum
+        shift = tp[i] / h
+        scale = (1 - tp[i]) / last_sum
         # Get allele at genotyped marker i on query haplotype.
         query_a = query_h[i]
         for j in range(h):
             # Get allele at genotyped marker i on reference haplotype j.
             ref_a = ref_h[i, j]
             # Get emission probability.
-            em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i]
+            em_prob = mp[i] if query_a != ref_a else 1.0 - mp[i]
             fm[i, j] = em_prob
             if i == 0:
                 fm[i, j] *= 1.0 / h  # Prior probabilities.
@@ -246,20 +246,20 @@ def compute_forward_matrix(ref_h, query_h, rho, mu):
 
 
 @njit
-def compute_backward_matrix(ref_h, query_h, rho, mu):
+def compute_backward_matrix(ref_h, query_h, tp, mp):
     """
     Implement backward algorithm to compute a backward probablity matrix of size (m, h).
 
-    Reference haplotypes and query haplotype are subsetted to genotyped markers.
+    Reference haplotypes and query haplotype are subsetted to genotyped positions.
     So, they are a matrix of size (m, h) and an array of size m, respectively.
 
-    In BEAGLE 4.1, the values are kept one site at a time. Here, we keep the values
-    at all the sites.
+    In BEAGLE 4.1, the values are kept one position at a time. Here, we keep the values
+    at all the positions.
 
     :param numpy.ndarray ref_h: Reference haplotypes.
     :param numpy.ndarray query_h: One query haplotype.
-    :param numpy.ndarray rho: Per-site recombination rates.
-    :param numpy.ndarray mu: Per-site mutation rates.
+    :param numpy.ndarray tp: Per-site transition probabilities.
+    :param numpy.ndarray mp: Per-site mismatch probabilities.
     :return: Backward probability matrix.
     :rtype: numpy.ndarray
     """
@@ -271,28 +271,28 @@ def compute_backward_matrix(ref_h, query_h, rho, mu):
         query_a = query_h[i + 1]
         for j in range(h):
             ref_a = ref_h[i + 1, j]
-            em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1]
+            em_prob = mp[i + 1] if ref_a != query_a else 1.0 - mp[i + 1]
             bm[i, j] = bm[i + 1, j] * em_prob
         sum_site = np.sum(bm[i, :])
-        scale = (1 - rho[i + 1]) / sum_site
-        shift = rho[i + 1] / h
+        scale = (1 - tp[i + 1]) / sum_site
+        shift = tp[i + 1] / h
         bm[i, :] = scale * bm[i, :] + shift
     return bm
 
 
 def compute_state_prob_matrix(fm, bm):
     """
-    Compute the HMM state probability matrix, in which each element is
-    the probability of a HMM state at a genotyped position.
+    Compute the hidden state probability matrix, in which each element is
+    the probability of an HMM state at a genotyped position.
 
     The forward and backward probability matrices are of size (m, h).
-    The output HMM state probability matrix is of size (m, h).
+    The output hidden state probability matrix is of size (m, h).
 
-    Implementing this is simpler than faithfully reproducing BEAGLE.
+    Implementing this is simpler than faithfully reproducing BEAGLE 4.1.
 
     :param numpy.ndarray fm: Forward probability matrix.
     :param numpy.ndarray bm: Backward probability matrix.
-    :return: HMM state probability matrix.
+    :return: Hidden state probability matrix.
     :rtype: numpy.ndarray
     """
     assert fm.shape == bm.shape, "Forward and backward matrices differ in shape."