Refactor and improve docs WIP

szhan · Sep 15, 2023 · 5ba6063 · 5ba6063
1 parent 2c943f5
commit 5ba6063
Showing 1 changed file with 12 additions and 17 deletions.
diff --git a/python/tests/beagle.py b/python/tests/beagle.py
@@ -345,52 +345,47 @@ def _compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
 
 
 def _interpolate_allele_probabilities(
-    fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos
+    fwd_hap_probs, bwd_hap_probs, genotyped_pos, ungenotyped_pos
 ):
     """
     WARN: This function is part of an attempt to faithfully reimplement
     the BEAGLE 4.1 algorithm. It is not complete and not used
     in the current implementation. It is kept for documentation.
 
-    Compute the interpolated allele probabilities at the imputed markers
-    of a query haplotype.
+    Compute the interpolated allele probabilities at the ungenotyped markers.
 
-    The forward and backward haplotype probability matrices are of size (m, 2),
-    where m is the number of genotyped markers.
-    Here, we assume all biallelic sites, hence the 2.
-
-    The interpolated allele probability matrix is of size (x, 2),
-    where x is the number of imputed markers.
-    Again, we assume all biallelic sites, hence the 2.
+    The forward and backward haplotype probability matrices are of size (m, 2).
+    The interpolated allele probability matrix is of size (x, 2).
+    We assume all biallelic sites, hence the 2.
 
     This implementation is based on Equation 2 (the rearranged one) in BB2016.
 
     :param numpy.ndarray fwd_hap_probs: Forward haplotype probability matrix.
     :param numpy.ndarray bwd_hap_probs: Backward haplotype probability matrix.
     :param numpy.ndarray genotyped_pos: Site positions of genotyped markers.
-    :param numpy.ndarray imputed_pos: Site positions of imputed markers.
+    :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers.
     :return: Interpolated allele probabilities.
     :rtype: numpy.ndarray
     """
     m = len(genotyped_pos)
-    x = len(imputed_pos)
+    x = len(ungenotyped_pos)
     assert (m, 2) == fwd_hap_probs.shape
     assert (m, 2) == bwd_hap_probs.shape
     genotyped_cm = convert_to_genetic_map_position(genotyped_pos)
     assert m == len(genotyped_cm)
-    imputed_cm = convert_to_genetic_map_position(imputed_pos)
+    imputed_cm = convert_to_genetic_map_position(ungenotyped_pos)
     assert x == len(imputed_cm)
     weights = get_weights(genotyped_cm, imputed_cm)
     assert x == len(weights)
     i_hap_probs = np.zeros((x, 2), dtype=np.float64)
     for i in np.arange(x):
-        # Identify the genotyped markers k and k + 1 between imputed marker i
-        if imputed_cm[i] < genotyped_cm[0]:
+        # Identify the genotyped markers k and k + 1 sandwiching ungenotyped marker i.
+        if ungenotyped_pos[i] < genotyped_pos[0]:
             k = 0
-        elif imputed_cm[i] > genotyped_cm[-1]:
+        elif ungenotyped_pos[i] > genotyped_pos[-1]:
             k = m - 2
         else:
-            k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0])
+            k = np.max(np.where(ungenotyped_pos[i] > genotyped_pos)[0])
         # Vectorised over the allelic states
         i_hap_probs[i, :] = weights[i] * bwd_hap_probs[k, :]
         i_hap_probs[i, :] += (1 - weights[i]) * fwd_hap_probs[k, :]