Improve docs for get_weights

szhan · Jan 14, 2024 · 3205ee9 · 3205ee9
1 parent 6f019fe
commit 3205ee9
Showing 1 changed file with 9 additions and 8 deletions.
diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py
@@ -79,11 +79,11 @@ def convert_to_genetic_map_positions(pos, genetic_map=None):
 @njit
 def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
     """
-    Get weights at ungenotyped markers in the query haplotype, which are used in
-    linear interpolation of HMM state probabilities at ungenotyped markers.
+    Get weights for ungenotyped markers in the query haplotype, which are used in
+    linear interpolation of HMM state probabilities at the ungenotyped markers.
 
     In BB2016 (see below Equation 1), a weight between genotyped markers m and m + 1
-    at ungenotyped marker x is denoted lambda_m,x.
+    bounding ungenotyped marker x is denoted lambda_m,x.
 
     lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
     where g(i) = genetic map position of marker i.
@@ -92,17 +92,18 @@ def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
     :param numpy.ndarray ungenotyped_pos: Physical positions of ungenotyped markers.
     :param numpy.ndarray genotyped_cm: Genetic map positions of genotyped markers.
     :param numpy.ndarray ungenotyped_cm: Genetic map positions of ungenotyped markers.
-    :return: Weights and marker interval start indices.
+    :return: Weights for ungenotyped markers and indices of left-bounding markers.
     :rtype: tuple(numpy.ndarray, numpy.ndarray)
     """
     MIN_CM_DIST = 1e-7  # Avoid division by zero.
     m = len(genotyped_pos)
     x = len(ungenotyped_pos)
     # Calculate weights for ungenotyped markers.
     weights = np.zeros(x, dtype=np.float32)
-    # Identify genotype markers k and k + 1 bounding ungenotyped marker i.
-    marker_interval_start = np.zeros(x, dtype=np.int32)
+    # Identify genotype markers m and m+1 bounding ungenotyped marker i.
+    left_idx = np.zeros(x, dtype=np.int32)
     # Going from right to left.
+    # TODO: Refactor using np.searchsorted().
     idx_m = m - 1
     for idx_x in range(x - 1, -1, -1):
         while idx_m > 0 and ungenotyped_pos[idx_x] < genotyped_pos[idx_m]:
@@ -118,8 +119,8 @@ def get_weights(genotyped_pos, ungenotyped_pos, genotyped_cm, ungenotyped_cm):
             cm_mP1_x = genotyped_cm[idx_m + 1] - ungenotyped_cm[idx_x]
             cm_mP1_m = max(genotyped_cm[idx_m + 1] - genotyped_cm[idx_m], MIN_CM_DIST)
             weights[idx_x] = cm_mP1_x / cm_mP1_m
-        marker_interval_start[idx_x] = idx_m
-    return (weights, marker_interval_start)
+        left_idx[idx_x] = idx_m
+    return (weights, left_idx)
 
 
 def get_mismatch_prob(pos, error_rate):