From f8c6a5f4dfec1143a588fcc9e142bd79adc4d157 Mon Sep 17 00:00:00 2001
From: szhan <shing.zhan@gmail.com>
Date: Sat, 20 Apr 2024 17:18:41 +0100
Subject: [PATCH] Clean up and fixes

---
 lshmm/fb_diploid.py  | 10 +++++-----
 lshmm/vit_diploid.py |  6 +++---
 lshmm/vit_haploid.py | 26 ++++++++++++++------------
 tests/test_API.py    |  2 +-
 4 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/lshmm/fb_diploid.py b/lshmm/fb_diploid.py
index 919febb..1b3a170 100644
--- a/lshmm/fb_diploid.py
+++ b/lshmm/fb_diploid.py
@@ -11,7 +11,7 @@
 
 
 def forwards_ls_dip(n, m, G, s, e, r, norm=True):
-    """A matrix-based implementation using Numpy vectorisation."""
+    """A matrix-based implementation using Numpy."""
     # Initialise
     F = np.zeros((m, n, n))
     F[0, :, :] = 1 / (n**2)
@@ -76,7 +76,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True):
 
 
 def backwards_ls_dip(n, m, G, s, e, c, r):
-    """A matrix-based implementation using Numpy vectorisation."""
+    """A matrix-based implementation using Numpy."""
     # Initialise
     B = np.zeros((m, n, n))
     B[m - 1, :, :] = 1
@@ -114,7 +114,7 @@ def backwards_ls_dip(n, m, G, s, e, c, r):
 
 @jit.numba_njit
 def forward_ls_dip_starting_point(n, m, G, s, e, r):
-    """Naive implementation of LS diploid forwards algorithm."""
+    """A naive implementation."""
     # Initialise
     F = np.zeros((m, n, n))
     r_n = r / n
@@ -265,7 +265,7 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r):
 
 @jit.numba_njit
 def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
-    """LS diploid forwards algorithm without vectorisation."""
+    """An implementation without vectorisation."""
     # Initialise
     F = np.zeros((m, n, n))
     for j1 in range(n):
@@ -377,7 +377,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
 
 @jit.numba_njit
 def backward_ls_dip_loop(n, m, G, s, e, c, r):
-    """LS diploid backwards algoritm without vectorisation."""
+    """An implementation without vectorisation."""
     # Initialise
     B = np.zeros((m, n, n))
     B[m - 1, :, :] = 1
diff --git a/lshmm/vit_diploid.py b/lshmm/vit_diploid.py
index f28567d..849d049 100644
--- a/lshmm/vit_diploid.py
+++ b/lshmm/vit_diploid.py
@@ -402,10 +402,10 @@ def backwards_viterbi_dip_no_pointer(
     # Backtrace
     for l in range(m - 2, -1, -1):
         current_best_template = path[l + 1]
-        # if current_best_template in recombs_double[l + 1]:
+        # Current_best_template in recombs_double[l + 1]
         if in_list(recombs_double[l + 1], current_best_template):
             current_best_template = V_argmaxes[l]
-        # elif current_best_template in recombs_single[l + 1]:
+        # Current_best_template in recombs_single[l + 1]
         elif in_list(recombs_single[l + 1], current_best_template):
             (j1, j2) = divmod(current_best_template, n)
             if V_rowcol_maxes[l, j1] > V_rowcol_maxes[l, j2]:
@@ -423,7 +423,7 @@ def get_phased_path(n, path):
 
 @jit.numba_njit
 def path_ll_dip(n, m, G, phased_path, s, e, r):
-    """Evaluate log-likelihood path through a reference panel which results in sequence s."""
+    """Evaluate log-likelihood path through a reference panel which results in sequence."""
     emission_index = core.get_index_in_emission_matrix_diploid(
         ref_allele=G[0, phased_path[0][0], phased_path[1][0]], query_allele=s[0, 0]
     )
diff --git a/lshmm/vit_haploid.py b/lshmm/vit_haploid.py
index b296516..6a4d7d7 100644
--- a/lshmm/vit_haploid.py
+++ b/lshmm/vit_haploid.py
@@ -17,7 +17,7 @@ def viterbi_naive_init(n, m, H, s, e, r):
     r_n = r / n
 
     for i in range(n):
-        emission_idx = core.get_index_in_emission_matrix(
+        emission_idx = core.get_index_in_emission_matrix_haploid(
             ref_allele=H[0, i], query_allele=s[0, 0]
         )
         V[0, i] = 1 / n * e[0, emission_idx]
@@ -34,7 +34,7 @@ def viterbi_init(n, m, H, s, e, r):
     r_n = r / n
 
     for i in range(n):
-        emission_idx = core.get_index_in_emission_matrix(
+        emission_idx = core.get_index_in_emission_matrix_haploid(
             ref_allele=H[0, i], query_allele=s[0, 0]
         )
         V_prev[i] = 1 / n * e[0, emission_idx]
@@ -51,7 +51,7 @@ def forwards_viterbi_hap_naive(n, m, H, s, e, r):
         for i in range(n):
             v = np.zeros(n)
             for k in range(n):
-                emission_idx = core.get_index_in_emission_matrix(
+                emission_idx = core.get_index_in_emission_matrix_haploid(
                     ref_allele=H[j, i], query_allele=s[0, j]
                 )
                 v[k] = V[j - 1, k] * e[j, emission_idx]
@@ -77,7 +77,7 @@ def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r):
         for i in range(n):
             v = np.copy(v_tmp)
             v[i] += V[j - 1, i] * (1 - r[j])
-            emission_idx = core.get_index_in_emission_matrix(
+            emission_idx = core.get_index_in_emission_matrix_haploid(
                 ref_allele=H[j, i], query_allele=s[0, j]
             )
             v *= e[j, emission_idx]
@@ -98,7 +98,7 @@ def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r):
         for i in range(n):
             v = np.zeros(n)
             for k in range(n):
-                emission_idx = core.get_index_in_emission_matrix(
+                emission_idx = core.get_index_in_emission_matrix_haploid(
                     ref_allele=H[j, i], query_allele=s[0, j]
                 )
                 v[k] = V_prev[k] * e[j, emission_idx]
@@ -127,7 +127,7 @@ def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r):
         for i in range(n):
             v = np.zeros(n)
             for k in range(n):
-                emission_idx = core.get_index_in_emission_matrix(
+                emission_idx = core.get_index_in_emission_matrix_haploid(
                     ref_allele=H[j, i], query_allele=s[0, j]
                 )
                 v[k] = V_prev[k] * e[j, emission_idx]
@@ -177,7 +177,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r):
     """An implementation with even smaller memory footprint that exploits the Markov structure."""
     V = np.zeros(n)
     for i in range(n):
-        emission_idx = core.get_index_in_emission_matrix(
+        emission_idx = core.get_index_in_emission_matrix_haploid(
             ref_allele=H[0, i], query_allele=s[0, 0]
         )
         V[i] = 1 / n * e[0, emission_idx]
@@ -195,7 +195,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r):
             if V[i] < r_n[j]:
                 V[i] = r_n[j]
                 P[j, i] = argmax
-            emission_idx = core.get_index_in_emission_matrix(
+            emission_idx = core.get_index_in_emission_matrix_haploid(
                 ref_allele=H[j, i], query_allele=s[0, j]
             )
             V[i] *= e[j, emission_idx]
@@ -213,7 +213,7 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r):
     """
     V = np.zeros(n)
     for i in range(n):
-        emission_idx = core.get_index_in_emission_matrix(
+        emission_idx = core.get_index_in_emission_matrix_haploid(
             ref_allele=H[0, i], query_allele=s[0, 0]
         )
         V[i] = 1 / n * e[0, emission_idx]
@@ -237,7 +237,9 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r):
                 recombs[j] = np.append(
                     recombs[j], i
                 )  # We add template i as a potential template to recombine to at site j.
-            emission_idx = core.get_index_in_emission_matrix(H[j, i], s[0, j])
+            emission_idx = core.get_index_in_emission_matrix_haploid(
+                ref_allele=H[j, i], query_allele=s[0, j]
+            )
             V[i] *= e[j, emission_idx]
 
     V_argmaxes[m - 1] = np.argmax(V)
@@ -278,7 +280,7 @@ def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs):
 @jit.numba_njit
 def path_ll_hap(n, m, H, path, s, e, r):
     """Evaluate the log-likelihood of a path through a reference panel resulting in a sequence."""
-    emission_idx = core.get_index_in_emission_matrix(
+    emission_idx = core.get_index_in_emission_matrix_haploid(
         ref_allele=H[0, path[0]], query_allele=s[0, 0]
     )
     log_prob_path = np.log10((1 / n) * e[0, emission_idx])
@@ -286,7 +288,7 @@ def path_ll_hap(n, m, H, path, s, e, r):
     r_n = r / n
 
     for l in range(1, m):
-        emission_idx = core.get_index_in_emission_matrix(
+        emission_idx = core.get_index_in_emission_matrix_haploid(
             ref_allele=H[l, path[l]], query_allele=s[0, l]
         )
         current = path[l]
diff --git a/tests/test_API.py b/tests/test_API.py
index fbc6f93..f65646f 100644
--- a/tests/test_API.py
+++ b/tests/test_API.py
@@ -133,6 +133,6 @@ def test_simple_n8_high_recombination(self):
         ts = self.get_simple_n8_high_recombination()
         self.verify(ts)
 
-    def test_simple_n_16(self):
+    def test_simple_n16(self):
         ts = self.get_simple_n16()
         self.verify(ts)