From f8c6a5f4dfec1143a588fcc9e142bd79adc4d157 Mon Sep 17 00:00:00 2001 From: szhan Date: Sat, 20 Apr 2024 17:18:41 +0100 Subject: [PATCH] Clean up and fixes --- lshmm/fb_diploid.py | 10 +++++----- lshmm/vit_diploid.py | 6 +++--- lshmm/vit_haploid.py | 26 ++++++++++++++------------ tests/test_API.py | 2 +- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/lshmm/fb_diploid.py b/lshmm/fb_diploid.py index 919febb..1b3a170 100644 --- a/lshmm/fb_diploid.py +++ b/lshmm/fb_diploid.py @@ -11,7 +11,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): - """A matrix-based implementation using Numpy vectorisation.""" + """A matrix-based implementation using Numpy.""" # Initialise F = np.zeros((m, n, n)) F[0, :, :] = 1 / (n**2) @@ -76,7 +76,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True): def backwards_ls_dip(n, m, G, s, e, c, r): - """A matrix-based implementation using Numpy vectorisation.""" + """A matrix-based implementation using Numpy.""" # Initialise B = np.zeros((m, n, n)) B[m - 1, :, :] = 1 @@ -114,7 +114,7 @@ def backwards_ls_dip(n, m, G, s, e, c, r): @jit.numba_njit def forward_ls_dip_starting_point(n, m, G, s, e, r): - """Naive implementation of LS diploid forwards algorithm.""" + """A naive implementation.""" # Initialise F = np.zeros((m, n, n)) r_n = r / n @@ -265,7 +265,7 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r): @jit.numba_njit def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): - """LS diploid forwards algorithm without vectorisation.""" + """An implementation without vectorisation.""" # Initialise F = np.zeros((m, n, n)) for j1 in range(n): @@ -377,7 +377,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True): @jit.numba_njit def backward_ls_dip_loop(n, m, G, s, e, c, r): - """LS diploid backwards algoritm without vectorisation.""" + """An implementation without vectorisation.""" # Initialise B = np.zeros((m, n, n)) B[m - 1, :, :] = 1 diff --git a/lshmm/vit_diploid.py b/lshmm/vit_diploid.py index f28567d..849d049 100644 --- a/lshmm/vit_diploid.py +++ b/lshmm/vit_diploid.py @@ -402,10 +402,10 @@ def backwards_viterbi_dip_no_pointer( # Backtrace for l in range(m - 2, -1, -1): current_best_template = path[l + 1] - # if current_best_template in recombs_double[l + 1]: + # Current_best_template in recombs_double[l + 1] if in_list(recombs_double[l + 1], current_best_template): current_best_template = V_argmaxes[l] - # elif current_best_template in recombs_single[l + 1]: + # Current_best_template in recombs_single[l + 1] elif in_list(recombs_single[l + 1], current_best_template): (j1, j2) = divmod(current_best_template, n) if V_rowcol_maxes[l, j1] > V_rowcol_maxes[l, j2]: @@ -423,7 +423,7 @@ def get_phased_path(n, path): @jit.numba_njit def path_ll_dip(n, m, G, phased_path, s, e, r): - """Evaluate log-likelihood path through a reference panel which results in sequence s.""" + """Evaluate log-likelihood path through a reference panel which results in sequence.""" emission_index = core.get_index_in_emission_matrix_diploid( ref_allele=G[0, phased_path[0][0], phased_path[1][0]], query_allele=s[0, 0] ) diff --git a/lshmm/vit_haploid.py b/lshmm/vit_haploid.py index b296516..6a4d7d7 100644 --- a/lshmm/vit_haploid.py +++ b/lshmm/vit_haploid.py @@ -17,7 +17,7 @@ def viterbi_naive_init(n, m, H, s, e, r): r_n = r / n for i in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[0, i], query_allele=s[0, 0] ) V[0, i] = 1 / n * e[0, emission_idx] @@ -34,7 +34,7 @@ def viterbi_init(n, m, H, s, e, r): r_n = r / n for i in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[0, i], query_allele=s[0, 0] ) V_prev[i] = 1 / n * e[0, emission_idx] @@ -51,7 +51,7 @@ def forwards_viterbi_hap_naive(n, m, H, s, e, r): for i in range(n): v = np.zeros(n) for k in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[j, i], query_allele=s[0, j] ) v[k] = V[j - 1, k] * e[j, emission_idx] @@ -77,7 +77,7 @@ def forwards_viterbi_hap_naive_vec(n, m, H, s, e, r): for i in range(n): v = np.copy(v_tmp) v[i] += V[j - 1, i] * (1 - r[j]) - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[j, i], query_allele=s[0, j] ) v *= e[j, emission_idx] @@ -98,7 +98,7 @@ def forwards_viterbi_hap_naive_low_mem(n, m, H, s, e, r): for i in range(n): v = np.zeros(n) for k in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[j, i], query_allele=s[0, j] ) v[k] = V_prev[k] * e[j, emission_idx] @@ -127,7 +127,7 @@ def forwards_viterbi_hap_naive_low_mem_rescaling(n, m, H, s, e, r): for i in range(n): v = np.zeros(n) for k in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[j, i], query_allele=s[0, j] ) v[k] = V_prev[k] * e[j, emission_idx] @@ -177,7 +177,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): """An implementation with even smaller memory footprint that exploits the Markov structure.""" V = np.zeros(n) for i in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[0, i], query_allele=s[0, 0] ) V[i] = 1 / n * e[0, emission_idx] @@ -195,7 +195,7 @@ def forwards_viterbi_hap_lower_mem_rescaling(n, m, H, s, e, r): if V[i] < r_n[j]: V[i] = r_n[j] P[j, i] = argmax - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[j, i], query_allele=s[0, j] ) V[i] *= e[j, emission_idx] @@ -213,7 +213,7 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): """ V = np.zeros(n) for i in range(n): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[0, i], query_allele=s[0, 0] ) V[i] = 1 / n * e[0, emission_idx] @@ -237,7 +237,9 @@ def forwards_viterbi_hap_lower_mem_rescaling_no_pointer(n, m, H, s, e, r): recombs[j] = np.append( recombs[j], i ) # We add template i as a potential template to recombine to at site j. - emission_idx = core.get_index_in_emission_matrix(H[j, i], s[0, j]) + emission_idx = core.get_index_in_emission_matrix_haploid( + ref_allele=H[j, i], query_allele=s[0, j] + ) V[i] *= e[j, emission_idx] V_argmaxes[m - 1] = np.argmax(V) @@ -278,7 +280,7 @@ def backwards_viterbi_hap_no_pointer(m, V_argmaxes, recombs): @jit.numba_njit def path_ll_hap(n, m, H, path, s, e, r): """Evaluate the log-likelihood of a path through a reference panel resulting in a sequence.""" - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[0, path[0]], query_allele=s[0, 0] ) log_prob_path = np.log10((1 / n) * e[0, emission_idx]) @@ -286,7 +288,7 @@ def path_ll_hap(n, m, H, path, s, e, r): r_n = r / n for l in range(1, m): - emission_idx = core.get_index_in_emission_matrix( + emission_idx = core.get_index_in_emission_matrix_haploid( ref_allele=H[l, path[l]], query_allele=s[0, l] ) current = path[l] diff --git a/tests/test_API.py b/tests/test_API.py index fbc6f93..f65646f 100644 --- a/tests/test_API.py +++ b/tests/test_API.py @@ -133,6 +133,6 @@ def test_simple_n8_high_recombination(self): ts = self.get_simple_n8_high_recombination() self.verify(ts) - def test_simple_n_16(self): + def test_simple_n16(self): ts = self.get_simple_n16() self.verify(ts)