diff --git a/tests/lsbase.py b/tests/lsbase.py
index cd5711a..a911630 100644
--- a/tests/lsbase.py
+++ b/tests/lsbase.py
@@ -93,7 +93,7 @@ def get_examples_diploid(self, ts, include_ancestors):
         queries = [query1, query2, query_miss_last, query_miss_mid, query_miss_most]
         # Exclude the arbitrarily chosen queries from the reference panel.
         ref_panel = ref_panel[:, 2:-2]
-        num_ref_haps = ref_panel.shape[1]  # Haplotypes, not individuals.
+        num_ref_haps = ref_panel.shape[1]  # Haplotypes, not individuals.
         # Reference panel contains phased genotypes.
         G = np.zeros((num_sites, num_ref_haps, num_ref_haps))
         for i in range(num_sites):
@@ -103,17 +103,17 @@
     def get_examples_pars(
         self,
         ts,
-        ploidy=None,
-        scale_mutation_rate=None,
-        include_ancestors=None,
-        mean_r=None,
-        mean_mu=None,
+        ploidy,
+        scale_mutation_rate,
+        include_ancestors,
+        include_extreme_rates,
         seed=42,
     ):
         """Returns an iterator over combinations of examples and parameters."""
         assert ploidy in [1, 2]
         assert scale_mutation_rate in [True, False]
         assert include_ancestors in [True, False]
+        assert include_extreme_rates in [True, False]
 
         np.random.seed(seed)
         if ploidy == 1:
@@ -129,16 +129,19 @@ def get_examples_pars(
             np.zeros(m) + 0.999,  # Extreme
             np.zeros(m) + 1e-6,  # Extreme
             np.random.rand(m),  # Random
+            1e-5 * (np.random.rand(m) + 0.5) / 2,
         ]
         mus = [
             np.zeros(m) + 0.01,  # Equal recombination and mutation
-            np.zeros(m) + 0.2,  # Extreme
-            np.zeros(m) + 1e-6,  # Extreme
             np.random.rand(m) * 0.2,  # Random
+            1e-5 * (np.random.rand(m) + 0.5) / 2,
         ]
-        if mean_r is not None and mean_mu is not None:
-            rs.append(mean_r * (np.random.rand(m) + 0.5) / 2)
-            mus.append(mean_mu * (np.random.rand(m) + 0.5) / 2)
+
+        if include_extreme_rates:
+            rs.append(np.zeros(m) + 0.2)
+            rs.append(np.zeros(m) + 1e-6)
+            mus.append(np.zeros(m) + 0.2)
+            mus.append(np.zeros(m) + 1e-6)
 
         for s, r, mu in itertools.product(queries, rs, mus):
             r[0] = 0
diff --git a/tests/test_API.py b/tests/test_API.py
index e872a25..29260cf 100644
--- a/tests/test_API.py
+++ b/tests/test_API.py
@@ -8,16 +8,13 @@
 
 
 class TestForwardBackwardHaploid(lsbase.ForwardBackwardAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars(
             ts,
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r)
             B_vs = fbh.backwards_ls_hap(n, m, H_vs, s, e_vs, c_vs, r)
@@ -54,33 +51,24 @@ def test_ts_simple_n16(self, include_ancestors):
     @pytest.mark.parametrize("include_ancestors", [True, False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 46
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45, length=1e5, mean_r=1e-5, mean_mu=1e-5
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
 
 
 class TestForwardBackwardDiploid(lsbase.ForwardBackwardAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, G_vs, s, e_vs, r, mu in self.get_examples_pars(
             ts,
             ploidy=2,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             F_vs, c_vs, ll_vs = fbd.forward_ls_dip_loop(
                 n, m, G_vs, s, e_vs, r, norm=True
             )
@@ -119,33 +107,24 @@ def test_ts_simple_n16(self, include_ancestors):
     @pytest.mark.parametrize("include_ancestors", [False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 46
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45, length=1e5, mean_r=1e-5, mean_mu=1e-5
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
 
 
 class TestViterbiHaploid(lsbase.ViterbiAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, H_vs, s, e_vs, r, mu in self.get_examples_pars(
             ts,
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             V_vs, P_vs, ll_vs = vh.forwards_viterbi_hap_lower_mem_rescaling(
                 n, m, H_vs, s, e_vs, r
             )
@@ -182,33 +161,24 @@ def test_ts_simple_n16(self, include_ancestors):
 
     @pytest.mark.parametrize("include_ancestors", [True, False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 46
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=46, length=1e5, mean_r=1e-5, mean_mu=1e-5
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
 
 
 class TestViterbiDiploid(lsbase.ViterbiAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, G_vs, s, e_vs, r, mu in self.get_examples_pars(
             ts,
             ploidy=2,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             V_vs, P_vs, ll_vs = vd.forwards_viterbi_dip_low_mem(n, m, G_vs, s, e_vs, r)
             path_vs = vd.backwards_viterbi_dip(m, V_vs, P_vs)
@@ -244,17 +214,11 @@ def test_ts_simple_n16(self, include_ancestors):
 
     @pytest.mark.parametrize("include_ancestors", [False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 46
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45, length=1e5, mean_r=1e-5, mean_mu=1e-5
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
diff --git a/tests/test_API_multiallelic.py b/tests/test_API_multiallelic.py
index ae8bd78..714f2fb 100644
--- a/tests/test_API_multiallelic.py
+++ b/tests/test_API_multiallelic.py
@@ -12,6 +12,7 @@ def verify(self, ts, scale_mutation_rate, include_ancestors):
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
+            include_extreme_rates=True,
         ):
             F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r)
             B_vs = fbh.backwards_ls_hap(n, m, H_vs, s, e_vs, c_vs, r)
@@ -59,6 +60,7 @@ def verify(self, ts, scale_mutation_rate, include_ancestors):
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
+            include_extreme_rates=True,
         ):
             V_vs, P_vs, ll_vs = vh.forwards_viterbi_hap_lower_mem_rescaling(
                 n, m, H_vs, s, e_vs, r
diff --git a/tests/test_non_tree.py b/tests/test_non_tree.py
index fc6baee..daff84a 100644
--- a/tests/test_non_tree.py
+++ b/tests/test_non_tree.py
@@ -10,16 +10,13 @@
 
 
 class TestNonTreeForwardBackwardHaploid(lsbase.ForwardBackwardAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, H_vs, s, e_vs, r, _ in self.get_examples_pars(
             ts,
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             F_vs, c_vs, ll_vs = fbh.forwards_ls_hap(n, m, H_vs, s, e_vs, r, norm=False)
             B_vs = fbh.backwards_ls_hap(n, m, H_vs, s, e_vs, c_vs, r)
@@ -59,66 +56,39 @@ def test_ts_simple_n16(self, include_ancestors):
 
     @pytest.mark.parametrize("include_ancestors", [True, False])
     def test_larger(self, include_ancestors):
-        ref_panel_size = 45
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45,
+            length=1e5,
+            mean_r=1e-5,
+            mean_mu=1e-5,
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
 
 
 class TestNonTreeForwardBackwardDiploid(lsbase.ForwardBackwardAlgorithmBase):
     def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
+        self,
+        ts,
+        scale_mutation_rate,
+        include_ancestors,
+        include_extreme_rates,
+        normalise,
     ):
         for n, m, G_vs, s, e_vs, r, _ in self.get_examples_pars(
             ts,
             ploidy=2,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=include_extreme_rates,
         ):
             F_vs, c_vs, ll_vs = fbd.forwards_ls_dip(n, m, G_vs, s, e_vs, r, norm=True)
             B_vs = fbd.backwards_ls_dip(n, m, G_vs, s, e_vs, c_vs, r)
             self.assertAllClose(np.sum(F_vs * B_vs, (1, 2)), np.ones(m))
 
-            F_tmp, c_tmp, ll_tmp = fbd.forwards_ls_dip(
-                n, m, G_vs, s, e_vs, r, norm=False
-            )
-            if ll_tmp != -np.inf:
-                B_tmp = fbd.backwards_ls_dip(n, m, G_vs, s, e_vs, c_tmp, r)
-                self.assertAllClose(
-                    np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
-                )
-                self.assertAllClose(ll_vs, ll_tmp)
-
-            F_tmp, ll_tmp = fbd.forward_ls_dip_starting_point(n, m, G_vs, s, e_vs, r)
-            if ll_tmp != -np.inf:
-                B_tmp = fbd.backward_ls_dip_starting_point(n, m, G_vs, s, e_vs, r)
-                self.assertAllClose(
-                    np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
-                )
-                self.assertAllClose(ll_vs, ll_tmp)
-
-            F_tmp, c_tmp, ll_tmp = fbd.forward_ls_dip_loop(
-                n, m, G_vs, s, e_vs, r, norm=False
-            )
-            if ll_tmp != -np.inf:
-                B_tmp = fbd.backward_ls_dip_loop(n, m, G_vs, s, e_vs, c_tmp, r)
-                self.assertAllClose(
-                    np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
-                )
-                self.assertAllClose(ll_vs, ll_tmp)
-
             F_tmp, c_tmp, ll_tmp = fbd.forward_ls_dip_loop(
                 n, m, G_vs, s, e_vs, r, norm=True
             )
@@ -126,60 +96,131 @@ def verify(
             self.assertAllClose(np.sum(F_tmp * B_tmp, (1, 2)), np.ones(m))
             self.assertAllClose(ll_vs, ll_tmp)
 
+            if not normalise:
+                F_tmp, c_tmp, ll_tmp = fbd.forwards_ls_dip(
+                    n, m, G_vs, s, e_vs, r, norm=False
+                )
+                if ll_tmp != -np.inf:
+                    B_tmp = fbd.backwards_ls_dip(n, m, G_vs, s, e_vs, c_tmp, r)
+                    self.assertAllClose(
+                        np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
+                    )
+                    self.assertAllClose(ll_vs, ll_tmp)
+
+                F_tmp, c_tmp, ll_tmp = fbd.forward_ls_dip_loop(
+                    n, m, G_vs, s, e_vs, r, norm=False
+                )
+                if ll_tmp != -np.inf:
+                    B_tmp = fbd.backward_ls_dip_loop(n, m, G_vs, s, e_vs, c_tmp, r)
+                    self.assertAllClose(
+                        np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
+                    )
+                    self.assertAllClose(ll_vs, ll_tmp)
+
+                F_tmp, ll_tmp = fbd.forward_ls_dip_starting_point(
+                    n, m, G_vs, s, e_vs, r
+                )
+                if ll_tmp != -np.inf:
+                    B_tmp = fbd.backward_ls_dip_starting_point(n, m, G_vs, s, e_vs, r)
+                    self.assertAllClose(
+                        np.log10(np.sum(F_tmp * B_tmp, (1, 2))), ll_tmp * np.ones(m)
+                    )
+                    self.assertAllClose(ll_vs, ll_tmp)
+
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_simple_n10_no_recomb(self, include_ancestors):
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_simple_n10_no_recomb(self, include_ancestors, normalise):
         ts = self.get_ts_simple_n10_no_recomb()
-        self.verify(ts, scale_mutation_rate=True, include_ancestors=include_ancestors)
+        # Test extreme rates only when normalising,
+        # because they can lead to pathological cases.
+        include_extreme_rates = normalise
+        self.verify(
+            ts,
+            scale_mutation_rate=True,
+            include_ancestors=include_ancestors,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
+        )
 
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_simple_n6(self, include_ancestors):
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_simple_n6(self, include_ancestors, normalise):
         ts = self.get_ts_simple_n6()
-        self.verify(ts, scale_mutation_rate=True, include_ancestors=include_ancestors)
+        include_extreme_rates = normalise
+        self.verify(
+            ts,
+            scale_mutation_rate=True,
+            include_ancestors=include_ancestors,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
+        )
 
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_simple_n8(self, include_ancestors):
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_simple_n8(self, include_ancestors, normalise):
         ts = self.get_ts_simple_n8()
-        self.verify(ts, scale_mutation_rate=True, include_ancestors=include_ancestors)
+        include_extreme_rates = normalise
+        self.verify(
+            ts,
+            scale_mutation_rate=True,
+            include_ancestors=include_ancestors,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
+        )
 
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_simple_n8_high_recomb(self, include_ancestors):
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_simple_n8_high_recomb(self, include_ancestors, normalise):
         ts = self.get_ts_simple_n8_high_recomb()
-        self.verify(ts, scale_mutation_rate=True, include_ancestors=include_ancestors)
+        include_extreme_rates = normalise
+        self.verify(
+            ts,
+            scale_mutation_rate=True,
+            include_ancestors=include_ancestors,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
+        )
 
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_simple_n16(self, include_ancestors):
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_simple_n16(self, include_ancestors, normalise):
         ts = self.get_ts_simple_n16()
-        self.verify(ts, scale_mutation_rate=True, include_ancestors=include_ancestors)
+        include_extreme_rates = normalise
+        self.verify(
+            ts,
+            scale_mutation_rate=True,
+            include_ancestors=include_ancestors,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
+        )
 
     @pytest.mark.parametrize("include_ancestors", [False])
-    def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 45
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
+    @pytest.mark.parametrize("normalise", [True, False])
+    def test_ts_larger(self, include_ancestors, normalise):
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45,
+            length=1e5,
+            mean_r=1e-5,
+            mean_mu=1e-5,
         )
+        include_extreme_rates = normalise
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            normalise=normalise,
+            include_extreme_rates=include_extreme_rates,
         )
 
 
 class TestNonTreeViterbiHaploid(lsbase.ViterbiAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, H_vs, s, e_vs, r, _ in self.get_examples_pars(
             ts,
             ploidy=1,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             V_vs, P_vs, ll_vs = vh.forwards_viterbi_hap_naive(n, m, H_vs, s, e_vs, r)
             path_vs = vh.backwards_viterbi_hap(m, V_vs[m - 1, :], P_vs)
@@ -270,33 +311,24 @@ def test_ts_simple_n16(self, include_ancestors):
 
     @pytest.mark.parametrize("include_ancestors", [True, False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 45
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45, length=1e5, mean_r=1e-5, mean_mu=1e-5
         )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
 
 
 class TestNonTreeViterbiDiploid(lsbase.ViterbiAlgorithmBase):
-    def verify(
-        self, ts, scale_mutation_rate, include_ancestors, mean_r=None, mean_mu=None
-    ):
+    def verify(self, ts, scale_mutation_rate, include_ancestors):
         for n, m, G_vs, s, e_vs, r, _ in self.get_examples_pars(
             ts,
             ploidy=2,
             scale_mutation_rate=scale_mutation_rate,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
+            include_extreme_rates=True,
         ):
             V_vs, P_vs, ll_vs = vd.forwards_viterbi_dip_naive(n, m, G_vs, s, e_vs, r)
             path_vs = vd.backwards_viterbi_dip(m, V_vs[m - 1, :, :], P_vs)
@@ -381,17 +413,11 @@ def test_ts_simple_n16(self, include_ancestors):
 
     @pytest.mark.parametrize("include_ancestors", [False])
     def test_ts_larger(self, include_ancestors):
-        ref_panel_size = 45
-        length = 1e5
-        mean_r = 1e-5
-        mean_mu = 1e-5
         ts = self.get_ts_custom_pars(
-            ref_panel_size=ref_panel_size, length=length, mean_r=mean_r, mean_mu=mean_mu
+            ref_panel_size=45, length=1e5, mean_r=1e-5, mean_mu=1e-5
        )
         self.verify(
             ts,
             scale_mutation_rate=True,
             include_ancestors=include_ancestors,
-            mean_r=mean_r,
-            mean_mu=mean_mu,
         )
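Note for reviewers (not part of the patch): the sketch below is a minimal, standalone illustration of what the new include_extreme_rates switch controls. It mirrors the rate-list construction from the tests/lsbase.py hunk above; the helper name build_example_rates and the value m=10 are invented for this example only.

import itertools

import numpy as np


def build_example_rates(m, include_extreme_rates, seed=42):
    # Mirrors the rate-list construction in get_examples_pars (tests/lsbase.py).
    np.random.seed(seed)
    rs = [
        np.zeros(m) + 0.999,  # Extreme
        np.zeros(m) + 1e-6,  # Extreme
        np.random.rand(m),  # Random
        1e-5 * (np.random.rand(m) + 0.5) / 2,
    ]
    mus = [
        np.zeros(m) + 0.01,  # Equal recombination and mutation
        np.random.rand(m) * 0.2,  # Random
        1e-5 * (np.random.rand(m) + 0.5) / 2,
    ]
    # Extreme rates are opt-in; per the comment added in test_non_tree.py,
    # they can lead to pathological cases when not normalising.
    if include_extreme_rates:
        rs.append(np.zeros(m) + 0.2)
        rs.append(np.zeros(m) + 1e-6)
        mus.append(np.zeros(m) + 0.2)
        mus.append(np.zeros(m) + 1e-6)
    return rs, mus


# As in the tests, every (r, mu) combination is exercised and the first entry
# of each recombination vector is zeroed.
rs, mus = build_example_rates(m=10, include_extreme_rates=False)
for r, mu in itertools.product(rs, mus):
    r[0] = 0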