From 0bad86c193494f7ada49eda9a64406ab27d56a46 Mon Sep 17 00:00:00 2001 From: szhan Date: Tue, 18 Jun 2024 10:22:02 +0100 Subject: [PATCH] Refactor how mutation probability is defined when prob_mutation is not provided --- lshmm/api.py | 22 ++++++++++++---------- lshmm/core.py | 8 ++++++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/lshmm/api.py b/lshmm/api.py index e6ddd28..b5c0e10 100644 --- a/lshmm/api.py +++ b/lshmm/api.py @@ -122,18 +122,12 @@ def check_inputs( def set_emission_probabilities( - num_ref_haps, num_sites, ploidy, num_alleles, prob_mutation, scale_mutation_rate, ): - if prob_mutation is None: - # Set the mutation probability to be that proposed in Li & Stephens (2003). - theta_tilde = 1 / np.sum([1 / k for k in range(1, num_ref_haps - 1)]) - prob_mutation = 0.5 * (theta_tilde / (num_ref_haps + theta_tilde)) - if isinstance(prob_mutation, float): prob_mutation = prob_mutation * np.ones(num_sites) @@ -180,8 +174,10 @@ def forwards( scale_mutation_rate=scale_mutation_rate, ) + if prob_mutation is None: + prob_mutation = core.estimate_mutation_probability(num_ref_haps) + emission_probs = set_emission_probabilities( - num_ref_haps=num_ref_haps, num_sites=num_sites, ploidy=ploidy, num_alleles=num_alleles, @@ -233,8 +229,10 @@ def backwards( scale_mutation_rate=scale_mutation_rate, ) + if prob_mutation is None: + prob_mutation = core.estimate_mutation_probability(num_ref_haps) + emission_probs = set_emission_probabilities( - num_ref_haps=num_ref_haps, num_sites=num_sites, ploidy=ploidy, num_alleles=num_alleles, @@ -281,8 +279,10 @@ def viterbi( scale_mutation_rate=scale_mutation_rate, ) + if prob_mutation is None: + prob_mutation = core.estimate_mutation_probability(num_ref_haps) + emission_probs = set_emission_probabilities( - num_ref_haps=num_ref_haps, num_sites=num_sites, ploidy=ploidy, num_alleles=num_alleles, @@ -337,8 +337,10 @@ def path_loglik( scale_mutation_rate=scale_mutation_rate, ) + if prob_mutation is None: + prob_mutation = core.estimate_mutation_probability(num_ref_haps) + emission_probs = set_emission_probabilities( - num_ref_haps=num_ref_haps, num_sites=num_sites, ploidy=ploidy, num_alleles=num_alleles, diff --git a/lshmm/core.py b/lshmm/core.py index d4ca0a7..47f6c16 100644 --- a/lshmm/core.py +++ b/lshmm/core.py @@ -404,3 +404,11 @@ def get_index_in_emission_matrix_diploid(ref_genotype, query_genotype): is_ref_het = ref_genotype == 1 is_query_het = query_genotype == 1 return 4 * is_match + 2 * is_ref_het + is_query_het + + +# Miscellaneous functions. +def estimate_mutation_probability(num_haps): + """Return the mutation probability as defined by A2 and A3 in Li & Stephens (2003).""" + theta_tilde = 1 / np.sum([1 / k for k in range(1, num_haps - 1)]) + prob_mutation = 0.5 * (theta_tilde / (num_haps + theta_tilde)) + return prob_mutation