Skip to content

Commit

Permalink
Refactor how mutation probability is defined when prob_mutation is no…
Browse files Browse the repository at this point in the history
…t provided
  • Loading branch information
szhan committed Jun 18, 2024
1 parent ed35954 commit 0bad86c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
22 changes: 12 additions & 10 deletions lshmm/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,18 +122,12 @@ def check_inputs(


def set_emission_probabilities(
num_ref_haps,
num_sites,
ploidy,
num_alleles,
prob_mutation,
scale_mutation_rate,
):
if prob_mutation is None:
# Set the mutation probability to be that proposed in Li & Stephens (2003).
theta_tilde = 1 / np.sum([1 / k for k in range(1, num_ref_haps - 1)])
prob_mutation = 0.5 * (theta_tilde / (num_ref_haps + theta_tilde))

if isinstance(prob_mutation, float):
prob_mutation = prob_mutation * np.ones(num_sites)

Expand Down Expand Up @@ -180,8 +174,10 @@ def forwards(
scale_mutation_rate=scale_mutation_rate,
)

if prob_mutation is None:
prob_mutation = core.estimate_mutation_probability(num_ref_haps)

emission_probs = set_emission_probabilities(
num_ref_haps=num_ref_haps,
num_sites=num_sites,
ploidy=ploidy,
num_alleles=num_alleles,
Expand Down Expand Up @@ -233,8 +229,10 @@ def backwards(
scale_mutation_rate=scale_mutation_rate,
)

if prob_mutation is None:
prob_mutation = core.estimate_mutation_probability(num_ref_haps)

emission_probs = set_emission_probabilities(
num_ref_haps=num_ref_haps,
num_sites=num_sites,
ploidy=ploidy,
num_alleles=num_alleles,
Expand Down Expand Up @@ -281,8 +279,10 @@ def viterbi(
scale_mutation_rate=scale_mutation_rate,
)

if prob_mutation is None:
prob_mutation = core.estimate_mutation_probability(num_ref_haps)

emission_probs = set_emission_probabilities(
num_ref_haps=num_ref_haps,
num_sites=num_sites,
ploidy=ploidy,
num_alleles=num_alleles,
Expand Down Expand Up @@ -337,8 +337,10 @@ def path_loglik(
scale_mutation_rate=scale_mutation_rate,
)

if prob_mutation is None:
prob_mutation = core.estimate_mutation_probability(num_ref_haps)

emission_probs = set_emission_probabilities(
num_ref_haps=num_ref_haps,
num_sites=num_sites,
ploidy=ploidy,
num_alleles=num_alleles,
Expand Down
8 changes: 8 additions & 0 deletions lshmm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,3 +404,11 @@ def get_index_in_emission_matrix_diploid(ref_genotype, query_genotype):
is_ref_het = ref_genotype == 1
is_query_het = query_genotype == 1
return 4 * is_match + 2 * is_ref_het + is_query_het


# Miscellaneous functions.
def estimate_mutation_probability(num_haps):
"""Return the mutation probability as defined by A2 and A3 in Li & Stephens (2003)."""
theta_tilde = 1 / np.sum([1 / k for k in range(1, num_haps - 1)])
prob_mutation = 0.5 * (theta_tilde / (num_haps + theta_tilde))
return prob_mutation

0 comments on commit 0bad86c

Please sign in to comment.