From f2a35187bedc2e40a5a965fe68395fe11adbdca2 Mon Sep 17 00:00:00 2001 From: szhan Date: Mon, 1 Jul 2024 12:30:20 +0100 Subject: [PATCH] Support multiple allele states --- lshmm/core.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/lshmm/core.py b/lshmm/core.py index b8bd86d..e912959 100644 --- a/lshmm/core.py +++ b/lshmm/core.py @@ -302,6 +302,66 @@ def get_emission_probability_haploid(ref_allele, query_allele, site, emission_ma return emission_matrix[site, 1] +@jit.numba_njit +def get_emission_probability_haploid_hkylike(ref_allele, query_allele, site, emission_matrix): + """ + Return the emission probability at a specified site for the haploid case, + given an emission probability matrix. + + The emission probability matrix is an array of size (m, 4), + where m = number of sites. + + :param int ref_allele: Reference allele. + :param int query_allele: Query allele. + :param int site: Site index. + :param numpy.ndarray emission_matrix: Emission probability matrix. + :return: Emission probability. + :rtype: float + """ + if ref_allele == MISSING: + raise ValueError("Reference allele cannot be MISSING.") + if query_allele == NONCOPY: + raise ValueError("Query allele cannot be NONCOPY.") + if emission_matrix.shape[1] != 2: + raise ValueError("Emission probability matrix has incorrect shape.") + if ref_allele == NONCOPY: + return 0.0 + elif query_allele == MISSING: + return 1.0 + else: + return emission_matrix[ref_allele, query_allele] + + +@jit.numba_njit +def get_emission_matrix_hkylike(mu, kappa=None): + """ + Return an emission probability matrix that allows for + differences between transition and transversion rates. + + When `kappa` is set to None, it is equivalent to setting it to 1. + + :param float mu: Probability of mutation to any allele. + :param float kappa: Transition-to-transversion rate ratio. + """ + if kappa <= 0: + raise ValueError("Ts/tv ratio must be positive.") + # Assume that ACGT are encoded as 0 to 3. + num_alleles = 4 + emission_matrix = np.zeros((num_alleles, num_alleles), dtype=np.float64) - 1 + for i in range(num_alleles): + for j in range(num_alleles): + if i == j: + emission_matrix[i, j] = 1 - mu + else: + emission_matrix[i, j] = mu / 3 + if kappa is not None: + # Transitions: A <-> G, C <-> T. + is_transition = (i in [0, 2] and j in [0, 2]) or (i in [1, 3] and j in [1, 3]) + if is_transition: + emission_matrix[i, j] *= kappa + return emission_matrix + + # Functions to assign emission probabilities for diploid LS HMM. @jit.numba_njit def get_emission_matrix_diploid(mu, num_sites, num_alleles, scale_mutation_rate):