diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index 1436d032ef..71a6f136a8 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -1,6 +1,6 @@ """ Implementation of the BEAGLE algorithm to impute alleles by linear interpolation of -state probabilities at ungenotyped markers in the HMM state probability matrix. +state probabilities at ungenotyped markers in the hidden state probability matrix. This was implemented while closely consulting the BEAGLE 4.1 paper: Browning & Browning (2016). Genotype imputation with millions of reference samples. @@ -24,8 +24,8 @@ 2. The ungenotyped positions take -1. The forward and backward probability matrices are of size (m, h). -The HMM state probability matrix is of size (m, h). -The interpolated allele probability matrix is of size (x, 4), +The state probability matrix is of size (m, h). +The imputed allele probability matrix is of size (x, 4), The imputed alleles are the maximum a posteriori (MAP) alleles. To improve computational efficiency, BEAGLE uses aggregated markers, @@ -206,7 +206,7 @@ def get_transition_probs(cm, h, ne): @njit -def compute_forward_matrix(ref_h, query_h, rho, mu): +def compute_forward_matrix(ref_h, query_h, tp, mp): """ Implement forward algorithm to compute a forward probablity matrix of size (m, h). @@ -215,8 +215,8 @@ def compute_forward_matrix(ref_h, query_h, rho, mu): :param numpy.ndarray ref_h: Reference haplotypes. :param numpy.ndarray query_h: One query haplotype. - :param numpy.ndarray rho: Per-site recombination rates. - :param numpy.ndarray mu: Per-site mutation rates. + :param numpy.ndarray tp: Per-site transition probabilities. + :param numpy.ndarray mp: Per-site mismatch probabilities. :return: Forward probability matrix. :rtype: numpy.ndarray """ @@ -226,15 +226,15 @@ def compute_forward_matrix(ref_h, query_h, rho, mu): last_sum = 1.0 # Part of normalization factor. for i in range(m): # Get site-specific parameters. - shift = rho[i] / h - scale = (1 - rho[i]) / last_sum + shift = tp[i] / h + scale = (1 - tp[i]) / last_sum # Get allele at genotyped marker i on query haplotype. query_a = query_h[i] for j in range(h): # Get allele at genotyped marker i on reference haplotype j. ref_a = ref_h[i, j] # Get emission probability. - em_prob = mu[i] if query_a != ref_a else 1.0 - mu[i] + em_prob = mp[i] if query_a != ref_a else 1.0 - mp[i] fm[i, j] = em_prob if i == 0: fm[i, j] *= 1.0 / h # Prior probabilities. @@ -246,20 +246,20 @@ def compute_forward_matrix(ref_h, query_h, rho, mu): @njit -def compute_backward_matrix(ref_h, query_h, rho, mu): +def compute_backward_matrix(ref_h, query_h, tp, mp): """ Implement backward algorithm to compute a backward probablity matrix of size (m, h). - Reference haplotypes and query haplotype are subsetted to genotyped markers. + Reference haplotypes and query haplotype are subsetted to genotyped positions. So, they are a matrix of size (m, h) and an array of size m, respectively. - In BEAGLE 4.1, the values are kept one site at a time. Here, we keep the values - at all the sites. + In BEAGLE 4.1, the values are kept one position at a time. Here, we keep the values + at all the positions. :param numpy.ndarray ref_h: Reference haplotypes. :param numpy.ndarray query_h: One query haplotype. - :param numpy.ndarray rho: Per-site recombination rates. - :param numpy.ndarray mu: Per-site mutation rates. + :param numpy.ndarray tp: Per-site transition probabilities. + :param numpy.ndarray mp: Per-site mismatch probabilities. :return: Backward probability matrix. :rtype: numpy.ndarray """ @@ -271,28 +271,28 @@ def compute_backward_matrix(ref_h, query_h, rho, mu): query_a = query_h[i + 1] for j in range(h): ref_a = ref_h[i + 1, j] - em_prob = mu[i + 1] if ref_a != query_a else 1.0 - mu[i + 1] + em_prob = mp[i + 1] if ref_a != query_a else 1.0 - mp[i + 1] bm[i, j] = bm[i + 1, j] * em_prob sum_site = np.sum(bm[i, :]) - scale = (1 - rho[i + 1]) / sum_site - shift = rho[i + 1] / h + scale = (1 - tp[i + 1]) / sum_site + shift = tp[i + 1] / h bm[i, :] = scale * bm[i, :] + shift return bm def compute_state_prob_matrix(fm, bm): """ - Compute the HMM state probability matrix, in which each element is - the probability of a HMM state at a genotyped position. + Compute the hidden state probability matrix, in which each element is + the probability of an HMM state at a genotyped position. The forward and backward probability matrices are of size (m, h). - The output HMM state probability matrix is of size (m, h). + The output hidden state probability matrix is of size (m, h). - Implementing this is simpler than faithfully reproducing BEAGLE. + Implementing this is simpler than faithfully reproducing BEAGLE 4.1. :param numpy.ndarray fm: Forward probability matrix. :param numpy.ndarray bm: Backward probability matrix. - :return: HMM state probability matrix. + :return: Hidden state probability matrix. :rtype: numpy.ndarray """ assert fm.shape == bm.shape, "Forward and backward matrices differ in shape."