From 5ba6063d79136fd6fc959dabb48cd438c8903b2f Mon Sep 17 00:00:00 2001 From: Shing Zhan Date: Fri, 15 Sep 2023 16:46:58 +0100 Subject: [PATCH] Refactor and improve docs WIP --- python/tests/beagle.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/python/tests/beagle.py b/python/tests/beagle.py index 136fa9513a..aa889ca100 100644 --- a/python/tests/beagle.py +++ b/python/tests/beagle.py @@ -345,52 +345,47 @@ def _compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu): def _interpolate_allele_probabilities( - fwd_hap_probs, bwd_hap_probs, genotyped_pos, imputed_pos + fwd_hap_probs, bwd_hap_probs, genotyped_pos, ungenotyped_pos ): """ WARN: This function is part of an attempt to faithfully reimplement the BEAGLE 4.1 algorithm. It is not complete and not used in the current implementation. It is kept for documentation. - Compute the interpolated allele probabilities at the imputed markers - of a query haplotype. + Compute the interpolated allele probabilities at the ungenotyped markers. - The forward and backward haplotype probability matrices are of size (m, 2), - where m is the number of genotyped markers. - Here, we assume all biallelic sites, hence the 2. - - The interpolated allele probability matrix is of size (x, 2), - where x is the number of imputed markers. - Again, we assume all biallelic sites, hence the 2. + The forward and backward haplotype probability matrices are of size (m, 2). + The interpolated allele probability matrix is of size (x, 2). + We assume all biallelic sites, hence the 2. This implementation is based on Equation 2 (the rearranged one) in BB2016. :param numpy.ndarray fwd_hap_probs: Forward haplotype probability matrix. :param numpy.ndarray bwd_hap_probs: Backward haplotype probability matrix. :param numpy.ndarray genotyped_pos: Site positions of genotyped markers. - :param numpy.ndarray imputed_pos: Site positions of imputed markers. + :param numpy.ndarray ungenotyped_pos: Site positions of ungenotyped markers. :return: Interpolated allele probabilities. :rtype: numpy.ndarray """ m = len(genotyped_pos) - x = len(imputed_pos) + x = len(ungenotyped_pos) assert (m, 2) == fwd_hap_probs.shape assert (m, 2) == bwd_hap_probs.shape genotyped_cm = convert_to_genetic_map_position(genotyped_pos) assert m == len(genotyped_cm) - imputed_cm = convert_to_genetic_map_position(imputed_pos) + imputed_cm = convert_to_genetic_map_position(ungenotyped_pos) assert x == len(imputed_cm) weights = get_weights(genotyped_cm, imputed_cm) assert x == len(weights) i_hap_probs = np.zeros((x, 2), dtype=np.float64) for i in np.arange(x): - # Identify the genotyped markers k and k + 1 between imputed marker i - if imputed_cm[i] < genotyped_cm[0]: + # Identify the genotyped markers k and k + 1 sandwiching ungenotyped marker i. + if ungenotyped_pos[i] < genotyped_pos[0]: k = 0 - elif imputed_cm[i] > genotyped_cm[-1]: + elif ungenotyped_pos[i] > genotyped_pos[-1]: k = m - 2 else: - k = np.max(np.where(imputed_cm[i] > genotyped_cm)[0]) + k = np.max(np.where(ungenotyped_pos[i] > genotyped_pos)[0]) # Vectorised over the allelic states i_hap_probs[i, :] = weights[i] * bwd_hap_probs[k, :] i_hap_probs[i, :] += (1 - weights[i]) * fwd_hap_probs[k, :]