diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index 71a6f136a8..9ae14fce0e 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -304,9 +304,9 @@ def compute_state_prob_matrix(fm, bm): def get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm): """ Get weights for the ungenotyped positions in a query haplotype, which are used in - linear interpolation of HMM state probabilities at the ungenotyped positions. + linear interpolation of hidden state probabilities at the ungenotyped positions. - In BB2016 (see below Equation 1), a weight between genotyped positions m and m + 1 + In BB2016 (see below Equation 1), a weight between genotyped positions m and (m + 1) bounding ungenotyped marker x is denoted lambda_m,x. lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)], @@ -356,7 +356,7 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype where a is the number of alleles. Note that this function takes: - 1. HMM state probability matrix at genotyped positions of size (m, h). + 1. Hidden state probability matrix at genotyped positions of size (m, h). 2. Reference haplotypes subsetted to ungenotyped positions of size (x, h). :param numpy.ndarray sm: HMM state probability matrix at genotyped positions. @@ -365,7 +365,7 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype :param numpy.ndarray untyped_pos: Physical positions of ungenotyped markers (bp). :param numpy.ndarray typed_cm: Genetic map positions at genotyped markers (cM). :param numpy.ndarray untyped_cm: Genetic map positions at ungenotyped markers (cM). - :return: Interpolated allele probabilities. + :return: Imputed allele probabilities. :rtype: numpy.ndarray """ # TODO: Allow for biallelic site matrix. Work with `_tskit.lshmm` properly. @@ -403,15 +403,15 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype def get_map_alleles(allele_probs): """ - Compute maximum a posteriori alleles at the ungenotyped markers of a query haplotype, - based on posterior marginal allele probabilities. + Compute maximum a posteriori (MAP) alleles at the ungenotyped markers + of a query haplotype, based on posterior marginal allele probabilities. The imputed alleles is an array of size x. WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen. TODO: Investigate how often this happens and the effect of this arbitrary choice. - :param numpy.ndarray allele_probs: Interpolated allele probabilities. + :param numpy.ndarray allele_probs: Imputed allele probabilities. :return: Imputed alleles and their associated probabilities. :rtype: tuple(numpy.ndarray, numpy.ndarray) """ @@ -436,8 +436,8 @@ def run_interpolation_beagle( The physical positions of all the markers are an array of size (m + x). - This produces imputed alleles and their interpolated probabilities - at the ungenotyped markers of the query haplotype. + This produces imputed alleles and their probabilities at the ungenotyped markers + of the query haplotype. The default values of `ne` and `error_rate` are taken from BEAGLE 4.1, not 5.4. In BEAGLE 5.4, the default value of `ne` is 1e5 and `error_rate` is data-dependent. @@ -448,7 +448,7 @@ def run_interpolation_beagle( :param int ne: Effective population size. :param float error_rate: Allele error rate. :param GeneticMap genetic_map: Genetic map. - :return: Imputed alleles and their interpolated probabilities. + :return: Imputed alleles and their probabilities. :rtype: tuple(numpy.ndarray, numpy.ndarray) """ warnings.warn("This function is experimental and not fully tested.", stacklevel=1) @@ -482,7 +482,7 @@ def run_interpolation_beagle( ) sm = compute_state_prob_matrix(fm, bm) # Perform linear interpolation. - int_allele_probs = interpolate_allele_probs( + imputed_allele_probs = interpolate_allele_probs( sm=sm, ref_h=ref_h_untyped, typed_pos=typed_pos, @@ -490,7 +490,7 @@ def run_interpolation_beagle( typed_cm=typed_cm, untyped_cm=untyped_cm, ) - imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs) + imputed_alleles, max_allele_probs = get_map_alleles(imputed_allele_probs) return (imputed_alleles, max_allele_probs) @@ -511,8 +511,8 @@ def run_tsimpute( The physical positions of all the markers are an array of size (m + x). - This produces imputed alleles and their interpolated probabilities - at the ungenotyped markers of the query haplotype. + This produces imputed alleles and their probabilities at the ungenotyped markers + of the query haplotype. TODO: Set default precision. What should it be? TODO: Allow for imputation on user-specified genomic interval. @@ -526,7 +526,7 @@ def run_tsimpute( :param numpy.ndarray mp: Per-site mismatch probabilities. :param int precision: Precision when running Li & Stephens HMM. :param GeneticMap genetic_map: Genetic map. - :return: Imputed alleles and their interpolated probabilities. + :return: Imputed alleles and their probabilities. :rtype: tuple(numpy.ndarray, numpy.ndarray) """ warnings.warn( @@ -565,7 +565,7 @@ def run_tsimpute( ls_hmm.backward_matrix(query_h_typed.T, fm.normalisation_factor, bm) sm = compute_state_prob_matrix(fm.decode(), bm.decode()) # Perform linear interpolation. - int_allele_probs = interpolate_allele_probs( + imputed_allele_probs = interpolate_allele_probs( sm=sm, ref_h=ref_h_untyped, typed_pos=typed_pos, @@ -573,7 +573,7 @@ def run_tsimpute( typed_cm=typed_cm, untyped_cm=untyped_cm, ) - imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs) + imputed_alleles, max_allele_probs = get_map_alleles(imputed_allele_probs) return (imputed_alleles, max_allele_probs)