Skip to content

Commit

Permalink
Further renaming of variables
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Feb 23, 2024
1 parent 9bc2ea1 commit 8555e69
Showing 1 changed file with 17 additions and 17 deletions.
34 changes: 17 additions & 17 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,9 +304,9 @@ def compute_state_prob_matrix(fm, bm):
def get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm):
"""
Get weights for the ungenotyped positions in a query haplotype, which are used in
linear interpolation of HMM state probabilities at the ungenotyped positions.
linear interpolation of hidden state probabilities at the ungenotyped positions.
In BB2016 (see below Equation 1), a weight between genotyped positions m and m + 1
In BB2016 (see below Equation 1), a weight between genotyped positions m and (m + 1)
bounding ungenotyped marker x is denoted lambda_m,x.
lambda_m,x = [g(m + 1) - g(x)] / [g(m + 1) - g(m)],
Expand Down Expand Up @@ -356,7 +356,7 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype
where a is the number of alleles.
Note that this function takes:
1. HMM state probability matrix at genotyped positions of size (m, h).
1. Hidden state probability matrix at genotyped positions of size (m, h).
2. Reference haplotypes subsetted to ungenotyped positions of size (x, h).
:param numpy.ndarray sm: Hidden state probability matrix at genotyped positions.
Expand All @@ -365,7 +365,7 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype
:param numpy.ndarray untyped_pos: Physical positions of ungenotyped markers (bp).
:param numpy.ndarray typed_cm: Genetic map positions at genotyped markers (cM).
:param numpy.ndarray untyped_cm: Genetic map positions at ungenotyped markers (cM).
:return: Interpolated allele probabilities.
:return: Imputed allele probabilities.
:rtype: numpy.ndarray
"""
# TODO: Allow for biallelic site matrix. Work with `_tskit.lshmm` properly.
Expand Down Expand Up @@ -403,15 +403,15 @@ def interpolate_allele_probs(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype

def get_map_alleles(allele_probs):
"""
Compute maximum a posteriori alleles at the ungenotyped markers of a query haplotype,
based on posterior marginal allele probabilities.
Compute maximum a posteriori (MAP) alleles at the ungenotyped markers
of a query haplotype, based on posterior marginal allele probabilities.
The imputed alleles are an array of size x.
WARN: If the allele probabilities are equal, then allele 0 is arbitrarily chosen.
TODO: Investigate how often this happens and the effect of this arbitrary choice.
:param numpy.ndarray allele_probs: Interpolated allele probabilities.
:param numpy.ndarray allele_probs: Imputed allele probabilities.
:return: Imputed alleles and their associated probabilities.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
Expand All @@ -436,8 +436,8 @@ def run_interpolation_beagle(
The physical positions of all the markers are an array of size (m + x).
This produces imputed alleles and their interpolated probabilities
at the ungenotyped markers of the query haplotype.
This produces imputed alleles and their probabilities at the ungenotyped markers
of the query haplotype.
The default values of `ne` and `error_rate` are taken from BEAGLE 4.1, not 5.4.
In BEAGLE 5.4, the default value of `ne` is 1e5 and `error_rate` is data-dependent.
Expand All @@ -448,7 +448,7 @@ def run_interpolation_beagle(
:param int ne: Effective population size.
:param float error_rate: Allele error rate.
:param GeneticMap genetic_map: Genetic map.
:return: Imputed alleles and their interpolated probabilities.
:return: Imputed alleles and their probabilities.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
warnings.warn("This function is experimental and not fully tested.", stacklevel=1)
Expand Down Expand Up @@ -482,15 +482,15 @@ def run_interpolation_beagle(
)
sm = compute_state_prob_matrix(fm, bm)
# Perform linear interpolation.
int_allele_probs = interpolate_allele_probs(
imputed_allele_probs = interpolate_allele_probs(
sm=sm,
ref_h=ref_h_untyped,
typed_pos=typed_pos,
untyped_pos=untyped_pos,
typed_cm=typed_cm,
untyped_cm=untyped_cm,
)
imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs)
imputed_alleles, max_allele_probs = get_map_alleles(imputed_allele_probs)
return (imputed_alleles, max_allele_probs)


Expand All @@ -511,8 +511,8 @@ def run_tsimpute(
The physical positions of all the markers are an array of size (m + x).
This produces imputed alleles and their interpolated probabilities
at the ungenotyped markers of the query haplotype.
This produces imputed alleles and their probabilities at the ungenotyped markers
of the query haplotype.
TODO: Set default precision. What should it be?
TODO: Allow for imputation on user-specified genomic interval.
Expand All @@ -526,7 +526,7 @@ def run_tsimpute(
:param numpy.ndarray mp: Per-site mismatch probabilities.
:param int precision: Precision when running Li & Stephens HMM.
:param GeneticMap genetic_map: Genetic map.
:return: Imputed alleles and their interpolated probabilities.
:return: Imputed alleles and their probabilities.
:rtype: tuple(numpy.ndarray, numpy.ndarray)
"""
warnings.warn(
Expand Down Expand Up @@ -565,15 +565,15 @@ def run_tsimpute(
ls_hmm.backward_matrix(query_h_typed.T, fm.normalisation_factor, bm)
sm = compute_state_prob_matrix(fm.decode(), bm.decode())
# Perform linear interpolation.
int_allele_probs = interpolate_allele_probs(
imputed_allele_probs = interpolate_allele_probs(
sm=sm,
ref_h=ref_h_untyped,
typed_pos=typed_pos,
untyped_pos=untyped_pos,
typed_cm=typed_cm,
untyped_cm=untyped_cm,
)
imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs)
imputed_alleles, max_allele_probs = get_map_alleles(imputed_allele_probs)
return (imputed_alleles, max_allele_probs)


Expand Down

0 comments on commit 8555e69

Please sign in to comment.