Skip to content

Commit

Permalink
Rework interpolate_allele_prob to use right indices of bounding inter…
Browse files Browse the repository at this point in the history
…vals
  • Loading branch information
szhan committed Jan 21, 2024
1 parent 32d9591 commit d42c220
Showing 1 changed file with 18 additions and 18 deletions.
36 changes: 18 additions & 18 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,22 +318,22 @@ def _interpolate_allele_prob(sm, ref_h, typed_pos, untyped_pos, typed_cm, untype
@njit
def interpolate_allele_prob(sm, ref_h, typed_pos, untyped_pos, typed_cm, untyped_cm):
"""
Compute interpolated allele probabilities at the ungenotyped markers of
a query haplotype following Equation 1 of BB2016.
Interpolate allele probabilities at the ungenotyped markers of a query haplotype
following Equation 1 of BB2016.
The interpolated allele probability matrix is of size (x, a),
where a is the number of alleles.
Note that this function takes:
1. HMM state probability matrix across genotyped markers of size (m, h).
1. HMM state probability matrix at genotyped markers of size (m, h).
2. Reference haplotypes subsetted to ungenotyped markers of size (x, h).
:param numpy.ndarray sm: HMM state probability matrix at genotyped markers.
:param numpy.ndarray ref_h: Reference haplotypes subsetted to imputed markers.
:param numpy.ndarray typed_pos: Physical positions at genotyped markers.
:param numpy.ndarray untyped_pos: Physical positions at ungenotyped markers.
:param numpy.ndarray typed_cm: Genetic map positions at genotyped markers.
:param numpy.ndarray untyped_cm: Genetic map positions at ungenotyped markers.
:param numpy.ndarray sm: HMM state probability matrix at genotyped positions.
:param numpy.ndarray ref_h: Reference haplotypes subsetted to ungenotyped positions.
:param numpy.ndarray typed_pos: Physical positions at genotyped positions (bp).
:param numpy.ndarray untyped_pos: Physical positions at ungenotyped positions (bp).
:param numpy.ndarray typed_cm: Genetic map positions at genotyped positions (cM).
:param numpy.ndarray untyped_cm: Genetic map positions at ungenotyped positions (cM).
:return: Interpolated allele probabilities.
:rtype: numpy.ndarray
"""
Expand All @@ -342,24 +342,24 @@ def interpolate_allele_prob(sm, ref_h, typed_pos, untyped_pos, typed_cm, untyped
m = sm.shape[0]
x = ref_h.shape[0]
h = ref_h.shape[1]
weights, left_idx = get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm)
p = np.zeros((x, len(alleles)), dtype=np.float64)
weights, right_idx = get_weights(typed_pos, untyped_pos, typed_cm, untyped_cm)
probs = np.zeros((x, len(alleles)), dtype=np.float64)
for i in range(x):
k = left_idx[i]
k = right_idx[i]
w = weights[i]
for j in range(h):
for a in alleles:
if ref_h[i, j] == a:
if k == m - 1:
if k == m:
# FIXME: It's unclear how BEAGLE handles this case.
p[i, a] += sm[k, j]
probs[i, a] += sm[k - 1, j]
else:
p[i, a] += w * sm[k, j]
p[i, a] += (1 - w) * sm[k + 1, j]
probs[i, a] += w * sm[k - 1, j]
probs[i, a] += (1 - w) * sm[k, j]
# Rescale probabilities.
# TODO: Check if this is necessary. Could this be a subtle source of error?
p_rescaled = p / np.sum(p, axis=1)[:, np.newaxis]
return p_rescaled
probs_rescaled = probs / np.sum(probs, axis=1)[:, np.newaxis]
return probs_rescaled


def get_map_alleles(allele_prob):
Expand Down

0 comments on commit d42c220

Please sign in to comment.