Skip to content

Commit

Permalink
Remove asserts
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Sep 13, 2023
1 parent eb7e20a commit 01afc5e
Showing 1 changed file with 0 additions and 58 deletions.
58 changes: 0 additions & 58 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def convert_to_genetic_map_position(pos):
:return: Genetic map positions.
:rtype: numpy.ndarray
"""
assert 0 < np.min(pos), "Site positions are not 1-based."
return pos / 1e6


Expand Down Expand Up @@ -55,9 +54,6 @@ def get_weights(genotyped_pos, imputed_pos):
marker_interval_start = np.repeat(-2, x, dtype=np.int64)
idx_m = m - 1
for idx_x in np.arange(x - 1, -1, -1):
assert (
imputed_cm[idx_x] != genotyped_cm[idx_m]
), "Genotyped markers and imputed markers overlap."
while imputed_cm[idx_x] < genotyped_cm[idx_m] and idx_m > -1:
idx_m = max(idx_m - 1, -1)
if idx_m == m - 1:
Expand All @@ -74,9 +70,6 @@ def get_weights(genotyped_pos, imputed_pos):
cm_mP1_m = cm_mP1_m if cm_mP1_m > 0.005 else 0.005
weights[idx_x] = cm_mP1_x / cm_mP1_m
marker_interval_start[idx_x] = idx_m
assert 0 <= np.min(weights), "Some weights are negative."
assert np.max(weights) <= 1, "Some weights are greater than 1."
assert np.min(marker_interval_start) > -2, "Some marker indices are out of bounds."
return (weights, marker_interval_start)


Expand Down Expand Up @@ -115,7 +108,6 @@ def get_switch_prob(pos, h, ne):
:return: Switch probabilities.
:rtype: numpy.ndarray
"""
assert 0 < np.min(pos), "Site positions are not 1-based."
# Get genetic distances between adjacent markers.
cm = convert_to_genetic_map_position(pos)
d = np.zeros(len(pos), dtype=np.float64)
Expand Down Expand Up @@ -148,12 +140,6 @@ def compute_forward_probability_matrix(ref_h, query_h, rho, mu):
"""
m = ref_h.shape[0]
h = ref_h.shape[1]
assert m == len(query_h)
assert m == len(rho)
assert m == len(mu)
assert 0 <= np.min(rho) and np.max(rho) <= 1
# BEAGLE caps mismatch probabilities at 0.5.
assert 0 <= np.min(mu) and np.max(mu) <= 0.5
fm = np.zeros((m, h), dtype=np.float64)
last_sum = 1.0 # Part of normalization factor
for i in np.arange(m):
Expand Down Expand Up @@ -202,11 +188,6 @@ def compute_backward_probability_matrix(ref_h, query_h, rho, mu):
"""
m = ref_h.shape[0]
h = ref_h.shape[1]
assert m == len(query_h)
assert m == len(rho)
assert m == len(mu)
assert 0 <= np.min(rho) and np.max(rho) <= 1
assert 0 <= np.min(mu) and np.max(mu) <= 0.5
bm = np.zeros((m, h), dtype=np.float64)
bm[-1, :] = 1.0 / h # Initialise the last column
for i in np.arange(m - 2, -1, -1):
Expand Down Expand Up @@ -252,9 +233,6 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h):
"""
m = ref_h.shape[0]
h = ref_h.shape[1]
assert m == len(query_h)
assert (m, h) == fm.shape
assert (m, h) == bm.shape
# m + 1 columns to allow for imputed markers right of the last genotyped marker.
sm = np.zeros((m + 1, h), dtype=np.float64)
sm[:-1, :] = np.multiply(fm, bm)
Expand Down Expand Up @@ -284,14 +262,9 @@ def interpolate_allele_probabilities(sm, ref_h, genotyped_pos, imputed_pos):
:return: Interpolated allele probabilities.
:rtype: numpy.ndarray
"""
m = sm.shape[0] - 1
h = sm.shape[1]
alleles = [0, 1] # Assume all biallelic sites
assert m == len(genotyped_pos)
x = len(imputed_pos)
assert (x, h) == ref_h.shape
weights, marker_interval_start = get_weights(genotyped_pos, imputed_pos)
assert x == len(weights) == len(marker_interval_start)
p = np.zeros((x, len(alleles)), dtype=np.float64)
for a in alleles:
for i in np.arange(x):
Expand Down Expand Up @@ -351,56 +324,25 @@ def run_beagle(ref_h, query_h, pos, miscall_rate=0.0001, ne=1e6):
:return: Imputed alleles and interpolated allele probabilities.
:rtype: list(numpy.ndarray, numpy.ndarray)
"""
assert ref_h.shape[0] == len(pos)
assert len(query_h) == len(pos)
# Indices of genotyped markers in the query haplotype
genotyped_pos_idx = np.where(query_h != -1)[0]
assert len(genotyped_pos_idx) > 1
# Indices of imputed markers in the query haplotype
imputed_pos_idx = np.where(query_h == -1)[0]
assert len(imputed_pos_idx) > 0
# Site positions of genotyped markers
genotyped_pos = pos[genotyped_pos_idx]
# Site positions of imputed markers
imputed_pos = pos[imputed_pos_idx]
h = ref_h.shape[1]
m = len(genotyped_pos)
x = len(imputed_pos)
assert m + x == len(pos)
# Subset the reference haplotypes to genotyped markers
ref_h_genotyped = ref_h[genotyped_pos_idx, :]
assert (m, h) == ref_h_genotyped.shape
# Subset the query haplotype to genotyped markers
query_h_genotyped = query_h[genotyped_pos_idx]
assert m == len(query_h_genotyped)
# Set mismatch probabilities at genotyped markers
mu = get_mismatch_prob(genotyped_pos, miscall_rate=miscall_rate)
assert m == len(mu)
# Set switch probabilities at genotyped markers
rho = get_switch_prob(genotyped_pos, h, ne=ne)
assert m == len(rho)
# Compute forward probability matrix at genotyped markers
fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu)
assert (m, h) == fm.shape
# Compute backward probability matrix at genotyped markers
bm = compute_backward_probability_matrix(
ref_h_genotyped, query_h_genotyped, rho, mu
)
assert (m, h) == bm.shape
# Compute HMM state probability matrix at genotyped markers
sm = compute_state_probability_matrix(fm, bm, ref_h_genotyped, query_h_genotyped)
assert (m + 1, h) == sm.shape
# Subset the reference haplotypes to imputed markers
ref_h_imputed = ref_h[imputed_pos_idx, :]
# Interpolate allele probabilities at imputed markers
i_allele_probs = interpolate_allele_probabilities(
sm, ref_h_imputed, genotyped_pos, imputed_pos
)
# TODO: Allow for multiallelic sites.
assert (x, 2) == i_allele_probs.shape
# Get MAP alleles at imputed markers
imputed_alleles = get_map_alleles(i_allele_probs)
assert x == len(imputed_alleles)
return (imputed_alleles, i_allele_probs)


Expand Down

0 comments on commit 01afc5e

Please sign in to comment.