diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index f232014a5a..ac88c8b119 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -22,7 +22,6 @@ def convert_to_genetic_map_position(pos): :return: Genetic map positions. :rtype: numpy.ndarray """ - assert 0 < np.min(pos), "Site positions are not 1-based." return pos / 1e6 @@ -55,9 +54,6 @@ def get_weights(genotyped_pos, imputed_pos): marker_interval_start = np.repeat(-2, x, dtype=np.int64) idx_m = m - 1 for idx_x in np.arange(x - 1, -1, -1): - assert ( - imputed_cm[idx_x] != genotyped_cm[idx_m] - ), "Genotyped markers and imputed markers overlap." while imputed_cm[idx_x] < genotyped_cm[idx_m] and idx_m > -1: idx_m = max(idx_m - 1, -1) if idx_m == m - 1: @@ -74,9 +70,6 @@ def get_weights(genotyped_pos, imputed_pos): cm_mP1_m = cm_mP1_m if cm_mP1_m > 0.005 else 0.005 weights[idx_x] = cm_mP1_x / cm_mP1_m marker_interval_start[idx_x] = idx_m - assert 0 <= np.min(weights), "Some weights are negative." - assert np.max(weights) <= 1, "Some weights are greater than 1." - assert np.min(marker_interval_start) > -2, "Some marker indices are out of bounds." return (weights, marker_interval_start) @@ -115,7 +108,6 @@ def get_switch_prob(pos, h, ne): :return: Switch probabilities. :rtype: numpy.ndarray """ - assert 0 < np.min(pos), "Site positions are not 1-based." # Get genetic distances between adjacent markers. cm = convert_to_genetic_map_position(pos) d = np.zeros(len(pos), dtype=np.float64) @@ -148,12 +140,6 @@ def compute_forward_probability_matrix(ref_h, query_h, rho, mu): """ m = ref_h.shape[0] h = ref_h.shape[1] - assert m == len(query_h) - assert m == len(rho) - assert m == len(mu) - assert 0 <= np.min(rho) and np.max(rho) <= 1 - # BEAGLE caps mismatch probabilities at 0.5. - assert 0 <= np.min(mu) and np.max(mu) <= 0.5 fm = np.zeros((m, h), dtype=np.float64) last_sum = 1.0 # Part of normalization factor for i in np.arange(m): @@ -202,11 +188,6 @@ def compute_backward_probability_matrix(ref_h, query_h, rho, mu): """ m = ref_h.shape[0] h = ref_h.shape[1] - assert m == len(query_h) - assert m == len(rho) - assert m == len(mu) - assert 0 <= np.min(rho) and np.max(rho) <= 1 - assert 0 <= np.min(mu) and np.max(mu) <= 0.5 bm = np.zeros((m, h), dtype=np.float64) bm[-1, :] = 1.0 / h # Initialise the last column for i in np.arange(m - 2, -1, -1): @@ -252,9 +233,6 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h): """ m = ref_h.shape[0] h = ref_h.shape[1] - assert m == len(query_h) - assert (m, h) == fm.shape - assert (m, h) == bm.shape # m + 1 columns to allow for imputed markers right of the last genotyped marker. sm = np.zeros((m + 1, h), dtype=np.float64) sm[:-1, :] = np.multiply(fm, bm) @@ -284,14 +262,9 @@ def interpolate_allele_probabilities(sm, ref_h, genotyped_pos, imputed_pos): :return: Interpolated allele probabilities. :rtype: numpy.ndarray """ - m = sm.shape[0] - 1 - h = sm.shape[1] alleles = [0, 1] # Assume all biallelic sites - assert m == len(genotyped_pos) x = len(imputed_pos) - assert (x, h) == ref_h.shape weights, marker_interval_start = get_weights(genotyped_pos, imputed_pos) - assert x == len(weights) == len(marker_interval_start) p = np.zeros((x, len(alleles)), dtype=np.float64) for a in alleles: for i in np.arange(x): @@ -351,56 +324,25 @@ def run_beagle(ref_h, query_h, pos, miscall_rate=0.0001, ne=1e6): :return: Imputed alleles and interpolated allele probabilities. :rtype: list(numpy.ndarray, numpy.ndarray) """ - assert ref_h.shape[0] == len(pos) - assert len(query_h) == len(pos) - # Indices of genotyped markers in the query haplotype genotyped_pos_idx = np.where(query_h != -1)[0] - assert len(genotyped_pos_idx) > 1 - # Indices of imputed markers in the query haplotype imputed_pos_idx = np.where(query_h == -1)[0] - assert len(imputed_pos_idx) > 0 - # Site positions of genotyped markers genotyped_pos = pos[genotyped_pos_idx] - # Site positions of imputed markers imputed_pos = pos[imputed_pos_idx] h = ref_h.shape[1] - m = len(genotyped_pos) - x = len(imputed_pos) - assert m + x == len(pos) - # Subset the reference haplotypes to genotyped markers ref_h_genotyped = ref_h[genotyped_pos_idx, :] - assert (m, h) == ref_h_genotyped.shape - # Subset the query haplotype to genotyped markers query_h_genotyped = query_h[genotyped_pos_idx] - assert m == len(query_h_genotyped) - # Set mismatch probabilities at genotyped markers mu = get_mismatch_prob(genotyped_pos, miscall_rate=miscall_rate) - assert m == len(mu) - # Set switch probabilities at genotyped markers rho = get_switch_prob(genotyped_pos, h, ne=ne) - assert m == len(rho) - # Compute forward probability matrix at genotyped markers fm = compute_forward_probability_matrix(ref_h_genotyped, query_h_genotyped, rho, mu) - assert (m, h) == fm.shape - # Compute backward probability matrix at genotyped markers bm = compute_backward_probability_matrix( ref_h_genotyped, query_h_genotyped, rho, mu ) - assert (m, h) == bm.shape - # Compute HMM state probability matrix at genotyped markers sm = compute_state_probability_matrix(fm, bm, ref_h_genotyped, query_h_genotyped) - assert (m + 1, h) == sm.shape - # Subset the reference haplotypes to imputed markers ref_h_imputed = ref_h[imputed_pos_idx, :] - # Interpolate allele probabilities at imputed markers i_allele_probs = interpolate_allele_probabilities( sm, ref_h_imputed, genotyped_pos, imputed_pos ) - # TODO: Allow for multiallelic sites. - assert (x, 2) == i_allele_probs.shape - # Get MAP alleles at imputed markers imputed_alleles = get_map_alleles(i_allele_probs) - assert x == len(imputed_alleles) return (imputed_alleles, i_allele_probs)