From 6a2035dd481c7d5050e0259b835ff4dd94225ae0 Mon Sep 17 00:00:00 2001 From: Shing Zhan Date: Tue, 20 Feb 2024 16:07:37 +0000 Subject: [PATCH] WIP: Print results to VCF --- python/tests/beagle_numba.py | 108 +++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index ee15681f06..d75a176257 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -571,3 +571,111 @@ def run_tsimpute( ) imputed_alleles, max_allele_probs = get_map_alleles(int_allele_probs) return (imputed_alleles, max_allele_probs) + + +def compute_alt_allele_frequencies(): + """ + In BEAGLE 4.1, AF: "Estimated ALT Allele Frequencies". + """ + pass + + +def compute_allelic_r_squared(): + """ + In BEAGLE 4.1, AR2: "Allelic R-Squared: estimated squared correlation + between most probable REF dose and true REF dose". + """ + pass + + +def compute_dosage_r_squared(): + """ + In BEAGLE 4.1, DR2: "Dosage R-Squared: estimated squared correlation + between estimated REF dose [P(RA) + 2 * P(RR)] and true REF dose". + """ + pass + + +def compute_genotype_likelihoods(): + """ + In BEAGLE 4.1, GL: "Log10-scaled Genotype Likelihood". + + TODO: Check if BEAGLE 4.1 actually outputs this. + """ + pass + + +def compute_genotype_probs(allele_probs_1, allele_probs_2): + """ + Compute genotype probabilities at ungenotyped positions of an individual. + + Take element-wise products of allele probabilities at each ungenotyped positions. + + In BEAGLE 4.1 output, GP: "Estimated Genotype Probability". + + :param numpy.ndarray allele_probs_1: Imputed allele probabilities for haplotype 1. + :param numpy.ndarray allele_probs_2: Imputed allele probabilities for haplotype 2. + :return: Genotype probabilities. + :rtype: numpy.ndarray(dtype=np.float64) + """ + assert len(allele_probs_1) == len( + allele_probs_2 + ), "Lengths of allele probabilities differ." + genotype_probs = allele_probs_1 * allele_probs_2 + return genotype_probs + + +def compute_dosage_scores( + imputed_alleles_1, allele_probs_1, imputed_alleles_2, allele_probs_2 +): + """ + Compute dosage scores at ungenotyped positions of an individual. + + In BEAGLE 4.1 output, DS: "Estimated ALT dose [P(RA) + P(AA)". + + :param numpy.ndarray imputed_alleles_1: Imputed alleles for haplotype 1. + :param numpy.ndarray allele_probs_1: Imputed allele probabilities for haplotype 1. + :param numpy.ndarray imputed_alleles_2: Imputed alleles for haplotype 2. + :param numpy.ndarray allele_probs_2: Imputed allele probabilities for haplotype 2. + :return: Dosage scores at ungenotyped positions. + :rtype: numpy.ndarray(dtype=np.float64) + """ + assert len(imputed_alleles_1) == len( + imputed_alleles_2 + ), "Lengths of imputed alleles differ." + assert len(allele_probs_1) == len( + allele_probs_2 + ), "Lengths of allele probabilities differ." + x = len(imputed_alleles_1) + dosage_scores = np.zeros(x, dtype=np.float64) + # TODO: Figure out how BEAGLE does this. + return dosage_scores + + +def write_vcf(out_file): + """ + :param str out_file: Path to output VCF file. + :return: None + """ + _HEADER = [ + "##fileformat=VCFv4.2", + "##INFO=', + "##INFO=', + "##INFO=', + '##INFO=', + '##FORMAT=', + "##FORMAT=', + "##FORMAT=', + "##FORMAT=', + ] + with open(out_file, "w") as f: + for line in _HEADER: + f.write(line + "\n")