From 9b66c9bef2989e72ecd25a4e772ee25957cf6d62 Mon Sep 17 00:00:00 2001 From: Shing Zhan Date: Sun, 25 Feb 2024 12:34:15 +0000 Subject: [PATCH] WIP: allelic Rsqured --- python/tests/beagle_numba.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/python/tests/beagle_numba.py b/python/tests/beagle_numba.py index d25467adf0..dabc1ef2c8 100644 --- a/python/tests/beagle_numba.py +++ b/python/tests/beagle_numba.py @@ -584,12 +584,34 @@ def compute_alt_allele_frequencies(): pass -def compute_allelic_r_squared(): +def compute_estimated_allelic_r_squared( + alleles_1, allele_probs_1, alleles_2, allele_probs_2 +): """ - In BEAGLE 4.1, AR2: "Allelic R-Squared: estimated squared correlation + Compute the estimated allelic R^2 for a given imputed site. + + It is not the true allelic R^2, which needs access to true genotypes to compute. + The true and estimated allelic R-squared have been shown to be highly correlated. + + In BEAGLE 4.1, it is AR2: "Allelic R-Squared: estimated squared correlation between most probable REF dose and true REF dose". + + See the formulation in Appendix 1 of Browning and Browning (2009). + Am J Hum Genet. 84(2): 210–223. doi: 10.1016/j.ajhg.2009.01.005. + + :param numpy.ndarray alleles_1: Imputed alleles for haplotype 1. + :param numpy.ndarray allele_probs_1: Imputed allele probabilities for haplotype 1. + :param numpy.ndarray alleles_2: Imputed alleles for haplotype 2. + :param numpy.ndarray allele_probs_2: Imputed allele probabilities for haplotype 2. + :return: Estimated allelic R-squared. + :rtype: float """ - pass + assert len(alleles_1) == len(alleles_2), "Lengths of imputed alleles differ." + assert len(allele_probs_1) == len( + allele_probs_2 + ), "Lengths of allele probabilities differ." + est_allelic_rsq = 0 + return est_allelic_rsq def compute_dosage_r_squared():