def check_data(ref_h, query_h):
    """
    Verify that every non-missing allele of the query haplotype also occurs
    among the reference haplotypes at the same site.

    Sites at which the query is missing (encoded as -1) are skipped. At the
    first site whose query allele is absent from the reference, a message is
    printed and the scan stops.

    :param numpy.ndarray ref_h: Reference haplotypes; the first axis indexes
        sites and must have the same length as ``query_h``.
    :param numpy.ndarray query_h: One query haplotype, one allele per site.
    :return: True if all non-missing query alleles are found in the
        reference at their site, False otherwise.
    :rtype: bool
    """
    num_sites = ref_h.shape[0]
    assert num_sites == len(query_h), "Reference and query differ in length."
    for site in range(num_sites):
        allele = query_h[site]
        # Missing data carries no allele to look up, so it can never fail.
        if allele != -1 and allele not in ref_h[site, :]:
            print(f"Allele {allele} at {site}th position not in reference.")
            return False
    return True