Skip to content

Commit

Permalink
Add function to check data to avoid incorrect allele encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
szhan committed Jan 25, 2024
1 parent eb399dd commit 1eda0c5
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions python/tests/beagle_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,30 @@ def remap_alleles(a):
return b


def check_data(ref_h, query_h):
    """
    Check that every allele in the query haplotype occurs in the
    reference haplotypes at the same site.

    Sites where the query is missing (encoded as -1) are skipped.
    On the first site whose query allele is absent from the
    corresponding row of the reference, a diagnostic message is
    printed and the check stops.

    :param numpy.ndarray ref_h: Reference haplotypes, indexed as
        ``ref_h[site, :]`` (first axis is sites; the second axis is
        presumably the reference haplotypes -- confirm against callers).
    :param numpy.ndarray query_h: One query haplotype, with one entry
        per site.
    :return: True if all non-missing query alleles are found in the
        reference at their site, False otherwise.
    :rtype: bool
    :raises AssertionError: If the number of sites in the reference
        differs from the length of the query.
    """
    m = ref_h.shape[0]
    # Explicit raise instead of a bare ``assert`` so that the check is
    # still enforced when Python runs with -O (which strips asserts).
    # AssertionError is kept so that existing callers see the same type.
    if m != len(query_h):
        raise AssertionError("Reference and query differ in length.")
    for i, allele in enumerate(query_h):
        if allele == -1:
            # Missing data in the query cannot be validated; skip it.
            continue
        if allele not in ref_h[i, :]:
            print(f"Allele {allele} at {i}th position not in reference.")
            return False
    return True


def convert_to_genetic_map_positions(pos, genetic_map=None):
"""
Convert physical positions (bp) to genetic map positions (cM).
Expand Down

0 comments on commit 1eda0c5

Please sign in to comment.