Skip to content

Commit

Permalink
Merge pull request #153 from pbashyal-nmdp/feature/summary_failure_table
Browse files Browse the repository at this point in the history
Show summary failure table in batch mode
  • Loading branch information
mmaiers-nmdp authored Mar 23, 2022
2 parents a93e960 + a134df5 commit dce480b
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 8 deletions.
3 changes: 2 additions & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
from .pyard import ARD

__author__ = """NMDP Bioinformatics"""
__version__ = '0.7.3'
__version__ = '0.7.4'

20 changes: 20 additions & 0 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
cursor.close()
return result[0] > 0


def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[str]:
"""
Look up Serology in the database and return corresponding list of alleles.
Expand All @@ -156,6 +157,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
alleles = []
return alleles


def is_valid_serology(connection: sqlite3.Connection, serology: str) -> bool:
"""
Check db if the serology exists
Expand Down Expand Up @@ -287,3 +289,21 @@ def load_dict(connection: sqlite3.Connection, table_name: str, columns: Tuple[st
table_as_dict = {k: v for k, v in cursor.fetchall()}
cursor.close()
return table_as_dict


def similar_alleles(connection: sqlite3.Connection, allele_name: str) -> Set[str]:
    """
    Find similar alleles starting with the provided allele_name.

    :param connection: db connection of type sqlite.Connection
    :param allele_name: Allele name to use as a prefix to find similar alleles
    :return: set of allele names from the `alleles` table that start with
             allele_name; an empty set when nothing matches
    """
    # The prefix is bound as a parameter (never interpolated into the SQL);
    # the trailing '%' is the LIKE wildcard that matches any suffix.
    query = "SELECT allele FROM alleles WHERE allele LIKE ?"
    cursor = connection.execute(query, (f"{allele_name}%",))
    try:
        # fetchall() returns a list of 1-tuples, e.g. [('C*04:09N',)]
        result = cursor.fetchall()
    finally:
        # Release the cursor even if fetching fails, matching the other
        # query helpers in this module that close their cursors.
        cursor.close()
    # Unwrap the single column of each row into a set of allele names
    return {row[0] for row in result}
18 changes: 13 additions & 5 deletions scripts/pyard-reduce-csv
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def reduce(allele, locus, column_name):
print(e)
message = f"Failed reducing '{locus_allele}' in column {column_name}"
print(message)
failure_summary_messages.append(message)
failed_to_reduce_alleles.append((column_name, locus_allele))
return allele
# print(f"reduced to '{reduced_allele}'")
if reduced_allele:
Expand Down Expand Up @@ -210,7 +210,7 @@ if __name__ == '__main__':
header=0, dtype=str,
keep_default_na=False)

failure_summary_messages = []
failed_to_reduce_alleles = []
# Reduce each of the specified columns
for column in ard_config["columns_to_reduce_in_csv"]:
if verbose:
Expand Down Expand Up @@ -245,12 +245,20 @@ if __name__ == '__main__':
out_file_name = f"{ard_config['out_csv_filename'] + '.gz' if ard_config['apply_compression'] else ''}"
df.to_csv(out_file_name, index=False, compression=ard_config["apply_compression"])

if len(failure_summary_messages) == 0:
if len(failed_to_reduce_alleles) == 0:
print("No Errors", file=sys.stderr)
else:
print("Summary", file=sys.stderr)
print("-------", file=sys.stderr)
for message in failure_summary_messages:
print("\t", message, file=sys.stderr)
print(f"{len(failed_to_reduce_alleles)} alleles failed to reduce.", file=sys.stderr)
print("| Column Name | Allele | Did you mean ? ", file=sys.stderr)
print("| --------------- | ---------------- | ------------------------- ", file=sys.stderr)
for column_name, locus_allele in failed_to_reduce_alleles:
similar_allele_names = pyard.db.similar_alleles(ard.db_connection, locus_allele)
if similar_allele_names:
similar_allele_names = ",".join(sorted(similar_allele_names, reverse=True))
else:
similar_allele_names = 'NA'
print(f"| {column_name:15} | {locus_allele:16} | {similar_allele_names} ", file=sys.stderr)
# Done
print(f"Saved result to file:{out_file_name}")
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.7.3
current_version = 0.7.4
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name='py-ard',
version='0.7.3',
version='0.7.4',
description="ARD reduction for HLA with Python",
long_description=readme + '\n\n' + history,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit dce480b

Please sign in to comment.