Skip to content

Commit

Permalink
Db status check (#124)
Browse files Browse the repository at this point in the history
* Add `pyard-status` command to check the status of all tables in the databases. This helps to spot missing tables and to compare row counts between versions.

```
-------------------------------------------
IMGT DB Version: 3450
-------------------------------------------
|Table Name          |Rows                |
|-----------------------------------------|
|dup_g               |                  50|
|dup_lg              |                   2|
|dup_lgx             |                   2|
|g_group             |               10841|
|lg_group            |               10841|
|lgx_group           |               10841|
|exon_group          |                9724|
|p_group             |                9724|
|alleles             |               33525|
|xx_codes            |                1690|
|who_alleles         |               31552|
|who_group           |               31930|
-------------------------------------------
```

The missing tables are noted and the database can be rebuilt with `pyard-import --re-install`

```
-------------------------------------------
IMGT DB Version: 3290
-------------------------------------------
|Table Name          |Rows                |
|-----------------------------------------|
|dup_g               |                  17|
|dup_lg              |                   0|
|dup_lgx             |                   0|
|g_group             |                2786|
|lg_group            |                2786|
|lgx_group           |                2786|
MISSING: exon_group table
MISSING: p_group table
|alleles             |               18451|
|xx_codes            |                 946|
MISSING: who_alleles table
MISSING: who_group table
-------------------------------------------
```

* Bump version: 0.6.8 → 0.6.9
  • Loading branch information
pbashyal-nmdp authored Sep 27, 2021
1 parent 16d18e6 commit 49956e8
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 3 deletions.
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,7 @@ Command Line Tools
$ pyard -v 3290 --gl 'A1' -r lgx
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...
# Show the status of all py-ard databases
$ pyard-status
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
from .pyard import ARD

__author__ = """NMDP Bioinformatics"""
__version__ = '0.6.8'
__version__ = '0.6.9'
15 changes: 15 additions & 0 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,21 @@ def tables_exist(connection: sqlite3.Connection, table_names: List[str]):
return all([table_exists(connection, table_name) for table_name in table_names])


def count_rows(connection: sqlite3.Connection, table_name: str) -> int:
    """
    Count number of rows in the table.

    :param connection: db connection of type sqlite.Connection
    :param table_name: table in the sqlite db
    :return: number of rows in table_name
    :raises sqlite3.OperationalError: if the query fails, e.g. when
        table_name is not a table in the database
    """
    # A table name cannot be bound as a query parameter, so it is quoted as
    # an SQL identifier instead: wrap in double quotes and double any
    # embedded double quote so the name cannot break out of the identifier.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    cursor = connection.execute(f"SELECT count(*) from {quoted_name}")
    try:
        # count(*) always yields exactly one row with a single column
        return cursor.fetchone()[0]
    finally:
        # Release the cursor even if fetching the result fails
        cursor.close()


def mac_code_to_alleles(connection: sqlite3.Connection, code: str) -> List[str]:
"""
Look up the MAC code in the database and return corresponding list
Expand Down
81 changes: 81 additions & 0 deletions scripts/pyard-status
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# py-ard
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import argparse
import os
import pathlib
import re

from pyard import db, data_repository


def get_data_dir(data_dir):
    """
    Resolve the directory that holds the py-ard SQLite database files.

    :param data_dir: directory supplied on the command line, or a falsy
        value (None / empty string) to use the default install directory
    :return: a pathlib.Path for data_dir when one is supplied, otherwise
        the value returned by db.get_pyard_db_install_directory()
    :raises RuntimeError: if data_dir is supplied but is not an existing
        directory
    """
    if not data_dir:
        # Nothing supplied: fall back to where py-ard installs its databases
        return db.get_pyard_db_install_directory()

    path = pathlib.Path(data_dir)
    # is_dir() is already False for a path that does not exist, so a
    # separate exists() check is not needed.
    if not path.is_dir():
        raise RuntimeError(f"{data_dir} is not a valid directory")
    return path


def _print_db_status(data_dir, imgt_version):
    """
    Print a table of row counts for one IMGT database version.

    Every table listed in data_repository.ars_mapping_tables and
    data_repository.code_mapping_tables is reported either with its row
    count or as MISSING.

    :param data_dir: directory that contains the database file
    :param imgt_version: version string taken from the database filename
    """
    db_connection = db.create_db_connection(data_dir, imgt_version)
    try:
        print('-' * 43)
        print(f"IMGT DB Version: {imgt_version}")
        print('-' * 43)
        print(f"|{'Table Name':20}|{'Rows':20}|")
        print(f"|{'-' * 41}|")
        for table in data_repository.ars_mapping_tables + \
                data_repository.code_mapping_tables:
            if db.table_exists(db_connection, table):
                total_rows = db.count_rows(db_connection, table)
                print(f"|{table:20}|{total_rows:20}|")
            else:
                print(f"MISSING: {table} table")
        print('-' * 43)
    finally:
        # Always release the connection, even if a query above fails
        db_connection.close()


def main():
    """
    Report the status of every py-ard database found in the data directory.
    """
    parser = argparse.ArgumentParser(
        usage="""
        [--data-dir <directory for db file>]\n
        """,
        description="""
        py-ard tool to provide a status report for reference SQLite databases.
        """
    )
    parser.add_argument(
        "--data-dir",
        dest="data_dir"
    )
    args = parser.parse_args()
    data_dir = get_data_dir(args.data_dir)

    imgt_regex = re.compile(r'pyard-(.+)\.sqlite3')
    # Only the files directly inside data_dir are considered: the database
    # connection is always opened relative to data_dir, so files found in
    # sub-directories could not be reported correctly. os.walk() yields
    # nothing for a directory that does not exist, so that case stays silent.
    _, _, filenames = next(os.walk(data_dir), (data_dir, [], []))
    for filename in sorted(filenames):
        # Get imgt version from the filename
        # eg: get 3440 from 'pyard-3440.sqlite3'
        # fullmatch() rejects look-alikes such as 'pyard-3440.sqlite3-journal'
        match = imgt_regex.fullmatch(filename)
        if match is None:
            # Not a py-ard database file; skip it
            continue
        _print_db_status(data_dir, match.group(1))


if __name__ == '__main__':
    main()
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.6.8
current_version = 0.6.9
commit = True
tag = True

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name='py-ard',
version='0.6.8',
version='0.6.9',
description="ARD reduction for HLA with Python",
long_description=readme + '\n\n' + history,
author="CIBMTR",
Expand All @@ -55,6 +55,7 @@
scripts=[
'scripts/pyard',
'scripts/pyard-import',
'scripts/pyard-status',
'scripts/pyard-reduce-csv'
],
install_requires=requirements,
Expand Down

0 comments on commit 49956e8

Please sign in to comment.