Skip to content

Commit

Permalink
Merge pull request #3 from dorbarker/logging
Browse files Browse the repository at this point in the history
Logging
  • Loading branch information
dorbarker authored Nov 27, 2019
2 parents e7d9a16 + dcf8470 commit 56538ff
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 10 deletions.
2 changes: 1 addition & 1 deletion fsac/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__version__ = '1.0.4'
__version__ = '1.1.0'
__author__ = 'Dillon Barker'
__email__ = '[email protected]'
6 changes: 5 additions & 1 deletion fsac/allele_call.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import subprocess
import io
import re
Expand All @@ -16,6 +17,8 @@ def allele_call(genome: Path, genes: Path, output: Path) -> None:

def blast(query_path: Path, genome_path: Path) -> pd.DataFrame:

logging.info('BLASTing locus %s in subject %s', query_path, genome_path)

out_format = '6 qseqid sseqid pident length qstart qend sstart send qlen \
slen bitscore gaps sseq qseq mismatch'

Expand Down Expand Up @@ -81,7 +84,6 @@ def filter_result(blast_output: pd.DataFrame) -> Optional[pd.Series]:

# Perfect match
elif any(blast_output['correct']):

best = blast_output[blast_output['correct']]

# Highest bitscore
Expand Down Expand Up @@ -175,3 +177,5 @@ def unpack_value(value):

with json_out.open('w') as j:
json.dump(results, j, indent=4)
logging.info('Wrote results to %s', json_out)

25 changes: 19 additions & 6 deletions fsac/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import argparse
import logging
import os
import sys
from pathlib import Path

Expand All @@ -7,14 +9,23 @@
from .update import update_directory
from .tabulate import tabulate_calls

# Ensure numpy, via pandas, doesn't use more than 1 thread.
# If numpy uses multiple threads, it brings no performance benefit in this
# case, but can cause problems if you're running lots of jobs
# on a single HPC node
for env_var in ('OPENBLAS_NUM_THREADS',
'OMP_NUM_THREADS',
'MKL_NUM_THREADS',
'NUMEXPR_NUM_THREADS'):
os.environ[env_var] = '1'

def arguments():

parser = argparse.ArgumentParser()

parser.add_argument('-v', '--version',
action='store_true',
help='Print version and exit')
action='version',
version=f'{parser.prog} {__version__}')

parser.set_defaults(func=None)
subparsers = parser.add_subparsers(title='Commands')
Expand Down Expand Up @@ -91,10 +102,6 @@ def arguments():

args = parser.parse_args()

if args.version:
print('fsac', __version__)
sys.exit(0)

if args.func is None:
parser.print_help()
sys.exit(0)
Expand All @@ -106,6 +113,12 @@ def main():

args = arguments()

logging.basicConfig(datefmt='%Y-%m-%d %H:%M',
format='%(asctime)s - %(levelname)s: %(message)s',
stream=sys.stderr,
level=logging.INFO)

logging.info('Running %s', args.func.__name__)
args.func(args)


Expand Down
17 changes: 15 additions & 2 deletions fsac/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from contextlib import suppress
import json
import logging

SeqAllele = Tuple[Optional[str], Optional[str]]
LocusData = Union[str, int, float, bool]
Expand Down Expand Up @@ -67,20 +68,25 @@ def extend_hit(gene, threshold: int, genome_path: Path):
of the alignment causes the alignment to not be extended.
"""

logging.info('Extending hit for %s in %s', gene['QueryName'], genome_path)
seq = gene['SubjAln'].replace('-', '')

difference = gene['QueryLength'] - len(seq)

if difference is 0:
logging.info('Difference is 0. Skipping')
# handle a complete hit
# return early
return seq, gene['MarkerMatch']

if difference > threshold:
logging.info('Difference (%s) is greater than %s. Skipping',
difference, threshold)
# handle large discrepancy
# return early
return None, None


# Open subject FASTA
sequences_names = get_known_alleles(genome_path)
names_sequences = {str(value.split()[0]): key
Expand All @@ -103,8 +109,16 @@ def extend_hit(gene, threshold: int, genome_path: Path):

else:

full_sequence = reverse_complement(contig[start : end])
try:
full_sequence = reverse_complement(contig[start : end])

except KeyError:
msg = 'Cannot assign allele due to ambiguous subject sequence'
logging.info(msg)
return None, None

logging.info('Extended hit to length %s, expected %s',
len(full_sequence) - 1, gene['QueryLength'])
return full_sequence, None


Expand All @@ -115,7 +129,6 @@ def reverse_complement(sequence: str):
'T': 'A',
'G': 'C',
'C': 'G',
'N': 'N'
}

return ''.join(complements[x] for x in reversed(sequence))
Expand Down

0 comments on commit 56538ff

Please sign in to comment.