Skip to content

Commit

Permalink
Add HIVIntact analysis, for #10.
Browse files Browse the repository at this point in the history
  • Loading branch information
donkirkby authored and Donaim committed Jul 21, 2023
1 parent 7168745 commit f9f84f7
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 11 deletions.
19 changes: 19 additions & 0 deletions Singularity
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,18 @@ From: ubuntu:22.04
fontconfig libbz2-dev liblzma-dev libssl-dev \
libffi-dev libsqlite3-dev

echo ===== Installing MAFFT ===== >/dev/null
apt-get install -y mafft

echo ===== Installing Python ===== >/dev/null
apt-get install -y python3 python3-pip

echo ===== Installing blast ===== >/dev/null
apt-get install -y ncbi-blast+

echo ===== Installing Python packages ===== >/dev/null
pip3 install git+https://github.com/cfe-lab/[email protected]

cd /opt/primer_finder
pip3 install .

Expand All @@ -55,3 +60,17 @@ From: ubuntu:22.04

%runscript
gene_splicer_sample --hivseqinr /opt/hivseqinr "$@"

%apprun hivintact
gene_splicer_sample --hivintact "$@"

%apphelp hivintact
Search proviral consensus sequences for primers, then use HIVIntact to
decide if the genomes are complete.

%applabels hivintact
KIVE_INPUTS sample_info_csv contigs_csv conseqs_csv cascade_csv
KIVE_OUTPUTS outcome_summary_csv conseqs_primers_csv contigs_primers_csv \
table_precursor_csv hivseqinr_results_tar
KIVE_THREADS 1
KIVE_MEMORY 200
37 changes: 33 additions & 4 deletions gene_splicer/primer_finder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import subprocess
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType
from csv import DictReader, DictWriter
from itertools import groupby
Expand Down Expand Up @@ -77,6 +78,9 @@ def parse_args():
help="Path to HIVSeqinR source code, or download "
"destination. HIVSeqinR will be skipped if this "
"isn't given.")
parser.add_argument('--hivintact',
action='store_true',
help="Launch the HIVIntact analysis.")
parser.add_argument(
'--nodups',
action='store_false',
Expand Down Expand Up @@ -386,8 +390,8 @@ def add_primers(row):
def remove_primers(row):
# Strip the primers out, convert index values from floats.
newseq = row.sequence[
int(row.fwd_sample_primer_size + row.fwd_sample_primer_start)
:-int(row.rev_sample_primer_size + row.rev_sample_primer_start)]
int(row.fwd_sample_primer_size + row.fwd_sample_primer_start):
-int(row.rev_sample_primer_size + row.rev_sample_primer_start)]
row.sequence = newseq
return row

Expand Down Expand Up @@ -422,6 +426,13 @@ def archive_hivseqinr_results(working_path: Path,
archive.add(result_path, result_path.name)


def archive_hivintact_results(working_path: Path,
hivintact_results_tar: typing.IO):
archive = TarFile(fileobj=hivintact_results_tar, mode='w')
for result_path in working_path.iterdir():
archive.add(result_path, result_path.name)


def run(contigs_csv,
conseqs_csv,
cascade_csv,
Expand All @@ -433,7 +444,9 @@ def run(contigs_csv,
sample_size=50,
force_all_proviral=False,
default_sample_name: str = None,
hivseqinr_results_tar: typing.IO = None):
hivseqinr_results_tar: typing.IO = None,
run_hivintact: bool = False,
hivintact_results_tar: typing.IO = None):
all_samples = utils.get_samples_from_cascade(cascade_csv,
default_sample_name)

Expand Down Expand Up @@ -517,6 +530,21 @@ def run(contigs_csv,
if hivseqinr_results_tar is not None:
archive_hivseqinr_results(working_path,
hivseqinr_results_tar)
if run_hivintact:
working_path: Path = outpath / f'hivintact_{i}'
working_path.mkdir(exist_ok=True)
with (working_path / 'hiv-intact.log').open('w') as log_file:
subprocess.run(['proviral',
'intact',
'--subtype=B',
str(no_primers_fasta)],
check=True,
stdout=log_file,
stderr=subprocess.STDOUT,
cwd=working_path)
if hivintact_results_tar is not None:
archive_hivintact_results(working_path,
hivintact_results_tar)
files.append(no_primers_fasta)
return files

Expand All @@ -531,7 +559,8 @@ def main():
hivseqinr=args.hivseqinr,
nodups=args.nodups,
split=args.split,
sample_size=args.sample_size)
sample_size=args.sample_size,
run_hivintact=args.hivintact)
return {'fasta_files': fasta_files, 'args': args}


Expand Down
2 changes: 1 addition & 1 deletion gene_splicer/primer_finder_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def get_slices(self):
hxb2_slice = utils.hxb2[self.hxb2_start -
self.validation_size:self.hxb2_end]
if len(sample_slice) == 0:
logger.debug(\
logger.debug(
'Sample slice size is 0! \n'
f'start: {self.start} \n'
f'end: {self.end} \n'
Expand Down
18 changes: 15 additions & 3 deletions gene_splicer/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def parse_args():
help='Data for proviral landscape plot',
type=FileType('w'))
parser.add_argument('hivseqinr_results_tar',
help="Archive file with HIVSeqinR's final results folder.",
help="Archive file with HIVSeqinR's final results "
"folder, or HIVIntact's results.",
type=FileType('wb'))
parser.add_argument(
'-p',
Expand All @@ -57,6 +58,9 @@ def parse_args():
help="Path to HIVSeqinR source code, or download "
"destination. HIVSeqinR will be skipped if this "
"isn't given.")
parser.add_argument('--hivintact',
action='store_true',
help="Launch the HIVIntact analysis.")
parser.add_argument(
'--nodups',
action='store_false',
Expand Down Expand Up @@ -88,18 +92,26 @@ def main():
info_reader = DictReader(args.sample_info_csv)
sample_info: dict = next(info_reader)
run_name = sample_info.get('run_name', 'kive_run')
if args.hivintact:
hivseqinr_results_tar = None
hivintact_results_tar = args.hivseqinr_results_tar
else:
hivseqinr_results_tar = args.hivseqinr_results_tar
hivintact_results_tar = None
fasta_files = primer_finder.run(contigs_csv=args.contigs_csv,
conseqs_csv=args.conseqs_csv,
cascade_csv=args.cascade_csv,
hivseqinr_results_tar=args.hivseqinr_results_tar,
hivseqinr_results_tar=hivseqinr_results_tar,
name=run_name,
outpath=outpath,
hivseqinr=args.hivseqinr,
nodups=args.nodups,
split=args.split,
sample_size=args.sample_size,
force_all_proviral=True,
default_sample_name=sample_info['sample'])
default_sample_name=sample_info['sample'],
run_hivintact=args.hivintact,
hivintact_results_tar=hivintact_results_tar)
for file in fasta_files:
gene_splicer.run(file, outdir=outpath)
utils.generate_table_precursor(name=run_name, outpath=outpath)
Expand Down
13 changes: 10 additions & 3 deletions gene_splicer/study_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ def parse_args():
'the runs in samples_csv. Any samples not found '
'in samples.csv will guess the participant id '
'from the first part of the sample name.')
parser.add_argument('--hivintact',
action='store_true',
help="Launch the HIVIntact analysis instead of "
"HIVSeqinR.")
return parser.parse_args()


Expand Down Expand Up @@ -197,7 +201,7 @@ def write_warnings(self, report_file: typing.TextIO, limit: int = None):
file=report_file)


def run_gene_splicer(run_path: Path, outcome_folder: Path):
def run_gene_splicer(run_path: Path, outcome_folder: Path, run_hivintact: bool):
version_results_path = run_path / 'Results' / 'version_7.14'
assert version_results_path.exists(), version_results_path
denovo_path = version_results_path / 'denovo'
Expand All @@ -214,11 +218,14 @@ def run_gene_splicer(run_path: Path, outcome_folder: Path):
pipeline_args = [python_path,
'-m', 'gene_splicer.pipeline',
'--outpath', str(outcome_folder),
'--hivseqinr', str(hivseqinr_path),
contigs_path,
conseq_path,
cascade_path,
short_run_name]
if run_hivintact:
pipeline_args.append('--hivintact')
else:
pipeline_args.append(f'--hivseqinr={hivseqinr_path}')
try:
with log_path.open('w') as log_file:
run(pipeline_args,
Expand Down Expand Up @@ -259,7 +266,7 @@ def main():
print('Missing denovo results:', run_path)
continue
else:
run_gene_splicer(run_path, outcome_path.parent)
run_gene_splicer(run_path, outcome_path.parent, args.hivintact)
assert outcome_path.exists(), outcome_path
print('.', end='', flush=True)
dots_printed = True
Expand Down

0 comments on commit f9f84f7

Please sign in to comment.