Skip to content

Commit

Permalink
Use unstitched_cascade.csv for proviral pipeline
Browse files (browse the repository at this point in the history)
  • Loading branch information
Donaim committed Oct 31, 2024
1 parent 873613d commit e3efdea
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Singularity
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ From: python:3.8
conseq_all_csv concordance_csv concordance_seed_csv failed_align_csv \
coverage_scores_csv coverage_maps_tar aligned_csv g2p_aligned_csv \
genome_coverage_csv genome_coverage_svg genome_concordance_svg \
unstitched_conseq_csv unstitched_contigs_csv contigs_csv \
unstitched_cascade_csv unstitched_conseq_csv unstitched_contigs_csv contigs_csv \
read_entropy_csv conseq_region_csv conseq_stitched_csv
KIVE_THREADS 2
KIVE_MEMORY 6000
Expand Down
7 changes: 7 additions & 0 deletions docs/steps.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,13 @@ Individual files are described after the list of steps.
* unstitched_conseq.csv
* region - the region mapped to
* sequence - the consensus sequence used
* unstitched_cascade.csv - number of read pairs that flow through the pipeline steps
* demultiplexed - count from the raw FASTQ
* v3loop - aligned with V3LOOP
* g2p - valid reads to count in G2P
* prelim_map - mapped to other references on first pass
* remap - mapped to other references after remapping
* aligned - aligned with a reference and merged with mate
* resistance.csv
* region - the region code, like PR or RT
* drug_class - the drug class code from the HIVdb rules, like NRTI
Expand Down
6 changes: 4 additions & 2 deletions micall/monitor/kive_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
'resistance_consensus_csv',
'wg_fasta',
'mid_fasta',
'unstitched_cascade_csv',
'unstitched_conseq_csv',
'unstitched_contigs_csv',
'contigs_csv',
Expand Down Expand Up @@ -939,12 +940,13 @@ def run_proviral_pipeline(self, sample_watcher, folder_watcher, description):
run_dataset['argument_name']: run_dataset['dataset']
for run_dataset in main_run['datasets']
if run_dataset['argument_name'] in ('sample_info_csv',
'unstitched_cascade_csv',
'unstitched_conseq_csv',
'unstitched_contigs_csv',
'cascade_csv')}
'unstitched_contigs_csv')}
input_datasets = {
argument_name: self.kive_retry(lambda: self.session.get(url).json())
for argument_name, url in input_dataset_urls.items()}
input_datasets['cascade_csv'] = input_datasets.pop('unstitched_cascade_csv')
input_datasets['conseqs_csv'] = input_datasets.pop('unstitched_conseq_csv')
input_datasets['contigs_csv'] = input_datasets.pop('unstitched_contigs_csv')
run = self.find_or_launch_run(
Expand Down
2 changes: 1 addition & 1 deletion micall/tests/test_kive_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -1765,7 +1765,7 @@ def test_launch_proviral_run(raw_data_with_two_samples, mock_open_kive):
argument_name='unstitched_conseq_csv'),
dict(dataset='/datasets/113/',
argument_type='O',
argument_name='cascade_csv')]] # run datasets
argument_name='unstitched_cascade_csv')]] # run datasets
mock_session.get.return_value.json.side_effect = [
dict(url='/datasets/110/', id=110),
dict(url='/datasets/111/', id=111),
Expand Down
3 changes: 2 additions & 1 deletion micall_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,8 @@ def collate_samples(run_info: RunInfo):
'concordance.csv',
'concordance_seed.csv']
if run_info.is_denovo:
filenames += ['conseq_stitched.csv', 'conseq_region.csv', 'unstitched_conseq.csv']
filenames += ['conseq_stitched.csv', 'conseq_region.csv',
'unstitched_cascade.csv', 'unstitched_conseq.csv', 'unstitched_contigs.csv']
for filename in filenames:
out_path = run_info.output_path
with open(os.path.join(out_path, filename), 'w') as fout:
Expand Down
3 changes: 3 additions & 0 deletions micall_kive.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ def parse_args():
action='store_true',
help='Use de novo assembly instead of mapping to '
'reference sequences.')
parser.add_argument('unstitched_cascade_csv',
nargs='?',
help='count of reads at each step')
parser.add_argument('unstitched_conseq_csv',
nargs='?',
help='CSV containing mapping unstitched consensus sequences')
Expand Down
1 change: 1 addition & 0 deletions release_test_microtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ def process_sample(self, fastq_file: Path):
'genome_coverage.csv',
'genome_coverage.svg',
'genome_concordance.svg',
'unstitched_cascade.csv',
'unstitched_conseq.csv',
'unstitched_contigs.csv',
'contigs.csv',
Expand Down

0 comments on commit e3efdea

Please sign in to comment.