From abf0b4dcbb2230d957efd9c0a861851ec3cd295a Mon Sep 17 00:00:00 2001 From: nate Date: Mon, 13 Jan 2025 11:44:57 -0500 Subject: [PATCH] Incorporated threads, and parsed file name --- tests/test_command.py | 4 ++-- tests/test_freyja_tasks.py | 11 ++++++----- vibecheck/src/freyja_tasks.py | 20 +++++++++++++------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/test_command.py b/tests/test_command.py index b1ff8f3..f6607e9 100644 --- a/tests/test_command.py +++ b/tests/test_command.py @@ -129,13 +129,13 @@ def inputFreyha(): if outdir.exists(): shutil.rmtree(outdir) -@pytest.mark.skip(reason="Input files are too large") +#@pytest.mark.skip(reason="Input files are too large") def test_Freyja_pipeline(inputFreyha): results = inputFreyha / "lineage_report.csv" expected_result = ( "sequence_id,lineage,confidence,freyja_notes\n" - "foo,T13,1.000,Freyja results: T13(100.0%)\n" + "OUG-1858,T13,1.000,Freyja results: T13(100.0%)\n" ) assert results.exists() diff --git a/tests/test_freyja_tasks.py b/tests/test_freyja_tasks.py index 29c977c..f6bf6ba 100644 --- a/tests/test_freyja_tasks.py +++ b/tests/test_freyja_tasks.py @@ -83,11 +83,12 @@ def test_align_reads(mock_run_command, mock_paths, mock_file_exists): mock_paths["read2"], mock_paths["reference"], mock_paths["tempdir"], + 1 ) expected_calls = [ call( - "minimap2 -ax sr /test/reference.fasta /test/read1.fastq.gz /test/read2.fastq.gz | samtools view -b - | samtools sort -o /test/temp/alignment.bam -", + "minimap2 -ax sr -t 1 /test/reference.fasta /test/read1.fastq.gz /test/read2.fastq.gz | samtools view -b - | samtools sort -o /test/temp/alignment.bam -", error_message="Alignment of raw reads failed", ), call( @@ -163,14 +164,14 @@ def freyja_resultsA(tmp_path): ) input_file.write_text(input_text) - parse_freyja_results(input_file, output_file) + parse_freyja_results(input_file, "testA", output_file) return output_file def test_parse_freyja_results_A(freyja_resultsA): expected_result = ( 
"sequence_id,lineage,confidence,freyja_notes\n" - "foo,MEASLES-D9,0.604,Freyja results: MEASLES-D9(79.7%) MEASLES-H1(20.3%)\n" + "testA,MEASLES-D9,0.604,Freyja results: MEASLES-D9(79.7%) MEASLES-H1(20.3%)\n" ) assert freyja_resultsA.read_text() == expected_result @@ -201,13 +202,13 @@ def freyja_resultsB(tmp_path): ) input_file.write_text(input_text) - parse_freyja_results(input_file, output_file) + parse_freyja_results(input_file, "testB", output_file) return output_file def test_parse_freyja_results_B(freyja_resultsB): expected_result = ( "sequence_id,lineage,confidence,freyja_notes\n" - "foo,T13,1.000,Freyja results: T13(100.0%)\n" + "testB,T13,1.000,Freyja results: T13(100.0%)\n" ) assert freyja_resultsB.read_text() == expected_result diff --git a/vibecheck/src/freyja_tasks.py b/vibecheck/src/freyja_tasks.py index 58f62e5..806be7a 100644 --- a/vibecheck/src/freyja_tasks.py +++ b/vibecheck/src/freyja_tasks.py @@ -19,6 +19,8 @@ def run_pipeline( threads: int, ) -> None: + name = str( reads[0].name ).split( "." 
)[0] + if no_subsample: console.log("Aligning reads to reference") alignment = align_reads(*reads, reference, tempdir) @@ -27,19 +29,19 @@ def run_pipeline( sub_read1, sub_read2 = downsample_reads(*reads, tempdir, subsample_fraction) console.log("Aligning reads to reference") - alignment = align_reads(sub_read1, sub_read2, reference, tempdir) + alignment = align_reads(sub_read1, sub_read2, reference, tempdir, threads) console.log("Calculating depth of coverage across reference genome.") depth = generate_depth(alignment, reference, tempdir) console.log("Calling variants between reads and reference.") - variants_filled = call_variants(alignment, reference, tempdir) + variants_filled = call_variants(alignment, reference, tempdir, threads) console.log("Calculating relative lineage abundances using Freyja.") freyja_results = freyja_demix(variants_filled, depth, barcodes, tempdir) console.log("Parsing Freyja results.") - parse_freyja_results(freyja_results, outfile) + parse_freyja_results(freyja_results, name, outfile) def downsample_reads( @@ -86,7 +88,7 @@ def downsample_reads( return sub_read1, sub_read2 -def align_reads(read1: Path, read2: Path, reference: Path, tempdir: Path) -> Path: +def align_reads(read1: Path, read2: Path, reference: Path, tempdir: Path, threads: int) -> Path: """Aligns reads to a reference using `minimap2` and processes with `samtools`. Parameters @@ -99,6 +101,8 @@ def align_reads(read1: Path, read2: Path, reference: Path, tempdir: Path) -> Pat Location of FASTA file containing the reference sequence. tempdir : Path Location of temporary directory. 
+ threads : int + Number of threads to use during alignment. Returns ------- @@ -107,7 +111,7 @@ def align_reads(read1: Path, read2: Path, reference: Path, tempdir: Path) -> Pat """ alignment = tempdir / "alignment.bam" - minimap_command = f"minimap2 -ax sr {reference} {read1} {read2} | samtools view -b - | samtools sort -o {alignment} -" + minimap_command = f"minimap2 -ax sr -t {threads} {reference} {read1} {read2} | samtools view -b - | samtools sort -o {alignment} -" index_command = f"samtools index {alignment}" run_command(minimap_command, error_message="Alignment of raw reads failed") @@ -221,7 +225,7 @@ def freyja_demix( return freyja_output -def parse_freyja_results(freyja_results: Path, outfile: Path) -> None: +def parse_freyja_results(freyja_results: Path, name: str, outfile: Path) -> None: """Parses Freyja lineage abundance results from a text file. Exits if required fields are missing. @@ -229,6 +233,8 @@ def parse_freyja_results(freyja_results: Path, outfile: Path) -> None: ---------- freyja_results : Path Location of text file containing Freyja results + name : str + Name of sample. outfile : Path Location to save CSV file containing parsed Freyja results. """ @@ -269,4 +275,4 @@ def parse_freyja_results(freyja_results: Path, outfile: Path) -> None: with open(outfile, "wt") as out_file: out_file.write("sequence_id,lineage,confidence,freyja_notes\n") - out_file.write(f"foo,{top_lineage},{confidence:.3f},{summary}\n") + out_file.write(f"{name},{top_lineage},{confidence:.3f},{summary}\n")