diff --git a/captus/align.py b/captus/align.py index 1dc373c..9c80c1c 100644 --- a/captus/align.py +++ b/captus/align.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify @@ -36,7 +36,7 @@ def align(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(args.out, "captus-assembly_align.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(args.out, "captus-align.log"), stdout_verbosity_level=1) mar = 26 # Margin for aligning parameters and values @@ -1538,7 +1538,7 @@ def write_paralog_stats(out_dir, tsv_comment, shared_paralog_stats): if not shared_paralog_stats: return red("No paralogs were found...") else: - stats_tsv_file = Path(out_dir, "captus-assembly_align.paralogs.tsv") + stats_tsv_file = Path(out_dir, "captus-align_paralogs.tsv") with open(stats_tsv_file, "wt") as tsv_out: tsv_out.write(tsv_comment) tsv_out.write("\t".join(["marker_type", @@ -1783,7 +1783,7 @@ def write_aln_stats(out_dir, tsv_comment, shared_aln_stats): if not shared_aln_stats: return None else: - stats_tsv_file = Path(out_dir, "captus-assembly_align.alignments.tsv") + stats_tsv_file = Path(out_dir, "captus-align_alignments.tsv") with open(stats_tsv_file, "wt") as tsv_out: tsv_out.write(tsv_comment) tsv_out.write("\t".join(["path", @@ -1817,7 +1817,7 @@ def write_sam_stats(out_dir, tsv_comment, shared_sam_stats): if not shared_sam_stats: return None else: - stats_tsv_file = Path(out_dir, "captus-assembly_align.samples.tsv") + stats_tsv_file = Path(out_dir, "captus-align_samples.tsv") with open(stats_tsv_file, "wt") as tsv_out: tsv_out.write(tsv_comment) tsv_out.write("\t".join(["sample", diff --git a/captus/assemble.py b/captus/assemble.py index 0169f90..0c7d46b 100644 --- 
a/captus/assemble.py +++ b/captus/assemble.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify @@ -33,7 +33,7 @@ def assemble(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(args.out, "captus-assembly_assemble.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(args.out, "captus-assemble.log"), stdout_verbosity_level=1) mar = 21 # Margin for aligning parameters and values @@ -1021,7 +1021,7 @@ def calc_asm_stats( msg = ( - f"'{sample_name}' {stage.upper()}: {num_contigs:,} contigs, total {tot_length:,} bp," + f"'{sample_name}': {stage.upper()} {num_contigs:,} contigs, total {tot_length:,} bp," f" min {min_length:,} bp, max {max_length:,} bp, avg {avg_length:,} bp, N50 {n50:,} bp" ) return msg @@ -1096,9 +1096,9 @@ def collect_asm_stats(out_dir, tsv_comment): depth_tsv_files = sorted(list(Path(out_dir).resolve().rglob("depth_stats.tsv"))) length_tsv_files = sorted(list(Path(out_dir).resolve().rglob("length_stats.tsv"))) - assembly_stats_tsv = Path(out_dir, "captus-assembly_assemble.assembly_stats.tsv") - depth_stats_tsv = Path(out_dir, "captus-assembly_assemble.depth_stats.tsv") - length_stats_tsv = Path(out_dir, "captus-assembly_assemble.length_stats.tsv") + assembly_stats_tsv = Path(out_dir, "captus-assemble_assembly_stats.tsv") + depth_stats_tsv = Path(out_dir, "captus-assemble_depth_stats.tsv") + length_stats_tsv = Path(out_dir, "captus-assemble_length_stats.tsv") if not assembly_tsv_files or not depth_tsv_files or not length_tsv_files: return None, None, None diff --git a/captus/bait.py b/captus/bait.py index dd74b28..e2bee80 100644 --- a/captus/bait.py +++ b/captus/bait.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 
2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify @@ -36,7 +36,7 @@ def bait(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(out_dir, "captus-design_bait.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(out_dir, "captus-bait.log"), stdout_verbosity_level=1) mar = 28 # Margin for aligning parameters and values diff --git a/captus/bioformats.py b/captus/bioformats.py index 4876107..525e251 100644 --- a/captus/bioformats.py +++ b/captus/bioformats.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify diff --git a/captus/captus_assembly.py b/captus/captus_assembly.py index 4626f9c..b6d4add 100644 --- a/captus/captus_assembly.py +++ b/captus/captus_assembly.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This is the control program for the assembly pipeline of Captus. diff --git a/captus/captus_design.py b/captus/captus_design.py index 1baa7e2..f7e13d1 100644 --- a/captus/captus_design.py +++ b/captus/captus_design.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This is the control program for the bait design pipeline of Captus. 
diff --git a/captus/clean.py b/captus/clean.py index 48a8aba..5fcab33 100644 --- a/captus/clean.py +++ b/captus/clean.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify @@ -32,7 +32,7 @@ def clean(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(out_dir, "captus-assembly_clean.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(out_dir, "captus-clean.log"), stdout_verbosity_level=1) mar = 21 # Margin for aligning parameters and values diff --git a/captus/cluster.py b/captus/cluster.py index ef8ac49..b428229 100644 --- a/captus/cluster.py +++ b/captus/cluster.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. 
Captus is free software: you can redistribute it and/or modify @@ -35,7 +35,7 @@ def cluster(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(out_dir, "captus-design_cluster.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(out_dir, "captus-cluster.log"), stdout_verbosity_level=1) mar = 23 # Margin for aligning parameters and values @@ -1274,7 +1274,7 @@ def min_copies(aln_trimmed: dict, aln_width: int): def write_aln_stats(out_dir: Path, shared_aln_stats: list): - stats_tsv_file = Path(out_dir, "captus-design_cluster.alignments.tsv") + stats_tsv_file = Path(out_dir, "captus-cluster_alignments.tsv") if not shared_aln_stats: if stats_tsv_file.exists() and not file_is_empty(stats_tsv_file): return stats_tsv_file diff --git a/captus/extract.py b/captus/extract.py index 406a4dc..e8d0bc7 100644 --- a/captus/extract.py +++ b/captus/extract.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. 
Captus is free software: you can redistribute it and/or modify @@ -43,7 +43,7 @@ def extract(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(args.out, "captus-assembly_extract.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(args.out, "captus-extract.log"), stdout_verbosity_level=1) mar = 25 # Margin for aligning parameters and values ################################################################################################ @@ -1997,7 +1997,7 @@ def blat_misc_dna( else: if not keep_all: Path(blat_dna_out_file).unlink() - write_gff3(dna_hits, marker_type, disable_stitching, dna_gff_file) + write_gff3(dna_hits, marker_type, disable_stitching, tsv_comment, dna_gff_file) recovery_stats = write_fastas_and_report(dna_hits, sample_name, dna_target, blat_dna_out_dir, marker_type, max_loci_files, tsv_comment, overwrite) @@ -2470,7 +2470,7 @@ def collect_ext_stats(out_dir, tsv_comment): if not samples_stats: return None else: - stats_file_out = Path(out_dir, "captus-assembly_extract.stats.tsv") + stats_file_out = Path(out_dir, "captus-extract_stats.tsv") header = "\t".join(settings.EXT_STATS_HEADER) + "\n" with open(stats_file_out, "wt") as tsv_out: tsv_out.write(tsv_comment) diff --git a/captus/log.py b/captus/log.py index 12d7ed4..ab67a92 100644 --- a/captus/log.py +++ b/captus/log.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This module contains Unicycler's class for writing output to both stdout and a log file. diff --git a/captus/misc.py b/captus/misc.py index 85f2a5b..cee9978 100644 --- a/captus/misc.py +++ b/captus/misc.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. 
Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify diff --git a/captus/report.py b/captus/report.py index 612f37b..1202921 100644 --- a/captus/report.py +++ b/captus/report.py @@ -1516,7 +1516,7 @@ def build_qc_report(out_dir, qc_extras_dir):
Version: {version}{command}
""" - qc_html_report = Path(out_dir, "captus-assembly_clean.report.html") + qc_html_report = Path(out_dir, "captus-clean_report.html") with open(qc_html_report, "w") as f: f.write(html_header) for fig in figs: @@ -2312,7 +2312,7 @@ def build_assembly_report(out_dir, asm_stats_tsv, len_stats_tsv, dep_stats_tsv):
Version: {version}{command}
""" - asm_html_report = Path(out_dir, "captus-assembly_assemble.report.html") + asm_html_report = Path(out_dir, "captus-assemble_report.html") with open(asm_html_report, "w") as f: f.write(html_header) for fig in figs: @@ -2864,7 +2864,7 @@ def build_extraction_report(out_dir, ext_stats_tsv):
Version: {version}{command}
""" - ext_html_report = Path(out_dir, "captus-assembly_extract.report.html") + ext_html_report = Path(out_dir, "captus-extract_report.html") with open(ext_html_report, "w") as f: f.write(html_header) for fig in figs: @@ -3750,7 +3750,7 @@ def build_alignment_report(out_dir, aln_stats_tsv, sam_stats_tsv): """ # Save plot in html - aln_html_report = Path(out_dir, "captus-assembly_align.report.html") + aln_html_report = Path(out_dir, "captus-align_report.html") with open(aln_html_report, "w") as f: f.write(html_header) for i, fig in enumerate(figs): @@ -4066,10 +4066,10 @@ def build_design_report(out_dir, des_stats_tsv, step): ) if step == "cluster": report_title = "Captus-design: Cluster (Alignment Report)" - des_html_report = Path(out_dir, "captus-design_cluster.report.html") + des_html_report = Path(out_dir, "captus-cluster_report.html") elif step == "select": report_title = "Captus-design: Select (Alignment Report)" - des_html_report = Path(out_dir, "captus-design_select.report.html") + des_html_report = Path(out_dir, "captus-select_report.html") with open(des_stats_tsv, "r") as f: version = f.readline().lstrip("#") command = f.readline().lstrip("#") diff --git a/captus/select.py b/captus/select.py index b423ec9..923b9fd 100644 --- a/captus/select.py +++ b/captus/select.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. 
Captus is free software: you can redistribute it and/or modify @@ -28,7 +28,7 @@ def select(full_command, args): captus_start = time.time() out_dir, out_dir_msg = make_output_dir(args.out) - log.logger = log.Log(Path(out_dir, "captus-design_select.log"), stdout_verbosity_level=1) + log.logger = log.Log(Path(out_dir, "captus-select.log"), stdout_verbosity_level=1) mar = 33 # Margin for aligning parameters and values @@ -145,7 +145,7 @@ def select(full_command, args): def load_aln_stats_tsv(clusters_dir: Path): start = time.time() - aln_stats_tsv_path = Path(clusters_dir, "captus-design_cluster.alignments.tsv") + aln_stats_tsv_path = Path(clusters_dir, "captus-cluster_alignments.tsv") if aln_stats_tsv_path.exists(): aln_stats = {} with open(aln_stats_tsv_path, "rt") as stats: @@ -457,7 +457,7 @@ def copy_loci(aln_stats: dict, out_dir: Path, overwrite: bool, show_more: bool): def write_aln_stats(out_dir: Path, aln_stats_filtered: dict): - stats_tsv_file = Path(out_dir, "captus-design_select.alignments.tsv") + stats_tsv_file = Path(out_dir, "captus-select_alignments.tsv") if not aln_stats_filtered: if stats_tsv_file.exists() and not file_is_empty(stats_tsv_file): return stats_tsv_file diff --git a/captus/settings.py b/captus/settings.py index 9f40546..26ab8d4 100644 --- a/captus/settings.py +++ b/captus/settings.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus -This module contains hard-coded settings for Captus-assembly +This module contains hard-coded settings for Captus This file is part of Captus. 
Captus is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, @@ -336,7 +336,7 @@ TRANSLATED_REF_SUFFIX = ".captus.faa" # JSON with paths to references filename -JSON_REFS = "captus-assembly_extract.refs.json" +JSON_REFS = "captus-extract_refs.json" # Valid combinations of marker directories and format directories VALID_MARKER_FORMAT_COMBO = [(m, f) for m in ["NUC","PTD","MIT"] for f in ["AA","NT","GE","GF"]] @@ -598,7 +598,7 @@ # File name for sequence-to-sample equivalence table used by ASTRAL-Pro to analyze trees that # include paralogs -ASTRAL_PRO_EQ = "captus-assembly_align.astral-pro.tsv" +ASTRAL_PRO_EQ = "captus-align_astral-pro.tsv" # Import data for clustering file names DES_SUFFIXES = { diff --git a/captus/version.py b/captus/version.py index e7ff2c0..74b953d 100644 --- a/captus/version.py +++ b/captus/version.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus Captus' version is stored here in a separate file so it can exist in only one place. diff --git a/captus_assembly-runner.py b/captus_assembly-runner.py index 7ed004e..16a2357 100755 --- a/captus_assembly-runner.py +++ b/captus_assembly-runner.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is a convenience wrapper for running Captus assemble directly from the source tree. By diff --git a/captus_design-runner.py b/captus_design-runner.py index d842284..1aa0f09 100755 --- a/captus_design-runner.py +++ b/captus_design-runner.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. 
Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is a convenience wrapper for running Captus design directly from the source tree. By diff --git a/docs/content/assembly/align/output.md b/docs/content/assembly/align/output.md index ed2d07f..5445986 100644 --- a/docs/content/assembly/align/output.md +++ b/docs/content/assembly/align/output.md @@ -73,7 +73,7 @@ ___ ### 15. **`02_matches_flanked`** This directory contains the alignments of DNA sequence matches (`MF` in the figure above) including flanks and intervening segments not present in the references for the extracted markers gathered across samples. One FASTA file per marker, with extension `.fna`. ___ -### 16. **`captus-assembly_align.paralogs.tsv`** +### 16. **`captus-align_paralogs.tsv`** A tab-separated-values table recording which copy was selected during the `informed` filtering of paralogs. {{% expand "Information included in the table" %}} @@ -92,7 +92,7 @@ A tab-separated-values table recording which copy was selected during the `infor |**accepted**|Whether the copy is accepted (`TRUE`) or not (`FALSE`).| {{% /expand %}} ___ -### 17. **`captus-assembly_align.alignments.tsv`** +### 17. **`captus-align_alignments.tsv`** A tab-separated-values table recording alignment statistics for each of the alignments produced. {{% expand "Information included in the table" %}} @@ -123,7 +123,7 @@ A tab-separated-values table recording alignment statistics for each of the alig |**gc_codon_p3**|| {{% /expand %}} ___ -### 18. **`captus-assembly_align.samples.tsv`** +### 18. **`captus-align_samples.tsv`** A tab-separated-values table recording sample statistics across the different filtering and trimming stages, as well as marker types and formats. {{% expand "Information included in the table" %}} @@ -146,7 +146,7 @@ A tab-separated-values table recording sample statistics across the different fi |**num_copies**|Number of copies in the alignment.| {{% /expand %}} ___ -### 19. 
**`captus-assembly_align.astral-pro.tsv`** +### 19. **`captus-align_astral-pro.tsv`** ASTRAL-Pro requires a tab-separated-values file for mapping the names of the paralog sequence names (first column) to the name of the sample (second column). `Captus` produces this file automatically. {{% expand "Example" %}} @@ -179,12 +179,12 @@ GenusD_speciesD_CAP__05 GenusD_speciesD_CAP ``` {{% /expand %}} ___ -### 20. **`captus-assembly_align.report.html`** +### 20. **`captus-align_report.html`** This is the final [Aligment report]({{< ref "assembly/align/report">}}), summarizing alignment statistics across all processing stages, marker types, and formats. ___ -### 21. **`captus-assembly_align.log`** +### 21. **`captus-align.log`** This is the log from `Captus`, it contains the command used and all the information shown during the run. Even if the option `--show_more` was disabled, the log will contain all the extra detailed information that was hidden during the run. ___ Created by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (06.08.2021) -Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (18.12.2024) \ No newline at end of file +Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (23.12.2024) \ No newline at end of file diff --git a/docs/content/assembly/align/report.md b/docs/content/assembly/align/report.md index 16b8c94..ed9b535 100644 --- a/docs/content/assembly/align/report.md +++ b/docs/content/assembly/align/report.md @@ -12,12 +12,12 @@ This `align` module generates several sets of alignments that are ready-to-use i Each alignment set differs from one another in the following four respects: 1) whether they are trimmed, 2) which [paralog filter ]({{< relref "assembly/align/options#paralog-filtering" >}}) is applied, 3) whether they contain reference sequences, and 4) in which [formats ]({{< relref "assembly/align/options#-f---formats" >}}). 
Thus, **it is important to understand the differences between each alignment set and carefully evaluate their quality in order to decide which alignment set to use for subsequent analyses**. -Open the report `captus-assembly_align.report.html` with your browser (internet connection required) to explore and compare general alignment statistics for each locus and each sample! +Open the report `captus-align_report.html` with your browser (internet connection required) to explore and compare general alignment statistics for each locus and each sample! {{% notice style="tip" title="Tips" %}} - The entire report is based on data stored in the following two files: - - [`captus-assembly_align.alignments.tsv`]({{< relref "assembly/align/output#17-captus-assembly_alignalignmentstsv" >}}) - - [`captus-assembly_align.samples.tsv`]({{< relref "assembly/align/output#18-captus-assembly_alignsamplestsv" >}}) + - [`captus-align_alignments.tsv`]({{< relref "assembly/align/output#17-captus-align_alignmentstsv" >}}) + - [`captus-align_samples.tsv`]({{< relref "assembly/align/output#18-captus-align_samplestsv" >}}) - All tables and plots in the report are interactive powered by [`Plotly`](https://plotly.com/python). Visit the following sites once to take full advantage of its interactivity: diff --git a/docs/content/assembly/assemble/output.md b/docs/content/assembly/assemble/output.md index 39c8274..814a89e 100644 --- a/docs/content/assembly/assemble/output.md +++ b/docs/content/assembly/assemble/output.md @@ -30,59 +30,20 @@ ___ This directory contains the FASTA and FASTG assembly files as well as assembly statistics and logs. ___ ### 4. **`assembly.fasta`** -The main assembly file in FASTA format, this file contains the contigs assembled by `MEGAHIT`. The sequence headers are modified by `Captus` to resemble the headers produced by the assembler `Spades`. 
+The main assembly file in FASTA format, this file contains the contigs assembled by `MEGAHIT` and filtered according to `--max_contig_gc` and `--min_contig_depth`. The sequence headers are modified by `Captus` to resemble the headers produced by the assembler `Spades`. {{% expand "Example" %}} ![FASTA format](/captus.docs/images/fasta_format.png?width=1000&classes=shadow) {{% /expand %}} ___ ### 5. **`assembly_graph.fastg`** -The assembly graph in [FASTG format](http://fastg.sourceforge.net/FASTG_Spec_v1.00.pdf). This file can be explored in [Bandage](https://rrwick.github.io/Bandage/) or similar software which are able to plot the connections between contigs, loops, circular segments, etc. +The assembly graph in [FASTG format](http://fastg.sourceforge.net/FASTG_Spec_v1.00.pdf). This file can be explored in [Bandage](https://rrwick.github.io/Bandage/) or similar software which are able to plot the connections between contigs, loops, circular segments, etc. The graph is based on the original `MEGAHIT` assembly prior to filtering. {{% expand "Example" %}} ![FASTG in Bandage](/captus.docs/images/fastg_in_bandage.png?width=1000&classes=shadow) {{% /expand %}} ___ -### 6. **`removed_contigs.fasta`** -This file is created after the filtering by GC and/or depth is finished (same format as in **4**). -___ -### 7. 
**`assembly_stats.tsv`** -Assembly statistics, before and after filtering: - -{{% expand "Example" %}} -**`assembly.stats.tsv`** -|Column|Description| -|-|-| -|**sample_name**|Name of the sample| -|**stage**|Before or After filtering| -|**num_contigs**|Number of contigs| -|**pct_contigs_1kbp**|Percentage of contigs over 1kbp| -|**pct_contigs_2kbp**|Percentage of contigs over 2kbp| -|**pct_contigs_5kbp**|Percentage of contigs over 5kbp| -|**pct_contigs_10kbp**|Percentage of contigs over 10kbp| -|**pct_contigs_20kbp**|Percentage of contigs over 20kbp| -|**pct_contigs_50kbp**|Percentage of contigs over 50kbp| -|**total_length**|Cumulative length of all contigs in bp| -|**pct_lengt_1kbp**|Percentage of total assembly length in contigs over 1kbp| -|**pct_lengt_2kbp**|Percentage of total assembly length in contigs over 2kbp| -|**pct_lengt_5kbp**|Percentage of total assembly length in contigs over 5kbp| -|**pct_lengt_10kbp**|Percentage of total assembly length in contigs over 10kbp| -|**pct_lengt_20kbp**|Percentage of total assembly length in contigs over 20kbp| -|**pct_lengt_50kbp**|Percentage of total assembly length in contigs over 50kbp| -|**shortest_contig**|Length of shortest contig in bp| -|**longest_contig**|Length of longest contig in bp| -|**avg_length**|Average contig length in bp| -|**median_length**|Median contig length in bp| -|**avg_depth**|Average contig depth| -|**median_depth**|Median contig depth| -|**gc**|Average contig GC content| -|**N50**|Assembly N50 in bp| -|**N75**|Assembly N75 in bp| -|**L50**|Assembly L50 in number of contigs| -|**L75**|Assembly L75 in number of contigs| -{{% /expand %}} -___ -### 8. **`megahit_brief.log`**, **`megahit_full.log`** +### 6. **`megahit_brief.log`**, **`megahit_full.log`** `MEGAHIT` program logs, the _brief_ version contains just the screen output from each `MEGAHIT` run. {{% expand "Example" %}} @@ -149,8 +110,31 @@ Captus' MEGAHIT Command: ``` {{% /expand %}} ___ -### 9. 
**`captus-assembly_assemble.assembly_stats.tsv`** -Statistics tab-separated-values table compiled across all assembled samples before and after filtering. +### 7. **`01_salmon_quant`** +This directory contains the results of mapping the reads back to the assembled contigs using `Salmon`. It is not created when `--ignore_mapping` is used. +___ +### 8. **`salmon.log`** +`Salmon` logs, combined for the indexing and quantification steps. +___ +### 9. **`removed_contigs.fasta`** +This file is created after the filtering by GC and/or depth is finished (same format as in **4**). +___ +### 10. **`contigs_depth.tsv`** +Table containing depth statistics and contig names with the original depth estimated by `MEGAHIT` and then recalculated with `Salmon`. + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**megahit_contig_name**|Original contig name from `MEGAHIT`| +|**megahit_depth**|Depth of coverage contained in `megahit_contig_name`| +|**length**|Length of the contig in bp| +|**salmon_contig_name**|Contig name with depth of coverage calculated by `Salmon`| +|**salmon_num_reads**|Estimated number of reads mapping to the contig according to `Salmon`| +|**salmon_depth**|read length (multiplied by 2 if reads are paired-end) * `salmon_num_reads` / `length` | +{{% /expand %}} +___ +### 11. **`assembly_stats.tsv`** +Assembly statistics, before and after filtering: {{% expand "Information included in the table" %}} |Column|Description| @@ -184,18 +168,103 @@ Statistics tab-separated-values table compiled across all assembled samples befo |**L75**|Assembly L75 in number of contigs| {{% /expand %}} ___ -### 10. **`captus-assembly_assemble.depth_stats.tsv`** -This is the final [Assembly report]({{< ref "assembly/assemble/report">}}), summarizing statistics across all samples assembled. +### 12. 
**`depth_stats.tsv`** +Depth statistics, before and after filtering: + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample| +|**stage**|Before or After filtering| +|**depth_bin**|Upper limit of the depth bin (lower limit given by the previous depth bin value)| +|**length**|Sum of lengths of the contigs inside the `depth_bin`| +|**fraction**|Sum of lengths of the contigs inside the `depth_bin` as a fraction of the total length| +|**num_contigs**|Number of contigs inside the `depth_bin`| +{{% /expand %}} ___ -### 11. **`captus-assembly_assemble.length_stats.tsv`** -This is the final [Assembly report]({{< ref "assembly/assemble/report">}}), summarizing statistics across all samples assembled. +### 13. **`length_stats.tsv`** +Length statistics, before and after filtering: + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample| +|**stage**|Before or After filtering| +|**length_bin**|Upper limit of the length bin (lower limit given by the previous length bin value)| +|**length**|Sum of lengths of the contigs inside the `length_bin`| +|**fraction**|Sum of lengths of the contigs inside the `length_bin` as a fraction of the total length| +|**num_contigs**|Number of contigs inside the `length_bin`| +{{% /expand %}} ___ -### 12. **`captus-assembly_assemble.log`** -This is the log from `Captus`, it contains the command used and all the information shown during the run. If the option `--show_less` was enabled, the log will also contain all the extra detailed information that was hidden during the run. +### 14. 
**`captus-assemble_assembly_stats.tsv`** +Assembly statistics compiled across all samples, before and after filtering: + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample| +|**stage**|Before or After filtering| +|**num_contigs**|Number of contigs| +|**pct_contigs_1kbp**|Percentage of contigs over 1kbp| +|**pct_contigs_2kbp**|Percentage of contigs over 2kbp| +|**pct_contigs_5kbp**|Percentage of contigs over 5kbp| +|**pct_contigs_10kbp**|Percentage of contigs over 10kbp| +|**pct_contigs_20kbp**|Percentage of contigs over 20kbp| +|**pct_contigs_50kbp**|Percentage of contigs over 50kbp| +|**total_length**|Cumulative length of all contigs in bp| +|**pct_lengt_1kbp**|Percentage of total assembly length in contigs over 1kbp| +|**pct_lengt_2kbp**|Percentage of total assembly length in contigs over 2kbp| +|**pct_lengt_5kbp**|Percentage of total assembly length in contigs over 5kbp| +|**pct_lengt_10kbp**|Percentage of total assembly length in contigs over 10kbp| +|**pct_lengt_20kbp**|Percentage of total assembly length in contigs over 20kbp| +|**pct_lengt_50kbp**|Percentage of total assembly length in contigs over 50kbp| +|**shortest_contig**|Length of shortest contig in bp| +|**longest_contig**|Length of longest contig in bp| +|**avg_length**|Average contig length in bp| +|**median_length**|Median contig length in bp| +|**avg_depth**|Average contig depth| +|**median_depth**|Median contig depth| +|**gc**|Average contig GC content| +|**N50**|Assembly N50 in bp| +|**N75**|Assembly N75 in bp| +|**L50**|Assembly L50 in number of contigs| +|**L75**|Assembly L75 in number of contigs| +{{% /expand %}} +___ +### 15. 
**`captus-assemble_depth_stats.tsv`** +Depth statistics compiled across all samples, before and after filtering: + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample| +|**stage**|Before or After filtering| +|**depth_bin**|Upper limit of the depth bin (lower limit given by the previous depth bin value)| +|**length**|Sum of lengths of the contigs inside the `depth_bin`| +|**fraction**|Sum of lengths of the contigs inside the `depth_bin` as a fraction of the total length| +|**num_contigs**|Number of contigs inside the `depth_bin`| +{{% /expand %}} +___ +### 16. **`captus-assemble_length_stats.tsv`** +Length statistics compiled across all samples, before and after filtering: + +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample| +|**stage**|Before or After filtering| +|**length_bin**|Upper limit of the length bin (lower limit given by the previous length bin value)| +|**length**|Sum of lengths of the contigs inside the `length_bin`| +|**fraction**|Sum of lengths of the contigs inside the `length_bin` as a fraction of the total length| +|**num_contigs**|Number of contigs inside the `length_bin`| +{{% /expand %}} ___ -### 13. **`captus-assembly_assemble.report.html`** +### 17. **`captus-assemble_report.html`** This is the final [Assembly report]({{< ref "assembly/assemble/report">}}), summarizing statistics across all samples assembled. +___ +### 18. **`captus-assemble.log`** +This is the log from `Captus`, it contains the command used and all the information shown during the run. If the option `--show_less` was enabled, the log will also contain all the extra detailed information that was hidden during the run. ___ Created by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (06.08.2021) -Last modified by [Edgardo M. 
Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (18.12.2024) \ No newline at end of file +Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (23.12.2024) \ No newline at end of file diff --git a/docs/content/assembly/assemble/report.md b/docs/content/assembly/assemble/report.md index 97c0691..3742cf1 100644 --- a/docs/content/assembly/assemble/report.md +++ b/docs/content/assembly/assemble/report.md @@ -11,13 +11,13 @@ plotly = true **No successful marker extractions can be achieved without successful assemblies**. Even though this `assemble` module offers presets tuned for different data types, it is recommendable to repeat this step some times with different parameters to find optimal settings for your own data. `Captus` assists you in this tedious process by automatically generating a useful report for assembly evaluation. -Just open `captus-assembly_assemble.report.html` with your browser (internet connection required) to get general assembly statistics across all your samples! +Just open `captus-assemble_report.html` with your browser (internet connection required) to get general assembly statistics across all your samples! 
{{% notice tip %}} - The entire report is based on data stored in the following three files: - - [`captus-assembly_assemble.assembly_stats.tsv`]({{< relref "assembly/assemble/output#9-captus-assembly_assemblestatstsv" >}}) - - [`captus-assembly_assemble.depth_stats.tsv`]({{< relref "assembly/assemble/output" >}}) - - [`captus-assembly_assemble.length_stats.tsv`]({{< relref "assembly/assemble/output" >}}) + - [`captus-assemble_assembly_stats.tsv`]({{< relref "assembly/assemble/output#14-captus-assemble_assembly_statstsv" >}}) + - [`captus-assemble_depth_stats.tsv`]({{< relref "assembly/assemble/output#15-captus-assemble_depth_statstsv" >}}) + - [`captus-assemble_length_stats.tsv`]({{< relref "assembly/assemble/output#16-captus-assemble_length_statstsv" >}}) - All tables and plots in the report are interactive powered by [`Plotly`](https://plotly.com/python). Visit the following sites once to take full advantage of its interactivity: @@ -129,4 +129,4 @@ Feature: --- Created by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (11.08.2021) -Last modified by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (16.12.2024) +Last modified by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (23.12.2024) diff --git a/docs/content/assembly/clean/output.md b/docs/content/assembly/clean/output.md index 34d8c6e..5a64766 100644 --- a/docs/content/assembly/clean/output.md +++ b/docs/content/assembly/clean/output.md @@ -18,72 +18,7 @@ Notice we are using default settings, the only required argument is the location ![Clean reads](/captus.docs/images/clean_reads.png?width=640&classes=shadow) -### 1. **`[sample]_R1.fq.gz`**, **`[sample]_R2.fq.gz`** -In case of paired-end input we will have a pair of files like in the image, the forward reads are indicated by **_R1** and the reverse reads by **_R2**. Single-end input will only return forward reads. 
[Wikipedia](https://en.wikipedia.org/wiki/FASTQ_format)'s entry for the format describes it in more detail. - -{{% expand "Example" %}} -![FASTQ format](/captus.docs/images/fastq_format.png?width=1000&classes=shadow) -{{% /expand %}} -___ -### 2. **`[sample].cleaning.log`** -This file contains the cleaning command used for `bbduk.sh` as well the data shown as screen output, this and other information is compiled in the [Cleaning report]({{< ref "assembly/clean/report">}}). -{{% expand "Example" %}} -```text -Captus' BBDuk Command: - bbduk.sh -Xmx16220m threads=8 in=/tutorial/01_clean_reads/00_adaptors_trimmed/GenusA_speciesA_CAP_R#.fq.gz out=/tutorial/01_clean_reads/GenusA_speciesA_CAP_R#.fq.gz ref=/software/GitHub/Captus/data/phix174_ill.ref.fa.gz,/software/GitHub/Captus/data/sequencing_artifacts.fasta k=31 hdist=1 qtrim=lr trimq=13 maq=16 ftl=0 ftr=0 minlength=21 maxns=5 ziplevel=5 overwrite=t stats=/tutorial/01_clean_reads/GenusA_speciesA_CAP.cleaning.stats.txt 2>/tutorial/01_clean_reads/GenusA_speciesA_CAP.stdout.log - - -Executing jgi.BBDuk [-Xmx16220m, threads=8, in=/tutorial/01_clean_reads/00_adaptors_trimmed/GenusA_speciesA_CAP_R#.fq.gz, out=/tutorial/01_clean_reads/GenusA_speciesA_CAP_R#.fq.gz, ref=/software/GitHub/Captus/data/phix174_ill.ref.fa.gz,/software/GitHub/Captus/data/sequencing_artifacts.fasta, k=31, hdist=1, qtrim=lr, trimq=13, maq=16, ftl=0, ftr=0, minlength=21, maxns=5, ziplevel=5, overwrite=t, stats=/tutorial/01_clean_reads/GenusA_speciesA_CAP.cleaning.stats.txt] -Version 38.95 - -Set threads to 8 -0.018 seconds. -Initial: -Memory: max=17007m, total=17007m, free=16987m, used=20m - -Added 8403228 kmers; time: 1.021 seconds. -Memory: max=17007m, total=17007m, free=16612m, used=395m - -Input is being processed as paired -Started output streams: 0.062 seconds. -Processing time: 3.655 seconds. - -Input: 733430 reads 110292758 bases. 
-Contaminants: 0 reads (0.00%) 0 bases (0.00%) -QTrimmed: 127322 reads (17.36%) 515529 bases (0.47%) -Low quality discards: 13310 reads (1.81%) 1903218 bases (1.73%) -Total Removed: 13340 reads (1.82%) 2418747 bases (2.19%) -Result: 720090 reads (98.18%) 107874011 bases (97.81%) - -Time: 4.753 seconds. -Reads Processed: 733k 154.32k reads/sec -Bases Processed: 110m 23.21m bases/sec -``` -{{% /expand %}} -___ -### 3. **`[sample].cleaning.stats.txt`** -List of contaminants found by `bbduk.sh` in the input reads, sorted by abundance. -{{% expand "Example" %}} -```text -#File /tutorial/01_clean_reads/00_adaptors_trimmed/GenusX_speciesX_CAP_R1.fq.gz /tutorial/01_clean_reads/00_adaptors_trimmed/GenusX_speciesX_CAP_R2.fq.gz -#Total 60621406 -#Matched 25 0.00004% -#Name Reads ReadsPct -gi|9626372|ref|NC_001422.1| Coliphage phiX174, complete genome 14 0.00002% -contam_111 8 0.00001% -contam_32 1 0.00000% -contam_76 1 0.00000% -contam_87 1 0.00000% -``` -{{% /expand %}} -___ -### 4. **`captus-assembly_clean.report.html`** -This is the final [Cleaning report]({{< ref "assembly/clean/report">}}), summarizing statistics across all samples analyzed. -___ -### 5. **`captus-assembly_clean.log`** -This is the log from `Captus`, it contains the command used and all the information shown during the run. If the option `--show_less` was enabled, the log will also contain all the extra detailed information that was hidden during the run. -___ -### 6. **`00_adaptors_trimmed`** +### 1. **`00_adaptors_trimmed`** This is an intermediate directory that contains the FASTQ files without adaptors, prior to quality-trimming and filtering. The directory also stores `bbduk.sh` commands and logs for the adaptor trimming stage. If the option `--keep_all` was enabled the FASTQs from this intermediate are kept after the run, otherwise they are deleted. 
{{% expand "Example" %}} **`[sample].round1.log`** @@ -199,10 +134,69 @@ I5_Primer_Nextera_XT_Index_Kit_v2_S520 1 0.00014% ``` {{% /expand %}} ___ -### 7. **`01_qc_stats_before`**, **`02_qc_stats_after`** +### 2. **`[sample]_R1.fq.gz`**, **`[sample]_R2.fq.gz`** +In case of paired-end input we will have a pair of files like in the image, the forward reads are indicated by **_R1** and the reverse reads by **_R2**. Single-end input will only return forward reads. [Wikipedia](https://en.wikipedia.org/wiki/FASTQ_format)'s entry for the format describes it in more detail. These are the cleaned reads that will be used by the `assemble` module. + +{{% expand "Example" %}} +![FASTQ format](/captus.docs/images/fastq_format.png?width=1000&classes=shadow) +{{% /expand %}} +___ +### 3. **`[sample].cleaning.log`** +This file contains the cleaning command used for `bbduk.sh` as well as the data shown as screen output; this and other information is compiled in the [Cleaning report]({{< ref "assembly/clean/report">}}). 
+{{% expand "Example" %}} +```text +Captus' BBDuk Command: + bbduk.sh -Xmx16220m threads=8 in=/tutorial/01_clean_reads/00_adaptors_trimmed/GenusA_speciesA_CAP_R#.fq.gz out=/tutorial/01_clean_reads/GenusA_speciesA_CAP_R#.fq.gz ref=/software/GitHub/Captus/data/phix174_ill.ref.fa.gz,/software/GitHub/Captus/data/sequencing_artifacts.fasta k=31 hdist=1 qtrim=lr trimq=13 maq=16 ftl=0 ftr=0 minlength=21 maxns=5 ziplevel=5 overwrite=t stats=/tutorial/01_clean_reads/GenusA_speciesA_CAP.cleaning.stats.txt 2>/tutorial/01_clean_reads/GenusA_speciesA_CAP.stdout.log + + +Executing jgi.BBDuk [-Xmx16220m, threads=8, in=/tutorial/01_clean_reads/00_adaptors_trimmed/GenusA_speciesA_CAP_R#.fq.gz, out=/tutorial/01_clean_reads/GenusA_speciesA_CAP_R#.fq.gz, ref=/software/GitHub/Captus/data/phix174_ill.ref.fa.gz,/software/GitHub/Captus/data/sequencing_artifacts.fasta, k=31, hdist=1, qtrim=lr, trimq=13, maq=16, ftl=0, ftr=0, minlength=21, maxns=5, ziplevel=5, overwrite=t, stats=/tutorial/01_clean_reads/GenusA_speciesA_CAP.cleaning.stats.txt] +Version 38.95 + +Set threads to 8 +0.018 seconds. +Initial: +Memory: max=17007m, total=17007m, free=16987m, used=20m + +Added 8403228 kmers; time: 1.021 seconds. +Memory: max=17007m, total=17007m, free=16612m, used=395m + +Input is being processed as paired +Started output streams: 0.062 seconds. +Processing time: 3.655 seconds. + +Input: 733430 reads 110292758 bases. +Contaminants: 0 reads (0.00%) 0 bases (0.00%) +QTrimmed: 127322 reads (17.36%) 515529 bases (0.47%) +Low quality discards: 13310 reads (1.81%) 1903218 bases (1.73%) +Total Removed: 13340 reads (1.82%) 2418747 bases (2.19%) +Result: 720090 reads (98.18%) 107874011 bases (97.81%) + +Time: 4.753 seconds. +Reads Processed: 733k 154.32k reads/sec +Bases Processed: 110m 23.21m bases/sec +``` +{{% /expand %}} +___ +### 4. **`[sample].cleaning.stats.txt`** +List of contaminants found by `bbduk.sh` in the input reads, sorted by abundance. 
+{{% expand "Example" %}} +```text +#File /tutorial/01_clean_reads/00_adaptors_trimmed/GenusX_speciesX_CAP_R1.fq.gz /tutorial/01_clean_reads/00_adaptors_trimmed/GenusX_speciesX_CAP_R2.fq.gz +#Total 60621406 +#Matched 25 0.00004% +#Name Reads ReadsPct +gi|9626372|ref|NC_001422.1| Coliphage phiX174, complete genome 14 0.00002% +contam_111 8 0.00001% +contam_32 1 0.00000% +contam_76 1 0.00000% +contam_87 1 0.00000% +``` +{{% /expand %}} +___ +### 5. **`01_qc_stats_before`**, **`02_qc_stats_after`** These directories contain the results from either `Falco` or `FastQC`, organized in a subdirectory per FASTQ file analyzed. ___ -### 8. **`03_qc_extras`** +### 6. **`03_qc_extras`** This directory contains all the tab-separated-values tables needed to build the [Cleaning report]({{< ref "assembly/clean/report">}}). We provide them separately to allow the user more detailed analyses. {{% expand "List of tables produced" %}} |Table|Description| @@ -220,5 +214,11 @@ This directory contains all the tab-separated-values tables needed to build the |**seq_len_dist.tsv**|Sequence length distribution, parsed from `Falco`'s or`FastQC`'s output| {{% /expand %}} ___ +### 7. **`captus-clean_report.html`** +This is the final [Cleaning report]({{< ref "assembly/clean/report">}}), summarizing statistics across all samples analyzed. +___ +### 8. **`captus-clean.log`** +This is the log from `Captus`, it contains the command used and all the information shown during the run. If the option `--show_less` was enabled, the log will also contain all the extra detailed information that was hidden during the run. +___ Created by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (06.08.2021) -Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (18.12.2024) \ No newline at end of file +Last modified by [Edgardo M. 
Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (23.12.2024) \ No newline at end of file diff --git a/docs/content/assembly/clean/report.md b/docs/content/assembly/clean/report.md index f399d38..1eee1b1 100644 --- a/docs/content/assembly/clean/report.md +++ b/docs/content/assembly/clean/report.md @@ -13,11 +13,11 @@ To assess the quality of raw reads and how it is improved by the cleaning, the ` Although both programs generate informative reports, they are in separate files for each sample, each read direction (for paired-end), and before and after cleaning. This makes it tedious to review every report, and can lead to overlook some serious problems, such as residual low-quality bases or adaptor sequences, contamination of different samples, and improper setting of cleaning parameters. -`Captus` summarizes the information in those disparate reports into a single HTML file. All you need to do is open `captus-assembly_clean.report.html` with your browser (internet connection required) to get a quick overview on all your samples, both reads (for paired-end), and before and after cleaning! +`Captus` summarizes the information in those disparate reports into a single HTML file. All you need to do is open `captus-clean_report.html` with your browser (internet connection required) to get a quick overview on all your samples, both reads (for paired-end), and before and after cleaning! {{% notice tip %}} -- The entire report is based on tables stored in the [`03_qc_extras`]({{< relref "assembly/clean/output#8-03_qc_extras" >}}) directory. +- The entire report is based on tables stored in the [`03_qc_extras`]({{< relref "assembly/clean/output#6-03_qc_extras" >}}) directory. - All tables and plots in the report are interactive powered by [`Plotly`](https://plotly.com/python). 
Visit the following sites once to take full advantage of its interactivity: @@ -30,15 +30,17 @@ Visit the following sites once to take full advantage of its interactivity: --- The report comprises the following nine sections: -1. [Summary Table](#1-summary-table) -2. [Stats on Reads/Bases](#2-stats-on-readsbases) -3. [Per Base Quality](#3-per-base-quality) -4. [Per Read Quality](#4-per-read-quality) -5. [Read Length Distribution](#5-read-length-distribution) -6. [Per Base Nucleotide Content](#6-per-base-nucleotide-content) -7. [Per Read GC Content](#7-per-read-gc-content) -8. [Sequence Duplication Level](#8-sequence-duplication-level) -9. [Adaptor Content](#9-adaptor-content) +- [Concept](#concept) +- [Contents](#contents) + - [1. Summary Table](#1-summary-table) + - [2. Stats on Reads/Bases](#2-stats-on-readsbases) + - [3. Per Base Quality](#3-per-base-quality) + - [4. Per Read Quality](#4-per-read-quality) + - [5. Read Length Distribution](#5-read-length-distribution) + - [6. Per Base Nucleotide Content](#6-per-base-nucleotide-content) + - [7. Per Read GC Content](#7-per-read-gc-content) + - [8. Sequence Duplication Level](#8-sequence-duplication-level) + - [9. Adaptor Content](#9-adaptor-content) A brief description and interactive example of each section is given below. By switching the tabs at the top of each plot, you can compare the plot produced by `Captus` with the corresponding plot from [`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc). 
@@ -221,4 +223,4 @@ For more details, read [ FastQC documentation](http --- Created by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (11.08.2021) -Last modified by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (22.02.2023) +Last modified by [Gentaro Shigita]({{< ref "../../more/credits/#gentaro-shigita">}}) (23.12.2024) diff --git a/docs/content/assembly/extract/output.md b/docs/content/assembly/extract/output.md index 282d3e2..a17a211 100644 --- a/docs/content/assembly/extract/output.md +++ b/docs/content/assembly/extract/output.md @@ -98,7 +98,35 @@ ___ ### 9. **`[MARKER_TYPE]_recovery_stats.tsv`** Tab-separated-values table with marker recovery statistics, these are concatenated across marker types and samples and summarized in the final [Marker Recovery report]({{< ref "assembly/extract/report">}}). Prefixes can be `NUC`, `PTD`, or `MIT`. -For more information on the table see [26. captus-assembly_extract.stats.tsv]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}) +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample.| +|**marker_type**|Type of marker. Possible values are `NUC`, `PTD`, `MIT`, `DNA`, or `CLR`.| +|**locus**|Name of the locus.| +|**ref_name**|Name of the reference selected for the locus. Relevant when the reference contains multiple sequences per locus like in Angiosperms353 for example.| +|**ref_coords**|Match coordinates with respect to the reference, each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;`. 
For example: `1-47;48-354,355-449` indicates that a contig contained a segment matching reference coordinates `1-47` and a different contig matched two segments, `48-354` and `355-449` respectively.| +|**ref_type**|Whether the reference is an amino acid (`prot`) or nucleotide (`nucl`) sequence.| +|**ref_len_matched**|Number of residues matched in the reference.| +|**hit**|Paralog ranking, `00` is assigned to the best hit, secondary hits start at `01`.| +|**pct_recovered**|Percentage of the total length of the reference sequence that was matched.| +|**pct_identity**|Percentage of sequence identity between the hit and the reference sequence.| +|**score**|Inspired by `Scipio`'s score: `(matches - mismatches) / reference length`.| +|**wscore**|Weighted score. When the reference contains multiple sequences per locus, the best-matching reference is decided after normalizing their recovered length across references in the locus and multiplying that value by their respective `score`, thus producing the `wscore`. Finally `wscore` is also penalized by the number of frameshifts (if the marker is coding) and number of contigs used in the assembly of the hit.| +|**hit_len**|Number of residues matched in the sample's contig(s) plus the length of the flanking sequence.| +|**cds_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to coding sequence (i.e. exons). If the `ref_type` is `nucl` this field shows `NA`.| +|**intron_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to intervening non-coding sequence segments (i.e. introns). If the `ref_type` is `nucl` this field shows `NA`.| +|**flanks_len**|Number of residues included in the flanking sequence.| +|**frameshifts**|Positions of the corrected frameshifts in the output sequence. 
If the `ref_type` is `nucl` this field shows `NA`.| +|**hit_contigs**|Number of contigs used to assemble the hit.| +|**hit_l50**|Least number of contigs in the hit that contain 50% of the recovered length.| +|**hit_l90**|Least number of contigs in the hit that contain 90% of the recovered length.| +|**hit_lg50**|Least number of contigs in the hit that contain 50% of the reference locus length.| +|**hit_lg90**|Least number of contigs in the hit that contain 90% of the reference locus length.| +|**ctg_names**|Name of the contigs used in the reconstruction of the hit. Example: `NODE_6256_length_619_cov_3.0000_k_169_flag_1;NODE_3991_length_1778_cov_19.0000_k_169_flag_1`, for a hit where two contigs were used.| +|**ctg_strands**|Contig strands (`+` or `-`) provided in the same order as `ctg_names`. Example: `+;-` indicates that the contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` was matched in the positive strand while the contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` was matched in the negative strand.| +|**ctg_coords**|Match coordinates with respect to the contigs in the sample's assembly. Each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;` which are provided in the same order as `ctg_names` and `ctg_strands`. Example: `303-452;694-1626,301-597` indicates that a single segment was matched in contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` in the `+` strand with coordinates `303-452`, while two segments were matched in contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` in the `-` strand with coordinates `694-1626` and `301-597` respectively.| +{{% /expand %}} ___ ### 10. **`[MARKER_TYPE]_scipio_final.log`** Log of the second Scipio's run, where best references have already been selected (when using multi-sequence per locus references) and only the contigs that had hits durin Scipio's initial run are used. 
Prefixes can be `NUC`, `PTD`, or `MIT`. @@ -149,7 +177,35 @@ ___ ### 17. **`[MARKER_TYPE]_recovery_stats.tsv`** Tab-separated-values table with marker recovery statistics, these are concatenated across marker types and samples and summarized in the final [Marker Recovery report]({{< ref "assembly/extract/report">}}). Prefixes can be `DNA` or `CLR`. -For more information on the table see [26. captus-assembly_extract.stats.tsv]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}) +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample.| +|**marker_type**|Type of marker. Possible values are `NUC`, `PTD`, `MIT`, `DNA`, or `CLR`.| +|**locus**|Name of the locus.| +|**ref_name**|Name of the reference selected for the locus. Relevant when the reference contains multiple sequences per locus like in Angiosperms353 for example.| +|**ref_coords**|Match coordinates with respect to the reference, each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;`. For example: `1-47;48-354,355-449` indicates that a contig contained a segment matching reference coordinates `1-47` and a different contig matched two segments, `48-354` and `355-449` respectively.| +|**ref_type**|Whether the reference is an amino acid (`prot`) or nucleotide (`nucl`) sequence.| +|**ref_len_matched**|Number of residues matched in the reference.| +|**hit**|Paralog ranking, `00` is assigned to the best hit, secondary hits start at `01`.| +|**pct_recovered**|Percentage of the total length of the reference sequence that was matched.| +|**pct_identity**|Percentage of sequence identity between the hit and the reference sequence.| +|**score**|Inspired by `Scipio`'s score: `(matches - mismatches) / reference length`.| +|**wscore**|Weighted score. 
When the reference contains multiple sequences per locus, the best-matching reference is decided after normalizing their recovered length across references in the locus and multiplying that value by their respective `score`, thus producing the `wscore`. Finally `wscore` is also penalized by the number of frameshifts (if the marker is coding) and number of contigs used in the assembly of the hit.| +|**hit_len**|Number of residues matched in the sample's contig(s) plus the length of the flanking sequence.| +|**cds_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to coding sequence (i.e. exons). If the `ref_type` is `nucl` this field shows `NA`.| +|**intron_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to intervening non-coding sequence segments (i.e. introns). If the `ref_type` is `nucl` this field shows `NA`.| +|**flanks_len**|Number of residues included in the flanking sequence.| +|**frameshifts**|Positions of the corrected frameshifts in the output sequence. If the `ref_type` is `nucl` this field shows `NA`.| +|**hit_contigs**|Number of contigs used to assemble the hit.| +|**hit_l50**|Least number of contigs in the hit that contain 50% of the recovered length.| +|**hit_l90**|Least number of contigs in the hit that contain 90% of the recovered length.| +|**hit_lg50**|Least number of contigs in the hit that contain 50% of the reference locus length.| +|**hit_lg90**|Least number of contigs in the hit that contain 90% of the reference locus length.| +|**ctg_names**|Name of the contigs used in the reconstruction of the hit. Example: `NODE_6256_length_619_cov_3.0000_k_169_flag_1;NODE_3991_length_1778_cov_19.0000_k_169_flag_1`, for a hit where two contigs were used.| +|**ctg_strands**|Contig strands (`+` or `-`) provided in the same order as `ctg_names`. 
Example: `+;-` indicates that the contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` was matched in the positive strand while the contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` was matched in the negative strand.| +|**ctg_coords**|Match coordinates with respect to the contigs in the sample's assembly. Each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;` which are provided in the same order as `ctg_names` and `ctg_strands`. Example: `303-452;694-1626,301-597` indicates that a single segment was matched in contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` in the `+` strand with coordinates `303-452`, while two segments were matched in contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` in the `-` strand with coordinates `694-1626` and `301-597` respectively.| +{{% /expand %}} ___ ### 18. **`[MARKER_TYPE]_blat_search.log`** Log of BLAT's run. Prefixes can be `DNA` or `CLR`. @@ -171,7 +227,35 @@ ___ ### 22. **`[SAMPLE_NAME]_recovery_stats.tsv`** Unified tab-separated-values table with marker recovery statistics from ALL the marker types found in the sample, these are concatenated across samples and summarized in the final [Marker Recovery report]({{< ref "assembly/extract/report">}}). -For more information on the table see [26. captus-assembly_extract.stats.tsv]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}) +{{% expand "Information included in the table" %}} +|Column|Description| +|-|-| +|**sample_name**|Name of the sample.| +|**marker_type**|Type of marker. Possible values are `NUC`, `PTD`, `MIT`, `DNA`, or `CLR`.| +|**locus**|Name of the locus.| +|**ref_name**|Name of the reference selected for the locus. 
Relevant when the reference contains multiple sequences per locus like in Angiosperms353 for example.| +|**ref_coords**|Match coordinates with respect to the reference, each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;`. For example: `1-47;48-354,355-449` indicates that a contig contained a segment matching reference coordinates `1-47` and a different contig matched two segments, `48-354` and `355-449` respectively.| +|**ref_type**|Whether the reference is an amino acid (`prot`) or nucleotide (`nucl`) sequence.| +|**ref_len_matched**|Number of residues matched in the reference.| +|**hit**|Paralog ranking, `00` is assigned to the best hit, secondary hits start at `01`.| +|**pct_recovered**|Percentage of the total length of the reference sequence that was matched.| +|**pct_identity**|Percentage of sequence identity between the hit and the reference sequence.| +|**score**|Inspired by `Scipio`'s score: `(matches - mismatches) / reference length`.| +|**wscore**|Weighted score. When the reference contains multiple sequences per locus, the best-matching reference is decided after normalizing their recovered length across references in the locus and multiplying that value by their respective `score`, thus producing the `wscore`. Finally `wscore` is also penalized by the number of frameshifts (if the marker is coding) and number of contigs used in the assembly of the hit.| +|**hit_len**|Number of residues matched in the sample's contig(s) plus the length of the flanking sequence.| +|**cds_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to coding sequence (i.e. exons). If the `ref_type` is `nucl` this field shows `NA`.| +|**intron_len**|If `ref_type` is `prot` this number represents the number of residues corresponding to intervening non-coding sequence segments (i.e. introns). 
If the `ref_type` is `nucl` this field shows `NA`.| +|**flanks_len**|Number of residues included in the flanking sequence.| +|**frameshifts**|Positions of the corrected frameshifts in the output sequence. If the `ref_type` is `nucl` this field shows `NA`.| +|**hit_contigs**|Number of contigs used to assemble the hit.| +|**hit_l50**|Least number of contigs in the hit that contain 50% of the recovered length.| +|**hit_l90**|Least number of contigs in the hit that contain 90% of the recovered length.| +|**hit_lg50**|Least number of contigs in the hit that contain 50% of the reference locus length.| +|**hit_lg90**|Least number of contigs in the hit that contain 90% of the reference locus length.| +|**ctg_names**|Name of the contigs used in the reconstruction of the hit. Example: `NODE_6256_length_619_cov_3.0000_k_169_flag_1;NODE_3991_length_1778_cov_19.0000_k_169_flag_1`, for a hit where two contigs were used.| +|**ctg_strands**|Contig strands (`+` or `-`) provided in the same order as `ctg_names`. Example: `+;-` indicates that the contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` was matched in the positive strand while the contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` was matched in the negative strand.| +|**ctg_coords**|Match coordinates with respect to the contigs in the sample's assembly. Each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;` which are provided in the same order as `ctg_names` and `ctg_strands`. Example: `303-452;694-1626,301-597` indicates that a single segment was matched in contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` in the `+` strand with coordinates `303-452`, while two segments were matched in contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` in the `-` strand with coordinates `694-1626` and `301-597` respectively.| +{{% /expand %}} ___ ### 23. 
**`leftover_contigs.fasta.gz`** This file contains the subset of the contigs assembled by `MEGAHIT` that had no hit to the reference markers. The file is compressed to save space. These are the contigs that are used for clustering across samples in order to discover additional homologous markers. @@ -179,7 +263,7 @@ ___ ### 24. **`leftover_contigs_after_custering.fasta.gz`** This file contains the subset of the contigs assembled by `MEGAHIT` that had no hit to the reference markers or even to the newly discovered markers derived from clusterin. The file is compressed to save space. ___ -### 25. **`captus-assembly_extract.refs.json`** +### 25. **`captus-extract_refs.json`** This file stores the paths to all the references used for extraction. This file is necessary so the alignment step can correctly add the references to the final alignments to be used as guides. {{% expand "Example" %}} @@ -219,7 +303,7 @@ This file stores the paths to all the references used for extraction. This file ``` {{% /expand %}} ___ -### 26. **`captus-assembly_extract.stats.tsv`** +### 26. **`captus-extract_stats.tsv`** Unified tab-separated-values table with marker recovery statistics from ALL the markers found in ALL the samples, this table is used to create the final [Marker Recovery report]({{< ref "assembly/extract/report">}}). Even though the report is quite useful for visualization you might need to do more complex statistical analysis, this table is the most appropriate output file for such analyses. {{% expand "Information included in the table" %}} @@ -252,10 +336,10 @@ Unified tab-separated-values table with marker recovery statistics from ALL the |**ctg_coords**|Match coordinates with respect to the contigs in the sample's assembly. Each segment is expressed as `[start]-[end]`, segments within the same contig are separated by `,`, and segments in different contigs are separated by `;` which are provided in the same order as `ctg_names` and `ctg_strands`. 
Example: `303-452;694-1626,301-597` indicates that a single segment was matched in contig `NODE_6256_length_619_cov_3.0000_k_169_flag_1` in the `+` strand with coordinates `303-452`, while two segments were matched in contig `NODE_3991_length_1778_cov_19.0000_k_169_flag_1` in the `-` strand with coordinates `694-1626` and `301-597` respectively.| {{% /expand %}} ___ -### 27. **`captus-assembly_extract.report.html`** +### 27. **`captus-extract_report.html`** This is the final [Marker Recovery report]({{< ref "assembly/extract/report">}}), summarizing marker extraction statistics across all samples and marker types. ___ -### 28. **`captus-assembly_extract.log`** +### 28. **`captus-extract.log`** This is the log from `Captus`, it contains the command used and all the information shown during the run. If the option `--show_less` was enabled, the log will also contain all the extra detailed information that was hidden during the run. ___ ### 29. **`clust_id##.##_cov##.##_captus_clusters_refs.fasta`** @@ -281,4 +365,4 @@ As you can see, **Sample name**, **Locus name**, and **Paralog ranking** are sep ___ Created by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (06.08.2021) -Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (18.12.2024) \ No newline at end of file +Last modified by [Edgardo M. Ortiz]({{< ref "../../more/credits/#edgardo-m-ortiz">}}) (23.12.2024) \ No newline at end of file diff --git a/docs/content/assembly/extract/report.md b/docs/content/assembly/extract/report.md index 37f9f40..cac55a6 100644 --- a/docs/content/assembly/extract/report.md +++ b/docs/content/assembly/extract/report.md @@ -12,10 +12,10 @@ The output from this `extract` module, such as **how many loci are recovered, in However, collecting, summarizing, and visualizing such important information can be backbreaking, especially in a phylo"genomic" project which typically employs hundreds or even thousands of samples and loci. 
Don't worry, `Captus` automatically generates an informative report! -Open `captus-assembly_extract.report.html` with your browser (internet connection required) to explore your extraction result at various scales, from the global level to the single sample or single locus level. +Open `captus-extract_report.html` with your browser (internet connection required) to explore your extraction result at various scales, from the global level to the single sample or single locus level. {{% notice tip %}} -- The entire report is based on data stored in [`captus-assembly_extract.stats.tsv`]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}). +- The entire report is based on data stored in [`captus-extract_stats.tsv`]({{< relref "assembly/extract/output#26-captus-extract_statstsv" >}}). - All tables and plots in the report are interactive powered by [`Plotly`](https://plotly.com/python). Visit the following sites once to take full advantage of its interactivity: @@ -58,7 +58,7 @@ Hover mouse cursor over the heatmap to see detailed information about each singl |**Recovered length**|Percentage of reference sequence length recovered, calculated as (`Ref len matched` / Reference sequence length) * 100|%| |**Identity**|Sequence identity of the recovered sequence to the reference sequence|%| |**Score**|Score inspired by [`Scipio`](https://www.webscipio.org/help/webscipio#setting), calculated as (matches - mismatches) / reference sequence length|-| -|**Weighted score**|Weighted `score` to address multiple reference sequences per locus
(for details, read [ Information included in the table]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}))|-| +|**Weighted score**|Weighted `score` to address multiple reference sequences per locus
(for details, read [ Information included in the table]({{< relref "assembly/extract/output#26-captus-extract_statstsv" >}}))|-| |**Hit length**|Length of sequence recovered|bp| |**CDS length**|Total length of coding sequences (CDS) recovered (always `NA` when the `ref_type` is `nucl`)|bp| |**Intron length**|Total length of introns recovered (always `NA` when the `ref_type` is `nucl`)|bp| @@ -85,7 +85,7 @@ Switch this dropdown to change the variable to be shown as a heatmap among the f |**Identity**|Sequence identity of the recovered sequence to the reference sequence|%| |**Total Hits (Copies)**|Number of hits found (Values greater than 1 imply the presence of paralogs)|-| |**Score**|Score inspired by [`Scipio`](https://www.webscipio.org/help/webscipio#setting), calculated as (matches - mismatches) / reference sequence length|-| -|**Weighted Score**|Weighted `score` to address multiple reference sequences per locus
(for details, read [ Information included in the table]({{< relref "assembly/extract/output#26-captus-assembly_extractstatstsv" >}}))|-| +|**Weighted Score**|Weighted `score` to address multiple reference sequences per locus
(for details, read [ Information included in the table]({{< relref "assembly/extract/output#26-captus-extract_statstsv" >}}))|-| |**Number of Frameshifts**|Number of corrected frameshifts in the extracted sequence
(always `0` if the reference sequence is in nucleotide)|-| |**Contigs in Best Hit**|Number of contigs used to assemble the best hit|-| |**Best Hit L50**|Least number of contigs in best hit that contain 50% of the best hit's recovered length|-| diff --git a/docs/static/images/alignments.png b/docs/static/images/alignments.png index 48e2e89..338f742 100644 Binary files a/docs/static/images/alignments.png and b/docs/static/images/alignments.png differ diff --git a/docs/static/images/assemblies.png b/docs/static/images/assemblies.png index d1115b2..bb3dd6a 100644 Binary files a/docs/static/images/assemblies.png and b/docs/static/images/assemblies.png differ diff --git a/docs/static/images/assembly_annotated.png b/docs/static/images/assembly_annotated.png index 5b7b85e..09497a4 100644 Binary files a/docs/static/images/assembly_annotated.png and b/docs/static/images/assembly_annotated.png differ diff --git a/docs/static/images/clean_reads.png b/docs/static/images/clean_reads.png index bd99898..6e5fad6 100644 Binary files a/docs/static/images/clean_reads.png and b/docs/static/images/clean_reads.png differ diff --git a/docs/static/images/extractions.png b/docs/static/images/extractions.png index ac5aae4..85dad1b 100644 Binary files a/docs/static/images/extractions.png and b/docs/static/images/extractions.png differ diff --git a/docs/static/images/raw_reads.png b/docs/static/images/raw_reads.png index 0fe2229..72c742b 100644 Binary files a/docs/static/images/raw_reads.png and b/docs/static/images/raw_reads.png differ diff --git a/extras/filter_most_common_target_per_locus.py b/extras/filter_most_common_target_per_locus.py index f252a9f..c6d17e9 100755 --- a/extras/filter_most_common_target_per_locus.py +++ b/extras/filter_most_common_target_per_locus.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. 
Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus This file is part of Captus. Captus is free software: you can redistribute it and/or modify @@ -143,8 +143,8 @@ def main(): ) args = parser.parse_args() - aln_log_file = Path(args.captus_alignments_dir, "captus-assembly_align.log") - paralog_file = Path(args.captus_alignments_dir, "captus-assembly_align.paralogs.tsv") + aln_log_file = Path(args.captus_alignments_dir, "captus-align.log") + paralog_file = Path(args.captus_alignments_dir, "captus-align_paralogs.tsv") targets_paths = { "NUC": {"AA_path": None, "AA_fasta": None, "AA_names": [], "NT_path": None, "NT_fasta": None, "NT_names": []}, diff --git a/setup.py b/setup.py index f71eff8..16e210c 100755 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Copyright 2020-2024 Edgardo M. Ortiz (e.ortiz.v@gmail.com) +Copyright 2020-2025 Edgardo M. Ortiz (e.ortiz.v@gmail.com) https://github.com/edgardomortiz/Captus Captus' installation script