diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8e36b245..6089039e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,25 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## 3.0.3 [2024-08-27]
+
+### `Added`
+
+### `Changed`
+
+### `Fixed`
+
+- [#648](https://github.com/nf-core/mag/pull/648) - Fix sample ID/assembly ID check failure when no IDs match (reported by @zackhenny, fix by @prototaxites)
+- [#646](https://github.com/nf-core/mag/pull/646) - GTDB-Tk directory input now creates a value channel, so the process runs for all entries rather than only the first (reported by @amizeranschi, fix by @prototaxites).
+- [#639](https://github.com/nf-core/mag/pull/639) - Fix pipeline failure when a sample produces only a single bin (fix by @d-callan)
+- [#651](https://github.com/nf-core/mag/pull/651) - Replace base container for bash only modules to reduce number of containers in pipeline (reported and fixed by @harper357)
+- [#652](https://github.com/nf-core/mag/pull/652) - Fix documentation typo in using user-defined assembly parameters (reported and fixed by @amizeranschi)
+- [#653](https://github.com/nf-core/mag/pull/653) - Fix overwriting of per-bin 'raw' GUNC RUN output files (multi-bin summary tables not affected) (reported by @zackhenny and fixed by @jfy133)
+
+### `Dependencies`
+
+### `Deprecated`
+
## 3.0.2 [2024-07-04]
### `Added`
diff --git a/LICENSE b/LICENSE
index fa60ad5d..d90d555c 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) Hadrien Gourlé, Daniel Straub, Sabrina Krakau
+Copyright (c) Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index b2a723eb..405e298a 100644
--- a/README.md
+++ b/README.md
@@ -77,14 +77,25 @@ When group-wise co-assembly is enabled, `SPAdes` is run on accordingly pooled re
## Credits
-nf-core/mag was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se), [Daniel Straub](https://github.com/d4straub) and [Sabrina Krakau](https://github.com/skrakau) at the [Quantitative Biology Center (QBiC)](http://qbic.life). [James A. Fellows Yates](https://github.com/jfy133) and [Maxime Borry](https://github.com/maxibor) at the [Max Planck Institute for Evolutionary Anthropology](https://www.eva.mpg.de) joined in version 2.2.0. More recent contributors include [Jim Downie](https://github.com/prototaxites) and [Carson Miller](https://github.com/CarsonJM).
+nf-core/mag was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se), [Daniel Straub](https://github.com/d4straub) and [Sabrina Krakau](https://github.com/skrakau) at the [Quantitative Biology Center (QBiC)](http://qbic.life). [James A. Fellows Yates](https://github.com/jfy133) and [Maxime Borry](https://github.com/maxibor) at the [Max Planck Institute for Evolutionary Anthropology](https://www.eva.mpg.de) joined in version 2.2.0.
+
+Other code contributors include:
+
+- [Antonia Schuster](https://github.com/AntoniaSchuster)
+- [Alexander Ramos](https://github.com/alxndrdiaz)
+- [Carson Miller](https://github.com/CarsonJM)
+- [Daniel Lundin](https://github.com/erikrikarddaniel)
+- [Danielle Callan](https://github.com/d-callan)
+- [Gregory Sprenger](https://github.com/gregorysprenger)
+- [Jim Downie](https://github.com/prototaxites)
+- [Phil Palmer](https://github.com/PhilPalmer)
+- [@willros](https://github.com/willros)
Long read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross)
We thank the following people for their extensive assistance in the development of this pipeline:
- [Alexander Peltzer](https://github.com/apeltzer)
-- [Antonia Schuster](https://github.com/antoniaschuster)
- [Phil Ewels](https://github.com/ewels)
- [Gisela Gabernet](https://github.com/ggabernet)
- [Harshil Patel](https://github.com/drpatelh)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 47f9932a..10c24150 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
report_comment: >
- This report has been generated by the nf-core/mag
+ This report has been generated by the nf-core/mag
analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ documentation.
report_section_order:
"nf-core-mag-methods-description":
order: -1000
diff --git a/conf/base.config b/conf/base.config
index 4cbf14f0..2928a99b 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -58,10 +58,6 @@ process {
maxRetries = 2
}
- withName:CUSTOM_DUMPSOFTWAREVERSIONS {
- cache = false
- }
-
withName: BOWTIE2_HOST_REMOVAL_BUILD {
cpus = { check_max (10 * task.attempt, 'cpus' ) }
memory = { check_max (20.GB * task.attempt, 'memory' ) }
diff --git a/conf/modules.config b/conf/modules.config
index 93a82d83..81df5bc8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -447,7 +447,7 @@ process {
// Make sure to keep directory in sync with gunc_qc.nf
withName: 'GUNC_RUN' {
publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" },
+ path: { "${params.outdir}/GenomeBinning/QC/GUNC/raw/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${fasta.baseName}/" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
@@ -456,7 +456,7 @@ process {
// Make sure to keep directory in sync with gunc_qc.nf
withName: 'GUNC_MERGECHECKM' {
publishDir = [
- path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" },
+ path: { "${params.outdir}/GenomeBinning/QC/GUNC/checkmmerged/${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}/${checkm_file.baseName}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
diff --git a/docs/output.md b/docs/output.md
index d044e544..5f889056 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -454,7 +454,7 @@ For each bin or refined bin the median sequencing depth is computed based on the
- `GenomeBinning/depths/bins/`
- `bin_depths_summary.tsv`: Summary of bin sequencing depths for all samples. Depths are available for samples mapped against the corresponding assembly, i.e. according to the mapping strategy specified with `--binning_map_mode`. Only for short reads.
- `bin_refined_depths_summary.tsv`: Summary of sequencing depths for refined bins for all samples, if refinement was performed. Depths are available for samples mapped against the corresponding assembly, i.e. according to the mapping strategy specified with `--binning_map_mode`. Only for short reads.
- - `[assembler]-[binner]-[sample/group]-binDepths.heatmap.png`: Clustered heatmap showing bin abundances of the assembly across samples. Bin depths are transformed to centered log-ratios and bins as well as samples are clustered by Euclidean distance. Again, sample depths are available according to the mapping strategy specified with `--binning_map_mode`.
+ - `[assembler]-[binner]-[sample/group]-binDepths.heatmap.png`: Clustered heatmap showing bin abundances of the assembly across samples. Bin depths are transformed to centered log-ratios and bins as well as samples are clustered by Euclidean distance. Again, sample depths are available according to the mapping strategy specified with `--binning_map_mode`. If a sample produces only a single bin, a heatmap will not be provided.
@@ -565,9 +565,9 @@ If the parameter `--save_checkm_reference` is set, additionally the used the Che
- `[gunc-database].dmnd`
- `GUNC/`
- `raw/`
- - `[assembler]-[binner]-[domain]-[refinement]-[sample/group]/GUNC_checkM.merged.tsv`: Per sample GUNC [output](https://grp-bork.embl-community.io/gunc/output.html) containing with taxonomic and completeness QC statistics.
+      - `[assembler]-[binner]-[domain]-[refinement]-[sample/group]/[fasta input file name]/GUNC_checkM.merged.tsv`: Per sample GUNC [output](https://grp-bork.embl-community.io/gunc/output.html) containing taxonomic and completeness QC statistics.
- `checkmmerged/`
- - `[assembler]-[binner]-[domain]-[refinement]-[sample/group]/GUNC.progenomes_2.1.maxCSS_level.tsv`: Per sample GUNC output merged with output from [CheckM](#checkm)
+ - `[assembler]-[binner]-[domain]-[refinement]-[sample/group]/[checkm input file name]/GUNC.progenomes_2.1.maxCSS_level.tsv`: Per sample GUNC output merged with output from [CheckM](#checkm)
diff --git a/modules.json b/modules.json
index 16a805b8..0cab4e4e 100644
--- a/modules.json
+++ b/modules.json
@@ -87,11 +87,6 @@
"git_sha": "baa30accc6c50ea8a98662417d4f42ed18966353",
"installed_by": ["fasta_binning_concoct"]
},
- "custom/dumpsoftwareversions": {
- "branch": "master",
- "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
- "installed_by": ["modules"]
- },
"dastool/dastool": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
diff --git a/modules/local/adjust_maxbin2_ext.nf b/modules/local/adjust_maxbin2_ext.nf
index 4d7fecd0..70eae99c 100644
--- a/modules/local/adjust_maxbin2_ext.nf
+++ b/modules/local/adjust_maxbin2_ext.nf
@@ -2,11 +2,10 @@ process ADJUST_MAXBIN2_EXT {
tag "${meta.assembler}-${meta.id}"
label 'process_low'
- // Using container from multiqc since it'll be included anyway
- conda "bioconda::multiqc=1.12"
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
input:
tuple val(meta), path(bins)
diff --git a/modules/local/rename_postdastool.nf b/modules/local/rename_postdastool.nf
index 7d5a325e..6129dfbe 100644
--- a/modules/local/rename_postdastool.nf
+++ b/modules/local/rename_postdastool.nf
@@ -2,11 +2,10 @@ process RENAME_POSTDASTOOL {
tag "${meta.assembler}-${meta.id}"
label 'process_low'
- // Using container from multiqc since it'll be included anyway
- conda "bioconda::multiqc=1.12"
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
input:
tuple val(meta), path(bins)
diff --git a/modules/local/rename_predastool.nf b/modules/local/rename_predastool.nf
index cc3bab18..3d9373b8 100644
--- a/modules/local/rename_predastool.nf
+++ b/modules/local/rename_predastool.nf
@@ -2,11 +2,10 @@ process RENAME_PREDASTOOL {
tag "${meta.assembler}-${meta.binner}-${meta.id}"
label 'process_low'
- // Using container from multiqc since it'll be included anyway
- conda "bioconda::multiqc=1.12"
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'nf-core/ubuntu:20.04' }"
input:
tuple val(meta), path(bins)
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
deleted file mode 100644
index 9b3272bc..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: custom_dumpsoftwareversions
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - bioconda::multiqc=1.19
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
deleted file mode 100644
index f2187611..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ /dev/null
@@ -1,24 +0,0 @@
-process CUSTOM_DUMPSOFTWAREVERSIONS {
- label 'process_single'
-
- // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
- input:
- path versions
-
- output:
- path "software_versions.yml" , emit: yml
- path "software_versions_mqc.yml", emit: mqc_yml
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index 5f15a5fd..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
- - custom
- - dump
- - version
-tools:
- - custom:
- description: Custom module used to dump software versions within the nf-core pipeline template
- homepage: https://github.com/nf-core/tools
- documentation: https://github.com/nf-core/tools
- licence: ["MIT"]
-input:
- - versions:
- type: file
- description: YML file containing software versions
- pattern: "*.yml"
-output:
- - yml:
- type: file
- description: Standard YML file containing software versions
- pattern: "software_versions.yml"
- - mqc_yml:
- type: file
- description: MultiQC custom content YML file containing software versions
- pattern: "software_versions_mqc.yml"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@drpatelh"
- - "@grst"
-maintainers:
- - "@drpatelh"
- - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da033408..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
- """Generate a tabular HTML output of all versions for MultiQC."""
- html = [
- dedent(
- """\\
-
-
-
-
- Process Name |
- Software |
- Version |
-
-
- """
- )
- ]
- for process, tmp_versions in sorted(versions.items()):
- html.append("")
- for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
- html.append(
- dedent(
- f"""\\
-
- {process if (i == 0) else ''} |
- {tool} |
- {version} |
-
- """
- )
- )
- html.append("")
- html.append("
")
- return "\\n".join(html)
-
-
-def main():
- """Load all version files and generate merged output."""
- versions_this_module = {}
- versions_this_module["${task.process}"] = {
- "python": platform.python_version(),
- "yaml": yaml.__version__,
- }
-
- with open("$versions") as f:
- versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
- # aggregate versions by the module name (derived from fully-qualified process name)
- versions_by_module = {}
- for process, process_versions in versions_by_process.items():
- module = process.split(":")[-1]
- try:
- if versions_by_module[module] != process_versions:
- raise AssertionError(
- "We assume that software versions are the same between all modules. "
- "If you see this error-message it means you discovered an edge-case "
- "and should open an issue in nf-core/tools. "
- )
- except KeyError:
- versions_by_module[module] = process_versions
-
- versions_by_module["Workflow"] = {
- "Nextflow": "$workflow.nextflow.version",
- "$workflow.manifest.name": "$workflow.manifest.version",
- }
-
- versions_mqc = {
- "id": "software_versions",
- "section_name": "${workflow.manifest.name} Software Versions",
- "section_href": "https://github.com/${workflow.manifest.name}",
- "plot_type": "html",
- "description": "are collected at run time from the software output.",
- "data": _make_versions_html(versions_by_module),
- }
-
- with open("software_versions.yml", "w") as f:
- yaml.dump(versions_by_module, f, default_flow_style=False)
- with open("software_versions_mqc.yml", "w") as f:
- yaml.dump(versions_mqc, f, default_flow_style=False)
-
- with open("versions.yml", "w") as f:
- yaml.dump(versions_this_module, f, default_flow_style=False)
-
-
-if __name__ == "__main__":
- main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
deleted file mode 100644
index b1e1630b..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
+++ /dev/null
@@ -1,43 +0,0 @@
-nextflow_process {
-
- name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
- script "../main.nf"
- process "CUSTOM_DUMPSOFTWAREVERSIONS"
- tag "modules"
- tag "modules_nfcore"
- tag "custom"
- tag "dumpsoftwareversions"
- tag "custom/dumpsoftwareversions"
-
- test("Should run without failures") {
- when {
- process {
- """
- def tool1_version = '''
- TOOL1:
- tool1: 0.11.9
- '''.stripIndent()
-
- def tool2_version = '''
- TOOL2:
- tool2: 1.9
- '''.stripIndent()
-
- input[0] = Channel.of(tool1_version, tool2_version).collectFile()
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(
- process.out.versions,
- file(process.out.mqc_yml[0]).readLines()[0..10],
- file(process.out.yml[0]).readLines()[0..7]
- ).match()
- }
- )
- }
- }
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
deleted file mode 100644
index 5f59a936..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "Should run without failures": {
- "content": [
- [
- "versions.yml:md5,76d454d92244589d32455833f7c1ba6d"
- ],
- [
- "data: \"\\n\\n \\n \\n Process Name | \\n \\",
- " \\ Software | \\n Version | \\n
\\n \\n\\",
- " \\n\\n\\n CUSTOM_DUMPSOFTWAREVERSIONS | \\n python | \\n\\",
- " \\ 3.11.7 | \\n
\\n\\n\\n | \\n \\",
- " \\ yaml | \\n 5.4.1 | \\n
\\n\\n\\n\\",
- " \\n\\n TOOL1 | \\n tool1 | \\n\\",
- " \\ 0.11.9 | \\n
\\n\\n\\n\\n\\n TOOL2 | \\n\\",
- " \\ tool2 | \\n 1.9 | \\n
\\n\\n\\n\\",
- " \\n\\n Workflow | \\n Nextflow | \\n\\"
- ],
- [
- "CUSTOM_DUMPSOFTWAREVERSIONS:",
- " python: 3.11.7",
- " yaml: 5.4.1",
- "TOOL1:",
- " tool1: 0.11.9",
- "TOOL2:",
- " tool2: '1.9'",
- "Workflow:"
- ]
- ],
- "timestamp": "2024-01-09T23:01:18.710682"
- }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
deleted file mode 100644
index 405aa24a..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-custom/dumpsoftwareversions:
- - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/nextflow.config b/nextflow.config
index e2a4398c..9306ae99 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -385,12 +385,12 @@ dag {
manifest {
name = 'nf-core/mag'
- author = """Hadrien Gourlé, Daniel Straub, Sabrina Krakau"""
+ author = """Hadrien Gourlé, Daniel Straub, Sabrina Krakau, James A. Fellows Yates, Maxime Borry"""
homePage = 'https://github.com/nf-core/mag'
description = """Assembly, binning and annotation of metagenomes"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '3.0.2'
+ version = '3.0.3'
doi = '10.1093/nargab/lqac007'
}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b2847b0c..35e85825 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -605,12 +605,12 @@
"spades_options": {
"type": "string",
"description": "Additional custom options for SPAdes.",
- "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options=\"-k 21,33,55,77\")"
+ "help_text": "An example is adjusting k-mers (\"-k 21,33,55,77\") or adding [advanced options](https://github.com/ablab/spades#advanced-options). But not -t, -m, -o or --out-prefix, because these are already in use. Must be used like this: --spades_options \"-k 21,33,55,77\")"
},
"megahit_options": {
"type": "string",
"description": "Additional custom options for MEGAHIT.",
- "help_text": "An example is adjusting presets (e.g. \"--presets meta-large\"), k-mers (e.g. \"-k 21,33,55,77\") or adding other [advanced options](https://github.com/voutcn/megahit#advanced-usage). For example, increase the minimum k-mer in the event of an error message such as \"Too many vertices in the unitig graph, you may increase the kmer size to remove tons of erroneous kmers.\" in the MEGAHIT log file. But not --threads, --memory, -o or input read files, because these are already in use. Must be used like this: --megahit_options=\"--presets meta-large\""
+ "help_text": "An example is adjusting presets (e.g. \"--presets meta-large\"), k-mers (e.g. \"-k 21,33,55,77\") or adding other [advanced options](https://github.com/voutcn/megahit#advanced-usage). For example, increase the minimum k-mer in the event of an error message such as \"Too many vertices in the unitig graph, you may increase the kmer size to remove tons of erroneous kmers.\" in the MEGAHIT log file. But not --threads, --memory, -o or input read files, because these are already in use. Must be used like this: --megahit_options \"--presets meta-large\""
},
"skip_spades": {
"type": "boolean",
@@ -735,7 +735,7 @@
"bowtie2_mode": {
"type": "string",
"description": "Bowtie2 alignment mode",
- "help_text": "Bowtie2 alignment mode options, for example: `--very-fast` , `--very-sensitive-local -N 1` , ... Must be used like this: --bowtie2_mode=\"--very-sensitive\""
+ "help_text": "Bowtie2 alignment mode options, for example: `--very-fast` , `--very-sensitive-local -N 1` , ... Must be used like this: --bowtie2_mode \"--very-sensitive\""
},
"save_assembly_mapped_reads": {
"type": "boolean",
diff --git a/subworkflows/local/depths.nf b/subworkflows/local/depths.nf
index 87fc21cc..a2b69c95 100644
--- a/subworkflows/local/depths.nf
+++ b/subworkflows/local/depths.nf
@@ -10,6 +10,14 @@ def getColNo(filename) {
return lines[0].split('\t').size()
}
+/*
+ * Get number of rows in a file
+ */
+def getRowNo(filename) {
+ lines = file(filename).readLines()
+ return lines.size()
+}
+
workflow DEPTHS {
take:
bins_unbins //channel: val(meta), [ path(bins) ]
@@ -53,9 +61,10 @@ workflow DEPTHS {
.collectFile(name:'sample_groups.tsv'){ meta, reads -> meta.id + '\t' + meta.group + '\n' }
// Filter MAG depth files: use only those for plotting that contain depths for > 2 samples
+ // as well as > 2 bins
ch_mag_depths_plot = MAG_DEPTHS.out.depths
.map { meta, bin_depths_file ->
- if (getColNo(bin_depths_file) > 2) [ meta, bin_depths_file ]
+ if (getColNo(bin_depths_file) > 2 && getRowNo(bin_depths_file) > 2) [ meta, bin_depths_file ]
}
MAG_DEPTHS_PLOT ( ch_mag_depths_plot, ch_sample_groups.collect() )
diff --git a/subworkflows/local/gtdbtk.nf b/subworkflows/local/gtdbtk.nf
index 95e343c8..21ef25b2 100644
--- a/subworkflows/local/gtdbtk.nf
+++ b/subworkflows/local/gtdbtk.nf
@@ -74,8 +74,8 @@ workflow GTDBTK {
gtdb_dir = gtdb.listFiles()
ch_db_for_gtdbtk = Channel
.of(gtdb_dir)
- .map{['gtdb', it]}
- .groupTuple()
+ .collect()
+ .map { ["gtdb", it] }
} else {
error("Unsupported object given to --gtdb, database must be supplied as either a directory or a .tar.gz file!")
}
diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
index 592d7a09..29806112 100644
--- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf
@@ -150,7 +150,7 @@ workflow PIPELINE_INITIALISATION {
.toList()
.sort()
- ch_read_ids.cross(ch_assembly_ids)
+ ch_read_ids.concat(ch_assembly_ids).collect(flat: false) // need flat:false to ensure the two lists of IDs in the channels don't get smushed into a single list (and thus no ids1 and ids2 lists to compare)
.map { ids1, ids2 ->
if (ids1.sort() != ids2.sort()) {
exit 1, "[nf-core/mag] ERROR: supplied IDs or Groups in read and assembly CSV files do not match!"