From 60446a0ac294bf2b1ccaad77edbf9f9a59808e78 Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 26 Feb 2019 13:46:54 +0100 Subject: [PATCH 001/105] update credits and add sequence import via a manifest file --- README.md | 4 +++- main.nf | 42 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7aa34134..f2f5cadc 100644 --- a/README.md +++ b/README.md @@ -29,4 +29,6 @@ The nf-core/mag pipeline comes with documentation about the pipeline, found in t ### Credits -This pipeline was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se). +This pipeline was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se) and Daniel Straub ([@d4straub](https://github.com/d4straub)). + +Long read processing was inspired by [caspargross/HybridAssembly](https://github.com/caspargross/HybridAssembly) written by Caspar Gross [@caspargross](https://github.com/caspargross) diff --git a/main.nf b/main.nf index cc3dc02a..ddfda074 100644 --- a/main.nf +++ b/main.nf @@ -9,6 +9,7 @@ nf-core/mag Analysis Pipeline. Started 2018-05-22. https://github.com/nf-core/mag #### Authors Hadrien Gourlé HadrienG - hadriengourle.com> +Daniel Straub ---------------------------------------------------------------------------------------- */ @@ -29,6 +30,11 @@ def helpMessage() { -profile Configuration profile to use. Can use multiple (comma separated) Available: standard, conda, docker, singularity, awsbatch, test + Hybrid assembly: + --manifest Path to manifest file (must be surrounded with quotes) + Has 4 headerless columns (tab separated): Sample_Id, Long_Reads, Short_Reads_1, Short_Reads_2 + Only one file path per entry allowed, join multiple longread files if possible + Options: -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. 
--singleEnd Specifies that the input is single end reads @@ -122,7 +128,41 @@ params.ssu_evalue = 1e-6 /* * Create a channel for input read files */ - if(params.readPaths){ +def returnFile(it) { +// Return file if it exists + if (workflow.profile in ['test', 'localtest'] ) { + inputFile = file("$workflow.projectDir/" + it) + } else { + inputFile = file(it) + } + if (!file(inputFile).exists()) exit 1, "Missing file in TSV file: ${inputFile}, see --help for more information" + return inputFile +} + +if(params.manifest){ + manifestFile = file(params.manifest) + // extracts read files from TSV and distribute into channels + Channel + .from(manifestFile) + .ifEmpty {exit 1, log.info "Cannot find path file ${tsvFile}"} + .splitCsv(sep:'\t') + .map { row -> + def id = row[0] + def lr = returnFile( row[1] ) + def sr1 = returnFile( row[2] ) + def sr2 = returnFile( row[3] ) + [ id, lr, sr1, sr2 ] + } + .into { files_print; files_sr; files_preprocessing } + // report samples + files_print + .subscribe { log.info "\n$it\n" } + // prepare input for fastqc + files_sr + .map { id, lr, sr1, sr2 -> [ id, [ sr1, sr2 ] ] } + .into { read_files_fastqc; read_files_fastp } + +} else if(params.readPaths){ if(params.singleEnd){ Channel .from(params.readPaths) From 4b2f84a9a9e9d0dd47e4cdb85d0491649b1b0110 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 27 Feb 2019 13:26:55 +0100 Subject: [PATCH 002/105] integrate nanopore preprocessing and hybrid assembly --- environment.yml | 6 ++ main.nf | 155 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 153 insertions(+), 8 deletions(-) diff --git a/environment.yml b/environment.yml index 1aa4a9f3..9ea0e6aa 100644 --- a/environment.yml +++ b/environment.yml @@ -23,3 +23,9 @@ dependencies: - conda-forge::r-markdown=0.8 - checkm-genome=1.0.12 - refinem=0.0.24 + #long read specific: + #- nanoplot=1.20.0 #python >=3.5 -->incompatible! alternative is pauvre + - pauvre=0.1.86=py_1 + - filtlong=0.2.0=he941832_2 + #- porechop=0.2.3_seqan2.1.1 #python >=3.5,<3.6.0a0 -->incompatible! alternative is fastp + - spades=3.13.0=0 diff --git a/main.nf b/main.nf index ddfda074..041c06b3 100644 --- a/main.nf +++ b/main.nf @@ -153,10 +153,7 @@ if(params.manifest){ def sr2 = returnFile( row[3] ) [ id, lr, sr1, sr2 ] } - .into { files_print; files_sr; files_preprocessing } - // report samples - files_print - .subscribe { log.info "\n$it\n" } + .into { files_sr; files_all_raw } // prepare input for fastqc files_sr .map { id, lr, sr1, sr2 -> [ id, [ sr1, sr2 ] ] } @@ -270,6 +267,90 @@ process get_software_versions { } +/* + * Trim adapter sequences on long read nanopore files, dont use quality filtering just yet + * TODO: Common adapter sequences: https://github.com/rrwick/Porechop/blob/master/porechop/adapters.py + * TODO: Currently only produces quality plots, absolutely suboptimal! 
+ * TODO: NanoPlot (python 3.6+) or pauvre (process nanoqc, doesnt work atm) would be better + */ +process fastp_lr { + tag "$id" + publishDir "${params.outdir}/${id}/LongReadQC/", mode: 'copy' + + input: + set id, lr, sr1, sr2 from files_all_raw + + output: + set id, file('lr_trimmed.fastq.gz'), sr1, sr2 into files_lr_trimmed + //set id, lr, val("raw") into files_nanoplot_raw + file("fastp.html") + //file("fastp.json") + + script: + """ + fastp \ + --disable_length_filtering \ + --disable_quality_filtering \ + -w "${task.cpus}" \ + -i ${lr} \ + -o lr_trimmed.fastq.gz + """ +} + + +/* + * Quality filter long reads focus on length instead of quality to improve assembly size + * TODO: Should illumina reads be trimmed/processed before using them here? + */ +process filtlong { + tag "$id" + + input: + //set id, lr, sr1, sr2 from files_porechop //original + set id, lr, sr1, sr2 from files_lr_trimmed + + output: + set id, file("${id}_lr_filtlong.fastq.gz") into files_lr_filtered + //set id, file("${id}_lr_filtlong.fastq.gz"), val('filtered') into files_nanoplot_filtered + + script: + """ + filtlong \ + -1 ${sr1} \ + -2 ${sr2} \ + --min_length 1000 \ + --keep_percent 90 \ + --trim \ + --split 1000 \ + --length_weight 10 \ + ${lr} | gzip > ${id}_lr_filtlong.fastq.gz + """ +} + + +/* + * Quality check for nanopore reads and Quality/Length Plots + * TODO: make it work! + +process nanoqc { + tag "$id" + publishDir "${params.outdir}/${id}/LongReadQC_${type}/", mode: 'copy' + + input: + //set id, lr, type from files_nanoplot_raw.mix(files_nanoplot_filtered) //original + set id, lr, type from files_nanoplot_filtered + + output: + file '*' + + script: + """ + pauvre marginplot --fastq ${lr} + """ +} + */ + + /* * STEP 1 - Read trimming and pre/post qc */ @@ -302,7 +383,7 @@ process fastp { val trim_qual from params.trimming_quality output: - set val(name), file("${name}_trimmed*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc) + set val(name), file("${name}_trimmed*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) script: if ( !params.singleEnd ) { @@ -353,7 +434,8 @@ process megahit { set val(name), file(reads) from trimmed_reads_megahit output: - set val(name), file("megahit/${name}.contigs.fa") into (assembly_quast, assembly_metabat, assembly_refinem) + set val(name), file("megahit/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) + set val(name), file("megahit/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat script: if ( !params.singleEnd ) { @@ -370,6 +452,64 @@ process megahit { } +/* + * Spades hybrid Assembly running normal configuration + * TODO: use assembly_graph_spades + */ + + files_lr_filtered + .combine(trimmed_sr_spades, by: 0) + .set { files_pre_spades } + +process spades { + tag "$id" + publishDir "${params.outdir}/${id}/assembly_spades", mode: 'copy', pattern: "${id}*" + + input: + set id, file(lr), file(sr) from files_pre_spades + + output: + //set id, val('spades'), file("${id}_graph_spades.gfa") into assembly_graph_spades + set val("spades-$id"), file("${id}_scaffolds_spades.fasta") into (assembly_spades_to_quast, assembly_spades_to_refinem) + set val("spades-$id"), file("${id}_scaffolds_spades.fasta"), file(sr) into assembly_spades_to_metabat + file("${id}_contigs_spades.fasta") + file("${id}_spades.log") + + when: + params.manifest + !params.singleEnd + + script: + def maxmem = "${task.memory.toString().replaceAll(/[\sGB]/,'')}" + """ + 
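    # note: metaspades.py takes its --memory limit as a plain number of gigabytes,
    # hence "$maxmem" above is task.memory with whitespace and the "GB" unit stripped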
metaspades.py \ + --threads "${task.cpus}" \ + --memory "$maxmem" \ + --pe1-1 ${sr[0]} \ + --pe1-2 ${sr[1]} \ + --nanopore ${lr} \ + -o spades + cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph_spades.gfa + cp spades/scaffolds.fasta ${id}_scaffolds_spades.fasta + cp spades/contigs.fasta ${id}_contigs_spades.fasta + cp spades/spades.log ${id}_spades.log + """ +} + +/* + * Accumulate all assemblies for further steps + */ +assembly_spades_to_quast + .mix ( assembly_megahit_to_quast ) + .set { assembly_quast } +assembly_spades_to_metabat + .mix ( assembly_megahit_to_metabat ) + .set { assembly_metabat } +assembly_spades_to_refinem + .mix ( assembly_megahit_to_refinem ) + .set { assembly_refinem } + + process quast { tag "$name" // publishDir "${params.outdir}/quast/$name", mode: 'copy' @@ -395,8 +535,7 @@ process metabat { publishDir "${params.outdir}/metabat", mode: 'copy' input: - set val(_name), file(assembly) from assembly_metabat - set val(name), file(reads) from trimmed_reads_metabat + set val(name), file(assembly), file(reads) from assembly_metabat val(min_size) from params.min_contig_size output: From 625d000418e1055704f578113a35581e33c4eb2e Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 28 Feb 2019 13:54:26 +0100 Subject: [PATCH 003/105] change to python 3.6.7, use most popular nanopore preprocessing tools, replace python2 checkM with BUSCO, loose python2 RefineM --- conf/base.config | 5 +- environment.yml | 26 ++-- main.nf | 315 +++++++++++++++++------------------------------ 3 files changed, 125 insertions(+), 221 deletions(-) diff --git a/conf/base.config b/conf/base.config index 78297e52..9e7b647d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,10 +31,7 @@ process { withName: multiqc { errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } - withName: checkm_download_db { - time = 4.h - } - withName: refinem_download_db { + withName: busco_download_db { time = 4.h } } diff --git a/environment.yml b/environment.yml index 9ea0e6aa..02b83b1f 100644 --- a/environment.yml +++ b/environment.yml @@ -5,27 +5,27 @@ channels: - defaults dependencies: - fastqc=0.11.8 - - multiqc=1.6 + - multiqc=1.6=py36h24bf2e0_0 - fastp=0.19.5 - - megahit=1.1.3 + - megahit=1.1.3=py36_0 - metabat2=2.12.1 - samtools=1.9 - - bowtie2=2.3.4.3 - - quast=5.0.2 + - bowtie2=2.3.4.3=py36he860b03_1 + - quast=5.0.2=py36pl526ha92aebf_0 - hmmer=3.2.1 - prodigal=2.6.3 - pplacer=1.1.alpha19 - - diamond=0.9.22 - - python=2.7.15 + - diamond=0.9.24=ha87ae23_0 + - python=3.6.7=hd21baee_1002 - r=3.5.1 - - biopython=1.72 + - biopython=1.72=py36h04863e7_0 - krona=2.7 - conda-forge::r-markdown=0.8 - - checkm-genome=1.0.12 - - refinem=0.0.24 - #long read specific: - #- nanoplot=1.20.0 #python >=3.5 -->incompatible! alternative is pauvre - - pauvre=0.1.86=py_1 + - r-ggplot2=3.1.0 + - busco=3.0.2=py36_10 + - nanoplot=1.20.0=py36_0 - filtlong=0.2.0=he941832_2 - #- porechop=0.2.3_seqan2.1.1 #python >=3.5,<3.6.0a0 -->incompatible! 
alternative is fastp + - porechop=0.2.3_seqan2.1.1=py36h2d50403_3 + - nanolyse=1.1.0=py36_1 - spades=3.13.0=0 + - bandage=0.8.1=hb59a952_0 diff --git a/main.nf b/main.nf index 041c06b3..96893e91 100644 --- a/main.nf +++ b/main.nf @@ -80,6 +80,7 @@ params.name = false params.multiqc_config = "$baseDir/conf/multiqc_config.yaml" params.email = false params.plaintext_email = false +params.manifest = false ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") @@ -115,6 +116,7 @@ params.trimming_quality = 15 */ params.refinem = false params.no_checkm = false +params.no_busco = false params.min_contig_size = 1500 params.delta_cont = 5 params.merged_cont = 15 @@ -166,18 +168,21 @@ if(params.manifest){ .map { row -> [ row[0], [file(row[1][0])]] } .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } .into { read_files_fastqc; read_files_fastp } + files_all_raw = Channel.from() } else { Channel .from(params.readPaths) .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } .into { read_files_fastqc; read_files_fastp } + files_all_raw = Channel.from() } } else { Channel .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." } .into { read_files_fastqc; read_files_fastp } + files_all_raw = Channel.from() } @@ -239,6 +244,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' v_metabat.txt || true # fake checkm data setRoot so we can read checkm version number - chd=\$(readlink -f checkm_data) - printf "\$chd\\n\$chd\\n" | checkm data setRoot + #chd=\$(readlink -f checkm_data) + #printf "\$chd\\n\$chd\\n" | checkm data setRoot - checkm -h > v_checkm.txt - refinem -h > v_refinem.txt + #checkm -h > v_checkm.txt + echo unknown > v_checkm.txt + #refinem -h > v_refinem.txt + echo unknown > v_refinem.txt scrape_software_versions.py > software_versions_mqc.yaml """ } +/* + * Remove reads mapping to the lambda genome. + * TODO: missing reference, use: ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Enterobacteriophage_lambda/NCBI/1993-04-28/Enterobacteriophage_lambda_NCBI_1993-04-28.tar.gz + +process nanolyse { + tag "$id" + + input: + set id, lr, sr1, sr2 from files_all_raw + + output: + set id, file("${id}_nanolyse.fastq.gz"), sr1, sr2 into files_nanolyse + set id, lr, val("raw") into files_nanoplot_raw + + script: + """ + zcat ${lr} | NanoLyse | gzip > ${id}_nanolyse.fastq.gz + """ +} + */ /* - * Trim adapter sequences on long read nanopore files, dont use quality filtering just yet - * TODO: Common adapter sequences: https://github.com/rrwick/Porechop/blob/master/porechop/adapters.py - * TODO: Currently only produces quality plots, absolutely suboptimal! - * TODO: NanoPlot (python 3.6+) or pauvre (process nanoqc, doesnt work atm) would be better + * Trim adapter sequences on long read nanopore files */ -process fastp_lr { +process porechop { tag "$id" - publishDir "${params.outdir}/${id}/LongReadQC/", mode: 'copy' - + input: + //set id, lr, sr1, sr2 from files_nanolyse set id, lr, sr1, sr2 from files_all_raw output: - set id, file('lr_trimmed.fastq.gz'), sr1, sr2 into files_lr_trimmed - //set id, lr, val("raw") into files_nanoplot_raw - file("fastp.html") - //file("fastp.json") + set id, file("${id}_porechop.fastq"), sr1, sr2 into files_porechop + set id, lr, val("raw") into files_nanoplot_raw script: """ - fastp \ - --disable_length_filtering \ - --disable_quality_filtering \ - -w "${task.cpus}" \ - -i ${lr} \ - -o lr_trimmed.fastq.gz + porechop -i ${lr} -t "${task.cpus}" -o ${id}_porechop.fastq """ } @@ -306,12 +324,11 @@ process filtlong { tag "$id" input: - //set id, lr, sr1, sr2 from files_porechop //original - set id, lr, sr1, sr2 from files_lr_trimmed + set id, lr, sr1, sr2 from files_porechop output: set id, file("${id}_lr_filtlong.fastq.gz") into files_lr_filtered - //set id, file("${id}_lr_filtlong.fastq.gz"), val('filtered') into files_nanoplot_filtered + set id, file("${id}_lr_filtlong.fastq.gz"), val('filtered') into files_nanoplot_filtered script: """ @@ -330,25 +347,24 @@ process filtlong { /* * Quality check for nanopore reads and Quality/Length Plots - * TODO: make it work! 
- -process nanoqc { - tag "$id" - publishDir "${params.outdir}/${id}/LongReadQC_${type}/", mode: 'copy' + */ +process nanoplot { + tag{id} + publishDir "${params.outdir}/${id}/qc/longread_${type}/", mode: 'copy' input: - //set id, lr, type from files_nanoplot_raw.mix(files_nanoplot_filtered) //original - set id, lr, type from files_nanoplot_filtered + set id, lr, type from files_nanoplot_raw.mix(files_nanoplot_filtered) output: - file '*' + file '*.png' + file '*.html' + file '*.txt' script: """ - pauvre marginplot --fastq ${lr} + NanoPlot -t "${task.cpus}" -p ${type}_ --title ${id}_${type} -c darkblue --fastq ${lr} """ } - */ /* @@ -453,7 +469,7 @@ process megahit { /* - * Spades hybrid Assembly running normal configuration + * metaSpades hybrid Assembly * TODO: use assembly_graph_spades */ @@ -505,9 +521,6 @@ assembly_spades_to_quast assembly_spades_to_metabat .mix ( assembly_megahit_to_metabat ) .set { assembly_metabat } -assembly_spades_to_refinem - .mix ( assembly_megahit_to_refinem ) - .set { assembly_refinem } process quast { @@ -539,201 +552,94 @@ process metabat { val(min_size) from params.min_contig_size output: - set val(name), file("bins/") into metabat_bins + set val(name), file("bins/*") into metabat_bins mode flatten set val(name), file("${name}.bam") into (mapped_reads_checkm, mapped_reads_refinem) script: if ( !params.singleEnd ) { - """ - bowtie2-build --threads "${task.cpus}" "${assembly}" ref - bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" | \ - samtools view -@ "${task.cpus}" -bS | \ - samtools sort -@ "${task.cpus}" -o "${name}.bam" - samtools index "${name}.bam" - jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "bins/${name}" -m ${min_size} - """ - } - else { - """ - bowtie2-build --threads "${task.cpus}" "${assembly}" ref - bowtie2 -p "${task.cpus}" -x ref -U ${reads} | \ - samtools view -@ "${task.cpus}" -bS | \ - samtools sort -@ "${task.cpus}" -o "${name}.bam" - samtools index "${name}.bam" - jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o bins/"${name}" -m ${min_size} - """ + """ + bowtie2-build --threads "${task.cpus}" "${assembly}" ref + bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" | \ + samtools view -@ "${task.cpus}" -bS | \ + samtools sort -@ "${task.cpus}" -o "${name}.bam" + samtools index "${name}.bam" + jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" + metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "bins/${name}" -m ${min_size} + + #if bin foolder is empty + if [ -z \"\$(ls -A bins)\" ]; then + cp ${assembly} bins/ + fi + """ + } else { + """ + bowtie2-build --threads "${task.cpus}" "${assembly}" ref + bowtie2 -p "${task.cpus}" -x ref -U ${reads} | \ + samtools view -@ "${task.cpus}" -bS | \ + samtools sort -@ "${task.cpus}" -o "${name}.bam" + samtools index "${name}.bam" + jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" + metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o bins/"${name}" -m ${min_size} + """ } } -process checkm_download_db { +process busco_download_db { output: - file("checkm_data") into checkm_db + set val("bacteria_odb9"), file("bacteria_odb9*") into busco_db when: - params.no_checkm == false + !params.no_busco script: """ - mkdir -p checkm_data && \ - cd checkm_data && \ - curl -L -O https://data.ace.uq.edu.au/public/CheckM_databases/checkm_data_2015_01_16.tar.gz 
&& \ - tar xzf checkm_data_2015_01_16.tar.gz && \ - cd .. && \ - printf "checkm_data\ncheckm_data\n" | checkm data setRoot + wget https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz """ } -process checkm { - tag "$name" - publishDir "${params.outdir}/checkm", mode: 'copy' - - input: - set val(name), file(bins) from metabat_bins - set val(_name), file(bam) from mapped_reads_checkm - val(delta_cont) from params.delta_cont - val(merged_cont) from params.merged_cont - val(delta_compl) from params.delta_compl - val(abs_delta_cov) from params.abs_delta_cov - val(delta_gc) from params.delta_gc - file("checkm_data") from checkm_db - - output: - file("${name}_stats/lineage") into checkm_merge_results - file("${name}_stats/plots") into checkm_merge_plots - file("${name}_stats/qa.txt") into checkm_merge_qa - set val(name), file("${name}/") into checkm_merge_bins +metabat_bins + .combine(busco_db) + .set{ metabat_db_busco } - when: - params.no_checkm == false - - script: - """ - # re-run setRoot in case checkm has forgotten where the databases are located - chd=\$(readlink -f checkm_data) - printf "\$chd\\n\$chd\\n" | checkm data setRoot - - mkdir -p stats - checkm lineage_wf -t "${task.cpus}" -x fa "${bins}" stats/lineage > stats/qa.txt - checkm bin_qa_plot -x fa stats/lineage "${bins}" stats/plots - - samtools index "${bam}" - checkm coverage -t "${task.cpus}" -x fa "${bins}" stats/coverage.txt "${bam}" - checkm profile stats/coverage.txt > stats/profile.txt - checkm tree_qa stats/lineage > stats/tree_qa.txt - - samtools view "${bam}" | awk '{if (NR <=1000) print length(\$10)}' > stats/read_length.txt - - checkm taxon_set domain Bacteria bacteria.ms - checkm merge -t "${task.cpus}" -x fa --delta_cont "${delta_cont}" --merged_cont "${merged_cont}" \ - bacteria.ms "${bins}" stats/merger/ - - mkdir -p ${name} - mkdir -p ${name}_stats - merge_bins.py --delta_cont "${delta_cont}" --merged_cont "${merged_cont}" \ - --delta_compl "${delta_compl}" --abs_delta_cov "${abs_delta_cov}" --delta_gc "${delta_gc}" \ - --profile stats/profile.txt --tree stats/tree_qa.txt \ - --length stats/read_length.txt \ - --merger stats/merger/merger.tsv "${bins}" "${name}" - checkm lineage_wf -t "${task.cpus}" -x fa "${name}" ${name}_stats/lineage > ${name}_stats/qa.txt - checkm bin_qa_plot -x fa ${name}_stats/lineage "${name}" ${name}_stats/plots - """ -} - - -process refinem_download_db { - publishDir "${params.outdir}/db" - output: - file("refinem_databases/") into refinem_db - - when: - params.refinem == true && params.refinem_db ==false && params.no_checkm == false - - script: - """ - wget https://storage.googleapis.com/mag_refinem_db/refinem_databases.tar.gz - tar xzf refinem_databases.tar.gz - """ -} - -if (params.refinem_db) { - refinem_db = file(params.refinem_db) -} -mapped_reads_refinem.into {mapped_reads_refinem_1; mapped_reads_refinem_2} -assembly_refinem.into {assembly_refinem_1; assembly_refinem_2} - -process refinem_pass_1 { - tag "${name}" +/* + * BUSCO: Quantitative measures for the assessment of genome assembly + */ +process busco { + tag "${name}-${assembly}" + publishDir "${params.outdir}/busco", mode: 'copy' input: - set val(name), file(bins) from checkm_merge_bins - set val(_name), file(bam) from mapped_reads_refinem_1 - set val(__name), file(assembly) from assembly_refinem_1 - file(refinem_db) from refinem_db + set val(name), file(assembly), val(db_name), file(db) from metabat_db_busco output: - set val(name), file("bins_pass_1") into refinem_bins_pass_1 - - when: - params.refinem == true && 
params.no_checkm == false + file("short_summary_${name}-${assembly}.txt") into busco_summary + file("${name}-${assembly}_busco_figure.png") + file("${name}-${assembly}_busco_figure.R") + file("${name}-${assembly}_busco_log.txt") script: - """ - # filter by GC / tetra - samtools index "${bam}" - refinem scaffold_stats -x fa -c "${task.cpus}" "${assembly}" "${bins}" refinem "${bam}" - refinem outliers --no_plots refinem/scaffold_stats.tsv refinem - refinem filter_bins -x fa "${bins}" refinem/outliers.tsv new_bins_tmp_1 - # filter by taxonomic assignment - refinem call_genes -x fa -c "${task.cpus}" new_bins_tmp_1 gene_calls - refinem taxon_profile -c "${task.cpus}" gene_calls refinem/scaffold_stats.tsv \ - "${refinem_db}/gtdb_r86.dmnd" "${refinem_db}/gtdb_r86_taxonomy.2018-09-25.tsv" \ - taxon_profile - refinem taxon_filter -c "${task.cpus}" taxon_profile taxon_filter.tsv - refinem filter_bins -x fa new_bins_tmp_1 taxon_filter.tsv bins_pass_1 - rename 's/.filtered.filtered.fa/.fa/' bins_pass_1/*.filtered.filtered.fa + def binName = "${name}-${assembly}" + """ + tar -xf bacteria_odb9.tar.gz + run_BUSCO.py \ + --in ${assembly} \ + --lineage_path $db_name \ + --cpu "${task.cpus}" \ + --blast_single_core \ + --mode genome \ + --out ${binName} \ + >${binName}_busco_log.txt + generate_plot.py \ + --working_directory run_${binName} + cp run_${binName}/busco_figure.png ${binName}_busco_figure.png + cp run_${binName}/busco_figure.R ${binName}_busco_figure.R + cp run_${binName}/short_summary_${binName}.txt short_summary_${binName}.txt """ } -process refinem_pass_2 { - tag "${name}" - publishDir "${params.outdir}/", mode: 'copy' - - input: - set val(name), file(bins) from refinem_bins_pass_1 - set val(_name), file(bam) from mapped_reads_refinem_2 - set val(__name), file(assembly) from assembly_refinem_2 - file(refinem_db) from refinem_db - val(ssu_evalue) from params.ssu_evalue - output: - set val(name), file("refinem") into refinem_bins - - when: - params.refinem == true && params.no_checkm == false - - script: - """ - samtools index "${bam}" - # filter incongruent 16S - # first we need to re-run taxon profile on the new bin dir - refinem scaffold_stats -x fa -c "${task.cpus}" "${assembly}" "${bins}" stats "${bam}" - refinem call_genes -x fa -c "${task.cpus}" "${bins}" gene_calls - refinem taxon_profile -c "${task.cpus}" gene_calls stats/scaffold_stats.tsv \ - "${refinem_db}/gtdb_r86.dmnd" "${refinem_db}/gtdb_r86_taxonomy.2018-09-25.tsv" \ - taxon_profile - # then we can identify incongruent ssu - refinem ssu_erroneous -x fa -c "${task.cpus}" "${bins}" taxon_profile \ - "${refinem_db}/gtdb_r80_ssu" "${refinem_db}/gtdb_r86_taxonomy.2018-09-25.tsv" ssu - # TODO inspect top-hit and give e-value threshold to filter a contig - filter_ssu.py --evalue ${ssu_evalue} ssu/ssu_erroneous.tsv ssu/ssu_filtered.tsv - refinem filter_bins -x fa "${bins}" ssu/ssu_filtered.tsv refinem - rename 's/.filtered.fa/.fa/' refinem/*.filtered.fa - """ -} // TODO next releases: // good bins channels (from checkm or refinem) + annotation @@ -751,6 +657,7 @@ process multiqc { file (fastqc_raw:'fastqc/*') from fastqc_results.collect().ifEmpty([]) file (fastqc_trimmed:'fastqc/*') from fastqc_results_trimmed.collect().ifEmpty([]) file ('quast*/*') from quast_results.collect() + file ('short_summary_*.txt') from busco_summary.collect() output: file "*multiqc_report.html" into multiqc_report From fb0828f179b33cbad33491d015190eccf44e29ec Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 28 Feb 2019 14:42:08 +0100 Subject: [PATCH 004/105] 
improve busco database handling --- main.nf | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index 96893e91..832cf1c7 100644 --- a/main.nf +++ b/main.nf @@ -59,6 +59,11 @@ def helpMessage() { --delta_gc Maximum %GC difference to merge compatible bins (default: 3) --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) + Bin quality check: + --no_busco Disable bin QC + --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ + (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) + AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch --awsregion The AWS Region for your AWS Batch job to run on @@ -81,6 +86,7 @@ params.multiqc_config = "$baseDir/conf/multiqc_config.yaml" params.email = false params.plaintext_email = false params.manifest = false +params.busco_reference = "https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz" ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") @@ -130,6 +136,14 @@ params.ssu_evalue = 1e-6 /* * Create a channel for input read files */ +if(!params.no_busco){ + Channel + .fromPath( "${params.busco_reference}", checkIfExists: true ) + .set { file_busco_db } +} else { + Channel.from() +} + def returnFile(it) { // Return file if it exists if (workflow.profile in ['test', 'localtest'] ) { @@ -587,21 +601,24 @@ process metabat { process busco_download_db { - output: - set val("bacteria_odb9"), file("bacteria_odb9*") into busco_db + tag "${database.baseName}" - when: - !params.no_busco + input: + file(database) from file_busco_db + + output: + set val("${database.toString().replace(".tar.gz", "")}"), file("buscodb/*") into busco_db script: """ - wget https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz + mkdir buscodb + tar -xf ${database} -C buscodb """ } metabat_bins .combine(busco_db) - .set{ metabat_db_busco } + .set { metabat_db_busco } /* * BUSCO: Quantitative measures for the assessment of genome assembly @@ -622,7 +639,6 @@ process busco { script: def binName = "${name}-${assembly}" """ - tar -xf bacteria_odb9.tar.gz run_BUSCO.py \ --in ${assembly} \ --lineage_path $db_name \ From 00f1b81756ae5b37678005fb83f0d6281e0c6951 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 4 Mar 2019 15:07:40 +0100 Subject: [PATCH 005/105] add nanolyse and prevent fastq.gz publishing --- main.nf | 84 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/main.nf b/main.nf index 832cf1c7..1b9de512 100644 --- a/main.nf +++ b/main.nf @@ -60,10 +60,13 @@ def helpMessage() { --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) Bin quality check: - --no_busco Disable bin QC + --no_busco Disable bin QC with BUSCO (default: false) --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) + Long read preprocessing: + --no_nanolyse Skip removing reads similar to the internal standard lambda genome. 
+ AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch --awsregion The AWS Region for your AWS Batch job to run on @@ -74,7 +77,7 @@ def helpMessage() { * SET UP CONFIGURATION VARIABLES */ -// Show help emssage +// Show help message if (params.help){ helpMessage() exit 0 @@ -133,6 +136,11 @@ params.delta_gc = 3 params.refinem_db = false params.ssu_evalue = 1e-6 +/* + * long read preprocessing options + */ +params.no_nanolyse = false + /* * Create a channel for input read files */ @@ -288,26 +296,6 @@ process get_software_versions { """ } -/* - * Remove reads mapping to the lambda genome. - * TODO: missing reference, use: ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Enterobacteriophage_lambda/NCBI/1993-04-28/Enterobacteriophage_lambda_NCBI_1993-04-28.tar.gz - -process nanolyse { - tag "$id" - - input: - set id, lr, sr1, sr2 from files_all_raw - - output: - set id, file("${id}_nanolyse.fastq.gz"), sr1, sr2 into files_nanolyse - set id, lr, val("raw") into files_nanoplot_raw - - script: - """ - zcat ${lr} | NanoLyse | gzip > ${id}_nanolyse.fastq.gz - """ -} - */ /* * Trim adapter sequences on long read nanopore files @@ -316,7 +304,6 @@ process porechop { tag "$id" input: - //set id, lr, sr1, sr2 from files_nanolyse set id, lr, sr1, sr2 from files_all_raw output: @@ -329,6 +316,42 @@ process porechop { """ } +/* + * Remove reads mapping to the lambda genome. + * TODO: add lambda phage to igenomes.config? + */ +nanolyse_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" +if (!params.no_nanolyse) { + Channel + .fromPath( "${nanolyse_reference}", checkIfExists: true ) + .set { file_nanolyse_db } + process nanolyse { + tag "$id" + + publishDir "${params.outdir}", mode: 'copy', + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "nanolyse/$filename" : null} + + input: + set id, file(lr), sr1, sr2, file(nanolyse_db) from files_porechop.combine(file_nanolyse_db) + + output: + set id, file("${id}_nanolyse.fastq.gz"), sr1, sr2 into files_nanolyse + file("${id}_nanolyse_log.txt") + + script: + """ + cat ${lr} | NanoLyse --reference $nanolyse_db | gzip > ${id}_nanolyse.fastq.gz + + echo "NanoLyse reference: $nanolyse_reference" >${id}_nanolyse_log.txt + cat ${lr} | echo "total reads before NanoLyse: \$((`wc -l`/4))" >>${id}_nanolyse_log.txt + zcat ${id}_nanolyse.fastq.gz | echo "total reads after NanoLyse: \$((`wc -l`/4))" >>${id}_nanolyse_log.txt + """ + } +} else { + files_porechop + .set{ files_nanolyse } +} + /* * Quality filter long reads focus on length instead of quality to improve assembly size @@ -338,7 +361,7 @@ process filtlong { tag "$id" input: - set id, lr, sr1, sr2 from files_porechop + set id, lr, sr1, sr2 from files_nanolyse output: set id, file("${id}_lr_filtlong.fastq.gz") into files_lr_filtered @@ -363,7 +386,7 @@ process filtlong { * Quality check for nanopore reads and Quality/Length Plots */ process nanoplot { - tag{id} + tag "$id" publishDir "${params.outdir}/${id}/qc/longread_${type}/", mode: 'copy' input: @@ -458,7 +481,8 @@ process fastqc_trimmed { */ process megahit { tag "$name" - publishDir "${params.outdir}/", mode: 'copy' + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"$filename" : null} input: set val(name), file(reads) from trimmed_reads_megahit @@ -493,7 +517,8 @@ process megahit { process spades { tag "$id" - publishDir "${params.outdir}/${id}/assembly_spades", mode: 'copy', pattern: "${id}*" + publishDir "${params.outdir}/${id}/assembly_spades", mode: 'copy', pattern: "${id}*", + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? filename : null} input: set id, file(lr), file(sr) from files_pre_spades @@ -559,7 +584,10 @@ process quast { */ process metabat { tag "$name" - publishDir "${params.outdir}/metabat", mode: 'copy' + publishDir "${params.outdir}/metabat", mode: 'copy', + saveAs: {filename -> + if (filename.indexOf(".fastq.gz") > -1) filename + else if (filename.indexOf(".bam") > -1) filename } input: set val(name), file(assembly), file(reads) from assembly_metabat From a5c7626db744ed023f1ac8c9577f62ad3b3f4703 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 4 Mar 2019 15:20:36 +0100 Subject: [PATCH 006/105] add parameters for long read qc --- main.nf | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index 1b9de512..099c15f8 100644 --- a/main.nf +++ b/main.nf @@ -65,7 +65,11 @@ def helpMessage() { (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) Long read preprocessing: - --no_nanolyse Skip removing reads similar to the internal standard lambda genome. + --no_nanolyse Skip removing reads similar to the ONT internal standard lambda genome (default: false) + --filtlong_min_length Discard any read which is shorter than this value (default: 1000) + --filtlong_keep_percent Retain the best % of reads (default: 90) + --filtlong_split Split reads whenever so much consequence bases fail to match a k-mer in the reference (default: 1000) + --filtlong_length_weight The higher the more important is read length when choosing the best reads (default: 10) AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch @@ -140,6 +144,10 @@ params.ssu_evalue = 1e-6 * long read preprocessing options */ params.no_nanolyse = false +params.filtlong_min_length = 1000 +params.filtlong_keep_percent = 90 +params.filtlong_split = 1000 +params.filtlong_length_weight = 10 /* * Create a channel for input read files @@ -372,11 +380,11 @@ process filtlong { filtlong \ -1 ${sr1} \ -2 ${sr2} \ - --min_length 1000 \ - --keep_percent 90 \ + --min_length ${params.filtlong_min_length} \ + --keep_percent ${params.filtlong_keep_percent} \ --trim \ - --split 1000 \ - --length_weight 10 \ + --split ${params.filtlong_split} \ + --length_weight ${params.filtlong_length_weight} \ ${lr} | gzip > ${id}_lr_filtlong.fastq.gz """ } @@ -551,23 +559,13 @@ process spades { """ } -/* - * Accumulate all assemblies for further steps - */ -assembly_spades_to_quast - .mix ( assembly_megahit_to_quast ) - .set { assembly_quast } -assembly_spades_to_metabat - .mix ( assembly_megahit_to_metabat ) - .set { assembly_metabat } - process quast { tag "$name" // publishDir "${params.outdir}/quast/$name", mode: 'copy' input: - set val(name), file(assembly) from assembly_quast + set val(name), file(assembly) from assembly_spades_to_quast.mix(assembly_megahit_to_quast) output: file("quast_${name}/*") into quast_results @@ -590,7 +588,7 @@ process metabat { else if (filename.indexOf(".bam") > -1) filename } input: - set val(name), file(assembly), file(reads) from assembly_metabat + set val(name), file(assembly), file(reads) from 
assembly_spades_to_metabat.mix(assembly_megahit_to_metabat) val(min_size) from params.min_contig_size output: @@ -622,6 +620,11 @@ process metabat { samtools index "${name}.bam" jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o bins/"${name}" -m ${min_size} + + #if bin foolder is empty + if [ -z \"\$(ls -A bins)\" ]; then + cp ${assembly} bins/ + fi """ } From f4fceab650d1e7e6746f96a92e7c6dbc85d7abc2 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 4 Mar 2019 16:30:26 +0100 Subject: [PATCH 007/105] add phix filtering for illumina reads --- main.nf | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 099c15f8..8b37d866 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,7 @@ def helpMessage() { --adapter_reverse Sequence of 3' adapter to remove in the reverse reads --mean_quality Mean qualified quality value for keeping read (default: 15) --trimming_quality Trimming quality value for the sliding window (default: 15) + --keep_phix Keep reads similar to the Illumina internal standard PhiX genome (default: false) Binning options: --refinem Enable bin refinement with refinem. @@ -65,9 +66,9 @@ def helpMessage() { (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) Long read preprocessing: - --no_nanolyse Skip removing reads similar to the ONT internal standard lambda genome (default: false) + --no_nanolyse Keep reads similar to the ONT internal standard lambda genome (default: false) --filtlong_min_length Discard any read which is shorter than this value (default: 1000) - --filtlong_keep_percent Retain the best % of reads (default: 90) + --filtlong_keep_percent keep the best % of reads (default: 90) --filtlong_split Split reads whenever so much consequence bases fail to match a k-mer in the reference (default: 1000) --filtlong_length_weight The higher the more important is read length when choosing the best reads (default: 10) @@ -123,6 +124,7 @@ params.adapter_forward = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA" params.adapter_reverse = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" params.mean_quality = 15 params.trimming_quality = 15 +params.keep_phix /* * binning options @@ -337,7 +339,7 @@ if (!params.no_nanolyse) { tag "$id" publishDir "${params.outdir}", mode: 'copy', - saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "nanolyse/$filename" : null} + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"nanolyse/$filename" : null} input: set id, file(lr), sr1, sr2, file(nanolyse_db) from files_porechop.combine(file_nanolyse_db) @@ -444,7 +446,7 @@ process fastp { val trim_qual from params.trimming_quality output: - set val(name), file("${name}_trimmed*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) + set val(name), file("${name}_trimmed*.fastq.gz") into trimmed_reads script: if ( !params.singleEnd ) { @@ -466,6 +468,54 @@ process fastp { } } +/* + * Remove PhiX contamination from Illumina reads + * TODO: function downloaddb that also makes index + */ +if(!params.keep_phix) { + phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" + Channel + .fromPath( "${phix_reference}", checkIfExists: true ) + .set { file_phix_db } + process remove_phix { + tag "$name" + + publishDir "${params.outdir}", mode: 'copy', + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "remove_phix/$filename" : null} + + input: + set val(name), file(reads), file(genome) from trimmed_reads.combine(file_phix_db) + + output: + set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) + file("${name}_remove_phix_log.txt") + + script: + if ( !params.singleEnd ) { + """ + bowtie2-build --threads "${task.cpus}" "${genome}" ref + bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" --un-conc-gz unmapped_%.fastq.gz + + echo "Bowtie2 reference: ${genome}" >${name}_remove_phix_log.txt + zcat ${reads[0]} | echo "Read pairs before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + zcat unmapped_1.fastq.gz | echo "Read pairs after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + """ + } else { + """ + bowtie2-build --threads "${task.cpus}" "${genome}" ref + bowtie2 -p "${task.cpus}" -x ref -U ${reads} --un-gz unmapped.fastq.gz + + echo "Bowtie2 reference: $ref" >${name}_remove_phix_log.txt + zcat ${reads[0]} | echo "Reads before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + zcat unmapped_1.fastq.gz | echo "Reads after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + """ + } + + } +} else { + trimmed_reads.into {trimmed_reads_megahit; trimmed_reads_metabat; trimmed_reads_fastqc; trimmed_sr_spades} +} + process fastqc_trimmed { tag "$name" From 1ef108b1309b34cb1602cb1017a332ff8bd56468 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 4 Mar 2019 16:39:58 +0100 Subject: [PATCH 008/105] resolve file name collision for multiqc introduced earlier --- main.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 8b37d866..14a312c4 100644 --- a/main.nf +++ b/main.nf @@ -471,6 +471,7 @@ process fastp { /* * Remove PhiX contamination from Illumina reads * TODO: function downloaddb that also makes index + * TODO: PhiX into/from iGenomes.conf? 
*/ if(!params.keep_phix) { phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" @@ -494,20 +495,20 @@ if(!params.keep_phix) { if ( !params.singleEnd ) { """ bowtie2-build --threads "${task.cpus}" "${genome}" ref - bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" --un-conc-gz unmapped_%.fastq.gz + bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" --un-conc-gz ${name}_unmapped_%.fastq.gz echo "Bowtie2 reference: ${genome}" >${name}_remove_phix_log.txt zcat ${reads[0]} | echo "Read pairs before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt - zcat unmapped_1.fastq.gz | echo "Read pairs after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + zcat ${name}_unmapped_1.fastq.gz | echo "Read pairs after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt """ } else { """ bowtie2-build --threads "${task.cpus}" "${genome}" ref - bowtie2 -p "${task.cpus}" -x ref -U ${reads} --un-gz unmapped.fastq.gz + bowtie2 -p "${task.cpus}" -x ref -U ${reads} --un-gz ${name}_unmapped.fastq.gz echo "Bowtie2 reference: $ref" >${name}_remove_phix_log.txt zcat ${reads[0]} | echo "Reads before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt - zcat unmapped_1.fastq.gz | echo "Reads after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt + zcat ${name}_unmapped.fastq.gz | echo "Reads after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt """ } From c315083030b08e67502f498be5556da414ea84e7 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 11:03:52 +0100 Subject: [PATCH 009/105] fix centralized config --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index dfb5a76b..9825bada 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ params { clusterOptions = false tracedir = "${params.outdir}/pipeline_info" custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" } // load base config by default for all pipelines From 4d5f2043b20c0dca61883697430ff635ad952897 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 11:12:22 +0100 Subject: [PATCH 010/105] update software version reporting --- bin/scrape_software_versions.py | 22 ++++++++++++++++++---- main.nf | 15 +++++++++++---- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 6eb41868..6b76b7d2 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -11,8 +11,15 @@ 'fastp': ['v_fastp.txt', r"fastp (\S+)"], 'megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], 'metabat': ['v_metabat.txt', r"version (\S+)"], - 'checkm': ['v_checkm.txt', r"CheckM v(\S+)"], - 'refinem': ['v_refinem.txt', r"RefineM v(\S+)"] + #'checkm': ['v_checkm.txt', r"CheckM v(\S+)"], + #'refinem': ['v_refinem.txt', r"RefineM v(\S+)"], + 'NanoPlot': ['v_nanoplot.txt', r"NanoPlot (\S+)"], + 'Filtlong': ['v_filtlong.txt', r"Filtlong v(\S+)"], + 'porechop': ['v_porechop.txt', r"(\S+)"], + 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], + 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], + 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], + 'Bandage': ['v_bandage.txt', r"Version: (\S+)"] } results = OrderedDict() results['nf-core/mag'] = 'N/A' @@ -22,8 +29,15 @@ results['fastp'] = 'N/A' results['megahit'] = 'N/A' 
results['metabat'] = 'N/A' -results['checkm'] = 'N/A' -results['refinem'] = 'N/A' +#results['checkm'] = 'N/A' +#results['refinem'] = 'N/A' +results['NanoPlot'] = 'N/A' +results['Filtlong'] = 'N/A' +results['porechop'] = 'N/A' +results['NanoLyse'] = 'N/A' +results['SPAdes'] = 'N/A' +results['BUSCO'] = 'N/A' +results['Bandage'] = 'N/A' # Search each file using its regex for k, v in regexes.items(): diff --git a/main.nf b/main.nf index 14a312c4..8fd34d32 100644 --- a/main.nf +++ b/main.nf @@ -118,13 +118,13 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ } /* - * trimming options + * short read preprocessing options */ params.adapter_forward = "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA" params.adapter_reverse = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" params.mean_quality = 15 params.trimming_quality = 15 -params.keep_phix +params.keep_phix = false /* * binning options @@ -299,9 +299,16 @@ process get_software_versions { #printf "\$chd\\n\$chd\\n" | checkm data setRoot #checkm -h > v_checkm.txt - echo unknown > v_checkm.txt #refinem -h > v_refinem.txt - echo unknown > v_refinem.txt + + NanoPlot --version > v_nanoplot.txt + filtlong --version > v_filtlong.txt + porechop --version > v_porechop.txt + NanoLyse --version > v_nanolyse.txt + spades.py --version > v_spades.txt + run_BUSCO.py --version > v_busco.txt + Bandage --version > v_bandage.txt + scrape_software_versions.py > software_versions_mqc.yaml """ } From 779a0088d90ad6f129017d97a5d8f07f44817748 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 11:13:41 +0100 Subject: [PATCH 011/105] fix centralized config once more --- nextflow.config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nextflow.config b/nextflow.config index 9825bada..9671ab67 100644 --- a/nextflow.config +++ b/nextflow.config @@ -30,6 +30,16 @@ includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + profiles { conda { process.conda = "$baseDir/environment.yml" } docker { From 73e8113e70bd37f9adf357b5daf89bec1063d021 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 12:58:52 +0100 Subject: [PATCH 012/105] add bandage for assembly graphs --- main.nf | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 8fd34d32..f8579796 100644 --- a/main.nf +++ b/main.nf @@ -48,6 +48,10 @@ def helpMessage() { --trimming_quality Trimming quality value for the sliding window (default: 15) --keep_phix Keep reads similar to the Illumina internal standard PhiX genome (default: false) + Assembly: + --skip_assembly_graph Skip drawing an assembly graph + --bandage_mindepth Reduce the assembly graph to include only contig with this depth (default: false) + Binning options: --refinem Enable bin refinement with refinem. 
--refinem_db Path to refinem database @@ -95,6 +99,8 @@ params.email = false params.plaintext_email = false params.manifest = false params.busco_reference = "https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz" +params.skip_assembly_graph = false +params.bandage_mindepth = false ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") @@ -574,7 +580,6 @@ process megahit { /* * metaSpades hybrid Assembly - * TODO: use assembly_graph_spades */ files_lr_filtered @@ -590,7 +595,7 @@ process spades { set id, file(lr), file(sr) from files_pre_spades output: - //set id, val('spades'), file("${id}_graph_spades.gfa") into assembly_graph_spades + set id, val('spades'), file("${id}_graph_spades.gfa") into assembly_graph_spades set val("spades-$id"), file("${id}_scaffolds_spades.fasta") into (assembly_spades_to_quast, assembly_spades_to_refinem) set val("spades-$id"), file("${id}_scaffolds_spades.fasta"), file(sr) into assembly_spades_to_metabat file("${id}_contigs_spades.fasta") @@ -635,6 +640,34 @@ process quast { } +// Use Bandage to draw a (reduced) picture of the assembly graph +process draw_assembly_graph { + tag "$id" + publishDir "${params.outdir}/${id}/assembly_spades/", mode: 'copy' + + input: + set id, type, file(gfa) from assembly_graph_spades + + output: + file("${id}*") + + when: + !params.skip_assembly_graph + + script: + if(!params.bandage_mindepth) { + """ + Bandage image ${gfa} ${id}_${type}_graph.svg + """ + } else { + """ + Bandage reduce ${gfa} ${id}_graph_spades_reduced.gfa --scope depthrange --mindepth ${params.bandage_mindepth} --maxdepth 1000000 + Bandage image ${id}_graph_spades_reduced.gfa ${id}_${type}_graph.svg + """ + } +} + + /* * STEP 3 - Binning */ From a92f6878f2e07824c47f165345d5fb5b2281e3c5 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 13:25:40 +0100 Subject: [PATCH 013/105] optimize phix filtering --- main.nf | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index f8579796..262349b0 100644 --- a/main.nf +++ b/main.nf @@ -41,16 +41,18 @@ def helpMessage() { --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - Trimming options: + Short read preprocessing: --adapter_forward Sequence of 3' adapter to remove in the forward reads --adapter_reverse Sequence of 3' adapter to remove in the reverse reads --mean_quality Mean qualified quality value for keeping read (default: 15) --trimming_quality Trimming quality value for the sliding window (default: 15) --keep_phix Keep reads similar to the Illumina internal standard PhiX genome (default: false) + --phix_reference Download path for PhiX database + (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz") Assembly: - --skip_assembly_graph Skip drawing an assembly graph - --bandage_mindepth Reduce the assembly graph to include only contig with this depth (default: false) + --skip_assembly_graph Skip drawing an assembly graph with Bandage + --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 0) Binning options: --refinem Enable bin refinement with refinem. 
@@ -65,7 +67,7 @@ def helpMessage() { --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) Bin quality check: - --no_busco Disable bin QC with BUSCO (default: false) + --skip_busco Disable bin QC with BUSCO (default: false) --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) @@ -131,13 +133,15 @@ params.adapter_reverse = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" params.mean_quality = 15 params.trimming_quality = 15 params.keep_phix = false +params.phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" + /* * binning options */ params.refinem = false params.no_checkm = false -params.no_busco = false +params.skip_busco = false params.min_contig_size = 1500 params.delta_cont = 5 params.merged_cont = 15 @@ -160,7 +164,7 @@ params.filtlong_length_weight = 10 /* * Create a channel for input read files */ -if(!params.no_busco){ +if(!params.skip_busco){ Channel .fromPath( "${params.busco_reference}", checkIfExists: true ) .set { file_busco_db } @@ -168,6 +172,12 @@ if(!params.no_busco){ Channel.from() } +if(!params.keep_phix) { + Channel + .fromPath( "${params.phix_reference}", checkIfExists: true ) + .into { file_phix_db; file_phix_db_log } +} + def returnFile(it) { // Return file if it exists if (workflow.profile in ['test', 'localtest'] ) { @@ -282,7 +292,6 @@ ${summary.collect { k,v -> "
$k
${v ?: ' filename.indexOf(".fastq.gz") == -1 ? "remove_phix/$filename" : null} input: - set val(name), file(reads), file(genome) from trimmed_reads.combine(file_phix_db) + file(ref) from phix_db + set val(name), file(reads), file(genome) from trimmed_reads.combine(file_phix_db_log) output: set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) @@ -507,19 +528,15 @@ if(!params.keep_phix) { script: if ( !params.singleEnd ) { """ - bowtie2-build --threads "${task.cpus}" "${genome}" ref bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" --un-conc-gz ${name}_unmapped_%.fastq.gz - echo "Bowtie2 reference: ${genome}" >${name}_remove_phix_log.txt zcat ${reads[0]} | echo "Read pairs before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt zcat ${name}_unmapped_1.fastq.gz | echo "Read pairs after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt """ } else { """ - bowtie2-build --threads "${task.cpus}" "${genome}" ref bowtie2 -p "${task.cpus}" -x ref -U ${reads} --un-gz ${name}_unmapped.fastq.gz - - echo "Bowtie2 reference: $ref" >${name}_remove_phix_log.txt + echo "Bowtie2 reference: ${genome}" >${name}_remove_phix_log.txt zcat ${reads[0]} | echo "Reads before removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt zcat ${name}_unmapped.fastq.gz | echo "Reads after removal: \$((`wc -l`/4))" >>${name}_remove_phix_log.txt """ From 900a0ad4ab75b626bd711ecf6c437d904fa57f64 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 6 Mar 2019 13:31:29 +0100 Subject: [PATCH 014/105] Don#t load configs twice --- main.nf | 2 -- nextflow.config | 6 ------ 2 files changed, 8 deletions(-) diff --git a/main.nf b/main.nf index 262349b0..411e5a97 100644 --- a/main.nf +++ b/main.nf @@ -387,7 +387,6 @@ if (!params.no_nanolyse) { /* * Quality filter long reads focus on length instead of quality to improve assembly size - * TODO: Should illumina reads be trimmed/processed before using them here? */ process filtlong { tag "$id" @@ -492,7 +491,6 @@ process fastp { /* * Remove PhiX contamination from Illumina reads - * TODO: function downloaddb that also makes index * TODO: PhiX into/from iGenomes.conf? 
*/ if(!params.keep_phix) { diff --git a/nextflow.config b/nextflow.config index 9671ab67..f6fa729e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,12 +27,6 @@ params { // load base config by default for all pipelines includeConfig 'conf/base.config' -// Load nf-core custom profiles from different Institutions -includeConfig "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}/nfcore_custom.config" - -// Load base.config by default for all pipelines -includeConfig 'conf/base.config' - // Load nf-core custom profiles from different Institutions try { includeConfig "${params.custom_config_base}/nfcore_custom.config" From e55b4c6a41c1bd1fd46edd4280ed9b07b712f746 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 8 Mar 2019 16:53:02 +0100 Subject: [PATCH 015/105] fix phix filtering and generate overall busco plot --- main.nf | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/main.nf b/main.nf index 411e5a97..5f6ace02 100644 --- a/main.nf +++ b/main.nf @@ -175,7 +175,7 @@ if(!params.skip_busco){ if(!params.keep_phix) { Channel .fromPath( "${params.phix_reference}", checkIfExists: true ) - .into { file_phix_db; file_phix_db_log } + .set { file_phix_db } } def returnFile(it) { @@ -501,7 +501,7 @@ if(!params.keep_phix) { file(genome) from file_phix_db output: - file("ref*") into phix_db + set file(genome), file("ref*") into phix_db script: """ @@ -516,8 +516,7 @@ if(!params.keep_phix) { saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "remove_phix/$filename" : null} input: - file(ref) from phix_db - set val(name), file(reads), file(genome) from trimmed_reads.combine(file_phix_db_log) + set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) output: set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) @@ -575,8 +574,8 @@ process megahit { set val(name), file(reads) from trimmed_reads_megahit output: - set val(name), file("megahit/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) - set val(name), file("megahit/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat + set val("megahit-$name"), file("megahit/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) + set val("megahit-$name"), file("megahit/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat script: if ( !params.singleEnd ) { @@ -657,7 +656,7 @@ process quast { // Use Bandage to draw a (reduced) picture of the assembly graph process draw_assembly_graph { - tag "$id" + tag "$type-$id" publishDir "${params.outdir}/${id}/assembly_spades/", mode: 'copy' input: @@ -768,9 +767,7 @@ process busco { set val(name), file(assembly), val(db_name), file(db) from metabat_db_busco output: - file("short_summary_${name}-${assembly}.txt") into busco_summary - file("${name}-${assembly}_busco_figure.png") - file("${name}-${assembly}_busco_figure.R") + file("short_summary_${name}-${assembly}.txt") into (busco_summary_to_multiqc, busco_summary_to_plot) file("${name}-${assembly}_busco_log.txt") script: @@ -784,15 +781,28 @@ process busco { --mode genome \ --out ${binName} \ >${binName}_busco_log.txt - generate_plot.py \ - --working_directory run_${binName} - cp run_${binName}/busco_figure.png ${binName}_busco_figure.png - cp run_${binName}/busco_figure.R ${binName}_busco_figure.R cp run_${binName}/short_summary_${binName}.txt short_summary_${binName}.txt """ } 
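+/*
+ * Plot all BUSCO summaries: BUSCO's bundled generate_plot.py picks up every
+ * short_summary_*.txt file in its --working_directory and renders them into a single
+ * busco_figure.png (plus the busco_figure.R used to draw it), giving one combined
+ * completeness overview across all bins.
+ */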
+process busco_plot { + publishDir "${params.outdir}/busco", mode: 'copy' + + input: + file(summaries) from busco_summary_to_plot.collect() + + output: + file("busco_figure.png") + file("busco_figure.R") + + script: + """ + generate_plot.py \ + --working_directory . + """ +} + // TODO next releases: // good bins channels (from checkm or refinem) + annotation @@ -810,7 +820,7 @@ process multiqc { file (fastqc_raw:'fastqc/*') from fastqc_results.collect().ifEmpty([]) file (fastqc_trimmed:'fastqc/*') from fastqc_results_trimmed.collect().ifEmpty([]) file ('quast*/*') from quast_results.collect() - file ('short_summary_*.txt') from busco_summary.collect() + file ('short_summary_*.txt') from busco_summary_to_multiqc.collect() output: file "*multiqc_report.html" into multiqc_report From aa9da9a431443615efa5d3311e25f2118eedf358 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 21 Mar 2019 16:31:32 +0100 Subject: [PATCH 016/105] improve output structure --- main.nf | 130 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 58 deletions(-) diff --git a/main.nf b/main.nf index 5f6ace02..69ba6f42 100644 --- a/main.nf +++ b/main.nf @@ -67,7 +67,7 @@ def helpMessage() { --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) Bin quality check: - --skip_busco Disable bin QC with BUSCO (default: false) + --skip_busco Disable bin QC with BUSCO (default: false) --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) @@ -361,7 +361,7 @@ if (!params.no_nanolyse) { tag "$id" publishDir "${params.outdir}", mode: 'copy', - saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "nanolyse/$filename" : null} + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "QC_longreads/NanoLyse/$filename" : null} input: set id, file(lr), sr1, sr2, file(nanolyse_db) from files_porechop.combine(file_nanolyse_db) @@ -418,7 +418,7 @@ process filtlong { */ process nanoplot { tag "$id" - publishDir "${params.outdir}/${id}/qc/longread_${type}/", mode: 'copy' + publishDir "${params.outdir}/QC_longreads/NanoPlot_${id}", mode: 'copy' input: set id, lr, type from files_nanoplot_raw.mix(files_nanoplot_filtered) @@ -440,7 +440,8 @@ process nanoplot { */ process fastqc_raw { tag "$name" - publishDir "${params.outdir}/fastqc", mode: 'copy' + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".zip") == -1 ? "QC_shortreads/fastqc/$filename" : null} input: set val(name), file(reads) from read_files_fastqc @@ -457,7 +458,8 @@ process fastqc_raw { process fastp { tag "$name" - // publishDir "${params.outdir}/fastp", mode: 'copy' + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "QC_shortreads/fastp/$name/$filename" : null} input: set val(name), file(reads) from read_files_fastp @@ -468,6 +470,7 @@ process fastp { output: set val(name), file("${name}_trimmed*.fastq.gz") into trimmed_reads + file("fastp.*") script: if ( !params.singleEnd ) { @@ -513,7 +516,7 @@ if(!params.keep_phix) { tag "$name" publishDir "${params.outdir}", mode: 'copy', - saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "remove_phix/$filename" : null} + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"QC_shortreads/remove_phix/$filename" : null} input: set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) @@ -547,7 +550,8 @@ if(!params.keep_phix) { process fastqc_trimmed { tag "$name" - publishDir "${params.outdir}/fastqc", mode: 'copy' + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".zip") == -1 ? "QC_shortreads/fastqc/$filename" : null} input: set val(name), file(reads) from trimmed_reads_fastqc @@ -568,25 +572,26 @@ process fastqc_trimmed { process megahit { tag "$name" publishDir "${params.outdir}/", mode: 'copy', - saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "$filename" : null} + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "Assembly/$filename" : null} input: set val(name), file(reads) from trimmed_reads_megahit output: - set val("megahit-$name"), file("megahit/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) - set val("megahit-$name"), file("megahit/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat + set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) + set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat + file("MEGAHIT/*.log") script: if ( !params.singleEnd ) { """ - megahit -t "${task.cpus}" -1 "${reads[0]}" -2 "${reads[1]}" -o megahit \ + megahit -t "${task.cpus}" -1 "${reads[0]}" -2 "${reads[1]}" -o MEGAHIT \ --out-prefix "${name}" """ } else { """ - megahit -t "${task.cpus}" -r ${reads} -o megahit --out-prefix "${name}" + megahit -t "${task.cpus}" -r ${reads} -o MEGAHIT --out-prefix "${name}" """ } } @@ -602,18 +607,18 @@ process megahit { process spades { tag "$id" - publishDir "${params.outdir}/${id}/assembly_spades", mode: 'copy', pattern: "${id}*", - saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? filename : null} + publishDir "${params.outdir}/", mode: 'copy', pattern: "${id}*", + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"Assembly/SPAdes/$filename" : null} input: set id, file(lr), file(sr) from files_pre_spades output: - set id, val('spades'), file("${id}_graph_spades.gfa") into assembly_graph_spades - set val("spades-$id"), file("${id}_scaffolds_spades.fasta") into (assembly_spades_to_quast, assembly_spades_to_refinem) - set val("spades-$id"), file("${id}_scaffolds_spades.fasta"), file(sr) into assembly_spades_to_metabat - file("${id}_contigs_spades.fasta") - file("${id}_spades.log") + set id, val("SPAdes"), file("${id}_graph.gfa") into assembly_graph_spades + set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta") into (assembly_spades_to_quast, assembly_spades_to_refinem) + set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta"), file(sr) into assembly_spades_to_metabat + file("${id}_contigs.fasta") + file("${id}_log.txt") when: params.manifest @@ -629,27 +634,27 @@ process spades { --pe1-2 ${sr[1]} \ --nanopore ${lr} \ -o spades - cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph_spades.gfa - cp spades/scaffolds.fasta ${id}_scaffolds_spades.fasta - cp spades/contigs.fasta ${id}_contigs_spades.fasta - cp spades/spades.log ${id}_spades.log + cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa + cp spades/scaffolds.fasta ${id}_scaffolds.fasta + cp spades/contigs.fasta ${id}_contigs.fasta + cp spades/spades.log ${id}_log.txt """ } process quast { - tag "$name" - // publishDir "${params.outdir}/quast/$name", mode: 'copy' + tag "$assembler-$sample" + publishDir "${params.outdir}/Assembly/$assembler", mode: 'copy' input: - set val(name), file(assembly) from assembly_spades_to_quast.mix(assembly_megahit_to_quast) + set val(assembler), val(sample), file(assembly) from assembly_spades_to_quast.mix(assembly_megahit_to_quast) output: - file("quast_${name}/*") into quast_results + file("${sample}_QC/*") into quast_results script: """ - quast.py --threads "${task.cpus}" -l "${name}" "${assembly}" -o "quast_${name}" + quast.py --threads "${task.cpus}" -l "${assembler}-${sample}" "${assembly}" -o "${sample}_QC" """ } @@ -657,7 +662,7 @@ process quast { // Use Bandage to draw a (reduced) picture of the assembly graph process draw_assembly_graph { tag "$type-$id" - publishDir "${params.outdir}/${id}/assembly_spades/", mode: 'copy' + publishDir "${params.outdir}/Assembly/SPAdes/", mode: 'copy' input: set id, type, file(gfa) from assembly_graph_spades @@ -686,21 +691,20 @@ process draw_assembly_graph { * STEP 3 - Binning */ process metabat { - tag "$name" - publishDir "${params.outdir}/metabat", mode: 'copy', - saveAs: {filename -> - if (filename.indexOf(".fastq.gz") > -1) filename - else if (filename.indexOf(".bam") > -1) filename } + tag "$assembler-$sample" + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> filename.indexOf(".bam") == -1 ? 
"GenomeBinning/$filename" : null} input: - set val(name), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat) + set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat) val(min_size) from params.min_contig_size output: - set val(name), file("bins/*") into metabat_bins mode flatten - set val(name), file("${name}.bam") into (mapped_reads_checkm, mapped_reads_refinem) + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten + set val(assembler), file("${assembler}-${sample}.bam") into (mapped_reads_checkm, mapped_reads_refinem) script: + def name = "${assembler}-${sample}" if ( !params.singleEnd ) { """ bowtie2-build --threads "${task.cpus}" "${assembly}" ref @@ -709,11 +713,11 @@ process metabat { samtools sort -@ "${task.cpus}" -o "${name}.bam" samtools index "${name}.bam" jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "bins/${name}" -m ${min_size} + metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} #if bin foolder is empty - if [ -z \"\$(ls -A bins)\" ]; then - cp ${assembly} bins/ + if [ -z \"\$(ls -A MetaBAT2)\" ]; then + cp ${assembly} MetaBAT2/${assembler}-${assembly} fi """ } else { @@ -724,11 +728,11 @@ process metabat { samtools sort -@ "${task.cpus}" -o "${name}.bam" samtools index "${name}.bam" jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o bins/"${name}" -m ${min_size} + metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} #if bin foolder is empty - if [ -z \"\$(ls -A bins)\" ]; then - cp ${assembly} bins/ + if [ -z \"\$(ls -A MetaBAT2)\" ]; then + cp ${assembly} MetaBAT2/${assembler}-${assembly} fi """ } @@ -760,18 +764,18 @@ metabat_bins * BUSCO: Quantitative measures for the assessment of genome assembly */ process busco { - tag "${name}-${assembly}" - publishDir "${params.outdir}/busco", mode: 'copy' + tag "${assembly}" + publishDir "${params.outdir}/GenomeBinning/QC/raw/", mode: 'copy' input: - set val(name), file(assembly), val(db_name), file(db) from metabat_db_busco + set val(assembler), val(sample), file(assembly), val(db_name), file(db) from metabat_db_busco output: - file("short_summary_${name}-${assembly}.txt") into (busco_summary_to_multiqc, busco_summary_to_plot) - file("${name}-${assembly}_busco_log.txt") + file("short_summary_${assembly}.txt") into (busco_summary_to_multiqc, busco_summary_to_plot) + val("$assembler-$sample") into busco_assembler_sample_to_plot + file("${assembly}_busco_log.txt") script: - def binName = "${name}-${assembly}" """ run_BUSCO.py \ --in ${assembly} \ @@ -779,27 +783,37 @@ process busco { --cpu "${task.cpus}" \ --blast_single_core \ --mode genome \ - --out ${binName} \ - >${binName}_busco_log.txt - cp run_${binName}/short_summary_${binName}.txt short_summary_${binName}.txt + --out ${assembly} \ + >${assembly}_busco_log.txt + cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt """ } -process busco_plot { - publishDir "${params.outdir}/busco", mode: 'copy' +process busco_plot { + publishDir "${params.outdir}/GenomeBinning/QC/", mode: 'copy' input: file(summaries) from busco_summary_to_plot.collect() + val(assemblersample) from busco_assembler_sample_to_plot.collect() output: - file("busco_figure.png") - file("busco_figure.R") + 
file("*busco_figure.png") + file("*busco_figure.R") script: + def assemblersampleunique = assemblersample.unique() """ - generate_plot.py \ - --working_directory . + assemblersample=\$(echo \"$assemblersampleunique\" | sed 's/[][]//g') + IFS=', ' read -r -a assemblersamples <<< \"\$assemblersample\" + + for name in \"\${assemblersamples[@]}\"; do + mkdir \${name} + cp short_summary_\${name}* \${name}/ + generate_plot.py --working_directory \${name} + cp \${name}/busco_figure.png \${name}-busco_figure.png + cp \${name}/busco_figure.R \${name}-busco_figure.R + done """ } From eadf8e8613e9cb8616347fc136696c72bb8ae1a3 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 21 Mar 2019 16:40:09 +0100 Subject: [PATCH 017/105] remove all of checkm and refinem --- bin/scrape_software_versions.py | 4 ---- main.nf | 36 ++------------------------------- 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 6b76b7d2..7078b83f 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -11,8 +11,6 @@ 'fastp': ['v_fastp.txt', r"fastp (\S+)"], 'megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], 'metabat': ['v_metabat.txt', r"version (\S+)"], - #'checkm': ['v_checkm.txt', r"CheckM v(\S+)"], - #'refinem': ['v_refinem.txt', r"RefineM v(\S+)"], 'NanoPlot': ['v_nanoplot.txt', r"NanoPlot (\S+)"], 'Filtlong': ['v_filtlong.txt', r"Filtlong v(\S+)"], 'porechop': ['v_porechop.txt', r"(\S+)"], @@ -29,8 +27,6 @@ results['fastp'] = 'N/A' results['megahit'] = 'N/A' results['metabat'] = 'N/A' -#results['checkm'] = 'N/A' -#results['refinem'] = 'N/A' results['NanoPlot'] = 'N/A' results['Filtlong'] = 'N/A' results['porechop'] = 'N/A' diff --git a/main.nf b/main.nf index 69ba6f42..0d75e10d 100644 --- a/main.nf +++ b/main.nf @@ -55,16 +55,7 @@ def helpMessage() { --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 0) Binning options: - --refinem Enable bin refinement with refinem. 
- --refinem_db Path to refinem database - --no_checkm Disable bin QC and merging with checkm --min_contig_size Minimum contig size to be considered for binning (default: 1500) - --delta_cont Maximum increase in contamination to merge compatible bins (default: 5) - --merged_cont Maximum total contamination to merge compatible bins (default: 15) - --delta_compl Minimum increase in completion to merge compatible bins (default: 10) - --abs_delta_cov Minimum coverage ratio to merge compatible bins (default: 0.75) - --delta_gc Maximum %GC difference to merge compatible bins (default: 3) - --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) Bin quality check: --skip_busco Disable bin QC with BUSCO (default: false) @@ -139,18 +130,8 @@ params.phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Entero /* * binning options */ -params.refinem = false -params.no_checkm = false params.skip_busco = false params.min_contig_size = 1500 -params.delta_cont = 5 -params.merged_cont = 15 -params.delta_compl = 10 -params.abs_delta_cov = 0.75 -params.delta_gc = 3 - -params.refinem_db = false -params.ssu_evalue = 1e-6 /* * long read preprocessing options @@ -308,14 +289,6 @@ process get_software_versions { fastp -v 2> v_fastp.txt megahit --version > v_megahit.txt metabat2 -h 2> v_metabat.txt || true - - # fake checkm data setRoot so we can read checkm version number - #chd=\$(readlink -f checkm_data) - #printf "\$chd\\n\$chd\\n" | checkm data setRoot - - #checkm -h > v_checkm.txt - #refinem -h > v_refinem.txt - NanoPlot --version > v_nanoplot.txt filtlong --version > v_filtlong.txt porechop --version > v_porechop.txt @@ -578,7 +551,7 @@ process megahit { set val(name), file(reads) from trimmed_reads_megahit output: - set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa") into (assembly_megahit_to_quast, assembly_megahit_to_refinem) + set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa") into assembly_megahit_to_quast set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat file("MEGAHIT/*.log") @@ -615,7 +588,7 @@ process spades { output: set id, val("SPAdes"), file("${id}_graph.gfa") into assembly_graph_spades - set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta") into (assembly_spades_to_quast, assembly_spades_to_refinem) + set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spades_to_quast set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta"), file(sr) into assembly_spades_to_metabat file("${id}_contigs.fasta") file("${id}_log.txt") @@ -701,7 +674,6 @@ process metabat { output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten - set val(assembler), file("${assembler}-${sample}.bam") into (mapped_reads_checkm, mapped_reads_refinem) script: def name = "${assembler}-${sample}" @@ -818,10 +790,6 @@ process busco_plot { } -// TODO next releases: -// good bins channels (from checkm or refinem) + annotation -// multiqc modules for checkm/refinem - /* * STEP 4 - MultiQC */ From 4d4165268fbca70563aa068a04deccc5a29add98 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 21 Mar 2019 17:06:45 +0100 Subject: [PATCH 018/105] update params --- main.nf | 80 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/main.nf b/main.nf index 0d75e10d..8e6e98ad 100644 --- a/main.nf +++ b/main.nf @@ -24,6 +24,7 @@ def helpMessage() { The typical command for running the pipeline is 
as follows: nextflow run nf-core/mag --reads '*_R{1,2}.fastq.gz' -profile docker + nextflow run nf-core/mag --manifest 'manifest.tsv' -profile docker Mandatory arguments: --reads Path to input data (must be surrounded with quotes) @@ -31,9 +32,9 @@ def helpMessage() { Available: standard, conda, docker, singularity, awsbatch, test Hybrid assembly: - --manifest Path to manifest file (must be surrounded with quotes) + --manifest Path to manifest file (must be surrounded with quotes), required for hybrid assembly with metaSPAdes Has 4 headerless columns (tab separated): Sample_Id, Long_Reads, Short_Reads_1, Short_Reads_2 - Only one file path per entry allowed, join multiple longread files if possible + Only one file path per entry allowed Options: -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. @@ -50,11 +51,22 @@ def helpMessage() { --phix_reference Download path for PhiX database (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz") + Long read preprocessing: + --longreads_min_length Discard any read which is shorter than this value (default: 1000) + --longreads_keep_percent Keep this percent of bases (default: 90) + --longreads_length_weight The higher the more important is read length when choosing the best reads (default: 10) + --keep_lambda Keep reads similar to the ONT internal standard Escherichia virus Lambda genome (default: false) + --lambda_reference Download path for Escherichia virus Lambda genome + (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz") + Assembly: + --skip_spades Skip SPAdes assembly + --skip_megahit Skip MEGAHIT assembly --skip_assembly_graph Skip drawing an assembly graph with Bandage - --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 0) + --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 20) Binning options: + --skip_binning Skip metagenome binning --min_contig_size Minimum contig size to be considered for binning (default: 1500) Bin quality check: @@ -62,13 +74,6 @@ def helpMessage() { --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) - Long read preprocessing: - --no_nanolyse Keep reads similar to the ONT internal standard lambda genome (default: false) - --filtlong_min_length Discard any read which is shorter than this value (default: 1000) - --filtlong_keep_percent keep the best % of reads (default: 90) - --filtlong_split Split reads whenever so much consequence bases fail to match a k-mer in the reference (default: 1000) - --filtlong_length_weight The higher the more important is read length when choosing the best reads (default: 10) - AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch --awsregion The AWS Region for your AWS Batch job to run on @@ -92,8 +97,6 @@ params.email = false params.plaintext_email = false params.manifest = false params.busco_reference = "https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz" -params.skip_assembly_graph = false -params.bandage_mindepth = false ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = 
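// (Illustration only, not part of this patch: a file given via --manifest is a plain, headerless,
//  tab-separated table with one sample per row, i.e. Sample_Id, Long_Reads, Short_Reads_1, Short_Reads_2.
//  With hypothetical sample names and paths it could look like:
//      sample1	data/sample1_nanopore.fastq.gz	data/sample1_R1.fastq.gz	data/sample1_R2.fastq.gz
//      sample2	data/sample2_nanopore.fastq.gz	data/sample2_R1.fastq.gz	data/sample2_R2.fastq.gz
//  )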
Channel.fromPath("$baseDir/docs/output.md") @@ -130,17 +133,27 @@ params.phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Entero /* * binning options */ +params.skip_binning = false params.skip_busco = false params.min_contig_size = 1500 +/* + * assembly options + */ +params.skip_spades = false +params.skip_megahit = false +params.skip_assembly_graph = false +params.bandage_mindepth = 20 + /* * long read preprocessing options */ -params.no_nanolyse = false -params.filtlong_min_length = 1000 -params.filtlong_keep_percent = 90 +params.keep_lambda = false +params.longreads_min_length = 1000 +params.longreads_keep_percent = 90 params.filtlong_split = 1000 -params.filtlong_length_weight = 10 +params.longreads_length_weight = 10 +params.lambda_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" /* * Create a channel for input read files @@ -325,10 +338,9 @@ process porechop { * Remove reads mapping to the lambda genome. * TODO: add lambda phage to igenomes.config? */ -nanolyse_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" -if (!params.no_nanolyse) { +if (!params.keep_lambda) { Channel - .fromPath( "${nanolyse_reference}", checkIfExists: true ) + .fromPath( "${params.lambda_reference}", checkIfExists: true ) .set { file_nanolyse_db } process nanolyse { tag "$id" @@ -347,7 +359,7 @@ if (!params.no_nanolyse) { """ cat ${lr} | NanoLyse --reference $nanolyse_db | gzip > ${id}_nanolyse.fastq.gz - echo "NanoLyse reference: $nanolyse_reference" >${id}_nanolyse_log.txt + echo "NanoLyse reference: $params.lambda_reference" >${id}_nanolyse_log.txt cat ${lr} | echo "total reads before NanoLyse: \$((`wc -l`/4))" >>${id}_nanolyse_log.txt zcat ${id}_nanolyse.fastq.gz | echo "total reads after NanoLyse: \$((`wc -l`/4))" >>${id}_nanolyse_log.txt """ @@ -376,11 +388,10 @@ process filtlong { filtlong \ -1 ${sr1} \ -2 ${sr2} \ - --min_length ${params.filtlong_min_length} \ - --keep_percent ${params.filtlong_keep_percent} \ + --min_length ${params.longreads_min_length} \ + --keep_percent ${params.longreads_keep_percent} \ --trim \ - --split ${params.filtlong_split} \ - --length_weight ${params.filtlong_length_weight} \ + --length_weight ${params.longreads_length_weight} \ ${lr} | gzip > ${id}_lr_filtlong.fastq.gz """ } @@ -555,6 +566,9 @@ process megahit { set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat file("MEGAHIT/*.log") + when: + !params.skip_megahit + script: if ( !params.singleEnd ) { """ @@ -596,6 +610,7 @@ process spades { when: params.manifest !params.singleEnd + !params.skip_spades script: def maxmem = "${task.memory.toString().replaceAll(/[\sGB]/,'')}" @@ -647,16 +662,10 @@ process draw_assembly_graph { !params.skip_assembly_graph script: - if(!params.bandage_mindepth) { - """ - Bandage image ${gfa} ${id}_${type}_graph.svg - """ - } else { - """ - Bandage reduce ${gfa} ${id}_graph_spades_reduced.gfa --scope depthrange --mindepth ${params.bandage_mindepth} --maxdepth 1000000 - Bandage image ${id}_graph_spades_reduced.gfa ${id}_${type}_graph.svg - """ - } + """ + Bandage reduce ${gfa} ${id}_graph_spades_reduced.gfa --scope depthrange --mindepth ${params.bandage_mindepth} --maxdepth 1000000 + Bandage image ${id}_graph_spades_reduced.gfa 
${id}_${type}_graph.svg + """ } @@ -675,6 +684,9 @@ process metabat { output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten + when: + !params.skip_binning + script: def name = "${assembler}-${sample}" if ( !params.singleEnd ) { From 1d32389156e10afc7428710940d20cc180d452ef Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 21 Mar 2019 17:21:27 +0100 Subject: [PATCH 019/105] add many supposedly resource-hungry processes to base.conf --- conf/base.config | 56 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 9e7b647d..2545cb25 100644 --- a/conf/base.config +++ b/conf/base.config @@ -18,7 +18,7 @@ process { time = { check_max( 2.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } - maxRetries = 1 + maxRetries = 3 maxErrors = '-1' // Process-specific resource requirements @@ -34,6 +34,60 @@ process { withName: busco_download_db { time = 4.h } + withName: porcechop { + cpus = { check_max (2 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (3.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: nanolyse { + cpus = { check_max (2 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (3.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: filtlong { + cpus = { check_max (2 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (3.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: megahit { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (20.GB * task.attempt, 'memory' ) } + time = { check_max (8.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: spades { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (20.GB * task.attempt, 'memory' ) } + time = { check_max (8.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: quast { + cpus = { check_max (2 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (2.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: metabat { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (20.GB * task.attempt, 'memory' ) } + time = { check_max (8.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: busco { + cpus = { check_max (4 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (4.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: busco_plot { + cpus = { check_max (1 * task.attempt, 'cpus' ) } + memory = { check_max (10.GB * task.attempt, 'memory' ) } + time = { check_max (4.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } + } } params { From b09e8803196e05a4c0986454e1d00386a332bc2b Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 22 Mar 2019 15:12:43 +0100 Subject: [PATCH 020/105] add SPAdes for Illumina-only --- conf/base.config | 6 +++++ main.nf | 70 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/conf/base.config b/conf/base.config index 2545cb25..6d3e14ce 100644 --- a/conf/base.config +++ b/conf/base.config @@ -64,6 +64,12 @@ process { time = { check_max (8.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } + withName: spadeshybrid { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (20.GB * task.attempt, 'memory' ) } + time = { check_max (8.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } withName: quast { cpus = { check_max (2 * task.attempt, 'cpus' ) } memory = { check_max (10.GB * task.attempt, 'memory' ) } diff --git a/main.nf b/main.nf index 8e6e98ad..7f530a57 100644 --- a/main.nf +++ b/main.nf @@ -60,10 +60,11 @@ def helpMessage() { (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz") Assembly: - --skip_spades Skip SPAdes assembly + --skip_spades Skip Illumina-only SPAdes assembly + --skip_spadeshybrid Skip SPAdes hybrid assembly (only available when using manifest input) --skip_megahit Skip MEGAHIT assembly --skip_assembly_graph Skip drawing an assembly graph with Bandage - --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 20) + --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 10) Binning options: --skip_binning Skip metagenome binning @@ -141,9 +142,10 @@ params.min_contig_size = 1500 * assembly options */ params.skip_spades = false +params.skip_spadeshybrid = false params.skip_megahit = false params.skip_assembly_graph = false -params.bandage_mindepth = 20 +params.bandage_mindepth = 10 /* * long read preprocessing options @@ -506,7 +508,7 @@ if(!params.keep_phix) { set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) output: - set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spades) + set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades) file("${name}_remove_phix_log.txt") script: @@ -528,7 +530,7 @@ if(!params.keep_phix) { } } else { - trimmed_reads.into {trimmed_reads_megahit; trimmed_reads_metabat; trimmed_reads_fastqc; trimmed_sr_spades} + trimmed_reads.into {trimmed_reads_megahit; trimmed_reads_metabat; trimmed_reads_fastqc; trimmed_sr_spadeshybrid; trimmed_reads_spades} } @@ -589,8 +591,46 @@ process megahit { */ files_lr_filtered - .combine(trimmed_sr_spades, by: 0) - .set { files_pre_spades } + .combine(trimmed_sr_spadeshybrid, by: 0) + .set { files_pre_spadeshybrid } + +process spadeshybrid { + tag "$id" + publishDir "${params.outdir}/", mode: 'copy', pattern: "${id}*", + saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"Assembly/SPAdesHybrid/$filename" : null} + + input: + set id, file(lr), file(sr) from files_pre_spadeshybrid + + output: + set id, val("SPAdesHybrid"), file("${id}_graph.gfa") into assembly_graph_spadeshybrid + set val("SPAdesHybrid"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spadeshybrid_to_quast + set val("SPAdesHybrid"), val("$id"), file("${id}_scaffolds.fasta"), file(sr) into assembly_spadeshybrid_to_metabat + file("${id}_contigs.fasta") + file("${id}_log.txt") + + when: + params.manifest + !params.singleEnd + !params.skip_spadeshybrid + + script: + def maxmem = "${task.memory.toString().replaceAll(/[\sGB]/,'')}" + """ + metaspades.py \ + --threads "${task.cpus}" \ + --memory "$maxmem" \ + --pe1-1 ${sr[0]} \ + --pe1-2 ${sr[1]} \ + --nanopore ${lr} \ + -o spades + cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa + cp spades/scaffolds.fasta ${id}_scaffolds.fasta + cp spades/contigs.fasta ${id}_contigs.fasta + cp spades/spades.log ${id}_log.txt + """ +} + process spades { tag "$id" @@ -598,7 +638,7 @@ process spades { saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? "Assembly/SPAdes/$filename" : null} input: - set id, file(lr), file(sr) from files_pre_spades + set id, file(sr) from trimmed_reads_spades output: set id, val("SPAdes"), file("${id}_graph.gfa") into assembly_graph_spades @@ -608,7 +648,6 @@ process spades { file("${id}_log.txt") when: - params.manifest !params.singleEnd !params.skip_spades @@ -620,7 +659,6 @@ process spades { --memory "$maxmem" \ --pe1-1 ${sr[0]} \ --pe1-2 ${sr[1]} \ - --nanopore ${lr} \ -o spades cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa cp spades/scaffolds.fasta ${id}_scaffolds.fasta @@ -635,7 +673,7 @@ process quast { publishDir "${params.outdir}/Assembly/$assembler", mode: 'copy' input: - set val(assembler), val(sample), file(assembly) from assembly_spades_to_quast.mix(assembly_megahit_to_quast) + set val(assembler), val(sample), file(assembly) from assembly_spades_to_quast.mix(assembly_megahit_to_quast).mix(assembly_spadeshybrid_to_quast) output: file("${sample}_QC/*") into quast_results @@ -649,11 +687,11 @@ process quast { // Use Bandage to draw a (reduced) picture of the assembly graph process draw_assembly_graph { - tag "$type-$id" - publishDir "${params.outdir}/Assembly/SPAdes/", mode: 'copy' + tag "$assembler-$id" + publishDir "${params.outdir}/Assembly/${assembler}/", mode: 'copy' input: - set id, type, file(gfa) from assembly_graph_spades + set val(id), val(assembler), file(gfa) from assembly_graph_spades.mix(assembly_graph_spadeshybrid) output: file("${id}*") @@ -664,7 +702,7 @@ process draw_assembly_graph { script: """ Bandage reduce ${gfa} ${id}_graph_spades_reduced.gfa --scope depthrange --mindepth ${params.bandage_mindepth} --maxdepth 1000000 - Bandage image ${id}_graph_spades_reduced.gfa ${id}_${type}_graph.svg + Bandage image ${id}_graph_spades_reduced.gfa ${id}_${assembler}_graph.svg """ } @@ -678,7 +716,7 @@ process metabat { saveAs: {filename -> filename.indexOf(".bam") == -1 ? 
"GenomeBinning/$filename" : null} input: - set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat) + set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat).mix(assembly_spadeshybrid_to_metabat) val(min_size) from params.min_contig_size output: From df716b9cd8f79dc20a49bbf088a6b35be160045e Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 22 Mar 2019 15:18:19 +0100 Subject: [PATCH 021/105] correct typo in conf/base.config --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 6d3e14ce..195e1fb2 100644 --- a/conf/base.config +++ b/conf/base.config @@ -34,7 +34,7 @@ process { withName: busco_download_db { time = 4.h } - withName: porcechop { + withName: porechop { cpus = { check_max (2 * task.attempt, 'cpus' ) } memory = { check_max (10.GB * task.attempt, 'memory' ) } time = { check_max (3.h * task.attempt, 'time' ) } From 8034bf9450532bc907acd60738eca08d1cb3c25b Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 27 Mar 2019 11:07:23 +0100 Subject: [PATCH 022/105] remove Bandage --- bin/scrape_software_versions.py | 1 - environment.yml | 1 - main.nf | 26 -------------------------- 3 files changed, 28 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 7078b83f..87c31122 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -17,7 +17,6 @@ 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], - 'Bandage': ['v_bandage.txt', r"Version: (\S+)"] } results = OrderedDict() results['nf-core/mag'] = 'N/A' diff --git a/environment.yml b/environment.yml index 02b83b1f..208394ba 100644 --- a/environment.yml +++ b/environment.yml @@ -28,4 +28,3 @@ dependencies: - porechop=0.2.3_seqan2.1.1=py36h2d50403_3 - nanolyse=1.1.0=py36_1 - spades=3.13.0=0 - - bandage=0.8.1=hb59a952_0 diff --git a/main.nf b/main.nf index 7f530a57..bc0bd04e 100644 --- a/main.nf +++ b/main.nf @@ -63,8 +63,6 @@ def helpMessage() { --skip_spades Skip Illumina-only SPAdes assembly --skip_spadeshybrid Skip SPAdes hybrid assembly (only available when using manifest input) --skip_megahit Skip MEGAHIT assembly - --skip_assembly_graph Skip drawing an assembly graph with Bandage - --bandage_mindepth Reduce the assembly graph to include only contig above this depth (default: 10) Binning options: --skip_binning Skip metagenome binning @@ -144,8 +142,6 @@ params.min_contig_size = 1500 params.skip_spades = false params.skip_spadeshybrid = false params.skip_megahit = false -params.skip_assembly_graph = false -params.bandage_mindepth = 10 /* * long read preprocessing options @@ -310,7 +306,6 @@ process get_software_versions { NanoLyse --version > v_nanolyse.txt spades.py --version > v_spades.txt run_BUSCO.py --version > v_busco.txt - Bandage --version > v_bandage.txt scrape_software_versions.py > software_versions_mqc.yaml """ @@ -685,27 +680,6 @@ process quast { } -// Use Bandage to draw a (reduced) picture of the assembly graph -process draw_assembly_graph { - tag "$assembler-$id" - publishDir "${params.outdir}/Assembly/${assembler}/", mode: 'copy' - - input: - set val(id), val(assembler), file(gfa) from assembly_graph_spades.mix(assembly_graph_spadeshybrid) - - output: - file("${id}*") - - when: - !params.skip_assembly_graph - - script: - """ - Bandage reduce ${gfa} 
${id}_graph_spades_reduced.gfa --scope depthrange --mindepth ${params.bandage_mindepth} --maxdepth 1000000 - Bandage image ${id}_graph_spades_reduced.gfa ${id}_${assembler}_graph.svg - """ -} - /* * STEP 3 - Binning From 175569342e57c32bc4f341f9727a5b260628aa45 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 27 Mar 2019 12:46:39 +0100 Subject: [PATCH 023/105] fix busco when using singularity container --- main.nf | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index bc0bd04e..b2ec085f 100644 --- a/main.nf +++ b/main.nf @@ -772,17 +772,34 @@ process busco { file("${assembly}_busco_log.txt") script: - """ - run_BUSCO.py \ - --in ${assembly} \ - --lineage_path $db_name \ - --cpu "${task.cpus}" \ - --blast_single_core \ - --mode genome \ - --out ${assembly} \ - >${assembly}_busco_log.txt - cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt - """ + if( workflow.profile.toString().indexOf("conda") == -1) { + """ + cp -r /opt/conda/pkgs/augustus-3.3-pl526hcfae127_4/config augustus_config/ + export AUGUSTUS_CONFIG_PATH=augustus_config + + run_BUSCO.py \ + --in ${assembly} \ + --lineage_path $db_name \ + --cpu "${task.cpus}" \ + --blast_single_core \ + --mode genome \ + --out ${assembly} \ + >${assembly}_busco_log.txt + cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt + """ + } else { + """ + run_BUSCO.py \ + --in ${assembly} \ + --lineage_path $db_name \ + --cpu "${task.cpus}" \ + --blast_single_core \ + --mode genome \ + --out ${assembly} \ + >${assembly}_busco_log.txt + cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt + """ + } } From 5f16a795cfe2d9a48beb9428f7191340f9a5c82f Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 28 Mar 2019 10:31:45 +0100 Subject: [PATCH 024/105] adjust computational requirements --- conf/base.config | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/conf/base.config b/conf/base.config index 195e1fb2..07e12b79 100644 --- a/conf/base.config +++ b/conf/base.config @@ -35,9 +35,9 @@ process { time = 4.h } withName: porechop { - cpus = { check_max (2 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (3.h * task.attempt, 'time' ) } + cpus = { check_max (4 * task.attempt, 'cpus' ) } + memory = { check_max (30.GB * task.attempt, 'memory' ) } + time = { check_max (4.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: nanolyse { @@ -47,27 +47,33 @@ process { errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: filtlong { - cpus = { check_max (2 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (3.h * task.attempt, 'time' ) } + cpus = { check_max (4 * task.attempt, 'cpus' ) } + memory = { check_max (50.GB * task.attempt, 'memory' ) } + time = { check_max (6.h * task.attempt, 'time' ) } + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + } + withName: remove_phix { + cpus = { check_max (4 * task.attempt, 'cpus' ) } + memory = { check_max (8.GB * task.attempt, 'memory' ) } + time = { check_max (6.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } } withName: megahit { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (8.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: spades { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } - time = { check_max (8.h * task.attempt, 'time' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } + time = { check_max (16.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: spadeshybrid { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (20.GB * task.attempt, 'memory' ) } - time = { check_max (8.h * task.attempt, 'time' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } + time = { check_max (16.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: quast { From 184b75ecac58c15f7ee3785392cceb24fe2aad82 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 29 Mar 2019 13:53:38 +0100 Subject: [PATCH 025/105] more adjustments in conf/base.config --- conf/base.config | 10 +++++----- main.nf | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/conf/base.config b/conf/base.config index 07e12b79..c70c5bfa 100644 --- a/conf/base.config +++ b/conf/base.config @@ -47,9 +47,9 @@ process { errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: filtlong { - cpus = { check_max (4 * task.attempt, 'cpus' ) } - memory = { check_max (50.GB * task.attempt, 'memory' ) } - time = { check_max (6.h * task.attempt, 'time' ) } + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (64.GB * task.attempt, 'memory' ) } + time = { check_max (24.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: remove_phix { @@ -68,13 +68,13 @@ process { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (16.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } withName: spadeshybrid { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (16.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + errorStrategy = { task.exitStatus in [143,137,1] ? 
'retry' : 'ignore' } } withName: quast { cpus = { check_max (2 * task.attempt, 'cpus' ) } diff --git a/main.nf b/main.nf index b2ec085f..8061f51c 100644 --- a/main.nf +++ b/main.nf @@ -149,7 +149,6 @@ params.skip_megahit = false params.keep_lambda = false params.longreads_min_length = 1000 params.longreads_keep_percent = 90 -params.filtlong_split = 1000 params.longreads_length_weight = 10 params.lambda_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" From 764c714c9d295e95439d42453210d4143ecc6a4d Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 29 Mar 2019 14:14:07 +0100 Subject: [PATCH 026/105] output busco sequences for each bin to make (manual for now) taxonomic classification --- main.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/main.nf b/main.nf index 8061f51c..b895b17d 100644 --- a/main.nf +++ b/main.nf @@ -769,6 +769,8 @@ process busco { file("short_summary_${assembly}.txt") into (busco_summary_to_multiqc, busco_summary_to_plot) val("$assembler-$sample") into busco_assembler_sample_to_plot file("${assembly}_busco_log.txt") + file("${assembly}_buscos.faa") + file("${assembly}_buscos.fna") script: if( workflow.profile.toString().indexOf("conda") == -1) { @@ -785,6 +787,8 @@ process busco { --out ${assembly} \ >${assembly}_busco_log.txt cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt + cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa 2>/dev/null + cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna 2>/dev/null """ } else { """ @@ -797,6 +801,8 @@ process busco { --out ${assembly} \ >${assembly}_busco_log.txt cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt + cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa 2>/dev/null + cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna 2>/dev/null """ } } From def9fe70e6bbd8c59ddf8852d426ca0310df3566 Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 2 Apr 2019 10:00:58 +0200 Subject: [PATCH 027/105] increase filtlong and SPAdes resources --- conf/base.config | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/conf/base.config b/conf/base.config index c70c5bfa..4f2eb68e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -46,9 +46,10 @@ process { time = { check_max (3.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } + //filtlong: exponential increase of memory with attempts withName: filtlong { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (64.GB * task.attempt, 'memory' ) } + memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (24.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } @@ -64,15 +65,18 @@ process { time = { check_max (8.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } + //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! 
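    //(illustrative note: an expression such as 40.GB * (2**(task.attempt-1)) requests 40 GB on the first
    // attempt, then 80 GB, 160 GB, ... doubling with every retry, whereas the earlier 40.GB * task.attempt
    // form only grew linearly)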
+ //exponential increase of memory with attempts + //40 GB memory is sufficient for low complexity Illumina-only metagenome withName: spades { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } + memory = { check_max (40.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (16.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } withName: spadeshybrid { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * task.attempt, 'memory' ) } + memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } time = { check_max (16.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } From 8bddb9a5f0401e3d3ed978e1b8fed61067eb306b Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 2 Apr 2019 10:12:45 +0200 Subject: [PATCH 028/105] temporarly add binac_smp.config to test assembly with >128 GB memory requirement --- conf/binac_smp.config | 23 +++++++++++++++++++++++ nextflow.config | 3 +++ 2 files changed, 26 insertions(+) create mode 100644 conf/binac_smp.config diff --git a/conf/binac_smp.config b/conf/binac_smp.config new file mode 100644 index 00000000..9d23ef57 --- /dev/null +++ b/conf/binac_smp.config @@ -0,0 +1,23 @@ +//Profile config names for nf-core/configs +params { + config_profile_description = 'BINAC cluster profile for jobs requiring >128 GB memory provided by nf-core/configs.' + config_profile_contact = 'just a test right now' + config_profile_url = 'https://www.bwhpc-c5.de/wiki/index.php/Category:BwForCluster_BinAC' +} + +singularity { + enabled = true +} + +process { + beforeScript = 'module load devel/singularity/3.0.1' + executor = 'pbs' + queue = 'smp' +} + +params { + igenomes_base = '/nfsmounts/igenomes' + max_memory = 1024.GB + max_cpus = 40 + max_time = 730.h +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index f6fa729e..5e741ca6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,9 @@ profiles { test { includeConfig 'conf/test.config' } + binac_smp { + includeConfig 'conf/binac_smp.config' + } none { // Don't load any config (for use with custom home configs) } From ed8399aa6135ed53e7ee4b7877932b022d70e2fd Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 2 Apr 2019 10:19:55 +0200 Subject: [PATCH 029/105] increase filtlong and SPAdes resources very high for testing binac_smp.config --- conf/base.config | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/conf/base.config b/conf/base.config index 4f2eb68e..319f210f 100644 --- a/conf/base.config +++ b/conf/base.config @@ -46,11 +46,12 @@ process { time = { check_max (3.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } - //filtlong: exponential increase of memory with attempts + //filtlong: exponential increase of memory and time with attempts + //64 GB starting memory might be sufficient withName: filtlong { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (24.h * task.attempt, 'time' ) } + memory = { check_max (256.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (48.h * (2**(task.attempt-1)), 'time' ) } errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: remove_phix { @@ -66,18 +67,19 @@ process { errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } } //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! - //exponential increase of memory with attempts - //40 GB memory is sufficient for low complexity Illumina-only metagenome + //exponential increase of memory and time with attempts + //spades: 40 GB memory / 16 h is sufficient for low complexity Illumina-only metagenome withName: spades { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (16.h * task.attempt, 'time' ) } + time = { check_max (16.h * (2**(task.attempt-1)), 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } + //spadeshybrid: 64 GB / 16 h starting memory might be sufficient withName: spadeshybrid { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (16.h * task.attempt, 'time' ) } + memory = { check_max (256.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (48.h * (2**(task.attempt-1)), 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } withName: quast { From 900d0879c19186d7d3d76fb44b842d3dcb54fec2 Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 2 Apr 2019 11:04:24 +0200 Subject: [PATCH 030/105] fix again process busco to not fail when no buscos are found --- main.nf | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index b895b17d..a896b87c 100644 --- a/main.nf +++ b/main.nf @@ -787,8 +787,15 @@ process busco { --out ${assembly} \ >${assembly}_busco_log.txt cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt - cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa 2>/dev/null - cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna 2>/dev/null + + for f in run_${assembly}/single_copy_busco_sequences/*faa; do + [ -e "\$f" ] && cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa || touch ${assembly}_buscos.faa + break + done + for f in run_${assembly}/single_copy_busco_sequences/*fna; do + [ -e "\$f" ] && cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna || touch ${assembly}_buscos.fna + break + done """ } else { """ @@ -801,8 +808,15 @@ process busco { --out ${assembly} \ >${assembly}_busco_log.txt cp run_${assembly}/short_summary_${assembly}.txt short_summary_${assembly}.txt - cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa 2>/dev/null - cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna 2>/dev/null + + for f in run_${assembly}/single_copy_busco_sequences/*faa; do + [ -e "\$f" ] && cat run_${assembly}/single_copy_busco_sequences/*faa >${assembly}_buscos.faa || touch ${assembly}_buscos.faa + break + done + for f in run_${assembly}/single_copy_busco_sequences/*fna; do + [ -e "\$f" ] && cat run_${assembly}/single_copy_busco_sequences/*fna >${assembly}_buscos.fna || touch ${assembly}_buscos.fna + break + done """ } } From ffecaa2267fd412634fa3ab65bf2f64e623d8cff Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 2 Apr 2019 16:02:09 +0200 Subject: [PATCH 031/105] output also tables with busco values --- bin/summary_busco.py | 40 ++++++++++++++++++++++++++++++++++++++++ main.nf | 12 +++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100755 bin/summary_busco.py diff --git a/bin/summary_busco.py b/bin/summary_busco.py 
new file mode 100755 index 00000000..392ec6f1 --- /dev/null +++ b/bin/summary_busco.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +#USAGE: ./summary.busco.py *.txt + +import re +from sys import argv + +#"# Summarized benchmarking in BUSCO notation for file MEGAHIT-testset1.contigs.fa" +#" C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:148" + +regexes = { + 'nf-core/mag': ['v_pipeline.txt', r"(\S+)"], + 'Nextflow': ['v_nextflow.txt', r"(\S+)"], + 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], + 'fastqc': ['v_fastqc.txt', r"FastQC v(\S+)"], + 'fastp': ['v_fastp.txt', r"fastp (\S+)"], + 'megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], + 'metabat': ['v_metabat.txt', r"version (\S+)"], + 'NanoPlot': ['v_nanoplot.txt', r"NanoPlot (\S+)"], + 'Filtlong': ['v_filtlong.txt', r"Filtlong v(\S+)"], + 'porechop': ['v_porechop.txt', r"(\S+)"], + 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], + 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], + 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], +} + +regexes = [r"# Summarized benchmarking in BUSCO notation for file (\S+)", r" C:(\S+)%\[S:", r"%\[S:(\S+)%,D:", r"%,D:(\S+)%\],F:", r"%\],F:(\S+)%,M:", r"%,M:(\S+)%,n:", r"%,n:(\S+)"] +columns = ["GenomeBin","%Complete","%Complete and single-copy","%Complete and duplicated","%Fragmented","%Missing","Total number"] + +# Search each file using its regex +print("\t".join(columns)) +for FILE in argv[1:]: + with open(FILE) as x: + results = [] + TEXT = x.read() + for REGEX in regexes: + match = re.search(REGEX, TEXT) + if match: + results.append( match.group(1) ) + print("\t".join(results)) diff --git a/main.nf b/main.nf index a896b87c..d1901eb5 100644 --- a/main.nf +++ b/main.nf @@ -831,11 +831,13 @@ process busco_plot { output: file("*busco_figure.png") - file("*busco_figure.R") + file("raw/*busco_figure.R") + file("*busco_summary.txt") script: def assemblersampleunique = assemblersample.unique() """ + #for each assembler and sample: assemblersample=\$(echo \"$assemblersampleunique\" | sed 's/[][]//g') IFS=', ' read -r -a assemblersamples <<< \"\$assemblersample\" @@ -843,9 +845,17 @@ process busco_plot { mkdir \${name} cp short_summary_\${name}* \${name}/ generate_plot.py --working_directory \${name} + cp \${name}/busco_figure.png \${name}-busco_figure.png cp \${name}/busco_figure.R \${name}-busco_figure.R + + summary_busco.py \${name}/short_summary_*.txt >\${name}-busco_summary.txt done + + mkdir raw + cp *-busco_figure.R raw/ + + summary_busco.py short_summary_*.txt >busco_summary.txt """ } From a7bf1cf6db927a68c609bc9c685961af62a4fa7b Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 3 Apr 2019 11:06:03 +0200 Subject: [PATCH 032/105] use metaquast instead of quast and also evaluate genomic bins with metaquast --- main.nf | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index d1901eb5..2f0046d6 100644 --- a/main.nf +++ b/main.nf @@ -63,6 +63,7 @@ def helpMessage() { --skip_spades Skip Illumina-only SPAdes assembly --skip_spadeshybrid Skip SPAdes hybrid assembly (only available when using manifest input) --skip_megahit Skip MEGAHIT assembly + --skip_quast Skip metaQUAST Binning options: --skip_binning Skip metagenome binning @@ -142,6 +143,7 @@ params.min_contig_size = 1500 params.skip_spades = false params.skip_spadeshybrid = false params.skip_megahit = false +params.skip_quast = false /* * long read preprocessing options @@ -672,9 +674,12 @@ process quast { output: file("${sample}_QC/*") into quast_results + when: + !params.skip_quast + script: 
""" - quast.py --threads "${task.cpus}" -l "${assembler}-${sample}" "${assembly}" -o "${sample}_QC" + metaquast.py --threads "${task.cpus}" --rna-finding -l "${assembler}-${sample}" "${assembly}" -o "${sample}_QC" """ } @@ -694,6 +699,7 @@ process metabat { output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten + set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins when: !params.skip_binning @@ -859,6 +865,31 @@ process busco_plot { """ } +process quast_bins { + tag "$assembler-$sample" + publishDir "${params.outdir}/GenomeBinning/QC/", mode: 'copy' + + input: + set val(assembler), val(sample), file(assembly), file(reads) from metabat_bins_quast_bins + + output: + file("QUAST/${assembler}/${sample}/*") + + when: + !params.skip_quast + + script: + if ( !params.singleEnd ) { + """ + metaquast.py --threads "${task.cpus}" --pe1 "${reads[0]}" --pe2 "${reads[1]}" --rna-finding --gene-finding -l "${assembler}-${sample}" "${assembly}" -o "QUAST/${assembler}/${sample}" + """ + } else { + """ + metaquast.py --threads "${task.cpus}" --single "${reads}" --rna-finding --gene-finding -l "${assembler}-${sample}" "${assembly}" -o "QUAST/${assembler}/${sample}" + """ + } +} + /* * STEP 4 - MultiQC From 5e904f5f7a316c9086481fefe4cbe60f48519642 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 3 Apr 2019 11:21:37 +0200 Subject: [PATCH 033/105] prevent fastq.gz from publishing --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 2f0046d6..7a3e166c 100644 --- a/main.nf +++ b/main.nf @@ -691,7 +691,7 @@ process quast { process metabat { tag "$assembler-$sample" publishDir "${params.outdir}/", mode: 'copy', - saveAs: {filename -> filename.indexOf(".bam") == -1 ? "GenomeBinning/$filename" : null} + saveAs: {filename -> (filename.indexOf(".bam") == -1 && filename.indexOf(".fastq.gz") == -1) ? "GenomeBinning/$filename" : null} input: set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat).mix(assembly_spadeshybrid_to_metabat) From 5b925b16f728ea65cb6fbbbc534b623072abd5a8 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 8 Apr 2019 09:10:37 +0200 Subject: [PATCH 034/105] increase spades/hybridspades memory allocation once more --- conf/base.config | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/conf/base.config b/conf/base.config index 319f210f..33fc4af6 100644 --- a/conf/base.config +++ b/conf/base.config @@ -67,19 +67,20 @@ process { errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! - //exponential increase of memory and time with attempts + //exponential increase of memory with attempts //spades: 40 GB memory / 16 h is sufficient for low complexity Illumina-only metagenome + //320 GB is not sufficient for complex dataset, also >50h! withName: spades { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (40.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (16.h * (2**(task.attempt-1)), 'time' ) } + cpus = { check_max (20 * task.attempt, 'cpus' ) } + memory = { check_max (512.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (120.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 
'retry' : 'ignore' } } //spadeshybrid: 64 GB / 16 h starting memory might be sufficient withName: spadeshybrid { - cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (256.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (48.h * (2**(task.attempt-1)), 'time' ) } + cpus = { check_max (20 * task.attempt, 'cpus' ) } + memory = { check_max (512.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (120.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } withName: quast { From 875ab0de7dff5e3bab64cb2a185f43831a1b2dc2 Mon Sep 17 00:00:00 2001 From: Travis CI User Date: Tue, 9 Apr 2019 11:44:43 +0000 Subject: [PATCH 035/105] Update nf-core pipeline template. --- .github/CONTRIBUTING.md | 34 ++- .github/ISSUE_TEMPLATE/bug_report.md | 31 +++ .github/ISSUE_TEMPLATE/feature_request.md | 16 ++ ...ll_request.md => PULL_REQUEST_TEMPLATE.md} | 2 +- .github/bug_report.md | 29 -- .github/feature_request.md | 16 -- .github/markdownlint.yml | 9 + .gitignore | 1 + .travis.yml | 34 +-- CHANGELOG.md | 3 +- CODE_OF_CONDUCT.md | 2 +- Dockerfile | 5 +- LICENSE | 2 + README.md | 29 +- Singularity | 17 -- assets/email_template.txt | 17 -- {conf => assets}/multiqc_config.yaml | 2 + assets/sendmail_template.txt | 31 ++- .../scrape_software_versions.cpython-36.pyc | Bin 1331 -> 1390 bytes bin/scrape_software_versions.py | 15 +- conf/awsbatch.config | 11 +- conf/base.config | 13 +- conf/igenomes.config | 1 + conf/test.config | 7 +- docs/README.md | 13 +- docs/configuration/adding_your_own.md | 86 ------ docs/configuration/reference_genomes.md | 49 ---- docs/installation.md | 115 -------- docs/output.md | 3 +- docs/troubleshooting.md | 28 -- docs/usage.md | 167 ++++++----- environment.yml | 11 +- main.nf | 263 ++++++++++++------ nextflow.config | 87 +++--- 34 files changed, 532 insertions(+), 617 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md rename .github/{pull_request.md => PULL_REQUEST_TEMPLATE.md} (96%) delete mode 100644 .github/bug_report.md delete mode 100644 .github/feature_request.md create mode 100644 .github/markdownlint.yml delete mode 100644 Singularity rename {conf => assets}/multiqc_config.yaml (95%) delete mode 100644 docs/configuration/adding_your_own.md delete mode 100644 docs/configuration/reference_genomes.md delete mode 100644 docs/installation.md delete mode 100644 docs/troubleshooting.md diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 2ef9f8f1..c3171ba0 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,12 +1,14 @@ -# nf-core/mag Contributing Guidelines +# nf-core/mag: Contributing Guidelines Hi there! Many thanks for taking an interest in improving nf-core/mag. -We try to manage the required tasks for nf-core/mag using GitHub issues, you probably came to this page when creating one. Please use the prefilled template to save time. +We try to manage the required tasks for nf-core/mag using GitHub issues, you probably came to this page when creating one. Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! 
Contributions to the code are even more welcome ;) -> If you need help using nf-core/mag then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby +> If you need help using or modifying nf-core/mag then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). + + ## Contribution workflow If you'd like to write some code for nf-core/mag, the standard workflow @@ -15,11 +17,31 @@ is as follows: 1. Check that there isn't already an issue about your idea in the [nf-core/mag issues](https://github.com/nf-core/mag/issues) to avoid duplicating work. - * Feel free to add a new issue here for the same reason. + * If there isn't one already, please create one so that others know you're working on this 2. Fork the [nf-core/mag repository](https://github.com/nf-core/mag) to your GitHub account 3. Make the necessary changes / additions within your forked repository -4. Submit a Pull Request against the master branch and wait for the code to be reviewed and merged. +4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged. If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/). -For further information/help, please consult the [nf-core/mag documentation](https://github.com/nf-core/mag#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby) + +## Tests +When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. +Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. + +There are typically two types of tests that run: + +### Lint Tests +The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to. +To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. + +If any failures or warnings are encountered, please follow the listed URL for more documentation. + +### Pipeline Tests +Each nf-core pipeline should be set up with a minimal set of test-data. +Travis CI then runs the pipeline on this data to ensure that it exists successfully. +If there are any failures then the automated tests fail. +These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. + +## Getting help +For further information/help, please consult the [nf-core/mag documentation](https://github.com/nf-core/mag#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..789853ec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,31 @@ +Hi there! + +Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below: + +#### Describe the bug +A clear and concise description of what the bug is. + +#### Steps to reproduce +Steps to reproduce the behaviour: +1. 
Command line: `nextflow run ...` +2. See error: _Please provide your error message_ + +#### Expected behaviour +A clear and concise description of what you expected to happen. + +#### System: + - Hardware: [e.g. HPC, Desktop, Cloud...] + - Executor: [e.g. slurm, local, awsbatch...] + - OS: [e.g. CentOS Linux, macOS, Linux Mint...] + - Version [e.g. 7, 10.13.6, 18.3...] + +#### Nextflow Installation: + - Version: [e.g. 0.31.0] + +#### Container engine: + - Engine: [e.g. Conda, Docker or Singularity] + - version: [e.g. 1.0.0] + - Image tag: [e.g. nfcore/mag:1.0.0] + +#### Additional context +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..1f025b77 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,16 @@ +Hi there! + +Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below: + +#### Is your feature request related to a problem? Please describe. +A clear and concise description of what the problem is. +Ex. I'm always frustrated when [...] + +#### Describe the solution you'd like +A clear and concise description of what you want to happen. + +#### Describe alternatives you've considered +A clear and concise description of any alternative solutions or features you've considered. + +#### Additional context +Add any other context about the feature request here. diff --git a/.github/pull_request.md b/.github/PULL_REQUEST_TEMPLATE.md similarity index 96% rename from .github/pull_request.md rename to .github/PULL_REQUEST_TEMPLATE.md index 60b03628..b2bce74e 100644 --- a/.github/pull_request.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -5,7 +5,7 @@ Please fill in the appropriate checklist below (delete whatever is not relevant) ## PR checklist - [ ] This comment contains a description of changes (with reason) - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If necessary, also make a PR on the [nf-core/mag branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/newnf-core/mag) + - [ ] If necessary, also make a PR on the [nf-core/mag branch on the nf-core/test-datasets repo]( https://github.com/nf-core/test-datasets/pull/new/nf-core/mag) - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). - [ ] Make sure your code lints (`nf-core lint .`). - [ ] Documentation in `docs` is updated diff --git a/.github/bug_report.md b/.github/bug_report.md deleted file mode 100644 index d0405d12..00000000 --- a/.github/bug_report.md +++ /dev/null @@ -1,29 +0,0 @@ -**Describe the bug** -A clear and concise description of what the bug is. - -**To Reproduce** -Steps to reproduce the behavior: -1. Command line '...' -2. See error **Please provide your error message** - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**System (please complete the following information):** - - Hardware: [e.g. HPC, Desktop, Cloud...] - - Executor: [e.g. slurm, local, awsbatch...] - - OS: [e.g. CentOS Linux, macOS, Linux Mint...] - - Version [e.g. 7, 10.13.6, 18.3...] - -**Nextflow (please complete the following information):** - - Version: [e.g. 0.31.0] - -**Container engine (please complete the following information):** - - Engine: [e.g. Conda, Docker or Singularity] - - version: [e.g. 1.0.0] - -**Container (please complete the following information):** - - tag: [e.g. 
1.0.0] - -**Additional context** -Add any other context about the problem here. diff --git a/.github/feature_request.md b/.github/feature_request.md deleted file mode 100644 index 3616d75c..00000000 --- a/.github/feature_request.md +++ /dev/null @@ -1,16 +0,0 @@ -**Is your feature request related to a problem? Please describe.** - -A clear and concise description of what the problem is. -Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** - -A clear and concise description of what you want to happen. - -**Describe alternatives you've considered** - -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** - -Add any other context about the feature request here. diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml new file mode 100644 index 00000000..e052a635 --- /dev/null +++ b/.github/markdownlint.yml @@ -0,0 +1,9 @@ +# Markdownlint configuration file +default: true, +line-length: false +no-multiple-blanks: 0 +blanks-around-headers: false +blanks-around-lists: false +header-increment: false +no-duplicate-header: + siblings_only: true diff --git a/.gitignore b/.gitignore index 46f69e41..5b54e3e6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ data/ results/ .DS_Store tests/test_data +*.pyc diff --git a/.travis.yml b/.travis.yml index e4c3540c..490c575e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,42 +1,42 @@ sudo: required -language: java +language: python jdk: openjdk8 -services: - - docker -python: - - "2.7" +services: docker +python: '3.6' cache: pip matrix: fast_finish: true before_install: - # PRs made to 'master' branch should always orginate from another repo or the 'dev' branch + # PRs to master are only ok if coming from dev branch - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' # Pull the docker image first so the test doesn't wait for this - - docker pull nfcore/mag + - docker pull nfcore/mag:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/mag nfcore/mag:latest + # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) + - docker tag nfcore/mag:dev nfcore/mag:dev install: # Install Nextflow - - mkdir /tmp/nextflow - - cd /tmp/nextflow + - mkdir /tmp/nextflow && cd /tmp/nextflow - wget -qO- get.nextflow.io | bash - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow # Install nf-core/tools - - git clone https://github.com/nf-core/tools.git /tmp/nf-core-tools - - cd /tmp/nf-core-tools - - pip install --user -e . 
+ - pip install --upgrade pip + - pip install nf-core # Reset - - mkdir ${TRAVIS_BUILD_DIR}/tests - - cd ${TRAVIS_BUILD_DIR}/tests + - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests + # Install markdownlint-cli + - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER=0.30.0 # Specify a minimum NF version that should be tested and work - - NXF_VER='' # Plus: get the latest NF version and check, that it works + - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='' # Plus: get the latest NF version and check that it works script: # Lint the pipeline code - nf-core lint ${TRAVIS_BUILD_DIR} + # Lint the documentation + - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml # Run the pipeline with the test profile - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker diff --git a/CHANGELOG.md b/CHANGELOG.md index e442f459..a219a261 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ +# nf-core/mag: Changelog -## nf-core/mag version 1.0dev - +## v1.0.0 - [date] Initial release of nf-core/mag, created with the [nf-core](http://nf-co.re/) template. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 21096193..09226d0d 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
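The updated `.travis.yml` above boils the CI down to three checks per pull request: linting the pipeline code with `nf-core lint`, linting the documentation with `markdownlint`, and running the pipeline on the bundled test profile. A minimal sketch for reproducing the same checks on a local clone before opening a PR, assuming `nf-core/tools`, `markdownlint-cli`, Nextflow and Docker are installed:

```bash
# Sketch: run the same checks Travis runs, from the root of a local clone.
nf-core lint .                              # nf-core pipeline lint tests
markdownlint . -c .github/markdownlint.yml  # documentation lint with the bundled config
nextflow run . -profile test,docker         # minimal test dataset via the Docker profile
```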
diff --git a/Dockerfile b/Dockerfile index e3391a9d..cdb55dfc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,7 @@ FROM nfcore/base -LABEL description="Docker image containing all requirements for nf-core/mag pipeline" +LABEL authors="No author provided" \ + description="Docker image containing all requirements for nf-core/mag pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-mag-1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-mag-1.0.0/bin:$PATH diff --git a/LICENSE b/LICENSE index 9cf10627..fafd3dbf 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ MIT License +Copyright (c) No author provided + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights diff --git a/README.md b/README.md index 5903eed5..15863270 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,30 @@ # nf-core/mag -**Assembly, binning and annotation of metagenomes** -[![Build Status](https://travis-ci.org/nf-core/mag.svg?branch=master)](https://travis-ci.org/nf-core/mag) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.30.0-brightgreen.svg)](https://www.nextflow.io/) +**Assembly, binning and annotation of metagenomes**. + +[![Build Status](https://travis-ci.com/nf-core/mag.svg?branch=master)](https://travis-ci.com/nf-core/mag) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/mag.svg)](https://hub.docker.com/r/nfcore/mag) -![Singularity Container available]( -https://img.shields.io/badge/singularity-available-7E4C74.svg) -### Introduction -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker / singularity containers making installation trivial and results highly reproducible. +## Introduction +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. -### Documentation +## Documentation The nf-core/mag pipeline comes with documentation about the pipeline, found in the `docs/` directory: -1. [Installation](docs/installation.md) +1. [Installation](https://nf-co.re/usage/installation) 2. Pipeline configuration - * [Local installation](docs/configuration/local.md) - * [Adding your own system](docs/configuration/adding_your_own.md) + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) 3. [Running the pipeline](docs/usage.md) 4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](docs/troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) + + + +## Credits +nf-core/mag was originally written by No author provided. 
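The Dockerfile above builds on `nfcore/base`, creates the conda environment from `environment.yml` and prepends `/opt/conda/envs/nf-core-mag-1.0.0/bin` to `PATH`. A hedged sketch of building and spot-checking the image locally; the `nfcore/mag:dev` tag mirrors what `.travis.yml` pulls and is only an assumption for local testing:

```bash
# Sketch: build the container locally and confirm the conda environment tools are on PATH.
docker build -t nfcore/mag:dev .
docker run --rm nfcore/mag:dev bash -c 'echo "$PATH" && fastqc --version && multiqc --version'
```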
diff --git a/Singularity b/Singularity deleted file mode 100644 index 0fdc4c00..00000000 --- a/Singularity +++ /dev/null @@ -1,17 +0,0 @@ -From:nfcore/base -Bootstrap:docker - -%labels - DESCRIPTION Singularity image containing all requirements for nf-core/mag pipeline - VERSION 1.0dev - -%environment - PATH=/opt/conda/envs/nf-core-mag-1.0dev/bin:$PATH - export PATH - -%files - environment.yml / - -%post - /opt/conda/bin/conda env create -f /environment.yml - /opt/conda/bin/conda clean -a diff --git a/assets/email_template.txt b/assets/email_template.txt index 5c47bcb3..c99c11e1 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -17,23 +17,6 @@ ${errorReport} } %> -<% if (!success){ - out << """#################################################### -## nf-core/mag execution completed unsuccessfully! ## -#################################################### -The exit status of the task that caused the workflow execution to fail was: $exitStatus. -The full error message was: - -${errorReport} -""" -} else { - out << "## nf-core/mag execution completed successfully! ##" -} -%> - - - - The workflow was completed at $dateComplete (duration: $duration) The command used to launch the workflow was as follows: diff --git a/conf/multiqc_config.yaml b/assets/multiqc_config.yaml similarity index 95% rename from conf/multiqc_config.yaml rename to assets/multiqc_config.yaml index 276fe4ae..c3cab132 100644 --- a/conf/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -5,3 +5,5 @@ report_comment: > report_section_order: nf-core/mag-software-versions: order: -1000 + +export_plots: true diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index fd1cd739..2d671220 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -1,11 +1,36 @@ To: $email Subject: $subject Mime-Version: 1.0 -Content-Type: multipart/related;boundary="nfmimeboundary" +Content-Type: multipart/related;boundary="nfcoremimeboundary" ---nfmimeboundary +--nfcoremimeboundary Content-Type: text/html; charset=utf-8 $email_html ---nfmimeboundary-- +<% +if (mqcFile){ +def mqcFileObj = new File("$mqcFile") +if (mqcFileObj.length() < mqcMaxSize){ +out << """ +--nfcoremimeboundary +Content-Type: text/html; name=\"multiqc_report\" +Content-Transfer-Encoding: base64 +Content-ID: +Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" + +${mqcFileObj. + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} +""" +}} +%> + +--nfcoremimeboundary-- diff --git a/bin/__pycache__/scrape_software_versions.cpython-36.pyc b/bin/__pycache__/scrape_software_versions.cpython-36.pyc index b7b7d01a01f12bf1e4ccb475c30d863ce6c40285..ebd68ae9b27aa4e0c5decdc5cba498051cb7fd8e 100644 GIT binary patch delta 371 zcmdnY^^Qx$n3tDJwslQR3>yQ(V+JI^17te@ad8CWM3o~_6;i2eE)20kQ5>mksT`6F z#V0_tEkogniO+NcKZG-+3Z;lPGcq!ya;1o+h&MC!8$=0LXr!`bu{SdSHOj;=MR7!N zrV3|?q)225q;jNkHZy_LPhc$63TFssNMR0S2x5o;(!mUxlABc+RTzc27#SG2GE=M+ z)Qj`eO3D+9QsXE0GX06xR)7MVloGq@S{r>JWs{O(XH%S*TL2aSGl1eLc2%4pwLoT$ zU6m-@n6lKO;>`TKV!hG&CStfCHy`22=)FKT{y`t2DoW$f*4Rr-|9R+n=bxnoZ zTHWIOw370~qEy|<+nN68$SFX9O-hMfb*+s)kg`chfiP3-syIQ?K+zmKP2tIWEG^M1 z8Hyx;E&>z3*6J4&WtJtDr0SO?7U`$u=cJ?-73=2|>z90su ''') for k,v in results.items(): - print("
<dt>{}</dt><dd>{}</dd>".format(k,v)) + print("<dt>{}</dt><dd><samp>{}</samp></dd>
".format(k,v)) print (" ") + +# Write out regexes as csv file: +with open('software_versions.csv', 'w') as f: + for k,v in results.items(): + f.write("{}\t{}\n".format(k,v)) diff --git a/conf/awsbatch.config b/conf/awsbatch.config index 79078c7b..14af5866 100644 --- a/conf/awsbatch.config +++ b/conf/awsbatch.config @@ -1,10 +1,15 @@ /* * ------------------------------------------------- - * Nextflow config file for AWS Batch + * Nextflow config file for running on AWS batch * ------------------------------------------------- - * Imported under the 'awsbatch' Nextflow profile in nextflow.config - * Uses docker for software depedencies automagically, so not specified here. + * Base config needed for running with -profile awsbatch */ +params { + config_profile_name = 'AWSBATCH' + config_profile_description = 'AWSBATCH Cloud Profile' + config_profile_contact = 'Alexander Peltzer (@apeltzer)' + config_profile_url = 'https://aws.amazon.com/de/batch/' +} aws.region = params.awsregion process.executor = 'awsbatch' diff --git a/conf/base.config b/conf/base.config index d2385a1d..478fa65d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,23 +11,18 @@ process { - container = params.container - + // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' // Process-specific resource requirements - withName: fastqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } - withName: multiqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } + // TODO nf-core: Customise requirements for specific processes. + // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors } params { diff --git a/conf/igenomes.config b/conf/igenomes.config index 08154994..d19e61f4 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -9,6 +9,7 @@ params { // illumina iGenomes reference file paths + // TODO nf-core: Add new reference types and strip out those that are not needed genomes { 'GRCh37' { bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" diff --git a/conf/test.config b/conf/test.config index f8ebeb64..3a0cc352 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,14 +4,19 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/methylseq -profile test + * nextflow run nf-core/mag -profile test */ params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + // Limit resources so that this can run on Travis max_cpus = 2 max_memory = 6.GB max_time = 48.h // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed singleEnd = false readPaths = [ ['Testdata', ['https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R1.tiny.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/exoseq/testdata/Testdata_R2.tiny.fastq.gz']], diff --git a/docs/README.md b/docs/README.md index c4222a90..4bcccbd6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,10 +2,11 @@ The nf-core/mag documentation is split into the following files: -1. [Installation](installation.md) -2. [Running the pipeline](usage.md) -3. Pipeline configuration - * [Adding your own system](configuration/adding_your_own.md) - * [Reference genomes](configuration/reference_genomes.md) +1. [Installation](https://nf-co.re/usage/installation) +2. Pipeline configuration + * [Local installation](https://nf-co.re/usage/local_installation) + * [Adding your own system config](https://nf-co.re/usage/adding_own_config) + * [Reference genomes](https://nf-co.re/usage/reference_genomes) +3. [Running the pipeline](usage.md) 4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](troubleshooting.md) +5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) diff --git a/docs/configuration/adding_your_own.md b/docs/configuration/adding_your_own.md deleted file mode 100644 index d7aa14cd..00000000 --- a/docs/configuration/adding_your_own.md +++ /dev/null @@ -1,86 +0,0 @@ -# nf-core/mag: Configuration for other clusters - -It is entirely possible to run this pipeline on other clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. - -> If you think that there are other people using the pipeline who would benefit from your configuration (eg. other common cluster setups), please let us know. We can add a new configuration and profile which can used by specifying `-profile ` when running the pipeline. - -If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). - -A basic configuration comes with the pipeline, which runs by default (the `standard` config profile - see [`conf/base.config`](../conf/base.config)). This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. - -## Cluster Environment -By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - -To specify your cluster environment, add the following line to your config file: - -```nextflow -process.executor = 'YOUR_SYSTEM_TYPE' -``` - -Many different cluster types are supported by Nextflow. 
For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html). - -Note that you may need to specify cluster options, such as a project or queue. To do so, use the `clusterOptions` config option: - -```nextflow -process { - executor = 'SLURM' - clusterOptions = '-A myproject' -} -``` - - -## Software Requirements -To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity. - -Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use. - -### Docker -Docker is a great way to run nf-core/mag, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - nextflow will automatically fetch the [nfcore/mag](https://hub.docker.com/r/nfcore/mag/) image that we have created and is hosted at dockerhub at run time. - -To add docker support to your own config file, add the following: - -```nextflow -docker.enabled = true -process.container = "nfcore/mag" -``` - -Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`. - - -### Singularity image -Many HPC environments are not able to run Docker due to security issues. -[Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. - -To specify singularity usage in your pipeline config file, add the following: - -```nextflow -singularity.enabled = true -process.container = "shub://nf-core/mag" -``` - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. -Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. - -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name nf-core-mag.simg shub://nf-core/mag -``` - -Then transfer this file and point the config file to the image: - -```nextflow -singularity.enabled = true -process.container = "/path/to/nf-core-mag.simg" -``` - - -### Conda -If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements. -To use conda in your own config file, add the following: - -```nextflow -process.conda = "$baseDir/environment.yml" -``` diff --git a/docs/configuration/reference_genomes.md b/docs/configuration/reference_genomes.md deleted file mode 100644 index a5c86cf9..00000000 --- a/docs/configuration/reference_genomes.md +++ /dev/null @@ -1,49 +0,0 @@ -# nf-core/mag: Reference Genomes Configuration - -The nf-core/mag pipeline needs a reference genome for alignment and annotation. - -These paths can be supplied on the command line at run time (see the [usage docs](../usage.md)), -but for convenience it's often better to save these paths in a nextflow config file. -See below for instructions on how to do this. -Read [Adding your own system](adding_your_own.md) to find out how to set up custom config files. - -## Adding paths to a config file -Specifying long paths every time you run the pipeline is a pain. 
-To make this easier, the pipeline comes configured to understand reference genome keywords which correspond to preconfigured paths, meaning that you can just specify `--genome ID` when running the pipeline. - -Note that this genome key can also be specified in a config file if you always use the same genome. - -To use this system, add paths to your config file using the following template: - -```nextflow -params { - genomes { - 'YOUR-ID' { - fasta = '/genome.fa' - } - 'OTHER-GENOME' { - // [..] - } - } - // Optional - default genome. Ignored if --genome 'OTHER-GENOME' specified on command line - genome = 'YOUR-ID' -} -``` - -You can add as many genomes as you like as long as they have unique IDs. - -## illumina iGenomes -To make the use of reference genomes easier, illumina has developed a centralised resource called [iGenomes](https://support.illumina.com/sequencing/sequencing_software/igenome.html). -Multiple reference index types are held together with consistent structure for multiple genomes. - -We have put a copy of iGenomes up onto AWS S3 hosting and this pipeline is configured to use this by default. -The hosting fees for AWS iGenomes are currently kindly funded by a grant from Amazon. -The pipeline will automatically download the required reference files when you run the pipeline. -For more information about the AWS iGenomes, see https://ewels.github.io/AWS-iGenomes/ - -Downloading the files takes time and bandwidth, so we recommend making a local copy of the iGenomes resource. -Once downloaded, you can customise the variable `params.igenomes_base` in your custom configuration file to point to the reference location. -For example: -```nextflow -params.igenomes_base = '/path/to/data/igenomes/' -``` diff --git a/docs/installation.md b/docs/installation.md deleted file mode 100644 index 4e1a1baf..00000000 --- a/docs/installation.md +++ /dev/null @@ -1,115 +0,0 @@ -# nf-core/mag: Installation - -To start using the nf-core/mag pipeline, follow the steps below: - -1. [Install Nextflow](#1-install-nextflow) -2. [Install the pipeline](#2-install-the-pipeline) - * [Automatic](#21-automatic) - * [Offline](#22-offline) - * [Development](#23-development) -3. [Pipeline configuration](#3-pipeline-configuration) - * [Software deps: Docker and Singularity](#31-software-deps-docker-and-singularity) - * [Software deps: Bioconda](#32-software-deps-bioconda) - * [Configuration profiles](#33-configuration-profiles) -4. [Reference genomes](#4-reference-genomes) -5. [Appendices](#appendices) - * [Running on UPPMAX](#running-on-uppmax) - -## 1) Install NextFlow -Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands: - -```bash -# Make sure that Java v8+ is installed: -java -version - -# Install Nextflow -curl -fsSL get.nextflow.io | bash - -# Add Nextflow binary to your PATH: -mv nextflow ~/bin/ -# OR system-wide installation: -# sudo mv nextflow /usr/local/bin -``` - -See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow. - -## 2) Install the pipeline - -#### 2.1) Automatic -This pipeline itself needs no installation - NextFlow will automatically fetch it from GitHub if `nf-core/mag` is specified as the pipeline name. - -#### 2.2) Offline -The above method requires an internet connection so that Nextflow can download the pipeline files. 
If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually: - -```bash -wget https://github.com/nf-core/mag/archive/master.zip -mkdir -p ~/my-pipelines/nf-core/ -unzip master.zip -d ~/my-pipelines/nf-core/ -cd ~/my_data/ -nextflow run ~/my-pipelines/nf-core/mag-master -``` - -To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file: - -```bash -export NXF_OFFLINE='TRUE' -``` - -#### 2.3) Development - -If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above. - - -## 3) Pipeline configuration -By default, the pipeline runs with the `standard` configuration profile. This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. You can see this configuration in [`conf/base.config`](../conf/base.config). - -Be warned of two important points about this default configuration: - -1. The default profile uses the `local` executor - * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. - * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported. -2. Nextflow will expect all software to be installed and available on the `PATH` - -#### 3.1) Software deps: Docker -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, running the pipeline with the option `-profile standard,docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/mag). - -#### 3.1) Software deps: Singularity -If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative. -The process is very similar: running the pipeline with the option `-profile standard,singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from singularity hub. - -If running offline with Singularity, you'll need to download and transfer the Singularity image first: - -```bash -singularity pull --name nf-core-mag.simg shub://nf-core/mag -``` - -Once transferred, use `-with-singularity` and specify the path to the image file: - -```bash -nextflow run /path/to/nf-core-mag -with-singularity nf-core-mag.simg -``` - -Remember to pull updated versions of the singularity image if you update the pipeline. - - -#### 3.2) Software deps: conda -If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements. -This is slower and less reproducible than the above, but is still better than having to install all requirements yourself! -The pipeline ships with a conda environment file and nextflow has built-in support for this. 
-To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile standard,conda` - - -## Appendices - -#### Running on UPPMAX -To run the pipeline on the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (`rackham`, `irma`, `bianca` etc), use the command line flag `-profile uppmax`. This tells Nextflow to submit jobs using the SLURM job executor with Singularity for software dependencies. - -Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project `. The pipeline will exit with an error message if you try to run it pipeline with the default UPPMAX config profile without a project. - -**Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`: - -```nextflow -params.project = 'project_ID' // eg. b2017123 -``` diff --git a/docs/output.md b/docs/output.md index bc8be4c1..64ea7ce1 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,6 +2,7 @@ This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. + ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) @@ -37,4 +38,4 @@ The pipeline has special steps which allow the software versions used to be repo * `Project_multiqc_data/` * Directory containing parsed statistics from the different tools used in the pipeline -For more information about how to use MultiQC reports, see http://multiqc.info +For more information about how to use MultiQC reports, see [http://multiqc.info](http://multiqc.info) diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index c7cbb086..00000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,28 +0,0 @@ -# nf-core/mag: Troubleshooting - -## Input files not found - -If only no file, only one input file , or only read one and not read two is picked up then something is wrong with your input file declaration - -1. The path must be enclosed in quotes (`'` or `"`) -2. The path must have at least one `*` wildcard character. This is even if you are only running one paired end sample. -3. When using the pipeline with paired end data, the path must use `{1,2}` or `{R1,R2}` notation to specify read pairs. -4. If you are running Single end data make sure to specify `--singleEnd` - -If the pipeline can't find your files then you will get the following error - -``` -ERROR ~ Cannot find any reads matching: *{1,2}.fastq.gz -``` - -Note that if your sample name is "messy" then you have to be very particular with your glob specification. A file name like `L1-1-D-2h_S1_L002_R1_001.fastq.gz` can be difficult enough for a human to read. Specifying `*{1,2}*.gz` wont work give you what you want Whilst `*{R1,R2}*.gz` will. - - -## Data organization -The pipeline can't take a list of multiple input files - it takes a glob expression. If your input files are scattered in different paths then we recommend that you generate a directory with symlinked files. If running in paired end mode please make sure that your files are sensibly named so that they can be properly paired. See the previous point. - -## Extra resources and getting help -If you still have an issue with running the pipeline then feel free to contact us. 
-Have a look at the [pipeline website](https://github.com/nf-core/mag) to find out how. - -If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow). diff --git a/docs/usage.md b/docs/usage.md index c5517131..3b518961 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,43 +2,45 @@ ## Table of contents -* [Introduction](#general-nextflow-info) + + +* [Table of contents](#table-of-contents) +* [Introduction](#introduction) * [Running the pipeline](#running-the-pipeline) -* [Updating the pipeline](#updating-the-pipeline) -* [Reproducibility](#reproducibility) + * [Updating the pipeline](#updating-the-pipeline) + * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) - * [`-profile`](#-profile-single-dash) - * [`docker`](#docker) - * [`awsbatch`](#awsbatch) - * [`standard`](#standard) - * [`none`](#none) - * [`--reads`](#--reads) - * [`--singleEnd`](#--singleend) -* [Reference Genomes](#reference-genomes) - * [`--genome`](#--genome) - * [`--fasta`](#--fasta) -* [Job Resources](#job-resources) -* [Automatic resubmission](#automatic-resubmission) -* [Custom resource requests](#custom-resource-requests) -* [AWS batch specific parameters](#aws-batch-specific-parameters) - * [`-awsbatch`](#-awsbatch) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) + * [`-profile`](#-profile) + * [`--reads`](#--reads) + * [`--singleEnd`](#--singleend) +* [Reference genomes](#reference-genomes) + * [`--genome` (using iGenomes)](#--genome-using-igenomes) + * [`--fasta`](#--fasta) + * [`--igenomesIgnore`](#--igenomesignore) +* [Job resources](#job-resources) + * [Automatic resubmission](#automatic-resubmission) + * [Custom resource requests](#custom-resource-requests) +* [AWS Batch specific parameters](#aws-batch-specific-parameters) + * [`--awsqueue`](#--awsqueue) + * [`--awsregion`](#--awsregion) * [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`-name`](#-name-single-dash) - * [`-resume`](#-resume-single-dash) - * [`-c`](#-c-single-dash) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_emails`](#--plaintext_emails) - * [`--sampleLevel`](#--sampleLevel) - * [`--multiqc_config`](#--multiqc_config) - - -## General Nextflow info + * [`--outdir`](#--outdir) + * [`--email`](#--email) + * [`-name`](#-name) + * [`-resume`](#-resume) + * [`-c`](#-c) + * [`--custom_config_version`](#--custom_config_version) + * [`--custom_config_base`](#--custom_config_base) + * [`--max_memory`](#--max_memory) + * [`--max_time`](#--max_time) + * [`--max_cpus`](#--max_cpus) + * [`--plaintext_email`](#--plaintext_email) + * [`--monochrome_logs`](#--monochrome_logs) + * [`--multiqc_config`](#--multiqc_config) + + + +## Introduction Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. It is recommended to limit the Nextflow Java virtual machines memory. 
We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): @@ -47,10 +49,13 @@ It is recommended to limit the Nextflow Java virtual machines memory. We recomme NXF_OPTS='-Xms1g -Xmx4g' ``` + + ## Running the pipeline The typical command for running the pipeline is as follows: + ```bash -nextflow run nf-core/mag --reads '*_R{1,2}.fastq.gz' -profile standard,docker +nextflow run nf-core/mag --reads '*_R{1,2}.fastq.gz' -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -79,30 +84,29 @@ First, go to the [nf-core/mag releases page](https://github.com/nf-core/mag/rele This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. -## Main Arguments +## Main arguments ### `-profile` -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile standard,docker` - the order of arguments is important! +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! + +If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. -* `standard` - * The default profile, used if `-profile` is not specified at all. - * Runs locally and expects all software to be installed and available on the `PATH`. +* `awsbatch` + * A generic configuration profile to be used with AWS Batch. +* `conda` + * A generic configuration profile to be used with [conda](https://conda.io/docs/) + * Pulls most software from [Bioconda](https://bioconda.github.io/) * `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) + * A generic configuration profile to be used with [Docker](http://docker.com/) + * Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) * `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from singularity-hub -* `conda` - * A generic configuration profile to be used with [conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `awsbatch` - * A generic configuration profile to be used with AWS Batch. + * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + * Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) * `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters -* `none` - * No configuration at all. Useful if you want to build your own config from scratch and want to avoid loading in the default `base` config profile (not recommended). + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters + + ### `--reads` Use this to specify the location of your input FastQ files. For example: @@ -129,7 +133,7 @@ By default, the pipeline expects paired-end data. 
If you have single-end data, y It is not possible to run a mixture of single-end and paired-end files in one run. -## Reference Genomes +## Reference genomes The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. @@ -153,6 +157,8 @@ Note that you can use the same configuration setup to save sets of reference fil The syntax for this reference configuration is as follows: + + ```nextflow params { genomes { @@ -164,6 +170,7 @@ params { } ``` + ### `--fasta` If you prefer, you can specify the full path to your reference genome when you run the pipeline: @@ -171,12 +178,19 @@ If you prefer, you can specify the full path to your reference genome when you r --fasta '[path to Fasta reference]' ``` -## Job Resources +### `--igenomesIgnore` +Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. + +## Job resources ### Automatic resubmission Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. ### Custom resource requests -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files in [`conf`](../conf) for examples. +Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. + +If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + +If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). ## AWS Batch specific parameters Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. @@ -189,11 +203,13 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ## Other command line parameters + + ### `--outdir` The output directory where the results will be saved. ### `--email` -Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to speicfy this on the command line for every run. 
+Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. ### `-name` Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. @@ -214,15 +230,39 @@ Specify the path to a specific config file (this is a core NextFlow command). **NB:** Single hyphen (core Nextflow option) -Note - you can use this to override defaults. For example, you can specify a config file using `-c` that contains the following: +Note - you can use this to override pipeline defaults. -```nextflow -process.$multiqc.module = [] +### `--custom_config_version` +Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. + +```bash +## Download and use config file with following git commid id +--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 ``` +### `--custom_config_base` +If you're running offline, nextflow will not be able to fetch the institutional config files +from the internet. If you don't need them, then this is not a problem. If you do need them, +you should download the files from the repo and tell nextflow where to find them with the +`custom_config_base` option. For example: + +```bash +## Download and unzip the config files +cd /path/to/my/configs +wget https://github.com/nf-core/configs/archive/master.zip +unzip master.zip + +## Run the pipeline +cd /path/to/my/data +nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ +``` + +> Note that the nf-core/tools helper package has a `download` command to download all required pipeline +> files + singularity containers + institutional configs in one go for you, to make this process easier. + ### `--max_memory` Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'`` +Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` ### `--max_time` Use to set a top-limit for the default time requirement for each process. @@ -235,5 +275,8 @@ Should be a string in the format integer-unit. eg. `--max_cpus 1` ### `--plaintext_email` Set to receive plain-text e-mails instead of HTML formatted. -### `--multiqc_config` +### `--monochrome_logs` +Set to disable colourful command line output and live life in monochrome. + +### `--multiqc_config` Specify a path to a custom MultiQC configuration file. 
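The `--max_memory`, `--max_time` and `--max_cpus` flags documented above cap the default requests of each individual process, not the total footprint of the run. A hedged example of a typical launch combining them with the main arguments from this document (paths, limits and the e-mail address are placeholders):

```bash
# Sketch: typical launch with per-process resource caps; adjust values to your system.
nextflow run nf-core/mag \
    --reads '/path/to/data/*_R{1,2}.fastq.gz' \
    -profile docker \
    --max_memory '64.GB' --max_cpus 16 --max_time '48.h' \
    --outdir results \
    --email you@example.com
```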
diff --git a/environment.yml b/environment.yml index d157e1e9..c13fd014 100644 --- a/environment.yml +++ b/environment.yml @@ -1,8 +1,11 @@ -name: nf-core-mag-1.0dev +# You can use this file to create a conda environment for this pipeline: +# conda env create -f environment.yml +name: nf-core-mag-1.0.0 channels: - - bioconda - conda-forge + - bioconda - defaults dependencies: - - fastqc=0.11.7 - - multiqc=1.6 + # TODO nf-core: Add required software dependencies here + - fastqc=0.11.8 + - multiqc=1.7 diff --git a/main.nf b/main.nf index 7f7ed4aa..071e13fa 100644 --- a/main.nf +++ b/main.nf @@ -11,23 +11,23 @@ def helpMessage() { + // TODO nf-core: Add to this help message with new command line parameters + log.info nfcoreHeader() log.info""" - ========================================= - nf-core/mag v${manifest.pipelineVersion} - ========================================= + Usage: The typical command for running the pipeline is as follows: - nextflow run nf-core/mag --reads '*_R{1,2}.fastq.gz' -profile standard,docker + nextflow run nf-core/mag --reads '*_R{1,2}.fastq.gz' -profile docker Mandatory arguments: --reads Path to input data (must be surrounded with quotes) - --genome Name of iGenomes reference -profile Configuration profile to use. Can use multiple (comma separated) - Available: standard, conda, docker, singularity, awsbatch, test + Available: conda, docker, singularity, awsbatch, test and more. Options: + --genome Name of iGenomes reference --singleEnd Specifies that the input is single end reads References If not specified in the configuration file or you wish to overwrite any of the references. @@ -36,6 +36,7 @@ def helpMessage() { Other options: --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. AWSBatch options: @@ -54,26 +55,18 @@ if (params.help){ exit 0 } -// Configurable variables -params.name = false -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -params.multiqc_config = "$baseDir/conf/multiqc_config.yaml" -params.email = false -params.plaintext_email = false - -multiqc_config = file(params.multiqc_config) -output_docs = file("$baseDir/docs/output.md") +// Check if genome exists in the config file +if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" +} -// Validate inputs +// TODO nf-core: Add any reference files that are needed +// Configurable reference genomes +fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false if ( params.fasta ){ fasta = file(params.fasta) if( !fasta.exists() ) exit 1, "Fasta file not found: ${params.fasta}" } -// AWSBatch sanity checking -if(workflow.profile == 'awsbatch'){ - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - if (!workflow.workDir.startsWith('s3') || !params.outdir.startsWith('s3')) exit 1, "Specify S3 URLs for workDir and outdir parameters on AWSBatch!" 
-} // // NOTE - THIS IS NOT USED IN THIS PIPELINE, EXAMPLE ONLY // If you want to use the above in a process, define the following: @@ -89,79 +82,81 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ custom_runName = workflow.runName } -// Check workDir/outdir paths to be S3 buckets if running on AWSBatch -// related: https://github.com/nextflow-io/nextflow/issues/813 + if( workflow.profile == 'awsbatch') { - if(!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!" + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." } +// Stage config files +ch_multiqc_config = Channel.fromPath(params.multiqc_config) +ch_output_docs = Channel.fromPath("$baseDir/docs/output.md") + /* * Create a channel for input read files */ - if(params.readPaths){ - if(params.singleEnd){ - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } else { - Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .into { read_files_fastqc; read_files_trimming } - } - } else { - Channel - .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } - .into { read_files_fastqc; read_files_trimming } - } +if(params.readPaths){ + if(params.singleEnd){ + Channel + .from(params.readPaths) + .map { row -> [ row[0], [file(row[1][0])]] } + .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } + .into { read_files_fastqc; read_files_trimming } + } else { + Channel + .from(params.readPaths) + .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } + .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } + .into { read_files_fastqc; read_files_trimming } + } +} else { + Channel + .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) + .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nIf this is single-end data, please specify --singleEnd on the command line." } + .into { read_files_fastqc; read_files_trimming } +} // Header log info -log.info """======================================================= - ,--./,-. 
- ___ __ __ __ ___ /,-._.--~\' - |\\ | |__ __ / ` / \\ |__) |__ } { - | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, - `._,._,\' - -nf-core/mag v${manifest.pipelineVersion}" -=======================================================""" +log.info nfcoreHeader() def summary = [:] -summary['Pipeline Name'] = 'nf-core/mag' -summary['Pipeline Version'] = manifest.pipelineVersion -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Max Memory'] = params.max_memory -summary['Max CPUs'] = params.max_cpus -summary['Max Time'] = params.max_time -summary['Output dir'] = params.outdir -summary['Working dir'] = workflow.workDir -summary['Container Engine'] = workflow.containerEngine -if(workflow.containerEngine) summary['Container'] = workflow.container -summary['Current home'] = "$HOME" -summary['Current user'] = "$USER" -summary['Current path'] = "$PWD" -summary['Working dir'] = workflow.workDir -summary['Output dir'] = params.outdir -summary['Script dir'] = workflow.projectDir -summary['Config Profile'] = workflow.profile +if(workflow.revision) summary['Pipeline Release'] = workflow.revision +summary['Run Name'] = custom_runName ?: workflow.runName +// TODO nf-core: Report custom parameters here +summary['Reads'] = params.reads +summary['Fasta Ref'] = params.fasta +summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +summary['Output dir'] = params.outdir +summary['Launch dir'] = workflow.launchDir +summary['Working dir'] = workflow.workDir +summary['Script dir'] = workflow.projectDir +summary['User'] = workflow.userName if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } -if(params.email) summary['E-mail Address'] = params.email -log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n") -log.info "=========================================" +summary['Config Profile'] = workflow.profile +if(params.config_profile_description) summary['Config Description'] = params.config_profile_description +if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact +if(params.config_profile_url) summary['Config URL'] = params.config_profile_url +if(params.email) { + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize +} +log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") +log.info "\033[2m----------------------------------------------------\033[0m" +// Check the hostnames against configured profiles +checkHostname() def create_workflow_summary(summary) { - def yaml_file = workDir.resolve('workflow_summary_mqc.yaml') yaml_file.text = """ id: 'nf-core-mag-summary' @@ -183,17 +178,24 @@ ${summary.collect { k,v -> "
            <dt>$k</dt><dd><samp>
${v ?: ' + if (filename.indexOf(".csv") > 0) filename + else null + } output: file 'software_versions_mqc.yaml' into software_versions_yaml + file "software_versions.csv" script: + // TODO nf-core: Get all tools to print their version number here """ - echo $manifest.pipelineVersion > v_pipeline.txt + echo $workflow.manifest.version > v_pipeline.txt echo $workflow.nextflow.version > v_nextflow.txt fastqc --version > v_fastqc.txt multiqc --version > v_multiqc.txt - scrape_software_versions.py > software_versions_mqc.yaml + scrape_software_versions.py &> software_versions_mqc.yaml """ } @@ -228,18 +230,21 @@ process multiqc { publishDir "${params.outdir}/MultiQC", mode: 'copy' input: - file multiqc_config - file ('fastqc/*') from fastqc_results.collect() - file ('software_versions/*') from software_versions_yaml + file multiqc_config from ch_multiqc_config + // TODO nf-core: Add in log files from your new processes for MultiQC to find! + file ('fastqc/*') from fastqc_results.collect().ifEmpty([]) + file ('software_versions/*') from software_versions_yaml.collect() file workflow_summary from create_workflow_summary(summary) output: file "*multiqc_report.html" into multiqc_report file "*_data" + file "multiqc_plots" script: rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time """ multiqc -f $rtitle $rfilename --config $multiqc_config . """ @@ -251,11 +256,10 @@ process multiqc { * STEP 3 - Output Description HTML */ process output_documentation { - tag "$prefix" - publishDir "${params.outdir}/Documentation", mode: 'copy' + publishDir "${params.outdir}/pipeline_info", mode: 'copy' input: - file output_docs + file output_docs from ch_output_docs output: file "results_description.html" @@ -279,7 +283,7 @@ workflow.onComplete { subject = "[nf-core/mag] FAILED: $workflow.runName" } def email_fields = [:] - email_fields['version'] = manifest.pipelineVersion + email_fields['version'] = workflow.manifest.version email_fields['runName'] = custom_runName ?: workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -297,10 +301,26 @@ workflow.onComplete { if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if(workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) + // On success try attach the multiqc report + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList){ + log.warn "[nf-core/mag] Found multiple reports from process 'multiqc', will use only one" + mqc_report = mqc_report[0] + } + } + } catch (all) { + log.warn "[nf-core/mag] Could not attach MultiQC report to summary email" + } + // Render the TXT template def engine = new 
groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -313,7 +333,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ] + def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() @@ -333,7 +353,7 @@ workflow.onComplete { } // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/Documentation/" ) + def output_d = new File( "${params.outdir}/pipeline_info/" ) if( !output_d.exists() ) { output_d.mkdirs() } @@ -342,6 +362,67 @@ workflow.onComplete { def output_tf = new File( output_d, "pipeline_report.txt" ) output_tf.withWriter { w -> w << email_txt } - log.info "[nf-core/mag] Pipeline Complete" + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + + if (workflow.stats.ignoredCountFmt > 0 && workflow.success) { + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCountFmt} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCountFmt} ${c_reset}" + } + + if(workflow.success){ + log.info "${c_purple}[nf-core/mag]${c_green} Pipeline completed successfully${c_reset}" + } else { + checkHostname() + log.info "${c_purple}[nf-core/mag]${c_red} Pipeline completed with errors${c_reset}" + } } + + +def nfcoreHeader(){ + // Log colors ANSI codes + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_dim = params.monochrome_logs ? '' : "\033[2m"; + c_black = params.monochrome_logs ? '' : "\033[0;30m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_yellow = params.monochrome_logs ? '' : "\033[0;33m"; + c_blue = params.monochrome_logs ? '' : "\033[0;34m"; + c_purple = params.monochrome_logs ? '' : "\033[0;35m"; + c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; + c_white = params.monochrome_logs ? '' : "\033[0;37m"; + + return """ ${c_dim}----------------------------------------------------${c_reset} + ${c_green},--.${c_black}/${c_green},-.${c_reset} + ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} + ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} + ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} + ${c_green}`._,._,\'${c_reset} + ${c_purple} nf-core/mag v${workflow.manifest.version}${c_reset} + ${c_dim}----------------------------------------------------${c_reset} + """.stripIndent() +} + +def checkHostname(){ + def c_reset = params.monochrome_logs ? '' : "\033[0m" + def c_white = params.monochrome_logs ? '' : "\033[0;37m" + def c_red = params.monochrome_logs ? '' : "\033[1;91m" + def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" + if(params.hostnames){ + def hostname = "hostname".execute().text.trim() + params.hostnames.each { prof, hnames -> + hnames.each { hname -> + if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + log.error "====================================================\n" + + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + + " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + + "============================================================" + } + } + } + } +} diff --git a/nextflow.config b/nextflow.config index e2abe31f..e29655ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,54 +3,64 @@ * nf-core/mag Nextflow config file * ------------------------------------------------- * Default config options for all environments. - * Cluster-specific config options should be saved - * in the conf folder and imported under a profile - * name here. */ // Global default params, used in configs params { - container = 'nfcore/mag:latest' // Container slug. Stable releases should specify release tag! - - help = false + // Workflow flags + // TODO nf-core: Specify your pipeline's command line flags reads = "data/*{1,2}.fastq.gz" singleEnd = false outdir = './results' + + // Boilerplate options + name = false + multiqc_config = "$baseDir/assets/multiqc_config.yaml" + email = false + maxMultiqcEmailFileSize = 25.MB + plaintext_email = false + monochrome_logs = false + help = false igenomes_base = "./iGenomes" tracedir = "${params.outdir}/pipeline_info" - clusterOptions = false awsqueue = false awsregion = 'eu-west-1' + igenomesIgnore = false + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + hostnames = false + config_profile_description = false + config_profile_contact = false + config_profile_url = false } -profiles { +// Container slug. Stable releases should specify release tag! 
+// Developmental code should specify :dev +process.container = 'nfcore/mag:dev' - standard { - includeConfig 'conf/base.config' - } +// Load base.config by default for all pipelines +includeConfig 'conf/base.config' + +// Load nf-core custom profiles from different Institutions +try { + includeConfig "${params.custom_config_base}/nfcore_custom.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") +} + +profiles { + awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } - docker { - docker.enabled = true - process.container = params.container - } - singularity { - singularity.enabled = true - process.container = {"shub://${params.container.replace('nfcore', 'nf-core')}"} - } - awsbatch { - includeConfig 'conf/base.config' - includeConfig 'conf/awsbatch.config' - includeConfig 'conf/igenomes.config' - } - test { - includeConfig 'conf/base.config' - includeConfig 'conf/test.config' - } - none { - // Don't load any config (for use with custom home configs) - } + debug { process.beforeScript = 'echo $HOSTNAME' } + docker { docker.enabled = true } + singularity { singularity.enabled = true } + test { includeConfig 'conf/test.config' } +} +// Load igenomes.config if required +if(!params.igenomesIgnore){ + includeConfig 'conf/igenomes.config' } // Capture exit codes from upstream processes when piping @@ -58,28 +68,29 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] timeline { enabled = true - file = "${params.tracedir}/pipeline_info/nf-core/mag_timeline.html" + file = "${params.tracedir}/execution_timeline.html" } report { enabled = true - file = "${params.tracedir}/pipeline_info/nf-core/mag_report.html" + file = "${params.tracedir}/execution_report.html" } trace { enabled = true - file = "${params.tracedir}/pipeline_info/nf-core/mag_trace.txt" + file = "${params.tracedir}/execution_trace.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_info/nf-core/mag_dag.svg" + file = "${params.tracedir}/pipeline_dag.svg" } manifest { name = 'nf-core/mag' - description = 'Assembly, binning and annotation of metagenomes' + author = 'No author provided' homePage = 'https://github.com/nf-core/mag' - pipelineVersion = '1.0dev' + description = 'Assembly, binning and annotation of metagenomes' mainScript = 'main.nf' - nextflowVersion = '>=0.30.0' + nextflowVersion = '>=0.32.0' + version = '1.0.0' } // Function to ensure that resource requirements don't go beyond From 1552b69724eb4917a56a72be18898f15d41b7be0 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 12 Apr 2019 14:56:55 +0200 Subject: [PATCH 036/105] fix metaquast for bins --- main.nf | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 7a3e166c..463067ba 100644 --- a/main.nf +++ b/main.nf @@ -873,7 +873,7 @@ process quast_bins { set val(assembler), val(sample), file(assembly), file(reads) from metabat_bins_quast_bins output: - file("QUAST/${assembler}/${sample}/*") + file("QUAST/*") when: !params.skip_quast @@ -881,11 +881,21 @@ process quast_bins { script: if ( !params.singleEnd ) { """ - metaquast.py --threads "${task.cpus}" --pe1 "${reads[0]}" --pe2 "${reads[1]}" --rna-finding --gene-finding -l "${assembler}-${sample}" "${assembly}" -o "QUAST/${assembler}/${sample}" + ASSEMBLIES=\$(echo \"$assembly\" | sed 's/[][]//g') + IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" + + for assembly in \"\${assemblies[@]}\"; do + metaquast.py 
--threads "${task.cpus}" --pe1 "${reads[0]}" --pe2 "${reads[1]}" --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" + done """ } else { """ - metaquast.py --threads "${task.cpus}" --single "${reads}" --rna-finding --gene-finding -l "${assembler}-${sample}" "${assembly}" -o "QUAST/${assembler}/${sample}" + ASSEMBLIES=\$(echo \"$assembly\" | sed 's/[][]//g') + IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" + + for assembly in \"\${assemblies[@]}\"; do + metaquast.py --threads "${task.cpus}" --single "${reads}" --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" + done """ } } From d587861d212f15245cfa5ff8f9c0140b9642905e Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Apr 2019 14:09:36 +0200 Subject: [PATCH 037/105] prevent MetaQUAST from downloading reference genomes since this often failed due to connectivity problems --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 463067ba..f8c464e3 100644 --- a/main.nf +++ b/main.nf @@ -679,7 +679,7 @@ process quast { script: """ - metaquast.py --threads "${task.cpus}" --rna-finding -l "${assembler}-${sample}" "${assembly}" -o "${sample}_QC" + metaquast.py --threads "${task.cpus}" --rna-finding --max-ref-number 0 -l "${assembler}-${sample}" "${assembly}" -o "${sample}_QC" """ } @@ -885,7 +885,7 @@ process quast_bins { IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" for assembly in \"\${assemblies[@]}\"; do - metaquast.py --threads "${task.cpus}" --pe1 "${reads[0]}" --pe2 "${reads[1]}" --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" + metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" done """ } else { @@ -894,7 +894,7 @@ process quast_bins { IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" for assembly in \"\${assemblies[@]}\"; do - metaquast.py --threads "${task.cpus}" --single "${reads}" --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" + metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" done """ } From 10e1780a6094f7d3ac1fcca4feba636328faa9d1 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 15 Apr 2019 16:20:19 +0200 Subject: [PATCH 038/105] reorganize quast output and publish busco and quast summary --- bin/combine_tables.py | 17 +++++++++++ main.nf | 71 +++++++++++++++++++++++++++++-------------- 2 files changed, 65 insertions(+), 23 deletions(-) create mode 100755 bin/combine_tables.py diff --git a/bin/combine_tables.py b/bin/combine_tables.py new file mode 100755 index 00000000..705eeaa7 --- /dev/null +++ b/bin/combine_tables.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +#USAGE: ./combine_tables.py + +import pandas as pd +from sys import stdout +from sys import argv + +# Read files +file1 = pd.read_csv(argv[1], sep="\t") +file2 = pd.read_csv(argv[2], sep="\t") + +# Merge files +result = pd.merge(file1, file2, left_on="GenomeBin", right_on="Assembly", how='outer') + +# Print to stdout +result.to_csv(stdout, sep='\t') diff --git a/main.nf b/main.nf index f8c464e3..22d598eb 100644 --- a/main.nf +++ b/main.nf @@ -766,7 +766,7 @@ metabat_bins */ process busco { tag "${assembly}" - publishDir "${params.outdir}/GenomeBinning/QC/raw/", mode: 'copy' + publishDir "${params.outdir}/GenomeBinning/QC/BUSCO/", mode: 'copy' input: set val(assembler), 
val(sample), file(assembly), val(db_name), file(db) from metabat_db_busco @@ -837,8 +837,9 @@ process busco_plot { output: file("*busco_figure.png") - file("raw/*busco_figure.R") - file("*busco_summary.txt") + file("BUSCO/*busco_figure.R") + file("BUSCO/*busco_summary.txt") + file("busco_summary.txt") into busco_summary script: def assemblersampleunique = assemblersample.unique() @@ -847,6 +848,8 @@ process busco_plot { assemblersample=\$(echo \"$assemblersampleunique\" | sed 's/[][]//g') IFS=', ' read -r -a assemblersamples <<< \"\$assemblersample\" + mkdir BUSCO + for name in \"\${assemblersamples[@]}\"; do mkdir \${name} cp short_summary_\${name}* \${name}/ @@ -855,11 +858,10 @@ process busco_plot { cp \${name}/busco_figure.png \${name}-busco_figure.png cp \${name}/busco_figure.R \${name}-busco_figure.R - summary_busco.py \${name}/short_summary_*.txt >\${name}-busco_summary.txt + summary_busco.py \${name}/short_summary_*.txt >BUSCO/\${name}-busco_summary.txt done - mkdir raw - cp *-busco_figure.R raw/ + cp *-busco_figure.R BUSCO/ summary_busco.py short_summary_*.txt >busco_summary.txt """ @@ -874,30 +876,53 @@ process quast_bins { output: file("QUAST/*") + file("QUAST/*-quast_summary.tsv") into quast_bin_summaries when: !params.skip_quast script: - if ( !params.singleEnd ) { - """ - ASSEMBLIES=\$(echo \"$assembly\" | sed 's/[][]//g') - IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" + """ + ASSEMBLIES=\$(echo \"$assembly\" | sed 's/[][]//g') + IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" - for assembly in \"\${assemblies[@]}\"; do - metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" - done - """ - } else { - """ - ASSEMBLIES=\$(echo \"$assembly\" | sed 's/[][]//g') - IFS=', ' read -r -a assemblies <<< \"\$ASSEMBLIES\" + for assembly in \"\${assemblies[@]}\"; do + metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" + if ! [ -f "QUAST/${assembler}-quast_summary.tsv" ]; then + cp "QUAST/\${assembly}/transposed_report.tsv" "QUAST/${assembler}-quast_summary.tsv" + else + tail -n +2 "QUAST/\${assembly}/transposed_report.tsv" >> "QUAST/${assembler}-quast_summary.tsv" + fi + done + """ +} + +process merge_quast_and_busco { + publishDir "${params.outdir}/GenomeBinning/QC/", mode: 'copy' + + input: + file(quast_bin_sum) from quast_bin_summaries.collect() + file(busco_sum) from busco_summary + + output: + file("quast_and_busco_summary.tsv") + file("quast_summary.tsv") + + script: + """ + QUAST_BIN=\$(echo \"$quast_bin_sum\" | sed 's/[][]//g') + IFS=', ' read -r -a quast_bin <<< \"\$QUAST_BIN\" - for assembly in \"\${assemblies[@]}\"; do - metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" - done - """ - } + for quast_file in \"\${quast_bin[@]}\"; do + if ! 
[ -f "quast_summary.tsv" ]; then + cp "\${quast_file}" "quast_summary.tsv" + else + tail -n +2 "\${quast_file}" >> "quast_summary.tsv" + fi + done + + combine_tables.py $busco_sum quast_summary.tsv >quast_and_busco_summary.tsv + """ } From d931d4cb5da034fda68adcff60d979d895d147bc Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 16 Apr 2019 09:31:14 +0200 Subject: [PATCH 039/105] make porechop optional --- main.nf | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/main.nf b/main.nf index 22d598eb..609c9656 100644 --- a/main.nf +++ b/main.nf @@ -52,6 +52,7 @@ def helpMessage() { (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz") Long read preprocessing: + --skip_adapter_trimming Skip removing adapter sequences from long reads --longreads_min_length Discard any read which is shorter than this value (default: 1000) --longreads_keep_percent Keep this percent of bases (default: 90) --longreads_length_weight The higher the more important is read length when choosing the best reads (default: 10) @@ -148,6 +149,7 @@ params.skip_quast = false /* * long read preprocessing options */ +params.skip_adapter_trimming = false params.keep_lambda = false params.longreads_min_length = 1000 params.longreads_keep_percent = 90 @@ -316,20 +318,28 @@ process get_software_versions { /* * Trim adapter sequences on long read nanopore files */ -process porechop { - tag "$id" +if (!params.skip_adapter_trimming) { + process porechop { + tag "$id" + + input: + set id, lr, sr1, sr2 from files_all_raw - input: - set id, lr, sr1, sr2 from files_all_raw - - output: - set id, file("${id}_porechop.fastq"), sr1, sr2 into files_porechop - set id, lr, val("raw") into files_nanoplot_raw - - script: - """ - porechop -i ${lr} -t "${task.cpus}" -o ${id}_porechop.fastq - """ + output: + set id, file("${id}_porechop.fastq"), sr1, sr2 into files_porechop + set id, lr, val("raw") into files_nanoplot_raw + + script: + """ + porechop -i ${lr} -t "${task.cpus}" -o ${id}_porechop.fastq + """ + } +} else { + files_all_raw + .into{ files_porechop; pre_files_nanoplot_raw } + pre_files_nanoplot_raw + .map { id, lr, sr1, sr2 -> [ id, lr, "raw" ] } + .set { files_nanoplot_raw } } /* From 18f0d7a47546c9a27750989f02b871a3ce942d5a Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 16 Apr 2019 09:45:08 +0200 Subject: [PATCH 040/105] make quast behave correctely with multiple samples --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 609c9656..997828e6 100644 --- a/main.nf +++ b/main.nf @@ -898,10 +898,10 @@ process quast_bins { for assembly in \"\${assemblies[@]}\"; do metaquast.py --threads "${task.cpus}" --max-ref-number 0 --rna-finding --gene-finding -l "\${assembly}" "\${assembly}" -o "QUAST/\${assembly}" - if ! [ -f "QUAST/${assembler}-quast_summary.tsv" ]; then - cp "QUAST/\${assembly}/transposed_report.tsv" "QUAST/${assembler}-quast_summary.tsv" + if ! 
[ -f "QUAST/${assembler}-${sample}-quast_summary.tsv" ]; then + cp "QUAST/\${assembly}/transposed_report.tsv" "QUAST/${assembler}-${sample}-quast_summary.tsv" else - tail -n +2 "QUAST/\${assembly}/transposed_report.tsv" >> "QUAST/${assembler}-quast_summary.tsv" + tail -n +2 "QUAST/\${assembly}/transposed_report.tsv" >> "QUAST/${assembler}-${sample}-quast_summary.tsv" fi done """ From 3de7fccf4457929609a41bf6e39e3d2bcbcf368f Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 16 Apr 2019 10:01:27 +0200 Subject: [PATCH 041/105] clean up base.config and make sure that (meta)SPAdes memory allocation ranges from 64 to 1024 GB --- conf/base.config | 64 ++++++++++++------------------------------------ 1 file changed, 15 insertions(+), 49 deletions(-) diff --git a/conf/base.config b/conf/base.config index 33fc4af6..59b7ba72 100644 --- a/conf/base.config +++ b/conf/base.config @@ -17,95 +17,61 @@ process { memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } - maxRetries = 3 + errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + maxRetries = 4 maxErrors = '-1' // Process-specific resource requirements - withName: fastqc_raw { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } - withName: fastqc_trimmed { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } - withName: multiqc { - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } withName: busco_download_db { time = 4.h } + withName: phix_download_db { + time = 4.h + } withName: porechop { cpus = { check_max (4 * task.attempt, 'cpus' ) } memory = { check_max (30.GB * task.attempt, 'memory' ) } time = { check_max (4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: nanolyse { cpus = { check_max (2 * task.attempt, 'cpus' ) } memory = { check_max (10.GB * task.attempt, 'memory' ) } time = { check_max (3.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } //filtlong: exponential increase of memory and time with attempts - //64 GB starting memory might be sufficient withName: filtlong { cpus = { check_max (8 * task.attempt, 'cpus' ) } - memory = { check_max (256.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (48.h * (2**(task.attempt-1)), 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } } withName: remove_phix { cpus = { check_max (4 * task.attempt, 'cpus' ) } memory = { check_max (8.GB * task.attempt, 'memory' ) } time = { check_max (6.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } withName: megahit { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } - time = { check_max (8.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } + time = { check_max (16.h * task.attempt, 'time' ) } } //SPAdes returns error(1) if it runs out of memory (and for other reasons as well...)! - //exponential increase of memory with attempts - //spades: 40 GB memory / 16 h is sufficient for low complexity Illumina-only metagenome - //320 GB is not sufficient for complex dataset, also >50h! 
+ //exponential increase of memory and time with attempts withName: spades { - cpus = { check_max (20 * task.attempt, 'cpus' ) } - memory = { check_max (512.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (120.h * task.attempt, 'time' ) } + cpus = { check_max (10 * task.attempt, 'cpus' ) } + memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } - //spadeshybrid: 64 GB / 16 h starting memory might be sufficient withName: spadeshybrid { - cpus = { check_max (20 * task.attempt, 'cpus' ) } - memory = { check_max (512.GB * (2**(task.attempt-1)), 'memory' ) } - time = { check_max (120.h * task.attempt, 'time' ) } + cpus = { check_max (10 * task.attempt, 'cpus' ) } + memory = { check_max (64.GB * (2**(task.attempt-1)), 'memory' ) } + time = { check_max (24.h * (2**(task.attempt-1)), 'time' ) } errorStrategy = { task.exitStatus in [143,137,1] ? 'retry' : 'ignore' } } - withName: quast { - cpus = { check_max (2 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (2.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } withName: metabat { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (20.GB * task.attempt, 'memory' ) } time = { check_max (8.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } - withName: busco { - cpus = { check_max (4 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } - } - withName: busco_plot { - cpus = { check_max (1 * task.attempt, 'cpus' ) } - memory = { check_max (10.GB * task.attempt, 'memory' ) } - time = { check_max (4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'ignore' } } } From 8b6d1c657555c4f91bf5bd77fbd8e31ad6a48e35 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Apr 2019 10:05:54 +0200 Subject: [PATCH 042/105] add code for -profile test_hybrid --- .travis.yml | 1 + conf/test_hybrid.config | 19 +++++++++++++++++++ main.nf | 6 +----- nextflow.config | 3 +++ 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 conf/test_hybrid.config diff --git a/.travis.yml b/.travis.yml index 9d844bef..66a165c8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,3 +35,4 @@ script: - nf-core lint ${TRAVIS_BUILD_DIR} # Run the pipeline with the test profile - nextflow run main.nf -profile test,docker + - nextflow run ${TRAVIS_BUILD_DIR} -profile test_hybrid,docker diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config new file mode 100644 index 00000000..6e7c76a5 --- /dev/null +++ b/conf/test_hybrid.config @@ -0,0 +1,19 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run nf-core/methylseq -profile test + */ + +params { + max_cpus = 2 + max_memory = 7.GB + max_time = 48.h + params.outdir = "./tests" + params.temp_dir = "./tests/tmp_dir" + // Input data + singleEnd = false + params.manifest = 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/manifest.txt' +} diff --git a/main.nf b/main.nf index 997828e6..a0f0f92e 100644 --- a/main.nf +++ b/main.nf @@ -175,11 +175,7 @@ if(!params.keep_phix) { def returnFile(it) { // Return file if it exists - if (workflow.profile in ['test', 'localtest'] ) { - inputFile = file("$workflow.projectDir/" + it) - } else { - inputFile = file(it) - } + inputFile = file(it) if (!file(inputFile).exists()) exit 1, "Missing file in TSV file: ${inputFile}, see --help for more information" return inputFile } diff --git a/nextflow.config b/nextflow.config index 5e741ca6..9a52c82b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,9 @@ profiles { test { includeConfig 'conf/test.config' } + test_hybrid { + includeConfig 'conf/test_hybrid.config' + } binac_smp { includeConfig 'conf/binac_smp.config' } From e6aa28a8973d70b8be20ee8e80fc39d881ae36f1 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Apr 2019 12:56:23 +0200 Subject: [PATCH 043/105] fix file staging that broke test_hybrid --- main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index a0f0f92e..b075bda6 100644 --- a/main.nf +++ b/main.nf @@ -319,11 +319,11 @@ if (!params.skip_adapter_trimming) { tag "$id" input: - set id, lr, sr1, sr2 from files_all_raw + set id, file(lr), sr1, sr2 from files_all_raw output: set id, file("${id}_porechop.fastq"), sr1, sr2 into files_porechop - set id, lr, val("raw") into files_nanoplot_raw + set id, file(lr), val("raw") into files_nanoplot_raw script: """ @@ -353,10 +353,10 @@ if (!params.keep_lambda) { saveAs: {filename -> filename.indexOf(".fastq.gz") == -1 ? 
"QC_longreads/NanoLyse/$filename" : null} input: - set id, file(lr), sr1, sr2, file(nanolyse_db) from files_porechop.combine(file_nanolyse_db) + set id, file(lr), file(sr1), file(sr2), file(nanolyse_db) from files_porechop.combine(file_nanolyse_db) output: - set id, file("${id}_nanolyse.fastq.gz"), sr1, sr2 into files_nanolyse + set id, file("${id}_nanolyse.fastq.gz"), file(sr1), file(sr2) into files_nanolyse file("${id}_nanolyse_log.txt") script: @@ -381,7 +381,7 @@ process filtlong { tag "$id" input: - set id, lr, sr1, sr2 from files_nanolyse + set id, file(lr), file(sr1), file(sr2) from files_nanolyse output: set id, file("${id}_lr_filtlong.fastq.gz") into files_lr_filtered @@ -409,7 +409,7 @@ process nanoplot { publishDir "${params.outdir}/QC_longreads/NanoPlot_${id}", mode: 'copy' input: - set id, lr, type from files_nanoplot_raw.mix(files_nanoplot_filtered) + set id, file(lr), type from files_nanoplot_raw.mix(files_nanoplot_filtered) output: file '*.png' From b95555fdd63f8e7a6a523078002a3e6dd8911ccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 30 Apr 2019 13:36:26 +0200 Subject: [PATCH 044/105] fix nf tests --- .travis.yml | 4 ++-- nextflow.config | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index bf46a012..f749bd26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,6 +38,6 @@ script: - nf-core lint ${TRAVIS_BUILD_DIR} # Lint the documentation - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml - # Run the pipeline with the test profile - - nextflow run main.nf -profile test,docker + # Run the pipeline with the test profile(s) + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker - nextflow run ${TRAVIS_BUILD_DIR} -profile test_hybrid,docker diff --git a/nextflow.config b/nextflow.config index 4ab635e1..f6fc5ab5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -56,6 +56,7 @@ profiles { docker { docker.enabled = true } singularity { singularity.enabled = true } test { includeConfig 'conf/test.config' } + test_hybrid { includeConfig 'conf/test_hybrid.config' } binac_smp { includeConfig 'conf/binac_smp.config' } } From fd506bb8d1fd4610c522638bbabb13e96215563b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Mon, 10 Jun 2019 10:36:57 +0200 Subject: [PATCH 045/105] phix and lambda to assets/data --- .gitignore | 1 + ...CA_000840245.1_ViralProj14204_genomic.fna.gz | Bin 0 -> 15296 bytes .../GCA_002596845.1_ASM259684v1_genomic.fna.gz | Bin 0 -> 1888 bytes main.nf | 7 ++++--- 4 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz create mode 100644 assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz diff --git a/.gitignore b/.gitignore index 148e1af5..636ede74 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ tests/test_data *.pyc .vscode db/ +tests/ \ No newline at end of file diff --git a/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz b/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz new file mode 100644 index 0000000000000000000000000000000000000000..2fa0ed2a69731813047dbb6c5e122e25934b1cfc GIT binary patch literal 15296 zcmV;xJ3qu9iwFP!0000015~|Dt}RQFo!5_1)BprR5~PMY0t^Ph0kv%cNisnRV6g~+ z^M_9O`aX+YSy?adyXWlK5gzXLvuxS2|MUO;pa0kY?SKD2{=@(BFaOtn`yc=O|NiYi z{^LLW{m=jVkAL~^|Ia`E%Rm3+zy9N2|KorD$N%&%|LNcU^}qkufBuhu{>y*)=YRXR zfBolk&gu8R)jj?G@yy?U{{A%e_rEo#{{D0Ss_&1g-rvr@|NSHXJ=68~lfOUK^Y@YW 
z*YolBvwv6qyL2w}{^Pv=c)#%PPjm8v_Xhs1dq4R1zxVeiU-P_wo4>d}ycYeR;?+gC!>F>kOdq;mi==W~!Z{Aya|9P+4JQ?@&e)#WK z-k;9>(%f~{=HA!y_knf|?hVboOdj@>4>|9L@1x(_e81pb)O*GE7jip)fAhTe((S7J z{oq}I_uskI$+hNQ>%Gd~FPyvl?<>3)zE?Q!zyCeilTYP4=3b`m&3A6?eZ{oJ*PQqI-p0Me@IddoJa^OHCpqum&-;H*=bg^I;mP~W@@7}ujl>)1?D(#`%UHW} zzy4jS`+lFiq{)G~FZEr6`!eohpX~3+5*F|3{`z~%_wnuv?z^ymFXevqebijJPcDC7 z{NHuEvf1yFzrQf~!d-R)m7lR15N zKiuPowXE!+n6~Hmdh^S9Gjp&k*=u@@>sC(gnq765>Ym-Z@;%Sp5ne7Y;x68OMa3sD zICYQqU5B+47~PXSch@j~IoZ{VxelUOPGu@wMzi&B?|viu>R9cc1$b z@3zLI-VKrCkUNTVi$N+^CjL7+CwAfb&DS`*_j_GxA71~kz??hoXMCW08tQKB`3?1H=2R*jqxAYWK~PQ{`f054)9EpseRz*?WZ| zT)GB)ur1j-@3u+Ql>IWqb_q4!*Lp43yM7Z`q1JGW^?;{;kjq|o)DHUul(+f zjz2oQggGwfwcyn?>=jBH}qZGb%y7N;jTVu5T z*P-X|pT{HCF#}0Xva51*4S-pJ+!`l`txl?PPvS!?JlEZj4CEYC0u%gOw>J4CsLlfr zI*w#vz7JY|H^#`&6lr1NUlqamXI1-SK~M3hz*(5`7W0*A@R=ZMmb$gR-C9wL*mw<-$8LD zqRabreItiqk;!ixZ_r>0Siolg_L|eADkjePtG^fe;7N#YuO;H5g9A!p{N?)Cjbtu99KLCq^eEcF_RN zYA2exx0KulR3Vp6hs{ z>V2)gx3N%=my_sfeaKzY0n}xil%COV4Uj;(YIHflzQleKl!_@oO*T9S*!uuW9P1l+ zXo_AP@Q+`-@dkiO%=g6{<}=U-c=WDtC-`~Vcr+Q)(OMs4x~&HqS+5_r z9+6bFlEXhdW`a8sw8SC9w(VMAy<+tOZxWa$;~eACX9MZumh-jD7hx0N1tBOi@kCz< z(`rD2p5FWykUHhLWVG#h#M8_|OYJg}vtN+nTAKr0Ugr3gwa%9j19}YWMD5C#By^W- z;_2D2uP6}3w^^96G5{ty#};lt`Sd;^;J@^+Ddymhx8bdjE zvk78wfuj_Y&MXYR-W7}`<>q;21}VN_reqVUHK_TJ*AE(3w^EdU2cQ9 zSlyUaOQm_Jb~Oz-h;!provr=k`JMmT(;lm8pQ}09Y;6 zby#oVyr#D78z(sV$@_Ba_u>5_mE=XN*td=Z1W zc*pHr<6%zVQKI%sS!VNB^11uSfEAt1w$;A{@2P)7{<(1?#EYQiRQnGA)icwUXsmD% zV&NF?_S}G!eQ->iFe*70@2@BEDN2Mn#84#)GT7%5S`hFa>M!W3g6YwLoy*iO5r>t| z(%09=_$X_QfhUN*8dT^aOE*YfzSzb>MQMW49ree=Hc*2o^YxbYZ>am2y@J0dWH#GA zRuM;_qOb(GW+UUx6W(kTC(&T{IhQ4vNhp&NVqO4TxSoZ7KpyY5fh94bnIn-PWzyQx zo2W>!wc89}s52y~dG?cF3khK1OwG?*UaYQM7%~w9{p|0@$|Sa*BME;Djt$jbUw%B$ z+Nk~^BUL{l=8LD6r3|vj0WMv_9VwIW@;=6`o4k<&CLcE4$LyF-k^Heb&vdN!=~`r3L;ap|tgR5c@}J*ufT)vNw3F83mtAItqH)TSOc~h0_nD ztIbi`;&6dC9+-;P#P^VnnnuAep`vsR?C6O_i{s0P4UXmErBy(<;v=TNM&Ykt7JHl^ z;@L6Tjb%aVzPH{$s>VkIJ_ZK97u?^zmgbO}@&WcI;amM+iifHr#xq!D-L&F9*K9$J zB4&7IgIQ#!ocXbhpbUZtN$TGiyYM0KTLb~BHvTPb#{nK#Uz|z?4&>%RPwB(@EKi4J z76F^tB*4p14LmUBYO9|8901La5cZ{++!?tVkiYz-+IosW(cSWor6InJ$j+bv2dz2d zkoFQ+J>8!F_mK$+ED8>!Y1kXG;7Wi9D}oVS>>SF`A%cy^bMH%(6l<|o=%JL#dN|%C z8=&A4li_`&rZUc!>;$nTQSkmfm31x+efhD>Qzpg1c=_0tr^R-{Jvvn2OT$|j0XB9~ zAGFxTCeWbe7g(k;6&Kx&ag*p!&rg&U^YGnK%6EG0ZXtbY;6x*1RWlFwNL&LrtuVuD z58Sd6V!0EloS%qlpk|Q9J;z<%Ngk>BQV?aA6=^aqJ!ZHdI_5%m(Qf7OqtRa{EWI>^ z*JJ(scv^am;0E-GNK+TmZ#J=*2l#6FWB^~cdUTIbnvcpFq1H=L~x%b9-w z)%aqV_9+S-2z0!~ts&%r%k=wDrb(Fhd|AhUhnB2_+6-yT8?lVDA+yW~^6!G@a4cIH zPm+huT6V}N21rc~I9tXd6?ikL0KLJekLhSFjcV#+7g1Ouf=EsVUT%}gbCxlkRV-*{ zU+tO{@x&J&D>oYcuyHujRiV1omU#_E(-|XaQAZn!iFT6ZS4z!3 z+Q4-3EE|pD{g3Io1sY&-a$yYs7E3ac4x%Z6F3=Z423h=W(}zAB3490c>$06^0h~Cm z*&YBgs;_u`(W9rAlZ)fw*=xD0wO}d>u;p2}$IENK!4gdh0sCMK^B#7Ot+OJ^PL4eb zy8Pq|x8if{dl}S18yFu4Vh7y_DOUyF+nRI2yS&aPWD;nOrgp&H{JFprHOH5J@a$B+xuZ;1}4Yl69W} zW1{0D8RlSXlUD$l*Romwc!34VWO>5wJ3s8Ff;a{j`3N%QJ&`L|5D1LVW=Nttsi&k^ z^Hq7IClsZ9+#*|P!;FOy50s@F@IPmpi6$9D;8C|4QP=i=(fHOsyu9OHy$&e#lp{na zXh|7y(#LnSpaXvYA=?sk@de=$%YvH_(`+Sc8vrS3>sV<9TmsWr@V*=30mS7H^;boK z_i%`C#^#ju*7afK19EorWhR`nPta*eVV^i$0CvlEYj7e+w6Sc2DNN`aN(>HG9 zoB#-#*0M=#qyjZ*T85nL<@X$g^lQbQDMU&K4NyCHPHYSKd05kQ2^X0|r6iEX|(QVVuB6ddB-b4gXAn3!Pg> zI6%hjZ`q~objad+s82eqPut5y;N105C-QcRR!18)57eyTyM2GR92}f0d(-m6l1=(- zYDkUnhe%b61JZwaL-A*;I%B1IEAe#yO}WM*C*Z%58TtdrDw$NBAEZ!>w^LKiT1t~2 z!>8L8K>JDW5_JqLzA&8~1gx_JU=dSTji1YsjZNnT4(3TYta|Kx1H;Lom0m?7P!S%E z^!c2JKmr9<1O~&{@HU$On|Y=Q5OY=*%C~EV)P|7iS&K_af*y%2UxKB4-C7$}6(-ci zxodo`xIy?FqM3A~*7?YvmL?9=k)F-vy_wC#o-a%Mp?&hAsyzXDA^>r^GGPYK*B2g^ 
z=|@YU379TYz=C7Rj4$YI_gxC0h{t_K`snLxj;ZYw2q=$s0lhjuydc=<6gYtU-f9J9 z;4F6PTOaO|b6vm9`5zOBN<@`KqKhBGlK*NwO+sQFC16gMwKNNov7xpugPPb4w-lR= zG>M zB3B;b#5yC;YdCx#E79hs+GYXZM0E}1k)KB>^@p!Hc8c4OL2?2_NS6ap8!vj#(LA19lxF9`#*oZsFGPH1mg zBQ;>0r?LQ)fq^u}z8+2sR&)BH-{ObCBZh5lc%Tqp{Ed|{Z*J0U=}D8?XhbGl-BWJt z-cpG*+mI(|_#)j`_2Fij_kxtr?S$`AErw`-DhF|q>GJ_LK9B+vho*noj@x`jc-0#3 zcKe15hw{X>>yyA)&VG#5WE}kTRW#YZEnf9wfEJw33HkV+kGe2ex}g^+Ai}cIw%uQn z(pH_^aKnjwRzHPO3wC(jV~DBCt4Iz}^py{$8)cU(VX@uL5fLJwH~anV3GVY1ipuQ? zr~y)bAGw90WCitEMvi(Uc1@M&p$a|rFvLT=-}CI{to|Cz&c}l)naYLWWsFAAduUuYs+0#8z715qPQm10a0X-lbvCJK07Slu4B2lHtv zWOL}@VL?`B@p_1d^bE_7qO&9_M#N=olc5eHE9#aKFtFpY*~Qx!{?Dk~a&Tel#%4T$ zuS;?@lY}1)ZzxdBS!}3eZa;L_&uU2xt4(h*P`HN3?RP%#K1b?gVM6^?Z^GIx<_JRH zv5i?F9y)Pawk6*hNBHnPG#S|CqUr~=%i zf%>fEP0@)LC9j6W@^VF#eS$u{WJGrxNX7Ej2qE{?s1lUQ!&BR??8UbIVf$*q^MEEm zh^fZCUzFR70Mb*O)}yQqU0S<;kUrzRQXve$Ys?FSGtFy6XtiQnN@gr*H@!U(4Ry4> zB$Q-UM|%-Y*&+5f|1d3@ioT4IKeS_Rj1D`d82P0v^2IosVX!zIP+XFime==8&p=%}{?6*qKSu=vo_aL4N#MTUi)!Bg8U*I=c;isd2I14<(`5=JdAEW72$< zeR>M9Vwi|`T56cze|11x`Y(@RPitf>eoqmA75K>wG7r{S52ZmaKMD02c ze0a>4hnfSq@S2bPa2<5NFe|bq0n})ZYmEcp0L`Y-n+nB;CADp>mJ;1ZKpKhDkz{(t zlWG@q|41?XSPN1fwPBXwPjln0z%y96pr#+*e{XXPxTV+3*{Z9oOA)92Lr(jw&o`#u&10DO^Cn1QEw~6Br!{tPh9g^ zp^)v$vVD%(6PEdeIV5Tbk_Rgwu=Mn0o;m{oG+}|TJC$Dr+5!p0)7=Bv9|Hy ztVA0xcx1At)=(?M^~CSWYNtLrmLKpaC_rok!|AX!oO06%i{OiI%icL?K3i5lH2)(j zmajA!wQn^R?G5L~^Z4%`IYK2!l4XG;PW4X!FwkTcx?`)2QL{9%HnXPu zR$okhtGBj_MjL2ZYDvJxRx~=7B==acm0toLT}Y{n-D}(D2q>bQ$~j+=2uk{_cmURk z#ds7x`gnC)|1R}Hq>iZ3SAd^o6fQ9GR?V_lr~x458aWZZbxJl>7Zxr z&esNDWWUXHDm^I$VZq2=QX0+fxqk@Bm2)#bnw5u3sy24ZRQ{jcDX_8IKj>US$)u}3 zxcwbSASpat^5muvd;Or;`?DvwS?NO1EJ*e+p9CiyAv+|@b|wN4LzsS@RHH8#B0`zi z?uC1m4+lyzy`T4&O3BMgW_>qy&Ku{`?iMWJu-?c15*S42)xf5U-K!`x@)H`Qr9~eU zrMJm&6SGsuu=I*@VWnF09H7K9Y`-G{aprRCKM$RJ^T z7ZpNYQ!<4d24{L&b-MSjD#6lL7nR83!4X4c%i)>q14m)ycURxHj4dc=Fhz;}*gj13 z64FB1wj=9vy(v;GCnz!Y$C^RNA!d!qj(fxOLA&&JAr@V@IR%$4=&h>BTeKu0J7bc~ zN9uga24ev_$v1LYky1fenU{dJi9LWJSB+Gi@rF2w_RwVOOp5+%2C|78GzoR}9G1L} zuo^j(11viz1?A|K3X9D~gY_}XvqiR*j;gbFU+w`4hvC}*DcI{8gKE1d!Za-_OgZF5>xP?wNb&4If>PVe&erWL85&dP0qHLtl>-NC_Zh6rvACwv!VltiSJ4uZaRRbC zhO852GsdU-6GDY+IG~?|=VgiDS;R#JjzlX#4A`!8_UWY4RaTy9ML0Op~uY9I|to_QK*IB5eI+| z>91)H880Y^SCQ=C=|s4JdWv;#>NI7JnJs+eo^Ay$acLa$U z1+y}i_nwHp;%wb>0qKUL05>Q#Kfd4ell024xi4-rD5>oRnBc=SI(mZ;1UE=#k&i-5 zoXW=r(iulW%qMEsvwQI@Sh{_#qrP<$YF|kosDhew8yf?q(LjqJM2|f4M29u2wy5}D z-Qq&&$pExs{LokUVW%col2BmtBE14Jd4a0>Ge}2(NBO*P)7XaSIR!A7C53zf>C=#t z)jnE^ESeZ?cb8O!Q5;(A4M>E5uTI+D-I!a*&B-fu9|Zr&h{k%~YU-Af0CP3Q$z@7t z0?SZ8$Y_fTC8)AfWW^e01%@BReDX@aX+L0c>Z}-Qhh!EsfZRP90)B|0HrZlBM{R;c ztckX+R_xM7vU{=!Tw9lOyMwnMpR!*zZK+p?rB+iMOV?r15N_j8D}7Q)%tt%lpET4u zI{;0`Y82VcLT@yloM`tQw+B(=Y5Wu2Ejn7gd;u}3X52ri%=?x@>VTtMNQSa#8*amg z6z8&FLfD?r;r{+ZjM=PGTk9TTUZeLZ=e_hSZEaJtQKBeN6)zbf*v6s{qWu674s>yr zLX#F378V%b@$CUF(w&LcP10R6{bu|AjLyM%z%9HwOK`1lf_WQX(m1Gqm`^0mCP9J1 z4;6q^{gEldsH^&B4sCd>u%g=CDz44e3e-Zc>-Cx*^)czmZO+*eY#T1EcJF+1!_GR| zdm^6Wgt687+ZUTo%c|{T@03VFF`{V?VBJwfuU1uQ7mk^5Ouf2Q$@V^V6Y{~cv&&{1 z3+`DqkE})95WYnX6T~`*BtEgu%D5AC zknCr--sQb6Q^7Kf^0+_WLu-+=zIgO81>W4^&mRH;a(1BshMkc z4VyUA62=ZXHdudbN}^HP`S2+u1+7#c#@As|LtI!}d}@1Yguxtv6$}0-(@#46sTz-H zC?j-4)n-SToUfsaH4J1rFjTBV7r56jMF_taT8MLUFhJ+ytmag{Zq(ui%Y^>Zd>v|v z-YX6&+scAD z*$jNyJJI1J!m!c%tLl$-Zx9()>odtw5gV(1o7!)1n|rr)Y(!aI)*%uN3S&uWB@&0y zHz;K&Wsi|lmd3aZVJmHUAJv}w6X=G^?^K)g4qUZWd~mB1skSQMH54 zgroL+vU4e6M+PWqu6{CsPBy!niLoWd!>vNtW5cb)NFZgf9?~bkEIXOvm~5j-R>FXt zD{j!f6x5sS>F-rtonBb)EFek&HNs@H^IRdj{*tKa{?UYMDVI68(!j}R+F3lfQ~=}$ zn0r(r2AvnaJtIkm%r8a0TQYTDu<~3I45A~#?Z8s 
zv7cIwt8w+LYLv9=k54yGo8~kfmLxlv7G&+9sVQzsEKi(h0p)Iu2NlZ(aGy_*MP@Ga z{-oT;Giz(S{lD1eYq1$2aR48Gc(w}03-*>%rH%v(NeFvgDm&xJ{Tq?Z;( zGhVS)xE2kv(32kn;gOniW)agSU$X72oK)$cK%V`W+kiRDkAK7>A)3_kM>37`Y#18D z)@N&MK$=$*<5&#!{+R_dS`Z!nJw7`yN91r1O4>vpX=$jR*-WS>CGh=iS~;o_py~LH zNuAC(yq(NjwlmqT+!hVt0PQG)QVy-)>@xEtCk6B3v{{0nd&8&zb)8kDNYy zLJFPvZMzPxGqcL(RCU)Z^i~X}R#jLT97;$s>dx18K=rI>6%p=Vb;=F!I_pNa zy?s;LG^Hk@!&EaAMv$bw$+OWS*eYy00kD)_20NOgrnamU<~^8Xb8^F3Y7(b5SvvDG zemO8rFEPWR)x!HjwXSb1m!O;MVKFX?%2X%Ow=gz(OR1%)GOCBwcTA&3YFBsRv$Tjvn^%L@1xVx zVVh$z(%UG0BA7!7m0i(0jazlH+7OQyV8ezve=Q3u)!S{JIm;D@MPC+zIu>#E4&m#s zKR%)KMfQ+I0y7DUCPOT)u62ZiDJUm3VLEqw-`#8vCvoRN6$Ob^VBJB8dK+rH$HcW5 z8h9X*&C<;0YiIvj$T=@d*=9QUs4?C0MOgl4c6^}ax>Y4`>_B_^dxz5OFs4Highui( zIC2~|rFb4KVJLOYx?LdiIi@B{yw+bn+BFH6_GgwFGvg>Xp}sZuW;Mn@xPbFB@yB^r zRDn2bjoRY$vT$1KxLVtY?ngtCsf4Fjud3+>npK@;uYI(Ky#-HtVb?7QI)>+nRVr@rjJC)eJ?=2)kmwxCP@%MHxs~mGK zp%$LQ{}9*_L7?t*g`37BPxMGrL2Xyz+CVe{9FEr|;R|Ci5$UA|6eX*f6?pH}WzzLC z?5UjG*X^GVnCINtaHbyixP60*2ra8NL^`GRUn=@vH$2UPcBwNv1@Y&u<#Xi)AkZcS zX|`h3J47k@Pd%S{i7CL^HYeZc0b~)5{Uf!aFIN7eBu@t>+n`|KGQUy8D;4s#Cvn+h z4vSDi`SCf!&G_bwFxSL)xbg#1kIxS7*wW>JNVcF^osZxAAugkyf7h`pXby zGFd}O%dL?r@4R#Sme3@x`lx#RDJLWyeW?8%({wcjM8BND4fOmqnK=r3#X`c1f7D8r zWL;m0Bi-~yJSx50a?~kXpG`>f7p{2GuY}ef9n$i&Ox&G;;uc ztu#UJZ1!?dkX=II1hpzA#6J{0<}10&N9NrE3X^=$8OJ^_8|TlJ*>Lh*%NGAxb-n7< z*&pe9_g3O-c#Sek1fqeEk>N_5iNqO*bYe-`62NAh*FBz zP$dQXM&R$x*DE;6&~kSv-H1mpuq4$R^1<#3p>hL%Lxt9OzxvR{yQ=#04U zRLF8$dVM&(p?yIl$-Yn()>NZ|4i7ztEN({Ur( zTHyo$N(R8TNr~=2=V4Zx+=}@r`I{4&g;6ploLdS_S|mv2euaI{Zu$e6VG%iJ!rD}h^Ct8~y2?61z722BZdU+gl<1D87x~)#l^2>H!r*_0@#;7O&i{zQv0k+3_ zSqjsb66b`14G|4GIcM9hB(y*z8+i_@Z+^P+NL!chLH%;@A57-x$za`&L=neNsx(vk zM4JYc<7Uo|C@YB;g*^n4np&By(D;6p-mENT=H5cx&DR z5jOL#0i9Z}1ko>ilt7pd^xbt6Q1Q8=AT91)ZPT zGC%}phG9o)oy()4sSE1P_E%l&{Ir_#gXp2N^4v1*lv@G-qMqjYl`BzI=SgpU&kg{m zuWA*UTgf`y$`2Z%W;XovYlfd~=I&noGw@OS;ERzB$09(Pt0~o3bdfgI3PM5^-_n=T zj>)ql)H0%tOsWDq1|`|XL;MN7b>!{U_D0mI!6PO)Uz8+5R>)~ya`Gn|wxbZ+@?^G2 z&HM}0~vZn;@U&Wpx@|~?<{S7OV!OS$6IxW#`}$yeNY|$DKR0u+fHn6`4_@xYUs+Z9>qyB z6uX@bSqd%NCv-%`%1UJbl2Q$vn)6q2xXiQ;A#4-G)z_tA(qnY&4&2r}S~*o23j68# zlKis+PP%NXT(ygpcxZ~vB%6$zL6+#IW=FZmCRsxVW0;%MgP^tWMJS3?haEs=m1qyl!u?3?uuUOH5ksFL|@#j=<^8S)66eMr*XNRMr>+iX0hMwI?+;yWu8i*Ep)#zi%$)5_7<4Jm>{Jae3z zP%EEVV4)znwI=rzC8QtCq$3lz8SvY)MPi8%ZM^_3EJo468hk>89CbQr&}{Q3$8fGv z(Kk3Im8PQ)WA@hLJXp*=&{%*6Kyer&rRdh;axh5aF-HZSR5NCN22aY%K7bwHsICsxp1(8XZCXG`ip;G1 zYp466Vgz?f*l8(yu+HbBw4!grG943tWcl26FYcVT`F|;=wSb*>pH9Y z0cu-J^+?U4z2bMk-cCjR`hSkB1(i>grguU#l7D9`YICCI>|}@jBoiU7vaxUc`By@133DooI^4{v!Ko%&9hqw(Zy%ebk%}e zC3-jv%M(&XoQhxQFZB%-OG@x4b6$J&Y!4g(NJiLx-?atCG2qt3k}RV9W}MkoehYG-Y!__f zJJ+nW8N~EsB@RXs)JSy^8N|Nq#`Oo z!bFu>g=`8AXLe}r*(A-jX-qi>H?l}14~~zZCd9*g>VFiaY=&<+`1{ctz(0jY9A}sf z(iKg~cBOl*R$Akyg?b08Wah`C)P;)I1|PgR=hOxvgO`Xp(vFNm&-h;4GMr};;sGj& zeZFm9j!3W;Ix=9VcGOGVx}GW&)`{)8Kd6H!wS>%G!Cq4c*|(UO=s^sD&I3RJ4&csE zJ8Ub3`&LKvXR-}VB+qOS`z$*g%~x3cPB{3%O3UeT;D-qJL7X7QomKj!P;m^(hNg1o zD5xh|XQiWCq0xCP->k^iss*mT7J%30$~!lX=_xC{8f83=7?lrrj&Y3$iPU!5LEv9( ztyYV(u*>tT{u(N3WjI;ZN@hEJ>2IUgj3H<%sT{xk{HR`hsiitYCKwOFv#es+@=Y0H^ZHTvfJA`6 zv4qc8-yjpb(;Ymsyn>$9amUi5hDQ*;gZhk`m~{sO^*t%sT5Zi=#XXGu4@4+4>XYaE zmdTFaEAKz6)&6OQAIy=_#t-RxS~gVoH{e5?^t90+0>83W9XLLoUTJ^jI92#@r9ez> z<5|jTuUv}Y7p!chD&?}&P6-L|c*h`L3-VBhmO*{iW|qBiHUEC#HAPFI>7}6F=_EtxcTB*X z99ugZc4LHN-4K+qRc?uselgf@8rpoL#W7oRmiy{lA%@R>;Jjrl)wmy+KXRI(@s=g( z*`TZ>%(TKrD$F2EoGR_ZewL8>!z-&DS+S#9=hH7iXIG`JRjyB;lrcVe14hb`7A636 z5^VOY_ej^WX(6-#rZZ!aE$|yb@lb*wi(;42q@T{?S&2piHZl`zhfiy zCuIc!s&5BOE7VoiGqi|;6P8=)v0g5}8MeEDbmiGuxa_W-t8MhEYuF+jJO2Z)`g}FY z3+f`OTCZf6j`+^uh!Vob4Uod1rr6V`tW(CIX=Q 
z^d|^=gy?>V3N$YAsr2~B(oK{Ks7pS!hkobU`IPq)NE|HHE6K`xY1)=bJ*w0!NVmMk1Cov(={@VzcGt$L*W&4 z=n?X@TeG6JI?9pkWOVqLWsUSKG{X-SCP;L_Ilc?`RP9qBnPADpag<2KBMXQnpjG>< z3aG;!pXqP3{Et6ms*j~+Z{E%jXJ;#*y|QH)imfE3QVQpkf-;SFT`dMGTPO@icOTm7 zwds#Fv$cKghz8n{S76)DK%n$>cVp%pb-~1)!VW4Y1Ytjx3F$Q4aUe@gZ}AR(n8y5r zE*cY%YInsq`W5{Jm?v%OV&%a6lkrc^Su7i4wj=s{ar#)dZdnV~ms<@b0Ya`iH&e2zE5ls7@*Su0HIl&mO0Sp{E4K zU_#xWYO0kyI9BRPIjOwtw#n!s;%yX8YTJb+5ldXR7iPqHx3~r;H>A*o6^deYuN3FT z<4`nK?emlUV5o`$x0hH86i94caMZeM2_x-zP-)&4S^#TGboXW5|IGG4P~zm=*n?%0 zOenB)-7#jPw!rNaxyLqQD?g}Go<*lYf>ixHI~T+JNrAoq!H@L2QucFD?<=Q>5>KeT z@p1iE_@WfkznxtDsg*?tHcGyzrIoD}M`a^fT8h|yMAXY_VMM|>zeQ|xR{3_PYde!6 zEYSK9KXu0B9T_u3^eCA_h4{KKm^R+gdd5{P0)t;I~SC%)Qr#+g4MBuz~kO~<9^+*F$APZ2SHn}TMUA@%U_R+^S z-&r!=J$lZRSAA&2rR!6#CYeex=qS?cY=Y{Y;zH95hl>+3WnhPeV_@TPz2#LdL>`DnNh)$0pNv>I~FsrEgI! zyF2yQ15LfwSV9<|6~DHQ0ay7l##^|{1G2M{P6PEx3Ay7 zy#Dw4^7-}OZ(mD&|2%M$Yr9xNGi$`?%+wybJEnU6OafJiRa6 z*Y1Y;a{kSZvwI|8M(DMg-So8dalWtM_bj*@-i7n-7;^5q)Z_V}t;bd^)2C~4#_Tu8 zjDPKx=DZ8M(~7?|{QOwx;E#sCy=F&4?hd0}%>rp-P&Hy2|d-8Y50KJyx2 z3oXH5tgabngL}V+3*&P-XT<-;UYiEs5jM)y4TWZaKF+a8fYfg7ylfpS!iXCQ^x`$y z7`e!c2c?WX3b>?0gptohFtxsYp74iuioSR`KFqpMPAEH1Hr_Q}&Dvp!m<89ywNa*U zPMU%QOLi$qApQ*sPAbz@|EvgNSjkDaBwGm=`X(}o{iIdQh6<&CQ5H{ZgqElnN+JbN z9yU5^U}PWJ2n+9@rzIzeXeP#Qev_=ErQ9G`!la=qmRNwtjiaE-a%8j|1PG2$NLj`& zr@bX_4k~F5IwT&9VuwyiVNuAG62P*k@MHjfrgO#>rq@Jg&P_cCCdTMwy^|Y!q78|{ zRcfN5-x8@neWV=;u}Mf7Kn__JAss9Z7bC-#Fbw5D?ZB;0t`eGd34mluTfdd+A-A32 zlo9hlA$8&(VJX7*f@s~O_DMihl5{X|r>#?hHTrCxAGs$9N^K#llEBF1u>KG303w-5 z5X{KUbDTLA&OjZM3E>)q4!9^OSga@r0A1jNIdhLWX?+L92~qVHK4CxG3G_;uh^e6S z^zm4lM!sAw3uz@Q}#T3rO};icLc&@&@W7akDH20?Jj!TdS1*WowwXdF)GlEw$K(`&tbEd1 zor9}lvgdoOmZBuD4W{qrcm!{4Ob= z7OGO)hT`LznPy?o+o^MycFxskaZD&~MbAwKHMB(|T<`uDYaD=aZS)@3`i1 zX^VX9ct%Gf^~yaxh08V@iRh6# zH+uIn(eC3T;cfSvk`Mxn>L#^`1~hQ$8B7(jWv?5xtB0O4)M)Hji1N5a2pSvR21vpf z;5JN5E?c%W`1D&zB*{Gqh9^EZ>9q0Txi;mK?=P6h)$pIGi)!@MvZH{(adKM&SMBMs;jy*@aq=m`K8b>ku{ zP*SU-u*tIt85nAzg%R`#;VT9=M4fN90#vQ?ZHIz&_8OqvCb(RN?XcuM{RuNLRh1s% z5qVm-!`%G<&+aIai*dw?nj%2UYb8f9!!7=-J+XbhFS%lEb!l=2!V2f$R+F7FOd0KP zTq}K`b1Jy;qxN%wW;$7W2$BnBJ-ING6gAV5KvUz}IK`Rn2?aE}!!rZ7aV8BH1H#bt al((BHih=C>4*&rF{{sNY9O}+{6#xL8k(%uQ literal 0 HcmV?d00001 diff --git a/main.nf b/main.nf index d5a1eac2..77f39117 100644 --- a/main.nf +++ b/main.nf @@ -129,8 +129,8 @@ params.adapter_reverse = "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT" params.mean_quality = 15 params.trimming_quality = 15 params.keep_phix = false -params.phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" - +// params.phix_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz" +params.phix_reference = "$baseDir/assets/data/GCA_002596845.1_ASM259684v1_genomic.fna.gz" /* * binning options @@ -155,7 +155,8 @@ params.keep_lambda = false params.longreads_min_length = 1000 params.longreads_keep_percent = 90 params.longreads_length_weight = 10 -params.lambda_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" +// params.lambda_reference = "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz" +params.lambda_reference = 
"$baseDir/assets/data/GCA_000840245.1_ViralProj14204_genomic.fna.gz" // Stage config files ch_multiqc_config = Channel.fromPath(params.multiqc_config) From 9160b2577bc8d5c503e3ff6e8077f9a4615960a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 11 Jun 2019 10:57:24 +0200 Subject: [PATCH 046/105] linting --- .travis.yml | 2 +- environment.yml | 25 ++++++++++++------------- nextflow.config | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/.travis.yml b/.travis.yml index f749bd26..710a14f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: - docker pull nfcore/mag:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/mag:dev nfcore/mag:dev + - docker tag nfcore/mag:dev nfcore/mag:1.0.0 install: # Install Nextflow diff --git a/environment.yml b/environment.yml index 8154ec48..d14ebb04 100644 --- a/environment.yml +++ b/environment.yml @@ -9,24 +9,23 @@ dependencies: - fastqc=0.11.8 - multiqc=1.7 - fastp=0.19.5 - - megahit=1.1.3=py36_0 + - megahit=1.1.3 - metabat2=2.12.1 - samtools=1.9 - - bowtie2=2.3.4.3=py36he860b03_1 - - quast=5.0.2=py36pl526ha92aebf_0 - - hmmer=3.2.1 + - bowtie2=2.3.4.3 + - quast=5.0.2 - prodigal=2.6.3 - pplacer=1.1.alpha19 - - diamond=0.9.24=ha87ae23_0 - - python=3.6.7=hd21baee_1002 + - diamond=0.9.24 + - python=3.6.7 - r=3.5.1 - - biopython=1.72=py36h04863e7_0 + - biopython=1.72 - krona=2.7 - conda-forge::r-markdown=0.8 - r-ggplot2=3.1.0 - - busco=3.0.2=py36_10 - - nanoplot=1.20.0=py36_0 - - filtlong=0.2.0=he941832_2 - - porechop=0.2.3_seqan2.1.1=py36h2d50403_3 - - nanolyse=1.1.0=py36_1 - - spades=3.13.0=0 + - busco=3.0.2 + - nanoplot=1.20.0 + - filtlong=0.2.0 + - porechop=0.2.3_seqan2.1.1 + - nanolyse=1.1.0 + - spades=3.13.0 diff --git a/nextflow.config b/nextflow.config index f6fc5ab5..aa5a2c48 100644 --- a/nextflow.config +++ b/nextflow.config @@ -37,7 +37,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/mag:dev' +process.container = 'nfcore/mag:1.0.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' From d3aec170824650623a9bc6a91ea1bb915ed1c6d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 12 Jun 2019 08:16:28 +0200 Subject: [PATCH 047/105] markdownlint --- README.md | 2 +- docs/configuration/local.md | 5 ++++- docs/output.md | 24 +-------------------- docs/usage.md | 42 +++++++++++++++++++------------------ 4 files changed, 28 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 75c423e3..5bbda617 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ![mag](https://raw.githubusercontent.com/nf-core/mag/master/docs/images/mag_logo.png) -# nf-core/mag +## nf-core/mag **Assembly, binning and annotation of metagenomes**. diff --git a/docs/configuration/local.md b/docs/configuration/local.md index e119e1af..0d4f6585 100644 --- a/docs/configuration/local.md +++ b/docs/configuration/local.md @@ -3,6 +3,7 @@ If running the pipeline in a local environment, we highly recommend using either Docker or Singularity. ## Docker + Docker is a great way to run nf-core/mag, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. 
Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The nf-core/mag profile comes with a configuration profile for docker, making it very easy to use. This also comes with the required presets to use the AWS iGenomes resource, meaning that if using common reference genomes you just specify the reference ID and it will be autaomtically downloaded from AWS S3. @@ -10,6 +11,7 @@ Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker. First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) Then, simply run the analysis pipeline: + ```bash nextflow run nf-core/mag -profile docker --reads '' ``` @@ -19,10 +21,11 @@ Nextflow will recognise `nf-core/mag` and download the pipeline from GitHub. The For more information about how to work with reference genomes, see [`docs/configuration/reference_genomes.md`](docs/configuration/reference_genomes.md). ### Pipeline versions -The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r v1.3`. This uses pipeline code and docker image from this tagged version. +The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r v1.3`. This uses pipeline code and docker image from this tagged version. ## Singularity image + Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. Even better, it can use create images directly from dockerhub. To use the singularity image for a single run, use `-with-singularity 'docker://hadrieng/mag'`. This will download the docker container from dockerhub and create a singularity image for you dynamically. diff --git a/docs/output.md b/docs/output.md index 6693d88c..aa991e02 100644 --- a/docs/output.md +++ b/docs/output.md @@ -52,7 +52,7 @@ The pipeline has special steps which allow the software versions used to be repo - `Project_multiqc_data/` - Directory containing parsed statistics from the different tools used in the pipeline -For more information about how to use MultiQC reports, see http://multiqc.info +For more information about how to use MultiQC reports, see ## Megahit @@ -81,25 +81,3 @@ For more information about how to use MultiQC reports, see http://multiqc.info - reads mapped against the megahit assembly - `bins/sample_X.fa` - the putative genome bins retrieved by metabat - -## checkm - -[checkm](https://github.com/Ecogenomics/CheckM) Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes. - -**output directory: `results/checkm`** - -- `sample/` - - directory containing the improved bins -- `sample_stats` - - directory containing stats about completeness and contamination of the bins, as well as plots. - -## refinem - -_(this process is optional)_ - -[refinem](https://github.com/dparks1134/RefineM) is a companion tool to checkm, that refines and filter incongruent contigs from the genome bins. 
- -**output directory: `results/refinem`** - -- `sample.X.fa` - - the genome bins, improved by refinem diff --git a/docs/usage.md b/docs/usage.md index 25442574..ab0e4ba9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -135,8 +135,6 @@ By default, the pipeline expects paired-end data. If you have single-end data, y It is not possible to run a mixture of single-end and paired-end files in one run. -## Optional arguments - ## Reference genomes The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. @@ -181,29 +179,33 @@ params { If you prefer, you can specify the full path to your reference genome when you run the pipeline: -### Trimming options: +### `--igenomesIgnore` - --adapter_forward Sequence of 3' adapter to remove in the forward reads - --adapter_reverse Sequence of 3' adapter to remove in the reverse reads - --mean_quality Mean qualified quality value for keeping read (default: 15) - --trimming_quality Trimming quality value for the sliding window (default: 15) +Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. -### Binning options: +## Trimming options - --refinem Enable bin refinement with refinem. - --refinem_db Path to refinem database - --no_checkm Disable bin QC and merging with checkm - --min_contig_size Minimum contig size to be considered for binning (default: 1500) - --delta_cont Maximum increase in contamination to merge compatible bins (default: 5) - --merged_cont Maximum total contamination to merge compatible bins (default: 15) - --delta_compl Minimum increase in completion to merge compatible bins (default: 10) - --abs_delta_cov Minimum coverage ratio to merge compatible bins (default: 0.75) - --delta_gc Maximum %GC difference to merge compatible bins (default: 3) - --ssu_evalue Evalue threshold to filter incongruent 16S (default 1e-6) +### `--adapter_forward` -### `--igenomesIgnore` +Sequence of 3' adapter to remove in the forward reads -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. 
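To make the trimming and binning options being reorganised here concrete, a hedged example invocation follows; every value shown is simply the documented default, so in practice you would only pass the flags you actually want to change, and the read glob is a placeholder:

```bash
# Illustrative only: all values below are the stated defaults for these options.
nextflow run nf-core/mag -profile docker \
    --reads 'data/*{_R1,_R2}.fastq.gz' \
    --adapter_forward AGATCGGAAGAGCACACGTCTGAACTCCAGTCA \
    --adapter_reverse AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
    --mean_quality 15 \
    --trimming_quality 15 \
    --min_contig_size 1500
```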
+### `--adapter_reverse` + +Sequence of 3' adapter to remove in the reverse reads + +### `--mean_quality` + +Mean qualified quality value for keeping read (default: 15) + +### `--trimming_quality` + +Trimming quality value for the sliding window (default: 15) + +## Binning options + +### `--min_contig_size` + +Minimum contig size to be considered for binning (default: 1500) ## Job resources From db6693eed45c3dc73b1f81c3081c10c7f59b77df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 12 Jun 2019 15:42:31 +0200 Subject: [PATCH 048/105] pin matplotlib to 2.2.3 --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index d14ebb04..cfcc6988 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,7 @@ channels: dependencies: - fastqc=0.11.8 - multiqc=1.7 + - matplotlib=2.2.3 - fastp=0.19.5 - megahit=1.1.3 - metabat2=2.12.1 From d87faf5001cd3b7d2e580172154674f0d71f419f Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 16 Jul 2019 12:58:27 +0200 Subject: [PATCH 049/105] add optional centrifuge and a krona plot on raw reads --- bin/merge_bins.py | 245 ---------------------------------------------- environment.yml | 1 + main.nf | 117 ++++++++++++++++++++-- 3 files changed, 112 insertions(+), 251 deletions(-) delete mode 100755 bin/merge_bins.py diff --git a/bin/merge_bins.py b/bin/merge_bins.py deleted file mode 100755 index c91cb9c0..00000000 --- a/bin/merge_bins.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import os -import sys -import argparse - -from shutil import copyfile, copyfileobj - -from Bio import SeqIO -from Bio.SeqUtils import GC - - -class Bin(object): - """a genome bin""" - - def __init__(self, id, compl, contam): - super(Bin, self).__init__() - self.id = id - self.compl = compl - self.contam = contam - self.read_cov = float() - self.phylo = [] - - -class PossibleMerge(object): - """A possible merger between two bins. Corresponds to one line of - merger.tsv""" - - def __init__(self, line): - self.bin_1 = Bin(line[0], line[2], line[3]) - self.bin_2 = Bin(line[1], line[4], line[5]) - self.delta_compl = float(line[6]) - self.delta_contam = float(line[7]) - self.delta_merger = float(line[8]) # ??? 
- self.merged_compl = float(line[9]) - self.merged_contam = float(line[10]) - - def add_profile_info(self, profile_txt, mean): - for profile in self.parse_profile(profile_txt): - cov = (float(profile[2]) * mean) / (float(profile[1]) * 1e6) - cov = round(cov, 2) - if profile[0] == self.bin_1.id and self.bin_1.read_cov == 0.0: - self.bin_1.read_cov = cov - elif profile[0] == self.bin_2.id and self.bin_2.read_cov == 0.0: - self.bin_2.read_cov = cov - - def add_tree_info(self, tree_qa): - for phylogeny in self.parse_tree(tree_qa): - if phylogeny[0] == self.bin_1.id and self.bin_1.phylo == []: - self.bin_1.phylo = phylogeny[3:] - elif phylogeny[0] == self.bin_2.id and self.bin_2.phylo == []: - self.bin_2.phylo = phylogeny[3:] - - @staticmethod - def parse_profile(profile_txt): - with open(profile_txt, "r") as p: - p.readline() - header = p.readline() - p.readline() - for l in p: - sl = l.split() - if len(sl) < 2: - continue - yield(sl[:3]) # bin_id, bin size, n mapped reads - - @staticmethod - def parse_tree(tree_qa): - with open(tree_qa, "r") as t: - t.readline() - header = t.readline() - t.readline() - for l in t: - sl = l.split() - if len(sl) < 2: - continue - yield(sum([i.split(";") for i in sl], [])) - - -def get_mean(length_file): - with open(length_file, "r") as f: - total = [int(line.split()[0]) for line in f] - mean = sum(total) / len(total) - return(mean) - - -def parse_merge_file(merger_tsv): - with open(merger_tsv, "r") as m: - m.readline() # discard the header - for l in m: - line = l.split() - pm = PossibleMerge(line) - yield(pm) - - -def concatenate(file_list, output): - """Concatenate files together - Args: - file_list (list): the list of input files (can be a generator) - output (string): the output file name - """ - try: - out_file = open(output, "wb") - except (IOError, OSError) as e: - print("Failed to open output file: %s" % e) - sys.exit(1) - - with out_file: - for file_name in file_list: - if file_name is not None: - with open(file_name, "rb") as f: - copyfile(f, out_file) - - -def merge(args): - all_bins = os.listdir(args.bins) - n_merged = 0 - - # merge bins according to criteria in - # https://doi.org/10.1038/s41564-017-0012-7 - mean_read_length = get_mean(args.length) - for pm in parse_merge_file(args.merger): - pm.add_profile_info(args.profile, mean_read_length) - pm.add_tree_info(args.tree) - abs_delta_cov = \ - max(pm.bin_1.read_cov, pm.bin_2.read_cov) /\ - min(pm.bin_1.read_cov, pm.bin_2.read_cov) - clades = [] - for p1, p2 in zip(pm.bin_1.phylo, pm.bin_2.phylo): - clades.append((p1, p2)) - - if pm.delta_contam <= args.delta_cont and \ - pm.merged_contam <= args.merged_cont and \ - pm.delta_compl >= args.delta_compl and \ - abs_delta_cov >= args.abs_delta_cov and \ - clades[-1][0] == clades[-1][1]: - - # then read the two bins - bin_1_p = "%s/%s.fa" % (args.bins, pm.bin_1.id) - bin_2_p = "%s/%s.fa" % (args.bins, pm.bin_2.id) - with open(bin_1_p, "r") as bin_1, open(bin_2_, "r") as bin_2: - concat_seq_1 = concat_seq_2 = "" - for record in SeqIO.parse(bin_1, "fasta"): - concat_seq_1 += record.seq - gc_bin_1 = GC(concat_seq_1) - for record in SeqIO.parse(bin_2, "fasta"): - concat_seq_2 += record.seq - gc_bin_2 = GC(concat_seq_2) - - # then merge if similar GC content - if abs(gc_bin_1 - gc_bin_2) <= args.delta_gc: - output_name = "%s/merged_%i.fa" % (args.outdir, n_merged) - concatenate([bin_1_p, bin_2_p], output_name) - all_bins.remove(bin_1_p) - all_bins.remove(bin_2_p) - n_merged += 1 - - else: - continue - # now puts the single bins in the same output directory - 
for bin in all_bins: - if bin.endswith(".fa") or bin.endswith(".fasta"): - path = "%s/%s" % (args.bins, bin) - output_name = "%s/%s" % (args.outdir, bin) - copyfile(path, output_name) - - -def main(): - parser = argparse.ArgumentParser( - prog="merge_bins.py", - usage="parse merger.tsv from checkm and merge bins" - ) - parser.add_argument( - "--profile", - metavar="profile.txt", - help="output of checkm profile" - ) - parser.add_argument( - "--tree", - metavar="tree_qa.txt", - help="output of checkm tree_qa" - ) - parser.add_argument( - "--length", - metavar="read_length.txt", - help="a file containing read length distribution" - ) - parser.add_argument( - "--merger", - metavar="merger.tsv", - help="merger.tsv produced by checkm merge" - ) - parser.add_argument( - "--delta_cont", - metavar="int", - default=5, - help="Max increase in contamination to merge bins (default: 5)" - ) - parser.add_argument( - "--merged_cont", - metavar="int", - default=15, - help="Max total contamination to merge bins (default: 15)" - ) - parser.add_argument( - "--delta_compl", - metavar="int", - default=10, - help="Min increase in completion to merge bins (default: 10)" - ) - parser.add_argument( - "--abs_delta_cov", - metavar="float", - default=0.75, - help="Min coverage ratio to merge bins (default: 0.75)" - ) - parser.add_argument( - "--delta_gc", - metavar="int", - default=3, - help="Max %GC diffrence to merge bins (default: 3)" - ) - parser.add_argument( - "bins", - metavar="bins/", - help="the bin directory (bins must end with .fa or .fasta)" - ) - parser.add_argument( - "outdir", - metavar="outdir/", - help="output directory (must exist)" - ) - parser.set_defaults(func=merge) - args = parser.parse_args() - - try: - args.func(args) - except AttributeError as e: - parser.print_help() - raise - - -if __name__ == "__main__": - main() diff --git a/environment.yml b/environment.yml index cfcc6988..993c3175 100644 --- a/environment.yml +++ b/environment.yml @@ -30,3 +30,4 @@ dependencies: - porechop=0.2.3_seqan2.1.1 - nanolyse=1.1.0 - spades=3.13.0 + - centrifuge diff --git a/main.nf b/main.nf index 77f39117..c17e9b85 100644 --- a/main.nf +++ b/main.nf @@ -49,8 +49,6 @@ def helpMessage() { --mean_quality Mean qualified quality value for keeping read (default: 15) --trimming_quality Trimming quality value for the sliding window (default: 15) --keep_phix Keep reads similar to the Illumina internal standard PhiX genome (default: false) - --phix_reference Download path for PhiX database - (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Enterobacteria_phage_phiX174_sensu_lato/all_assembly_versions/GCA_002596845.1_ASM259684v1/GCA_002596845.1_ASM259684v1_genomic.fna.gz") Long read preprocessing: --skip_adapter_trimming Skip removing adapter sequences from long reads @@ -58,8 +56,6 @@ def helpMessage() { --longreads_keep_percent Keep this percent of bases (default: 90) --longreads_length_weight The higher the more important is read length when choosing the best reads (default: 10) --keep_lambda Keep reads similar to the ONT internal standard Escherichia virus Lambda genome (default: false) - --lambda_reference Download path for Escherichia virus Lambda genome - (default: "ftp://ftp.ncbi.nlm.nih.gov/genomes/genbank/viral/Escherichia_virus_Lambda/all_assembly_versions/GCA_000840245.1_ViralProj14204/GCA_000840245.1_ViralProj14204_genomic.fna.gz") Assembly: --skip_spades Skip Illumina-only SPAdes assembly @@ -67,6 +63,10 @@ def helpMessage() { --skip_megahit Skip MEGAHIT assembly --skip_quast Skip metaQUAST + 
Taxonomy: + --centrifuge_db [path] Database for taxonomic binning with centrifuge (default: none). E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz + --skip_krona Skip creating a krona plot for taxonomic binning + Binning options: --skip_binning Skip metagenome binning --min_contig_size Minimum contig size to be considered for binning (default: 1500) @@ -147,6 +147,12 @@ params.skip_spadeshybrid = false params.skip_megahit = false params.skip_quast = false +/* + * taxonomy options + */ +params.centrifuge_db = false +params.skip_krona = false + /* * long read preprocessing options */ @@ -173,6 +179,14 @@ if(!params.skip_busco){ Channel.from() } +if(params.centrifuge_db){ + Channel + .fromPath( "${params.centrifuge_db}", checkIfExists: true ) + .set { file_centrifuge_db } +} else { + file_centrifuge_db = Channel.from() +} + if(!params.keep_phix) { Channel .fromPath( "${params.phix_reference}", checkIfExists: true ) @@ -518,7 +532,7 @@ if(!params.keep_phix) { set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) output: - set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades) + set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades, trimmed_reads_centrifuge) file("${name}_remove_phix_log.txt") script: @@ -540,7 +554,7 @@ if(!params.keep_phix) { } } else { - trimmed_reads.into {trimmed_reads_megahit; trimmed_reads_metabat; trimmed_reads_fastqc; trimmed_sr_spadeshybrid; trimmed_reads_spades} + trimmed_reads.into {trimmed_reads_megahit; trimmed_reads_metabat; trimmed_reads_fastqc; trimmed_sr_spadeshybrid; trimmed_reads_spades; trimmed_reads_centrifuge} } @@ -561,6 +575,97 @@ process fastqc_trimmed { """ } +/* + * STEP - Taxonomic information + */ + +process centrifuge_db_preparation { + input: + file(db) from file_centrifuge_db + + output: + set val("${db.toString().replace(".tar.gz", "")}"), file("*.cf") into centrifuge_database + + script: + """ + tar -xf "${db}" + """ +} + +trimmed_reads_centrifuge + .combine(centrifuge_database) + .set { centrifuge_input } + +process centrifuge { + tag "${name}-${db_name}" + publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy' + + input: + set val(name), file(reads), val(db_name), file(db) from centrifuge_input + + output: + set val(name), file("kreport.txt") into centrifuge_kreport + file("report.txt") + file("results.txt") + + script: + if ( !params.singleEnd ) { + """ + centrifuge -x "${db_name}" \ + -p "${task.cpus}" \ + -1 "${reads[0]}" \ + -2 "${reads[1]}" \ + --report-file report.txt \ + -S results.txt + centrifuge-kreport -x "${db_name}" results.txt > kreport.txt + """ + } + else { + """ + centrifuge -x "${db_name}" \ + -p "${task.cpus}" \ + -U "${reads}" \ + --report-file report.txt \ + -S results.txt + centrifuge-kreport -x "${db_name}" results.txt > kreport.txt + """ + } +} + +process krona_db { + output: + file("taxonomy/taxonomy.tab") into file_krona_db + + when: + params.centrifuge_db && !params.skip_krona + + script: + """ + ktUpdateTaxonomy.sh taxonomy + """ +} + +centrifuge_kreport + .combine(file_krona_db) + .set { krona_input } + +process krona { + tag "${name}" + publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy' + + input: + set val(name), file(kreport), file("taxonomy/taxonomy.tab") from krona_input + + output: + file("*.html") + + script: + """ + cat 
"${kreport}" | cut -f 1,3 > results.krona + ktImportTaxonomy results.krona -tax taxonomy + """ +} + /* * STEP 2 - Assembly From 5419a56f5e7da4f8ffa802c0a72a185c27d9619c Mon Sep 17 00:00:00 2001 From: d4straub Date: Tue, 16 Jul 2019 14:29:42 +0200 Subject: [PATCH 050/105] add optional taxonomic classification of MAGs by CAT --- environment.yml | 5 +++-- main.nf | 53 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 993c3175..434c86c5 100644 --- a/environment.yml +++ b/environment.yml @@ -21,7 +21,7 @@ dependencies: - python=3.6.7 - r=3.5.1 - biopython=1.72 - - krona=2.7 + - krona=2.7.1 - conda-forge::r-markdown=0.8 - r-ggplot2=3.1.0 - busco=3.0.2 @@ -30,4 +30,5 @@ dependencies: - porechop=0.2.3_seqan2.1.1 - nanolyse=1.1.0 - spades=3.13.0 - - centrifuge + - centrifuge=1.0.4_beta + - cat=4.6 diff --git a/main.nf b/main.nf index c17e9b85..cbe983d9 100644 --- a/main.nf +++ b/main.nf @@ -64,8 +64,9 @@ def helpMessage() { --skip_quast Skip metaQUAST Taxonomy: - --centrifuge_db [path] Database for taxonomic binning with centrifuge (default: none). E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz + --centrifuge_db [path] Database for taxonomic binning with centrifuge (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz" --skip_krona Skip creating a krona plot for taxonomic binning + --cat_db [path] Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. "tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190108.tar.gz" Binning options: --skip_binning Skip metagenome binning @@ -152,6 +153,7 @@ params.skip_quast = false */ params.centrifuge_db = false params.skip_krona = false +params.cat_db = false /* * long read preprocessing options @@ -187,6 +189,14 @@ if(params.centrifuge_db){ file_centrifuge_db = Channel.from() } +if(params.cat_db){ + Channel + .fromPath( "${params.cat_db}", checkIfExists: true ) + .set { file_cat_db } +} else { + file_cat_db = Channel.from() +} + if(!params.keep_phix) { Channel .fromPath( "${params.phix_reference}", checkIfExists: true ) @@ -818,6 +828,7 @@ process metabat { output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat mode flatten set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins when: @@ -1044,6 +1055,46 @@ process merge_quast_and_busco { """ } +/* + * CAT: Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) + */ +process cat_db { + tag "${database.baseName}" + + input: + file(database) from file_cat_db + + output: + set val("${database.toString().replace(".tar.gz", "")}"), file("catDB/*") into cat_db + + script: + """ + mkdir catDB + tar -xf ${database} -C catDB + """ +} + +metabat_bins_for_cat + .combine(cat_db) + .set { cat_input } + +process cat { + tag "${assembler}-${sample}-${db_name}" + publishDir "${params.outdir}/Taxonomy/${assembler}-${sample}", mode: 'copy' + + input: + set val(assembler), val(sample), file("bins/*"), val(db_name), file("db/*") from cat_input + + output: + file("*ORF2LCA.txt") + file("*.ORF2LCA.names.txt") + + script: + """ + CAT bins -b "bins/" -d "${db}" -t "${db}" -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" + CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o 
"${assembler}-${sample}.ORF2LCA.names.txt" -t {taxonomy folder} + """ +} /* * STEP 4 - MultiQC From 0a48ebddebd7f0d3fd0149dd563b799c398cc0a2 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 09:13:50 +0200 Subject: [PATCH 051/105] more resources to centrifuge and cat --- conf/base.config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/conf/base.config b/conf/base.config index e8e7f623..7acd35f2 100644 --- a/conf/base.config +++ b/conf/base.config @@ -48,6 +48,16 @@ process { memory = { check_max (8.GB * task.attempt, 'memory' ) } time = { check_max (6.h * task.attempt, 'time' ) } } + withName: centrifuge { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } + time = { check_max (12.h * task.attempt, 'time' ) } + } + withName: cat { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } + time = { check_max (12.h * task.attempt, 'time' ) } + } withName: megahit { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } From aa3864dce80a7946fa45845c97b4ac506b96822c Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 10:26:52 +0200 Subject: [PATCH 052/105] fix wrong input file --- main.nf | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index cbe983d9..124620eb 100644 --- a/main.nf +++ b/main.nf @@ -608,15 +608,17 @@ trimmed_reads_centrifuge process centrifuge { tag "${name}-${db_name}" - publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy' + publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy', + saveAs: {filename -> filename.indexOf(".krona") == -1 ? filename : null} input: set val(name), file(reads), val(db_name), file(db) from centrifuge_input output: - set val(name), file("kreport.txt") into centrifuge_kreport + set val(name), file("results.krona") into centrifuge_to_krona file("report.txt") file("results.txt") + file("kreport.txt") script: if ( !params.singleEnd ) { @@ -628,6 +630,7 @@ process centrifuge { --report-file report.txt \ -S results.txt centrifuge-kreport -x "${db_name}" results.txt > kreport.txt + cat results.txt | cut -f 1,3 > results.krona """ } else { @@ -638,6 +641,7 @@ process centrifuge { --report-file report.txt \ -S results.txt centrifuge-kreport -x "${db_name}" results.txt > kreport.txt + cat results.txt | cut -f 1,3 > results.krona """ } } @@ -655,7 +659,7 @@ process krona_db { """ } -centrifuge_kreport +centrifuge_to_krona .combine(file_krona_db) .set { krona_input } @@ -664,15 +668,14 @@ process krona { publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy' input: - set val(name), file(kreport), file("taxonomy/taxonomy.tab") from krona_input + set val(name), file(report), file("taxonomy/taxonomy.tab") from krona_input output: file("*.html") script: """ - cat "${kreport}" | cut -f 1,3 > results.krona - ktImportTaxonomy results.krona -tax taxonomy + ktImportTaxonomy "$report" -tax taxonomy """ } From 13d4c33c9efe6ccc2fd0c37302992e79a9679b46 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 14:00:02 +0200 Subject: [PATCH 053/105] attempt to fix cat input --- main.nf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 124620eb..bbbe8d77 100644 --- a/main.nf +++ b/main.nf @@ -67,6 +67,7 @@ def helpMessage() { --centrifuge_db [path] Database for taxonomic binning with centrifuge (default: none). E.g. 
"ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz" --skip_krona Skip creating a krona plot for taxonomic binning --cat_db [path] Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. "tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190108.tar.gz" + The zipped file needs to contain a folder named "*taxonomy*" and "*CAT_database*" that hold the respective files. Binning options: --skip_binning Skip metagenome binning @@ -831,7 +832,7 @@ process metabat { output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten - set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat mode flatten + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins when: @@ -1068,12 +1069,14 @@ process cat_db { file(database) from file_cat_db output: - set val("${database.toString().replace(".tar.gz", "")}"), file("catDB/*") into cat_db + set val("${database.toString().replace(".tar.gz", "")}"), file("database/*"), file("taxonomy/*") into cat_db script: """ mkdir catDB tar -xf ${database} -C catDB + mv `find catDB/ -type d -name "*taxonomy*"` taxonomy/ + mv `find catDB/ -type d -name "*CAT_database*"` database/ """ } @@ -1086,7 +1089,7 @@ process cat { publishDir "${params.outdir}/Taxonomy/${assembler}-${sample}", mode: 'copy' input: - set val(assembler), val(sample), file("bins/*"), val(db_name), file("db/*") from cat_input + set val(assembler), val(sample), file("bins/*"), val(db_name), file(database), file(taxonomy) from cat_input output: file("*ORF2LCA.txt") @@ -1094,7 +1097,7 @@ process cat { script: """ - CAT bins -b "bins/" -d "${db}" -t "${db}" -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" + CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t {taxonomy folder} """ } From b08366d89c0771891acc72f49cd91892dd1e533a Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 14:02:52 +0200 Subject: [PATCH 054/105] more resources for krona --- conf/base.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/base.config b/conf/base.config index 7acd35f2..7a210243 100644 --- a/conf/base.config +++ b/conf/base.config @@ -53,6 +53,11 @@ process { memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (12.h * task.attempt, 'time' ) } } + withName: krona { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (20.GB * task.attempt, 'memory' ) } + time = { check_max (12.h * task.attempt, 'time' ) } + } withName: cat { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (40.GB * task.attempt, 'memory' ) } From 26613db12a636fac22fbbbf7056159fa99d7268d Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 15:12:22 +0200 Subject: [PATCH 055/105] speed up of CAT/diamond needs one more parameter --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index bbbe8d77..09e38121 100644 --- a/main.nf +++ b/main.nf @@ -1097,7 +1097,7 @@ process cat { script: """ - CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" + CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing CAT add_names -i 
"${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t {taxonomy folder} """ } From 2c3b9aebaf062a6fb5d2aba5d94bd4361ddf1733 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 15:21:29 +0200 Subject: [PATCH 056/105] fix cat input --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 09e38121..a2d17ae6 100644 --- a/main.nf +++ b/main.nf @@ -1089,7 +1089,7 @@ process cat { publishDir "${params.outdir}/Taxonomy/${assembler}-${sample}", mode: 'copy' input: - set val(assembler), val(sample), file("bins/*"), val(db_name), file(database), file(taxonomy) from cat_input + set val(assembler), val(sample), file("bins/*"), val(db_name), file("database/*"), file("taxonomy/*") from cat_input output: file("*ORF2LCA.txt") @@ -1097,7 +1097,7 @@ process cat { script: """ - CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing + CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" -s .fa --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t {taxonomy folder} """ } From 1211e82406c6631779a9aa2877fc572b52d22472 Mon Sep 17 00:00:00 2001 From: d4straub Date: Wed, 17 Jul 2019 17:18:08 +0200 Subject: [PATCH 057/105] add kraken2 --- conf/base.config | 5 +++ environment.yml | 1 + main.nf | 82 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/conf/base.config b/conf/base.config index 7a210243..15b385f3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -53,6 +53,11 @@ process { memory = { check_max (40.GB * task.attempt, 'memory' ) } time = { check_max (12.h * task.attempt, 'time' ) } } + withName: kraken2 { + cpus = { check_max (8 * task.attempt, 'cpus' ) } + memory = { check_max (40.GB * task.attempt, 'memory' ) } + time = { check_max (12.h * task.attempt, 'time' ) } + } withName: krona { cpus = { check_max (8 * task.attempt, 'cpus' ) } memory = { check_max (20.GB * task.attempt, 'memory' ) } diff --git a/environment.yml b/environment.yml index 434c86c5..fd286624 100644 --- a/environment.yml +++ b/environment.yml @@ -32,3 +32,4 @@ dependencies: - spades=3.13.0 - centrifuge=1.0.4_beta - cat=4.6 + - kraken2=2.0.8_beta diff --git a/main.nf b/main.nf index a2d17ae6..c720ee72 100644 --- a/main.nf +++ b/main.nf @@ -65,6 +65,7 @@ def helpMessage() { Taxonomy: --centrifuge_db [path] Database for taxonomic binning with centrifuge (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz" + --kraken2_db [path] Database for taxonomic binning with kraken2 (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_v2_8GB_201904_UPDATE.tgz" --skip_krona Skip creating a krona plot for taxonomic binning --cat_db [path] Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. "tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190108.tar.gz" The zipped file needs to contain a folder named "*taxonomy*" and "*CAT_database*" that hold the respective files. 
@@ -153,6 +154,7 @@ params.skip_quast = false * taxonomy options */ params.centrifuge_db = false +params.kraken2_db = false params.skip_krona = false params.cat_db = false @@ -190,6 +192,14 @@ if(params.centrifuge_db){ file_centrifuge_db = Channel.from() } +if(params.kraken2_db){ + Channel + .fromPath( "${params.kraken2_db}", checkIfExists: true ) + .set { file_kraken2_db } +} else { + file_kraken2_db = Channel.from() +} + if(params.cat_db){ Channel .fromPath( "${params.cat_db}", checkIfExists: true ) @@ -543,7 +553,7 @@ if(!params.keep_phix) { set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) output: - set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades, trimmed_reads_centrifuge) + set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades, trimmed_reads_centrifuge, trimmed_reads_kraken2) file("${name}_remove_phix_log.txt") script: @@ -609,14 +619,14 @@ trimmed_reads_centrifuge process centrifuge { tag "${name}-${db_name}" - publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy', + publishDir "${params.outdir}/Taxonomy/centrifuge/${name}", mode: 'copy', saveAs: {filename -> filename.indexOf(".krona") == -1 ? filename : null} input: set val(name), file(reads), val(db_name), file(db) from centrifuge_input output: - set val(name), file("results.krona") into centrifuge_to_krona + set val("centrifuge"), val(name), file("results.krona") into centrifuge_to_krona file("report.txt") file("results.txt") file("kreport.txt") @@ -647,6 +657,65 @@ process centrifuge { } } +process kraken2_db_preparation { + input: + file(db) from file_kraken2_db + + output: + set val("${db.toString().replace(".tgz", "")}"), file("*") into kraken2_database + + script: + """ + tar -xf "${db}" + """ +} + +trimmed_reads_kraken2 + .combine(kraken2_database) + .set { kraken2_input } + +process kraken2 { + tag "${name}-${db_name}" + publishDir "${params.outdir}/Taxonomy/kraken2/${name}", mode: 'copy', + saveAs: {filename -> filename.indexOf(".krona") == -1 ? 
filename : null} + + input: + set val(name), file(reads), val(db_name), file("database/*") from kraken2_input + + output: + set val("kraken2"), val(name), file("results.krona") into kraken2_to_krona + file("kraken2.kraken") + file("kraken2_report.txt") + + script: + if ( !params.singleEnd ) { + """ + kraken2 --use-names \ + --report-zero-counts \ + --threads "${task.cpus}" \ + --db database \ + --fastq-input \ + --report kraken2_report.txt \ + --paired "${reads[0]}" "${reads[1]}" \ + > kraken2.kraken + cat kraken2.kraken | cut -f 2,3 > results.krona + """ + } + else { + """ + kraken2 --use-names \ + --report-zero-counts \ + --threads "${task.cpus}" \ + --db database \ + --fastq-input \ + --report kraken2_report.txt \ + "${reads}" \ + > kraken2.kraken + cat kraken2.kraken | cut -f 2,3 > results.krona + """ + } +} + process krona_db { output: file("taxonomy/taxonomy.tab") into file_krona_db @@ -661,15 +730,16 @@ process krona_db { } centrifuge_to_krona + .mix(kraken2_to_krona) .combine(file_krona_db) .set { krona_input } process krona { - tag "${name}" - publishDir "${params.outdir}/Taxonomy/${name}", mode: 'copy' + tag "${classifier}-${name}" + publishDir "${params.outdir}/Taxonomy/${classifier}/${name}", mode: 'copy' input: - set val(name), file(report), file("taxonomy/taxonomy.tab") from krona_input + set val(classifier), val(name), file(report), file("taxonomy/taxonomy.tab") from krona_input output: file("*.html") From 3e66ec92e5c3fc348262a380284fa62259b469ba Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 09:18:33 +0200 Subject: [PATCH 058/105] fix cat --- main.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index c720ee72..707cc461 100644 --- a/main.nf +++ b/main.nf @@ -1162,13 +1162,17 @@ process cat { set val(assembler), val(sample), file("bins/*"), val(db_name), file("database/*"), file("taxonomy/*") from cat_input output: - file("*ORF2LCA.txt") + file("*.ORF2LCA.txt") file("*.ORF2LCA.names.txt") + file("*.predicted_proteins.faa") + file("*.predicted_proteins.gff") + file("*.log") + file("*.bin2classification.txt") script: """ CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" -s .fa --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing - CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t {taxonomy folder} + CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t taxonomy/ """ } From bcca92f288083eaf076ec4623d1b912fa0ce04cd Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 13:01:45 +0200 Subject: [PATCH 059/105] fix kraken2 database --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 707cc461..d0d1e22f 100644 --- a/main.nf +++ b/main.nf @@ -662,7 +662,7 @@ process kraken2_db_preparation { file(db) from file_kraken2_db output: - set val("${db.toString().replace(".tgz", "")}"), file("*") into kraken2_database + set val("${db.toString().replace(".tgz", "")}"), file(".k2d") into kraken2_database script: """ From bceccef692c6efe6d8684b4a104a92087cf40d34 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 13:40:32 +0200 Subject: [PATCH 060/105] fix kraken2 database 2nd --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index d0d1e22f..56769f09 100644 --- a/main.nf +++ b/main.nf @@ -662,7 +662,7 @@ process kraken2_db_preparation { file(db) from file_kraken2_db output: - set 
val("${db.toString().replace(".tgz", "")}"), file(".k2d") into kraken2_database + set val("${db.toString().replace(".tgz", "")}"), file("*.k2d") into kraken2_database script: """ From 4497d9688ce49c7269fa860e221a5ba640e7705c Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 14:22:19 +0200 Subject: [PATCH 061/105] fix --skip_busco --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 56769f09..8d887bef 100644 --- a/main.nf +++ b/main.nf @@ -181,7 +181,7 @@ if(!params.skip_busco){ .fromPath( "${params.busco_reference}", checkIfExists: true ) .set { file_busco_db } } else { - Channel.from() + file_busco_db = Channel.from() } if(params.centrifuge_db){ From 299d35624fe203000cd6bcf4213cce3ffc3b591d Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 14:40:16 +0200 Subject: [PATCH 062/105] fix kraken2 database 3rd --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 8d887bef..1331cf7b 100644 --- a/main.nf +++ b/main.nf @@ -662,7 +662,7 @@ process kraken2_db_preparation { file(db) from file_kraken2_db output: - set val("${db.toString().replace(".tgz", "")}"), file("*.k2d") into kraken2_database + set val("${db.baseName}"), file("${db.baseName}/*.k2d") into kraken2_database script: """ From fab79bdef6dd4ee176ca91224129e4fe02645d1c Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 15:06:05 +0200 Subject: [PATCH 063/105] fix kraken2 output --- main.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 1331cf7b..103308a4 100644 --- a/main.nf +++ b/main.nf @@ -684,13 +684,12 @@ process kraken2 { output: set val("kraken2"), val(name), file("results.krona") into kraken2_to_krona - file("kraken2.kraken") file("kraken2_report.txt") script: if ( !params.singleEnd ) { """ - kraken2 --use-names \ + kraken2 \ --report-zero-counts \ --threads "${task.cpus}" \ --db database \ @@ -703,7 +702,7 @@ process kraken2 { } else { """ - kraken2 --use-names \ + kraken2 \ --report-zero-counts \ --threads "${task.cpus}" \ --db database \ From d0340a1c9e87f91561c8a05628dfcde7e428e1fc Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 15:48:44 +0200 Subject: [PATCH 064/105] fix cat 2nd --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 103308a4..0526e8de 100644 --- a/main.nf +++ b/main.nf @@ -1171,7 +1171,7 @@ process cat { script: """ CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" -s .fa --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing - CAT add_names -i "${assembler}-${sample}_run.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t taxonomy/ + CAT add_names -i "${assembler}-${sample}.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t taxonomy/ """ } From b1e840fd1cb2226cc2b68c80854394e1e891f028 Mon Sep 17 00:00:00 2001 From: d4straub Date: Thu, 18 Jul 2019 16:42:14 +0200 Subject: [PATCH 065/105] improve cat output --- main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 0526e8de..551c3e7f 100644 --- a/main.nf +++ b/main.nf @@ -1155,14 +1155,18 @@ metabat_bins_for_cat process cat { tag "${assembler}-${sample}-${db_name}" - publishDir "${params.outdir}/Taxonomy/${assembler}-${sample}", mode: 'copy' + publishDir "${params.outdir}/Taxonomy/${assembler}", mode: 'copy', + saveAs: {filename -> + if (filename.indexOf(".names.txt") > 0) filename + else "raw/$filename" + } input: set 
val(assembler), val(sample), file("bins/*"), val(db_name), file("database/*"), file("taxonomy/*") from cat_input output: file("*.ORF2LCA.txt") - file("*.ORF2LCA.names.txt") + file("*.names.txt") file("*.predicted_proteins.faa") file("*.predicted_proteins.gff") file("*.log") @@ -1172,6 +1176,7 @@ process cat { """ CAT bins -b "bins/" -d database/ -t taxonomy/ -n "${task.cpus}" -s .fa --top 6 -o "${assembler}-${sample}" --I_know_what_Im_doing CAT add_names -i "${assembler}-${sample}.ORF2LCA.txt" -o "${assembler}-${sample}.ORF2LCA.names.txt" -t taxonomy/ + CAT add_names -i "${assembler}-${sample}.bin2classification.txt" -o "${assembler}-${sample}.bin2classification.names.txt" -t taxonomy/ """ } From 9aebac99ba19b270ff8858b2370f99d93bf2f452 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 19 Jul 2019 10:07:15 +0200 Subject: [PATCH 066/105] only prepare krona database when centrifuge or kraken2 are planned --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 551c3e7f..0f4125d0 100644 --- a/main.nf +++ b/main.nf @@ -720,7 +720,7 @@ process krona_db { file("taxonomy/taxonomy.tab") into file_krona_db when: - params.centrifuge_db && !params.skip_krona + ( params.centrifuge_db || params.kraken2_db ) && !params.skip_krona script: """ From 8ddfce15f632bfaa590ec4be3f5e093ae477e635 Mon Sep 17 00:00:00 2001 From: d4straub Date: Fri, 19 Jul 2019 10:09:56 +0200 Subject: [PATCH 067/105] adjust centrifuge output --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index 0f4125d0..18d31ec6 100644 --- a/main.nf +++ b/main.nf @@ -628,7 +628,6 @@ process centrifuge { output: set val("centrifuge"), val(name), file("results.krona") into centrifuge_to_krona file("report.txt") - file("results.txt") file("kreport.txt") script: From 9ff5f0b411ae49d3b39d13286278ad139ef0caa9 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 22 Jul 2019 14:43:41 +0200 Subject: [PATCH 068/105] ignore when BUSCO fails on a genomic bin --- conf/base.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/base.config b/conf/base.config index 15b385f3..2eef0b88 100644 --- a/conf/base.config +++ b/conf/base.config @@ -24,6 +24,9 @@ process { withName: busco_download_db { time = 4.h } + withName: busco { + errorStrategy = { task.exitStatus in [143,137] ? 
'retry' : 'ignore' } + } withName: phix_download_db { time = 4.h } From 487b277a7b693016c0fa703580594fd127cf140f Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 22 Jul 2019 14:47:10 +0200 Subject: [PATCH 069/105] fix when statements --- main.nf | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 18d31ec6..baf3520d 100644 --- a/main.nf +++ b/main.nf @@ -807,9 +807,7 @@ process spadeshybrid { file("${id}_log.txt") when: - params.manifest - !params.singleEnd - !params.skip_spadeshybrid + params.manifest && !params.singleEnd && !params.skip_spadeshybrid script: def maxmem = "${task.memory.toString().replaceAll(/[\sGB]/,'')}" @@ -845,8 +843,7 @@ process spades { file("${id}_log.txt") when: - !params.singleEnd - !params.skip_spades + !params.singleEnd && !params.skip_spades script: def maxmem = "${task.memory.toString().replaceAll(/[\sGB]/,'')}" From d379663e0f47793c04c214a1f1735ce20355f43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 30 Jul 2019 10:14:25 +0200 Subject: [PATCH 070/105] test centrifuge --- conf/test.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test.config b/conf/test.config index 0ceb4f9a..8fa08070 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,4 +23,5 @@ params { readPaths = [ ['test_minigut', ['https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']] ] + centrifuge_db = "https://github.com/HadrienG/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" } From 464e201307bb3db911e105a229575366dcdc4785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 31 Jul 2019 09:25:18 +0200 Subject: [PATCH 071/105] test kraken --- conf/test.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test.config b/conf/test.config index 8fa08070..16767179 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,4 +24,5 @@ params { ['test_minigut', ['https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']] ] centrifuge_db = "https://github.com/HadrienG/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" + kraken2_db = "https://github.com/HadrienG/test-datasets/raw/mag/test_data/minigut_kraken.tgz" } From 8f53af8439d69604ae2ae2bb2030cd67388f7c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 31 Jul 2019 13:00:32 +0200 Subject: [PATCH 072/105] migrated test datasets to nf-core/mag --- conf/test.config | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index 16767179..3984186f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,13 +16,10 @@ params { max_time = 48.h params.outdir = "./tests" params.temp_dir = "./tests/tmp_dir" - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed singleEnd = false readPaths = [ - ['test_minigut', ['https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']] + ['test_minigut', ['https://github.com/nf-core/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']] ] - 
centrifuge_db = "https://github.com/HadrienG/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" - kraken2_db = "https://github.com/HadrienG/test-datasets/raw/mag/test_data/minigut_kraken.tgz" + centrifuge_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" + kraken2_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_kraken.tgz" } From f7ccfc0d8d9a9c309380eb3e85cadf9d9b309843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 31 Jul 2019 14:13:01 +0200 Subject: [PATCH 073/105] remove pycache --- .../scrape_software_versions.cpython-36.pyc | Bin 1390 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 bin/__pycache__/scrape_software_versions.cpython-36.pyc diff --git a/bin/__pycache__/scrape_software_versions.cpython-36.pyc b/bin/__pycache__/scrape_software_versions.cpython-36.pyc deleted file mode 100644 index ebd68ae9b27aa4e0c5decdc5cba498051cb7fd8e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1390 zcmZ`&OK;mo5Z)yznxZ8?WIKub00_5$<0Li<6zIdHg%$~Vt6RrF9S|-Enme*6^X1)@ zEKAi%=sEoX{R0Jh?2pKA*lSO@_1sg3G#wZSyaZ=H4rgX(zS;e}-L^me?XS~QlaPPN zoz+78ExzIvC4>=16OvM&(h_Tlmg>G#^clDr#q!NgR$%-`Bi0Zje=e%0;dJUmd43T?G?WtfUKXa{%x20utT$8UO=`99Lz(gD`DEJ@MVL84 zUMFy{73E3J_dof%THkDS58dxPw{+;?@6|8 z7fCJyd0mt{_GOxE+YCg+f@|f&SXP%47W320mFfA%W z1<=mAlV;`UvZOoNxZUy{dwY=>DS9IY*=# z=R6-4s!`nmMSH4o#WCNXgnx)TR^q>FSEhxFcVIWp1TvBA<`-9Ao{4nqX1(1g=YTq> z^zB92#gd97lz@YL?>GlpE|B2^m*#AkK(}N>e1)P#Thua4 iT0Sk!q}I)&cjywL6>88e-9}{Vnq|@sU8k1bLj6C(ouD%S From 294a163294693d3c2677c8419be520a4dd7d3289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 1 Aug 2019 11:31:49 +0200 Subject: [PATCH 074/105] brief pipeline desc --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5bbda617..5bfd2ba9 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,17 @@ ## Introduction +This pipeline is for assembly, binning and annotation of metagenomes. +It supports both short and long reads, quality trims the reads and adapters with [https://github.com/OpenGene/fastp](fastp) and [https://github.com/rrwick/Porechop](porechop) and performs basic QC with [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](fastqc). + +The pipeline then: + +- assigns taxonomy to reads using [https://ccb.jhu.edu/software/centrifuge/](centrifuge) and/or [https://ccb.jhu.edu/software/kraken2/](kraken2) +- performs assembly using [https://github.com/voutcn/megahit](megahit) and [http://cab.spbu.ru/software/spades/](spades), and checks their quality using [http://quast.sourceforge.net/quast](quast) +- performs metagenome binning using [https://bitbucket.org/berkeleylab/metabat/src/master/](metabat2), and checks the quality of the genome bins using [https://busco.ezlab.org/](busco) + +Furthermore, the pipeline creates various reports in the results directory specified, including a [https://multiqc.info/](multiqc) report summarizing some of the findings and software versions. + The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. ## Documentation @@ -27,8 +38,6 @@ The nf-core/mag pipeline comes with documentation about the pipeline, found in t 4. [Output and how to interpret the results](docs/output.md) 5. 
[Troubleshooting](https://nf-co.re/usage/troubleshooting) - - ## Credits This pipeline was written by [Hadrien Gourlé](https://hadriengourle.com) at [SLU](https://slu.se) and Daniel Straub ([@d4straub](https://github.com/d4straub)). From 5226e1847cc483c0a76d851f0836900ef813bf24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 1 Aug 2019 11:32:25 +0200 Subject: [PATCH 075/105] remove todos --- bin/scrape_software_versions.py | 1 - conf/base.config | 1 - conf/igenomes.config | 1 - 3 files changed, 3 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index dfe6a5c5..9f8cde55 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -3,7 +3,6 @@ from collections import OrderedDict import re -# TODO nf-core: Add additional regexes for new tools in process get_software_versions regexes = { 'nf-core/mag': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], diff --git a/conf/base.config b/conf/base.config index 2eef0b88..e89c2e68 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,7 +11,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } diff --git a/conf/igenomes.config b/conf/igenomes.config index d19e61f4..08154994 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -9,7 +9,6 @@ params { // illumina iGenomes reference file paths - // TODO nf-core: Add new reference types and strip out those that are not needed genomes { 'GRCh37' { bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" From 7049eb1c4fbb059b6ab87e9d8a27ec66ac33baeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 1 Aug 2019 14:33:04 +0200 Subject: [PATCH 076/105] command-line flags + software versions --- bin/scrape_software_versions.py | 9 +++++++- main.nf | 40 +++++++++++++++++---------------- nextflow.config | 1 - 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 9f8cde55..bca53641 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -17,6 +17,10 @@ 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], + 'centrifuge': ['v_centrifuge.txt', r"centrifuge-class version (\S+)"], + 'Kraken2': ['v_kraken2.txt', r"Kraken version (\S+)-beta"], + 'Quast': ['v_quast.txt', r"QUAST v(\S+)"], + 'CAT': ['v_cat.txt', r"CAT v(\S+)"] } results = OrderedDict() results['nf-core/mag'] = 'N/A' @@ -32,7 +36,10 @@ results['NanoLyse'] = 'N/A' results['SPAdes'] = 'N/A' results['BUSCO'] = 'N/A' -results['Bandage'] = 'N/A' +results['centrifuge'] = 'N/A' +results['Kraken2'] = 'N/A' +results['CAT'] = 'N/A' +results['Quast'] = 'N/A' # Search each file using its regex for k, v in regexes.items(): diff --git a/main.nf b/main.nf index baf3520d..56c65476 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,6 @@ Daniel Straub def helpMessage() { - // TODO nf-core: Add to this help message with new command line parameters log.info nfcoreHeader() log.info""" Usage: @@ -272,28 +271,30 @@ log.info nfcoreHeader() def summary = [:] if(workflow.revision) summary['Pipeline Release'] = workflow.revision summary['Run Name'] = custom_runName ?: workflow.runName -// TODO nf-core: Report custom parameters here 
-summary['Reads'] = params.reads -summary['Fasta Ref'] = params.fasta -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +summary['Reads'] = params.reads +summary['Fasta Ref'] = params.fasta +summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +if(params.centrifuge_db) summary['Centrifuge Db'] = params.centrifuge_db +if(params.kraken2_db) summary['Kraken2 Db'] = params.kraken2_db +if(!params.skip_busco) summary['Busco Reference'] = params.busco_reference +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output dir'] = params.outdir -summary['Launch dir'] = workflow.launchDir -summary['Working dir'] = workflow.workDir -summary['Script dir'] = workflow.projectDir -summary['User'] = workflow.userName +summary['Output dir'] = params.outdir +summary['Launch dir'] = workflow.launchDir +summary['Working dir'] = workflow.workDir +summary['Script dir'] = workflow.projectDir +summary['User'] = workflow.userName if(workflow.profile == 'awsbatch'){ - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } -summary['Config Profile'] = workflow.profile +summary['Config Profile'] = workflow.profile if(params.config_profile_description) summary['Config Description'] = params.config_profile_description if(params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if(params.config_profile_url) summary['Config URL'] = params.config_profile_url if(params.email) { - summary['E-mail Address'] = params.email - summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize + summary['E-mail Address'] = params.email + summary['MultiQC maxsize'] = params.maxMultiqcEmailFileSize } log.info summary.collect { k,v -> "${k.padRight(18)}: $v" }.join("\n") log.info "\033[2m----------------------------------------------------\033[0m" @@ -333,7 +334,6 @@ process get_software_versions { file "software_versions.csv" script: - // TODO nf-core: Get all tools to print their version number here """ echo $workflow.manifest.version > v_pipeline.txt echo $workflow.manifest.nextflowVersion > v_nextflow.txt @@ -348,6 +348,10 @@ process get_software_versions { NanoLyse --version > v_nanolyse.txt spades.py --version > v_spades.txt run_BUSCO.py --version > v_busco.txt + centrifuge --version > v_centrifuge.txt + kraken2 -v > v_kraken2.txt + CAT -v > v_cat.txt + quast -v > v_quast.txt scrape_software_versions.py > software_versions_mqc.yaml """ @@ -1198,7 +1202,6 @@ process multiqc { script: rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - // TODO nf-core: Specify which MultiQC modules to use with -m for a faster run time """ multiqc -f ${rtitle} ${rfilename} --config ${multiqc_config} . 
""" @@ -1258,7 +1261,6 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) // On success try attach the multiqc report def mqc_report = null try { diff --git a/nextflow.config b/nextflow.config index aa5a2c48..d42d5285 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,6 @@ params { // Workflow flags - // TODO nf-core: Specify your pipeline's command line flags reads = "data/*{_R1,_R2}.fastq.gz" singleEnd = false outdir = './results' From 45305e82c6fb283cd7bbad201cded3740092eddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 1 Aug 2019 16:00:13 +0200 Subject: [PATCH 077/105] skip testing krona (travis does not wanna dl the db) --- conf/test.config | 1 + 1 file changed, 1 insertion(+) diff --git a/conf/test.config b/conf/test.config index 3984186f..dfb769fa 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,4 +22,5 @@ params { ] centrifuge_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" kraken2_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_kraken.tgz" + skip_krona = true } From 30055c0775525e03b7f98eeb52f0ad759f92a090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 1 Aug 2019 16:00:38 +0200 Subject: [PATCH 078/105] software version bumps --- environment.yml | 27 +++++++++++++-------------- main.nf | 2 +- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/environment.yml b/environment.yml index fd286624..1ee43e81 100644 --- a/environment.yml +++ b/environment.yml @@ -8,28 +8,27 @@ channels: dependencies: - fastqc=0.11.8 - multiqc=1.7 - - matplotlib=2.2.3 - - fastp=0.19.5 - - megahit=1.1.3 - - metabat2=2.12.1 + - matplotlib=3.1.1 + - fastp=0.20.0 + - megahit=1.2.7 + - metabat2=2.13 - samtools=1.9 - - bowtie2=2.3.4.3 + - bowtie2=2.3.5 - quast=5.0.2 - prodigal=2.6.3 - - pplacer=1.1.alpha19 - - diamond=0.9.24 - - python=3.6.7 - - r=3.5.1 - - biopython=1.72 + - diamond=0.9.24 # 0.9.25 conflicts with Busco over boost + - python=3.6.7 # 3.7.x conflicts with Nanolyse + - r=3.6 + - biopython=1.74 - krona=2.7.1 - - conda-forge::r-markdown=0.8 - - r-ggplot2=3.1.0 + - conda-forge::r-markdown=1.0 + - r-ggplot2=3.2.0 - busco=3.0.2 - - nanoplot=1.20.0 + - nanoplot=1.26.3 - filtlong=0.2.0 - porechop=0.2.3_seqan2.1.1 - nanolyse=1.1.0 - - spades=3.13.0 + - spades=3.13.1 - centrifuge=1.0.4_beta - cat=4.6 - kraken2=2.0.8_beta diff --git a/main.nf b/main.nf index 56c65476..a4d02f62 100644 --- a/main.nf +++ b/main.nf @@ -984,7 +984,7 @@ process busco { script: if( workflow.profile.toString().indexOf("conda") == -1) { """ - cp -r /opt/conda/pkgs/augustus-3.3-pl526hcfae127_4/config augustus_config/ + cp -r /opt/conda/pkgs/augustus*/config augustus_config/ export AUGUSTUS_CONFIG_PATH=augustus_config run_BUSCO.py \ From b2b37801fabf83a48db9a408d829c01b44813f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 2 Aug 2019 09:40:07 +0200 Subject: [PATCH 079/105] usage --- docs/usage.md | 98 +++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index ab0e4ba9..27d60423 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -14,8 +14,11 @@ - [`none`](#none) - [`--reads`](#--reads) - [`--singleEnd`](#--singleend) + - 
[`--manifest`](#--manifest) - [Optional Arguments](#optional-arguments) - [Trimming Options](#trimming-options) + - [Trimming Options for long reads](#trimming-options-for-long-reads) + - [Taxonomic classification](#taxonomic-classification) - [Binning Options](#binning-options) - [Job Resources](#job-resources) - [Automatic resubmission](#automatic-resubmission) @@ -47,8 +50,6 @@ It is recommended to limit the Nextflow Java virtual machines memory. We recomme NXF_OPTS='-Xms1g -Xmx4g' ``` - - ## Running the pipeline The typical command for running the pipeline is as follows: @@ -107,8 +108,6 @@ If `-profile` is not specified at all the pipeline will be run locally and expec - A profile with a complete configuration for automated testing - Includes links to test data so needs no other parameters - - ### `--reads` Use this to specify the location of your input FastQ files. For example: @@ -135,71 +134,73 @@ By default, the pipeline expects paired-end data. If you have single-end data, y It is not possible to run a mixture of single-end and paired-end files in one run. -## Reference genomes +### `--manifest` -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +The pipeline has support for hybrid (with long and short reads) assembly, with the `--manifest` option. +The option take a tab-separated file with 4 headerless columns: Sample_Id, Long_Reads, Short_Reads_1, Short_Reads_2 +Only one file path per entry is allowed, and single-end short reads are not supported. -### `--genome` (using iGenomes) +## Trimming options -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. +### `--adapter_forward` -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: +Sequence of 3' adapter to remove in the forward reads -- Human - - `--genome GRCh37` -- Mouse - - `--genome GRCm38` -- _Drosophila_ - - `--genome BDGP6` -- _S. cerevisiae_ - - `--genome 'R64-1-1'` +### `--adapter_reverse` -> There are numerous others - check the config file for more. +Sequence of 3' adapter to remove in the reverse reads -Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. +### `--mean_quality` -The syntax for this reference configuration is as follows: +Mean qualified quality value for keeping read (default: 15) - +### `--trimming_quality` -```nextflow -params { - genomes { - 'GRCh37' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` +Trimming quality value for the sliding window (default: 15) - +### `--keep_phix` -### `--fasta` +Keep reads similar to the Illumina internal standard PhiX genome (default: false) -If you prefer, you can specify the full path to your reference genome when you run the pipeline: +## Trimming options for long reads -### `--igenomesIgnore` +### `--skip_adapter_trimming` -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. 
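To make the `--manifest` layout described above concrete, here is a minimal sketch; the sample name and paths are hypothetical, the four columns are tab-separated, and the file has no header:

```bash
# manifest.tsv (tab-separated, no header, one line per sample; paths are illustrative only)
# sample1   /data/sample1_nanopore.fastq.gz   /data/sample1_R1.fastq.gz   /data/sample1_R2.fastq.gz

nextflow run nf-core/mag -profile docker --manifest 'manifest.tsv'
```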
+Skip removing adapter sequences from long reads -## Trimming options +### `--longreads_min_length` -### `--adapter_forward` +Discard any read which is shorter than this value (default: 1000) -Sequence of 3' adapter to remove in the forward reads +### `--longreads_keep_percent` -### `--adapter_reverse` +Keep this percent of bases (default: 90) -Sequence of 3' adapter to remove in the reverse reads +### `--longreads_length_weight` -### `--mean_quality` +The higher the more important is read length when choosing the best reads (default: 10) -Mean qualified quality value for keeping read (default: 15) +### `--keep_lambda` -### `--trimming_quality` +Keep reads similar to the ONT internal standard Escherichia virus Lambda genome (default: false) -Trimming quality value for the sliding window (default: 15) +## Taxonomic classification + +Taxonomic classification is disabled by default. +You have to specify one of the options below to activate it. + +### `--centrifuge_db` + +Database for taxonomic binning with centrifuge (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz" + +### `--kraken2_db` + +Database for taxonomic binning with kraken2 (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_v2_8GB_201904_UPDATE.tgz" + +### `--cat_db` + +Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. "tbb.bio.uu.nl/bastiaan/CAT*prepare/CAT_prepare_20190108.tar.gz" +The zipped file needs to contain a folder named "\_taxonomy*" and "_CAT_database_" that hold the respective files. ## Binning options @@ -207,6 +208,11 @@ Trimming quality value for the sliding window (default: 15) Minimum contig size to be considered for binning (default: 1500) +### `--busco_reference` + +Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ +(default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) + ## Job resources ### Automatic resubmission @@ -237,8 +243,6 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ## Other command line parameters - - ### `--outdir` The output directory where the results will be saved. From 9b178730dce75a6d4867920f38b1b4e970c4452f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 2 Aug 2019 14:09:24 +0200 Subject: [PATCH 080/105] describe output files --- docs/output.md | 156 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 119 insertions(+), 37 deletions(-) diff --git a/docs/output.md b/docs/output.md index aa991e02..2a7944dd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -7,16 +7,18 @@ This document describes the output produced by the pipeline. 
Most of the plots a The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - read quality control -- [fastp](#fastp) - read trimming -- [megahit](#megahit) - assembly -- [quast](#quast) - assembly quality report +- [Quality trimming and QC](#quality-trimming-and-qc) of input reads +- [Taxonomic classification](#taxonomic-classification) of trimmed reads +- [Assembly](#assembly) of trimmed reads +- [Binning](#binning) of assembled contigs - [MultiQC](#multiqc) - aggregate report, describing results of the whole pipeline -- [metabat](#metabat) - recovered draft genome bins from assembled metagenomes -- [checkm](#checkm) - genome bins quality control and merging of compatible bins -- [refinem](#refinem) - improving metagenome-assembled genome bins -## FastQC +## Quality trimming and QC + +These steps trim away the adapter sequences present in input reads, trims away bad quality bases and sicard reads that are too short. +It also removes sequencing controls, such as PhiX or the Lambda phage, as well as runs FastQC for visualising the general quality metrics of the sequencing runs before and after trimming. + +### FastQC [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%T/A/G/C). You get information about adapter contamination and other overrepresented sequences. @@ -24,60 +26,140 @@ For further reading and documentation see the [FastQC help](http://www.bioinform > **NB:** The FastQC plots displayed in the MultiQC report shows both _untrimmed_ and _trimmed_ reads. -**Output directory: `results/fastqc`** +**Output directory: `results/QC_shortreads/fastqc`** - `sample_fastqc.html` - FastQC report, containing quality metrics for your untrimmed raw fastq files -- `zips/sample_fastqc.zip` - - zip file containing the FastQC report, tab-delimited data file and plot images -## fastp +### fastp [fastp](https://github.com/OpenGene/fastp) is a all-in-one fastq preprocessor for read/adapter trimming and quality control. It is used in this pipeline for trimming adapter sequences and discard low-quality reads. -**Output directory: `None`** +**Output directory: `results/QC_shortreads/fastp`** -- The trimmed reads are not included in the output +- The trimmed reads are not included in the output by default, but the `--keep_trimmed` will store the trimmed reads in this directory -## MultiQC +### remove_phix -[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. +The pipeline uses bowtie2 to map the reads against PhiX and removes mapped reads. -The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. +**Output directory: `results/QC_shortreads/remove_phix`** -**Output directory: `results/multiqc`** +- Contains a brief log file indicating how many reads have been removed. 
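For orientation, this filtering boils down to keeping only the read pairs that do not align to a PhiX reference. A minimal sketch with hypothetical file names (not the pipeline's exact command) could look like:

```bash
# Build a bowtie2 index from a PhiX reference, then keep only read pairs that do NOT map to it.
# The alignment summary that bowtie2 writes to standard error doubles as a small removal log.
bowtie2-build phix.fasta phix_index
bowtie2 -p 8 -x phix_index -1 sample_R1.fastq.gz -2 sample_R2.fastq.gz \
    --un-conc-gz sample_no_phix_R%.fastq.gz -S /dev/null 2> sample_remove_phix_log.txt
```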
-- `Project_multiqc_report.html` - - MultiQC report - a standalone HTML file that can be viewed in your web browser -- `Project_multiqc_data/` - - Directory containing parsed statistics from the different tools used in the pipeline +### keep_lambda -For more information about how to use MultiQC reports, see +The pipeline uses Nanolyse to map the reads against the Lambda phage and removes mapped reads. + +**Output directory: `results/QC_longreads/NanoLyse`** + +- Contains a brief log file indicating how many reads have been removed. + +### Filtlong and porechop + +The pipeline uses filtlong and porechop to perform quality control of the long reads that are eventually provided with the `--manifest` option. + +**Output directory: `results/QC_longreads/NanoPlot_${sample_name}`** + +- Contains various metrics and plots about the quality and length distribution of long reads, as well as an html report generated by nanoplot + +For more information about Nanoplot see the [online documentation](https://github.com/wdecoster/NanoPlot) + +## Taxonomic Classification + +### Kraken + +Kraken2 classifies reads using a k-mer based approach as well as assigns taxonomy using a Lowest Common Ancestor (LCA) algorithm. + +**Output directory: `results/Taxonomy/kraken2/${sample_name}`** + +- `*_kraken2.report`: Classification in the Kraken report format. See the [kraken manual](http://ccb.jhu.edu/software/kraken/MANUAL.html#sample-reports) for more details +- `taxonomy.krona.html`: an interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) + +### Centrifuge + +Centrifuge is commonly used for the classification of DNA sequences from microbial samples. It uses an indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index. + +More information on the [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) website + +**Output directory: `results/Taxonomy/centrifuge/${sample_name}`** + +- `report.txt`: Tab-delimited result file. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for information about the fields +- `kreport.txt`: Classification in the Kraken report format. See the [kraken manual](http://ccb.jhu.edu/software/kraken/MANUAL.html#sample-reports) for more details +- `taxonomy.krona.html`: an interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) + +### CAT + + -## Megahit +## Assembly -[megahit](https://github.com/voutcn/megahit) is a single node assembler for large and complex metagenomics HTS reads. +Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembly is only supported by SPAdes. -**Output directory: `results/megahit`** +### Megahit -- `sample.fasta` - - metagenome assembly in fasta format +[megahit](https://github.com/voutcn/megahit) is a single node assembler for large and complex metagenomics short reads. -## Quast +**Output directory: `results/MEGAHIT`** + +- `${sample}.contigs.fasta`: metagenome assembly in fasta format +- `${sample}_QC/`: directory containing QUAST files + +### SPAdes + +[SPAdes](http://cab.spbu.ru/software/spades/) was originally a single genome assembler that later added support for assembling metagenomes. + +**Output directory: `results/SPAdes`** + +- `${sample}_contigs.fasta`: metagenome assembly in fasta format +- `${sample}_QC/`: directory containing QUAST files + +### SPAdesHybrid + +SPAdesHybrid is a part of the SPAdes software and is used when the user provides both long and short reads. 
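In practice this amounts to handing SPAdes both read types in a single run. A rough sketch with hypothetical file names (not the pipeline's exact flags):

```bash
# Hybrid metagenome assembly: short paired-end reads plus Nanopore long reads.
spades.py --meta \
    -1 sample_R1.fastq.gz -2 sample_R2.fastq.gz \
    --nanopore sample_nanopore.fastq.gz \
    -t 8 -o SPAdesHybrid_sample
```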
+ +**Output directory: `results/SPAdesHybrid`** + +- `${sample}_contigs.fasta`: metagenome assembly in fasta format +- `${sample}_QC/`: directory containing QUAST files + +### Quast [quast](http://cab.spbu.ru/software/quast/) is a tool that evaluates genome and metagenome assemblies by computing various metrics **output directory:** `None` -- The quast output is included in the multiqc report +- The quast output is included in the multiqc report, as well as in the assembly directories themselves + +## Binning + +### Metabat + +[metabat](https://bitbucket.org/berkeleylab/metabat) recovers genome bins (that is, contigs/scaffolds that all belongs to a same organism) from metagenome assemblies. Additionally, Quast is run again on all the genome bins. + +**output directory: `results/GenomeBinning/MetaBat2`** + +- `*.fa`: Genome bins retrieved from the different input assemblies -## Metabat +### Busco -[metabat](https://bitbucket.org/berkeleylab/metabat) recovers genome bins (that is, contigs/scaffolds that all bolongs to a same organism) from metagenome assemblies. +Busco is a tool used to assess the completeness of a genome assembly. It is run on all the genome bins obtained by metabat2 -**output directory: `results/metabat`** +**output directory: `results/GenomeBinning/QC/`** -- `sample.bam` - - reads mapped against the megahit assembly -- `bins/sample_X.fa` - - the putative genome bins retrieved by metabat +- `busco_summary.txt`: A summary tabke of the BUSCO results, with % of marker genes found + +## MultiQC + +[MultiQC](http://multiqc.info) is a visualisation tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in within the report data directory. + +The pipeline has special steps which allow the software versions used to be reported in the MultiQC output for future traceability. + +**Output directory: `results/multiqc`** + +- `Project_multiqc_report.html` + - MultiQC report - a standalone HTML file that can be viewed in your web browser +- `Project_multiqc_data/` + - Directory containing parsed statistics from the different tools used in the pipeline + +For more information about how to use MultiQC reports, see From de9a3ded5f70543b5b2793972ec938d9831b3cea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 6 Aug 2019 09:23:14 +0200 Subject: [PATCH 081/105] brief description of pipeline as of 1.0.0 --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77d5c8a4..36e3a754 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,3 +3,12 @@ ## v1.0.0 - [date] Initial release of nf-core/mag, created with the [nf-core](http://nf-co.re/) template. 
+As this release the pipeline will have the following functionailities: + +- short and long reads QC (fastp, porechop, filtlong, fastqc) +- Lambda and PhiX detection and filtering (bowtie2, nanolyse) +- Taxonomic classification of reads (centrifuge, kraken2) +- Short read and hybrid assembly (megahit, metaspades) +- metagenome binning (metabat2) +- QC of bins (busco, quast) +- annotation (cat/bat) From b2d8e3f330e3c286cdda8934e7bad2f3a55f9d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 6 Aug 2019 09:29:39 +0200 Subject: [PATCH 082/105] address pseudo review comments --- README.md | 2 -- bin/scrape_software_versions.py | 12 ++++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 5bfd2ba9..4a8e5c63 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # ![mag](https://raw.githubusercontent.com/nf-core/mag/master/docs/images/mag_logo.png) -## nf-core/mag - **Assembly, binning and annotation of metagenomes**. [![Build Status](https://travis-ci.com/nf-core/mag.svg?branch=master)](https://travis-ci.com/nf-core/mag) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index bca53641..1addaf9f 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -7,17 +7,17 @@ 'nf-core/mag': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], - 'fastqc': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'fastp': ['v_fastp.txt', r"fastp (\S+)"], - 'megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], - 'metabat': ['v_metabat.txt', r"version (\S+)"], + 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], + 'Fastp': ['v_fastp.txt', r"fastp (\S+)"], + 'Megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], + 'Metabat': ['v_metabat.txt', r"version (\S+)"], 'NanoPlot': ['v_nanoplot.txt', r"NanoPlot (\S+)"], 'Filtlong': ['v_filtlong.txt', r"Filtlong v(\S+)"], - 'porechop': ['v_porechop.txt', r"(\S+)"], + 'Porechop': ['v_porechop.txt', r"(\S+)"], 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], - 'centrifuge': ['v_centrifuge.txt', r"centrifuge-class version (\S+)"], + 'Centrifuge': ['v_centrifuge.txt', r"centrifuge-class version (\S+)"], 'Kraken2': ['v_kraken2.txt', r"Kraken version (\S+)-beta"], 'Quast': ['v_quast.txt', r"QUAST v(\S+)"], 'CAT': ['v_cat.txt', r"CAT v(\S+)"] From 8bfae254032d8bcf880827d00e46bdf21e2f4b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 13 Sep 2019 09:24:40 +0200 Subject: [PATCH 083/105] mapping as a separate process --- main.nf | 63 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/main.nf b/main.nf index a4d02f62..49dc371c 100644 --- a/main.nf +++ b/main.nf @@ -885,24 +885,29 @@ process quast { """ } +// TODO +// input tests for bowtie2 +// what we need is (i) assemblies, sample, assembler +// NO NEED FOR READS IN METABAT OUTPUT. +// // (ii) reads +// Channel +// .from(assembly_spades_to_metabat) +// .mix(assembly_megahit_to_metabat) +// .mix(assembly_spadeshybrid_to_metabat) +// .into { bowtie2_input } /* * STEP 3 - Binning */ -process metabat { +process bowtie2 { tag "$assembler-$sample" - publishDir "${params.outdir}/", mode: 'copy', - saveAs: {filename -> (filename.indexOf(".bam") == -1 && filename.indexOf(".fastq.gz") == -1) ? 
"GenomeBinning/$filename" : null} input: set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat).mix(assembly_spadeshybrid_to_metabat) - val(min_size) from params.min_contig_size output: - set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten - set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat - set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins + set val(assembler), val(sample), file(assembly), file(reads), file("${assembler}-${sample}.bam"), file("${assembler}-${sample}.bam.bai") into assembly_mapping_for_metabat when: !params.skip_binning @@ -916,13 +921,6 @@ process metabat { samtools view -@ "${task.cpus}" -bS | \ samtools sort -@ "${task.cpus}" -o "${name}.bam" samtools index "${name}.bam" - jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} - - #if bin foolder is empty - if [ -z \"\$(ls -A MetaBAT2)\" ]; then - cp ${assembly} MetaBAT2/${assembler}-${assembly} - fi """ } else { """ @@ -931,16 +929,39 @@ process metabat { samtools view -@ "${task.cpus}" -bS | \ samtools sort -@ "${task.cpus}" -o "${name}.bam" samtools index "${name}.bam" - jgi_summarize_bam_contig_depths --outputDepth depth.txt "${name}.bam" - metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} - - #if bin foolder is empty - if [ -z \"\$(ls -A MetaBAT2)\" ]; then - cp ${assembly} MetaBAT2/${assembler}-${assembly} - fi """ } +} + + +process metabat { + tag "$assembler-$sample" + publishDir "${params.outdir}/", mode: 'copy', + saveAs: {filename -> (filename.indexOf(".bam") == -1 && filename.indexOf(".fastq.gz") == -1) ? 
"GenomeBinning/$filename" : null} + + input: + set val(assembler), val(sample), file(assembly), file(reads), file(bam), file(index) from assembly_mapping_for_metabat + val(min_size) from params.min_contig_size + + output: + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat + set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins + when: + !params.skip_binning + + script: + def name = "${assembler}-${sample}" + """ + jgi_summarize_bam_contig_depths --outputDepth depth.txt "${bam}" + metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} + + #if bin folder is empty + if [ -z \"\$(ls -A MetaBAT2)\" ]; then + cp ${assembly} MetaBAT2/${assembler}-${assembly} + fi + """ } From b34af800573f69bd6af1f6799a1738affa4085b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Wed, 4 Dec 2019 13:15:01 +0100 Subject: [PATCH 084/105] mv reads outside of assembly output --- main.nf | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 49dc371c..28f7c668 100644 --- a/main.nf +++ b/main.nf @@ -557,7 +557,7 @@ if(!params.keep_phix) { set val(name), file(reads), file(genome), file(db) from trimmed_reads.combine(phix_db) output: - set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades, trimmed_reads_centrifuge, trimmed_reads_kraken2) + set val(name), file("*.fastq.gz") into (trimmed_reads_megahit, trimmed_reads_metabat, trimmed_reads_fastqc, trimmed_sr_spadeshybrid, trimmed_reads_spades, trimmed_reads_centrifuge, trimmed_reads_kraken2, trimmed_reads_bowtie2) file("${name}_remove_phix_log.txt") script: @@ -766,7 +766,7 @@ process megahit { output: set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa") into assembly_megahit_to_quast - set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa"), file(reads) into assembly_megahit_to_metabat + set val("MEGAHIT"), val("$name"), file("MEGAHIT/${name}.contigs.fa") into assembly_megahit_to_metabat file("MEGAHIT/*.log") when: @@ -806,7 +806,7 @@ process spadeshybrid { output: set id, val("SPAdesHybrid"), file("${id}_graph.gfa") into assembly_graph_spadeshybrid set val("SPAdesHybrid"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spadeshybrid_to_quast - set val("SPAdesHybrid"), val("$id"), file("${id}_scaffolds.fasta"), file(sr) into assembly_spadeshybrid_to_metabat + set val("SPAdesHybrid"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spadeshybrid_to_metabat file("${id}_contigs.fasta") file("${id}_log.txt") @@ -842,7 +842,7 @@ process spades { output: set id, val("SPAdes"), file("${id}_graph.gfa") into assembly_graph_spades set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spades_to_quast - set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta"), file(sr) into assembly_spades_to_metabat + set val("SPAdes"), val("$id"), file("${id}_scaffolds.fasta") into assembly_spades_to_metabat file("${id}_contigs.fasta") file("${id}_log.txt") @@ -890,11 +890,15 @@ process quast { // what we need is (i) assemblies, sample, assembler // NO NEED FOR READS IN METABAT OUTPUT. 
// // (ii) reads -// Channel -// .from(assembly_spades_to_metabat) -// .mix(assembly_megahit_to_metabat) -// .mix(assembly_spadeshybrid_to_metabat) -// .into { bowtie2_input } +Channel + .from(assembly_spades_to_metabat) + .mix(assembly_megahit_to_metabat) + .mix(assembly_spadeshybrid_to_metabat) + .combine(trimmed_reads_bowtie2) + .dump() + .into { test_p; bowtie2_input } + +println(test_p) /* @@ -904,7 +908,7 @@ process bowtie2 { tag "$assembler-$sample" input: - set val(assembler), val(sample), file(assembly), file(reads) from assembly_spades_to_metabat.mix(assembly_megahit_to_metabat).mix(assembly_spadeshybrid_to_metabat) + set val(assembler), val(sample), file(assembly), val(_), file(reads) from bowtie2_input output: set val(assembler), val(sample), file(assembly), file(reads), file("${assembler}-${sample}.bam"), file("${assembler}-${sample}.bam.bai") into assembly_mapping_for_metabat From aa47f77c032e2c245a360040b75ac391c9f4a8dc Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 Dec 2019 16:04:19 +0100 Subject: [PATCH 085/105] multiple samples --- conf/test.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index dfb769fa..63c407bd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,7 +18,8 @@ params { params.temp_dir = "./tests/tmp_dir" singleEnd = false readPaths = [ - ['test_minigut', ['https://github.com/nf-core/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']] + ['test_minigut', ['https://github.com/nf-core/test-datasets/raw/mag/test_data/test_minigut_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_R2.fastq.gz']], + ['test_minigut_sample2', ['https://github.com/nf-core/test-datasets/raw/mag/test_data/test_minigut_sample2_R1.fastq.gz', 'https://github.com/HadrienG/test-datasets/raw/mag/test_data/test_minigut_sample2_R2.fastq.gz']] ] centrifuge_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_cf.tar.gz" kraken2_db = "https://github.com/nf-core/test-datasets/raw/mag/test_data/minigut_kraken.tgz" From 0f155a542f2ad707a70a6df225f5da8646847a04 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 4 Dec 2019 16:04:31 +0100 Subject: [PATCH 086/105] channel magic --- main.nf | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/main.nf b/main.nf index 28f7c668..0670adcd 100644 --- a/main.nf +++ b/main.nf @@ -885,21 +885,16 @@ process quast { """ } -// TODO -// input tests for bowtie2 -// what we need is (i) assemblies, sample, assembler -// NO NEED FOR READS IN METABAT OUTPUT. 
-// // (ii) reads -Channel - .from(assembly_spades_to_metabat) - .mix(assembly_megahit_to_metabat) - .mix(assembly_spadeshybrid_to_metabat) - .combine(trimmed_reads_bowtie2) - .dump() - .into { test_p; bowtie2_input } +bowtie2_input = Channel.empty() + +assembly_all_to_metabat = assembly_spades_to_metabat.mix(assembly_megahit_to_metabat,assembly_spadeshybrid_to_metabat) + +(assembly_all_to_metabat, assembly_all_to_metabat_copy) = assembly_all_to_metabat.into(2) -println(test_p) +bowtie2_input = assembly_all_to_metabat + .combine(trimmed_reads_bowtie2) +(bowtie2_input, bowtie2_input_copy) = bowtie2_input.into(2) /* * STEP 3 - Binning @@ -908,16 +903,16 @@ process bowtie2 { tag "$assembler-$sample" input: - set val(assembler), val(sample), file(assembly), val(_), file(reads) from bowtie2_input + set val(assembler), val(sample), file(assembly), val(sampleToMap), file(reads) from bowtie2_input output: - set val(assembler), val(sample), file(assembly), file(reads), file("${assembler}-${sample}.bam"), file("${assembler}-${sample}.bam.bai") into assembly_mapping_for_metabat + set val(assembler), val(sample), file("${assembler}-${sample}-${sampleToMap}.bam"), file("${assembler}-${sample}-${sampleToMap}.bam.bai") into assembly_mapping_for_metabat when: !params.skip_binning script: - def name = "${assembler}-${sample}" + def name = "${assembler}-${sample}-${sampleToMap}" if ( !params.singleEnd ) { """ bowtie2-build --threads "${task.cpus}" "${assembly}" ref @@ -937,6 +932,9 @@ process bowtie2 { } } +assembly_mapping_for_metabat = assembly_mapping_for_metabat.groupTuple(by:[0,1]).join(assembly_all_to_metabat_copy) + +assembly_mapping_for_metabat = assembly_mapping_for_metabat.dump(tag:'assembly_mapping_for_metabat') process metabat { tag "$assembler-$sample" @@ -944,13 +942,13 @@ process metabat { saveAs: {filename -> (filename.indexOf(".bam") == -1 && filename.indexOf(".fastq.gz") == -1) ? 
"GenomeBinning/$filename" : null} input: - set val(assembler), val(sample), file(assembly), file(reads), file(bam), file(index) from assembly_mapping_for_metabat + set val(assembler), val(sample), file(bam), file(index), val(sampleCopy), file(assembly) from assembly_mapping_for_metabat val(min_size) from params.min_contig_size output: set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins mode flatten set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_for_cat - set val(assembler), val(sample), file("MetaBAT2/*"), file(reads) into metabat_bins_quast_bins + set val(assembler), val(sample), file("MetaBAT2/*") into metabat_bins_quast_bins when: !params.skip_binning @@ -958,7 +956,7 @@ process metabat { script: def name = "${assembler}-${sample}" """ - jgi_summarize_bam_contig_depths --outputDepth depth.txt "${bam}" + jgi_summarize_bam_contig_depths --outputDepth depth.txt ${bam} metabat2 -t "${task.cpus}" -i "${assembly}" -a depth.txt -o "MetaBAT2/${name}" -m ${min_size} #if bin folder is empty @@ -1100,7 +1098,7 @@ process quast_bins { publishDir "${params.outdir}/GenomeBinning/QC/", mode: 'copy' input: - set val(assembler), val(sample), file(assembly), file(reads) from metabat_bins_quast_bins + set val(assembler), val(sample), file(assembly) from metabat_bins_quast_bins output: file("QUAST/*") From a9b23422c2ab938ecd764160678f9831a8194a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 08:48:07 +0100 Subject: [PATCH 087/105] clarify text profiles --- docs/usage.md | 102 +++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 27d60423..2a264b37 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,43 +2,43 @@ ## Table of contents -- [Introduction](#general-nextflow-info) -- [Running the pipeline](#running-the-pipeline) -- [Updating the pipeline](#updating-the-pipeline) -- [Reproducibility](#reproducibility) -- [Main arguments](#main-arguments) - - [`-profile`](#-profile-single-dash) - - [`docker`](#docker) - - [`awsbatch`](#awsbatch) - - [`standard`](#standard) - - [`none`](#none) - - [`--reads`](#--reads) - - [`--singleEnd`](#--singleend) - - [`--manifest`](#--manifest) -- [Optional Arguments](#optional-arguments) - - [Trimming Options](#trimming-options) - - [Trimming Options for long reads](#trimming-options-for-long-reads) - - [Taxonomic classification](#taxonomic-classification) - - [Binning Options](#binning-options) -- [Job Resources](#job-resources) -- [Automatic resubmission](#automatic-resubmission) -- [Custom resource requests](#custom-resource-requests) -- [AWS batch specific parameters](#aws-batch-specific-parameters) - - [`-awsbatch`](#-awsbatch) - - [`--awsqueue`](#--awsqueue) - - [`--awsregion`](#--awsregion) -- [Other command line parameters](#other-command-line-parameters) - - [`--outdir`](#--outdir) - - [`--email`](#--email) - - [`-name`](#-name-single-dash) - - [`-resume`](#-resume-single-dash) - - [`-c`](#-c-single-dash) - - [`--max_memory`](#--max_memory) - - [`--max_time`](#--max_time) - - [`--max_cpus`](#--max_cpus) - - [`--plaintext_emails`](#--plaintext_emails) - - [`--sampleLevel`](#--sampleLevel) - - [`--multiqc_config`](#--multiqc_config) +- [Introduction](#general-nextflow-info) +- [Running the pipeline](#running-the-pipeline) +- [Updating the pipeline](#updating-the-pipeline) +- [Reproducibility](#reproducibility) +- [Main arguments](#main-arguments) + - [`-profile`](#-profile-single-dash) + - 
[`docker`](#docker) + - [`awsbatch`](#awsbatch) + - [`standard`](#standard) + - [`none`](#none) + - [`--reads`](#--reads) + - [`--singleEnd`](#--singleend) + - [`--manifest`](#--manifest) +- [Optional Arguments](#optional-arguments) + - [Trimming Options](#trimming-options) + - [Trimming Options for long reads](#trimming-options-for-long-reads) + - [Taxonomic classification](#taxonomic-classification) + - [Binning Options](#binning-options) +- [Job Resources](#job-resources) +- [Automatic resubmission](#automatic-resubmission) +- [Custom resource requests](#custom-resource-requests) +- [AWS batch specific parameters](#aws-batch-specific-parameters) + - [`-awsbatch`](#-awsbatch) + - [`--awsqueue`](#--awsqueue) + - [`--awsregion`](#--awsregion) +- [Other command line parameters](#other-command-line-parameters) + - [`--outdir`](#--outdir) + - [`--email`](#--email) + - [`-name`](#-name-single-dash) + - [`-resume`](#-resume-single-dash) + - [`-c`](#-c-single-dash) + - [`--max_memory`](#--max_memory) + - [`--max_time`](#--max_time) + - [`--max_cpus`](#--max_cpus) + - [`--plaintext_emails`](#--plaintext_emails) + - [`--sampleLevel`](#--sampleLevel) + - [`--multiqc_config`](#--multiqc_config) ## General Nextflow info @@ -93,20 +93,20 @@ Use this parameter to choose a configuration profile. Profiles can give configur If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. -- `awsbatch` - - A generic configuration profile to be used with AWS Batch. -- `conda` - - A generic configuration profile to be used with [conda](https://conda.io/docs/) - - Pulls most software from [Bioconda](https://bioconda.github.io/) -- `docker` - - A generic configuration profile to be used with [Docker](http://docker.com/) - - Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) -- `singularity` - - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - - Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) -- `test` - - A profile with a complete configuration for automated testing - - Includes links to test data so needs no other parameters +- `awsbatch` + - A generic configuration profile to be used with AWS Batch. +- `conda` + - A generic configuration profile to be used with [conda](https://conda.io/docs/) + - Pulls most software from [Bioconda](https://bioconda.github.io/) +- `docker` + - A generic configuration profile to be used with [Docker](http://docker.com/) + - Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) +- `singularity` + - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + - Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) +- `test`, `test_hybrid` + - Profiles with a complete configuration for automated testing + - Includes links to test data so needs no other parameters ### `--reads` From f9af33a56221128a2a2fb0ff596bac357132e972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 08:50:53 +0100 Subject: [PATCH 088/105] Revert "clarify text profiles" This reverts commit a9b23422c2ab938ecd764160678f9831a8194a0b. 
--- docs/usage.md | 102 +++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2a264b37..27d60423 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -2,43 +2,43 @@ ## Table of contents -- [Introduction](#general-nextflow-info) -- [Running the pipeline](#running-the-pipeline) -- [Updating the pipeline](#updating-the-pipeline) -- [Reproducibility](#reproducibility) -- [Main arguments](#main-arguments) - - [`-profile`](#-profile-single-dash) - - [`docker`](#docker) - - [`awsbatch`](#awsbatch) - - [`standard`](#standard) - - [`none`](#none) - - [`--reads`](#--reads) - - [`--singleEnd`](#--singleend) - - [`--manifest`](#--manifest) -- [Optional Arguments](#optional-arguments) - - [Trimming Options](#trimming-options) - - [Trimming Options for long reads](#trimming-options-for-long-reads) - - [Taxonomic classification](#taxonomic-classification) - - [Binning Options](#binning-options) -- [Job Resources](#job-resources) -- [Automatic resubmission](#automatic-resubmission) -- [Custom resource requests](#custom-resource-requests) -- [AWS batch specific parameters](#aws-batch-specific-parameters) - - [`-awsbatch`](#-awsbatch) - - [`--awsqueue`](#--awsqueue) - - [`--awsregion`](#--awsregion) -- [Other command line parameters](#other-command-line-parameters) - - [`--outdir`](#--outdir) - - [`--email`](#--email) - - [`-name`](#-name-single-dash) - - [`-resume`](#-resume-single-dash) - - [`-c`](#-c-single-dash) - - [`--max_memory`](#--max_memory) - - [`--max_time`](#--max_time) - - [`--max_cpus`](#--max_cpus) - - [`--plaintext_emails`](#--plaintext_emails) - - [`--sampleLevel`](#--sampleLevel) - - [`--multiqc_config`](#--multiqc_config) +- [Introduction](#general-nextflow-info) +- [Running the pipeline](#running-the-pipeline) +- [Updating the pipeline](#updating-the-pipeline) +- [Reproducibility](#reproducibility) +- [Main arguments](#main-arguments) + - [`-profile`](#-profile-single-dash) + - [`docker`](#docker) + - [`awsbatch`](#awsbatch) + - [`standard`](#standard) + - [`none`](#none) + - [`--reads`](#--reads) + - [`--singleEnd`](#--singleend) + - [`--manifest`](#--manifest) +- [Optional Arguments](#optional-arguments) + - [Trimming Options](#trimming-options) + - [Trimming Options for long reads](#trimming-options-for-long-reads) + - [Taxonomic classification](#taxonomic-classification) + - [Binning Options](#binning-options) +- [Job Resources](#job-resources) +- [Automatic resubmission](#automatic-resubmission) +- [Custom resource requests](#custom-resource-requests) +- [AWS batch specific parameters](#aws-batch-specific-parameters) + - [`-awsbatch`](#-awsbatch) + - [`--awsqueue`](#--awsqueue) + - [`--awsregion`](#--awsregion) +- [Other command line parameters](#other-command-line-parameters) + - [`--outdir`](#--outdir) + - [`--email`](#--email) + - [`-name`](#-name-single-dash) + - [`-resume`](#-resume-single-dash) + - [`-c`](#-c-single-dash) + - [`--max_memory`](#--max_memory) + - [`--max_time`](#--max_time) + - [`--max_cpus`](#--max_cpus) + - [`--plaintext_emails`](#--plaintext_emails) + - [`--sampleLevel`](#--sampleLevel) + - [`--multiqc_config`](#--multiqc_config) ## General Nextflow info @@ -93,20 +93,20 @@ Use this parameter to choose a configuration profile. Profiles can give configur If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. -- `awsbatch` - - A generic configuration profile to be used with AWS Batch. 
-- `conda` - - A generic configuration profile to be used with [conda](https://conda.io/docs/) - - Pulls most software from [Bioconda](https://bioconda.github.io/) -- `docker` - - A generic configuration profile to be used with [Docker](http://docker.com/) - - Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) -- `singularity` - - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - - Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) -- `test`, `test_hybrid` - - Profiles with a complete configuration for automated testing - - Includes links to test data so needs no other parameters +- `awsbatch` + - A generic configuration profile to be used with AWS Batch. +- `conda` + - A generic configuration profile to be used with [conda](https://conda.io/docs/) + - Pulls most software from [Bioconda](https://bioconda.github.io/) +- `docker` + - A generic configuration profile to be used with [Docker](http://docker.com/) + - Pulls software from dockerhub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) +- `singularity` + - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + - Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters ### `--reads` From 4d2da86a45a20e808dcdb6d71d9a943b7ffdb8e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 08:52:25 +0100 Subject: [PATCH 089/105] clarify text profiles, without screwing up the formatting --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 27d60423..25e80bfc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -104,8 +104,8 @@ If `-profile` is not specified at all the pipeline will be run locally and expec - `singularity` - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - Pulls software from DockerHub: [`nfcore/mag`](http://hub.docker.com/r/nfcore/mag/) -- `test` - - A profile with a complete configuration for automated testing +- `test`, `test_hybrid` + - Profiles with a complete configuration for automated testing - Includes links to test data so needs no other parameters ### `--reads` From 49d74078d72d7371b703d348dc786b2f8846af08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 08:53:54 +0100 Subject: [PATCH 090/105] markdown linting --- docs/usage.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 25e80bfc..8b79e31e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -191,15 +191,15 @@ You have to specify one of the options below to activate it. ### `--centrifuge_db` -Database for taxonomic binning with centrifuge (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz" +Database for taxonomic binning with centrifuge (default: none). E.g. "" ### `--kraken2_db` -Database for taxonomic binning with kraken2 (default: none). E.g. "ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_v2_8GB_201904_UPDATE.tgz" +Database for taxonomic binning with kraken2 (default: none). E.g. "" ### `--cat_db` -Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. 
"tbb.bio.uu.nl/bastiaan/CAT*prepare/CAT_prepare_20190108.tar.gz" +Database for taxonomic classification of metagenome assembled genomes (default: none). E.g. "" The zipped file needs to contain a folder named "\_taxonomy*" and "_CAT_database_" that hold the respective files. ## Binning options @@ -210,8 +210,8 @@ Minimum contig size to be considered for binning (default: 1500) ### `--busco_reference` -Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ -(default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) +Download path for BUSCO database, available databases are listed here: +(default: ) ## Job resources From 4afcb8b854988387570f285ebdec31010f4f29a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 09:47:53 +0100 Subject: [PATCH 091/105] linting + nf version bump --- .travis.yml | 4 ++-- Dockerfile | 2 +- README.md | 2 +- nextflow.config | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 710a14f1..43388f8b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ matrix: before_install: # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))' # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/mag:dev # Fake the tag locally so that the pipeline runs properly @@ -30,7 +30,7 @@ install: - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='19.01.0' # Specify a minimum NF version that should be tested and work - NXF_VER='' # Plus: get the latest NF version and check that it works script: diff --git a/Dockerfile b/Dockerfile index bd757576..6f100d3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nfcore/base +FROM nfcore/base:1.7 LABEL authors="Hadrien Gourlé , Daniel Straub " \ description="Docker image containing all requirements for nf-core/mag pipeline" diff --git a/README.md b/README.md index 4a8e5c63..47b59a92 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ **Assembly, binning and annotation of metagenomes**. 
[![Build Status](https://travis-ci.com/nf-core/mag.svg?branch=master)](https://travis-ci.com/nf-core/mag) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.01.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/mag.svg)](https://hub.docker.com/r/nfcore/mag) diff --git a/nextflow.config b/nextflow.config index d42d5285..d2732e71 100644 --- a/nextflow.config +++ b/nextflow.config @@ -90,7 +90,7 @@ manifest { homePage = 'https://github.com/nf-core/mag' description = 'Assembly, binning and annotation of metagenomes' mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' + nextflowVersion = '>=19.01.0' version = '1.0.0' } From 5233739796e41c87e79cc048e122b0b680231e0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 15:43:16 +0100 Subject: [PATCH 092/105] correct slack links --- .github/CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index c3171ba0..07a7dfcd 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,7 @@ We try to manage the required tasks for nf-core/mag using GitHub issues, you pro However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/mag then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +> > If you need help using or modifying nf-core/mag then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack). @@ -44,4 +44,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/mag documentation](https://github.com/nf-core/mag#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +For further information/help, please consult the [nf-core/mag documentation](https://github.com/nf-core/mag#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nfcore.slack.com/channels/mag). 
From 06a21f0f7a6db0b90d7dc8f66b24a0861249355a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 15:44:14 +0100 Subject: [PATCH 093/105] rm aws config --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index d2732e71..a6ccfad4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -49,7 +49,6 @@ try { } profiles { - awsbatch { includeConfig 'conf/awsbatch.config' } conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true } From 49dd4f24869d9c612231b31512af1229b45ad5ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 15:54:52 +0100 Subject: [PATCH 094/105] cat doc --- docs/output.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 2a7944dd..5c21083b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -90,7 +90,16 @@ More information on the [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) w ### CAT - +[CAT](https://github.com/dutilh/CAT) is a toolkit for annotating contigs and bins from metagenome-assembled-genomes. The MAG pipeline uses CAT to assign taxonomy to the contigs from megahit and/or SPAdes, and to assign taxonomy to genome bins based on the taxnomy of the contigs + +**Output directory: `results/Taxonomy/${assembler}`** + +- `*.ORF2LCA.txt`: Tab-delimited files containing the lineage of each contig +- `*.names.txt`: Taxonomy classification, with names of each lineage levels instead og taxids. +- `*.predicted_proteins.faa`: predicted protein sequences for each genome bins, in fasta format +- `*.predicted_proteins.gff`: predicted protein features for each genome bins, in gff format +- `*.log`: log files. +- `*.bin2classification.txt`: Taxonomy classification of the genome bins. ## Assembly From 7bd3d2abebcb5ef1424a05a268c052dfc1ac93dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 18:12:38 +0100 Subject: [PATCH 095/105] markdown formatting --- .github/CONTRIBUTING.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 07a7dfcd..3d115e80 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,9 +6,7 @@ We try to manage the required tasks for nf-core/mag using GitHub issues, you pro However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> > If you need help using or modifying nf-core/mag then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack). - - +> If you need help using or modifying nf-core/mag then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack). ## Contribution workflow If you'd like to write some code for nf-core/mag, the standard workflow @@ -24,7 +22,6 @@ is as follows: If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/). - ## Tests When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
From 10a92055c83e456580d8db230d01d57d28b915a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 18:14:00 +0100 Subject: [PATCH 096/105] rm version regex from busco parser --- bin/summary_busco.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/bin/summary_busco.py b/bin/summary_busco.py index 392ec6f1..f3dd2eba 100755 --- a/bin/summary_busco.py +++ b/bin/summary_busco.py @@ -1,31 +1,17 @@ #!/usr/bin/env python -#USAGE: ./summary.busco.py *.txt +# USAGE: ./summary.busco.py *.txt import re from sys import argv -#"# Summarized benchmarking in BUSCO notation for file MEGAHIT-testset1.contigs.fa" -#" C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:148" +# "# Summarized benchmarking in BUSCO notation for file MEGAHIT-testset1.contigs.fa" +# " C:0.0%[S:0.0%,D:0.0%],F:0.0%,M:100.0%,n:148" -regexes = { - 'nf-core/mag': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], - 'fastqc': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'fastp': ['v_fastp.txt', r"fastp (\S+)"], - 'megahit': ['v_megahit.txt', r"MEGAHIT v(\S+)"], - 'metabat': ['v_metabat.txt', r"version (\S+)"], - 'NanoPlot': ['v_nanoplot.txt', r"NanoPlot (\S+)"], - 'Filtlong': ['v_filtlong.txt', r"Filtlong v(\S+)"], - 'porechop': ['v_porechop.txt', r"(\S+)"], - 'NanoLyse': ['v_nanolyse.txt', r"NanoLyse (\S+)"], - 'SPAdes': ['v_spades.txt', r"SPAdes v(\S+)"], - 'BUSCO': ['v_busco.txt', r"BUSCO (\S+)"], -} - -regexes = [r"# Summarized benchmarking in BUSCO notation for file (\S+)", r" C:(\S+)%\[S:", r"%\[S:(\S+)%,D:", r"%,D:(\S+)%\],F:", r"%\],F:(\S+)%,M:", r"%,M:(\S+)%,n:", r"%,n:(\S+)"] -columns = ["GenomeBin","%Complete","%Complete and single-copy","%Complete and duplicated","%Fragmented","%Missing","Total number"] +regexes = [r"# Summarized benchmarking in BUSCO notation for file (\S+)", r" C:(\S+)%\[S:", + r"%\[S:(\S+)%,D:", r"%,D:(\S+)%\],F:", r"%\],F:(\S+)%,M:", r"%,M:(\S+)%,n:", r"%,n:(\S+)"] +columns = ["GenomeBin", "%Complete", "%Complete and single-copy", + "%Complete and duplicated", "%Fragmented", "%Missing", "Total number"] # Search each file using its regex print("\t".join(columns)) @@ -36,5 +22,5 @@ for REGEX in regexes: match = re.search(REGEX, TEXT) if match: - results.append( match.group(1) ) + results.append(match.group(1)) print("\t".join(results)) From 2e8dfd2c4e1904afadc5961cf88186a82e7a738f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 6 Dec 2019 18:14:33 +0100 Subject: [PATCH 097/105] rm binac config --- conf/binac_smp.config | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 conf/binac_smp.config diff --git a/conf/binac_smp.config b/conf/binac_smp.config deleted file mode 100644 index 9d23ef57..00000000 --- a/conf/binac_smp.config +++ /dev/null @@ -1,23 +0,0 @@ -//Profile config names for nf-core/configs -params { - config_profile_description = 'BINAC cluster profile for jobs requiring >128 GB memory provided by nf-core/configs.' 
- config_profile_contact = 'just a test right now' - config_profile_url = 'https://www.bwhpc-c5.de/wiki/index.php/Category:BwForCluster_BinAC' -} - -singularity { - enabled = true -} - -process { - beforeScript = 'module load devel/singularity/3.0.1' - executor = 'pbs' - queue = 'smp' -} - -params { - igenomes_base = '/nfsmounts/igenomes' - max_memory = 1024.GB - max_cpus = 40 - max_time = 730.h -} \ No newline at end of file From 4c66843f667d10b8417e3eaf734ff90db245002b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Tue, 10 Dec 2019 10:21:03 +0100 Subject: [PATCH 098/105] def input single/pairs instead of ifelse --- main.nf | 87 ++++++++++++--------------------------------------------- 1 file changed, 18 insertions(+), 69 deletions(-) diff --git a/main.nf b/main.nf index 0670adcd..f66c5ee6 100644 --- a/main.nf +++ b/main.nf @@ -508,23 +508,15 @@ process fastp { file("fastp.*") script: - if ( !params.singleEnd ) { - """ - fastp -w "${task.cpus}" -q "${qual}" --cut_by_quality5 \ - --cut_by_quality3 --cut_mean_quality "${trim_qual}"\ - --adapter_sequence=${adapter} --adapter_sequence_r2=${adapter_reverse} \ - -i "${reads[0]}" -I "${reads[1]}" \ - -o ${name}_trimmed_R1.fastq.gz -O ${name}_trimmed_R2.fastq.gz - """ - } - else { - """ - fastp -w "${task.cpus}" -q "${qual}" --cut_by_quality5 \ - --cut_by_quality3 --cut_mean_quality "${trim_qual}"\ - --adapter_sequence="${adapter}" --adapter_sequence_r2="${adapter_reverse}" \ - -i ${reads} -o "${name}_trimmed.fastq.gz" - """ - } + def pe_input = params.singleEnd ? '' : "-I \"${reads[1]}\"" + def pe_output1 = params.singleEnd ? "-o \"${name}_trimmed.fastq.gz\"" : "-o \"${name}_trimmed_R1.fastq.gz\"" + def pe_output2 = params.singleEnd ? '' : "-O \"${name}_trimmed_R2.fastq.gz\"" + """ + fastp -w "${task.cpus}" -q "${qual}" --cut_by_quality5 \ + --cut_by_quality3 --cut_mean_quality "${trim_qual}"\ + --adapter_sequence=${adapter} --adapter_sequence_r2=${adapter_reverse} \ + -i "${reads[0]}" $pe_input $pe_output1 $pe_output2 + """ } /* @@ -635,29 +627,16 @@ process centrifuge { file("kreport.txt") script: - if ( !params.singleEnd ) { + def input = params.singleEnd ? "-U \"${reads}\"" : "-1 \"${reads[0]}\" -2 \"${reads[1]}\"" """ centrifuge -x "${db_name}" \ -p "${task.cpus}" \ - -1 "${reads[0]}" \ - -2 "${reads[1]}" \ --report-file report.txt \ - -S results.txt + -S results.txt \ + $input centrifuge-kreport -x "${db_name}" results.txt > kreport.txt cat results.txt | cut -f 1,3 > results.krona """ - } - else { - """ - centrifuge -x "${db_name}" \ - -p "${task.cpus}" \ - -U "${reads}" \ - --report-file report.txt \ - -S results.txt - centrifuge-kreport -x "${db_name}" results.txt > kreport.txt - cat results.txt | cut -f 1,3 > results.krona - """ - } } process kraken2_db_preparation { @@ -690,7 +669,7 @@ process kraken2 { file("kraken2_report.txt") script: - if ( !params.singleEnd ) { + def input = params.singleEnd ? 
"\"${reads}\"" : "--paired \"${reads[0]}\" \"${reads[1]}\"" """ kraken2 \ --report-zero-counts \ @@ -698,24 +677,10 @@ process kraken2 { --db database \ --fastq-input \ --report kraken2_report.txt \ - --paired "${reads[0]}" "${reads[1]}" \ + $input \ > kraken2.kraken cat kraken2.kraken | cut -f 2,3 > results.krona """ - } - else { - """ - kraken2 \ - --report-zero-counts \ - --threads "${task.cpus}" \ - --db database \ - --fastq-input \ - --report kraken2_report.txt \ - "${reads}" \ - > kraken2.kraken - cat kraken2.kraken | cut -f 2,3 > results.krona - """ - } } process krona_db { @@ -773,17 +738,10 @@ process megahit { !params.skip_megahit script: - if ( !params.singleEnd ) { - """ - megahit -t "${task.cpus}" -1 "${reads[0]}" -2 "${reads[1]}" -o MEGAHIT \ - --out-prefix "${name}" - """ - } - else { + def input = params.singleEnd ? "-r \"${reads}\"" : "-1 \"${reads[0]}\" -2 \"${reads[1]}\"" """ - megahit -t "${task.cpus}" -r ${reads} -o MEGAHIT --out-prefix "${name}" + megahit -t "${task.cpus}" $input -o MEGAHIT --out-prefix "${name}" """ - } } @@ -913,23 +871,14 @@ process bowtie2 { script: def name = "${assembler}-${sample}-${sampleToMap}" - if ( !params.singleEnd ) { - """ - bowtie2-build --threads "${task.cpus}" "${assembly}" ref - bowtie2 -p "${task.cpus}" -x ref -1 "${reads[0]}" -2 "${reads[1]}" | \ - samtools view -@ "${task.cpus}" -bS | \ - samtools sort -@ "${task.cpus}" -o "${name}.bam" - samtools index "${name}.bam" - """ - } else { + def input = params.singleEnd ? "-U \"${reads}\"" : "-1 \"${reads[0]}\" -2 \"${reads[1]}\"" """ bowtie2-build --threads "${task.cpus}" "${assembly}" ref - bowtie2 -p "${task.cpus}" -x ref -U ${reads} | \ + bowtie2 -p "${task.cpus}" -x ref $input | \ samtools view -@ "${task.cpus}" -bS | \ samtools sort -@ "${task.cpus}" -o "${name}.bam" samtools index "${name}.bam" """ - } } assembly_mapping_for_metabat = assembly_mapping_for_metabat.groupTuple(by:[0,1]).join(assembly_all_to_metabat_copy) From f853db4fc186fee9826bbc32e8d58789baa4d6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Thu, 12 Dec 2019 10:46:45 +0100 Subject: [PATCH 099/105] mv instead of cp for spades output --- main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index f66c5ee6..b94d957b 100644 --- a/main.nf +++ b/main.nf @@ -781,10 +781,10 @@ process spadeshybrid { --pe1-2 ${sr[1]} \ --nanopore ${lr} \ -o spades - cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa - cp spades/scaffolds.fasta ${id}_scaffolds.fasta - cp spades/contigs.fasta ${id}_contigs.fasta - cp spades/spades.log ${id}_log.txt + mv spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa + mv spades/scaffolds.fasta ${id}_scaffolds.fasta + mv spades/contigs.fasta ${id}_contigs.fasta + mv spades/spades.log ${id}_log.txt """ } @@ -816,10 +816,10 @@ process spades { --pe1-1 ${sr[0]} \ --pe1-2 ${sr[1]} \ -o spades - cp spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa - cp spades/scaffolds.fasta ${id}_scaffolds.fasta - cp spades/contigs.fasta ${id}_contigs.fasta - cp spades/spades.log ${id}_log.txt + mv spades/assembly_graph_with_scaffolds.gfa ${id}_graph.gfa + mv spades/scaffolds.fasta ${id}_scaffolds.fasta + mv spades/contigs.fasta ${id}_contigs.fasta + mv spades/spades.log ${id}_log.txt """ } From 39ed6cb969dfb67f6f99fcaa0a4d92957383559c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:17:53 +0100 Subject: [PATCH 100/105] correct slack link --- CODE_OF_CONDUCT.md | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d..3a8a013d 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. From a8987ad9dfebac711cddef915408fe3186dae637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:22:17 +0100 Subject: [PATCH 101/105] rm aws batch config --- conf/awsbatch.config | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 conf/awsbatch.config diff --git a/conf/awsbatch.config b/conf/awsbatch.config deleted file mode 100644 index 14af5866..00000000 --- a/conf/awsbatch.config +++ /dev/null @@ -1,18 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for running on AWS batch - * ------------------------------------------------- - * Base config needed for running with -profile awsbatch - */ -params { - config_profile_name = 'AWSBATCH' - config_profile_description = 'AWSBATCH Cloud Profile' - config_profile_contact = 'Alexander Peltzer (@apeltzer)' - config_profile_url = 'https://aws.amazon.com/de/batch/' -} - -aws.region = params.awsregion -process.executor = 'awsbatch' -process.queue = params.awsqueue -executor.awscli = '/home/ec2-user/miniconda/bin/aws' -params.tracedir = './' From 1cbbf2249a3a4ac9c663fdc65d7b9f946c7b114a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:24:21 +0100 Subject: [PATCH 102/105] rm ref to binac config --- nextflow.config | 1 - 1 file changed, 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index a6ccfad4..44c543b0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -55,7 +55,6 @@ profiles { singularity { singularity.enabled = true } test { includeConfig 'conf/test.config' } test_hybrid { includeConfig 'conf/test_hybrid.config' } - binac_smp { includeConfig 'conf/binac_smp.config' } } // Load igenomes.config if required From 9984f2b76dc5d57232b723d607c2b96c070c8515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:29:14 +0100 Subject: [PATCH 103/105] rm local install config, now on website --- docs/configuration/local.md | 45 ------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 docs/configuration/local.md diff --git 
a/docs/configuration/local.md b/docs/configuration/local.md deleted file mode 100644 index 0d4f6585..00000000 --- a/docs/configuration/local.md +++ /dev/null @@ -1,45 +0,0 @@ -# nf-core/mag: Local Configuration - -If running the pipeline in a local environment, we highly recommend using either Docker or Singularity. - -## Docker - -Docker is a great way to run nf-core/mag, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems. - -Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required. The nf-core/mag profile comes with a configuration profile for docker, making it very easy to use. This also comes with the required presets to use the AWS iGenomes resource, meaning that if using common reference genomes you just specify the reference ID and it will be autaomtically downloaded from AWS S3. - -First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/) - -Then, simply run the analysis pipeline: - -```bash -nextflow run nf-core/mag -profile docker --reads '' -``` - -Nextflow will recognise `nf-core/mag` and download the pipeline from GitHub. The `-profile docker` configuration lists the [hadrieng/mag](https://hub.docker.com/r/hadrieng/mag/) image that we have created and is hosted at dockerhub, and this is downloaded. - -For more information about how to work with reference genomes, see [`docs/configuration/reference_genomes.md`](docs/configuration/reference_genomes.md). - -### Pipeline versions - -The public docker images are tagged with the same version numbers as the code, which you can use to ensure reproducibility. When running the pipeline, specify the pipeline version with `-r`, for example `-r v1.3`. This uses pipeline code and docker image from this tagged version. - -## Singularity image - -Many HPC environments are not able to run Docker due to security issues. [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker. Even better, it can use create images directly from dockerhub. - -To use the singularity image for a single run, use `-with-singularity 'docker://hadrieng/mag'`. This will download the docker container from dockerhub and create a singularity image for you dynamically. - -If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you. Instead, you'll have to do this yourself manually first, transfer the image file and then point to that. - -First, pull the image file where you have an internet connection: - -```bash -singularity pull --name mag.img docker://hadrieng/mag -``` - -Then transfer this file and run the pipeline with this path: - -```bash -nextflow run /path/to/nf-core/mag -with-singularity /path/to/mag.img -``` From 859230887ec338a0243376ea4f043a97ce004e36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:30:11 +0100 Subject: [PATCH 104/105] release date in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36e3a754..f00c9fa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # nf-core/mag: Changelog -## v1.0.0 - [date] +## v1.0.0 - 2019/12/20 Initial release of nf-core/mag, created with the [nf-core](http://nf-co.re/) template. 
As of this release, the pipeline has the following functionalities: From 54310f70630074948045b1aa174d201e70df4c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hadrien=20Gourl=C3=A9?= Date: Fri, 20 Dec 2019 10:42:41 +0100 Subject: [PATCH 105/105] busco changed their dl url... --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index b94d957b..a1827b43 100644 --- a/main.nf +++ b/main.nf @@ -76,7 +76,7 @@ def helpMessage() { Bin quality check: --skip_busco Disable bin QC with BUSCO (default: false) --busco_reference Download path for BUSCO database, available databases are listed here: https://busco.ezlab.org/ - (default: https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz) + (default: https://busco-archive.ezlab.org/v3/datasets/bacteria_odb9.tar.gz) AWSBatch options: --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch @@ -100,7 +100,7 @@ params.multiqc_config = "$baseDir/conf/multiqc_config.yaml" params.email = false params.plaintext_email = false params.manifest = false -params.busco_reference = "https://busco.ezlab.org/datasets/bacteria_odb9.tar.gz" +params.busco_reference = "https://busco-archive.ezlab.org/v3/datasets/bacteria_odb9.tar.gz" ch_multiqc_config = Channel.fromPath(params.multiqc_config) ch_output_docs = Channel.fromPath("$baseDir/docs/output.md")
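[Editor's note] Patch 105 repoints the default `--busco_reference` to the archived BUSCO v3 download location. A hypothetical invocation that pins the database URL explicitly instead of relying on the default — `manifest.tsv` is a placeholder path and the docker profile is only an example:

```bash
# Example only: override the BUSCO reference explicitly in case the archive URL moves again
nextflow run nf-core/mag -profile docker \
    --manifest 'manifest.tsv' \
    --busco_reference 'https://busco-archive.ezlab.org/v3/datasets/bacteria_odb9.tar.gz'
```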