diff --git a/ANNEXA b/ANNEXA new file mode 160000 index 0000000..2e25dd3 --- /dev/null +++ b/ANNEXA @@ -0,0 +1 @@ +Subproject commit 2e25dd377795d287cc42fd32541a35bedb3c6ff0 diff --git a/README.md b/README.md index 1bae90d..a316ea9 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## Introduction -**ANNEXA** is an all-in-one reproductible pipeline, written in the [Nextflow](https://nextflow.io), which allows users to analyze LR-RNAseq sequences from Oxford Nanopore Technologies (ONT), and to reconstruct and quantify known and novel genes and isoforms. +**ANNEXA** is an all-in-one reproducible pipeline, written in [Nextflow](https://nextflow.io), which allows users to analyze LR-RNAseq data (Long-Read RNASeq), and to reconstruct and quantify known and novel genes and isoforms. ## Pipeline summary @@ -41,7 +41,7 @@ nextflow run IGDRion/ANNEXA \ --fa /path/to/ref.fa ``` -The input parameter takes a file listing the bams to analyze (see example below) +The input parameter takes a file listing the paths of the `bam` files to analyze (see example below) ``` /path/to/1.bam diff --git a/bin/filter_gtf_ndr.py b/bin/filter_gtf_ndr.py index 1ad19c5..230a9db 100755 --- a/bin/filter_gtf_ndr.py +++ b/bin/filter_gtf_ndr.py @@ -9,7 +9,7 @@ def parse_bambu(line): def parse_tfkmers(line): ids = line[0].split("::") - return ids[0], ids[1], line[2] + return ids[0], ids[1], line[1] def parse_ndr(csv, origin, th) -> Set[str]: diff --git a/bin/qc.R b/bin/qc.R index 4bcc386..ee8c3f8 100755 --- a/bin/qc.R +++ b/bin/qc.R @@ -230,8 +230,7 @@ gene_ext_dist = gene %>% # TRANSCRIPT ############################################################################# transcript = read.csv(paste0(prefix,".transcript.stats"), header = T) -lncRNA_biotypes = c("retained_intron", - "lncRNA", +lncRNA_biotypes = c("lncRNA", "antisense", "non-coding", "lnc_RNA") diff --git a/environment.yml b/environment.yml index 6a1a5d9..53037a2 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,8 @@ 
dependencies: - conda-forge::r-base=4.1 - conda-forge::r-rcolorbrewer - - conda-forge::r-tidyverse + - conda-forge::r-tidyverse=1.3.2 + - conda-forge::r-dplyr=1.0.10 - conda-forge::r-reshape2 - conda-forge::r-ggpubr - conda-forge::r-ggridges diff --git a/modules/feelnc/codpot.nf b/modules/feelnc/codpot.nf index 9ac0d5d..cd677c4 100644 --- a/modules/feelnc/codpot.nf +++ b/modules/feelnc/codpot.nf @@ -30,6 +30,11 @@ process FEELNC_CODPOT { -l known_lncRNA.gtf \ --numtx=3000,3000 \ -o new + + # consider new noORF transcripts as new lncRNA + if [ -e feelnc_codpot_out/new.noORF.gtf ]; then + cat feelnc_codpot_out/new.noORF.gtf >> feelnc_codpot_out/new.lncRNA.gtf + fi """ } diff --git a/modules/index_bam.nf b/modules/index_bam.nf index 1ba0cba..9433080 100644 --- a/modules/index_bam.nf +++ b/modules/index_bam.nf @@ -1,8 +1,8 @@ process INDEX_BAM { - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) container "${ workflow.containerEngine == 'singularity' ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools%3A1.16.1--h6899075_0' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_0' }" input: file bam diff --git a/nextflow.config b/nextflow.config index 874eb99..f64be72 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ params { outdir = "results" withGeneCoverage = false maxCpu = 8 - maxMemory = "40GB" + maxMemory = "80GB" enable_conda = false filter = false tfkmers_threshold = 0.2 @@ -14,7 +14,7 @@ params { } process { - memory = '8GB' + memory = '16GB' } profiles {