diff --git a/src/trimgalore/config.vsh.yaml b/src/trimgalore/config.vsh.yaml
new file mode 100644
index 00000000..e3ca6b76
--- /dev/null
+++ b/src/trimgalore/config.vsh.yaml
@@ -0,0 +1,313 @@
+name: trimgalore
+description: |
+  Trim Galore is a wrapper around Cutadapt and FastQC that applies quality and adapter trimming to FastQ files, with additional functionality for RRBS (MspI-digested) libraries and paired-end data.
+keywords: ["trimming", "adapters"]
+links:
+  homepage: https://github.com/FelixKrueger/TrimGalore
+  documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md
+  repository: https://github.com/FelixKrueger/TrimGalore
+references:
+  doi: 10.5281/zenodo.7598955
+license: GPL-3.0
+requirements:
+  commands: [trim_galore]
+authors:
+  - __merge__: /src/_authors/sai_nirmayi_yasa.yaml
+    roles: [ author, maintainer ]
+
+argument_groups:
+  - name: Input
+    arguments:
+      - name: "--input"
+        type: file
+        description: Input files. Note that paired-end files need to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz
+        required: true
+        multiple: true
+        example: sample1_r1.fq;sample1_r2.fq;sample2_r1.fq;sample2_r2.fq
+  - name: Trimming options
+    arguments:
+      - name: --quality
+        alternatives: -q
+        type: integer
+        description: Trim low-quality ends (below the specified Phred score) from reads in addition to adapter removal. For RRBS samples, quality trimming will be performed first, and adapter trimming is carried out in a second round. Other files are quality and adapter trimmed in a single pass. The algorithm is the same as the one used by BWA (Subtract INT from all qualities; compute partial sums from all indices to the end of the sequence; cut sequence at the index at which the sum is minimal).
+        example: 20
+        required: false
+      - name: --phred33
+        type: boolean
+        description: Instructs Cutadapt to use ASCII+33 quality scores as Phred scores (Sanger/Illumina 1.9+ encoding) for quality trimming.
+        required: false
+        default: true
+      - name: --phred64
+        type: boolean
+        description: Instructs Cutadapt to use ASCII+64 quality scores as Phred scores (Illumina 1.5 encoding) for quality trimming.
+        required: false
+        default: false
+      - name: --fastqc
+        type: boolean
+        description: Run FastQC in the default mode on the FastQ file once trimming is complete.
+        required: false
+      - name: --fastqc_args
+        type: string
+        description: Passes extra arguments to FastQC. If more than one argument is to be passed to FastQC they must be in the form "arg1 arg2 ...". Passing extra arguments will automatically invoke FastQC, so --fastqc does not have to be specified separately.
+        required: false
+        example: "--nogroup --outdir /home/"
+      - name: --adapter
+        alternatives: -a
+        type: string
+        description: |
+          Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will try to auto-detect whether the Illumina universal, Nextera transposase or Illumina small RNA adapter sequence was used. A single base may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA.
+          At a special request, multiple adapters can also be specified like so:
+          -a " AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT" -a2 " AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT",
+          or so:
+          -a "file:../multiple_adapters.fa" -a2 "file:../different_adapters.fa"
+          Potentially in conjunction with the parameter "-n 3" to trim all adapters.
+        required: false
+        example: AGCTCCCG
+      - name: --adapter2
+        alternatives: -a2
+        type: string
+        description: Optional adapter sequence to be trimmed off read 2 of paired-end files. This option requires '--paired' to be specified as well. If the libraries to be trimmed are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded to -a2 AAAAAAAAAA.
+        required: false
+        example: AGCTCCCG
+      - name: --illumina
+        type: boolean
+        description: Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence.
+        required: false
+      - name: --stranded_illumina
+        type: boolean
+        description: Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
+        required: false
+      - name: --nextera
+        type: boolean
+        description: Adapter sequence to be trimmed is the first 12bp of the Nextera adapter 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence.
+        required: false
+      - name: --small_rna
+        type: boolean
+        description: Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA libraries are paired-end then a2 will be set to the Illumina small RNA 5' adapter automatically (GATCGTCGGACT) unless -a2 had been defined explicitly.
+        required: false
+      - name: --consider_already_trimmed
+        type: integer
+        description: During adapter auto-detection, the limit set by this argument allows the user to set a threshold up to which the file is considered already adapter-trimmed. If no adapter sequence exceeds this threshold, no additional adapter trimming will be performed (technically, the adapter is set to '-a X'). Quality trimming is still performed as usual.
+        required: false
+      - name: --max_length
+        type: integer
+        description: Discard reads that are longer than the specified value after trimming. This is only advised for smallRNA sequencing to remove non-small RNA sequences.
+        required: false
+      - name: --stringency
+        type: integer
+        description: Overlap with adapter sequence required to trim a sequence. Defaults to a very stringent setting of 1, i.e. even a single bp of overlapping sequence will be trimmed off from the 3' end of any read.
+        required: false
+        example: 1
+      - name: --error_rate
+        alternatives: -e
+        type: double
+        description: Maximum allowed error rate (no. of errors divided by the length of the matching region)
+        required: false
+        example: 0.1
+      - name: --gzip
+        type: boolean
+        description: Compress the output file with GZIP. If the input files are GZIP-compressed the output files will automatically be GZIP compressed as well. As of v0.2.8 the compression will take place on the fly.
+        required: false
+      - name: --dont_gzip
+        type: boolean
+        description: Output files won't be compressed with GZIP. This option overrides --gzip.
+        required: false
+      - name: --length
+        type: integer
+        description: Discard reads that became shorter than the specified length because of either quality or adapter trimming. A value of '0' effectively disables this behaviour. For paired-end files, both reads of a read-pair need to be longer than the specified length to be printed out to validated paired-end files. If only one read became too short there is the possibility of keeping such unpaired single-end reads using the --retain_unpaired option.
+        required: false
+        example: 20
+      - name: --max_n
+        type: integer
+        description: The total number of Ns a read may contain before it will be removed altogether. In a paired-end setting, either read exceeding this limit will result in the entire pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, it is interpreted as a fraction of the read length.
+        required: false
+      - name: --trim_n
+        type: boolean
+        description: Removes Ns from either side of the read. This option does currently not work in RRBS mode.
+        required: false
+      - name: --no_report_file
+        type: boolean
+        description: If specified no report file will be generated.
+        required: false
+      - name: --suppress_warn
+        type: boolean
+        description: If specified any output to STDOUT or STDERR will be suppressed.
+        required: false
+      - name: --clip_R1
+        type: integer
+        description: Instructs Trim Galore to remove the given number of bp from the 5' end of read 1 (or single-end reads). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end.
+        required: false
+      - name: --clip_R2
+        type: integer
+        description: Instructs Trim Galore to remove the given number of bp from the 5' end of read 2 (paired-end reads only). This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove the first few bp because the end-repair reaction may introduce a bias towards low methylation.
+        required: false
+      - name: --three_prime_clip_R1
+        type: integer
+        description: Instructs Trim Galore to remove the specified number of bp from the 3' end of read 1 (or single-end reads) AFTER adapter/quality trimming has been performed. This may remove some bias from the 3' end that is not directly related to adapter sequence or basecall quality.
+        required: false
+      - name: --three_prime_clip_R2
+        type: integer
+        description: Instructs Trim Galore to remove the specified number of bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. This may remove some unwanted bias from the 3' end that is not directly related to adapter sequence or basecall quality.
+        required: false
+      - name: --nextseq
+        type: integer
+        description: This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. This is mutually exclusive with '-q INT'.
+        required: false
+      - name: --basename
+        type: string
+        description: Use specified name (PREFERRED_NAME) as the basename for output files, instead of deriving the filenames from the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        required: false
+      - name: --cores
+        alternatives: -j
+        type: integer
+        description: Number of cores to be used for trimming
+        required: false
+        example: 1
+  - name: Specific trimming options without adapter/quality trimming
+    arguments:
+      - name: --hardtrim5
+        type: integer
+        description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to the specified number of bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._5prime.fq(.gz).
+        required: false
+      - name: --hardtrim3
+        type: integer
+        description: Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences to the specified number of bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. Hard-trimmed output files will end in ._3prime.fq(.gz).
+        required: false
+      - name: --clock
+        type: boolean
+        description: In this mode, reads are trimmed in a specific way that is currently used for the Mouse Epigenetic Clock.
+        required: false
+      - name: --polyA
+        type: boolean
+        description: This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines if Read 1 of a paired-end file, or single-end files, are trimmed for PolyA or PolyT. In case of paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). These values may be changed manually using the options -a and -a2. In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done by writing tags to both the start ("32:A:") and end ("_PolyA:32") of the reads. The poly-A trimming mode expects that sequences were both adapter and quality trimmed before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of trimming.
+        required: false
+      - name: --implicon
+        type: boolean
+        description: |
+          This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. In its current implementation, the UMI carrying reads come in the following format
+          Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3'
+          Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5'
+          Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence.
+        required: false
+  - name: RRBS-specific options
+    arguments:
+      - name: --rrbs
+        type: boolean
+        description: Specifies that the input file was an MspI digested RRBS sample (recognition site is CCGG). Single-end or Read 1 sequences (paired-end) which were adapter-trimmed will have a further 2 bp removed from their 3' end. Sequences which were merely trimmed because of poor quality will not be shortened further. Read 2 of paired-end libraries will in addition have the first 2 bp removed from the 5' end (by setting '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in cytosine positions close to the 3' MspI site in sequenced fragments. This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit (see below).
+        required: false
+      - name: --non_directional
+        type: boolean
+        description: Selecting this option for non-directional RRBS libraries will screen quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read and, if found, removes the first two basepairs. Like with the option '--rrbs' this avoids using cytosine positions that were filled-in during the end-repair step. '--non_directional' requires '--rrbs' to be specified as well. Note that this option does not set '--clip_r2 2' in paired-end mode.
+        required: false
+      - name: --keep
+        type: boolean
+        description: Keep the quality trimmed intermediate file.
+        required: false
+  - name: Paired-end specific options
+    arguments:
+      - name: --paired
+        type: boolean
+        description: This option performs length trimming of quality/adapter/RRBS trimmed reads for paired-end files. To pass the validation test, both sequences of a sequence pair are required to have a certain minimum length which is governed by the option --length (see above). If only one read passes this length threshold the other read can be rescued (see option --retain_unpaired). Using this option lets you discard too short read pairs without disturbing the sequence-by-sequence order of FastQ files which is required by many aligners. Trim Galore expects paired-end files to be supplied in a pairwise fashion, e.g. file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... .
+        required: false
+      - name: --retain_unpaired
+        type: boolean
+        description: If only one of the two paired-end reads became too short, the longer read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' output files. The length cutoff for unpaired single-end reads is governed by the parameters -r1/--length_1 and -r2/--length_2.
+        required: false
+      - name: --length_1
+        alternatives: -r1
+        type: integer
+        description: Unpaired single-end read length cutoff needed for read 1 to be written to '.unpaired_1.fq' output file. These reads may be mapped in single-end mode.
+        example: 35
+        required: false
+      - name: --length_2
+        alternatives: -r2
+        type: integer
+        description: Unpaired single-end read length cutoff needed for read 2 to be written to '.unpaired_2.fq' output file. These reads may be mapped in single-end mode.
+        required: false
+        example: 35
+  - name: Output
+    arguments:
+      - name: --output_dir
+        alternatives: -o
+        type: file
+        description: If specified all output will be written to this directory instead of the current directory.
+        direction: output
+        required: false
+        default: trimmed_output
+      - name: --trimmed_r1
+        type: file
+        required: true
+        description: Output file for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: read_1.fastq
+      - name: --trimmed_r2
+        type: file
+        required: false
+        description: Output file for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: read_2.fastq
+      - name: --trimming_report_r1
+        type: file
+        description: Trimming report for read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: read_1.trimming_report.txt
+      - name: --trimming_report_r2
+        type: file
+        description: Trimming report for read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        required: false
+        example: read_2.trimming_report.txt
+      - name: --trimmed_fastqc_html_1
+        type: file
+        description: FastQC report for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: read_1.fastqc.html
+      - name: --trimmed_fastqc_html_2
+        type: file
+        description: FastQC report for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        required: false
+        example: read_2.fastqc.html
+      - name: --trimmed_fastqc_zip_1
+        type: file
+        description: FastQC results for trimmed (single-end) reads (or read 1 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: read_1.fastqc.zip
+      - name: --trimmed_fastqc_zip_2
+        type: file
+        description: FastQC results for trimmed reads (read2 for paired-end). Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        required: false
+        example: read_2.fastqc.zip
+      - name: --unpaired_r1
+        type: file
+        required: false  # only written for paired-end runs that use --retain_unpaired
+        description: Output file for unpaired read 1. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: unpaired_read_1.fastq
+      - name: --unpaired_r2
+        type: file
+        required: false  # only written for paired-end runs that use --retain_unpaired
+        description: Output file for unpaired read 2. Only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists.
+        direction: output
+        example: unpaired_read_2.fastq
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+
+engines:
+  - type: docker
+    image: quay.io/biocontainers/trim-galore:0.6.9--hdfd78af_0
+    setup:
+      - type: docker
+        run: |
+          echo "TrimGalore: `trim_galore --version | grep -oP 'version \K\d+\.\d+\.\d+'`" > /var/software_versions.txt
+
+runners:
+  - type: executable
+  - type: nextflow
diff --git a/src/trimgalore/help.txt b/src/trimgalore/help.txt
new file mode 100644
index 00000000..4bf38e99
--- /dev/null
+++ b/src/trimgalore/help.txt
@@ -0,0 +1,355 @@
+
+ USAGE:
+
+trim_galore [options]
+
+
+-h/--help               Print this help message and exits.
+
+-v/--version            Print the version information and exits.
+
+-q/--quality            Trim low-quality ends from reads in addition to adapter removal. For
+                        RRBS samples, quality trimming will be performed first, and adapter
+                        trimming is carried in a second round. Other files are quality and adapter
+                        trimmed in a single pass. The algorithm is the same as the one used by BWA
+                        (Subtract INT from all qualities; compute partial sums from all indices
+                        to the end of the sequence; cut sequence at the index at which the sum is
+                        minimal). Default Phred score: 20.
+
+--phred33               Instructs Cutadapt to use ASCII+33 quality scores as Phred scores
+                        (Sanger/Illumina 1.9+ encoding) for quality trimming. Default: ON.
+
+--phred64               Instructs Cutadapt to use ASCII+64 quality scores as Phred scores
+                        (Illumina 1.5 encoding) for quality trimming.
+
+--fastqc                Run FastQC in the default mode on the FastQ file once trimming is complete.
+
+--fastqc_args ""        Passes extra arguments to FastQC. If more than one argument is to be passed
+                        to FastQC they must be in the form "arg1 arg2 etc.". An example would be:
+                        --fastqc_args "--nogroup --outdir /home/". Passing extra arguments will
+                        automatically invoke FastQC, so --fastqc does not have to be specified
+                        separately.
+ +-a/--adapter Adapter sequence to be trimmed. If not specified explicitly, Trim Galore will + try to auto-detect whether the Illumina universal, Nextera transposase or Illumina + small RNA adapter sequence was used. Also see '--illumina', '--nextera' and + '--small_rna'. If no adapter can be detected within the first 1 million sequences + of the first file specified or if there is a tie between several adapter sequences, + Trim Galore defaults to '--illumina' (as long as the Illumina adapter was one of the + options, else '--nextera' is the default). A single base + may also be given as e.g. -a A{10}, to be expanded to -a AAAAAAAAAA. + + At a special request, multiple adapters can also be specified like so: + -a " AGCTCCCG -a TTTCATTATAT -a TTTATTCGGATTTAT" + -a2 " AGCTAGCG -a TCTCTTATAT -a TTTCGGATTTAT", or so: + -a "file:../multiple_adapters.fa" + -a2 "file:../different_adapters.fa" + Potentially in conjucntion with the parameter "-n 3" to trim all adapters. Please note + that this is NOT needed for standard trimming! + More Information here: https://github.com/FelixKrueger/TrimGalore/issues/86 + +-a2/--adapter2 Optional adapter sequence to be trimmed off read 2 of paired-end files. This + option requires '--paired' to be specified as well. If the libraries to be trimmed + are smallRNA then a2 will be set to the Illumina small RNA 5' adapter automatically + (GATCGTCGGACT). A single base may also be given as e.g. -a2 A{10}, to be expanded + to -a2 AAAAAAAAAA. + +--illumina Adapter sequence to be trimmed is the first 13bp of the Illumina universal adapter + 'AGATCGGAAGAGC' instead of the default auto-detection of adapter sequence. + +--stranded_illumina Adapter sequence to be trimmed is the first 13bp of the Illumina stranded mRNA or Total + RNA adapter 'ACTGTCTCTTATA' instead of the default auto-detection of adapter sequence. + Note that this sequence resembles the Nextera sequence with an additional A from A-tailing. 
+ Please also see https://github.com/FelixKrueger/TrimGalore/issues/127 or + https://support.illumina.com/bulletins/2020/06/trimming-t-overhang-options-for-the-illumina-rna-library-prep-wo.html + for further information. This sequence is currently NOT included in the adapter auto-detection. + +--nextera Adapter sequence to be trimmed is the first 12bp of the Nextera adapter + 'CTGTCTCTTATA' instead of the default auto-detection of adapter sequence. + +--small_rna Adapter sequence to be trimmed is the first 12bp of the Illumina Small RNA 3' Adapter + 'TGGAATTCTCGG' instead of the default auto-detection of adapter sequence. Selecting + to trim smallRNA adapters will also lower the --length value to 18bp. If the smallRNA + libraries are paired-end then a2 will be set to the Illumina small RNA 5' adapter + automatically (GATCGTCGGACT) unless -a 2 had been defined explicitly. + +--consider_already_trimmed During adapter auto-detection, the limit set by allows the user to + set a threshold up to which the file is considered already adapter-trimmed. If no adapter + sequence exceeds this threshold, no additional adapter trimming will be performed (technically, + the adapter is set to '-a X'). Quality trimming is still performed as usual. + Default: NOT SELECTED (i.e. normal auto-detection precedence rules apply). + +--max_length Discard reads that are longer than bp after trimming. This is only advised for + smallRNA sequencing to remove non-small RNA sequences. + + +--stringency Overlap with adapter sequence required to trim a sequence. Defaults to a + very stringent setting of 1, i.e. even a single bp of overlapping sequence + will be trimmed off from the 3' end of any read. + +-e Maximum allowed error rate (no. of errors divided by the length of the matching + region) (default: 0.1) + +--gzip Compress the output file with GZIP. If the input files are GZIP-compressed + the output files will automatically be GZIP compressed as well. 
As of v0.2.8 the + compression will take place on the fly. + +--dont_gzip Output files won't be compressed with GZIP. This option overrides --gzip. + +--length Discard reads that became shorter than length INT because of either + quality or adapter trimming. A value of '0' effectively disables + this behaviour. Default: 20 bp. + + For paired-end files, both reads of a read-pair need to be longer than + bp to be printed out to validated paired-end files (see option --paired). + If only one read became too short there is the possibility of keeping such + unpaired single-end reads (see --retain_unpaired). Default pair-cutoff: 20 bp. + +--max_n COUNT The total number of Ns a read may contain before it will be removed altogether. + In a paired-end setting, either read exceeding this limit will result in the entire + pair being removed from the trimmed output files. If COUNT is a number between 0 and 1, + it is interpreted as a fraction of the read length. + +--trim-n Removes Ns from either side of the read. This option does currently not work in RRBS mode. + +-o/--output_dir If specified all output will be written to this directory instead of the current + directory. If the directory doesn't exist it will be created for you. + +--no_report_file If specified no report file will be generated. + +--suppress_warn If specified any output to STDOUT or STDERR will be suppressed. + +--clip_R1 Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end + reads). This may be useful if the qualities were very poor, or if there is some + sort of unwanted bias at the 5' end. Default: OFF. + +--clip_R2 Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads + only). This may be useful if the qualities were very poor, or if there is some sort + of unwanted bias at the 5' end. For paired-end BS-Seq, it is recommended to remove + the first few bp because the end-repair reaction may introduce a bias towards low + methylation. 
Please refer to the M-bias plot section in the Bismark User Guide for + some examples. Default: OFF. + +--three_prime_clip_R1 Instructs Trim Galore to remove bp from the 3' end of read 1 (or single-end + reads) AFTER adapter/quality trimming has been performed. This may remove some unwanted + bias from the 3' end that is not directly related to adapter sequence or basecall quality. + Default: OFF. + +--three_prime_clip_R2 Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER + adapter/quality trimming has been performed. This may remove some unwanted bias from + the 3' end that is not directly related to adapter sequence or basecall quality. + Default: OFF. + +--2colour/--nextseq INT This enables the option '--nextseq-trim=3'CUTOFF' within Cutadapt, which will set a quality + cutoff (that is normally given with -q instead), but qualities of G bases are ignored. + This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without + any signal are called as high-quality G bases. This is mutually exlusive with '-q INT'. + + +--path_to_cutadapt You may use this option to specify a path to the Cutadapt executable, + e.g. /my/home/cutadapt-1.7.1/bin/cutadapt. Else it is assumed that Cutadapt is in + the PATH. + +--basename Use PREFERRED_NAME as the basename for output files, instead of deriving the filenames from + the input files. Single-end data would be called PREFERRED_NAME_trimmed.fq(.gz), or + PREFERRED_NAME_val_1.fq(.gz) and PREFERRED_NAME_val_2.fq(.gz) for paired-end data. --basename + only works when 1 file (single-end) or 2 files (paired-end) are specified, but not for longer lists. + +-j/--cores INT Number of cores to be used for trimming [default: 1]. For Cutadapt to work with multiple cores, it + requires Python 3 as well as parallel gzip (pigz) installed on the system. Trim Galore attempts to detect + the version of Python used by calling Cutadapt. If Python 2 is detected, --cores is set to 1. 
If the Python + version cannot be detected, Python 3 is assumed and we let Cutadapt handle potential issues itself. + + If pigz cannot be detected on your system, Trim Galore reverts to using gzip compression. Please note + that gzip compression will slow down multi-core processes so much that it is hardly worthwhile, please + see: https://github.com/FelixKrueger/TrimGalore/issues/16#issuecomment-458557103 for more info). + + Actual core usage: It should be mentioned that the actual number of cores used is a little convoluted. + Assuming that Python 3 is used and pigz is installed, --cores 2 would use 2 cores to read the input + (probably not at a high usage though), 2 cores to write to the output (at moderately high usage), and + 2 cores for Cutadapt itself + 2 additional cores for Cutadapt (not sure what they are used for) + 1 core + for Trim Galore itself. So this can be up to 9 cores, even though most of them won't be used at 100% for + most of the time. Paired-end processing uses twice as many cores for the validation (= writing out) step. + --cores 4 would then be: 4 (read) + 4 (write) + 4 (Cutadapt) + 2 (extra Cutadapt) + 1 (Trim Galore) = 15. + + It seems that --cores 4 could be a sweet spot, anything above has diminishing returns. + + + +SPECIFIC TRIMMING - without adapter/quality trimming + +--hardtrim5 Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences + to bp at the 5'-end. Once hard-trimming of files is complete, Trim Galore will exit. + Hard-trimmed output files will end in ._5prime.fq(.gz). Here is an example: + + before: CCTAAGGAAACAAGTACACTCCACACATGCATAAAGGAAATCAAATGTTATTTTTAAGAAAATGGAAAAT + --hardtrim5 20: CCTAAGGAAACAAGTACACT + +--hardtrim3 Instead of performing adapter-/quality trimming, this option will simply hard-trim sequences + to bp at the 3'-end. Once hard-trimming of files is complete, Trim Galore will exit. + Hard-trimmed output files will end in ._3prime.fq(.gz). 
Here is an example: + + before: CCTAAGGAAACAAGTACACTCCACACATGCATAAAGGAAATCAAATGTTATTTTTAAGAAAATGGAAAAT + --hardtrim3 20: TTTTTAAGAAAATGGAAAAT + +--clock In this mode, reads are trimmed in a specific way that is currently used for the Mouse + Epigenetic Clock (see here: Multi-tissue DNA methylation age predictor in mouse, Stubbs et al., + Genome Biology, 2017 18:68 https://doi.org/10.1186/s13059-017-1203-5). Following this, Trim Galore + will exit. + + In it's current implementation, the dual-UMI RRBS reads come in the following format: + + Read 1 5' UUUUUUUU CAGTA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF TACTG UUUUUUUU 3' + Read 2 3' UUUUUUUU GTCAT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF ATGAC UUUUUUUU 5' + + Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI), CAGTA is a constant region, + and FFFFFFF... is the actual RRBS-Fragment to be sequenced. The UMIs for Read 1 (R1) and + Read 2 (R2), as well as the fixed sequences (F1 or F2), are written into the read ID and + removed from the actual sequence. Here is an example: + + R1: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 1:N:0: CGATGTTT + ATCTAGTTCAGTACGGTGTTTTCGAATTAGAAAAATATGTATAGAGGAAATAGATATAAAGGCGTATTCGTTATTG + R2: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 3:N:0: CGATGTTT + CAATTTTGCAGTACAAAAATAATACCTCCTCTATTTATCCAAAATCACAAAAAACCACCCACTTAACTTTCCCTAA + + R1: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 1:N:0: CGATGTTT:R1:ATCTAGTT:R2:CAATTTTG:F1:CAGT:F2:CAGT + CGGTGTTTTCGAATTAGAAAAATATGTATAGAGGAAATAGATATAAAGGCGTATTCGTTATTG + R2: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 3:N:0: CGATGTTT:R1:ATCTAGTT:R2:CAATTTTG:F1:CAGT:F2:CAGT + CAAAAATAATACCTCCTCTATTTATCCAAAATCACAAAAAACCACCCACTTAACTTTCCCTAA + + Following clock trimming, the resulting files (.clock_UMI.R1.fq(.gz) and .clock_UMI.R2.fq(.gz)) + should be adapter- and quality trimmed with Trim Galore as usual. In addition, reads need to be trimmed + by 15bp from their 3' end to get rid of potential UMI and fixed sequences. 
The command is: + + trim_galore --paired --three_prime_clip_R1 15 --three_prime_clip_R2 15 *.clock_UMI.R1.fq.gz *.clock_UMI.R2.fq.gz + + Following this, reads should be aligned with Bismark and deduplicated with UmiBam + in '--dual_index' mode (see here: https://github.com/FelixKrueger/Umi-Grinder). UmiBam recognises + the UMIs within this pattern: R1:(ATCTAGTT):R2:(CAATTTTG): as (UMI R1) and (UMI R2). + +--polyA This is a new, still experimental, trimming mode to identify and remove poly-A tails from sequences. + When --polyA is selected, Trim Galore attempts to identify from the first supplied sample whether + sequences contain more often a stretch of either 'AAAAAAAAAA' or 'TTTTTTTTTT'. This determines + if Read 1 of a paired-end file, or single-end files, are trimmed for PolyA or PolyT. In case of + paired-end sequencing, Read2 is trimmed for the complementary base from the start of the reads. The + auto-detection uses a default of A{20} for Read1 (3'-end trimming) and T{150} for Read2 (5'-end trimming). + These values may be changed manually using the options -a and -a2. + + In addition to trimming the sequences, white spaces are replaced with _ and it records in the read ID + how many bases were trimmed so it can later be used to identify PolyA trimmed sequences. This is currently done + by writing tags to both the start ("32:A:") and end ("_PolyA:32") of the reads in the following example: + + @READ-ID:1:1102:22039:36996 1:N:0:CCTAATCC + GCCTAAGGAAACAAGTACACTCCACACATGCATAAAGGAAATCAAATGTTATTTTTAAGAAAATGGAAAATAAAAACTTTATAAACACCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA + + @32:A:READ-ID:1:1102:22039:36996_1:N:0:CCTAATCC_PolyA:32 + GCCTAAGGAAACAAGTACACTCCACACATGCATAAAGGAAATCAAATGTTATTTTTAAGAAAATGGAAAATAAAAACTTTATAAACACC + + PLEASE NOTE: The poly-A trimming mode expects that sequences were both adapter and quality trimmed + before looking for Poly-A tails, and it is the user's responsibility to carry out an initial round of + trimming.
The following sequence: + + 1) trim_galore file.fastq.gz + 2) trim_galore --polyA file_trimmed.fq.gz + 3) zcat file_trimmed_trimmed.fq.gz | grep -A 3 PolyA | grep -v ^-- > PolyA_trimmed.fastq + + Will 1) trim qualities and Illumina adapter contamination, 2) find and remove PolyA contamination. + Finally, if desired, 3) will specifically find PolyA trimmed sequences to a specific FastQ file of your choice. + +--implicon This is a special mode of operation for paired-end data, such as required for the IMPLICON method, where a UMI sequence + is getting transferred from the start of Read 2 to the readID of both reads. Following this, Trim Galore will exit. + + In its current implementation, the UMI carrying reads come in the following format: + + Read 1 5' FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 3' + Read 2 3' UUUUUUUUFFFFFFFFFFFFFFFFFFFFFFFFFFFF 5' + + Where UUUUUUUU is a random 8-mer unique molecular identifier (UMI) and FFFFFFF... is the actual fragment to be + sequenced. The UMI of Read 2 (R2) is written into the read ID of both reads and removed from the actual sequence. + Here is an example: + + R1: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 1:N:0: CGATGTTT + ATCTAGTTCAGTACGGTGTTTTCGAATTAGAAAAATATGTATAGAGGAAATAGATATAAAGGCGTATTCGTTATTG + R2: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 3:N:0: CGATGTTT + CAATTTTGCAGTACAAAAATAATACCTCCTCTATTTATCCAAAATCACAAAAAACCACCCACTTAACTTTCCCTAA + + After --implicon trimming: + R1: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 1:N:0: CGATGTTT:CAATTTTG + ATCTAGTTCAGTACGGTGTTTTCGAATTAGAAAAATATGTATAGAGGAAATAGATATAAAGGCGTATTCGTTATTG + R2: @HWI-D00436:407:CCAETANXX:1:1101:4105:1905 3:N:0: CGATGTTT:CAATTTTG + CAGTACAAAAATAATACCTCCTCTATTTATCCAAAATCACAAAAAACCACCCACTTAACTTTCCCTAA + +RRBS-specific options (MspI digested material): + +--rrbs Specifies that the input file was an MspI digested RRBS sample (recognition + site: CCGG).
Single-end or Read 1 sequences (paired-end) which were adapter-trimmed + will have a further 2 bp removed from their 3' end. Sequences which were merely + trimmed because of poor quality will not be shortened further. Read 2 of paired-end + libraries will in addition have the first 2 bp removed from the 5' end (by setting + '--clip_r2 2'). This is to avoid using artificial methylation calls from the filled-in + cytosine positions close to the 3' MspI site in sequenced fragments. + This option is not recommended for users of the Tecan Ovation RRBS Methyl-Seq with TrueMethyl + oxBS 1-16 kit (see below). + +--non_directional Selecting this option for non-directional RRBS libraries will screen + quality-trimmed sequences for 'CAA' or 'CGA' at the start of the read + and, if found, removes the first two basepairs. Like with the option + '--rrbs' this avoids using cytosine positions that were filled-in + during the end-repair step. '--non_directional' requires '--rrbs' to + be specified as well. Note that this option does not set '--clip_r2 2' in + paired-end mode. + +--keep Keep the quality trimmed intermediate file. Default: off, which means + the temporary file is being deleted after adapter trimming. Only has + an effect for RRBS samples since other FastQ files are not trimmed + for poor qualities separately. + + +Note for RRBS using the Tecan Ovation RRBS Methyl-Seq with TrueMethyl oxBS 1-16 kit: + +Owing to the fact that the Tecan Ovation RRBS kit attaches a varying number of nucleotides (0-3) after each MspI +site Trim Galore should be run WITHOUT the option --rrbs. This trimming is accomplished in a subsequent +diversity trimming step afterwards (see their manual). 
+ + + +Note for RRBS using MseI: + +If your DNA material was digested with MseI (recognition motif: TTAA) instead of MspI it is NOT necessary +to specify --rrbs or --non_directional since virtually all reads should start with the sequence +'TAA', and this holds true for both directional and non-directional libraries. As the end-repair of 'TAA' +restricted sites does not involve any cytosines it does not need to be treated especially. Instead, simply +run Trim Galore! in the standard (i.e. non-RRBS) mode. + + + + +Paired-end specific options: + +--paired This option performs length trimming of quality/adapter/RRBS trimmed reads for + paired-end files. To pass the validation test, both sequences of a sequence pair + are required to have a certain minimum length which is governed by the option + --length (see above). If only one read passes this length threshold the + other read can be rescued (see option --retain_unpaired). Using this option lets + you discard too short read pairs without disturbing the sequence-by-sequence order + of FastQ files which is required by many aligners. + + Trim Galore! expects paired-end files to be supplied in a pairwise fashion, e.g. + file1_1.fq file1_2.fq SRR2_1.fq.gz SRR2_2.fq.gz ... . + + +--retain_unpaired If only one of the two paired-end reads became too short, the longer + read will be written to either '.unpaired_1.fq' or '.unpaired_2.fq' + output files. The length cutoff for unpaired single-end reads is + governed by the parameters -r1/--length_1 and -r2/--length_2. Default: OFF. + +-r1/--length_1 Unpaired single-end read length cutoff needed for read 1 to be written to + '.unpaired_1.fq' output file. These reads may be mapped in single-end mode. + Default: 35 bp. + +-r2/--length_2 Unpaired single-end read length cutoff needed for read 2 to be written to + '.unpaired_2.fq' output file. These reads may be mapped in single-end mode. + Default: 35 bp. + +Last modified on 02 02 2023. 
+ diff --git a/src/trimgalore/script.sh b/src/trimgalore/script.sh new file mode 100755 index 00000000..cded9dc0 --- /dev/null +++ b/src/trimgalore/script.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +set -eo pipefail + +[[ ! -d $output_dir ]] && mkdir -p $par_output_dir + +IFS=";" read -ra input <<< $par_input + +unset_if_false=( par_phred33 par_phred64 par_fastqc par_illumina par_stranded_illumina par_nextera par_small_rna par_gzip par_dont_gzip par_no_report_file par_suppress_warn par_clock par_polyA par_rrbs par_non_directional par_keep par_paired par_retain_unpaired ) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + +trim_galore \ + ${par_quality:+-q "${par_quality}"} \ + ${par_phred33:+--phred33} \ + ${par_phred64:+--phred64 } \ + ${par_fastqc:+--fastqc } \ + ${par_fastqc_args:+--fastqc_args "${par_fastqc_args}"} \ + ${par_adapter:+-a "${par_adapter}"} \ + ${par_adapter2:+-a2 "${par_adapter2}"} \ + ${par_illumina:+--illumina} \ + ${par_stranded_illumina:+--stranded_illumina} \ + ${par_nextera:+--nextera} \ + ${par_small_rna:+--small_rna} \ + ${par_consider_already_trimmed:+--consider_already_trimmed "${par_consider_already_trimmed}"} \ + ${par_max_length:+--max_length "${par_max_length}"} \ + ${par_stringency:+--stringency "${par_stringency}"} \ + ${par_error_rate:+-e "${par_error_rate}"} \ + ${par_gzip:+--gzip} \ + ${par_dont_gzip:+--dont_gzip} \ + ${par_length:+--length "${par_length}"} \ + ${par_max_n:+--max_n "${par_max_n}"} \ + ${par_trim_n:+--trim-n "${par_trim_n}"} \ + ${par_no_report_file:+--no_report_file} \ + ${par_suppress_warn:+--suppress_warn} \ + ${par_clip_R1:+--clip_R1 "${par_clip_R1}"} \ + ${par_clip_R2:+--clip_R2 "${par_clip_R2}"} \ + ${par_three_prime_clip_R1:+--three_prime_clip_R1 "${par_three_prime_clip_R1}"} \ + ${par_three_prime_clip_R2:+--three_prime_clip_R2 "${par_three_prime_clip_R2}"} \ + ${par_nextseq:+--nextseq "${par_nextseq}"} \ + ${par_basename:+-basename "${par_basename}"} 
\ + ${par_hardtrim5:+--hardtrim5 "${par_hardtrim5}"} \ + ${par_hardtrim3:+--hardtrim3 "${par_hardtrim3}"} \ + ${par_clock:+--clock} \ + ${par_polyA:+--polyA} \ + ${par_implicon:+--implicon "${par_implicon}"} \ + ${par_rrbs:+--rrbs} \ + ${par_non_directional:+--non_directional} \ + ${par_keep:+--keep} \ + ${par_paired:+--paired} \ + ${par_retain_unpaired:+--retain_unpaired} \ + ${par_length_1:+-r1 "${par_length_1}"} \ + ${par_length_2:+-r2 "${par_length_2}"} \ + ${par_cores:+-j "${par_cores}"} \ + -o $par_output_dir \ + ${input[*]} + +# Split output directory to separate files +[[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/output.txt" "$trimmed_r1" +[[ ! -z "$par_trimmed_r2" ]] && mv "$par_output_dir/output.txt.summary" "$par_trimmed_r2" +[[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/output.txt" "$par_trimming_report_r1" +[[ ! -z "$par_trimming_report_r2" ]] && mv "$par_output_dir/output.txt.summary" "$par_trimming_report_r2" +[[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/output.txt" "$par_trimmed_fastqc_html_1" +[[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv "$par_output_dir/output.txt.summary" "$par_trimmed_fastqc_html_2" +[[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/output.txt" "$par_trimmed_fastqc_zip_1" +[[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv "$par_output_dir/output.txt.summary" "$par_trimmed_fastqc_zip_2" + +if [ $par_paired == "true" ]; then + input_r1=$(basename -- "${input[0]}") + input_r2=$(basename -- "${input[1]}") + [[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*val_1*.f*q.*" "$par_trimmed_r1" + [[ ! -z "$par_trimmed_r2" ]] && mv "$par_output_dir/*val_2*.f*q.*" "$par_trimmed_r2" + [[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1" + [[ ! -z "$par_trimming_report_r2" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r2" + [[ ! 
-z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*val_1*.html" "$par_trimmed_fastqc_html_1" + [[ ! -z "$par_trimmed_fastqc_html_2" ]] && mv "$par_output_dir/*val_2*.html" "$par_trimmed_fastqc_html_2" + [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*val_1*.zip" "$par_trimmed_fastqc_zip_1" + [[ ! -z "$par_trimmed_fastqc_zip_2" ]] && mv "$par_output_dir/*val_2*.zip" "$par_trimmed_fastqc_zip_2" + [[ ! -z "$par_unpaired_r1" ]] && mv "$par_output_dir/*.unpaired_1.f*q.*" "$par_unpaired_r1" + [[ ! -z "$par_unpaired_r2" ]] && mv "$par_output_dir/*.unpaired_2.f*q.*" "$par_unpaired_r2" +else + input_r1=$(basename -- "${input[0]}") + [[ ! -z "$par_trimmed_r1" ]] && mv "$par_output_dir/*_trimmed.fq*" "$trimmed_r1" + [[ ! -z "$par_trimming_report_r1" ]] && mv "$par_output_dir/${input_r1}_trimming_report.txt" "$par_trimming_report_r1" + [[ ! -z "$par_trimmed_fastqc_html_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.html" "$par_trimmed_fastqc_html_1" + [[ ! -z "$par_trimmed_fastqc_zip_1" ]] && mv "$par_output_dir/*_trimmed_fastqc.zip" "$par_trimmed_fastqc_zip_1" diff --git a/src/trimgalore/test.sh b/src/trimgalore/test.sh new file mode 100644 index 00000000..d5e3d8e7 --- /dev/null +++ b/src/trimgalore/test.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +set -e +set -eo pipefail + +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_doesnt_exist() { + [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } +} +assert_file_empty() { + [ ! 
-s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_file_contains_line() { + grep -q -x "$2" "$1" || { echo "File '$1' does not contain line '$2'" && exit 1; } +} +assert_file_not_contains() { + grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } +} + +################################################################# + +echo ">>> Prepare test data" + +cat > example_R1.fastq <<'EOF' +@read1 +ACGTACGTACGTAAAAA ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTCCCCC ++ +IIIIIIIIIIIIIIIII +EOF + +cat > example_R2.fastq <<'EOF' +@read1 +ACGTACGTACGTGGGGG ++ +IIIIIIIIIIIIIIIII +@read2 +ACGTACGTACGTTTTTT ++ +IIIIIIIIIIIIIIIII +EOF + +################################################################# + +echo ">>> Testing for paired-end reads" +"$meta_executable" \ + --paired true \ + --input "example_R1.fastq;example_R2.fastq" \ + --adapter "ACG" \ + --trim_html_1 example_R1.trimmed.html \ + --trim_html_2 example_R2.trimmed.html \ + --trim_zip_1 example_R1.trimmed.zip \ + --trim_zip_2 example_R2.trimmed.zip \ + --fastq_1 example_R1.trimmed.fastq \ + --fastq_2 example_R2.trimmed.fastq \ + --trim_log_1 example_R1.trimming_report.txt \ + --trim_log_2 example_R2.trimming_report.txt + +echo ">> Checking output" +assert_file_exists "example_R1.trimmed.html" +assert_file_exists "example_R2.trimmed.html" +assert_file_exists "example_R1.trimmed.zip" +assert_file_exists "example_R2.trimmed.zip" +assert_file_exists "example_R1.trimmed.fastq" +assert_file_exists "example_R2.trimmed.fastq" +assert_file_exists "example_R1.trimming_report.txt" +assert_file_exists "example_R2.trimming_report.txt" + +echo ">> Check if output is empty" +assert_file_not_empty "example_R1.trimmed.html" +assert_file_not_empty "example_R2.trimmed.html" 
+assert_file_not_empty "example_R1.trimmed.zip" +assert_file_not_empty "example_R2.trimmed.zip" +assert_file_not_empty "example_R1.trimmed.fastq" +assert_file_not_empty "example_R2.trimmed.fastq" +assert_file_not_empty "example_R1.trimming_report.txt" +assert_file_not_empty "example_R2.trimming_report.txt" + +echo ">> Check contents" +assert_file_contains_line "example_R1.trimmed.fastq" "TACGTACGTAAAAA" +assert_file_contains_line "example_R2.trimmed.fastq" "TACGTACGTGGGGG" +assert_file_contains "example_R1.trimming_report.txt" "sequences processed in total" +assert_file_contains "example_R2.trimming_report.txt" "Number of sequence pairs removed because at least one read was shorter than the length cutoff" + +################################################################# + +echo ">>> Testing for single-end reads" +"$meta_executable" \ + --paired false \ + --input "example_R1.fastq" \ + --adapter "ACG" \ + --trim_html_1 example.trimmed.html \ + --trim_zip_1 example.trimmed.zip \ + --fastq_1 example.trimmed.fastq \ + --trim_log_1 example.trimming_report.txt \ + +echo ">> Checking output" +assert_file_exists "example.trimmed.html" +assert_file_exists "example.trimmed.zip" +assert_file_exists "example.trimmed.fastq" +assert_file_exists "example.trimming_report.txt" + +echo ">> Check if output is empty" +assert_file_not_empty "example.trimmed.html" +assert_file_not_empty "example.trimmed.zip" +assert_file_not_empty "example.trimmed.fastq" +assert_file_not_empty "example.trimming_report.txt" + +echo ">> Check contents" +assert_file_contains_line "example.trimmed.fastq" "TACGTACGTAAAAA" +assert_file_contains "example.trimming_report.txt" "Sequences removed because they became shorter than the length cutoff" + +################################################################# + +echo ">>> Test finished successfully" +exit 0