diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..ca5262bc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+*.DS_Store
+*__pycache__
+
+# IDE ignores
+.idea/
+
+# R specific ignores
+.Rhistory
+.Rproj.user
+*.Rproj
+
+# viash specific ignores
+target/
+
+# nextflow specific ignores
+.nextflow*
+work
diff --git a/CHANGELOG.md b/CHANGELOG.md
index d2610ff9..63f69365 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
 
 * `arriba`: Detect gene fusions from RNA-seq data (PR #1).
 
+* `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3).
+
 ## MAJOR CHANGES
 
 ## MINOR CHANGES
diff --git a/src/arriba/test_data/README.md b/src/arriba/test_data/README.md
deleted file mode 100644
index 8cfb28f6..00000000
--- a/src/arriba/test_data/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# arriba test data
-
-Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/arriba/test.
-
-__author__ = "Jan Forster"
-__copyright__ = "Copyright 2019, Jan Forster"
-__email__ = "j.forster@dkfz.de"
-__license__ = "MIT"
-
-```bash
-git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
-cp -r /tmp/snakemake-wrappers/bio/arriba/test/* src/arriba/test_data
-```
\ No newline at end of file
diff --git a/src/arriba/test_data/script.sh b/src/arriba/test_data/script.sh
new file mode 100755
index 00000000..e5bbcf2c
--- /dev/null
+++ b/src/arriba/test_data/script.sh
@@ -0,0 +1,10 @@
+# arriba test data
+
+# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/arriba/test
+
+if [ ! -d /tmp/snakemake-wrappers ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
+fi
+
+cp -r /tmp/snakemake-wrappers/bio/arriba/test/* src/arriba/test_data
+
diff --git a/src/fastp/config.vsh.yaml b/src/fastp/config.vsh.yaml
new file mode 100644
index 00000000..c513ec92
--- /dev/null
+++ b/src/fastp/config.vsh.yaml
@@ -0,0 +1,574 @@
+functionality:
+  name: fastp
+  description: |
+    An ultra-fast all-in-one FASTQ preprocessor (QC/adapters/trimming/filtering/splitting/merging...).
+
+    Features:
+
+      - comprehensive quality profiling for both before and after filtering data (quality curves, base contents, KMER, Q20/Q30, GC Ratio, duplication, adapter contents...)
+      - filter out bad reads (too low quality, too short, or too many N...)
+      - cut low quality bases for per read in its 5' and 3' by evaluating the mean quality from a sliding window (like Trimmomatic but faster).
+      - trim all reads in front and tail
+      - cut adapters. Adapter sequences can be automatically detected, which means you don't have to input the adapter sequences to trim them.
+      - correct mismatched base pairs in overlapped regions of paired end reads, if one base is with high quality while the other is with ultra low quality
+      - trim polyG in 3' ends, which is commonly seen in NovaSeq/NextSeq data. Trim polyX in 3' ends to remove unwanted polyX tailing (i.e. polyA tailing for mRNA-Seq data)
+      - preprocess unique molecular identifier (UMI) enabled data, shift UMI to sequence name.
+      - report JSON format result for further interpreting.
+      - visualize quality control and filtering results on a single HTML page (like FASTQC but faster and more informative).
+      - split the output to multiple files (0001.R1.gz, 0002.R1.gz...) to support parallel processing. Two modes can be used, limiting the total split file number, or limitting the lines of each split file.
+      - support long reads (data from PacBio / Nanopore devices).
+      - support reading from STDIN and writing to STDOUT
+      - support interleaved input
+      - support ultra-fast FASTQ-level deduplication
+  info:
+    keywords: [RNA-Seq, Trimming, Quality control]
+    repository: https://github.com/OpenGene/fastp
+    documentation: https://github.com/OpenGene/fastp/blob/master/README.md
+    reference: "doi:10.1093/bioinformatics/bty560"
+    licence: MIT
+  argument_groups:
+    - name: Inputs
+      description: |
+        `fastp` supports both single-end (SE) and paired-end (PE) input.
+
+        - for SE data, you only have to specify read1 input by `-i` or `--in1`.
+        - for PE data, you should also specify read2 input by `-I` or `--in2`.
+      arguments:
+        - name: --in1
+          alternatives: [-i]
+          type: file
+          description: Input FastQ file. Must be single-end or paired-end R1. Can be gzipped.
+          required: true
+          example: in.R1.fq.gz
+        - name: --in2
+          alternatives: [-I]
+          type: file
+          description: Input FastQ file. Must be paired-end R2. Can be gzipped.
+          required: false
+          example: in.R2.fq.gz
+    - name: Outputs
+      description: |
+
+        - for SE data, you only have to specify read1 output by `-o` or `--out1`.
+        - for PE data, you should also specify read2 output by `-O` or `--out2`.
+        - if you don't specify the output file names, no output files will be written, but the QC will still be done for both data before and after filtering.
+        - the output will be gzip-compressed if its file name ends with `.gz`
+      arguments:
+        - name: --out1
+          alternatives: [-o]
+          type: file
+          description: The single-end or paired-end R1 reads that pass QC. Will be gzipped if its file name ends with `.gz`.
+          required: true
+          example: out.R1.fq.gz
+          direction: output
+        - name: --out2
+          alternatives: [-O]
+          type: file
+          description: The paired-end R2 reads that pass QC. Will be gzipped if its file name ends with `.gz`.
+          required: false
+          example: out.R2.fq.gz
+          direction: output
+        - name: --unpaired1
+          type: file
+          description: Store the reads that `read1` passes filters but its paired `read2` doesn't.
+          required: false
+          example: unpaired.R1.fq.gz
+          direction: output
+        - name: --unpaired2
+          type: file
+          description: Store the reads that `read2` passes filters but its paired `read1` doesn't.
+          required: false
+          example: unpaired.R2.fq.gz
+          direction: output
+        - name: --failed_out
+          type: file
+          description: |
+            Store the reads that fail filters.
+
+            If one read failed and is written to --failed_out, its failure reason will be appended to its read name. For example, failed_quality_filter, failed_too_short etc.
+            For PE data, if unpaired reads are not stored (by giving --unpaired1 or --unpaired2), the failed pair of reads will be put together. If one read passes the filters but its pair doesn't, the failure reason will be paired_read_is_failing.
+          required: false
+          example: failed.fq.gz
+          direction: output
+        - name: --overlapped_out
+          type: file
+          description: |
+            For each read pair, output the overlapped region if it has no any mismatched base.
+          direction: output
+    - name: Report output arguments
+      arguments:
+        - name: --json
+          alternatives: [-j]
+          type: file
+          description: |
+            The json format report file name
+          example: out.json
+          direction: output
+        - name: --html
+          type: file
+          description: |
+            The html format report file name
+          example: out.html
+          direction: output
+        - name: --report_title
+          type: string
+          description: |
+            The title of the html report, default is "fastp report".
+          example: fastp report
+    - name: Adapter trimming
+      description: |
+        Adapter trimming is enabled by default, but you can disable it by `-A` or `--disable_adapter_trimming`. Adapter sequences can be automatically detected for both PE/SE data.
+
+        - For SE data, the adapters are evaluated by analyzing the tails of first ~1M reads. This evaluation may be inacurrate, and you can specify the adapter sequence by `-a` or `--adapter_sequence` option. If adapter sequence is specified, the auto detection for SE data will be disabled.
+        - For PE data, the adapters can be detected by per-read overlap analysis, which seeks for the overlap of each pair of reads. This method is robust and fast, so normally you don't have to input the adapter sequence even you know it. But you can still specify the adapter sequences for read1 by `--adapter_sequence`, and for read2 by `--adapter_sequence_r2`. If `fastp` fails to find an overlap (i.e. due to low quality bases), it will use these sequences to trim adapters for read1 and read2 respectively.
+        - For PE data, the adapter sequence auto-detection is disabled by default since the adapters can be trimmed by overlap analysis. However, you can specify `--detect_adapter_for_pe` to enable it.
+        - For PE data, `fastp` will run a little slower if you specify the sequence adapters or enable adapter auto-detection, but usually result in a slightly cleaner output, since the overlap analysis may fail due to sequencing errors or adapter dimers.
+        - The most widely used adapter is the Illumina TruSeq adapters. If your data is from the TruSeq library, you can add `--adapter_sequence=AGATCGGAAGAGCACACGTCTGAACTCCAGTCA --adapter_sequence_r2=AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT` to your command lines, or enable auto detection for PE data by specifing `detect_adapter_for_pe`.
+        - `fastp` contains some built-in known adapter sequences for better auto-detection. If you want to make some adapters to be a part of the built-in adapters, please file an issue.
+
+        You can also specify --adapter_fasta to give a FASTA file to tell fastp to trim multiple adapters in this FASTA file. Here is a sample of such adapter FASTA file:
+
+        ```
+        >Illumina TruSeq Adapter Read 1
+        AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
+        >Illumina TruSeq Adapter Read 2
+        AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
+        >polyA
+        AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+        ```
+
+        The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. And you can give whatever you want to trim, rather than regular sequencing adapters (i.e. polyA).
+
+        `fastp` first trims the auto-detected adapter or the adapter sequences given by `--adapter_sequence | --adapter_sequence_r2`, then trims the adapters given by `--adapter_fasta` one by one.
+
+        The sequence distribution of trimmed adapters can be found at the HTML/JSON reports.
+      arguments:
+        - name: --disable_adapter_trimming
+          alternatives: [-A]
+          type: boolean_true
+          description: |
+            Disable adapter trimming.
+        - name: --detect_adapter_for_pe
+          type: boolean_true
+          description: |
+            By default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data.
+        - name: --adapter_sequence
+          alternatives: [-a]
+          type: string
+          description: |
+            The adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped
+        - name: --adapter_sequence_r2
+          type: string
+          description: |
+            The adapter sequences to be trimmed for R2. This is used for PE data if R1/R2 are found overlapped.
+        - name: --adapter_fasta
+          type: file
+          description: |
+            A FASTA file containing all the adapter sequences to be trimmed. For SE data, if not specified, the adapters will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped.
+    - name: Base trimming
+      arguments:
+        - name: --trim_front1
+          alternatives: [-f]
+          type: integer
+          description: |
+            Trimming how many bases in front for read1, default is 0.
+          example: 0
+        - name: --trim_tail1
+          alternatives: [-t]
+          type: integer
+          description: |
+            Trimming how many bases in tail for read1, default is 0.
+          example: 0
+        - name: --max_len1
+          alternatives: [-b]
+          type: integer
+          min: 0
+          description: |
+            If read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1. Default 0 means no limitation.
+        - name: --trim_front2
+          alternatives: [-F]
+          type: integer
+          description: |
+            Trimming how many bases in front for read2, default is 0.
+          example: 0
+        - name: --trim_tail2
+          alternatives: [-T]
+          type: integer
+          description: |
+            Trimming how many bases in tail for read2, default is 0.
+          example: 0
+        - name: --max_len2
+          alternatives: [-B]
+          type: integer
+          min: 0
+          description: |
+            If read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2. Default 0 means no limitation.
+    - name: Merging mode
+      description: Allows merging paired-end reads into a single longer read if they are overlapping.
+      arguments:
+        - name: --merge
+          alternatives: [-m]
+          type: boolean_true
+          description: |
+            For paired-end input, merge each pair of reads into a single read if they are overlapped. The merged reads will be written to the file given by --merged_out, the unmerged reads will be written to the files specified by --out1 and --out2. The merging mode is disabled by default.
+        - name: --merged_out
+          type: file
+          description: |
+            In the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output.
+          direction: output
+          example: merged.fq.gz
+        - name: --include_unmerged
+          type: boolean_true
+          description: |
+            In the merging mode, write the unmerged or unpaired reads to the file specified by --merge. Disabled by default.
+    - name: Additional input arguments
+      description: Affects how the input is read.
+      arguments:
+        - name: --interleaved_in
+          type: boolean_true
+          description: |
+            Indicate that <in1> is an interleaved FASTQ which contains both read1 and read2. Disabled by default.
+        - name: --fix_mgi_id
+          type: boolean_true
+          description: |
+            The MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it.
+        - name: --phred64
+          alternatives: ["-6"]
+          type: boolean_true
+          description: |
+            Indicate the input is using phred64 scoring (it'll be converted to phred33, so the output will still be phred33)
+    - name: Additional output arguments
+      description: Affects how the output is written.
+      arguments:
+        - name: --compression
+          alternatives: ["-z"]
+          type: integer
+          description: |
+            Compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest, default is 4.
+          example: 4
+          min: 1
+          max: 9
+        - name: --dont_overwrite
+          type: boolean_true
+          description: |
+            Don't overwrite existing files. Overwritting is allowed by default.
+    - name: Logging arguments
+      arguments:
+        - name: --verbose
+          alternatives: [-V]
+          type: boolean_true
+          description: Output verbose log information (i.e. when every 1M reads are processed).
+    - name: Processing arguments
+      arguments:
+        - name: --reads_to_process
+          type: long
+          description: |
+            Specify how many reads/pairs to be processed. Default 0 means process all reads.
+          example: 1000000
+          min: 0
+    - name: Deduplication arguments
+      arguments:
+        - name: --dedup
+          type: boolean_true
+          description: |
+            Enable deduplication to drop the duplicated reads/pairs
+        - name: --dup_calc_accuracy
+          type: integer
+          description: |
+            Accuracy level to calculate duplication (1~6). Higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode.
+          example: 3
+          min: 1
+          max: 6
+        - name: --dont_eval_duplication
+          type: boolean_true
+          description: |
+            Don't evaluate duplication rate to save time and use less memory.
+    - name: PolyG tail trimming arguments
+      arguments:
+        - name: --trim_poly_g
+          alternatives: [-g]
+          type: boolean_true
+          description: |
+            Force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
+        - name: --poly_g_min_len
+          type: integer
+          description: |
+            The minimum length to detect polyG in the read tail. 10 by default.
+          example: 10
+          min: 1
+        - name: --disable_trim_poly_g
+          alternatives: [-G]
+          type: boolean_true
+          description: |
+            Disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
+    - name: PolyX tail trimming arguments
+      arguments:
+        - name: --trim_poly_x
+          alternatives: [-x]
+          type: boolean_true
+          description: |
+            Enable polyX trimming in 3' ends.
+        - name: --poly_x_min_len
+          type: integer
+          description: |
+            The minimum length to detect polyX in the read tail. 10 by default.
+          example: 10
+          min: 1
+    - name: Cut arguments
+      arguments:
+        - name: --cut_front
+          alternatives: ["-5"]
+          type: integer
+          description: |
+            Move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise.
+        - name: --cut_tail
+          alternatives: ["-3"]
+          type: integer
+          description: |
+            Move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise.
+        - name: --cut_right
+          alternatives: ["-r"]
+          type: integer
+          description: |
+            Move a sliding window from front to tail, if meet one window with mean quality < threshold, drop the bases in the window and the right part, and then stop.
+        - name: --cut_window_size
+          alternatives: ["-W"]
+          type: integer
+          description: |
+            The window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000, default: 4.
+          example: 4
+          min: 1
+        - name: --cut_mean_quality
+          alternatives: ["-M"]
+          type: integer
+          description: |
+            The mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36 default: 20 (Q20)
+          example: 20
+          min: 0
+        - name: --cut_front_window_size
+          type: integer
+          description: |
+            The window size option of cut_front, default to cut_window_size if not specified.
+          example: 4
+          min: 1
+        - name: --cut_front_mean_quality
+          type: integer
+          description: |
+            The mean quality requirement option of cut_front, default to cut_mean_quality if not specified.
+          example: 20
+          min: 0
+        - name: --cut_tail_window_size
+          type: integer
+          description: |
+            The window size option of cut_tail, default to cut_window_size if not specified.
+          example: 4
+          min: 1
+        - name: --cut_tail_mean_quality
+          type: integer
+          description: |
+            The mean quality requirement option of cut_tail, default to cut_mean_quality if not specified.
+          example: 20
+          min: 0
+        - name: --cut_right_window_size
+          type: integer
+          description: |
+            The window size option of cut_right, default to cut_window_size if not specified.
+          example: 4
+          min: 1
+        - name: --cut_right_mean_quality
+          type: integer
+          description: |
+            The mean quality requirement option of cut_right, default to cut_mean_quality if not specified.
+          example: 20
+          min: 0
+    - name: Quality filtering arguments
+      arguments:
+        - name: --disable_quality_filtering
+          alternatives: [-Q]
+          type: boolean_true
+          description: |
+            Quality filtering is enabled by default. If this option is specified, quality filtering is disabled.
+        - name: --qualified_quality_phred
+          alternatives: [-q]
+          type: integer
+          description: |
+            The quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified.
+          example: 15
+          min: 0
+        - name: --unqualified_percent_limit
+          alternatives: [-u]
+          type: integer
+          description: |
+            How many percents of bases are allowed to be unqualified (0~100). Default 40 means 40%.
+          example: 40
+          min: 0
+          max: 100
+        - name: --n_base_limit
+          alternatives: [-n]
+          type: integer
+          description: |
+            If one read's number of N base is >n_base_limit, then this read/pair is discarded. Default is 5.
+          example: 5
+          min: 0
+        - name: --average_qual
+          alternatives: [-e]
+          type: integer
+          description: |
+            If one read's average quality score <avg_qual, then this read/pair is discarded. Default 0 means no requirement.
+          example: 0
+          min: 0
+    - name: Length filtering arguments
+      arguments:
+        - name: --disable_length_filtering
+          alternatives: [-L]
+          type: boolean_true
+          description: |
+            Length filtering is enabled by default. If this option is specified, length filtering is disabled.
+        - name: --length_required
+          alternatives: [-l]
+          type: integer
+          description: |
+            Reads shorter than length_required will be discarded, default is 15.
+          example: 15
+          min: 0
+        - name: --length_limit
+          type: integer
+          description: |
+            Reads longer than length_limit will be discarded, default 0 means no limitation.
+          example: 0
+          min: 0
+    - name: Low complexity filtering arguments
+      arguments:
+        - name: --low_complexity_filter
+          alternatives: [-y]
+          type: boolean_true
+          description: |
+            Enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).
+        - name: --complexity_threshold
+          alternatives: [-Y]
+          type: integer
+          description: |
+            The threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required.
+          example: 30
+          min: 0
+    - name: Index filtering arguments
+      arguments:
+        - name: --filter_by_index1
+          type: file
+          description: |
+            Specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line.
+        - name: --filter_by_index2
+          type: file
+          description: |
+            Specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line.
+        - name: --filter_by_index_threshold
+          type: integer
+          description: |
+            The allowed difference of index barcode for index filtering, default 0 means completely identical.
+          example: 0
+          min: 0
+    - name: Overlapped region correction
+      arguments:
+        - type: boolean_true
+          name: --correction
+          alternatives: [-c]
+          description: |
+            Enable base correction in overlapped regions (only for PE data), default is disabled.
+        - name: --overlap_len_require
+          type: integer
+          description: |
+            The minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default.
+          example: 30
+          min: 0
+        - name: --overlap_diff_limit
+          type: integer
+          description: |
+            The maximum number of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 5 by default.
+          example: 5
+          min: 0
+        - name: --overlap_diff_percent_limit
+          type: integer
+          description: |
+            The maximum percentage of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. Default 20 means 20%.
+          example: 20
+          min: 0
+          max: 100
+    - name: UMI arguments
+      arguments:
+        - name: --umi
+          alternatives: [-U]
+          type: boolean_true
+          description: |
+            Enable unique molecular identifier (UMI) preprocessing.
+        - name: --umi_loc
+          type: string
+          description: |
+            Specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none.
+          choices: [index1, index2, read1, read2, per_index, per_read]
+        - name: --umi_len
+          type: integer
+          description: |
+            If the UMI is in read1/read2, its length should be provided.
+          example: 0
+          min: 0
+        - name: --umi_prefix
+          type: string
+          description: |
+            If specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default.
+        - name: --umi_skip
+          type: integer
+          description: |
+            If the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0.
+          example: 0
+          min: 0
+        - name: --umi_delim
+          type: string
+          description: |
+            If the UMI is in index1/index2, fastp can use a delimiter to separate UMI from the read sequence, default is none.
+    - name: Overrepresentation analysis arguments
+      arguments:
+        - name: --overrepresentation_analysis
+          alternatives: [-p]
+          type: boolean_true
+          description: |
+            Enable overrepresentation analysis.
+        - name: --overrepresentation_sampling
+          type: integer
+          description: |
+            One in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20.
+          example: 20
+          min: 1
+    # # would need to set all outputs to multiple: true
+    # - name: Split arguments
+    #   arguments:
+    #     - name: --split
+    #       alternatives: [-s]
+    #       type: boolean_true
+    #       description: |
+    #         Split output by limiting total split file number with this option (2~999), a sequential number prefix will be added to output name ( 0001.out.fq, 0002.out.fq...), disabled by default.
+    #     - name: --split_by_lines
+    #       alternatives: [-S]
+    #       type: long
+    #       description: |
+    #         Split output by limiting lines of each file with this option(>=1000), a sequential number prefix will be added to output name ( 0001.out.fq, 0002.out.fq...), disabled by default.
+    #     - name: --split_prefix_digits
+    #       type: integer
+    #       description: |
+    #         The digits for the sequential number padding (1~10), default is 4, so the filename will be padded as 0001.xxx, 0 to disable padding.
+    #       example: 4
+  resources:
+    - type: bash_script
+      path: script.sh
+  test_resources:
+    - type: bash_script
+      path: test.sh
+    - type: file
+      path: test_data
+platforms:
+  - type: docker
+    image: quay.io/biocontainers/fastp:0.23.4--hadf994f_2
+    setup:
+      - type: docker
+        run: |
+          fastp --version 2>&1 | sed 's# #: "#;s#$#"#' > /var/software_versions.txt
+  - type: nextflow
diff --git a/src/fastp/help.txt b/src/fastp/help.txt
new file mode 100644
index 00000000..d34917b2
--- /dev/null
+++ b/src/fastp/help.txt
@@ -0,0 +1,93 @@
+```bash
+fastp --help
+```
+
+usage: fastp [options] ... 
+options:
+  -i, --in1                            read1 input file name (string [=])
+  -o, --out1                           read1 output file name (string [=])
+  -I, --in2                            read2 input file name (string [=])
+  -O, --out2                           read2 output file name (string [=])
+      --unpaired1                      for PE input, if read1 passed QC but read2 not, it will be written to unpaired1. Default is to discard it. (string [=])
+      --unpaired2                      for PE input, if read2 passed QC but read1 not, it will be written to unpaired2. If --unpaired2 is same as --unpaired1 (default mode), both unpaired reads will be written to this same file. (string [=])
+      --overlapped_out                 for each read pair, output the overlapped region if it has no any mismatched base. (string [=])
+      --failed_out                     specify the file to store reads that cannot pass the filters. (string [=])
+  -m, --merge                          for paired-end input, merge each pair of reads into a single read if they are overlapped. The merged reads will be written to the file given by --merged_out, the unmerged reads will be written to the files specified by --out1 and --out2. The merging mode is disabled by default.
+      --merged_out                     in the merging mode, specify the file name to store merged output, or specify --stdout to stream the merged output (string [=])
+      --include_unmerged               in the merging mode, write the unmerged or unpaired reads to the file specified by --merge. Disabled by default.
+  -6, --phred64                        indicate the input is using phred64 scoring (it'll be converted to phred33, so the output will still be phred33)
+  -z, --compression                    compression level for gzip output (1 ~ 9). 1 is fastest, 9 is smallest, default is 4. (int [=4])
+      --stdin                          input from STDIN. If the STDIN is interleaved paired-end FASTQ, please also add --interleaved_in.
+      --stdout                         stream passing-filters reads to STDOUT. This option will result in interleaved FASTQ output for paired-end output. Disabled by default.
+      --interleaved_in                 indicate that <in1> is an interleaved FASTQ which contains both read1 and read2. Disabled by default.
+      --reads_to_process               specify how many reads/pairs to be processed. Default 0 means process all reads. (int [=0])
+      --dont_overwrite                 don't overwrite existing files. Overwritting is allowed by default.
+      --fix_mgi_id                     the MGI FASTQ ID format is not compatible with many BAM operation tools, enable this option to fix it.
+  -V, --verbose                        output verbose log information (i.e. when every 1M reads are processed).
+  -A, --disable_adapter_trimming       adapter trimming is enabled by default. If this option is specified, adapter trimming is disabled
+  -a, --adapter_sequence               the adapter for read1. For SE data, if not specified, the adapter will be auto-detected. For PE data, this is used if R1/R2 are found not overlapped. (string [=auto])
+      --adapter_sequence_r2            the adapter for read2 (PE data only). This is used if R1/R2 are found not overlapped. If not specified, it will be the same as <adapter_sequence> (string [=auto])
+      --adapter_fasta                  specify a FASTA file to trim both read1 and read2 (if PE) by all the sequences in this FASTA file (string [=])
+      --detect_adapter_for_pe          by default, the auto-detection for adapter is for SE data input only, turn on this option to enable it for PE data.
+  -f, --trim_front1                    trimming how many bases in front for read1, default is 0 (int [=0])
+  -t, --trim_tail1                     trimming how many bases in tail for read1, default is 0 (int [=0])
+  -b, --max_len1                       if read1 is longer than max_len1, then trim read1 at its tail to make it as long as max_len1. Default 0 means no limitation (int [=0])
+  -F, --trim_front2                    trimming how many bases in front for read2. If it's not specified, it will follow read1's settings (int [=0])
+  -T, --trim_tail2                     trimming how many bases in tail for read2. If it's not specified, it will follow read1's settings (int [=0])
+  -B, --max_len2                       if read2 is longer than max_len2, then trim read2 at its tail to make it as long as max_len2. Default 0 means no limitation. If it's not specified, it will follow read1's settings (int [=0])
+  -D, --dedup                          enable deduplication to drop the duplicated reads/pairs
+      --dup_calc_accuracy              accuracy level to calculate duplication (1~6), higher level uses more memory (1G, 2G, 4G, 8G, 16G, 24G). Default 1 for no-dedup mode, and 3 for dedup mode. (int [=0])
+      --dont_eval_duplication          don't evaluate duplication rate to save time and use less memory.
+  -g, --trim_poly_g                    force polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
+      --poly_g_min_len                 the minimum length to detect polyG in the read tail. 10 by default. (int [=10])
+  -G, --disable_trim_poly_g            disable polyG tail trimming, by default trimming is automatically enabled for Illumina NextSeq/NovaSeq data
+  -x, --trim_poly_x                    enable polyX trimming in 3' ends.
+      --poly_x_min_len                 the minimum length to detect polyX in the read tail. 10 by default. (int [=10])
+  -5, --cut_front                      move a sliding window from front (5') to tail, drop the bases in the window if its mean quality < threshold, stop otherwise.
+  -3, --cut_tail                       move a sliding window from tail (3') to front, drop the bases in the window if its mean quality < threshold, stop otherwise.
+  -r, --cut_right                      move a sliding window from front to tail, if meet one window with mean quality < threshold, drop the bases in the window and the right part, and then stop.
+  -W, --cut_window_size                the window size option shared by cut_front, cut_tail or cut_sliding. Range: 1~1000, default: 4 (int [=4])
+  -M, --cut_mean_quality               the mean quality requirement option shared by cut_front, cut_tail or cut_sliding. Range: 1~36 default: 20 (Q20) (int [=20])
+      --cut_front_window_size          the window size option of cut_front, default to cut_window_size if not specified (int [=4])
+      --cut_front_mean_quality         the mean quality requirement option for cut_front, default to cut_mean_quality if not specified (int [=20])
+      --cut_tail_window_size           the window size option of cut_tail, default to cut_window_size if not specified (int [=4])
+      --cut_tail_mean_quality          the mean quality requirement option for cut_tail, default to cut_mean_quality if not specified (int [=20])
+      --cut_right_window_size          the window size option of cut_right, default to cut_window_size if not specified (int [=4])
+      --cut_right_mean_quality         the mean quality requirement option for cut_right, default to cut_mean_quality if not specified (int [=20])
+  -Q, --disable_quality_filtering      quality filtering is enabled by default. If this option is specified, quality filtering is disabled
+  -q, --qualified_quality_phred        the quality value that a base is qualified. Default 15 means phred quality >=Q15 is qualified. (int [=15])
+  -u, --unqualified_percent_limit      how many percents of bases are allowed to be unqualified (0~100). Default 40 means 40% (int [=40])
+  -n, --n_base_limit                   if one read's number of N base is >n_base_limit, then this read/pair is discarded. Default is 5 (int [=5])
+  -e, --average_qual                   if one read's average quality score <avg_qual, then this read/pair is discarded. Default 0 means no requirement (int [=0])
+  -L, --disable_length_filtering       length filtering is enabled by default. If this option is specified, length filtering is disabled
+  -l, --length_required                reads shorter than length_required will be discarded, default is 15. (int [=15])
+      --length_limit                   reads longer than length_limit will be discarded, default 0 means no limitation. (int [=0])
+  -y, --low_complexity_filter          enable low complexity filter. The complexity is defined as the percentage of base that is different from its next base (base[i] != base[i+1]).
+  -Y, --complexity_threshold           the threshold for low complexity filter (0~100). Default is 30, which means 30% complexity is required. (int [=30])
+      --filter_by_index1               specify a file contains a list of barcodes of index1 to be filtered out, one barcode per line (string [=])
+      --filter_by_index2               specify a file contains a list of barcodes of index2 to be filtered out, one barcode per line (string [=])
+      --filter_by_index_threshold      the allowed difference of index barcode for index filtering, default 0 means completely identical. (int [=0])
+  -c, --correction                     enable base correction in overlapped regions (only for PE data), default is disabled
+      --overlap_len_require            the minimum length to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 30 by default. (int [=30])
+      --overlap_diff_limit             the maximum number of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. 5 by default. (int [=5])
+      --overlap_diff_percent_limit     the maximum percentage of mismatched bases to detect overlapped region of PE reads. This will affect overlap analysis based PE merge, adapter trimming and correction. Default 20 means 20%. (int [=20])
+  -U, --umi                            enable unique molecular identifier (UMI) preprocessing
+      --umi_loc                        specify the location of UMI, can be (index1/index2/read1/read2/per_index/per_read, default is none (string [=])
+      --umi_len                        if the UMI is in read1/read2, its length should be provided (int [=0])
+      --umi_prefix                     if specified, an underline will be used to connect prefix and UMI (i.e. prefix=UMI, UMI=AATTCG, final=UMI_AATTCG). No prefix by default (string [=])
+      --umi_skip                       if the UMI is in read1/read2, fastp can skip several bases following UMI, default is 0 (int [=0])
+      --umi_delim                      delimiter to use between the read name and the UMI, default is : (string [=:])
+  -p, --overrepresentation_analysis    enable overrepresented sequence analysis.
+  -P, --overrepresentation_sampling    one in (--overrepresentation_sampling) reads will be computed for overrepresentation analysis (1~10000), smaller is slower, default is 20. (int [=20])
+  -j, --json                           the json format report file name (string [=fastp.json])
+  -h, --html                           the html format report file name (string [=fastp.html])
+  -R, --report_title                   should be quoted with ' or ", default is "fastp report" (string [=fastp report])
+  -w, --thread                         worker thread number, default is 3 (int [=3])
+  -s, --split                          split output by limiting total split file number with this option (2~999), a sequential number prefix will be added to output name ( 0001.out.fq, 0002.out.fq...), disabled by default (int [=0])
+  -S, --split_by_lines                 split output by limiting lines of each file with this option(>=1000), a sequential number prefix will be added to output name ( 0001.out.fq, 0002.out.fq...), disabled by default (long [=0])
+  -d, --split_prefix_digits            the digits for the sequential number padding (1~10), default is 4, so the filename will be padded as 0001.xxx, 0 to disable padding (int [=4])
+      --cut_by_quality5                DEPRECATED, use --cut_front instead.
+      --cut_by_quality3                DEPRECATED, use --cut_tail instead.
+      --cut_by_quality_aggressive      DEPRECATED, use --cut_right instead.
+      --discard_unmerged               DEPRECATED, no effect now, see the introduction for merging.
+  -?, --help                           print this message
diff --git a/src/fastp/script.sh b/src/fastp/script.sh
new file mode 100644
index 00000000..4bb37c87
--- /dev/null
+++ b/src/fastp/script.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# disable flags
+[[ "$par_disable_adapter_trimming" == "false" ]] && unset par_disable_adapter_trimming
+[[ "$par_detect_adapter_for_pe" == "false" ]] && unset par_detect_adapter_for_pe
+[[ "$par_merge" == "false" ]] && unset par_merge
+[[ "$par_include_unmerged" == "false" ]] && unset par_include_unmerged
+[[ "$par_interleaved_in" == "false" ]] && unset par_interleaved_in
+[[ "$par_fix_mgi_id" == "false" ]] && unset par_fix_mgi_id
+[[ "$par_phred64" == "false" ]] && unset par_phred64
+[[ "$par_dont_overwrite" == "false" ]] && unset par_dont_overwrite
+[[ "$par_verbose" == "false" ]] && unset par_verbose
+[[ "$par_dedup" == "false" ]] && unset par_dedup
+[[ "$par_dont_eval_duplication" == "false" ]] && unset par_dont_eval_duplication
+[[ "$par_trim_poly_g" == "false" ]] && unset par_trim_poly_g
+[[ "$par_disable_trim_poly_g" == "false" ]] && unset par_disable_trim_poly_g
+[[ "$par_trim_poly_x" == "false" ]] && unset par_trim_poly_x
+[[ "$par_disable_quality_filtering" == "false" ]] && unset par_disable_quality_filtering
+[[ "$par_disable_length_filtering" == "false" ]] && unset par_disable_length_filtering
+[[ "$par_low_complexity_filter" == "false" ]] && unset par_low_complexity_filter
+[[ "$par_umi" == "false" ]] && unset par_umi
+[[ "$par_overrepresentation_analysis" == "false" ]] && unset par_overrepresentation_analysis
+
+# run command
+fastp \
+  -i "$par_in1" \
+  -o "$par_out1" \
+  ${par_in2:+--in2 "${par_in2}"} \
+  ${par_out2:+--out2 "${par_out2}"} \
+  ${par_unpaired1:+--unpaired1 "${par_unpaired1}"} \
+  ${par_unpaired2:+--unpaired2 "${par_unpaired2}"} \
+  ${par_failed_out:+--failed_out "${par_failed_out}"} \
+  ${par_overlapped_out:+--overlapped_out "${par_overlapped_out}"} \
+  ${par_json:+--json "${par_json}"} \
+  ${par_html:+--html "${par_html}"} \
+  ${par_report_title:+--report_title "${par_report_title}"} \
+  ${par_disable_adapter_trimming:+--disable_adapter_trimming} \
+  ${par_detect_adapter_for_pe:+--detect_adapter_for_pe} \
+  ${par_adapter_sequence:+--adapter_sequence "${par_adapter_sequence}"} \
+  ${par_adapter_sequence_r2:+--adapter_sequence_r2 "${par_adapter_sequence_r2}"} \
+  ${par_adapter_fasta:+--adapter_fasta "${par_adapter_fasta}"} \
+  ${par_trim_front1:+--trim_front1 "${par_trim_front1}"} \
+  ${par_trim_tail1:+--trim_tail1 "${par_trim_tail1}"} \
+  ${par_max_len1:+--max_len1 "${par_max_len1}"} \
+  ${par_trim_front2:+--trim_front2 "${par_trim_front2}"} \
+  ${par_trim_tail2:+--trim_tail2 "${par_trim_tail2}"} \
+  ${par_max_len2:+--max_len2 "${par_max_len2}"} \
+  ${par_merge:+--merge} \
+  ${par_merged_out:+--merged_out "${par_merged_out}"} \
+  ${par_include_unmerged:+--include_unmerged} \
+  ${par_interleaved_in:+--interleaved_in} \
+  ${par_fix_mgi_id:+--fix_mgi_id} \
+  ${par_phred64:+--phred64} \
+  ${par_compression:+--compression "${par_compression}"} \
+  ${par_dont_overwrite:+--dont_overwrite} \
+  ${par_verbose:+--verbose} \
+  ${par_reads_to_process:+--reads_to_process "${par_reads_to_process}"} \
+  ${par_dedup:+--dedup} \
+  ${par_dup_calc_accuracy:+--dup_calc_accuracy "${par_dup_calc_accuracy}"} \
+  ${par_dont_eval_duplication:+--dont_eval_duplication} \
+  ${par_trim_poly_g:+--trim_poly_g} \
+  ${par_poly_g_min_len:+--poly_g_min_len "${par_poly_g_min_len}"} \
+  ${par_disable_trim_poly_g:+--disable_trim_poly_g} \
+  ${par_trim_poly_x:+--trim_poly_x} \
+  ${par_poly_x_min_len:+--poly_x_min_len "${par_poly_x_min_len}"} \
+  ${par_cut_front:+--cut_front "${par_cut_front}"} \
+  ${par_cut_tail:+--cut_tail "${par_cut_tail}"} \
+  ${par_cut_right:+--cut_right "${par_cut_right}"} \
+  ${par_cut_window_size:+--cut_window_size "${par_cut_window_size}"} \
+  ${par_cut_mean_quality:+--cut_mean_quality "${par_cut_mean_quality}"} \
+  ${par_cut_front_window_size:+--cut_front_window_size "${par_cut_front_window_size}"} \
+  ${par_cut_front_mean_quality:+--cut_front_mean_quality "${par_cut_front_mean_quality}"} \
+  ${par_cut_tail_window_size:+--cut_tail_window_size "${par_cut_tail_window_size}"} \
+  ${par_cut_tail_mean_quality:+--cut_tail_mean_quality "${par_cut_tail_mean_quality}"} \
+  ${par_cut_right_window_size:+--cut_right_window_size "${par_cut_right_window_size}"} \
+  ${par_cut_right_mean_quality:+--cut_right_mean_quality "${par_cut_right_mean_quality}"} \
+  ${par_disable_quality_filtering:+--disable_quality_filtering} \
+  ${par_qualified_quality_phred:+--qualified_quality_phred "${par_qualified_quality_phred}"} \
+  ${par_unqualified_percent_limit:+--unqualified_percent_limit "${par_unqualified_percent_limit}"} \
+  ${par_n_base_limit:+--n_base_limit "${par_n_base_limit}"} \
+  ${par_average_qual:+--average_qual "${par_average_qual}"} \
+  ${par_disable_length_filtering:+--disable_length_filtering} \
+  ${par_length_required:+--length_required "${par_length_required}"} \
+  ${par_length_limit:+--length_limit "${par_length_limit}"} \
+  ${par_low_complexity_filter:+--low_complexity_filter} \
+  ${par_complexity_threshold:+--complexity_threshold "${par_complexity_threshold}"} \
+  ${par_filter_by_index1:+--filter_by_index1 "${par_filter_by_index1}"} \
+  ${par_filter_by_index2:+--filter_by_index2 "${par_filter_by_index2}"} \
+  ${par_filter_by_index_threshold:+--filter_by_index_threshold "${par_filter_by_index_threshold}"} \
+  ${par_correction:+--correction} \
+  ${par_overlap_len_require:+--overlap_len_require "${par_overlap_len_require}"} \
+  ${par_overlap_diff_limit:+--overlap_diff_limit "${par_overlap_diff_limit}"} \
+  ${par_overlap_diff_percent_limit:+--overlap_diff_percent_limit "${par_overlap_diff_percent_limit}"} \
+  ${par_umi:+--umi} \
+  ${par_umi_loc:+--umi_loc "${par_umi_loc}"} \
+  ${par_umi_len:+--umi_len "${par_umi_len}"} \
+  ${par_umi_prefix:+--umi_prefix "${par_umi_prefix}"} \
+  ${par_umi_skip:+--umi_skip "${par_umi_skip}"} \
+  ${par_umi_delim:+--umi_delim "${par_umi_delim}"} \
+  ${par_overrepresentation_analysis:+--overrepresentation_analysis} \
+  ${par_overrepresentation_sampling:+--overrepresentation_sampling "${par_overrepresentation_sampling}"} \
+  ${meta_cpus:+--thread "${meta_cpus}"}
diff --git a/src/fastp/test.sh b/src/fastp/test.sh
new file mode 100644
index 00000000..1b1f6f0c
--- /dev/null
+++ b/src/fastp/test.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+set -e
+
+## VIASH START
+meta_executable="target/docker/fastp/fastp"
+meta_resources_dir="src/fastp"
+## VIASH END
+
+#########################################################################################
+mkdir fastp_se
+cd fastp_se
+
+echo "> Run fastp on SE"
+"$meta_executable" \
+  --in1 "$meta_resources_dir/test_data/se/a.fastq" \
+  --out1 "trimmed.fastq" \
+  --failed_out "failed.fastq" \
+  --json "report.json" \
+  --html "report.html" \
+  --adapter_sequence ACGGCTAGCTA
+
+echo ">> Check if output exists"
+[ ! -f "trimmed.fastq" ] && echo ">> trimmed.fastq does not exist" && exit 1
+[ ! -f "failed.fastq" ] && echo ">> failed.fastq does not exist" && exit 1
+[ ! -f "report.json" ] && echo ">> report.json does not exist" && exit 1
+[ ! -f "report.html" ] && echo ">> report.html does not exist" && exit 1
+
+#########################################################################################
+cd ..
+mkdir fastp_pe_minimal
+cd fastp_pe_minimal
+
+echo ">> Run fastp on PE with minimal parameters"
+"$meta_executable" \
+  --in1 "$meta_resources_dir/test_data/pe/a.1.fastq" \
+  --in2 "$meta_resources_dir/test_data/pe/a.2.fastq" \
+  --out1 "trimmed_1.fastq" \
+  --out2 "trimmed_2.fastq"
+
+echo ">> Check if output exists"
+[ ! -f "trimmed_1.fastq" ] && echo ">> trimmed_1.fastq does not exist" && exit 1
+[ ! -f "trimmed_2.fastq" ] && echo ">> trimmed_2.fastq does not exist" && exit 1
+
+#########################################################################################
+cd ..
+mkdir fastp_pe_many
+cd fastp_pe_many
+
+echo ">> Run fastp on PE with many parameters"
+"$meta_executable" \
+  --in1 "$meta_resources_dir/test_data/pe/a.1.fastq" \
+  --in2 "$meta_resources_dir/test_data/pe/a.2.fastq" \
+  --out1 "trimmed_1.fastq" \
+  --out2 "trimmed_2.fastq" \
+  --failed_out "failed.fastq" \
+  --json "report.json" \
+  --html "report.html" \
+  --adapter_sequence ACGGCTAGCTA \
+  --adapter_sequence_r2 AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
+  --merge \
+  --merged_out "merged.fastq"
+
+echo ">> Check if output exists"
+[ ! -f "trimmed_1.fastq" ] && echo ">> trimmed_1.fastq does not exist" && exit 1
+[ ! -f "trimmed_2.fastq" ] && echo ">> trimmed_2.fastq does not exist" && exit 1
+[ ! -f "failed.fastq" ] && echo ">> failed.fastq does not exist" && exit 1
+[ ! -f "report.json" ] && echo ">> report.json does not exist" && exit 1
+[ ! -f "report.html" ] && echo ">> report.html does not exist" && exit 1
+[ ! -f "merged.fastq" ] && echo ">> merged.fastq does not exist" && exit 1
+
+#########################################################################################
+
+echo "> Test successful"
\ No newline at end of file
diff --git a/src/fastp/test_data/pe/a.1.fastq b/src/fastp/test_data/pe/a.1.fastq
new file mode 100644
index 00000000..42735560
--- /dev/null
+++ b/src/fastp/test_data/pe/a.1.fastq
@@ -0,0 +1,4 @@
+@1
+ACGGCAT
++
+!!!!!!!
diff --git a/src/fastp/test_data/pe/a.2.fastq b/src/fastp/test_data/pe/a.2.fastq
new file mode 100644
index 00000000..42735560
--- /dev/null
+++ b/src/fastp/test_data/pe/a.2.fastq
@@ -0,0 +1,4 @@
+@1
+ACGGCAT
++
+!!!!!!!
diff --git a/src/fastp/test_data/script.sh b/src/fastp/test_data/script.sh
new file mode 100755
index 00000000..725eef6d
--- /dev/null
+++ b/src/fastp/test_data/script.sh
@@ -0,0 +1,10 @@
+# fastp test data
+
+# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/tree/master/bio/fastp/test
+
+if [ ! -d /tmp/snakemake-wrappers ]; then
+  git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers
+fi
+
+cp -r /tmp/snakemake-wrappers/bio/fastp/test/reads/* src/fastp/test_data
+
diff --git a/src/fastp/test_data/se/a.fastq b/src/fastp/test_data/se/a.fastq
new file mode 100644
index 00000000..42735560
--- /dev/null
+++ b/src/fastp/test_data/se/a.fastq
@@ -0,0 +1,4 @@
+@1
+ACGGCAT
++
+!!!!!!!