From 38f586bec0ac9e4312b016e29c3aa0bd53f292b2 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Thu, 11 Apr 2024 11:04:14 +0100
Subject: [PATCH 1/6] initial commit dedup

---
 CHANGELOG.md                                  |   3 +
 src/umi_tools/umi_tools_dedup/config.vsh.yaml | 279 ++++++++++++++++++
 src/umi_tools/umi_tools_dedup/help.txt        |  13 +
 src/umi_tools/umi_tools_dedup/script.sh       |  65 ++++
 src/umi_tools/umi_tools_dedup/test.sh         |  49 +++
 5 files changed, 409 insertions(+)
 create mode 100644 src/umi_tools/umi_tools_dedup/config.vsh.yaml
 create mode 100644 src/umi_tools/umi_tools_dedup/help.txt
 create mode 100644 src/umi_tools/umi_tools_dedup/script.sh
 create mode 100644 src/umi_tools/umi_tools_dedup/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4fd7f001..1bef9345 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,6 +39,9 @@
     - `samtools/flagstat`: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type (PR #31).
     - `samtools/idxstats`: Reports alignment summary statistics for a SAM/BAM/CRAM file (PR #32).
 
+* `umi_tools`:
+    - `umi_tools/umi_tools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #).
+    
 ## MAJOR CHANGES
 
 ## MINOR CHANGES
diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
new file mode 100644
index 00000000..75306541
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
@@ -0,0 +1,279 @@
+name: umi_tools_dedup  # component name; matches the directory src/umi_tools/umi_tools_dedup
+namespace: umi_tools
+description: |
+  Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
+keywords: [umi_tools, deduplication, dedup]
+links:
+  homepage: https://umi-tools.readthedocs.io/en/latest/
+  documentation: [ https://umi-tools.readthedocs.io/en/latest/reference/dedup.html,
+                   https://umi-tools.readthedocs.io/en/latest/common_options.html#common-options ]
+  repository: https://github.com/CGATOxford/UMI-tools
+references:
+  doi: 10.1101/gr.209601.116
+license: MIT
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input
+        alternatives: -I
+        type: file
+        description: Input BAM or SAM file. Use --in_sam to specify SAM format.
+        required: true
+      - name: --in_sam
+        type: boolean_true
+        description: |
+          By default, inputs are assumed to be in BAM format. Use this option
+          to specify the use of SAM format for input.
+      - name: --bai
+        type: file
+        description: BAM index
+      - name: --get_output_stats
+        type: boolean
+        description: Whether or not to generate output stats. 
+      - name: --random_seed
+        type: integer
+        description: |
+          Random seed to initialize number generator with.
+        required: false  # optional; an integer argument cannot default to the string "none"
+
+  - name: Outputs
+    arguments:
+      - name: --output
+        alternatives: -S
+        type: file
+        description: Deduplicated BAM file
+        required: true
+        direction: output
+      - name: --out_sam
+        type: boolean_true
+        description: |
+          By default, outputs are written in BAM format. Use this option to
+          specify the use of SAM format for output.
+      - name: --paired
+        type: boolean_true
+        description: |
+          BAM is paired end - output both read pairs. This will also force the
+          use of the template length to determine reads with the same mapping
+          coordinates.
+      - name: --output_stats
+        type: file
+        description: Directory containing UMI based deduplication statistics files
+        direction: output
+      - name: --extract_umi_method
+        type: string
+        description: |
+          Specify the method by which the barcodes were encoded in the read.
+          The options are: [read_id, tag, umis].
+        default: read_id
+      - name: --umi_tag
+        type: string
+        description: |
+          The tag containing the UMI sequence. 
+          This is only required if the extract_umi_method is set to tag.
+      - name: --umi_separator
+        type: string
+        description: |
+          The separator used to separate the UMI from the read sequence. 
+          This is only required if the extract_umi_method is set to read_id.
+        default: '_'
+      - name: --umi_tag_split
+        type: string
+        description: |
+          Separate the UMI in tag by <SPLIT> and take the first element.
+      - name: --umi_tag_delimiter
+        type: string
+        description: |
+          Separate the UMI in tag by <DELIMITER> and concatenate the elements.
+      - name: --cell_tag
+        type: string
+        description: |
+          The tag containing the cell barcode sequence. 
+          This is only required if the extract_umi_method is set to tag.
+      - name: --cell_tag_split
+        type: string
+        description: |
+          Separate the cell barcode in tag by <SPLIT> and take the first element.
+      - name: --cell_tag_delimiter
+        type: string
+        description: |
+          Separate the cell barcode in tag by <DELIMITER> and concatenate the elements.
+  
+  - name: Grouping Options
+    arguments:    
+      - name: --method
+        type: string
+        description: |
+          The method to use for grouping reads. The options are: 
+          [unique, percentile, cluster, adjacency, directional].
+        default: directional
+      - name: --edit_distance_threshold
+        type: integer
+        description: |
+          For the adjacency and cluster methods the threshold for the edit 
+          distance to connect two UMIs in the network can be increased. The 
+          default value of 1 works best unless the UMI is very long (>14bp).
+        default: 1
+      - name: --spliced_is_unique
+        type: boolean_true
+        description: |
+          Causes two reads that start in the same position on the same strand
+          and having the same UMI to be considered unique if one is spliced 
+          and the other is not. (Uses the ‘N’ cigar operation to test for splicing).
+      - name: --soft_clip_threshold
+        type: integer
+        description: |
+          Mappers that soft clip will sometimes do so rather than mapping a
+          spliced read if there is only a small overhang over the exon junction.
+          By setting this option, you can treat reads with at least this many
+          bases soft-clipped at the 3’ end as spliced.
+        default: 4
+      - name: --multimapping_detection_method
+        type: string
+        description: |
+          If the sam/bam contains tags to identify multimapping reads, you can
+          specify which tag to use when selecting the best read at a given locus.
+          Supported tags are “NH”, “X0” and “XT”. If not specified, the read with
+          the highest mapping quality will be selected.
+      - name: --read_length
+        type: integer
+        description: |
+          Use the read length as a criterion when deduping, e.g. for sRNA-Seq.
+  
+  - name: Single-cell RNA-Seq Options
+    arguments:
+      - name: --per_gene
+        type: boolean_true
+        description: |
+          Reads will be grouped together if they have the same gene. This is useful
+          if your library prep generates PCR duplicates with non identical alignment
+          positions such as CEL-Seq. Note this option is hardcoded to be on with the
+          count command. I.e counting is always performed per-gene. Must be combined
+          with either --gene_tag or --per_contig option.
+      - name: --gene_tag
+        type: string
+        description: |
+          Deduplicate per gene. The gene information is encoded in the bam read tag
+          specified.
+      - name: --assigned_status_tag
+        type: string
+        description: |
+          BAM tag which describes whether a read is assigned to a gene. Defaults to
+          the same value as given for --gene_tag.
+      - name: --skip_tags_regex
+        type: string
+        description: |
+          Use in conjunction with the --assigned_status_tag option to skip any reads
+          where the tag matches this regex. Default ("^[__|Unassigned]") matches
+          anything which starts with “__” or “Unassigned”.
+      - name: --per_contig
+        type: boolean_true
+        description: |
+          Deduplicate per contig (field 3 in BAM; RNAME). All reads with the same
+          contig will be considered to have the same alignment position. This is
+          useful if you have aligned to a reference transcriptome with one
+          transcript per gene. If you have aligned to a transcriptome with more
+          than one transcript per gene, you can supply a map between transcripts
+          and gene using the --gene_transcript_map option.
+      - name: --gene_transcript_map
+        type: file
+        description: |
+          A file containing a mapping between gene names and transcript names.
+          The file should be tab separated with the gene name in the first column
+          and the transcript name in the second column.
+      - name: --per_cell
+        type: boolean_true
+        description: |
+          Reads will only be grouped together if they have the same cell barcode.
+          Can be combined with --per_gene.
+  
+  - name: SAM/BAM Options
+    arguments:
+      - name: --mapping_quality
+        type: integer
+        description: |
+          Minimum mapping quality (MAPQ) for a read to be retained.
+        default: 0
+      - name: --unmapped_reads
+        type: string
+        description: |
+          How unmapped reads should be handled. 
+          The options are:
+          "discard": Discard all unmapped reads.
+          "use":     If read2 is unmapped, deduplicate using read1 only. 
+                     Requires --paired.
+          "output":  Output unmapped reads/read pairs without UMI 
+                     grouping/deduplication. Only available in umi_tools group.
+        default: discard
+      - name: --chimeric_pairs
+        type: string
+        description: |
+          How chimeric pairs should be handled. 
+          The options are:
+          "discard": Discard all chimeric read pairs.
+          "use":     Deduplicate using read1 only.
+          "output":  Output chimeric pairs without UMI grouping/deduplication. 
+                     Only available in umi_tools group.
+        default: use
+      - name: --unapired_reads
+        type: string
+        description: |
+          How unpaired reads should be handled. 
+          The options are:
+          "discard": Discard all unpaired reads.
+          "use":     Deduplicate using read1 only.
+          "output":  Output unpaired reads without UMI grouping/deduplication. 
+                     Only available in umi_tools group.
+        default: use
+      - name: --ignore_umi
+        type: boolean_true
+        description: |
+          Ignore the UMI and group reads using mapping coordinates only.
+      - name: --subset
+        type: boolean_true
+        description: |
+          Only consider a fraction of the reads, chosen at random. This is useful
+          for doing saturation analyses.
+      - name: --chrom
+        type: string
+        description: |
+          Only consider a single chromosome. This is useful for debugging/testing 
+          purposes.
+  
+  - name: Group/Dedup Options
+    arguments:
+      - name: --no_sort_output
+        type: boolean_true
+        description: |
+          By default, output is sorted. This involves the use of a temporary unsorted
+          file (saved in --temp-dir). Use this option to turn off sorting.
+      - name: --buffer_whole_contig
+        type: boolean_true
+        description: |
+          Forces dedup to parse an entire contig before yielding any reads for
+          deduplication. This is the only way to absolutely guarantee that all reads
+          with the same start position are grouped together for deduplication since
+          dedup uses the start position of the read, not the alignment coordinate on
+          which the reads are sorted. However, by default, dedup reads for another
+          1000bp before outputting read groups which will avoid any reads being missed
+          with short read sequencing (<1000bp).
+
+
+resources:
+  - type: bash_script
+    path: script.sh
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - type: file
+    path: test_data
+engines:
+  - type: docker
+    image: quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1
+    setup:
+      - type: docker
+        run: |
+            umi_tools -v | sed 's/ version//g' > /var/software_versions.txt
+runners:
+- type: executable
+- type: nextflow
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/help.txt b/src/umi_tools/umi_tools_dedup/help.txt
new file mode 100644
index 00000000..d3c8fa44
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/help.txt
@@ -0,0 +1,13 @@
+```
+umi_tools dedup
+```
+
+dedup - Deduplicate reads using UMI and mapping coordinates
+
+Usage: umi_tools dedup [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM]
+
+       note: If --stdout is ommited, standard out is output. To
+             generate a valid BAM file on standard out, please
+             redirect log with --log=LOGFILE or --log2stderr 
+
+For full UMI-tools documentation, see https://umi-tools.readthedocs.io/en/latest/
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh
new file mode 100644
index 00000000..57c01258
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/script.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+# Runs `umi_tools dedup`, forwarding each par_* value set by the viash
+# config as the corresponding command-line option.
+set -eo pipefail
+
+# Flags left at "false" are cleared so that the ${var:+...} expansions
+# below emit a flag only when it was actually switched on.
+for flag in paired in_sam out_sam get_output_stats spliced_is_unique \
+    per_gene per_contig per_cell no_sort_output buffer_whole_contig \
+    ignore_umi subset; do
+  var="par_${flag}"
+  if [[ "${!var}" == "false" ]]; then unset "$var"; fi
+done
+
+# NOTE(review): --bai and --get-output-stats could not be found in the
+# umi_tools dedup option reference -- confirm the tool accepts them.
+# The config spells the argument --unapired_reads (sic); the tool's own
+# option is --unpaired-reads, which is what is passed below.
+umi_tools dedup \
+    -I "$par_input" \
+    ${par_in_sam:+--in-sam} \
+    ${par_bai:+--bai "$par_bai"} \
+    ${par_get_output_stats:+--get-output-stats} \
+    ${par_random_seed:+--random-seed "$par_random_seed"} \
+    -S "$par_output" \
+    ${par_out_sam:+--out-sam} \
+    ${par_paired:+--paired} \
+    ${par_output_stats:+--output-stats "$par_output_stats"} \
+    ${par_extract_umi_method:+--extract-umi-method "$par_extract_umi_method"} \
+    ${par_umi_tag:+--umi-tag "$par_umi_tag"} \
+    ${par_umi_separator:+--umi-separator "$par_umi_separator"} \
+    ${par_umi_tag_split:+--umi-tag-split "$par_umi_tag_split"} \
+    ${par_umi_tag_delimiter:+--umi-tag-delimiter "$par_umi_tag_delimiter"} \
+    ${par_cell_tag:+--cell-tag "$par_cell_tag"} \
+    ${par_cell_tag_split:+--cell-tag-split "$par_cell_tag_split"} \
+    ${par_cell_tag_delimiter:+--cell-tag-delimiter "$par_cell_tag_delimiter"} \
+    ${par_method:+--method "$par_method"} \
+    ${par_edit_distance_threshold:+--edit-distance-threshold "$par_edit_distance_threshold"} \
+    ${par_spliced_is_unique:+--spliced-is-unique} \
+    ${par_soft_clip_threshold:+--soft-clip-threshold "$par_soft_clip_threshold"} \
+    ${par_multimapping_detection_method:+--multimapping-detection-method "$par_multimapping_detection_method"} \
+    ${par_read_length:+--read-length "$par_read_length"} \
+    ${par_per_gene:+--per-gene} \
+    ${par_gene_tag:+--gene-tag "$par_gene_tag"} \
+    ${par_assigned_status_tag:+--assigned-status-tag "$par_assigned_status_tag"} \
+    ${par_skip_tags_regex:+--skip-tags-regex "$par_skip_tags_regex"} \
+    ${par_per_contig:+--per-contig} \
+    ${par_gene_transcript_map:+--gene-transcript-map "$par_gene_transcript_map"} \
+    ${par_per_cell:+--per-cell} \
+    ${par_mapping_quality:+--mapping-quality "$par_mapping_quality"} \
+    ${par_unmapped_reads:+--unmapped-reads "$par_unmapped_reads"} \
+    ${par_chimeric_pairs:+--chimeric-pairs "$par_chimeric_pairs"} \
+    ${par_unapired_reads:+--unpaired-reads "$par_unapired_reads"} \
+    ${par_ignore_umi:+--ignore-umi} \
+    ${par_subset:+--subset} \
+    ${par_chrom:+--chrom "$par_chrom"} \
+    ${par_no_sort_output:+--no-sort-output} \
+    ${par_buffer_whole_contig:+--buffer-whole-contig}
+
+
+exit 0
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/test.sh b/src/umi_tools/umi_tools_dedup/test.sh
new file mode 100644
index 00000000..1459ec08
--- /dev/null
+++ b/src/umi_tools/umi_tools_dedup/test.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+# Component test: runs the executable on each fixture and compares the
+# result with its reference file; any failed check exits with status 1.
+# NOTE(review): the *.idxstats fixture names look carried over from another
+# component's test -- confirm they match this component's test_data.
+
+test_dir="${meta_resources_dir}/test_data"
+
+# Print MESSAGE and abort the whole test run with a failing status.
+die() {
+  echo "$1"
+  exit 1
+}
+
+# check_case STEM REF_STEM
+# Runs the executable on STEM.bam (index STEM.bam.bai), then checks that
+# STEM.idxstats exists, is non-empty and equals REF_STEM.idxstats.
+check_case() {
+  local out="$test_dir/$1.idxstats"
+  local ref="$test_dir/$2.idxstats"
+
+  "$meta_executable" \
+    --input "$test_dir/$1.bam" \
+    --bai "$test_dir/$1.bam.bai" \
+    --output "$out"
+
+  echo ">>> Checking whether output exists"
+  [ -f "$out" ] || die "File '$1.idxstats' does not exist!"
+
+  echo ">>> Checking whether output is non-empty"
+  [ -s "$out" ] || die "File '$1.idxstats' is empty!"
+
+  echo ">>> Checking whether output is correct"
+  diff "$out" "$ref" \
+    || die "Output file $1.idxstats does not match expected output"
+
+  rm "$out"
+}
+
+# Test cases.
+echo ">>> Testing $meta_functionality_name"
+check_case "a.sorted" "a_ref.sorted"
+
+echo ">>> Testing $meta_functionality_name with singletons in the input"
+check_case "test.paired_end.sorted" "test_ref.paired_end.sorted"
+
+echo "All tests succeeded!"
+exit 0
\ No newline at end of file

From 2c269682620a407803e528652198646435ef2c03 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Thu, 11 Apr 2024 11:38:57 +0100
Subject: [PATCH 2/6] Revert "initial commit dedup"

This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2.
---
 CHANGELOG.md                                  |   3 -
 src/umi_tools/umi_tools_dedup/config.vsh.yaml | 279 ------------------
 src/umi_tools/umi_tools_dedup/help.txt        |  13 -
 src/umi_tools/umi_tools_dedup/script.sh       |  65 ----
 src/umi_tools/umi_tools_dedup/test.sh         |  49 ---
 5 files changed, 409 deletions(-)
 delete mode 100644 src/umi_tools/umi_tools_dedup/config.vsh.yaml
 delete mode 100644 src/umi_tools/umi_tools_dedup/help.txt
 delete mode 100644 src/umi_tools/umi_tools_dedup/script.sh
 delete mode 100644 src/umi_tools/umi_tools_dedup/test.sh

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bef9345..4fd7f001 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -39,9 +39,6 @@
     - `samtools/flagstat`: Counts the number of alignments in SAM/BAM/CRAM files for each FLAG type (PR #31).
     - `samtools/idxstats`: Reports alignment summary statistics for a SAM/BAM/CRAM file (PR #32).
 
-* `umi_tools`:
-    - `umi_tools/umi_tools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #).
-    
 ## MAJOR CHANGES
 
 ## MINOR CHANGES
diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml
deleted file mode 100644
index 75306541..00000000
--- a/src/umi_tools/umi_tools_dedup/config.vsh.yaml
+++ /dev/null
@@ -1,279 +0,0 @@
-name: umi_tool_dedup
-namespace: umi_tools
-description: |
-  Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read.
-keywords: [umi_tools, deduplication, dedup]
-links:
-  homepage: https://umi-tools.readthedocs.io/en/latest/
-  documentation: [ https://umi-tools.readthedocs.io/en/latest/reference/dedup.html,
-                   https://umi-tools.readthedocs.io/en/latest/common_options.html#common-options ]
-  repository: https://github.com/CGATOxford/UMI-tools
-references: 
-  doi: 10.1101/gr.209601.116
-license: MIT
-
-argument_groups:
-  - name: Inputs
-    arguments:
-      - name: --input
-        alternatives: -I
-        type: file
-        description: Input BAM or SAM file. Use --in_sam to specify SAM format.
-        required: true
-      - name: --in_sam
-        type: boolean_true
-        description: |
-          By default, inputs are assumed to be in BAM format. Use this options 
-          to specify the use of SAM format for input.
-      - name: --bai
-        type: file
-        description: BAM index
-      - name: --get_output_stats
-        type: boolean
-        description: Whether or not to generate output stats. 
-      - name: --random_seed
-        type: integer
-        description: |
-          Random seed to initialize number generator with.
-        default: none
-
-  - name: Outputs
-    arguments:
-      - name: --output
-        alternatives: -S
-        type: file
-        description: Deduplicated BAM file
-        required: true
-        direction: output
-      - name: --out_sam
-        type: boolean_true
-        description: |
-          By default, outputa are written in BAM format. Use this options to 
-          specify the use of SAM format for output.
-      - name: --paired
-        type: boolean_true
-        description: |
-          BAM is paired end - output both read pairs. This will also force the
-          use of the template length to determine reads with the same mapping
-          coordinates.
-      - name: --output_stats
-        type: file
-        description: Directory containing UMI based deduplication statistics files
-        direction: output
-      - name: --extract_umi_method
-        type: string
-        description: |
-          Specify the method by which the barcodes were encoded in the read.
-          The options are: [read_id, tag, umis].
-        default: read_id
-      - name: --umi_tag
-        type: string
-        description: |
-          The tag containing the UMI sequence. 
-          This is only required if the extract_umi_method is set to tag.
-      - name: --umi_separator
-        type: string
-        description: |
-          The separator used to separate the UMI from the read sequence. 
-          This is only required if the extract_umi_method is set to id_read.
-        default: '_'
-      - name: --umi_tag_split
-        type: string
-        description: |
-          Separate the UMI in tag by <SPLIT> and take the first element.
-      - name: --umi_tag_delimiter
-        type: string
-        description: |
-          Separate the UMI in by <DELIMITER> and concatenate the elements
-      - name: --cell_tag
-        type: string
-        description: |
-          The tag containing the cell barcode sequence. 
-          This is only required if the extract_umi_method is set to tag.
-      - name: --cell_tag_split
-        type: string
-        description: |
-          Separate the cell barcode in tag by <SPLIT> and take the first element.
-      - name: --cell_tag_delimiter
-        type: string
-        description: |
-          Separate the cell barcode in by <DELIMITER> and concatenate the elements
-  
-  - name: Grouping Options
-    arguments:    
-      - name: --method
-        type: string
-        description: |
-          The method to use for grouping reads. The options are: 
-          [unique, percentile, cluster, adjacency, directional].
-        default: directional
-      - name: --edit_distance_threshold
-        type: integer
-        description: |
-          For the adjacency and cluster methods the threshold for the edit 
-          distance to connect two UMIs in the network can be increased. The 
-          default value of 1 works best unless the UMI is very long (>14bp).
-        default: 1
-      - name: --spliced_is_unique
-        type: boolean_true
-        description: |
-          Causes two reads that start in the same position on the same strand
-          and having the same UMI to be considered unique if one is spliced 
-          and the other is not. (Uses the ‘N’ cigar operation to test for splicing).
-      - name: --soft_clip_threshold
-        type: integer
-        description: |
-          Mappers that soft clip will sometimes do so rather than mapping a
-          spliced read if there is only a small overhang over the exon junction.
-          By setting this option, you can treat reads with at least this many
-          bases soft-clipped at the 3’ end as spliced.
-        default: 4
-      - name: --multimapping_detection_method
-        type: string
-        description: |
-          If the sam/bam contains tags to identify multimapping reads, you can
-          specify for use when selecting the best read at a given loci. Supported
-          tags are “NH”, “X0” and “XT”. If not specified, the read with the highest
-          mapping quality will be selected.
-      - name: --read_length
-        type: integer
-        description: |
-          Use the read length as a criteria when deduping, for e.g sRNA-Seq.
-  
-  - name: Single-cell RNA-Seq Options
-    arguments:
-      - name: --per_gene
-        type: boolean_true
-        description: |
-          Reads will be grouped together if they have the same gene. This is useful
-          if your library prep generates PCR duplicates with non identical alignment
-          positions such as CEL-Seq. Note this option is hardcoded to be on with the
-          count command. I.e counting is always performed per-gene. Must be combined
-          with either --gene_tag or --per_contig option.
-      - name: --gene_tag
-        type: string
-        description: |
-          Deduplicate per gene. The gene information is encoded in the bam read tag
-          specified.
-      - name: --assigned_status_tag
-        type: string
-        description: |
-          BAM tag which describes whether a read is assigned to a gene. Defaults to
-          the same value as given for --gene_tag.
-      - name: --skip_tags_regex
-        type: string
-        description: |
-          Use in conjunction with the --assigned_status_tag option to skip any reads
-          where the tag matches this regex. Default ("^[__|Unassigned]") matches
-          anything which starts with “__” or “Unassigned”.
-      - name: --per_contig
-        type: boolean_true
-        description: |
-          Deduplicate per contig (field 3 in BAM; RNAME). All reads with the same
-          contig will be considered to have the same alignment position. This is
-          useful if you have aligned to a reference transcriptome with one
-          transcript per gene. If you have aligned to a transcriptome with more
-          than one transcript per gene, you can supply a map between transcripts
-          and gene using the --gene_transcript_map option.
-      - name: --gene_transcript_map
-        type: file
-        description: |
-          A file containing a mapping between gene names and transcript names.
-          The file should be tab separated with the gene name in the first column
-          and the transcript name in the second column.
-      - name: --per_cell
-        type: boolean_true
-        description: |
-          Reads will only be grouped together if they have the same cell barcode.
-          Can be combined with --per_gene.
-  
-  - name: SAM/BAM Options
-    arguments:
-      - name: --mapping_quality
-        type: integer
-        description: |
-          Minimium mapping quality (MAPQ) for a read to be retained.
-        default: 0
-      - name: --unmapped_reads
-        type: string
-        description: |
-          How unmapped reads should be handled. 
-          The options are:
-          "discard": Discard all unmapped reads.
-          "use":     If read2 is unmapped, deduplicate using read1 only. 
-                     Requires --paired.
-          "output":  Output unmapped reads/read pairs without UMI 
-                     grouping/deduplication. Only available in umi_tools group.
-        default: discard
-      - name: --chimeric_pairs
-        type: string
-        description: |
-          How chimeric pairs should be handled. 
-          The options are:
-          "discard": Discard all chimeric read pairs.
-          "use":     Deduplicate using read1 only.
-          "output":  Output chimeric pairs without UMI grouping/deduplication. 
-                     Only available in umi_tools group.
-        default: use
-      - name: --unapired_reads
-        type: string
-        description: |
-          How unpaired reads should be handled. 
-          The options are:
-          "discard": Discard all unpaired reads.
-          "use":     Deduplicate using read1 only.
-          "output":  Output unpaired reads without UMI grouping/deduplication. 
-                     Only available in umi_tools group.
-        default: use
-      - name: --ignore_umi
-        type: boolean_true
-        description: |
-          Ignore the UMI and group reads using mapping coordinates only.
-      - name: --subset
-        type: boolean_true
-        description: |
-          Only consider a fraction of the reads, chosen at random. This is useful
-          for doing saturation analyses.
-      - name: --chrom
-        type: string
-        description: |
-          Only consider a single chromosome. This is useful for debugging/testing 
-          purposes.
-  
-  - name: Group/Dedup Options
-    arguments:
-      - name: --no_sort_output
-        type: boolean_true
-        description: |
-          By default, output is sorted. This involves the use of a temporary unsorted
-          file (saved in --temp-dir). Use this option to turn off sorting.
-      - name: --buffer_whole_contig
-        type: boolean_true
-        description: |
-          Forces dedup to parse an entire contig before yielding any reads for
-          deduplication. This is the only way to absolutely guarantee that all reads
-          with the same start position are grouped together for deduplication since
-          dedup uses the start position of the read, not the alignment coordinate on
-          which the reads are sorted. However, by default, dedup reads for another
-          1000bp before outputting read groups which will avoid any reads being missed
-          with short read sequencing (<1000bp).
-
-
-resources:
-  - type: bash_script
-    path: script.sh
-test_resources:
-  - type: bash_script
-    path: test.sh
-  - type: file
-    path: test_data
-engines:
-  - type: docker
-    image: quay.io/biocontainers/umi_tools:1.1.5--py39hf95cd2a_1
-    setup:
-      - type: docker
-        run: |
-            umi_tools -v | sed 's/ version//g' > /var/software_versions.txt
-runners:
-- type: executable
-- type: nextflow
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/help.txt b/src/umi_tools/umi_tools_dedup/help.txt
deleted file mode 100644
index d3c8fa44..00000000
--- a/src/umi_tools/umi_tools_dedup/help.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-```
-umi_tools dedup
-```
-
-dedup - Deduplicate reads using UMI and mapping coordinates
-
-Usage: umi_tools dedup [OPTIONS] [--stdin=IN_BAM] [--stdout=OUT_BAM]
-
-       note: If --stdout is ommited, standard out is output. To
-             generate a valid BAM file on standard out, please
-             redirect log with --log=LOGFILE or --log2stderr 
-
-For full UMI-tools documentation, see https://umi-tools.readthedocs.io/en/latest/
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh
deleted file mode 100644
index 57c01258..00000000
--- a/src/umi_tools/umi_tools_dedup/script.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-## VIASH START
-## VIASH END
-
-set -e
-
-test_dir="${metal_executable}/test_data"
-
-[[ "$par_paired" == "false" ]] && unset par_paired
-[[ "$par_in_sam" == "false" ]] && unset par_in_sam
-[[ "$par_out_sam" == "false" ]] && unset par_out_sam
-[[ "$par_spliced_is_unique" == "false" ]] && unset par_spliced_is_unique
-[[ "$par_per_gene" == "false" ]] && unset par_per_gene
-[[ "$par_per_contig" == "false" ]] && unset par_per_contig
-[[ "$par_per_cell" == "false" ]] && unset par_per_cell
-[[ "$par_no_sort_output" == "false" ]] && unset par_no_sort_output
-[[ "$par_buffer_whole_contig" == "false" ]] && unset par_buffer_whole_contig
-[[ "$par_ignore_umi" == "false" ]] && unset par_ignore_umi
-[[ "$par_subset" == "false" ]] && unset par_subset
-
-
-$(which umi_tools) dedup \
-    -I "$par_input" \
-    ${par_in_sam:+--in-sam} \
-    ${par_bai:+--bai "$par_bai"} \
-    ${par_get_output_stats:+--get-output-stats} \
-    ${par_random_seed:+--random-seed "$par_random_seed"} \
-    -S "$par_output" \
-    ${par_out_sam:+--out-sam} \
-    ${par_paired:+--paired} \
-    ${par_output_stats:+--output-stats "$par_output_stats"} \
-    ${par_extract_umi_method:+--extract-umi-method "$par_extract_umi_method"} \
-    ${par_umi_tag:+--umi-tag "$par_umi_tag"} \
-    ${par_umi_separator:+--umi-separator "$par_umi_separator"} \
-    ${par_umi_tag_split:+--umi-tag-split "$par_umi_tag_split"} \
-    ${par_umi_tag_delimiter:+--umi-tag-delimiter "$par_umi_tag_delimiter"} \
-    ${par_cell_tag:+--cell-tag "$par_cell_tag"} \
-    ${par_cell_tag_split:+--cell-tag-split "$par_cell_tag_split"} \
-    ${par_cell_tag_delimiter:+--cell-tag-delimiter "$par_cell_tag_delimiter"} \
-    ${par_method:+--method "$par_method"} \
-    ${par_edit_distance_threshold:+--edit-distance-threshold "$par_edit_distance_threshold"} \
-    ${par_spliced_is_unique:+--spliced-is-unique} \
-    ${par_soft_clip_threshold:+--soft-clip-threshold "$par_soft_clip_threshold"} \
-    ${par_multimapping_detection_method:+--multimapping-detection-method "$par_multimapping_detection_method"} \
-    ${par_read_length:+--read-length "$par_read_length"} \
-    ${par_per_gene:+--per-gene} \
-    ${par_gene_tag:+--gene-tag "$par_gene_tag"} \
-    ${par_assigned_status_tag:+--assigned-status-tag "$par_assigned_status_tag"} \
-    ${par_skip_tags_regex:+--skip-tags-regex "$par_skip_tags_regex"} \
-    ${par_per_contig:+--per-contig}
-    ${par_gene_transcript_map:+--gene-transcript-map "$par_gene_transcript_map"} \
-    ${par_per_cell:+--per-cell} \
-    ${par_mapping_quality:+--mapping-quality "$par_mapping_quality"} \
-    ${par_unmapped_reads:+--unmapped-reads "$par_unmapped_reads"} \
-    ${par_chimeric_pairs:+--chimeric-pairs "$par_chimeric_pairs"} \
-    ${par_unapired_reads:+--unapired-reads "$par_unapired_reads"} \
-    ${par_ignore_umi:+--ignore-umi} \
-    ${par_subset:+--subset} \
-    ${par_chrom:+--chrom "$par_chrom"} \
-    ${par_no_sort_output:+--no-sort-output} \
-    ${par_buffer_whole_contig:+--buffer-whole-contig}
-
-
-exit 0
\ No newline at end of file
diff --git a/src/umi_tools/umi_tools_dedup/test.sh b/src/umi_tools/umi_tools_dedup/test.sh
deleted file mode 100644
index 1459ec08..00000000
--- a/src/umi_tools/umi_tools_dedup/test.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-test_dir="${meta_resources_dir}/test_data"
-echo ">>> Testing $meta_functionality_name"
-
-"$meta_executable" \
-  --bam "$test_dir/a.sorted.bam" \
-  --bai "$test_dir/a.sorted.bam.bai" \
-  --output "$test_dir/a.sorted.idxstats"
-
-echo ">>> Checking whether output exists"
-[ ! -f "$test_dir/a.sorted.idxstats" ] && echo "File 'a.sorted.idxstats' does not exist!" && exit 1
-
-echo ">>> Checking whether output is non-empty"
-[ ! -s "$test_dir/a.sorted.idxstats" ] && echo "File 'a.sorted.idxstats' is empty!" && exit 1
-
-echo ">>> Checking whether output is correct"
-diff "$test_dir/a.sorted.idxstats" "$test_dir/a_ref.sorted.idxstats" || \
-    (echo "Output file a.sorted.idxstats does not match expected output" && exit 1)
-
-rm "$test_dir/a.sorted.idxstats"
-
-############################################################################################
-
-echo ">>> Testing $meta_functionality_name with singletons in the input"
-
-"$meta_executable" \
-  --bam "$test_dir/test.paired_end.sorted.bam" \
-  --bai "$test_dir/test.paired_end.sorted.bam.bai" \
-  --output "$test_dir/test.paired_end.sorted.idxstats"
-
-echo ">>> Checking whether output exists"
-[ ! -f "$test_dir/test.paired_end.sorted.idxstats" ] && \
-    echo "File 'test.paired_end.sorted.idxstats' does not exist!" && exit 1
-
-echo ">>> Checking whether output is non-empty"
-[ ! -s "$test_dir/test.paired_end.sorted.idxstats" ] && \
-    echo "File 'test.paired_end.sorted.idxstats' is empty!" && exit 1
-
-echo ">>> Checking whether output is correct"
-diff "$test_dir/test.paired_end.sorted.idxstats" "$test_dir/test_ref.paired_end.sorted.idxstats" || \
-    (echo "Output file test.paired_end.sorted.idxstats does not match expected output" && exit 1)
-
-rm "$test_dir/test.paired_end.sorted.idxstats"
-
-############################################################################################
-
-echo "All tests succeeded!"
-exit 0
\ No newline at end of file

From 1177dc1a82ad21513e0367ee4af7852059c0fd3f Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Sat, 7 Sep 2024 17:04:27 +0200
Subject: [PATCH 3/6] test data, complete config, help, changelog update

---
 CHANGELOG.md                                  |   3 +
 src/kallisto/kallisto_index/config.vsh.yaml   |  89 ++++++++++++++++++
 src/kallisto/kallisto_index/help.txt          |  21 +++++
 src/kallisto/kallisto_index/k_index           | Bin 0 -> 4036 bytes
 src/kallisto/kallisto_index/script.sh         |  25 +++++
 src/kallisto/kallisto_index/test.sh           |  23 +++++
 .../kallisto_index/test_data/d_list.fasta     |   5 +
 .../test_data/transcriptome.fasta             |  23 +++++
 8 files changed, 189 insertions(+)
 create mode 100644 src/kallisto/kallisto_index/config.vsh.yaml
 create mode 100644 src/kallisto/kallisto_index/help.txt
 create mode 100644 src/kallisto/kallisto_index/k_index
 create mode 100644 src/kallisto/kallisto_index/script.sh
 create mode 100644 src/kallisto/kallisto_index/test.sh
 create mode 100644 src/kallisto/kallisto_index/test_data/d_list.fasta
 create mode 100644 src/kallisto/kallisto_index/test_data/transcriptome.fasta

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 98e78c17..1bec0a04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -140,6 +140,9 @@
     - `bedtools_getfasta`: extract sequences from a FASTA file for each of the
                            intervals defined in a BED/GFF/VCF file (PR #59).
 
+* `kallisto`:
+    - `kallisto_index`: Create a kallisto index (PR #149).
+
 ## MINOR CHANGES
 
 * Uniformize component metadata (PR #23).
diff --git a/src/kallisto/kallisto_index/config.vsh.yaml b/src/kallisto/kallisto_index/config.vsh.yaml
new file mode 100644
index 00000000..3ae6241f
--- /dev/null
+++ b/src/kallisto/kallisto_index/config.vsh.yaml
@@ -0,0 +1,89 @@
+name: kallisto_index
+namespace: kallisto
+description: |
+  Build a Kallisto index for the transcriptome to use Kallisto in the mapping-based mode.
+keywords: [kallisto, index]
+links:
+  homepage: https://pachterlab.github.io/kallisto/about
+  documentation: https://pachterlab.github.io/kallisto/manual
+  repository: https://github.com/pachterlab/kallisto
+  issue_tracker: https://github.com/pachterlab/kallisto/issues
+references: 
+  doi: https://doi.org/10.1038/nbt.3519
+license: BSD 2-Clause License
+
+argument_groups:
+- name: "Input"
+  arguments: 
+  - name: "--input"
+    type: file
+    description: |
+      Path to a FASTA-file containing the transcriptome sequences, either in plain text or 
+      compressed (.gz) format.
+    required: true
+  - name: "--d_list"
+    type: file
+    description: |
+      Path to a FASTA-file containing sequences to mask from quantification.
+
+- name: "Output"
+  arguments:
+  - name: "--kallisto_index"
+    type: file
+    direction: output
+    must_exist: false
+    example: Kallisto_index
+
+- name: "Options"
+  arguments:
+  - name: "--kmer_size"
+    type: integer
+    description: |
+      K-mer length used to build the index; must be odd and at most 31 (default: '31').
+    example: 31
+  - name: "--make_unique"
+    type: boolean_true
+    description: |
+      Replace repeated target names with unique names.
+  - name: "--aa"
+    type: boolean_true
+    description: |
+      Generate index from a FASTA-file containing amino acid sequences.
+  - name: "--distinguish"
+    type: boolean_true
+    description: |
+      Generate index where sequences are distinguished by the sequence names.
+  - name: "--min_size"
+    alternatives: ["-m"]
+    type: integer
+    description: |
+      Length of minimizers (default: automatically chosen).
+  - name: "--ec_max_size"
+    alternatives: ["-e"]
+    type: integer
+    description: |
+      Maximum number of targets in an equivalence class (default: no maximum).
+
+resources:
+  - type: bash_script
+    path: script.sh
+
+test_resources:
+  - type: bash_script
+    path: test.sh
+  - path: test_data
+
+engines:
+  - type: docker
+    image: ubuntu:22.04
+    setup:
+      - type: docker
+        run: |
+          apt-get update && \
+          apt-get install -y --no-install-recommends wget ca-certificates && \
+          wget https://github.com/pachterlab/kallisto/releases/download/v0.50.1/kallisto_linux-v0.50.1.tar.gz && \
+          tar -xzf kallisto_linux-v0.50.1.tar.gz && \
+          mv kallisto/kallisto /usr/local/bin/
+runners:
+  - type: executable
+  - type: nextflow  
diff --git a/src/kallisto/kallisto_index/help.txt b/src/kallisto/kallisto_index/help.txt
new file mode 100644
index 00000000..28778ac0
--- /dev/null
+++ b/src/kallisto/kallisto_index/help.txt
@@ -0,0 +1,21 @@
+```
+kallisto index
+```
+kallisto 0.50.1
+Builds a kallisto index
+
+Usage: kallisto index [arguments] FASTA-files
+
+Required argument:
+-i, --index=STRING          Filename for the kallisto index to be constructed 
+
+Optional argument:
+-k, --kmer-size=INT         k-mer (odd) length (default: 31, max value: 31)
+-t, --threads=INT           Number of threads to use (default: 1)
+-d, --d-list=STRING         Path to a FASTA-file containing sequences to mask from quantification
+    --make-unique           Replace repeated target names with unique names
+    --aa                    Generate index from a FASTA-file containing amino acid sequences
+    --distinguish           Generate index where sequences are distinguished by the sequence name
+-T, --tmp=STRING            Temporary directory (default: tmp)
+-m, --min-size=INT          Length of minimizers (default: automatically chosen)
+-e, --ec-max-size=INT       Maximum number of targets in an equivalence class (default: no maximum)
diff --git a/src/kallisto/kallisto_index/k_index b/src/kallisto/kallisto_index/k_index
new file mode 100644
index 0000000000000000000000000000000000000000..cbb91fd2dafbbdbb7bc1a7a942bf09d8ce3c4ad6
GIT binary patch
literal 4036
zcmeHK-;W#B6}}#io$Iv;vt}E1)3m+*k!V{Fwn1qKLM}62lMF1Xyo+f-qRmn*C{n1D
z_6b$E@=UzcS<<Y7`Viih2&tiU<1o-4vMPBiMabYPibi>;whvz5(Nfh)mX|(E)$fjH
zDBk=~{(y{T=H5B?obP_;JI7=DJ~BGa1xBL}!xP72zxWo$2XXBhnaL~esI}v^Ii$vb
znk;Ox?q+kom*|><j!@*GINWEG&G)voH92g`9k^P}t}*CXnT{#jhFoqBv1G3f$_U;}
zbX&k`q^#!rn%vn;<g5go=i7{$gPbYD_|FlvhizqSqoMUOVRHa5+hlet?L-V<Ng22g
zu_v{fcmzq?804(`Z@3t;)@J3kLEn)3EpTjhL8{d_*&cvV*J|1=S?g_srnbhqEjesu
zv|hq~Q5Z$L!CiwpfxpJXIMs5y(Kj#KrVMGcUK1&FkTUoqYn<lLNlGsFwy|;8O5~i>
zAV}ES#!`;Um9RH6)Jl{g!YTt!7t$Gg0u6HpG)z!~<*aQ80-UfR!x^MOR24_`wDx7&
zU?AMFus#6)zSTV09(G%UK6EhCb_+#7$uf3}K_cW~TZz7fZ8P?&oHAI-90IruoK_rp
z^f#Mu=x%0gtBFLzW*p7{??4$|&jbovZp+k6bFEAmQB@!#V^lJeG8*M}8kWA<7*Wod
zkU9<m&hCSgy=DiNwE>8AjRuUJO;{L;nBvU>=b=;IA(zTbcUGY&(D-R{%Mg5Wm>7+e
zgTp(>N+45e$T?GJxOLOoY4C;6{T4ElQz%tSMi)R(6sZh7Hk-WotGupVYa5n6ieWHR
z0`T2c2Kv|>!tf48e;ExV4FuFIXEYE?A-Or6Z>Q1I)NJ&v?XH>buQnhmYQ~eWGUYZz
z&753=VCX^UH01fUE<-A4fXbBD8t%u494v<i579p<Zrc)W|6%U+1HH&ULaAdPeCmBN
zJ%7*db2sIk_H6LqSW!eF5Hwh%%2-HviOp)dLaSLpm9#)U8(&;sVs%L^=<2dcpE;4I
zLA|m?g_&esSx~Fe1?6lpR{WPf9a&{!D5%i*Y+hx0T+-@FU5U(`7iaPs^+_@DNii5(
zlujp;Nt&<eD@+sf)A_nW2g(ES*Ywbo7^64Dz2ZV`^4s|$JG&mpzr|wNg8uZ?b?}ic
z#1ABDio{ZNVMU3G@j^0LrE2Z1qOM$_qFSV)E=8t9o#ulFmX|_j^k_OM8W)+AP^HMe
z$<Brl+cz86)@eYM=#21<S?M!6QIL`snGu_aOHUbMK2JjzH|i{8Dak90Mqerf3`vX5
z#)QYDxGv3VQ|~e@Ulm@^>9Q2hMi*I<8o~?>?0GntpLlzsdWjkINI?w7r71BU+*_S|
zQ<@YW_<pkZj>5!6@yB#`A+PzLl4{idwvwkptj2VmeMfqDIYymY@_Oy{isp`9&Af3c
zIOd?t;L)qEHxk!}ukqvgWp+;5I;@avi5{7YTp)p<->5$?oM0cTTc^9bHv;)fe?4>R
z;OW}WDhGX{;NQ4>L8(Z}gA-L!*<Ybg&`ZU|pZZ^W{ktT-Qh86=vaT4WqW*dJyY*=D
z1Vepa{6qF@Te0YI$tUef2-@dW#rK(SrSkQq@T<T4{l_b0vZU0`6INIdNQj8<iPfvW
zTdWhMun~*APyR!y7pIR%6(#iM={<+`RL}lKjm6So-+y2I+au30-_+xyEC_L5Y2G9G
zl{-7^igV2!D{jAs-SsHpEqH2*?0VU!5PIdB;iqJ3sSycZ7iX90{!c1#RiVD*f;jEd
zKMJi&SBa+VBb6tyq3=WT>u<g>8}bopnvlPUr+(yBf)IBb;_a(?2G_;?0wwH2?YGy?
zOdpc#!qUN-n92U+i-1CuXEzq~FE071?j__QA9ryo?t<SPE7Ce)Rm$|g2Y<l!2kSfI
zK5*j{cfq3*HIiig#wl&5|MlDW6E?22$LrJE`Fp_b==pp7sJM2I&kpx5d;gyQPGv^V
z_w*d;aIb%E-48pv#_JdAE~rP<xl*Zg^r$ycF^2_@{Cw1PbHeybBIFr7AaA^Bq2wTs
zpfDeu|9|AjJl2VS1oxW&Zi&n}+!=X}BlC7};;&<JcgR17KWF)+5TnV6G49ai4S?|%
zuJF}v+jG}D%p;6@{}I=HxcK4opg3D^K1P^FjTv~@or$WPD1T3n5j_vX-ES)((Xnmc
zK{$u_-cCN8IluUEQ-r7H(NSBC^yQs3>JDBj{#EG2CqQ`&fa`HfWZ219kX<!gOs9FB
zF5tg)ocMD)IrA#s7I{tuz0zFC`2_Gu$+KWmnkyof|D1N>`R$IPjx9bdkc!VwDq~r$
z6H?*DdLN{05aoZ*_&*WvC;JQPGucCr9rI$G$HT`SQpeU0&yB4=iuL<}$&bgg4p+ZJ
H*iZfk8k`iG

literal 0
HcmV?d00001

diff --git a/src/kallisto/kallisto_index/script.sh b/src/kallisto/kallisto_index/script.sh
new file mode 100644
index 00000000..b16e2781
--- /dev/null
+++ b/src/kallisto/kallisto_index/script.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+## VIASH START
+## VIASH END
+
+set -eo pipefail
+
+if [ -n "$par_kmer_size" ]; then
+    if [[ "$par_kmer_size" -lt 1 || "$par_kmer_size" -gt 31 || $(( par_kmer_size % 2 )) -eq 0 ]]; then
+        echo "Error: Kmer size must be an odd number between 1 and 31."
+        exit 1
+    fi
+fi
+
+kallisto index \
+    -i "${par_kallisto_index}" \
+    ${par_kmer_size:+--kmer-size $par_kmer_size} \
+    ${par_make_unique:+--make-unique} \
+    ${par_aa:+--aa} \
+    ${par_distinguish:+--distinguish} \
+    ${par_min_size:+--min-size $par_min_size} \
+    ${par_ec_max_size:+--ec-max-size $par_ec_max_size} \
+    ${par_d_list:+--d-list "${par_d_list}"} \
+    "${par_input}"
+
diff --git a/src/kallisto/kallisto_index/test.sh b/src/kallisto/kallisto_index/test.sh
new file mode 100644
index 00000000..67869b99
--- /dev/null
+++ b/src/kallisto/kallisto_index/test.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+echo ">>>Test1: Testing $meta_functionality_name"
+
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/transcriptome.fasta" \
+  --kallisto_index Kallisto \
+  --kmer_size 21 \
+  --make_unique 
+
+echo ">>> Checking whether output exists"
+[ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1
+[ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
+
+echo ">>>Test2: Testing $meta_functionality_name"
+
+"$meta_executable" \
+  --input "$meta_resources_dir/test_data/transcriptome.fasta" \
+  --kallisto_index Kallisto \
+  --d_list "$meta_resources_dir/test_data/d_list.fasta"
+
+echo "All tests succeeded!"
+exit 0
diff --git a/src/kallisto/kallisto_index/test_data/d_list.fasta b/src/kallisto/kallisto_index/test_data/d_list.fasta
new file mode 100644
index 00000000..ad5e05bf
--- /dev/null
+++ b/src/kallisto/kallisto_index/test_data/d_list.fasta
@@ -0,0 +1,5 @@
+>YAL067W-A CDS=1-228
+ATGCCAATTATAGGGGTGCCGAGGTGCCTTATAAAACCCTTTTCTGTGCCTGTGACATTTCCTTTTTCGG
+TCAAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTACAGAAGCTTATTGTCTAAGCCTGAATTCAGT
+CTGCTTTAAACGGCTTCCGCGGAGGAAATATTTCCATCTCTTGAATTCGTACAACATTAAACGTGTGTTG
+GGAGTCGTATACTGTTAG
diff --git a/src/kallisto/kallisto_index/test_data/transcriptome.fasta b/src/kallisto/kallisto_index/test_data/transcriptome.fasta
new file mode 100644
index 00000000..94c06163
--- /dev/null
+++ b/src/kallisto/kallisto_index/test_data/transcriptome.fasta
@@ -0,0 +1,23 @@
+>YAL069W CDS=1-315
+ATGATCGTAAATAACACACACGTGCTTACCCTACCACTTTATACCACCACCACATGCCATACTCACCCTC
+ACTTGTATACTGATTTTACGTACGCACACGGATGCTACAGTATATACCATCTCAAACTTACCCTACTCTC
+AGATTCCACTTCACTCCATGGCCCATCTCTCACTGAATCAGTACCAAATGCACTCACATCATTATGCACG
+GCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATTTTGATAT
+CTATATCTCATTCGGCGGTCCCAAATATTGTATAA
+>YAL068W-A CDS=1-255
+ATGCACGGCACTTGCCTCAGCGGTCTATACCCTGTGCCATTTACCCATAACGCCCATCATTATCCACATT
+TTGATATCTATATCTCATTCGGCGGTCCCAAATATTGTATAACTGCCCTTAATACATACGTTATACCACT
+TTTGCACCATATACTTACCACTCCATTTATATACACTTATGTCAATATTACAGAAAAATCCCCACAAAAA
+TCACCTAAACATAAAAATATTCTACTTTTCAACAATAATACATAA
+>YAL068C CDS=1-363
+ATGGTCAAATTAACTTCAATCGCCGCTGGTGTCGCTGCCATCGCTGCTACTGCTTCTGCAACCACCACTC
+TAGCTCAATCTGACGAAAGAGTCAACTTGGTGGAATTGGGTGTCTACGTCTCTGATATCAGAGCTCACTT
+AGCCCAATACTACATGTTCCAAGCCGCCCACCCAACTGAAACCTACCCAGTCGAAGTTGCTGAAGCCGTT
+TTCAACTACGGTGACTTCACCACCATGTTGACCGGTATTGCTCCAGACCAAGTGACCAGAATGATCACCG
+GTGTTCCATGGTACTCCAGCAGATTAAAGCCAGCCATCTCCAGTGCTCTATCCAAGGACGGTATCTACAC
+TATCGCAAACTAG
+>YAL067W-A CDS=1-228
+ATGCCAATTATAGGGGTGCCGAGGTGCCTTATAAAACCCTTTTCTGTGCCTGTGACATTTCCTTTTTCGG
+TCAAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTACAGAAGCTTATTGTCTAAGCCTGAATTCAGT
+CTGCTTTAAACGGCTTCCGCGGAGGAAATATTTCCATCTCTTGAATTCGTACAACATTAAACGTGTGTTG
+GGAGTCGTATACTGTTAG
\ No newline at end of file

From 41960e3b4eba025794e2097b2eefa70f14edbf56 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Sun, 8 Sep 2024 16:17:27 +0200
Subject: [PATCH 4/6] check test output contents

---
 src/kallisto/kallisto_index/test.sh | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/kallisto/kallisto_index/test.sh b/src/kallisto/kallisto_index/test.sh
index 67869b99..40ed0aab 100644
--- a/src/kallisto/kallisto_index/test.sh
+++ b/src/kallisto/kallisto_index/test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-echo ">>>Test1: Testing $meta_functionality_name"
+echo ">>>Test1: Testing $meta_functionality_name with make_unique argument"
 
 "$meta_executable" \
   --input "$meta_resources_dir/test_data/transcriptome.fasta" \
@@ -8,16 +8,27 @@ echo ">>>Test1: Testing $meta_functionality_name"
   --kmer_size 21 \
   --make_unique 
 
+
 echo ">>> Checking whether output exists"
 [ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1
 [ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
 
-echo ">>>Test2: Testing $meta_functionality_name"
+kallisto inspect Kallisto 2> test.txt
+grep "number of k-mers: 2,978" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
+
+echo ">>>Test2: Testing $meta_functionality_name with d_list argument"
 
 "$meta_executable" \
   --input "$meta_resources_dir/test_data/transcriptome.fasta" \
   --kallisto_index Kallisto \
   --d_list "$meta_resources_dir/test_data/d_list.fasta"
 
+echo ">>> Checking whether output exists"
+[ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1
+[ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
+
+kallisto inspect Kallisto 2> test.txt
+grep "number of k-mers: 3,056" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
+
 echo "All tests succeeded!"
 exit 0

From f75d6db97e78792d3d9e7fd3299654abf6327050 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Sun, 8 Sep 2024 17:38:34 +0200
Subject: [PATCH 5/6] remove extra files and clean up test script

---
 src/kallisto/kallisto_index/k_index | Bin 4036 -> 0 bytes
 src/kallisto/kallisto_index/test.sh |  11 ++++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)
 delete mode 100644 src/kallisto/kallisto_index/k_index

diff --git a/src/kallisto/kallisto_index/k_index b/src/kallisto/kallisto_index/k_index
deleted file mode 100644
index cbb91fd2dafbbdbb7bc1a7a942bf09d8ce3c4ad6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4036
zcmeHK-;W#B6}}#io$Iv;vt}E1)3m+*k!V{Fwn1qKLM}62lMF1Xyo+f-qRmn*C{n1D
z_6b$E@=UzcS<<Y7`Viih2&tiU<1o-4vMPBiMabYPibi>;whvz5(Nfh)mX|(E)$fjH
zDBk=~{(y{T=H5B?obP_;JI7=DJ~BGa1xBL}!xP72zxWo$2XXBhnaL~esI}v^Ii$vb
znk;Ox?q+kom*|><j!@*GINWEG&G)voH92g`9k^P}t}*CXnT{#jhFoqBv1G3f$_U;}
zbX&k`q^#!rn%vn;<g5go=i7{$gPbYD_|FlvhizqSqoMUOVRHa5+hlet?L-V<Ng22g
zu_v{fcmzq?804(`Z@3t;)@J3kLEn)3EpTjhL8{d_*&cvV*J|1=S?g_srnbhqEjesu
zv|hq~Q5Z$L!CiwpfxpJXIMs5y(Kj#KrVMGcUK1&FkTUoqYn<lLNlGsFwy|;8O5~i>
zAV}ES#!`;Um9RH6)Jl{g!YTt!7t$Gg0u6HpG)z!~<*aQ80-UfR!x^MOR24_`wDx7&
zU?AMFus#6)zSTV09(G%UK6EhCb_+#7$uf3}K_cW~TZz7fZ8P?&oHAI-90IruoK_rp
z^f#Mu=x%0gtBFLzW*p7{??4$|&jbovZp+k6bFEAmQB@!#V^lJeG8*M}8kWA<7*Wod
zkU9<m&hCSgy=DiNwE>8AjRuUJO;{L;nBvU>=b=;IA(zTbcUGY&(D-R{%Mg5Wm>7+e
zgTp(>N+45e$T?GJxOLOoY4C;6{T4ElQz%tSMi)R(6sZh7Hk-WotGupVYa5n6ieWHR
z0`T2c2Kv|>!tf48e;ExV4FuFIXEYE?A-Or6Z>Q1I)NJ&v?XH>buQnhmYQ~eWGUYZz
z&753=VCX^UH01fUE<-A4fXbBD8t%u494v<i579p<Zrc)W|6%U+1HH&ULaAdPeCmBN
zJ%7*db2sIk_H6LqSW!eF5Hwh%%2-HviOp)dLaSLpm9#)U8(&;sVs%L^=<2dcpE;4I
zLA|m?g_&esSx~Fe1?6lpR{WPf9a&{!D5%i*Y+hx0T+-@FU5U(`7iaPs^+_@DNii5(
zlujp;Nt&<eD@+sf)A_nW2g(ES*Ywbo7^64Dz2ZV`^4s|$JG&mpzr|wNg8uZ?b?}ic
z#1ABDio{ZNVMU3G@j^0LrE2Z1qOM$_qFSV)E=8t9o#ulFmX|_j^k_OM8W)+AP^HMe
z$<Brl+cz86)@eYM=#21<S?M!6QIL`snGu_aOHUbMK2JjzH|i{8Dak90Mqerf3`vX5
z#)QYDxGv3VQ|~e@Ulm@^>9Q2hMi*I<8o~?>?0GntpLlzsdWjkINI?w7r71BU+*_S|
zQ<@YW_<pkZj>5!6@yB#`A+PzLl4{idwvwkptj2VmeMfqDIYymY@_Oy{isp`9&Af3c
zIOd?t;L)qEHxk!}ukqvgWp+;5I;@avi5{7YTp)p<->5$?oM0cTTc^9bHv;)fe?4>R
z;OW}WDhGX{;NQ4>L8(Z}gA-L!*<Ybg&`ZU|pZZ^W{ktT-Qh86=vaT4WqW*dJyY*=D
z1Vepa{6qF@Te0YI$tUef2-@dW#rK(SrSkQq@T<T4{l_b0vZU0`6INIdNQj8<iPfvW
zTdWhMun~*APyR!y7pIR%6(#iM={<+`RL}lKjm6So-+y2I+au30-_+xyEC_L5Y2G9G
zl{-7^igV2!D{jAs-SsHpEqH2*?0VU!5PIdB;iqJ3sSycZ7iX90{!c1#RiVD*f;jEd
zKMJi&SBa+VBb6tyq3=WT>u<g>8}bopnvlPUr+(yBf)IBb;_a(?2G_;?0wwH2?YGy?
zOdpc#!qUN-n92U+i-1CuXEzq~FE071?j__QA9ryo?t<SPE7Ce)Rm$|g2Y<l!2kSfI
zK5*j{cfq3*HIiig#wl&5|MlDW6E?22$LrJE`Fp_b==pp7sJM2I&kpx5d;gyQPGv^V
z_w*d;aIb%E-48pv#_JdAE~rP<xl*Zg^r$ycF^2_@{Cw1PbHeybBIFr7AaA^Bq2wTs
zpfDeu|9|AjJl2VS1oxW&Zi&n}+!=X}BlC7};;&<JcgR17KWF)+5TnV6G49ai4S?|%
zuJF}v+jG}D%p;6@{}I=HxcK4opg3D^K1P^FjTv~@or$WPD1T3n5j_vX-ES)((Xnmc
zK{$u_-cCN8IluUEQ-r7H(NSBC^yQs3>JDBj{#EG2CqQ`&fa`HfWZ219kX<!gOs9FB
zF5tg)ocMD)IrA#s7I{tuz0zFC`2_Gu$+KWmnkyof|D1N>`R$IPjx9bdkc!VwDq~r$
z6H?*DdLN{05aoZ*_&*WvC;JQPGucCr9rI$G$HT`SQpeU0&yB4=iuL<}$&bgg4p+ZJ
H*iZfk8k`iG

diff --git a/src/kallisto/kallisto_index/test.sh b/src/kallisto/kallisto_index/test.sh
index 40ed0aab..bd8ace10 100644
--- a/src/kallisto/kallisto_index/test.sh
+++ b/src/kallisto/kallisto_index/test.sh
@@ -1,21 +1,22 @@
 #!/bin/bash
 
-echo ">>>Test1: Testing $meta_functionality_name with make_unique argument"
+echo ">>>Test1: Testing $meta_functionality_name with non-default k-mer size"
 
 "$meta_executable" \
   --input "$meta_resources_dir/test_data/transcriptome.fasta" \
   --kallisto_index Kallisto \
-  --kmer_size 21 \
-  --make_unique 
+  --kmer_size 21
 
 
-echo ">>> Checking whether output exists"
+echo ">>> Checking whether output exists and is correct"
 [ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1
 [ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
 
 kallisto inspect Kallisto 2> test.txt
 grep "number of k-mers: 2,978" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
 
+################################################################################
+
 echo ">>>Test2: Testing $meta_functionality_name with d_list argument"
 
 "$meta_executable" \
@@ -23,7 +24,7 @@ echo ">>>Test2: Testing $meta_functionality_name with d_list argument"
   --kallisto_index Kallisto \
   --d_list "$meta_resources_dir/test_data/d_list.fasta"
 
-echo ">>> Checking whether output exists"
+echo ">>> Checking whether output exists and is correct"
 [ ! -f "Kallisto" ] && echo "Kallisto index does not exist!" && exit 1
 [ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
 

From ece9dbba3f4faa1b3bee4e58ce877eb759f111f6 Mon Sep 17 00:00:00 2001
From: emmarousseau <emmarou1@icloud.com>
Date: Wed, 11 Sep 2024 20:26:45 +0200
Subject: [PATCH 6/6] unset bool arguments, add missing arguments to script

---
 src/kallisto/kallisto_index/Kallisto        | Bin 0 -> 2439 bytes
 src/kallisto/kallisto_index/config.vsh.yaml |   9 +++++++--
 src/kallisto/kallisto_index/script.sh       |  17 +++++++++++++----
 src/kallisto/kallisto_index/test.sh         |  12 ++++++------
 4 files changed, 26 insertions(+), 12 deletions(-)
 create mode 100644 src/kallisto/kallisto_index/Kallisto

diff --git a/src/kallisto/kallisto_index/Kallisto b/src/kallisto/kallisto_index/Kallisto
new file mode 100644
index 0000000000000000000000000000000000000000..3c7b5b2bff962965d99ca3f9a4a6b6af6da1f3f0
GIT binary patch
literal 2439
zcmeHJTTBx{6rFCj(iW7(R4PH@f{iwcTCEBOksY_VO)N&jMkQb@MkU0z5<nES+iDtq
ztcrfC@k>b%pZJItq>6y3i5QJ<W1@)>C02jb;7mKSNlWm{Po|xmnK|d)x%cj5cE^Hf
zo5Ms=gP>q-=Dx`Y&8Tam%b*(*sAYc)aw*sH`%Qmb#irI!wJd#g^%N{jJ942Y*B;7v
zR8myiWp1c$UHLL&=A`AC^o$*g(yPKxE9&k2TL(+#tPc&Aci!%O@N%%KUe}u^mgT!#
zi%Y`WO!oXjbAkQw#f0xZP+kZo&F8eXH{r?boldZcY~7r&DBNCpW#D;g<%>%V>88S*
zn(fP&_poKg&FeY4ul8ovgjcuEYhCLuEry9t<T_<r;)x-6Y*vFQbgQZF#X(b>sbgo;
zfcbNEMVB@;wf0@jF3-`vqIs3Vln>G5z~*-Q*|O7R4?+L_8<&^;{;|Iv=yw^0x?1Fq
zKKH{t-pk+KZ7B%++|bB;H`mVIyPQk@FmSG}@6usnuL2HvP5$1gzh1Rfd<xy4PRpnY
z$lwb>lA=oM8O*p9pU%UxMoXtZa4MiI;e6lN9)Zj9SsczoL4F-nxW<!$M?XHx=`41$
zNSX;JB>7gjS;lL#ITXLZTe7Tm{6y<^@7*BFcmyOu`fe-N>GMM_MNm8n3s15~-A-du
zs>S(M$mE>7gVQsBMnsW@M&}f>b(D#qkcNO}MG=t0Wgt?+7*11VW2Q|{Q<m5`y=8^|
z9+;~?zuCji5Og|U(_<Ox<qoL${owUfYj%<xv>bnyveDt-4O(8?_>}HQ+|nsHK}JD>
zEI}_&#snCR!wU`=ImWmY2!IuUp@YzBuGIbjA?Q;}NCbS6Ej{38{Q(Ed(9~7CHlh~@
z(zu?LfHTHk>o~Hk>ib8~11>`F@%qmr>8X$)4UE=ZAnP=qIJp|ns6M_j(fMdSqjeZP
zKmX@E#Gf*HzUVyzWeLinBtr;M7o$H(mPA>GqA1d9hM{`wuLpH{uWG16io*!3&Oq!~
zY>JwOK3Z%+$HT0KEnpYTsK*f41@5@T5O@KlCdndB3}+_eA<9%le@sX@NTS*o#e2qq
z{mZk6y~(I%5^AViqJ%~m(IWP&+TTT!n9y(~NA!$eA9;u!LWn;?@K-_t>bR9cmu<nq
hRn%Ezn!9QyjNx;|7(NH_$x(BVXNzpCjqI)ke*rf>5}*J8

literal 0
HcmV?d00001

diff --git a/src/kallisto/kallisto_index/config.vsh.yaml b/src/kallisto/kallisto_index/config.vsh.yaml
index 3ae6241f..2c4f65c7 100644
--- a/src/kallisto/kallisto_index/config.vsh.yaml
+++ b/src/kallisto/kallisto_index/config.vsh.yaml
@@ -28,10 +28,9 @@ argument_groups:
 
 - name: "Output"
   arguments:
-  - name: "--kallisto_index"
+  - name: "--index"
     type: file
     direction: output
-    must_exist: false
     example: Kallisto_index
 
 - name: "Options"
@@ -63,6 +62,12 @@ argument_groups:
     type: integer
     description: |
       Maximum number of targets in an equivalence class (default: no maximum).
+  - name: "--tmp"
+    alternatives: ["-T"]
+    type: string
+    description: |
+      Path to a directory for temporary files.
+    example: "tmp"
 
 resources:
   - type: bash_script
diff --git a/src/kallisto/kallisto_index/script.sh b/src/kallisto/kallisto_index/script.sh
index b16e2781..59a5d3de 100644
--- a/src/kallisto/kallisto_index/script.sh
+++ b/src/kallisto/kallisto_index/script.sh
@@ -5,6 +5,13 @@
 
 set -eo pipefail
 
+# Flags whose value is the literal string "false" are unset, so that the
+# ${par_x:+--flag} expansions in the kallisto call below omit the flag.
+unset_if_false=( par_make_unique par_aa par_distinguish )
+for var in "${unset_if_false[@]}"; do
+    [[ "${!var}" == "false" ]] && unset "$var"
+done
+
 if [ -n "$par_kmer_size" ]; then
     if [[ "$par_kmer_size" -lt 1 || "$par_kmer_size" -gt 31 || $(( par_kmer_size % 2 )) -eq 0 ]]; then
         echo "Error: Kmer size must be an odd number between 1 and 31."
@@ -13,13 +20,15 @@ if [ -n "$par_kmer_size" ]; then
 fi
 
 kallisto index \
-    -i "${par_kallisto_index}" \
-    ${par_kmer_size:+--kmer-size $par_kmer_size} \
+    -i "${par_index}" \
+    ${par_kmer_size:+--kmer-size "${par_kmer_size}"} \
     ${par_make_unique:+--make-unique} \
     ${par_aa:+--aa} \
     ${par_distinguish:+--distinguish} \
-    ${par_min_size:+--min-size $par_min_size} \
-    ${par_ec_max_size:+--ec-max-size $par_ec_max_size} \
+    ${par_min_size:+--min-size "${par_min_size}"} \
+    ${par_ec_max_size:+--ec-max-size "${par_ec_max_size}"} \
     ${par_d_list:+--d-list "${par_d_list}"} \
+    ${meta_cpus:+--threads "${meta_cpus}"} \
+    ${par_tmp:+--tmp "${par_tmp}"} \
     "${par_input}"
 
diff --git a/src/kallisto/kallisto_index/test.sh b/src/kallisto/kallisto_index/test.sh
index bd8ace10..2646dcd8 100644
--- a/src/kallisto/kallisto_index/test.sh
+++ b/src/kallisto/kallisto_index/test.sh
@@ -1,10 +1,10 @@
 #!/bin/bash
 
-echo ">>>Test1: Testing $meta_functionality_name with non-default k-mer size"
+echo ">>>Test 1: Testing $meta_functionality_name with non-default k-mer size"
 
 "$meta_executable" \
   --input "$meta_resources_dir/test_data/transcriptome.fasta" \
-  --kallisto_index Kallisto \
+  --index Kallisto \
   --kmer_size 21
 
 
@@ -13,15 +13,15 @@ echo ">>> Checking whether output exists and is correct"
 [ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
 
 kallisto inspect Kallisto 2> test.txt
-grep "number of k-mers: 2,978" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
+grep "number of k-mers: 989" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
 
 ################################################################################
 
-echo ">>>Test2: Testing $meta_functionality_name with d_list argument"
+echo ">>>Test 2: Testing $meta_functionality_name with d_list argument"
 
 "$meta_executable" \
   --input "$meta_resources_dir/test_data/transcriptome.fasta" \
-  --kallisto_index Kallisto \
+  --index Kallisto \
   --d_list "$meta_resources_dir/test_data/d_list.fasta"
 
 echo ">>> Checking whether output exists and is correct"
@@ -29,7 +29,7 @@ echo ">>> Checking whether output exists and is correct"
 [ ! -s "Kallisto" ] && echo "Kallisto index is empty!" && exit 1
 
 kallisto inspect Kallisto 2> test.txt
-grep "number of k-mers: 3,056" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
+grep "number of k-mers: 959" test.txt || { echo "The content of the index seems to be incorrect." && exit 1; }
 
 echo "All tests succeeded!"
 exit 0