Skip to content

Commit

Permalink
Merge pull request #26 from viash-hub/dev/cutadapt_pr
Browse files Browse the repository at this point in the history
Propose changes to cutadapt
  • Loading branch information
rcannood authored Feb 27, 2024
2 parents 9f248c4 + 8370251 commit 35e7f3c
Show file tree
Hide file tree
Showing 7 changed files with 284 additions and 192 deletions.
7 changes: 5 additions & 2 deletions src/cutadapt/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,17 @@ functionality:
# - name: --untrimmed_paired_output
# - name: too_short_paired_output
# - name: too_long_paired_output
- name: Debug
arguments:
- type: boolean_true
name: --debug
description: Print debug information
resources:
- type: bash_script
path: script.sh
test_resources:
- type: bash_script
path: test.sh
- type: file
path: test_data
platforms:
- type: docker
image: python:3.12
Expand Down
213 changes: 101 additions & 112 deletions src/cutadapt/script.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
#!/bin/bash

## VIASH START
par_adapter='AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC;GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
par_input='src/cutadapt/test_data/se/a.fastq'
par_report='full'
par_json='false'
par_output='output'
par_fasta='false'
par_info_file='false'
par_debug='true'
## VIASH END

# TODO: change this?
# Resolve the output directory: fall back to the current directory when
# par_output is unset or empty, otherwise make sure the directory exists.
# The expansion is quoted so that an empty value or a path containing
# whitespace is tested as a single word instead of being word-split.
if [[ -z "$par_output" ]]; then
  par_output=.
else
  # Stop early when the directory cannot be created; every later step
  # writes into it.
  mkdir -p -- "$par_output" || {
    echo "ERROR: could not create output directory '$par_output'" >&2
    exit 1
  }
fi

# Print a debug message on stdout when debugging is enabled.
# Globals:   par_debug (read) - output is produced only when it equals "true"
# Arguments: any number of words; they are joined by single spaces
# Outputs:   "DEBUG: <words>" on stdout, nothing when debugging is disabled
# Returns:   0 in all cases, so that a disabled debug call is never mistaken
#            for a failure (e.g. under `set -e` or in `debug ... || ...`)
function debug {
  if [[ "$par_debug" == "true" ]]; then
    # "$*" joins the arguments into one string; "$@" inside a larger string
    # would split the message over several words.
    printf '%s\n' "DEBUG: $*"
  fi
  return 0
}

# Init
###########################################################
echo "Running cutadapt"
echo

echo ">> Paired-end data or not?"

mode=""
Expand All @@ -29,85 +43,45 @@ fi
# - string and fasta
###########################################################

multi_adapter=""
for adapter in `echo $par_adapter | tr ':' ' '`; do
multi_adapter="$multi_adapter --adapter $adapter"
done

multi_adapter_fasta=""
for adapter_fasta in `echo $par_adapter_fasta | tr ':' ' '`; do
multi_adapter_fasta="$multi_adapter_fasta --adapter file:$adapter_fasta"
done

multi_adapter_r2=""
for adapter_r2 in `echo $par_adapter_r2 | tr ':' ' '`; do
multi_adapter_r2="$multi_adapter_r2 --adapter_r2 $adapter_r2"
done

multi_adapter_fasta_r2=""
for adapter_fasta_r2 in `echo $par_adapter_fasta_r2 | tr ':' ' '`; do
multi_adapter_fasta_r2="$multi_adapter_fasta_r2 --adapter file:$adapter_fasta_r2"
done

multi_front=""
for front in `echo $par_front | tr ':' ' '`; do
multi_front="$multi_front --front $front"
done

multi_front_fasta=""
for front_fasta in `echo $par_front_fasta | tr ':' ' '`; do
multi_front_fasta="$multi_front_fasta --front file:$front_fasta"
done

multi_front_r2=""
for front_r2 in `echo $par_front_r2 | tr ':' ' '`; do
multi_front_r2="$multi_front_r2 --front_r2 $front_r2"
done

multi_front_fasta_r2=""
for front_fasta_r2 in `echo $par_front_fasta_r2 | tr ':' ' '`; do
multi_front_fasta_r2="$multi_front_fasta_r2 --front file:$front_fasta_r2"
done

multi_anywhere=""
for anywhere in `echo $par_anywhere | tr ':' ' '`; do
multi_anywhere="$multi_anywhere --anywhere $anywhere"
done

multi_anywhere_fasta=""
for anywhere_fasta in `echo $par_anywhere_fasta | tr ':' ' '`; do
multi_anywhere_fasta="$multi_anywhere_fasta --anywhere file:$anywhere_fasta"
done

multi_anywhere_r2=""
for anywhere_r2 in `echo $par_anywhere_r2 | tr ':' ' '`; do
multi_anywhere_r2="$multi_anywhere_r2 --anywhere_r2 $anywhere_r2"
done

multi_anywhere_fasta_r2=""
for anywhere_fasta_r2 in `echo $par_anywhere_fasta_r2 | tr ':' ' '`; do
multi_anywhere_fasta_r2="$multi_anywhere_fasta_r2 --anywhere file:$anywhere_fasta_r2"
done

echo ">> Parsing arguments dealing with adapters"
# Expand a ';'-separated list of values into repeated command line flags.
# Arguments: $1 - ';'-separated list of values (e.g. "AAA;CCC")
#            $2 - flag to emit in front of every value (e.g. "--adapter")
#            $3 - optional prefix glued to every value (e.g. "file:")
# Outputs:   one line on stdout of the form
#            "<flag> <prefix><value> <flag> <prefix><value> ..."
# Returns:   0; prints nothing when the list is empty
function add_flags {
  local arg=$1
  local flag=$2
  local prefix=${3:-}

  # This function should not be called if the input is empty
  # but check for it just in case
  if [[ -z "$arg" ]]; then
    return 0
  fi

  # Keep all helper variables local so nothing leaks into the caller.
  local -a values=()
  local -a words=()
  local value
  IFS=';' read -r -a values <<< "$arg"
  for value in "${values[@]}"; do
    # Skip empty entries (e.g. "A;;B") instead of emitting a flag that
    # has no value.
    [[ -n "$value" ]] || continue
    words+=("$flag" "$prefix$value")
  done

  # Join with single spaces; printing a quoted string prevents the values
  # from being glob-expanded against the working directory.
  local IFS=' '
  printf '%s\n' "${words[*]}"
}

debug ">> Parsing arguments dealing with adapters"
adapter_args=$(echo \
${par_adapter:+${multi_adapter}} \
${par_adapter_fasta:+${multi_adapter_fasta}} \
${par_front:+${multi_front}} \
${par_front_fasta:+${multi_front_fasta}} \
${par_anywhere:+${multi_anywhere}} \
${par_anywhere_fasta:+${multi_anywhere_fasta}} \
${par_adapter_r2:+${multi_adapter_r2}} \
${par_adapter_fasta_r2:+${multi_adapter_fasta_r2}} \
${par_front_r2:+${multi_front_r2}} \
${par_front_fasta_r2:+${multi_front_fasta_r2}} \
${par_anywhere_r2:+${multi_anywhere_r2}} \
${par_anywhere_fasta_r2:+${multi_anywhere_fasta_r2}} \
${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \
${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \
${par_front:+$(add_flags "$par_front" "--front")} \
${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \
${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \
${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \
${par_adapter_r2:+$(add_flags "$par_adapter_r2" "-A")} \
${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "-A" "file:")} \
${par_front_r2:+$(add_flags "$par_front_r2" "-G")} \
${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "-G" "file:")} \
${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "-B")} \
${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "-B" "file:")} \
)
echo "Arguments to cutadapt:"
echo "$adapter_args"
echo

debug "Arguments to cutadapt:"
debug "$adapter_args"
debug

# Paired-end options
###########################################################
Expand All @@ -120,9 +94,9 @@ paired_args=$(echo \
${par_pair_filter:+--pair-filter "${par_pair_filter}"} \
${par_interleaved:+--interleaved}
)
echo "Arguments to cutadapt:"
echo $paired_args
echo
debug "Arguments to cutadapt:"
debug $paired_args
debug

# Input arguments
###########################################################
Expand All @@ -142,9 +116,9 @@ input_args=$(echo \
${par_action:+--action "${par_action}"} \
${par_revcomp:+--revcomp} \
)
echo "Arguments to cutadapt:"
echo $input_args
echo
debug "Arguments to cutadapt:"
debug $input_args
debug

# Read modifications
###########################################################
Expand All @@ -170,9 +144,9 @@ mod_args=$(echo \
${par_rename:+--rename "${par_rename}"} \
${par_zero_cap:+--zero-cap} \
)
echo "Arguments to cutadapt:"
echo $mod_args
echo
debug "Arguments to cutadapt:"
debug $mod_args
debug

# Filtering of processed reads arguments
###########################################################
Expand All @@ -194,46 +168,58 @@ filter_args=$(echo \
${par_discard_untrimmed:+--discard-untrimmed} \
${par_discard_casava:+--discard-casava} \
)
echo "Arguments to cutadapt:"
echo $filter_args
echo
debug "Arguments to cutadapt:"
debug $filter_args
debug

# Output arguments
# We write the output to a directory rather than
# individual files.
# Optional output arguments
###########################################################
echo ">> Output arguments"
echo ">> Optional arguments"
[[ "$par_json" == "false" ]] && unset par_json
[[ "$par_fasta" == "false" ]] && unset par_fasta
[[ "$par_info_file" == "false" ]] && unset par_info_file

optional_output_args=$(echo \
${par_report:+--report "${par_report}"} \
${par_json:+--json "${par_output}/report.json"} \
${par_fasta:+--fasta} \
${par_info_file:+--info-file "$par_output/info.txt"} \
)

debug "Arguments to cutadapt:"
debug $optional_output_args
debug

# Output arguments
# We write the output to a directory rather than
# individual files.
###########################################################

if [[ -z $par_fasta ]]; then
ext="fastq"
else
ext="fasta"
fi

if [ $mode = "se" ]; then
output_args=$(echo \
${par_report:+--report "${par_report}"} \
${par_json:+--json "${par_output}/report.json"} \
--output "$par_output/{name}_R1_001.fastq" \
${par_fasta:+--fasta} \
${par_info_file:+--info-file} \
--output "$par_output/{name}_001.$ext" \
)
else
output_args=$(echo \
${par_report:+--report "${par_report}"} \
${par_json:+--json "${par_output}/report.json"} \
--output "$par_output/{name}_R1_001.fastq" \
--paired-output "$par_output/{name}_R2_001.fastq" \
${par_fasta:+--fasta} \
${par_info_file:+--info-file} \
--output "$par_output/{name}_R1_001.$ext" \
--paired-output "$par_output/{name}_R2_001.$ext" \
)
fi
echo "Arguments to cutadapt:"
echo $output_args
echo

debug "Arguments to cutadapt:"
debug $output_args
debug

# Full CLI
# Set the --cores argument to 0 unless meta_cpus is set
###########################################################
echo ">> Full CLI to be run:"

echo ">> Running cutadapt"
par_cpus=0
[[ ! -z $meta_cpus ]] && par_cpus=$meta_cpus

Expand All @@ -244,10 +230,13 @@ cli=$(echo \
$input_args \
$mod_args \
$filter_args \
$optional_output_args \
$output_args \
--cores $par_cpus
)

echo cutadapt $cli | sed -e 's/--/\r\n --/g'
debug ">> Full CLI to be run:"
debug cutadapt $cli | sed -e 's/--/\r\n --/g'
debug

cutadapt $cli | tee $par_output/report.txt
Loading

0 comments on commit 35e7f3c

Please sign in to comment.