Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propose changes to cutadapt #26

Merged
merged 11 commits into from
Feb 27, 2024
7 changes: 5 additions & 2 deletions src/cutadapt/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -437,14 +437,17 @@ functionality:
# - name: --untrimmed_paired_output
# - name: too_short_paired_output
# - name: too_long_paired_output
- name: Debug
arguments:
- type: boolean_true
name: --debug
description: Print debug information
resources:
- type: bash_script
path: script.sh
test_resources:
- type: bash_script
path: test.sh
- type: file
path: test_data
platforms:
- type: docker
image: python:3.12
Expand Down
213 changes: 101 additions & 112 deletions src/cutadapt/script.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
#!/bin/bash

## VIASH START
par_adapter='AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC;GGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'
par_input='src/cutadapt/test_data/se/a.fastq'
par_report='full'
par_json='false'
par_output='output'
par_fasta='false'
par_info_file='false'
par_debug='true'
## VIASH END

# Decide where results are written: fall back to the current directory when
# no output location was given, otherwise make sure the directory exists.
# The expansions are quoted so that paths containing spaces work.
# TODO: change this?
if [[ -z "$par_output" ]]; then
  par_output=.
else
  mkdir -p -- "$par_output"
fi

#######################################
# Print a diagnostic line on stdout, but only when par_debug is "true".
# Globals:   par_debug (read)
# Arguments: the words of the message; they are joined with single spaces
# Outputs:   "DEBUG: <message>" on stdout when debugging is enabled
# Returns:   0 always, so a disabled debug call never looks like a failure
#            to the caller (e.g. under `set -e` or in an `&&` chain)
#######################################
function debug {
  if [[ "$par_debug" == "true" ]]; then
    # "$*" (not "$@") because the message is one string, not a word list.
    echo "DEBUG: $*"
  fi
  return 0
}

# Init
###########################################################
echo "Running cutadapt"
echo

echo ">> Paired-end data or not?"

mode=""
Expand All @@ -29,85 +43,45 @@ fi
# - string and fasta
###########################################################

multi_adapter=""
for adapter in `echo $par_adapter | tr ':' ' '`; do
multi_adapter="$multi_adapter --adapter $adapter"
done

multi_adapter_fasta=""
for adapter_fasta in `echo $par_adapter_fasta | tr ':' ' '`; do
multi_adapter_fasta="$multi_adapter_fasta --adapter file:$adapter_fasta"
done

multi_adapter_r2=""
for adapter_r2 in `echo $par_adapter_r2 | tr ':' ' '`; do
multi_adapter_r2="$multi_adapter_r2 --adapter_r2 $adapter_r2"
done

multi_adapter_fasta_r2=""
for adapter_fasta_r2 in `echo $par_adapter_fasta_r2 | tr ':' ' '`; do
multi_adapter_fasta_r2="$multi_adapter_fasta_r2 --adapter file:$adapter_fasta_r2"
done

multi_front=""
for front in `echo $par_front | tr ':' ' '`; do
multi_front="$multi_front --front $front"
done

multi_front_fasta=""
for front_fasta in `echo $par_front_fasta | tr ':' ' '`; do
multi_front_fasta="$multi_front_fasta --front file:$front_fasta"
done

multi_front_r2=""
for front_r2 in `echo $par_front_r2 | tr ':' ' '`; do
multi_front_r2="$multi_front_r2 --front_r2 $front_r2"
done

multi_front_fasta_r2=""
for front_fasta_r2 in `echo $par_front_fasta_r2 | tr ':' ' '`; do
multi_front_fasta_r2="$multi_front_fasta_r2 --front file:$front_fasta_r2"
done

multi_anywhere=""
for anywhere in `echo $par_anywhere | tr ':' ' '`; do
multi_anywhere="$multi_anywhere --anywhere $anywhere"
done

multi_anywhere_fasta=""
for anywhere_fasta in `echo $par_anywhere_fasta | tr ':' ' '`; do
multi_anywhere_fasta="$multi_anywhere_fasta --anywhere file:$anywhere_fasta"
done

multi_anywhere_r2=""
for anywhere_r2 in `echo $par_anywhere_r2 | tr ':' ' '`; do
multi_anywhere_r2="$multi_anywhere_r2 --anywhere_r2 $anywhere_r2"
done

multi_anywhere_fasta_r2=""
for anywhere_fasta_r2 in `echo $par_anywhere_fasta_r2 | tr ':' ' '`; do
multi_anywhere_fasta_r2="$multi_anywhere_fasta_r2 --anywhere file:$anywhere_fasta_r2"
done

echo ">> Parsing arguments dealing with adapters"
#######################################
# Expand a ';'-separated list of values into repeated command-line flags.
# Example: add_flags "AAA;CCC" "--adapter"   -> --adapter AAA --adapter CCC
#          add_flags "a.fa" "--front" "file:" -> --front file:a.fa
# Arguments:
#   $1 - ';'-separated list of values
#   $2 - flag to emit in front of every value
#   $3 - optional prefix glued onto every value (e.g. "file:")
# Outputs:   the flag/value pairs on stdout, separated by single spaces;
#            nothing at all when there is no value to emit
# Returns:   0
#######################################
function add_flags {
  local arg=$1
  local flag=$2
  local prefix=${3:-}
  local -a values=()
  local value
  local output=""

  # This function should not be called if the input is empty
  # but check for it just in case
  if [[ -z "$arg" ]]; then
    return 0
  fi

  # IFS is only changed for the duration of this one `read`.
  IFS=';' read -r -a values <<< "$arg"
  for value in "${values[@]}"; do
    # Skip empty entries (e.g. "A;;B"): a flag without a value would
    # swallow whatever option follows it on the command line.
    [[ -z "$value" ]] && continue
    output+="${output:+ }${flag} ${prefix}${value}"
  done

  # Quoted output so that characters such as '*' in a value are passed
  # through literally instead of being glob-expanded here.
  if [[ -n "$output" ]]; then
    printf '%s\n' "$output"
  fi
}

debug ">> Parsing arguments dealing with adapters"
adapter_args=$(echo \
${par_adapter:+${multi_adapter}} \
${par_adapter_fasta:+${multi_adapter_fasta}} \
${par_front:+${multi_front}} \
${par_front_fasta:+${multi_front_fasta}} \
${par_anywhere:+${multi_anywhere}} \
${par_anywhere_fasta:+${multi_anywhere_fasta}} \

${par_adapter_r2:+${multi_adapter_r2}} \
${par_adapter_fasta_r2:+${multi_adapter_fasta_r2}} \
${par_front_r2:+${multi_front_r2}} \
${par_front_fasta_r2:+${multi_front_fasta_r2}} \
${par_anywhere_r2:+${multi_anywhere_r2}} \
${par_anywhere_fasta_r2:+${multi_anywhere_fasta_r2}} \
${par_adapter:+$(add_flags "$par_adapter" "--adapter")} \
${par_adapter_fasta:+$(add_flags "$par_adapter_fasta" "--adapter" "file:")} \
${par_front:+$(add_flags "$par_front" "--front")} \
${par_front_fasta:+$(add_flags "$par_front_fasta" "--front" "file:")} \
${par_anywhere:+$(add_flags "$par_anywhere" "--anywhere")} \
${par_anywhere_fasta:+$(add_flags "$par_anywhere_fasta" "--anywhere" "file:")} \
${par_adapter_r2:+$(add_flags "$par_adapter_r2" "-A")} \
${par_adapter_fasta_r2:+$(add_flags "$par_adapter_fasta_r2" "-A" "file:")} \
${par_front_r2:+$(add_flags "$par_front_r2" "-G")} \
${par_front_fasta_r2:+$(add_flags "$par_front_fasta_r2" "-G" "file:")} \
${par_anywhere_r2:+$(add_flags "$par_anywhere_r2" "-B")} \
${par_anywhere_fasta_r2:+$(add_flags "$par_anywhere_fasta_r2" "-B" "file:")} \
)
echo "Arguments to cutadapt:"
echo "$adapter_args"
echo

debug "Arguments to cutadapt:"
debug "$adapter_args"
debug

# Paired-end options
###########################################################
Expand All @@ -120,9 +94,9 @@ paired_args=$(echo \
${par_pair_filter:+--pair-filter "${par_pair_filter}"} \
${par_interleaved:+--interleaved}
)
echo "Arguments to cutadapt:"
echo $paired_args
echo
debug "Arguments to cutadapt:"
debug $paired_args
debug

# Input arguments
###########################################################
Expand All @@ -142,9 +116,9 @@ input_args=$(echo \
${par_action:+--action "${par_action}"} \
${par_revcomp:+--revcomp} \
)
echo "Arguments to cutadapt:"
echo $input_args
echo
debug "Arguments to cutadapt:"
debug $input_args
debug

# Read modifications
###########################################################
Expand All @@ -170,9 +144,9 @@ mod_args=$(echo \
${par_rename:+--rename "${par_rename}"} \
${par_zero_cap:+--zero-cap} \
)
echo "Arguments to cutadapt:"
echo $mod_args
echo
debug "Arguments to cutadapt:"
debug $mod_args
debug

# Filtering of processed reads arguments
###########################################################
Expand All @@ -194,46 +168,58 @@ filter_args=$(echo \
${par_discard_untrimmed:+--discard-untrimmed} \
${par_discard_casava:+--discard-casava} \
)
echo "Arguments to cutadapt:"
echo $filter_args
echo
debug "Arguments to cutadapt:"
debug $filter_args
debug

# Output arguments
# We write the output to a directory rather than
# individual files.
# Optional output arguments
###########################################################
echo ">> Output arguments"
echo ">> Optional arguments"
# Switches that hold the literal string "false" are removed entirely, so the
# ${var:+...} expansions below treat them the same as options never given.
if [[ "$par_json" == "false" ]]; then unset par_json; fi
if [[ "$par_fasta" == "false" ]]; then unset par_fasta; fi
if [[ "$par_info_file" == "false" ]]; then unset par_info_file; fi

# Gather the optional reporting flags into one space-separated string.
# Report and info files are placed inside the output directory.
optional_output_args=$(
  echo \
    ${par_report:+--report "${par_report}"} \
    ${par_json:+--json "${par_output}/report.json"} \
    ${par_fasta:+--fasta} \
    ${par_info_file:+--info-file "$par_output/info.txt"}
)

debug "Arguments to cutadapt:"
debug $optional_output_args
debug

# Output arguments
# We write the output to a directory rather than
# individual files.
###########################################################

# File extension for the output names: "fasta" when par_fasta is set and
# non-empty, "fastq" in every other case.
ext=${par_fasta:+fasta}
ext=${ext:-fastq}

if [ $mode = "se" ]; then
output_args=$(echo \
${par_report:+--report "${par_report}"} \
${par_json:+--json "${par_output}/report.json"} \
--output "$par_output/{name}_R1_001.fastq" \
${par_fasta:+--fasta} \
${par_info_file:+--info-file} \
--output "$par_output/{name}_001.$ext" \
)
else
output_args=$(echo \
${par_report:+--report "${par_report}"} \
${par_json:+--json "${par_output}/report.json"} \
--output "$par_output/{name}_R1_001.fastq" \
--paired-output "$par_output/{name}_R2_001.fastq" \
${par_fasta:+--fasta} \
${par_info_file:+--info-file} \
--output "$par_output/{name}_R1_001.$ext" \
--paired-output "$par_output/{name}_R2_001.$ext" \
)
fi
echo "Arguments to cutadapt:"
echo $output_args
echo

debug "Arguments to cutadapt:"
debug $output_args
debug

# Full CLI
# Set the --cores argument to 0 unless meta_cpus is set
###########################################################
echo ">> Full CLI to be run:"

echo ">> Running cutadapt"
# Use meta_cpus for --cores when it is set and non-empty, otherwise 0.
par_cpus=${meta_cpus:-0}

Expand All @@ -244,10 +230,13 @@ cli=$(echo \
$input_args \
$mod_args \
$filter_args \
$optional_output_args \
$output_args \
--cores $par_cpus
)

echo cutadapt $cli | sed -e 's/--/\r\n --/g'
debug ">> Full CLI to be run:"
debug cutadapt $cli | sed -e 's/--/\r\n --/g'
debug

# Run cutadapt, copying its stdout both to the console and to report.txt in
# the output directory. $cli is intentionally unquoted: it holds the complete
# space-separated argument list and must be split into words here.
# shellcheck disable=SC2086
cutadapt $cli | tee "$par_output/report.txt"
# A pipeline reports the exit status of its last command (tee), which would
# hide a cutadapt failure. Read cutadapt's own status and propagate it.
cutadapt_status=${PIPESTATUS[0]}
if [[ "$cutadapt_status" -ne 0 ]]; then
  exit "$cutadapt_status"
fi
Loading