Skip to content

Commit

Permalink
First version of script
Browse files Browse the repository at this point in the history
  • Loading branch information
tverbeiren committed Feb 7, 2024
1 parent 0de2a36 commit e0685a5
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 50 deletions.
16 changes: 8 additions & 8 deletions src/cutadapt/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,15 @@ functionality:
- name: Specify Adapters using Fasta files for R1
arguments:
- name: --adapter_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
of the first read). The adapter and subsequent bases are
trimmed. If a '$' character is appended ('anchoring'), the
adapter is only found if it is a suffix of the read.
required: false
- name: --front_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
of the first read). The adapter and any preceding bases
Expand All @@ -69,7 +69,7 @@ functionality:
only found if it is a prefix of the read.
required: false
- name: --anywhere_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
end (paired data: of the first read). Both types of
Expand Down Expand Up @@ -122,15 +122,15 @@ functionality:
- name: Specify Adapters using Fasta files for R2
arguments:
- name: --adapterR2_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter ligated to the 3' end (paired data:
of the second read). The adapter and subsequent bases are
trimmed. If a '$' character is appended ('anchoring'), the
adapter is only found if it is a suffix of the read.
required: false
- name: --frontR2_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter ligated to the 5' end (paired data:
of the second read). The adapter and any preceding bases
Expand All @@ -139,7 +139,7 @@ functionality:
only found if it is a prefix of the read.
required: false
- name: --anywhereR2_fasta
type: string
type: file
description: |
Fasta file containing sequences of an adapter that may be ligated to the 5' or 3'
end (paired data: of the second read). Both types of
Expand Down Expand Up @@ -200,11 +200,11 @@ functionality:
adapter to be found.
default: 3
- name: --match_read_wildcards
type: boolean_false
type: boolean_true
description: |
Interpret IUPAC wildcards in reads.
- name: --no_match_adapter_wildcards
type: boolean_true
type: boolean_false
description: |
Do not interpret IUPAC wildcards in adapters.
- name: --action
Expand Down
243 changes: 201 additions & 42 deletions src/cutadapt/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,6 @@ else
mkdir -p "$par_output"
fi

echo "par_adapter: $par_adapter"
echo "par_front: $par_front"
echo "par_anywhere: $par_anywhere"
echo "par_adapter_fasta: $par_adapter_fasta"
echo "par_front_fasta: $par_front_fasta"
echo "par_anywhere_fasta: $par_anywhere_fasta"
echo "par_adapterR2: $par_adapterR2"
echo "par_frontR2: $par_frontR2"
echo "par_anywhereR2: $par_anywhereR2"
echo "par_adapterR2_fasta: $par_adapterR2_fasta"
echo "par_frontR2_fasta: $par_frontR2_fasta"
echo "par_anywhereR2_fasta: $par_anywhereR2_fasta"
echo "par_pair_adapters: $par_pair_adapters"
echo "par_pair_filter: $par_pair_filter"
echo "par_interleaved: $par_interleaved"
Expand Down Expand Up @@ -58,35 +46,206 @@ echo "par_output: $par_output"
echo "par_fasta: $par_fasta"
echo "par_info_file: $par_info_file"

# for f in $par_input; do
# [ ! -f "$f" ] && echo "The input file $f does not exist" && exit 1
# done
#
# barcodesFasta="barcodes.fasta"
#
# awk '{print ">"$1"\n""^"$1}' $par_barcodesFile >$barcodesFasta
#
# fastqFiles=$(echo $par_input | tr " " "\n")
# for file in $fastqFiles; do
# if echo "$file" | grep -q R1; then
# input_R1=$(echo $file | grep R1)
# fi
# if echo "$file" | grep -q R2; then
# input_R2=$(echo $file | grep R2)
# fi
# done
# demuxFilesIn="$input_R1 $input_R2"
#
# # Note to self:
# # The eval is here to expand shell globs, this way it is possible to use
# # for instance pointers to ".../...R?....fastq", but please use the double
# # quotes and an absolute path!
# eval /usr/local/bin/cutadapt \
# -e "$par_e" \
# --no-indels \
# --action=none \
# --cores=0 \
# -g "file:$barcodesFasta" \
# Do we get explicit adapter sequences or a FASTA file?
# Let the underlying tool deal with inconsistent states.

#######################################
# Report how one adapter parameter was supplied.
# Arguments: $1 - NAME of the variable holding the FASTA path
# Outputs:   "fasta" when that variable is set (even to the empty string),
#            "plain" when it is unset
#######################################
adapter_input_mode() {
  local var_name=$1
  # ${!var_name+set} expands to "set" only when the variable named by
  # $var_name exists, so an empty-but-set value still counts as "fasta".
  if [[ -n "${!var_name+set}" ]]; then
    echo "fasta"
  else
    echo "plain"
  fi
}

#######################################
# (Re)compute the six <kind>_mode_<read> variables.
# Globals:   par_*_fasta (read); adapter_mode_R1, front_mode_R1,
#            anywhere_mode_R1, adapter_mode_R2, front_mode_R2,
#            anywhere_mode_R2 (written)
#######################################
detect_adapter_modes() {
  adapter_mode_R1=$(adapter_input_mode par_adapter_fasta)
  front_mode_R1=$(adapter_input_mode par_front_fasta)
  anywhere_mode_R1=$(adapter_input_mode par_anywhere_fasta)
  # The R2 parameters are spelled par_<kind>R2_fasta (the "R2" comes before
  # "_fasta"); checking par_<kind>_fastaR2 would never see them.
  adapter_mode_R2=$(adapter_input_mode par_adapterR2_fasta)
  front_mode_R2=$(adapter_input_mode par_frontR2_fasta)
  anywhere_mode_R2=$(adapter_input_mode par_anywhereR2_fasta)
}

detect_adapter_modes

# Print a banner followed by the per-read adapter input modes.
# One printf call emits every line; the empty strings produce the blank
# separator lines.
printf '%s\n' \
  "Running cutadapt" \
  "" \
  "Adapter settings" \
  "----------------" \
  "Adapter Mode R1 : $adapter_mode_R1" \
  "Front Mode R1 : $front_mode_R1" \
  "Anywhere Mode R1 : $anywhere_mode_R1" \
  "Adapter Mode R2 : $adapter_mode_R2" \
  "Front Mode R2 : $front_mode_R2" \
  "Anywhere Mode R2 : $anywhere_mode_R2" \
  ""

# Adapter arguments
# - paired and single-end
# - string and fasta
###########################################################

#######################################
# Assemble the adapter-related cutadapt options.
# Globals:   par_adapter, par_front, par_anywhere, par_adapterR2,
#            par_frontR2, par_anywhereR2 and their *_fasta variants (read)
# Outputs:   the options as one space-separated line on stdout; an empty
#            line when none of the parameters is set to a non-empty value
#######################################
build_adapter_args() {
  local IFS=' '
  # cutadapt only offers short options for the second read: -A, -G and -B
  # are the R2 counterparts of --adapter, --front and --anywhere.
  # FASTA inputs are passed with cutadapt's "file:" prefix.
  local -a args=(
    ${par_adapter:+--adapter "${par_adapter}"}
    ${par_adapter_fasta:+--adapter "file:${par_adapter_fasta}"}
    ${par_front:+--front "${par_front}"}
    ${par_front_fasta:+--front "file:${par_front_fasta}"}
    ${par_anywhere:+--anywhere "${par_anywhere}"}
    ${par_anywhere_fasta:+--anywhere "file:${par_anywhere_fasta}"}
    ${par_adapterR2:+-A "${par_adapterR2}"}
    ${par_adapterR2_fasta:+-A "file:${par_adapterR2_fasta}"}
    ${par_frontR2:+-G "${par_frontR2}"}
    ${par_frontR2_fasta:+-G "file:${par_frontR2_fasta}"}
    ${par_anywhereR2:+-B "${par_anywhereR2}"}
    ${par_anywhereR2_fasta:+-B "file:${par_anywhereR2_fasta}"}
  )
  printf '%s\n' "${args[*]}"
}

echo ">> Parsing arguments dealing with adapters"
adapter_args=$(build_adapter_args)
echo "Arguments to cutadapt:"
echo "$adapter_args"
echo

# Paired-end options
###########################################################
echo ">> Parsing arguments for paired-end reads"

# A switch whose value is the literal string "false" is unset, so the
# non-empty checks below treat it as absent.
if [[ "$par_pair_adapters" == "false" ]]; then
  unset par_pair_adapters
fi
if [[ "$par_interleaved" == "false" ]]; then
  unset par_interleaved
fi

# Collect the options in the positional parameters of a subshell and let
# echo join them with single spaces.
paired_args=$(
  set --
  if [[ -n "$par_pair_adapters" ]]; then
    set -- "$@" --pair-adapters
  fi
  if [[ -n "$par_pair_filter" ]]; then
    set -- "$@" --pair-filter "${par_pair_filter}"
  fi
  if [[ -n "$par_interleaved" ]]; then
    set -- "$@" --interleaved
  fi
  echo "$@"
)
echo "Arguments to cutadapt:"
# NOTE(review): expansion left unquoted so the logged text is unchanged.
echo $paired_args
echo

# Input arguments
###########################################################

#######################################
# Assemble the adapter-matching cutadapt options.
# Globals:   par_error_rate, par_no_indels, par_times, par_overlap,
#            par_match_read_wildcards, par_no_match_adapter_wildcards,
#            par_action, par_revcomp (read)
# Outputs:   the options as one space-separated line on stdout
#######################################
build_input_args() {
  local IFS=' '
  local -a args=(
    # Long options need two dashes; "-error-rate" is not a cutadapt option.
    ${par_error_rate:+--error-rate "${par_error_rate}"}
    ${par_no_indels:+--no-indels}
    ${par_times:+--times "${par_times}"}
    ${par_overlap:+--overlap "${par_overlap}"}
    ${par_match_read_wildcards:+--match-read-wildcards}
    ${par_no_match_adapter_wildcards:+--no-match-adapter-wildcards}
    ${par_action:+--action "${par_action}"}
    ${par_revcomp:+--revcomp}
  )
  printf '%s\n' "${args[*]}"
}

echo ">> Parsing input arguments"
# Switches are unset when they carry their "inactive" value so that the
# ${var:+...} checks skip them. The two no_* switches are treated as
# inverted: "true" is their inactive value.
# NOTE(review): confirm par_no_indels is declared as an inverted boolean.
[[ "$par_no_indels" == "true" ]] && unset par_no_indels
[[ "$par_match_read_wildcards" == "false" ]] && unset par_match_read_wildcards
[[ "$par_no_match_adapter_wildcards" == "true" ]] && unset par_no_match_adapter_wildcards
[[ "$par_revcomp" == "false" ]] && unset par_revcomp

input_args=$(build_input_args)
echo "Arguments to cutadapt:"
echo "$input_args"
echo

# Read modifications
###########################################################

#######################################
# Assemble the read-modification cutadapt options.
# Globals:   par_cut, par_cutR2, par_nextseq_trim, par_quality_cutoff,
#            par_quality_cutoffR2, par_quality_base, par_poly_a, par_length,
#            par_trim_n, par_length_tag, par_strip_suffix, par_prefix,
#            par_suffix, par_rename, par_zero_cap (read)
# Outputs:   the options as one space-separated line on stdout
#######################################
build_mod_args() {
  local IFS=' '
  # cutadapt has no long options for the second read: -U is the R2
  # counterpart of --cut and -Q the R2 counterpart of --quality-cutoff.
  local -a args=(
    ${par_cut:+--cut "${par_cut}"}
    ${par_cutR2:+-U "${par_cutR2}"}
    ${par_nextseq_trim:+--nextseq-trim "${par_nextseq_trim}"}
    ${par_quality_cutoff:+--quality-cutoff "${par_quality_cutoff}"}
    ${par_quality_cutoffR2:+-Q "${par_quality_cutoffR2}"}
    ${par_quality_base:+--quality-base "${par_quality_base}"}
    ${par_poly_a:+--poly-a}
    ${par_length:+--length "${par_length}"}
    ${par_trim_n:+--trim-n}
    ${par_length_tag:+--length-tag "${par_length_tag}"}
    ${par_strip_suffix:+--strip-suffix "${par_strip_suffix}"}
    ${par_prefix:+--prefix "${par_prefix}"}
    ${par_suffix:+--suffix "${par_suffix}"}
    ${par_rename:+--rename "${par_rename}"}
    ${par_zero_cap:+--zero-cap}
  )
  printf '%s\n' "${args[*]}"
}

echo ">> Parsing read modification arguments"
# A switch holding the literal string "false" is unset so that the
# ${var:+...} checks skip it.
[[ "$par_poly_a" == "false" ]] && unset par_poly_a
[[ "$par_trim_n" == "false" ]] && unset par_trim_n
[[ "$par_zero_cap" == "false" ]] && unset par_zero_cap

mod_args=$(build_mod_args)
echo "Arguments to cutadapt:"
echo "$mod_args"
echo

# Filtering of processed reads arguments
###########################################################
echo ">> Filtering of processed reads arguments"

# A switch holding the literal string "false" is unset, so the non-empty
# checks below treat it as absent.
if [[ "$par_discard_trimmed" == "false" ]]; then
  unset par_discard_trimmed
fi
if [[ "$par_discard_untrimmed" == "false" ]]; then
  unset par_discard_untrimmed
fi
if [[ "$par_discard_casava" == "false" ]]; then
  unset par_discard_casava
fi

# Collect the options in the positional parameters of a subshell and let
# echo join them with single spaces.
filter_args=$(
  set --
  if [[ -n "$par_minimum_length" ]]; then
    set -- "$@" --minimum-length "${par_minimum_length}"
  fi
  if [[ -n "$par_maximum_length" ]]; then
    set -- "$@" --maximum-length "${par_maximum_length}"
  fi
  if [[ -n "$par_max_n" ]]; then
    set -- "$@" --max-n "${par_max_n}"
  fi
  if [[ -n "$par_max_expected_errors" ]]; then
    set -- "$@" --max-expected-errors "${par_max_expected_errors}"
  fi
  if [[ -n "$par_max_average_error_rate" ]]; then
    set -- "$@" --max-average-error-rate "${par_max_average_error_rate}"
  fi
  if [[ -n "$par_discard_trimmed" ]]; then
    set -- "$@" --discard-trimmed
  fi
  if [[ -n "$par_discard_untrimmed" ]]; then
    set -- "$@" --discard-untrimmed
  fi
  if [[ -n "$par_discard_casava" ]]; then
    set -- "$@" --discard-casava
  fi
  echo "$@"
)
echo "Arguments to cutadapt:"
# NOTE(review): expansion left unquoted so the logged text is unchanged.
echo $filter_args
echo

# Output arguments
# We write the output to a directory rather than
# individual files.
###########################################################

#######################################
# Assemble the output-related cutadapt options.
# Globals:   par_report, par_json, par_fasta, par_info_file,
#            par_output (read)
# Outputs:   the options as one space-separated line on stdout
#######################################
build_output_args() {
  local IFS=' '
  local -a args=(
    ${par_report:+--report "${par_report}"}
    # --json and --info-file each take a FILE argument in cutadapt. The
    # parameters here are on/off switches, so the files are placed inside
    # the output directory.
    # NOTE(review): the file names report.json and info.txt are a choice
    # made in this fix; adjust if another layout is expected.
    ${par_json:+--json "${par_output}/report.json"}
    -o "$par_output/{name}_R1_001.fastq"
    # The second read of a pair goes to its own _R2_ file; pointing -p at
    # the _R1_ path would make both reads target the same file.
    -p "$par_output/{name}_R2_001.fastq"
    ${par_fasta:+--fasta}
    ${par_info_file:+--info-file "${par_output}/info.txt"}
  )
  printf '%s\n' "${args[*]}"
}

echo ">> Output arguments"
# A switch holding the literal string "false" is unset so that the
# ${var:+...} checks skip it.
[[ "$par_json" == "false" ]] && unset par_json
[[ "$par_fasta" == "false" ]] && unset par_fasta
[[ "$par_info_file" == "false" ]] && unset par_info_file

output_args=$(build_output_args)
echo "Arguments to cutadapt:"
echo "$output_args"
echo

# Join the program name and every argument group into one command line
# and print it. The command is only displayed here; it is not executed.
echo ">> Full CLI to be run:"
cli=$(
  set -- "cutadapt"
  # Each group is expanded unquoted so it splits back into separate words
  # before echo joins everything with single spaces.
  set -- "$@" $adapter_args
  set -- "$@" $paired_args
  set -- "$@" $input_args
  set -- "$@" $mod_args
  set -- "$@" $filter_args
  set -- "$@" $output_args
  echo "$@"
)

echo $cli

# $( "$cli" ) > $par_output/report.txt

0 comments on commit e0685a5

Please sign in to comment.