Skip to content

Commit

Permalink
Arguments closer to original tool's
Browse files: browse the repository at this point in the history
  • Loading branch information
emmarousseau committed Sep 10, 2024
1 parent 3891f24 commit 1a5cca1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 62 deletions.
29 changes: 4 additions & 25 deletions src/bbmap_bbsplit/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,15 @@ argument_groups:
type: file
multiple: true
description: Input fastq files, either one or two (paired), separated by ";".
multiple_sep: ","
example: reads.fastq
- name: "--primary_ref"
- name: "--ref"
type: file
description: Primary reference FASTA
- name: "--other_ref_names"
type: file
description: |
Path to comma-separated file containing a list of reference genomes to filter reads
against with BBSplit.
multiple: true
description: Reference FASTA files, separated by ";". The primary reference should be specified first.
- name: "--only_build_index"
type: boolean_true
description: If set, only builds the index. Otherwise, mapping is performed.
- name: "--index"
- name: "--build"
type: string
description: |
Designate index to use. Corresponds to the number specified when building the index.
Expand Down Expand Up @@ -112,22 +107,6 @@ argument_groups:
Output file for read 2.
direction: output
example: read_out2.fastq
- name: "--primary_fastq"
type: file
description: |
Output reads that map to the primary reference.
direction: output
example: primary.fastq.gz
- name: "--all_fastq"
type: file
description: |
Output reads that map to the primary reference.
direction: output
example: all.fastq.gz
- name: "--ref_fasta_list"
type: file
description: |
Directory with index files.
- name: "--sam2bam"
alternatives: ["--bs"]
type: file
Expand Down
57 changes: 36 additions & 21 deletions src/bbmap_bbsplit/script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,71 @@ for var in "${unset_if_false[@]}"; do
fi
done

if [ ! -d "$par_index" ]; then
other_refs=()
while IFS="," read -r name path
if [ ! -d "$par_build" ]; then
IFS=";" read -ra ref_files <<< "$par_ref"
primary_ref="${ref_files[0]}"
refs=()
for file in "${ref_files[@]:1}"
do
other_refs+=("ref_$name=$path")
done < "$par_ref_fasta_list"
name=$(basename "$file" | sed 's/\.[^.]*$//')
refs+=("ref_$name=$file")
done
fi

if $par_only_build_index; then
if [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
if [ ${#refs[@]} -gt 1 ]; then
bbsplit.sh \
ref_primary="$par_primary_ref" "${other_refs[@]}" \
path=$par_index \
threads=${meta_cpus:-1}
--ref_primary="$primary_ref" \
"${refs[@]}" \
path=$par_build
else
echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files."
echo "ERROR: Please specify at least two reference fasta files."
fi
else
IFS="," read -ra input <<< "$par_input"
IFS=";" read -ra input <<< "$par_input"
tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX")
index_files=''
if [ -d "$par_index" ]; then
index_files="path=$par_index"
elif [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then
index_files="ref_primary=$par_primary_ref ${other_refs[@]}"
if [ -d "$par_build" ]; then
index_files="path=$par_build"
elif [ ${#refs[@]} -gt 0 ]; then
index_files="--ref_primary=$primary_ref ${refs[*]}"
else
echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files."
echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files."
fi

extra_args=""
if [ -n "$par_refstats" ]; then extra_args+=" --refstats $par_refstats"; fi
if [ -n "$par_ambiguous" ]; then extra_args+=" --ambiguous $par_ambiguous"; fi
if [ -n "$par_ambiguous2" ]; then extra_args+=" --ambiguous2 $par_ambiguous2"; fi
if [ -n "$par_minratio" ]; then extra_args+=" --minratio $par_minratio"; fi
if [ -n "$par_minhits" ]; then extra_args+=" --minhits $par_minhits"; fi
if [ -n "$par_maxindel" ]; then extra_args+=" --maxindel $par_maxindel"; fi
if [ -n "$par_qin" ]; then extra_args+=" --qin $par_qin"; fi
if [ -n "$par_qtrim" ]; then extra_args+=" --qtrim $par_qtrim"; fi
if [ "$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi
if [ "$par_untrim" = true ]; then extra_args+=" --untrim"; fi
if [ "$par_nzo" = true ]; then extra_args+=" --nzo"; fi

if $par_paired; then
bbsplit.sh \
$index_files \
threads=${meta_cpus:-1} \
in=${input[0]} \
in2=${input[1]} \
basename=${tmpdir}/%_#.fastq \
refstats=bbsplit_stats.txt
$extra_args
read1=$(find $tmpdir/ -iname primary_1*)
read2=$(find $tmpdir/ -iname primary_2*)
cp $read1 $par_fastq_1
cp $read2 $par_fastq_2
else
bbsplit.sh \
$index_files \
threads=${meta_cpus:-1} \
in=${input[0]} \
basename=${tmpdir}/%.fastq \
refstats=bbsplit_stats.txt
$extra_args
read1=$(find $tmpdir/ -iname primary*)
cp $read1 $par_fastq_1
fi
fi

exit 0
exit 0
24 changes: 8 additions & 16 deletions src/bbmap_bbsplit/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@

echo ">>> Test $meta_functionality_name"

cat > bbsplit_fasta_list.txt << HERE
sarscov2,sarscov2.fa
human,human.fa
HERE

echo "> Prepare test data"

cat > reads_R1.fastq <<'EOF'
Expand Down Expand Up @@ -58,10 +53,9 @@ EOF

echo ">>> Building BBSplit index"
"${meta_executable}" \
--primary_ref "genome.fasta" \
--ref_fasta_list bbsplit_fasta_list.txt \
--ref "genome.fasta;human.fa;sarscov2.fa" \
--only_build_index \
--index "BBSplit_index"
--build "BBSplit_index"

echo ">>> Check whether output exists"
[ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1
Expand All @@ -73,8 +67,7 @@ echo ">>> Check whether output exists"
echo ">>> Testing with single-end reads and primary/non-primary FASTA files"
"${meta_executable}" \
--input "reads_R1.fastq" \
--primary_ref "genome.fasta" \
--ref_fasta_list bbsplit_fasta_list.txt \
--ref "genome.fasta;human.fa;sarscov2.fa" \
--fastq_1 "filtered_reads_R1.fastq"

echo ">>> Check whether output exists"
Expand All @@ -91,9 +84,8 @@ rm filtered_reads_R1.fastq
echo ">>> Testing with paired-end reads and primary/non-primary FASTA files"
"${meta_executable}" \
--paired \
--input "reads_R1.fastq,reads_R2.fastq" \
--primary_ref "genome.fasta" \
--ref_fasta_list "bbsplit_fasta_list.txt" \
--input "reads_R1.fastq;reads_R2.fastq" \
--ref "genome.fasta;human.fa;sarscov2.fa" \
--fastq_1 "filtered_reads_R1.fastq" \
--fastq_2 "filtered_reads_R2.fastq"

Expand All @@ -114,7 +106,7 @@ rm filtered_reads_R1.fastq filtered_reads_R2.fastq
echo ">>> Testing with single-end reads and BBSplit index"
"${meta_executable}" \
--input "reads_R1.fastq" \
--index "BBSplit_index" \
--build "BBSplit_index" \
--fastq_1 "filtered_reads_R1.fastq"

echo ">>> Check whether output exists"
Expand All @@ -131,8 +123,8 @@ rm filtered_reads_R1.fastq
echo ">>> Testing with paired-end reads and BBSplit index"
"${meta_executable}" \
--paired \
--input "reads_R1.fastq,reads_R2.fastq" \
--index "BBSplit_index" \
--input "reads_R1.fastq;reads_R2.fastq" \
--build "BBSplit_index" \
--fastq_1 "filtered_reads_R1.fastq" \
--fastq_2 "filtered_reads_R2.fastq"

Expand Down

0 comments on commit 1a5cca1

Please sign in to comment.