# Review commentary for the bbmap_bbsplit patch below (three files).
#
# config.vsh.yaml
#   * removes `multiple_sep: ","` from the input argument, whose description
#     says files are separated by ";";
#   * replaces --primary_ref and --other_ref_names with a single
#     `multiple: true` file argument --ref (primary reference listed first);
#   * renames --index to --build;
#   * removes --primary_fastq, --all_fastq and --ref_fasta_list.
#
# script.sh
#   * $par_ref is split on ";": element 0 becomes primary_ref, every other
#     file is appended to refs as ref_<basename minus last extension>=<file>.
#     refs therefore never contains the primary reference, and it is only
#     filled when $par_build is not an existing directory.
#   * Index building requires at least one entry in refs (-gt 0): primary
#     plus one other file is the "two reference fasta files" named in the
#     error message, and it is the same threshold the mapping branch uses.
#   * Both ERROR branches end with `exit 1`, so a missing reference set is
#     reported as a failure instead of continuing towards `exit 0`.
#   * Optional settings are appended to extra_args only when set; the fixed
#     `threads=${meta_cpus:-1}` and `refstats=bbsplit_stats.txt` arguments
#     are removed.
#
# test.sh
#   * drops the bbsplit_fasta_list.txt fixture and calls the component with
#     --ref "genome.fasta;human.fa;sarscov2.fa", ";"-separated --input and
#     --build instead of --index.
#
# NOTE(review): extra_args is built as "--key value" pairs, while the removed
#   lines passed key=value (refstats=..., threads=...) - confirm bbsplit.sh
#   accepts the new form.
# NOTE(review): threads is no longer forwarded from meta_cpus - confirm this
#   is intended.
# NOTE(review): $index_files and $extra_args are expanded unquoted, so any
#   path containing whitespace is word-split before reaching bbsplit.sh.
diff --git a/src/bbmap_bbsplit/config.vsh.yaml b/src/bbmap_bbsplit/config.vsh.yaml index 58c1f1c5..c59e0974 100644 --- a/src/bbmap_bbsplit/config.vsh.yaml +++ b/src/bbmap_bbsplit/config.vsh.yaml @@ -21,20 +21,15 @@ argument_groups: type: file multiple: true description: Input fastq files, either one or two (paired), separated by ";". - multiple_sep: "," example: reads.fastq - - name: "--primary_ref" + - name: "--ref" type: file - description: Primary reference FASTA - - name: "--other_ref_names" - type: file - description: | - Path to comma-separated file containing a list of reference genomes to filter reads - against with BBSplit. + multiple: true + description: Reference FASTA files, separated by ";". The primary reference should be specified first. - name: "--only_build_index" type: boolean_true description: If set, only builds the index. Otherwise, mapping is performed. - - name: "--index" + - name: "--build" type: string description: | Designate index to use. Corresponds to the number specified when building the index. @@ -112,22 +107,6 @@ argument_groups: Output file for read 2. direction: output example: read_out2.fastq - - name: "--primary_fastq" - type: file - description: | - Output reads that map to the primary reference. - direction: output - example: primary.fastq.gz - - name: "--all_fastq" - type: file - description: | - Output reads that map to the primary reference. - direction: output - example: all.fastq.gz - - name: "--ref_fasta_list" - type: file - description: | - Directory with index files. - name: "--sam2bam" alternatives: ["--bs"] type: file diff --git a/src/bbmap_bbsplit/script.sh b/src/bbmap_bbsplit/script.sh index f835731b..eb17c86c 100755 --- a/src/bbmap_bbsplit/script.sh +++ b/src/bbmap_bbsplit/script.sh @@ -18,42 +18,58 @@ for var in "${unset_if_false[@]}"; do fi done -if [ ! -d "$par_index" ]; then - other_refs=() - while IFS="," read -r name path +if [ !
-d "$par_build" ]; then + IFS=";" read -ra ref_files <<< "$par_ref" + primary_ref="${ref_files[0]}" + refs=() + for file in "${ref_files[@]:1}" do - other_refs+=("ref_$name=$path") - done < "$par_ref_fasta_list" + name=$(basename "$file" | sed 's/\.[^.]*$//') + refs+=("ref_$name=$file") + done fi if $par_only_build_index; then - if [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then + if [ ${#refs[@]} -gt 0 ]; then bbsplit.sh \ - ref_primary="$par_primary_ref" "${other_refs[@]}" \ - path=$par_index \ - threads=${meta_cpus:-1} + --ref_primary="$primary_ref" \ + "${refs[@]}" \ + path=$par_build else - echo "ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files." + echo "ERROR: Please specify at least two reference fasta files."; exit 1 fi else - IFS="," read -ra input <<< "$par_input" + IFS=";" read -ra input <<< "$par_input" tmpdir=$(mktemp -d "$meta_temp_dir/$meta_functionality_name-XXXXXXXX") index_files='' - if [ -d "$par_index" ]; then - index_files="path=$par_index" - elif [ -f "$par_primary_ref" ] && [ ${#other_refs[@]} -gt 0 ]; then - index_files="ref_primary=$par_primary_ref ${other_refs[@]}" + if [ -d "$par_build" ]; then + index_files="path=$par_build" + elif [ ${#refs[@]} -gt 0 ]; then + index_files="--ref_primary=$primary_ref ${refs[*]}" else - echo "ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files." + echo "ERROR: Please either specify a BBSplit index as input or at least two reference fasta files."; exit 1
fi + + extra_args="" + if [ -n "$par_refstats" ]; then extra_args+=" --refstats $par_refstats"; fi + if [ -n "$par_ambiguous" ]; then extra_args+=" --ambiguous $par_ambiguous"; fi + if [ -n "$par_ambiguous2" ]; then extra_args+=" --ambiguous2 $par_ambiguous2"; fi + if [ -n "$par_minratio" ]; then extra_args+=" --minratio $par_minratio"; fi + if [ -n "$par_minhits" ]; then extra_args+=" --minhits $par_minhits"; fi + if [ -n "$par_maxindel" ]; then extra_args+=" --maxindel $par_maxindel"; fi + if [ -n "$par_qin" ]; then extra_args+=" --qin $par_qin"; fi + if [ -n "$par_qtrim" ]; then extra_args+=" --qtrim $par_qtrim"; fi + if [ "$par_interleaved" = true ]; then extra_args+=" --interleaved"; fi + if [ "$par_untrim" = true ]; then extra_args+=" --untrim"; fi + if [ "$par_nzo" = true ]; then extra_args+=" --nzo"; fi + if $par_paired; then bbsplit.sh \ $index_files \ - threads=${meta_cpus:-1} \ in=${input[0]} \ in2=${input[1]} \ basename=${tmpdir}/%_#.fastq \ - refstats=bbsplit_stats.txt + $extra_args read1=$(find $tmpdir/ -iname primary_1*) read2=$(find $tmpdir/ -iname primary_2*) cp $read1 $par_fastq_1 @@ -61,13 +77,12 @@ else else bbsplit.sh \ $index_files \ - threads=${meta_cpus:-1} \ in=${input[0]} \ basename=${tmpdir}/%.fastq \ - refstats=bbsplit_stats.txt + $extra_args read1=$(find $tmpdir/ -iname primary*) cp $read1 $par_fastq_1 fi fi -exit 0 \ No newline at end of file +exit 0 diff --git a/src/bbmap_bbsplit/test.sh b/src/bbmap_bbsplit/test.sh index 96c317e1..1ad7aac2 100644 --- a/src/bbmap_bbsplit/test.sh +++ b/src/bbmap_bbsplit/test.sh @@ -2,11 +2,6 @@ echo ">>> Test $meta_functionality_name" -cat > bbsplit_fasta_list.txt << HERE -sarscov2,sarscov2.fa -human,human.fa -HERE - echo "> Prepare test data" cat > reads_R1.fastq <<'EOF' @@ -58,10 +53,9 @@ EOF echo ">>> Building BBSplit index" "${meta_executable}" \ - --primary_ref "genome.fasta" \ - --ref_fasta_list bbsplit_fasta_list.txt \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ --only_build_index \ - --index
"BBSplit_index" + --build "BBSplit_index" echo ">>> Check whether output exists" [ ! -d "BBSplit_index" ] && echo "BBSplit index does not exist!" && exit 1 @@ -73,8 +67,7 @@ echo ">>> Check whether output exists" echo ">>> Testing with single-end reads and primary/non-primary FASTA files" "${meta_executable}" \ --input "reads_R1.fastq" \ - --primary_ref "genome.fasta" \ - --ref_fasta_list bbsplit_fasta_list.txt \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ --fastq_1 "filtered_reads_R1.fastq" echo ">>> Check whether output exists" @@ -91,9 +84,8 @@ rm filtered_reads_R1.fastq echo ">>> Testing with paired-end reads and primary/non-primary FASTA files" "${meta_executable}" \ --paired \ - --input "reads_R1.fastq,reads_R2.fastq" \ - --primary_ref "genome.fasta" \ - --ref_fasta_list "bbsplit_fasta_list.txt" \ + --input "reads_R1.fastq;reads_R2.fastq" \ + --ref "genome.fasta;human.fa;sarscov2.fa" \ --fastq_1 "filtered_reads_R1.fastq" \ --fastq_2 "filtered_reads_R2.fastq" @@ -114,7 +106,7 @@ rm filtered_reads_R1.fastq filtered_reads_R2.fastq echo ">>> Testing with single-end reads and BBSplit index" "${meta_executable}" \ --input "reads_R1.fastq" \ - --index "BBSplit_index" \ + --build "BBSplit_index" \ --fastq_1 "filtered_reads_R1.fastq" echo ">>> Check whether output exists" @@ -131,8 +123,8 @@ rm filtered_reads_R1.fastq echo ">>> Testing with paired-end reads and BBSplit index" "${meta_executable}" \ --paired \ - --input "reads_R1.fastq,reads_R2.fastq" \ - --index "BBSplit_index" \ + --input "reads_R1.fastq;reads_R2.fastq" \ + --build "BBSplit_index" \ --fastq_1 "filtered_reads_R1.fastq" \ --fastq_2 "filtered_reads_R2.fastq"