Skip to content

Commit

Permalink
update, fix pear (#6217)
Browse files Browse the repository at this point in the history
* update, fix pear

* fix shed.yml

* Update tools/pear/pear.xml

* Apply suggestions from code review

Co-authored-by: Marius van den Beek <[email protected]>

---------

Co-authored-by: Marius van den Beek <[email protected]>
  • Loading branch information
bgruening and mvdbeek authored Aug 12, 2024
1 parent 8a0f309 commit c142110
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 75 deletions.
4 changes: 3 additions & 1 deletion tools/pear/.shed.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# repository published to https://toolshed.g2.bx.psu.edu/repos/iuc/pear
owner: iuc
name: pear
description: PEAR evaluates all possible paired-end read overlaps
Expand All @@ -7,3 +6,6 @@ long_description: |
minimizing false-positive results. Together with a highly optimized implementation, it can merge millions of paired end reads within a couple of minutes on a
standard desktop computer. Repository-Maintainer: Björn Grüning
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/pear
homepage_url: https://cme.h-its.org/exelixis/web/software/pear/
categories:
- Fastq Manipulation
116 changes: 42 additions & 74 deletions tools/pear/pear.xml
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
<tool id="iuc_pear" name="Pear" version="@TOOL_VERSION@.@VERSION_SUFFIX@" profile="20.09">
<tool id="iuc_pear" name="Pear" version="@TOOL_VERSION@.@VERSION_SUFFIX@" profile="23.0">
<description>Paired-End read merger</description>
<macros>
<token name="@TOOL_VERSION@">0.9.6</token>
<token name="@VERSION_SUFFIX@">2</token>
<token name="@VERSION_SUFFIX@">3</token>
<xml name="format_action">
<actions>
<conditional name="library.type">
<when value="paired">
<action type="format">
<option type="from_param" name="library.forward" param_attribute="ext" />
<option type="from_param" name="library.forward" param_attribute="ext"/>
</action>
</when>
<when value="paired_collection">
<action type="format">
<option type="from_param" name="library.input_collection" param_attribute="forward.ext" />
<option type="from_param" name="library.input_collection" param_attribute="forward.ext"/>
</action>
</when>
</conditional>
Expand All @@ -26,27 +26,21 @@
<requirements>
<requirement type="package" version="@TOOL_VERSION@">pear</requirement>
</requirements>
<stdio>
<exit_code range="1:" />
<exit_code range=":-1" />
<regex match="Error:" />
<regex match="Exception:" />
</stdio>
<command>
<command detect_errors="aggressive">
<![CDATA[
pear
#if str( $library.type ) == "paired":
-f "$library.forward"
-r "$library.reverse"
-f '$library.forward'
-r '$library.reverse'
#if $library.forward.is_of_type( 'fastqillumina' ):
--phred-base 64
#else:
--phred-base 33
#end if
#else
## prepare collection
-f "$library.input_collection.forward"
-r "$library.input_collection.reverse"
-f '$library.input_collection.forward'
-r '$library.input_collection.reverse'
#if $library.input_collection.forward.is_of_type( 'fastqillumina' ):
--phred-base 64
#else:
Expand All @@ -57,7 +51,7 @@
--output pear
--p-value $pvalue
--min-overlap $min_overlap
#if int($max_assembly_length) > 0:
#if $max_assembly_length and int($max_assembly_length) > 0:
--max-asm-length $max_assembly_length
#end if
--min-asm-length $min_assembly_length
Expand All @@ -66,7 +60,7 @@
--max-uncalled-base $max_uncalled_base
--test-method $test_method
--empirical-freqs $empirical_freqs
-j "\${GALAXY_SLOTS:-8}"
--threads "\${GALAXY_SLOTS:-8}"
--score-method $score_method
--cap $cap
$nbase
Expand All @@ -75,64 +69,38 @@
<inputs>
<conditional name="library">
<param name="type" type="select" label="Dataset type">
<option value="paired">Paired-end</option>
<option value="paired_collection">Paired-end Dataset Collection</option>
<option value="paired">Paired-end</option>
<option value="paired_collection">Paired-end Dataset Collection</option>
</param>
<when value="paired">
<param name="forward" type="data" format="fastqillumina,fastqsanger"
label="Name of file that contains the forward paired-end reads" help="-f" />
<param name="reverse" type="data" format="fastqillumina,fastqsanger"
label="Name of file that contains the reverse paired-end reads" help="-r" />
<param name="forward" type="data" format="fastqillumina,fastqsanger" label="Name of file that contains the forward paired-end reads" help="-f"/>
<param name="reverse" type="data" format="fastqillumina,fastqsanger" label="Name of file that contains the reverse paired-end reads" help="-r"/>
</when>
<when value="paired_collection">
<param name="input_collection" format="fastqillumina,fastqsanger"
type="data_collection" collection_type="paired"
label="FASTQ Paired Dataset" help="Nucleotide-space: Must have PHRED-scaled quality values. (-f and -r)" />
<param name="input_collection" type="data_collection" format="fastqillumina,fastqsanger" label="FASTQ Paired Dataset" help="Nucleotide-space: Must have PHRED-scaled quality values. (-f and -r)" collection_type="paired"/>
</when>
</conditional>

<!-- optional -->
<param name="pvalue" type="float" value="0.01" min="0" optional="true" max="1" label="Specify a p-value for the statistical test"
help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Setting 1.0 disables the test. (--p-value)" />

<param name="min_overlap" type="integer" value="10" optional="true" label="Minimum overlap size"
help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)" />

<param name="max_assembly_length" type="integer" value="0" optional="true" label="Maximum possible length of the assembled sequences"
help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)" />

<param name="min_assembly_length" type="integer" value="50" optional="true" label="Minimum possible length of the assembled sequences"
help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short. (--min-assembly-length)" />

<param name="min_trim_length" type="integer" value="1" optional="true" label="Minimum length of reads after trimming the low quality part"
help="See option -q. (--min-trim-length)" />

<param name="quality_threshold" type="integer" value="0" optional="true" label="Quality score threshold for trimming the low quality part of a read"
help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed. (--quality-threshold)" />

<param name="max_uncalled_base" type="float" value="1.0" min="0" optional="true" max="1" label="Maximal proportion of uncalled bases in a read"
help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases. (--max-uncalled-base)" />

<param name="cap" type="integer" value="40" optional="true" label="Specify the upper bound for the resulting quality score"
help="If set to zero, capping is disabled. (--cap)" />

<param name="pvalue" argument="--p-value" type="float" min="0" max="1" value="0.01" label="Specify a p-value for the statistical test" help="If the computed p-value of a possible assembly exceeds the specified p-value then the paired-end read will not be assembled. Setting 1.0 disables the test. (--p-value)"/>
<param argument="--min-overlap" type="integer" value="10" min="0" label="Minimum overlap size" help="The minimum overlap may be set to 1 when the statistical test is used. However, further restricting the minimum overlap size to a proper value may reduce false-positive assembles. (--min-overlap)"/>
<param argument="--max-assembly-length" min="0" type="integer" value="0" label="Maximum possible length of the assembled sequences" help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary long. (--max-assembly-length)"/>
<param argument="--min-assembly-length" min="0" type="integer" value="50" label="Minimum possible length of the assembled sequences" help="Setting this value to 0 disables the restriction and assembled sequences may be arbitrary short."/>
<param argument="--min-trim-length" min="0" type="integer" value="1" label="Minimum length of reads after trimming the low quality part" help="See option --quality-threshold."/>
<param argument="--quality-threshold" type="integer" value="0" label="Quality score threshold for trimming the low quality part of a read" help="If the quality scores of two consecutive bases are strictly less than the specified threshold, the rest of the read will be trimmed."/>
<param argument="--max-uncalled-base" type="float" min="0" max="1" value="1.0" label="Maximal proportion of uncalled bases in a read" help="Setting this value to 0 will cause PEAR to discard all reads containing uncalled bases. The other extreme setting is 1 which causes PEAR to process all reads independent on the number of uncalled bases."/>
<param argument="--cap" type="integer" value="40" min="0" label="Specify the upper bound for the resulting quality score" help="If set to zero, capping is disabled."/>
<param name="test_method" type="select" label="Type of statistical test" help="(--test-method)">
<option value="1" selected="true">Given the minimum allowed overlap, test using the highest OES (1)</option>
<option value="2">Use the acceptance probability (2)</option>
</param>

<param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false"
label="Disable empirical base frequencies" help="(--empirical-freqs)" />
<param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false"
label="Use N base if uncertain" help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)" />

<param name="empirical_freqs" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Disable empirical base frequencies" help="(--empirical-freqs)"/>
<param name="nbase" type="boolean" truevalue="--nbase" falsevalue="" checked="false" label="Use N base if uncertain" help="When merging a base-pair that consists of two non-equal bases out of which none is degenerate, set the merged base to N and use the highest quality score of the two bases. (--nbase)"/>
<param name="score_method" type="select" label="Scoring method" help="(--score-method)">
<option value="1">OES with +1 for match and -1 for mismatch</option>
<option value="2" selected="true">Assembly score (AS) use +1 for match and -1 for mismatch multiplied by base quality scores</option>
<option value="3">Ignore quality scores and use +1 for a match and -1 for a mismatch</option>
</param>

<param name="outputs" type="select" optional="false" multiple="true" label="Output files">
<param name="outputs" type="select" optional="false" label="Output files" multiple="true">
<option value="assembled" selected="true">Assembled reads</option>
<option value="unassembled_forward">Forward unassembled reads</option>
<option value="unassembled_reverse">Reverse unassembled reads</option>
Expand Down Expand Up @@ -161,13 +129,13 @@
<test expect_num_outputs="2">
<conditional name="library">
<param name="type" value="paired"/>
<param name="forward" value="forward.fastq" ftype="fastqsanger" />
<param name="reverse" value="reverse.fastq" ftype="fastqsanger" />
<param name="forward" value="forward.fastq" ftype="fastqsanger"/>
<param name="reverse" value="reverse.fastq" ftype="fastqsanger"/>
</conditional>
<param name="min_overlap" value="10" />
<param name="min_assembly_length" value="50" />
<param name="cap" value="0" />
<param name="outputs" value="assembled,unassembled_forward" />
<param name="min_overlap" value="10"/>
<param name="min_assembly_length" value="50"/>
<param name="cap" value="0"/>
<param name="outputs" value="assembled,unassembled_forward"/>
<output name="assembled_reads" file="pear_assembled_results1.fastq" ftype="fastqsanger"/>
<output name="unassembled_forward_reads" file="pear_unassembled_forward_results1.fastq" ftype="fastqsanger"/>
</test>
Expand All @@ -178,15 +146,15 @@
<param name="type" value="paired_collection"/>
<param name="input_collection">
<collection type="paired">
<element name="forward" value="forward.fastq" ftype="fastqillumina" />
<element name="reverse" value="reverse.fastq" ftype="fastqillumina" />
<element name="forward" value="forward.fastq" ftype="fastqillumina"/>
<element name="reverse" value="reverse.fastq" ftype="fastqillumina"/>
</collection>
</param>
</conditional>
<param name="min_overlap" value="10" />
<param name="min_assembly_length" value="50" />
<param name="cap" value="0" />
<param name="outputs" value="assembled,unassembled_forward" />
<param name="min_overlap" value="10"/>
<param name="min_assembly_length" value="50"/>
<param name="cap" value="0"/>
<param name="outputs" value="assembled,unassembled_forward"/>
<output name="assembled_reads" ftype="fastqillumina">
<assert_contents>
<has_line_matching expression="@.*"/>
Expand Down Expand Up @@ -222,7 +190,7 @@ and that commercial partners should obtain a license.
]]>
</help>
<citations>
<citation type="doi">10.1093/bioinformatics/btt593</citation>
</citations>
<citations>
<citation type="doi">10.1093/bioinformatics/btt593</citation>
</citations>
</tool>

0 comments on commit c142110

Please sign in to comment.