Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fastp - Format_source to ensure WF usability and remove non-sanger input formats #6499

Merged
merged 3 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 16 additions & 55 deletions tools/fastp/fastp.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy0" profile="23.1">
<tool id="fastp" name="fastp" version="@TOOL_VERSION@+galaxy3" profile="23.1">
<description>fast all-in-one preprocessing for FASTQ files</description>
<macros>
<import>macros.xml</import>
Expand All @@ -18,8 +18,6 @@
#if $single_paired.single_paired_selector == 'paired_collection'
#if $single_paired.paired_input.forward.is_of_type('fastqsanger.gz')
#set ext = '.fastqsanger.gz'
#elif $single_paired.paired_input.forward.is_of_type('fastqillumina.gz')
#set ext = '.fastqillumina.gz'
#end if
#set $in1 = $single_paired.paired_input.forward
#set $in2 = $single_paired.paired_input.reverse
Expand All @@ -32,8 +30,6 @@
#else
#if $in1.is_of_type('fastqsanger.gz')
#set ext = '.fastqsanger.gz'
#elif $in1.is_of_type('fastqillumina.gz')
#set ext = '.fastqillumina.gz'
#end if

#set $in1_name = re.sub('[^\w\-\s]', '_', str($in1.element_identifier)) + $ext
Expand All @@ -45,19 +41,13 @@
#end if
#end if

cp '$c1' galaxy.json &&

## Run fastp

fastp

--thread \${GALAXY_SLOTS:-1}
--report_title 'fastp report for $in1_name'

#if $in1.is_of_type('fastqillumina', 'fastqsolexa', 'fastqillumina.gz', 'fastqsolexa.gz'):
--phred64
#end if

-i '$in1_name'

## Merge reads
Expand Down Expand Up @@ -231,40 +221,6 @@ $read_mod_options.base_correction_options.correction
mv second${ext} '${out2}'
#end if
]]></command>
<configfiles>
<configfile name="c1">
#set $ext1 = "fastqsanger"
#set $ext2 = "fastqsanger"
#if str($single_paired.single_paired_selector) == "single"
#if $in1.ext.endswith("gz")
#set $ext1 = "fastqsanger.gz"
#end if
#elif str($single_paired.single_paired_selector) == "paired"
#if $in1.ext.endswith("gz")
#set $ext1 = "fastqsanger.gz"
#end if
#if $in2.ext.endswith("gz")
#set $ext2 = "fastqsanger.gz"
#end if
#else
#if $paired_input.forward.ext.endswith("gz")
#set $ext1 = "fastqsanger.gz"
#end if
#if $paired_input.reverse.ext.endswith("gz")
#set $ext2 = "fastqsanger.gz"
#end if
#end if
{
"out1": {"ext": "$ext1"},
"out2": {"ext": "$ext2"},
"merged_reads": {"ext": "$ext1"},
"unmerged_out1": {"ext": "$ext1"},
"unmerged_out2": {"ext": "$ext2"},
"unpaired1": {"ext": "$ext1"},
"unpaired2": {"ext": "$ext2"}
}
</configfile>
</configfiles>
<inputs>
<conditional name="single_paired">
<param name="single_paired_selector" type="select" label="Single-end or paired reads">
Expand All @@ -288,7 +244,7 @@ $read_mod_options.base_correction_options.correction
<expand macro="global_trimming_options_paired" />
</when>
<when value="paired_collection">
<param name="paired_input" type="data_collection" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="Select paired collection(s)" collection_type="paired"/>
<param name="paired_input" type="data_collection" format="fastqsanger,fastqsanger.gz" label="Select paired collection(s)" collection_type="paired"/>
<expand macro="merge_reads" />
<expand macro="adapter_trimming_options">
<expand macro="adapter_sequence" read_number="2"/>
Expand Down Expand Up @@ -379,10 +335,10 @@ $read_mod_options.base_correction_options.correction
</inputs>

<outputs>
<data name="out1" format="auto" label="${tool.name} on ${on_string}: Read 1 output">
<data name="out1" format_source="in1" label="${tool.name} on ${on_string}: Read 1 output">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does fastp compress the output if the input is compressed?
I would assume all output is uncompressed?

Suggested change
<data name="out1" format_source="in1" label="${tool.name} on ${on_string}: Read 1 output">
<data name="out1" format="fastqsanger" label="${tool.name} on ${on_string}: Read 1 output">

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tool itself produces compressed output if the output file name has the extension .gz, regardless of whether the input is compressed or not.
Actually, I had it this way at the beginning, but changed to format_source to be able to connect the output to fastqsanger.gz inputs.

<filter>single_paired['single_paired_selector'] in ["single", "paired"] and not single_paired['merge_reads']['merge']</filter>
</data>
<data name="out2" format="auto" label="${tool.name} on ${on_string}: Read 2 output">
<data name="out2" format_source="in2" label="${tool.name} on ${on_string}: Read 2 output">
<filter>single_paired['single_paired_selector'] == "paired" and not single_paired['merge_reads']['merge']</filter>
</data>
<collection name="output_paired_coll" type="paired" format_source="paired_input['forward']" label="${tool.name} on ${on_string}: Paired-end output">
Expand All @@ -394,20 +350,25 @@ $read_mod_options.base_correction_options.correction
<data name="report_json" format="json" from_work_dir="fastp.json" label="${tool.name} on ${on_string}: JSON report">
<filter>output_options['report_json'] is True</filter>
</data>
<data name="merged_reads" format="auto" label="${tool.name} on ${on_string}: Merged reads">
<data name="merged_reads" format_source="in1" label="${tool.name} on ${on_string}: Merged reads">
<filter>single_paired['single_paired_selector'] in ["paired", "paired_collection"] and single_paired['merge_reads']['merge']</filter>
<expand macro="format_actions" read_number="1" forward_reverse="forward" />
</data>
<data name="unmerged_out1" format="auto" label="${tool.name} on ${on_string}: Unmerged filtered reads1">
<data name="unmerged_out1" format_source="in1" label="${tool.name} on ${on_string}: Unmerged filtered reads1">
<filter>single_paired['single_paired_selector'] in ["paired", "paired_collection"] and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
<expand macro="format_actions" read_number="1" forward_reverse="forward" />
</data>
<data name="unmerged_out2" format="auto" label="${tool.name} on ${on_string}: Unmerged filtered reads2">
<data name="unmerged_out2" format_source="in2" label="${tool.name} on ${on_string}: Unmerged filtered reads2">
<filter>single_paired['single_paired_selector'] in ["paired", "paired_collection"] and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
<expand macro="format_actions" read_number="2" forward_reverse="reverse" />
</data>
<data name="unpaired1" format="auto" label="${tool.name} on ${on_string}: Unmerged unfiltered reads1">
<data name="unpaired1" format_source="in1" label="${tool.name} on ${on_string}: Unmerged unfiltered reads1">
<filter>single_paired['single_paired_selector'] in ["paired", "paired_collection"] and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
<expand macro="format_actions" read_number="1" forward_reverse="forward" />
</data>
<data name="unpaired2" format="auto" label="${tool.name} on ${on_string}: Unmerged unfiltered reads2">
<data name="unpaired2" format_source="in2" label="${tool.name} on ${on_string}: Unmerged unfiltered reads2">
<filter>single_paired['single_paired_selector'] in ["paired", "paired_collection"] and single_paired['merge_reads']['merge'] and not single_paired['merge_reads']['include_unmerged']</filter>
<expand macro="format_actions" read_number="2" forward_reverse="reverse" />
</data>
</outputs>

Expand Down Expand Up @@ -664,7 +625,7 @@ afford high performance.

**Inputs**

Single-end or Paired-end (compressed) fastqsagnger or fastqillumina files
Single-end or Paired-end (compressed) fastqsanger files

-----

Expand All @@ -688,4 +649,4 @@ Optionally, under **Output Options** you can choose to output
<citations>
<citation type="doi">10.1101/274100</citation>
</citations>
</tool>
</tool>
21 changes: 19 additions & 2 deletions tools/fastp/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,28 @@
</xml>

<xml name="in" token_read_number="1" token_argument="-i">
<param name="in@READ_NUMBER@" argument="@ARGUMENT@" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="Input @READ_NUMBER@" help="Input FASTQ file #@READ_NUMBER@"/>
<param name="in@READ_NUMBER@" argument="@ARGUMENT@" type="data" format="fastqsanger,fastqsanger.gz" label="Input @READ_NUMBER@" help="Input FASTQ file #@READ_NUMBER@"/>
</xml>

<!-- Macro providing the optional integer parameter for the poly_g_min_len argument. -->
<xml name="poly_g_min_len">
    <param
        argument="--poly_g_min_len"
        type="integer"
        optional="true"
        label="PolyG minimum length"
        help="The minimum length to detect polyG in the read tail. 10 by default."/>
</xml>
</macros>

<!--
    format_actions: sets the format of an output from the matching input,
    depending on the value of single_paired.single_paired_selector.

    Tokens:
      READ_NUMBER      suffix of the single_paired.in<N> data parameter whose
                       "ext" is used in the "paired" case (default: 1)
      FORWARD_REVERSE  attribute of single_paired.paired_input whose "ext" is
                       used in the "paired_collection" case (default: forward)

    No action is defined here for any other selector value.
-->
<xml name="format_actions" token_read_number="1" token_forward_reverse="forward">
    <actions>
        <conditional name="single_paired.single_paired_selector">
            <when value="paired">
                <action type="format">
                    <option type="from_param" name="single_paired.in@READ_NUMBER@" param_attribute="ext" />
                </action>
            </when>
            <when value="paired_collection">
                <action type="format">
                    <!-- Resolves to e.g. "forward.ext" or "reverse.ext" after token substitution. -->
                    <option type="from_param" name="single_paired.paired_input" param_attribute="@FORWARD_REVERSE@.ext" />
                </action>
            </when>
        </conditional>
    </actions>
</xml>
</macros>