Skip to content

Commit

Permalink
Mzmine 3.9 (galaxyproject#5622)
Browse files Browse the repository at this point in the history
* mzmine: bump to 3.9

* mzmine: limit number of cores for JVM

* mzmine: add possibility to use local CSV database

* mzmine: reduce amount of remote test data

and version the remaining

* mzmine: comment tests

don't work unless planemo can use the 23.1 galaxy packages

* Change quoting

Co-authored-by: Björn Grüning <[email protected]>

* improve help message

* allow also tsv and txt

technically all this is allowed, batch file can specify the delimiter
the example uses ;

---------

Co-authored-by: Björn Grüning <[email protected]>
  • Loading branch information
bernt-matthias and bgruening authored Nov 10, 2023
1 parent 3a3dfd6 commit 889d26f
Show file tree
Hide file tree
Showing 5 changed files with 9,199 additions and 10 deletions.
77 changes: 67 additions & 10 deletions tools/mzmine/mzmine_batch.xml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
<tool id="mzmine_batch" name="MZMine batch" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
<macros>
<token name="@TOOL_VERSION@">3.6.0</token>
<token name="@TOOL_VERSION@">3.9.0</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>
<xrefs>
<xref type="bio.tools">mzmine</xref>
</xrefs>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">mzmine</requirement>
<requirement type="package" version="3.11">python</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
#import re
Expand All @@ -30,9 +31,16 @@
## modify output paths in batch XML to relative path
## - any output will be redirected to ./output/BASENAME
mkdir -p output/ &&
sed -e 's@<current_file>.*[\\\/]\([^\\\/]\+\)</current_file>@<current_file>./output/\1</current_file>@' '$batch' > batch.xml &&
python '$__tool_directory__/set_path.py'
--input '$batch'
--output batch.xml
#if $localdb
--localdb '$localdb'
#end if
&&
## https://github.com/mzmine/mzmine3/issues/1534
JAVA_OPTS="-XX:ActiveProcessorCount=\${GALAXY_SLOTS:-1}"
mzmine
--batch 'batch.xml'
--input 'input.txt'
Expand All @@ -54,20 +62,18 @@
<param argument="--batch" type="data" format="xml" label="MZMine batch file" help="XML batch file (ideally created with version @TOOL_VERSION@)"/>
<param argument="--input" type="data" format="mzml,mzxml,csv" multiple="true" label="Spectra (mzml)"/>
<param argument="--libraries" type="data" format="json,mgf,msp" multiple="true" optional="true" label="Spectral libraries"/>
<param name="localdb" type="data" format="csv,tsv,tabular,txt" optional="true" label="Local database" help="For 'Custom database search'"/>
</inputs>
<outputs>
<collection name="output" type="list">
<discover_datasets pattern="__name_and_ext__" directory="output"/>
</collection>
</outputs>
<tests>
<test>
<!-- <test>
<param name="batch" value="test_small.xml" ftype="xml"/>
<param name="input" value="DOM_a.mzML,DOM_b.mzXML"/>
<param name="input" location="https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_a.mzML,https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_b.mzXML"/>
<param name="libraries" value="GNPS-FAULKNERLEGACY.json" ftype="json"/>
<!-- <param name="input" location="https://raw.githubusercontent.com/mzmine/mzmine3/master/src/test/resources/rawdatafiles/DOM_a.mzML"/> -->
<!-- <param name="input" location="https://raw.githubusercontent.com/mzmine/mzmine3/master/src/test/resources/rawdatafiles/DOM_a.mzML,https://raw.githubusercontent.com/mzmine/mzmine3/master/src/test/resources/rawdatafiles/DOM_b.mzXML"/>
<param name="libraries" location="https://external.gnps2.org/gnpslibrary/GNPS-FAULKNERLEGACY.json" ftype="json"/> -->
<output_collection name="output" count="3" type="list">
<element name="test2_iimn_gnps" ftype="mgf">
<assert_contents>
Expand All @@ -88,7 +94,51 @@
</assert_contents>
</element>
</output_collection>
</test>
<assert_stderr>
<has_text text="INFO: Starting processing of task Local CSV identification using database" negate="true"/>
<has_text text="INFO: Finished a batch of 11 steps"/>
</assert_stderr>
</test> -->
<!-- test with localdb -->
<!-- <test>
<param name="batch" value="test_localdatabase.xml" ftype="xml"/>
<param name="input" location="https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_a.mzML,https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_b.mzXML"/>
<param name="libraries" value="GNPS-FAULKNERLEGACY.json" ftype="json"/>
<param name="localdb" value="local_database.csv" ftype="csv"/>
<output_collection name="output" count="3" type="list">
<element name="test2_iimn_gnps" ftype="mgf">
<assert_contents>
<has_n_lines n="2103"/>
<has_text text="BEGIN IONS"/>
</assert_contents>
</element>
<element name="test2_iimn_gnps_quant" ftype="csv">
<assert_contents>
<has_n_lines n="34"/>
<has_n_columns sep="," n="16"/>
</assert_contents>
</element>
<element name="test2_sirius" ftype="mgf">
<assert_contents>
<has_n_lines n="54406"/>
<has_text text="BEGIN IONS"/>
</assert_contents>
</element>
</output_collection>
<assert_stderr>
<has_text text="INFO: Starting processing of task Local CSV identification using database"/>
<has_text text="INFO: Finished a batch of 12 steps"/>
</assert_stderr>
</test> -->
<!-- test with localdb in batch, but user does not provide one-->
<!-- <test expect_failure="true" expect_exit_code="1">
<param name="batch" value="test_localdatabase.xml" ftype="xml"/>
<param name="input" location="https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_a.mzML,https://raw.githubusercontent.com/mzmine/mzmine3/v@TOOL_VERSION@/src/test/resources/rawdatafiles/DOM_b.mzXML"/>
<param name="libraries" value="GNPS-FAULKNERLEGACY.json" ftype="json"/>
<assert_stderr>
<has_text text="Batch file contains LocalCSVDatabaseSearchModule but no local DB CSV file given"/>
</assert_stderr>
</test> -->
</tests>
<help><![CDATA[
MZmine 3 is an open-source software for mass-spectrometry data processing, with the main focus on LC-MS data.
Expand All @@ -102,6 +152,13 @@
- Input spectra in mzml format
- Spectra libraries in mgf or json format (optional)
If the batch file contains steps requiring input data sets these need to be provided
via the optional inputs. Currently this is:
- 'Local database' for a 'Custom database search' (LocalCSVDatabaseSearchModule) step
Please open an issue if inputs for other steps are needed: https://github.com/galaxyproject/tools-iuc/issues/new
**Outputs**
Galaxy will store all outputs of MZMine in a single collection. When specifying the batch XML
Expand All @@ -126,4 +183,4 @@
<citations>
<citation type="doi">10.1038/s41587-023-01690-2</citation>
</citations>
</tool>
</tool>
31 changes: 31 additions & 0 deletions tools/mzmine/set_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import argparse
import re
import sys
from xml.etree import ElementTree

# Matches both POSIX and Windows path separators, since batch files may have
# been authored on either platform.
PATHSEP = re.compile(r"[/\\]")

# Batch step whose <current_file> is an *input* (the user supplied database)
# rather than an output location.
LOCAL_CSV_SEARCH_MODULE = (
    "io.github.mzmine.modules.dataprocessing.id_localcsvsearch"
    ".LocalCSVDatabaseSearchModule"
)


def _parse_args(argv=None):
    """Parse the command line (``argv`` defaults to ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser(
        prog="set_path.py", description="update paths in mzmine batch XML"
    )
    # Both files are mandatory: without them there is nothing to read/write.
    parser.add_argument("--input", required=True, help="input XML")
    parser.add_argument("--output", required=True, help="output XML")
    parser.add_argument(
        "--localdb", required=False, help="Local CSV DB for Search"
    )
    return parser.parse_args(argv)


def _rewrite_paths(tree, localdb=None):
    """Rewrite every ``<current_file>`` below a ``<batchstep>`` in place.

    - In a LocalCSVDatabaseSearchModule step the element is set to
      ``localdb``; the program exits with an error message if none was given.
    - In any other step the path is reduced to its basename and redirected
      to ``./output/``.
    - Empty ``<current_file>`` elements in other steps are left untouched
      (there is no file name to redirect).
    """
    for batchstep in tree.findall(".//batchstep"):
        is_localdb_step = (
            batchstep.attrib.get("method") == LOCAL_CSV_SEARCH_MODULE
        )
        for current_file in batchstep.findall(".//current_file"):
            if is_localdb_step:
                if not localdb:
                    sys.exit(
                        "Batch file contains LocalCSVDatabaseSearchModule but no local DB CSV file given"
                    )
                current_file.text = localdb
                continue
            path = current_file.text
            # An empty element has text None; splitting it would raise.
            if not path or not path.strip():
                continue
            current_file.text = f"./output/{PATHSEP.split(path)[-1]}"


def main(argv=None):
    """Read the batch XML, rewrite its file paths and write the result."""
    args = _parse_args(argv)
    tree = ElementTree.parse(args.input)
    _rewrite_paths(tree, args.localdb)
    tree.write(args.output)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 889d26f

Please sign in to comment.