Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

matchms: added filters and test and fixed linter warnings #402

Merged
merged 20 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/export_to_path/export_to_path.xml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<tool id="export_to_path" name="export to path" version="0.0.2">
<description>on a filesystem accessible to compute node</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="creator"/>
<description>on a filesystem accessible to compute node</description>
<requirements>
<requirement type="package" version="3.6">python</requirement>
</requirements>
Expand Down
3 changes: 3 additions & 0 deletions tools/filter_compounds/filter_compounds.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,8 @@
@HELP@
]]>
</help>
<!-- A citation of type "doi" carries the bare DOI, not a resolver URL. -->
<citations>
    <citation type="doi">10.5281/zenodo.6035335</citation>
</citations>
</tool>

65 changes: 64 additions & 1 deletion tools/matchms/matchms_filtering.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>filter and normalize mass spectrometry data</description>

<macros>
Expand Down Expand Up @@ -49,6 +49,16 @@
--from_mz "$mz_range.from_mz" \
--to_mz "$mz_range.to_mz" \
#end if
#if $require_smiles.is_true
-require_smiles \
#end if
#if $require_inchi.is_true
-require_inchi \
#end if
#if $reduce_to_top_n_peaks.is_true
-reduce_to_top_n_peaks \
--n_max "$reduce_to_top_n_peaks.n_max" \
#end if
--output "$output"
</configfile>
</configfiles>
Expand All @@ -59,16 +69,22 @@
<!-- Toggle for intensity normalisation; neither branch takes further parameters. -->
<conditional name="normalise_intensities">
    <param name="is_true" label="Normalize intensities" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
           help="Normalize intensities of peaks (and losses) to unit height."/>
    <when value="TRUE"/>
    <when value="FALSE"/>
</conditional>

<!-- Toggle for the default filter collection; neither branch takes further parameters. -->
<conditional name="default_filters">
    <param name="is_true" label="Apply default filters" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
           help="Collection of filters that are considered default and that do not require any (factory) arguments."/>
    <when value="TRUE"/>
    <when value="FALSE"/>
</conditional>

<!-- Toggle for the metadata clean-up filters; both branches are empty because the option has no nested parameters. -->
<conditional name="clean_metadata">
    <param name="is_true"
           type="boolean"
           label="Clean metadata"
           checked="false"
           truevalue="TRUE"
           falsevalue="FALSE"
           help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata."/>
    <when value="TRUE"/>
    <when value="FALSE"/>
</conditional>

<conditional name="relative_intensity">
Expand All @@ -78,6 +94,7 @@
<param label="Minimum intensity" name="from_intensity" value="0" type="float" help="Lower bound for intensity filter." />
<param label="Maximum intensity" name="to_intensity" value="1" type="float" help="Upper bound for intensity filter." />
</when>
<when value="FALSE"></when>
</conditional>

<conditional name="mz_range">
Expand All @@ -87,6 +104,30 @@
<param label="Minimum m/z" name="from_mz" optional="true" type="float" help="Lower bound for m/z filter." />
<param label="Maximum m/z" name="to_mz" optional="true" type="float" help="Upper bound for m/z filter." />
</when>
<when value="FALSE"></when>
</conditional>

<!-- Toggle for the SMILES requirement; when TRUE the configfile passes -require_smiles to the wrapper. -->
<conditional name="require_smiles">
    <param name="is_true" label="Require SMILES" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
           help="Remove spectra that do not contain SMILES."/>
    <when value="TRUE"/>
    <when value="FALSE"/>
</conditional>

<!-- Toggle for the InChI requirement; when TRUE the configfile passes -require_inchi to the wrapper. -->
<conditional name="require_inchi">
    <param name="is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
           help="Remove spectra that do not contain INCHI."/>
    <when value="TRUE"/>
    <when value="FALSE"/>
</conditional>

<!-- Toggle for peak-count reduction; the TRUE branch exposes n_max, which the configfile forwards as \-\-n_max
     (written with escaped hyphens here only because XML comments may not contain a double hyphen). -->
<conditional name="reduce_to_top_n_peaks">
    <param name="is_true" label="Reduce to top n peaks" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
           help="Lowest intensity peaks will be removed when a spectrum has more peaks than desired."/>
    <when value="TRUE">
        <!-- min="1": keeping zero or a negative number of peaks is not a meaningful request. -->
        <param label="Maximum number of peaks" name="n_max" value="5" min="1" type="integer"
               help="Maximum number of peaks. Remove peaks if more peaks are found."/>
    </when>
    <when value="FALSE"/>
</conditional>

</inputs>
Expand Down Expand Up @@ -135,6 +176,28 @@
</section>
<output name="output" file="filtering/mz_range.msp" ftype="msp"/>
</test>
<!-- Runs the require_smiles filter on require_filter.msp and compares against the stored expected output.
     The nesting element is <conditional> because the matching input is declared as a conditional. -->
<test>
    <param name="spectra" value="filtering/require_filter.msp" ftype="msp"/>
    <conditional name="require_smiles">
        <param name="is_true" value="TRUE"/>
    </conditional>
    <output name="output" file="filtering/require_smiles_out.msp" ftype="msp"/>
</test>
<!-- Runs the require_inchi filter on require_filter.msp and compares against the stored expected output.
     The nesting element is <conditional> because the matching input is declared as a conditional. -->
<test>
    <param name="spectra" value="filtering/require_filter.msp" ftype="msp"/>
    <conditional name="require_inchi">
        <param name="is_true" value="TRUE"/>
    </conditional>
    <output name="output" file="filtering/require_inchi_out.msp" ftype="msp"/>
</test>
<!-- Runs the top-n-peaks reduction with n_max=5 on input.msp and compares against the stored expected output.
     The nesting element is <conditional> because the matching input is declared as a conditional. -->
<test>
    <param name="spectra" value="filtering/input.msp" ftype="msp"/>
    <conditional name="reduce_to_top_n_peaks">
        <param name="is_true" value="TRUE"/>
        <param name="n_max" value="5"/>
    </conditional>
    <output name="output" file="filtering/reduce_to_top_n_peaks.msp" ftype="msp"/>
</test>
</tests>

<help>
Expand Down
34 changes: 31 additions & 3 deletions tools/matchms/matchms_filtering_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
from matchms.exporting import save_as_mgf, save_as_msp
from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\
add_retention_index, add_retention_time, clean_compound_name
from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity
from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
select_by_relative_intensity
from matchms.importing import load_from_mgf, load_from_msp


def require_key(spectrum, key):
    """Keep a spectrum only if it has a truthy value for the given metadata key.

    Args:
        spectrum: Object exposing ``get(key)`` for metadata lookup, or ``None``
            when an earlier filter has already discarded the spectrum.
        key: Name of the metadata field that must be present (e.g. "smiles").

    Returns:
        The unchanged ``spectrum`` when ``spectrum.get(key)`` is truthy,
        otherwise ``None``. A missing key and an empty value are treated alike.
    """
    # Tolerate an already-discarded spectrum so the filter can be chained
    # without every caller having to guard against None first.
    if spectrum is None:
        return None

    return spectrum if spectrum.get(key) else None


def main(argv):
parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
Expand All @@ -27,13 +35,23 @@ def main(argv):
help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter")
parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter")
parser.add_argument("-require_smiles", action='store_true',
help="Remove spectra that does not contain SMILES.")
parser.add_argument("-require_inchi", action='store_true',
help="Remove spectra that does not contain INCHI.")
parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
help="reduce to top n peaks filter.")
parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
args = parser.parse_args()

if not (args.normalise_intensities
or args.default_filters
or args.clean_metadata
or args.relative_intensity
or args.mz_range):
or args.mz_range
or args.require_smiles
or args.require_inchi
or args.reduce_to_top_n_peaks):
raise ValueError('No filter selected.')

if args.spectra_format == 'msp':
Expand Down Expand Up @@ -63,7 +81,17 @@ def main(argv):
if args.mz_range:
spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)

filtered_spectra.append(spectrum)
if args.reduce_to_top_n_peaks:
spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)

if args.require_smiles and spectrum is not None:
spectrum = require_key(spectrum, "smiles")

if args.require_inchi and spectrum is not None:
spectrum = require_key(spectrum, "inchi")

if spectrum is not None:
filtered_spectra.append(spectrum)

if args.spectra_format == 'msp':
save_as_msp(filtered_spectra, args.output)
Expand Down
12 changes: 8 additions & 4 deletions tools/matchms/matchms_formatter.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
<requirement type="package" version="1.1.4">pandas</requirement>
<requirement type="package" version="8.0.1">click</requirement>
</requirements>

<command detect_errors="aggressive"><![CDATA[
sh ${matchms_formatter_cli}
]]></command>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
<environment_variable name="XDG_CACHE_HOME">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<command detect_errors="aggressive"><![CDATA[
sh ${matchms_formatter_cli}
]]></command>

<configfiles>
<configfile name="matchms_formatter_cli">
python3 ${__tool_directory__}/formatter.py \
Expand Down Expand Up @@ -57,4 +57,8 @@
@HELP_formatter@
@HELP_matchms@
</help>

<!-- A citation of type "doi" carries the bare DOI, not a resolver URL. -->
<citations>
    <citation type="doi">10.5281/zenodo.6035335</citation>
</citations>
</tool>
2 changes: 1 addition & 1 deletion tools/matchms/matchms_metadata_export.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_metadata_export" name="matchms metadata export " version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_metadata_export" name="matchms metadata export" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<description>extract all metadata from mass spectra file to tabular format</description>
<macros>
<import>macros.xml</import>
Expand Down
8 changes: 4 additions & 4 deletions tools/matchms/matchms_metadata_match.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
<requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
</requirements>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<command detect_errors="exit_code"><![CDATA[
python3 '${python_wrapper}'
]]> </command>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<configfiles>
<configfile name="python_wrapper">
@init_logger@
Expand Down
10 changes: 5 additions & 5 deletions tools/matchms/matchms_networking.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@
<requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
</requirements>

<command detect_errors="aggressive"><![CDATA[
sh ${matchms_networking_cli}
]]></command>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
<environment_variable name="XDG_CACHE_HOME">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<command detect_errors="aggressive"><![CDATA[
sh ${matchms_networking_cli}
]]></command>

<configfiles>
<configfile name="matchms_networking_cli">
python3 ${__tool_directory__}/matchms_networking_wrapper.py \
Expand Down Expand Up @@ -78,7 +78,7 @@
</inputs>

<outputs>
<data label="Similarity network of ${scores.name}" name="similarity_network_file">
<data label="Similarity network of ${scores.name}" name="similarity_network_file" format="graphml">
<change_format>
<when input="graph_format" value="graphml" format="graphml"/>
<when input="graph_format" value="cyjs" format="cyjs"/>
Expand Down
10 changes: 6 additions & 4 deletions tools/matchms/matchms_similarity.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
<requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
</requirements>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<command detect_errors="exit_code"><![CDATA[
sh ${matchms_python_cli}
]]> </command>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<configfiles>
<configfile name="matchms_python_cli">
python3 ${__tool_directory__}/matchms_similarity_wrapper.py \
Expand Down Expand Up @@ -64,6 +64,7 @@
<param label="Reference spectra" name="references" type="data" format="msp,mgf"
help="Reference mass spectra to match against as library."/>
</when>
<when value="TRUE"></when>
</conditional>
<param label="Scores array type" name="array_type" type="select" display="radio"
help="Matrix type for storing scores objects. Sparse type more memory-efficient and better for large arrays.
Expand Down Expand Up @@ -113,6 +114,7 @@
<param label="tolerance" name="tolerance" type="float" value="60"
help="Peaks will be considered a match when less than tolerance apart."/>
</when>
<when value="FALSE"></when>
</conditional>
</inputs>

Expand Down
8 changes: 4 additions & 4 deletions tools/matchms/matchms_spectral_similarity.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
<requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
</requirements>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<command detect_errors="exit_code"><![CDATA[
python3 ${python_wrapper}
]]> </command>

<environment_variables>
<environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
</environment_variables>

<configfiles>
<configfile name="python_wrapper">
@init_logger@
Expand Down
57 changes: 57 additions & 0 deletions tools/matchms/test-data/filtering/reduce_to_top_n_peaks.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
IONMODE: negative
SPECTRUMTYPE: Centroid
COMPOUND_NAME: C001
RETENTION_TIME: 38.74
RETENTION_INDEX: None
NUM PEAKS: 5
175.0641 26780143.0
206.9034 26130980.0
216.9205 32607700.0
254.8252 23747536.0
256.8215 31377637.0

IONMODE: negative
SPECTRUMTYPE: Centroid
COMPOUND_NAME: C002
RETENTION_TIME: 520.25
RETENTION_INDEX: 1234.5
NUM PEAKS: 5
310.1623 295359836.0
525.375 1073323842.0
526.3783 181668883.0
551.3321 111616808.0
1047.7378 150394804.0

IONMODE: negative
SPECTRUMTYPE: Centroid
COMPOUND_NAME: C003
RETENTION_TIME: 483.67
NUM PEAKS: 5
288.6414 202172046.0
522.3565 4089569222.0
523.354 1201714423.0
1043.7028 144351468.0
1044.7068 83271854.0

IONMODE: negative
SPECTRUMTYPE: Centroid
COMPOUND_NAME: C004
RETENTION_TIME: 473.48
NUM PEAKS: 5
496.34 12577588056.0
497.3442 3337125302.0
498.3462 532285213.0
991.6726 1420557258.0
992.6749 763118028.0

IONMODE: negative
SPECTRUMTYPE: Centroid
COMPOUND_NAME: C005
RETENTION_TIME: 41.72
NUM PEAKS: 5
218.1386 14009249.0
337.0623 88672453.0
353.0361 37061354.0
359.0443 48435582.0
375.018 29159485.0

Loading