From 8fc4166b55dc1847e650a28eb5c3a8c498e822c9 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 25 Sep 2023 11:35:46 +0200 Subject: [PATCH 01/15] :art: format code (max-line-length=120!) - allow longer lines - code was not black8 formatted (which is mentioned in dev dependencies) --- .vscode/settings.json | 11 +++- proteobench/modules/dda_quant/parse.py | 3 +- .../modules/dda_quant/parse_settings.py | 60 ++++++++++--------- pyproject.toml | 49 ++++++++------- 4 files changed, 67 insertions(+), 56 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index a38767a8..1b01f8fd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,9 @@ { "[python]": { "editor.formatOnSave": true, - "editor.codeActionsOnSave": {"source.organizeImports": true}, + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, "editor.rulers": [ 88 ], @@ -16,5 +18,8 @@ "test_*.py" ], "python.testing.pytestEnabled": false, - "python.testing.unittestEnabled": true -} + "python.testing.unittestEnabled": true, + "flake8.args": [ + "--max-line-length=120", + ], +} \ No newline at end of file diff --git a/proteobench/modules/dda_quant/parse.py b/proteobench/modules/dda_quant/parse.py index 42b0454b..3f8337e4 100644 --- a/proteobench/modules/dda_quant/parse.py +++ b/proteobench/modules/dda_quant/parse.py @@ -17,7 +17,8 @@ def convert_to_standard_format( for k, v in parse_settings.mapper.items(): if k not in df.columns: raise ImportError( - f"Column {k} not found in input dataframe. Please check input file and selected search engine." + f"Column {k} not found in input dataframe." + " Please check input file and selected search engine." 
) df.rename(columns=parse_settings.mapper, inplace=True) diff --git a/proteobench/modules/dda_quant/parse_settings.py b/proteobench/modules/dda_quant/parse_settings.py index 7dd7154d..fce2f8ad 100644 --- a/proteobench/modules/dda_quant/parse_settings.py +++ b/proteobench/modules/dda_quant/parse_settings.py @@ -8,48 +8,50 @@ from ..interfaces import Settings -#import proteobench.modules.dda_quant.p +# import proteobench.modules.dda_quant.p -PARSE_SETTINGS_DIR = os.path.join(os.path.dirname(__file__), 'io_parse_settings') +PARSE_SETTINGS_DIR = os.path.join(os.path.dirname(__file__), "io_parse_settings") MapSettingFiles: dict[str, Path] -PARSE_SETTINGS_FILES = { "WOMBAT" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_wombat.toml'), - "MaxQuant" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_maxquant.toml'), - "MSFragger" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_msfragger.toml'), - "Proline" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_proline.toml'), - "AlphaPept" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_alphapept.toml'), - "Custom" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_custom.toml') - } +PARSE_SETTINGS_FILES = { + "WOMBAT": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_wombat.toml"), + "MaxQuant": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_maxquant.toml"), + "MSFragger": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_msfragger.toml"), + "Proline": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_proline.toml"), + "AlphaPept": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_alphapept.toml"), + "Custom": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_custom.toml"), +} # ! 
Could be created from keys of PARSE_SETTINGS_FILES -INPUT_FORMATS = ("MaxQuant", - "AlphaPept", - "MSFragger", - "Proline", - "WOMBAT", - "Custom") +INPUT_FORMATS = ("MaxQuant", "AlphaPept", "MSFragger", "Proline", "WOMBAT", "Custom") LOCAL_DEVELOPMENT = False -# For local development change below to the json and path, if you do not want to download it from github -DDA_QUANT_RESULTS_PATH = "https://raw.githubusercontent.com/Proteobench/Results_Module2_quant_DDA/main/results.json" #e.g., K:/results.json +# For local development change below to the json and path, +# if you do not want to download it from github +DDA_QUANT_RESULTS_PATH = ( + "https://raw.githubusercontent.com/Proteobench/" + "Results_Module2_quant_DDA/main/results.json" +) # e.g., K:/results.json + class ParseSettings: - """ Structure that contains all the parameters used to parse the given database search output. """ - - def __init__(self, input_format:str): + """Structure that contains all the parameters used to parse + the given database search output.""" + + def __init__(self, input_format: str): parse_settings = toml.load(PARSE_SETTINGS_FILES[input_format]) - self.mapper = parse_settings["mapper"] - self.replicate_mapper = parse_settings["replicate_mapper"] - self.decoy_flag = parse_settings["general"]["decoy_flag"] - self.species_dict = parse_settings["species_dict"] - self.contaminant_flag = parse_settings["general"]["contaminant_flag"] - self.min_count_multispec = parse_settings["general"]["min_count_multispec"] - self.species_expected_ratio = parse_settings["species_expected_ratio"] - + self.mapper: str = parse_settings["mapper"] + self.replicate_mapper: str = parse_settings["replicate_mapper"] + self.decoy_flag: str = parse_settings["general"]["decoy_flag"] + self.species_dict: str = parse_settings["species_dict"] + self.contaminant_flag: str = parse_settings["general"]["contaminant_flag"] + self.min_count_multispec: str = parse_settings["general"]["min_count_multispec"] + 
self.species_expected_ratio: str = parse_settings["species_expected_ratio"] + -def parse_settings(input_format:str) -> Settings: +def parse_settings(input_format: str) -> Settings: """load settings from toml file""" raise NotImplementedError diff --git a/pyproject.toml b/pyproject.toml index b93d4bf1..c345e6eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,31 +1,31 @@ [project] -name = "proteobench" authors = [ - { name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" }, + {name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be"}, ] # TODO: Add others -readme = "README.md" -license = { file = "LICENSE" } classifiers = [ - "Intended Audience :: Science/Research", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Topic :: Scientific/Engineering :: Bio-Informatics", - "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Development Status :: 4 - Beta", ] -keywords = ['proteomics', 'peptides', 'retention time', 'mass spectrometry'] -requires-python = ">=3.7" -dynamic = ["version", "description"] dependencies = [ - "pandas", - "numpy", - "plotly", - "streamlit_extras", - "streamlit-plotly-events", - "matplotlib", - "importlib-metadata; python_version < '3.8'", - "toml", + "pandas", + "numpy", + "plotly", + "streamlit_extras", + "streamlit-plotly-events", + "matplotlib", + "importlib-metadata; python_version < '3.8'", + "toml", ] +dynamic = ["version", "description"] +keywords = ['proteomics', 'peptides', 'retention time', 'mass spectrometry'] +license = {file = "LICENSE"} +name = "proteobench" +readme = "README.md" +requires-python = ">=3.7" [project.optional-dependencies] dev = ["black"] @@ -33,9 +33,12 @@ docs = ["sphinx", 
"sphinx-rtd-theme", "sphinx-autobuild", "myst-parser"] web = ["streamlit", "scipy"] [project.urls] -"Homepage" = "https://github.com/ProteoBench" "Bug Tracker" = "https://github.com/ProteoBench/ProteoBench/issues/" +"Homepage" = "https://github.com/ProteoBench" [build-system] -requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" +requires = ["flit_core >=3.2,<4"] + +[tool.flake8] +max-line-length = 120 From 56e846fe0e53aa2b86a39dccb5a3c0bf3c41d8a1 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 25 Sep 2023 17:42:27 +0200 Subject: [PATCH 02/15] :sparkles: parse mqpar.xml to dictionary - later: map selected entries per task (needs to be done) - changed to pytest (as it is run in a github action) --- CONTRIBUTING.md | 4 +- proteobench/io/params/maxquant.py | 101 +++ test/params/mqpar1.5.3.30_MBR.json | 458 ++++++++++++ test/params/mqpar1.5.3.30_MBR.xml | 253 +++++++ test/params/mqpar_MQ1.6.3.3_MBR.json | 762 ++++++++++++++++++++ test/params/mqpar_MQ1.6.3.3_MBR.xml | 466 ++++++++++++ test/params/mqpar_MQ2.1.3.0_noMBR.json | 945 +++++++++++++++++++++++++ test/params/mqpar_MQ2.1.3.0_noMBR.xml | 532 ++++++++++++++ test/test_parse_params.py | 46 ++ 9 files changed, 3566 insertions(+), 1 deletion(-) create mode 100644 proteobench/io/params/maxquant.py create mode 100644 test/params/mqpar1.5.3.30_MBR.json create mode 100644 test/params/mqpar1.5.3.30_MBR.xml create mode 100644 test/params/mqpar_MQ1.6.3.3_MBR.json create mode 100644 test/params/mqpar_MQ1.6.3.3_MBR.xml create mode 100644 test/params/mqpar_MQ2.1.3.0_noMBR.json create mode 100644 test/params/mqpar_MQ2.1.3.0_noMBR.xml create mode 100644 test/test_parse_params.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6993f95c..02e4a80d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,8 +39,10 @@ Using a virtual environment is recommended. To run the tests run the command: +> We use pytest which also supports unittest if you prefer that. 
+ ``` -python -m unittest test/test_module_dda_quant.py +pytest ``` diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py new file mode 100644 index 00000000..591d80b7 --- /dev/null +++ b/proteobench/io/params/maxquant.py @@ -0,0 +1,101 @@ +"""Functionality to parse Maxqunt mqpar.xml parameter files""" +import json +import logging +import xml.etree.ElementTree as ET +from pathlib import Path + +logger = logging.getLogger() + + +def extend_tuple(t, target_length: int): + """Extend tuple with None values to match target length.""" + if not isinstance(t, tuple): + raise TypeError(f"Wrong type provided. Expected tuple, got {type(t)} : {t!r}") + if len(t) > target_length: + raise ValueError( + f"Tuple is too long (got {len(t)}, expected {target_length}: {t!r}" + ) + return t + (None,) * (target_length - len(t)) + + +def extend_tuples_with_none(list_of_tuples: list[tuple], target_length: int): + """Extend the tuples in a list of tuples with None values to match target length.""" + extended_tuples = [] + for tuple_ in list_of_tuples: + # if len(tuple_) > target_length: + # raise ValueError(f"tuple is too long: {len(tuple_)}") + extended_tuple = extend_tuple(tuple_, target_length) + extended_tuples.append(extended_tuple) + return extended_tuples + + +def add_record(data: dict, tag: str, record) -> dict: + """Add tag and record to data dict. 
+ + The record can be many things.""" + if tag in data: + if isinstance(data[tag], list): + data[tag].append(record) + else: + data[tag] = [data[tag], record] + else: + data[tag] = record + return data + + +def read_xml_record(element: ET.Element) -> dict: + """Read entire record in a nested dict structure.""" + data = dict() + for child in element: + if len(child) > 1 and child.tag: + # if there is a list, process each element one by one + # either nested or a plain text + data[child.tag] = [ + add_record( + dict(), + tag=child.tag, + record=read_xml_record(child) + if not (child.text and child.text.strip()) + else child.text.strip(), + ) + for child in child + ] + elif child.text and child.text.strip(): + # just plain text record + data = add_record(data=data, tag=child.tag, record=child.text.strip()) + else: + record = read_xml_record(child) + data = add_record(data, child.tag, record) + if not data: + # empty strings and None are normalzied to None + return None + return data + + +def read_file(file: str) -> dict: + """Read all entries in a MaxQuant xml file.""" + tree: ET.ElementTree = ET.parse(file) + root: ET.Element = tree.getroot() + params: dict = read_xml_record(root) + return params + + +# create a first version of json files to match +if __name__ == "__main__": + for test_file in [ + "../../../test/params/mqpar_MQ1.6.3.3_MBR.xml", + "../../../test/params/mqpar_MQ2.1.3.0_noMBR.xml", + "../../../test/params/mqpar1.5.3.30_MBR.xml", + ]: + print(f"{test_file = }") + record_example = read_file(test_file) + ( + Path(test_file) + .with_suffix(".json") + .write_text( + json.dumps( + record_example, + indent=4, + ) + ) + ) diff --git a/test/params/mqpar1.5.3.30_MBR.json b/test/params/mqpar1.5.3.30_MBR.json new file mode 100644 index 00000000..c8c8a3f2 --- /dev/null +++ b/test/params/mqpar1.5.3.30_MBR.json @@ -0,0 +1,458 @@ +{ + "name": "Session1", + "maxQuantVersion": "1.5.3.30", + "tempFolder": null, + "numThreads": "3", + "sendEmail": "false", + 
"fixedCombinedFolder": null, + "ionCountIntensities": "false", + "verboseColumnHeaders": "false", + "fullMinMz": "-1.7976931348623157E+308", + "fullMaxMz": "1.7976931348623157E+308", + "calcPeakProperties": "false", + "showCentroidMassDifferences": "false", + "showIsotopeMassDifferences": "false", + "filePaths": [ + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" + } + ], + "experiments": [ + { + "string": "A_Sample_Alpha_01" + }, + { + "string": "A_Sample_Alpha_02" + }, + { + "string": "A_Sample_Alpha_03" + }, + { + "string": "B_Sample_Alpha_01" + }, + { + "string": "B_Sample_Alpha_02" + }, + { + "string": "B_Sample_Alpha_03" + } + ], + "fractions": [ + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + } + ], + "paramGroupIndices": [ + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + } + ], + "parameterGroups": { + "parameterGroup": [ + { + "maxCharge": "7" + }, + { + "minPeakLen": "2" + }, + { + "useMs1Centroids": "false" + }, + { + "useMs2Centroids": "false" + }, + { + "cutPeaks": "true" + }, + { + "gapScans": "1" + }, + { + "minTime": "NaN" + }, + { + "maxTime": "NaN" + }, + { + 
"matchType": "MatchFromAndTo" + }, + { + "centroidMatchTol": "8" + }, + { + "centroidMatchTolInPpm": "true" + }, + { + "centroidHalfWidth": "35" + }, + { + "centroidHalfWidthInPpm": "true" + }, + { + "valleyFactor": "1.4" + }, + { + "advancedPeakSplitting": "false" + }, + { + "intensityThreshold": "500" + }, + { + "msInstrument": "0" + }, + { + "intensityDetermination": "0" + }, + { + "labelMods": { + "string": null + } + }, + { + "reQuantify": "false" + }, + { + "lfqSkipNorm": "false" + }, + { + "lfqMinEdgesPerNode": "3" + }, + { + "lfqAvEdgesPerNode": "6" + }, + { + "lfqMaxFeatures": "100000" + }, + { + "fastLfq": "true" + }, + { + "lfqRestrictFeatures": "false" + }, + { + "lfqMinRatioCount": "2" + }, + { + "useNormRatiosForHybridLfq": "true" + }, + { + "maxLabeledAa": "0" + }, + { + "maxNmods": "5" + }, + { + "maxMissedCleavages": "2" + }, + { + "multiplicity": "1" + }, + { + "enzymes": { + "string": "Trypsin/P" + } + }, + { + "enzymesFirstSearch": null + }, + { + "useEnzymeFirstSearch": "false" + }, + { + "useVariableModificationsFirstSearch": "false" + }, + { + "variableModifications": { + "string": [ + "Oxidation (M)", + "Acetyl (Protein N-term)" + ] + } + }, + { + "useMultiModification": "false" + }, + { + "multiModifications": null + }, + { + "isobaricLabels": null + }, + { + "variableModificationsFirstSearch": null + }, + { + "hasAdditionalVariableModifications": "false" + }, + { + "additionalVariableModifications": null + }, + { + "additionalVariableModificationProteins": null + }, + { + "doMassFiltering": "true" + }, + { + "firstSearchTol": "20" + }, + { + "mainSearchTol": "4.5" + }, + { + "searchTolInPpm": "true" + }, + { + "isotopeMatchTol": "2" + }, + { + "isotopeMatchTolInPpm": "true" + }, + { + "isotopeTimeCorrelation": "0.6" + }, + { + "theorIsotopeCorrelation": "0.6" + }, + { + "recalibrationInPpm": "true" + }, + { + "intensityDependentCalibration": "false" + }, + { + "minScoreForCalibration": "70" + }, + { + "matchLibraryFile": "false" + }, + { + 
"libraryFile": null + }, + { + "matchLibraryMassTolPpm": "0" + }, + { + "matchLibraryTimeTolMin": "0" + }, + { + "matchLabelTimeTolMin": "0" + }, + { + "reporterMassTolerance": "NaN" + }, + { + "reporterPif": "NaN" + }, + { + "filterPif": "false" + }, + { + "reporterFraction": "NaN" + }, + { + "reporterBasePeakRatio": "NaN" + }, + { + "timsHalfWidth": "0" + }, + { + "timsStep": "0" + }, + { + "timsResolution": "0" + }, + { + "timsMinMsmsIntensity": "0" + }, + { + "timsRemovePrecursor": "true" + }, + { + "crosslinkSearch": "false" + }, + { + "crosslinkMaxMonoUnsaturated": "0" + }, + { + "crosslinkMaxMonoSaturated": "0" + }, + { + "crosslinkMaxDiUnsaturated": "0" + }, + { + "crosslinkMaxDiSaturated": "0" + }, + { + "crosslinkUseSeparateFasta": "false" + }, + { + "crosslinkFastaFiles": null + }, + { + "crosslinkMode": "PeptidesWithCleavedLinker" + }, + { + "lcmsRunType": "Standard" + }, + { + "lfqMode": "1" + }, + { + "enzymeMode": "0" + }, + { + "enzymeModeFirstSearch": "0" + } + ] + }, + "fixedModifications": { + "string": "Carbamidomethyl (C)" + }, + "fastaFiles": { + "string": "/users/user/EuBIC benchmarking\n project\\MQ15330_MBR\\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" + }, + "fastaFilesFirstSearch": null, + "fixedSearchFolder": null, + "advancedRatios": "true", + "rtShift": "false", + "separateLfq": "false", + "lfqStabilizeLargeRatios": "true", + "lfqRequireMsms": "true", + "decoyMode": "revert", + "includeContaminants": "true", + "topxWindow": "100", + "maxPeptideMass": "4600", + "epsilonMutationScore": "true", + "mutatedPeptidesSeparately": "true", + "minDeltaScoreUnmodifiedPeptides": "0", + "minDeltaScoreModifiedPeptides": "6", + "minScoreUnmodifiedPeptides": "0", + "minScoreModifiedPeptides": "40", + "secondPeptide": "true", + "matchBetweenRuns": "true", + "matchUnidentifiedFeatures": "false", + "matchBetweenRunsFdr": "false", + "dependentPeptides": "false", + "dependentPeptideFdr": "0", + "dependentPeptideMassBin": "0", + "msmsConnection": "false", + 
"ibaq": "false", + "useDeltaScore": "false", + "splitProteinGroupsByTaxonomy": "false", + "taxonomyLevel": "Species", + "avalon": "false", + "ibaqLogFit": "false", + "razorProteinFdr": "true", + "deNovoSequencing": "false", + "deNovoVarMods": "true", + "massDifferenceSearch": "false", + "minPepLen": "7", + "peptideFdr": "0.01", + "proteinFdr": "0.01", + "siteFdr": "0.01", + "minPeptideLengthForUnspecificSearch": "8", + "maxPeptideLengthForUnspecificSearch": "25", + "useNormRatiosForOccupancy": "true", + "minPeptides": "1", + "minRazorPeptides": "1", + "minUniquePeptides": "0", + "useCounterparts": "false", + "advancedSiteIntensities": "true", + "customProteinQuantification": "false", + "customProteinQuantificationFile": null, + "minRatioCount": "2", + "restrictProteinQuantification": "true", + "restrictMods": [ + { + "string": "Oxidation (M)" + }, + { + "string": "Acetyl (Protein N-term)" + } + ], + "matchingTimeWindow": "0.7", + "alignmentTimeWindow": "20", + "numberOfCandidatesMultiplexedMsms": "25", + "numberOfCandidatesMsms": "15", + "massDifferenceMods": null, + "mainSearchMaxCombinations": "200", + "msmsParamsArray": [ + { + "msmsParams": { + "MatchTolerance": "20", + "DeisotopeTolerance": "7", + "DeNovoTolerance": "10" + } + }, + { + "msmsParams": { + "MatchTolerance": "0.5", + "DeisotopeTolerance": "0.15", + "DeNovoTolerance": "0.25" + } + }, + { + "msmsParams": { + "MatchTolerance": "40", + "DeisotopeTolerance": "0.01", + "DeNovoTolerance": "0.02" + } + }, + { + "msmsParams": { + "MatchTolerance": "0.5", + "DeisotopeTolerance": "0.15", + "DeNovoTolerance": "0.25" + } + } + ], + "compositionPrediction": "0", + "quantMode": "1", + "variationMode": "none" +} \ No newline at end of file diff --git a/test/params/mqpar1.5.3.30_MBR.xml b/test/params/mqpar1.5.3.30_MBR.xml new file mode 100644 index 00000000..f5a57546 --- /dev/null +++ b/test/params/mqpar1.5.3.30_MBR.xml @@ -0,0 +1,253 @@ + + + Session1 + 1.5.3.30 + + 3 + false + + false + false + 
-1.7976931348623157E+308 + 1.7976931348623157E+308 + false + false + false + + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + + + A_Sample_Alpha_01 + A_Sample_Alpha_02 + A_Sample_Alpha_03 + B_Sample_Alpha_01 + B_Sample_Alpha_02 + B_Sample_Alpha_03 + + + 32767 + 32767 + 32767 + 32767 + 32767 + 32767 + + + 0 + 0 + 0 + 0 + 0 + 0 + + + + 7 + 2 + false + false + true + 1 + NaN + NaN + MatchFromAndTo + 8 + true + 35 + true + 1.4 + false + 500 + 0 + 0 + + + + false + false + 3 + 6 + 100000 + true + false + 2 + true + 0 + 5 + 2 + 1 + + Trypsin/P + + + false + false + + Oxidation (M) + Acetyl (Protein N-term) + + false + + + + false + + + true + 20 + 4.5 + true + 2 + true + 0.6 + 0.6 + true + false + 70 + false + + 0 + 0 + 0 + NaN + NaN + false + NaN + NaN + 0 + 0 + 0 + 0 + true + false + 0 + 0 + 0 + 0 + false + + PeptidesWithCleavedLinker + Standard + 1 + 0 + 0 + + + + Carbamidomethyl (C) + + + /users/user/EuBIC benchmarking + project\MQ15330_MBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta + + + + true + false + false + true + true + revert + true + 100 + 4600 + true + true + 0 + 6 + 0 + 40 + true + true + false + false + false + 0 + 0 + false + false + false + false + Species + false + false + true + false + true + false + 7 + 0.01 + 0.01 + 0.01 + 8 + 25 + true + 1 + 1 + 0 + false + true + false + + 2 + true + + Oxidation (M) + Acetyl (Protein N-term) + + 0.7 + 20 + 25 + 15 + + 200 + + + 20 + 7 + 10 + + + 0.5 
+ 0.15 + 0.25 + + + 40 + 0.01 + 0.02 + + + 0.5 + 0.15 + 0.25 + + + 0 + 1 + none + \ No newline at end of file diff --git a/test/params/mqpar_MQ1.6.3.3_MBR.json b/test/params/mqpar_MQ1.6.3.3_MBR.json new file mode 100644 index 00000000..4d3b9a74 --- /dev/null +++ b/test/params/mqpar_MQ1.6.3.3_MBR.json @@ -0,0 +1,762 @@ +{ + "fastaFiles": { + "FastaFileInfo": [ + { + "fastaFilePath": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" + }, + { + "identifierParseRule": ">([^\\s]*)" + }, + { + "descriptionParseRule": ">(.*)" + }, + { + "taxonomyParseRule": null + }, + { + "variationParseRule": null + }, + { + "modificationParseRule": null + }, + { + "taxonomyId": null + } + ] + }, + "fastaFilesProteogenomics": null, + "fastaFilesFirstSearch": null, + "fixedSearchFolder": null, + "andromedaCacheSize": "350000", + "advancedRatios": "True", + "pvalThres": "0.005", + "neucodeRatioBasedQuantification": "False", + "neucodeStabilizeLargeRatios": "False", + "rtShift": "False", + "separateLfq": "False", + "lfqStabilizeLargeRatios": "True", + "lfqRequireMsms": "True", + "decoyMode": "revert", + "boxCarMode": "all", + "includeContaminants": "True", + "maxPeptideMass": "4600", + "epsilonMutationScore": "True", + "mutatedPeptidesSeparately": "True", + "proteogenomicPeptidesSeparately": "True", + "minDeltaScoreUnmodifiedPeptides": "0", + "minDeltaScoreModifiedPeptides": "6", + "minScoreUnmodifiedPeptides": "0", + "minScoreModifiedPeptides": "40", + "secondPeptide": "True", + "matchBetweenRuns": "True", + "matchUnidentifiedFeatures": "False", + "matchBetweenRunsFdr": "False", + "dependentPeptides": "False", + "dependentPeptideFdr": "0", + "dependentPeptideMassBin": "0", + "dependentPeptidesBetweenRuns": "False", + "dependentPeptidesWithinExperiment": "False", + "dependentPeptidesWithinParameterGroup": "False", + "dependentPeptidesRestrictFractions": "False", + "dependentPeptidesFractionDifference": "0", + "msmsConnection": "False", + 
"ibaq": "False", + "top3": "False", + "independentEnzymes": "False", + "useDeltaScore": "False", + "splitProteinGroupsByTaxonomy": "False", + "taxonomyLevel": "Species", + "avalon": "False", + "nModColumns": "3", + "ibaqLogFit": "False", + "razorProteinFdr": "True", + "deNovoSequencing": "False", + "deNovoVarMods": "True", + "massDifferenceSearch": "False", + "isotopeCalc": "False", + "writePeptidesForSpectrumFile": null, + "intensityPredictionsFile": null, + "minPepLen": "7", + "psmFdrCrosslink": "0.01", + "peptideFdr": "0.01", + "proteinFdr": "0.01", + "siteFdr": "0.01", + "minPeptideLengthForUnspecificSearch": "8", + "maxPeptideLengthForUnspecificSearch": "25", + "useNormRatiosForOccupancy": "True", + "minPeptides": "1", + "minRazorPeptides": "1", + "minUniquePeptides": "0", + "useCounterparts": "False", + "advancedSiteIntensities": "True", + "customProteinQuantification": "False", + "customProteinQuantificationFile": null, + "minRatioCount": "2", + "restrictProteinQuantification": "True", + "restrictMods": [ + { + "string": "Oxidation (M)" + }, + { + "string": "Acetyl (Protein N-term)" + } + ], + "matchingTimeWindow": "0.7", + "alignmentTimeWindow": "20", + "numberOfCandidatesMultiplexedMsms": "25", + "numberOfCandidatesMsms": "15", + "compositionPrediction": "0", + "quantMode": "1", + "massDifferenceMods": null, + "mainSearchMaxCombinations": "200", + "writeMsScansTable": "False", + "writeMsmsScansTable": "True", + "writePasefMsmsScansTable": "True", + "writeAccumulatedPasefMsmsScansTable": "True", + "writeMs3ScansTable": "True", + "writeAllPeptidesTable": "True", + "writeMzRangeTable": "True", + "writeMzTab": "False", + "disableMd5": "False", + "cacheBinInds": "True", + "etdIncludeB": "False", + "complementaryTmtCollapseNplets": "True", + "stackPeaks": "False", + "ms2PrecursorShift": "0", + "complementaryIonPpm": "20", + "variationParseRule": null, + "variationMode": "none", + "useSeriesReporters": "False", + "name": "session1", + "maxQuantVersion": 
"1.6.3.3", + "tempFolder": null, + "pluginFolder": null, + "numThreads": "6", + "emailAddress": null, + "smtpHost": null, + "emailFromAddress": null, + "fixedCombinedFolder": null, + "fullMinMz": "-1.79769313486232E+308", + "fullMaxMz": "1.79769313486232E+308", + "sendEmail": "False", + "ionCountIntensities": "False", + "verboseColumnHeaders": "False", + "calcPeakProperties": "False", + "showCentroidMassDifferences": "False", + "showIsotopeMassDifferences": "False", + "useDotNetCore": "False", + "filePaths": [ + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ1633_MBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" + } + ], + "experiments": [ + { + "string": "A_Sample_Alpha_01" + }, + { + "string": "A_Sample_Alpha_02" + }, + { + "string": "A_Sample_Alpha_03" + }, + { + "string": "B_Sample_Alpha_01" + }, + { + "string": "B_Sample_Alpha_02" + }, + { + "string": "B_Sample_Alpha_03" + } + ], + "fractions": [ + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + } + ], + "ptms": [ + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + } + ], + "paramGroupIndices": [ + { + "int": "0" + }, + { + "int": "0" + }, 
+ { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + } + ], + "referenceChannel": [ + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + } + ], + "parameterGroups": { + "parameterGroup": [ + { + "msInstrument": "0" + }, + { + "maxCharge": "7" + }, + { + "minPeakLen": "2" + }, + { + "useMs1Centroids": "False" + }, + { + "useMs2Centroids": "False" + }, + { + "cutPeaks": "True" + }, + { + "gapScans": "1" + }, + { + "minTime": "NaN" + }, + { + "maxTime": "NaN" + }, + { + "matchType": "MatchFromAndTo" + }, + { + "intensityDetermination": "0" + }, + { + "centroidMatchTol": "8" + }, + { + "centroidMatchTolInPpm": "True" + }, + { + "centroidHalfWidth": "35" + }, + { + "centroidHalfWidthInPpm": "True" + }, + { + "valleyFactor": "1.4" + }, + { + "isotopeValleyFactor": "1.2" + }, + { + "advancedPeakSplitting": "False" + }, + { + "intensityThreshold": "0" + }, + { + "labelMods": { + "string": null + } + }, + { + "lcmsRunType": "Standard" + }, + { + "reQuantify": "False" + }, + { + "lfqMode": "0" + }, + { + "lfqSkipNorm": "False" + }, + { + "lfqMinEdgesPerNode": "3" + }, + { + "lfqAvEdgesPerNode": "6" + }, + { + "lfqMaxFeatures": "100000" + }, + { + "neucodeMaxPpm": "0" + }, + { + "neucodeResolution": "0" + }, + { + "neucodeResolutionInMda": "False" + }, + { + "neucodeInSilicoLowRes": "False" + }, + { + "fastLfq": "True" + }, + { + "lfqRestrictFeatures": "False" + }, + { + "lfqMinRatioCount": "2" + }, + { + "maxLabeledAa": "0" + }, + { + "maxNmods": "5" + }, + { + "maxMissedCleavages": "2" + }, + { + "multiplicity": "1" + }, + { + "enzymeMode": "0" + }, + { + "complementaryReporterType": "0" + }, + { + "reporterNormalization": "0" + }, + { + "neucodeIntensityMode": "0" + }, + { + "fixedModifications": { + "string": "Carbamidomethyl (C)" + } + }, + { + "enzymes": { + "string": "Trypsin/P" + } + }, + { + "enzymesFirstSearch": null + }, + { + 
"enzymeModeFirstSearch": "0" + }, + { + "useEnzymeFirstSearch": "False" + }, + { + "useVariableModificationsFirstSearch": "False" + }, + { + "variableModifications": { + "string": [ + "Oxidation (M)", + "Acetyl (Protein N-term)" + ] + } + }, + { + "useMultiModification": "False" + }, + { + "multiModifications": null + }, + { + "isobaricLabels": null + }, + { + "neucodeLabels": null + }, + { + "variableModificationsFirstSearch": null + }, + { + "hasAdditionalVariableModifications": "False" + }, + { + "additionalVariableModifications": null + }, + { + "additionalVariableModificationProteins": null + }, + { + "doMassFiltering": "True" + }, + { + "firstSearchTol": "20" + }, + { + "mainSearchTol": "4.5" + }, + { + "searchTolInPpm": "True" + }, + { + "isotopeMatchTol": "2" + }, + { + "isotopeMatchTolInPpm": "True" + }, + { + "isotopeTimeCorrelation": "0.6" + }, + { + "theorIsotopeCorrelation": "0.6" + }, + { + "checkMassDeficit": "True" + }, + { + "recalibrationInPpm": "True" + }, + { + "intensityDependentCalibration": "False" + }, + { + "minScoreForCalibration": "70" + }, + { + "matchLibraryFile": "False" + }, + { + "libraryFile": null + }, + { + "matchLibraryMassTolPpm": "0" + }, + { + "matchLibraryTimeTolMin": "0" + }, + { + "matchLabelTimeTolMin": "0" + }, + { + "reporterMassTolerance": "NaN" + }, + { + "reporterPif": "NaN" + }, + { + "filterPif": "False" + }, + { + "reporterFraction": "NaN" + }, + { + "reporterBasePeakRatio": "NaN" + }, + { + "timsHalfWidth": "0" + }, + { + "timsStep": "0" + }, + { + "timsResolution": "0" + }, + { + "timsMinMsmsIntensity": "0" + }, + { + "timsRemovePrecursor": "True" + }, + { + "timsIsobaricLabels": "False" + }, + { + "timsCollapseMsms": "True" + }, + { + "crosslinkSearch": "False" + }, + { + "crossLinker": null + }, + { + "minMatchXl": "0" + }, + { + "minPairedPepLenXl": "6" + }, + { + "crosslinkOnlyIntraProtein": "False" + }, + { + "crosslinkMaxMonoUnsaturated": "0" + }, + { + "crosslinkMaxMonoSaturated": "0" + }, + { + 
"crosslinkMaxDiUnsaturated": "0" + }, + { + "crosslinkMaxDiSaturated": "0" + }, + { + "crosslinkUseSeparateFasta": "False" + }, + { + "crosslinkCleaveModifications": null + }, + { + "crosslinkFastaFiles": null + }, + { + "crosslinkMode": "PeptidesWithCleavedLinker" + }, + { + "peakRefinement": "False" + }, + { + "isobaricSumOverWindow": "True" + } + ] + }, + "msmsParamsArray": [ + { + "msmsParams": { + "Name": "FTMS", + "MatchTolerance": "20", + "MatchToleranceInPpm": "True", + "DeisotopeTolerance": "7", + "DeisotopeToleranceInPpm": "True", + "DeNovoTolerance": "10", + "DeNovoToleranceInPpm": "True", + "Deisotope": "True", + "Topx": "12", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "ITMS", + "MatchTolerance": "0.5", + "MatchToleranceInPpm": "False", + "DeisotopeTolerance": "0.15", + "DeisotopeToleranceInPpm": "False", + "DeNovoTolerance": "0.25", + "DeNovoToleranceInPpm": "False", + "Deisotope": "False", + "Topx": "8", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "TOF", + "MatchTolerance": "40", + "MatchToleranceInPpm": "True", + "DeisotopeTolerance": "0.01", + "DeisotopeToleranceInPpm": "False", + "DeNovoTolerance": "0.02", + "DeNovoToleranceInPpm": "False", + "Deisotope": "True", + "Topx": "10", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "Unknown", + "MatchTolerance": "0.5", + "MatchToleranceInPpm": "False", + "DeisotopeTolerance": "0.15", + "DeisotopeToleranceInPpm": "False", + "DeNovoTolerance": "0.25", + "DeNovoToleranceInPpm": "False", + "Deisotope": "False", + "Topx": "8", + "TopxInterval": 
"100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "DependentLosses": "True", + "Recalibration": "False" + } + } + ], + "fragmentationParamsArray": [ + { + "fragmentationParams": { + "Name": "CID", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "HCD", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "ETD", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "PQD", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "ETHCD", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "ETCID", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "UVPD", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + 
"fragmentationParams": { + "Name": "Unknown", + "Connected": "False", + "ConnectedScore0": "1", + "ConnectedScore1": "1", + "ConnectedScore2": "1", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + } + ] +} \ No newline at end of file diff --git a/test/params/mqpar_MQ1.6.3.3_MBR.xml b/test/params/mqpar_MQ1.6.3.3_MBR.xml new file mode 100644 index 00000000..fe725e9e --- /dev/null +++ b/test/params/mqpar_MQ1.6.3.3_MBR.xml @@ -0,0 +1,466 @@ + + + + + /users/user/EuBIC benchmarking + project\MQ1633_MBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta + >([^\s]*) + >(.*) + + + + + + + + + + + + 350000 + True + 0.005 + False + False + False + False + True + True + revert + all + True + 4600 + True + True + True + 0 + 6 + 0 + 40 + True + True + False + False + False + 0 + 0 + False + False + False + False + 0 + False + False + False + False + False + False + Species + False + 3 + False + True + False + True + False + False + + + + 7 + 0.01 + 0.01 + 0.01 + 0.01 + 8 + 25 + True + 1 + 1 + 0 + False + True + False + + 2 + True + + Oxidation (M) + Acetyl (Protein N-term) + + 0.7 + 20 + 25 + 15 + 0 + 1 + + + 200 + False + True + True + True + True + True + True + False + False + True + False + True + False + 0 + 20 + + none + False + session1 + 1.6.3.3 + + + 6 + + + + + -1.79769313486232E+308 + 1.79769313486232E+308 + False + False + False + False + False + False + False + + /users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + /users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + 
project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + + + A_Sample_Alpha_01 + A_Sample_Alpha_02 + A_Sample_Alpha_03 + B_Sample_Alpha_01 + B_Sample_Alpha_02 + B_Sample_Alpha_03 + + + 32767 + 32767 + 32767 + 32767 + 32767 + 32767 + + + False + False + False + False + False + False + + + 0 + 0 + 0 + 0 + 0 + 0 + + + + + + + + + + + + 0 + 7 + 2 + False + False + True + 1 + NaN + NaN + MatchFromAndTo + 0 + 8 + True + 35 + True + 1.4 + 1.2 + False + 0 + + + + Standard + False + 0 + False + 3 + 6 + 100000 + 0 + 0 + False + False + True + False + 2 + 0 + 5 + 2 + 1 + 0 + 0 + 0 + 0 + + Carbamidomethyl (C) + + + Trypsin/P + + + + 0 + False + False + + Oxidation (M) + Acetyl (Protein N-term) + + False + + + + + + + + + False + + + + + True + 20 + 4.5 + True + 2 + True + 0.6 + 0.6 + True + True + False + 70 + False + + 0 + 0 + 0 + NaN + NaN + False + NaN + NaN + 0 + 0 + 0 + 0 + True + False + True + False + + 0 + 6 + False + 0 + 0 + 0 + 0 + False + + + + + PeptidesWithCleavedLinker + False + True + + + + + FTMS + 20 + True + 7 + True + 10 + True + True + 12 + 100 + True + True + True + True + False + + + ITMS + 0.5 + False + 0.15 + False + 0.25 + False + False + 8 + 100 + True + True + True + True + False + + + TOF + 40 + True + 0.01 + False + 0.02 + False + True + 10 + 100 + True + True + True + True + False + + + Unknown + 0.5 + False + 0.15 + False + 0.25 + False + False + 8 + 100 + True + True + True + True + False + + + + + CID + False + 1 + 1 + 1 + False + 1 + KRH + + + HCD + False + 1 + 1 + 1 + False + 1 + KRH + + + ETD + False + 1 + 1 + 1 + False + 1 + KRH + + + PQD + False + 1 + 1 + 1 + False + 1 + KRH + + + ETHCD + False + 1 + 1 + 1 + False + 1 + KRH + + + ETCID + False + 1 + 1 + 1 + False + 1 + KRH + + + UVPD + False + 1 + 1 + 1 + False + 1 + KRH + + + Unknown + False + 1 + 1 + 1 + False + 1 + KRH + + + \ No newline at end of file diff --git a/test/params/mqpar_MQ2.1.3.0_noMBR.json b/test/params/mqpar_MQ2.1.3.0_noMBR.json new file mode 100644 index 
00000000..1e14b0df --- /dev/null +++ b/test/params/mqpar_MQ2.1.3.0_noMBR.json @@ -0,0 +1,945 @@ +{ + "fastaFiles": { + "FastaFileInfo": [ + { + "fastaFilePath": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" + }, + { + "identifierParseRule": ">([^\\s]*)" + }, + { + "descriptionParseRule": ">(.*)" + }, + { + "taxonomyParseRule": null + }, + { + "variationParseRule": null + }, + { + "modificationParseRule": null + }, + { + "taxonomyId": null + } + ] + }, + "fastaFilesProteogenomics": null, + "fastaFilesFirstSearch": null, + "fixedSearchFolder": null, + "andromedaCacheSize": "350000", + "advancedRatios": "True", + "pvalThres": "0.005", + "rtShift": "False", + "separateLfq": "False", + "lfqStabilizeLargeRatios": "True", + "lfqRequireMsms": "True", + "lfqBayesQuant": "False", + "decoyMode": "revert", + "includeContaminants": "True", + "maxPeptideMass": "4600", + "epsilonMutationScore": "True", + "mutatedPeptidesSeparately": "True", + "proteogenomicPeptidesSeparately": "True", + "minDeltaScoreUnmodifiedPeptides": "0", + "minDeltaScoreModifiedPeptides": "6", + "minScoreUnmodifiedPeptides": "0", + "minScoreModifiedPeptides": "40", + "secondPeptide": "True", + "matchBetweenRuns": "False", + "matchUnidentifiedFeatures": "False", + "matchBetweenRunsFdr": "False", + "dependentPeptides": "False", + "dependentPeptideFdr": "0", + "dependentPeptideMassBin": "0", + "dependentPeptidesBetweenRuns": "False", + "dependentPeptidesWithinExperiment": "False", + "dependentPeptidesWithinParameterGroup": "False", + "dependentPeptidesRestrictFractions": "False", + "dependentPeptidesFractionDifference": "0", + "ibaq": "False", + "top3": "False", + "independentEnzymes": "False", + "useDeltaScore": "False", + "splitProteinGroupsByTaxonomy": "False", + "taxonomyLevel": "Species", + "avalon": "False", + "nModColumns": "3", + "ibaqLogFit": "False", + "ibaqChargeNormalization": "False", + "razorProteinFdr": "True", + "deNovoSequencing": "False", + 
"deNovoVarMods": "False", + "deNovoCompleteSequence": "False", + "deNovoCalibratedMasses": "False", + "deNovoMaxIterations": "0", + "deNovoProteaseReward": "0", + "deNovoProteaseRewardTof": "0", + "deNovoAgPenalty": "0", + "deNovoGgPenalty": "0", + "deNovoUseComplementScore": "True", + "deNovoUseProteaseScore": "True", + "deNovoUseWaterLossScore": "True", + "deNovoUseAmmoniaLossScore": "True", + "deNovoUseA2Score": "True", + "deNovoScalingFactor": "0", + "massDifferenceSearch": "False", + "isotopeCalc": "False", + "minPepLen": "7", + "psmFdrCrosslink": "0.01", + "peptideFdr": "0.01", + "proteinFdr": "0.01", + "siteFdr": "0.01", + "minPeptideLengthForUnspecificSearch": "8", + "maxPeptideLengthForUnspecificSearch": "25", + "useNormRatiosForOccupancy": "True", + "minPeptides": "1", + "minRazorPeptides": "1", + "minUniquePeptides": "0", + "useCounterparts": "False", + "advancedSiteIntensities": "True", + "customProteinQuantification": "False", + "customProteinQuantificationFile": null, + "minRatioCount": "2", + "restrictProteinQuantification": "True", + "restrictMods": [ + { + "string": "Oxidation (M)" + }, + { + "string": "Acetyl (Protein N-term)" + } + ], + "matchingTimeWindow": "0", + "matchingIonMobilityWindow": "0", + "alignmentTimeWindow": "0", + "alignmentIonMobilityWindow": "0", + "numberOfCandidatesMsms": "15", + "compositionPrediction": "0", + "quantMode": "1", + "massDifferenceMods": null, + "mainSearchMaxCombinations": "200", + "writeMsScansTable": "False", + "writeMsmsScansTable": "True", + "writePasefMsmsScansTable": "True", + "writeAccumulatedMsmsScansTable": "True", + "writeMs3ScansTable": "True", + "writeAllPeptidesTable": "True", + "writeMzRangeTable": "True", + "writeDiaFragmentTable": "False", + "writeDiaFragmentQuantTable": "False", + "writeMzTab": "False", + "writeSdrf": "False", + "disableMd5": "False", + "cacheBinInds": "True", + "etdIncludeB": "False", + "ms2PrecursorShift": "0", + "complementaryIonPpm": "20", + "variationParseRule": null, + 
"variationMode": "none", + "useSeriesReporters": "False", + "name": "session1", + "maxQuantVersion": "2.1.3.0", + "pluginFolder": null, + "numThreads": "4", + "emailAddress": null, + "smtpHost": null, + "emailFromAddress": null, + "fixedCombinedFolder": null, + "fullMinMz": "-1.79769313486232E+308", + "fullMaxMz": "1.79769313486232E+308", + "sendEmail": "False", + "ionCountIntensities": "False", + "verboseColumnHeaders": "False", + "calcPeakProperties": "False", + "showCentroidMassDifferences": "False", + "showIsotopeMassDifferences": "False", + "useDotNetCore": "True", + "profilePerformance": "False", + "filePaths": [ + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" + }, + { + "string": "/users/user/EuBIC benchmarking\n project\\MQ2130_noMBR\\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" + } + ], + "experiments": [ + { + "string": "A_Sample_Alpha_01" + }, + { + "string": "A_Sample_Alpha_02" + }, + { + "string": "A_Sample_Alpha_03" + }, + { + "string": "B_Sample_Alpha_01" + }, + { + "string": "B_Sample_Alpha_02" + }, + { + "string": "B_Sample_Alpha_03" + } + ], + "fractions": [ + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + }, + { + "short": "32767" + } + ], + "ptms": [ + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + "boolean": "False" + }, + { + 
"boolean": "False" + }, + { + "boolean": "False" + } + ], + "paramGroupIndices": [ + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + }, + { + "int": "0" + } + ], + "referenceChannel": [ + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + }, + { + "string": null + } + ], + "lfqTopNPeptides": "0", + "diaJoinPrecChargesForLfq": "False", + "diaFragChargesForQuant": "1", + "gridSpacing": "0.7", + "proteinGroupingFile": null, + "parameterGroups": { + "parameterGroup": [ + { + "msInstrument": "0" + }, + { + "maxCharge": "7" + }, + { + "minPeakLen": "2" + }, + { + "diaMinPeakLen": "1" + }, + { + "useMs1Centroids": "False" + }, + { + "useMs2Centroids": "False" + }, + { + "cutPeaks": "True" + }, + { + "gapScans": "1" + }, + { + "minTime": "NaN" + }, + { + "maxTime": "NaN" + }, + { + "matchType": "MatchFromAndTo" + }, + { + "intensityDetermination": "0" + }, + { + "centroidMatchTol": "8" + }, + { + "centroidMatchTolInPpm": "True" + }, + { + "centroidHalfWidth": "35" + }, + { + "centroidHalfWidthInPpm": "True" + }, + { + "valleyFactor": "1.4" + }, + { + "isotopeValleyFactor": "1.2" + }, + { + "advancedPeakSplitting": "False" + }, + { + "intensityThresholdMs1": "0" + }, + { + "intensityThresholdMs2": "0" + }, + { + "labelMods": { + "string": null + } + }, + { + "lcmsRunType": "Standard" + }, + { + "reQuantify": "False" + }, + { + "lfqMode": "1" + }, + { + "lfqNormClusterSize": "80" + }, + { + "lfqMinEdgesPerNode": "3" + }, + { + "lfqAvEdgesPerNode": "6" + }, + { + "lfqMaxFeatures": "100000" + }, + { + "neucodeMaxPpm": "0" + }, + { + "neucodeResolution": "0" + }, + { + "neucodeResolutionInMda": "False" + }, + { + "neucodeInSilicoLowRes": "False" + }, + { + "fastLfq": "True" + }, + { + "lfqRestrictFeatures": "False" + }, + { + "lfqMinRatioCount": "2" + }, + { + "maxLabeledAa": "0" + }, + { + "maxNmods": "5" + }, + { + "maxMissedCleavages": "2" + }, + 
{ + "multiplicity": "1" + }, + { + "enzymeMode": "0" + }, + { + "complementaryReporterType": "0" + }, + { + "reporterNormalization": "0" + }, + { + "neucodeIntensityMode": "0" + }, + { + "fixedModifications": { + "string": "Carbamidomethyl (C)" + } + }, + { + "enzymes": { + "string": "Trypsin/P" + } + }, + { + "enzymesFirstSearch": null + }, + { + "enzymeModeFirstSearch": "0" + }, + { + "useEnzymeFirstSearch": "False" + }, + { + "useVariableModificationsFirstSearch": "False" + }, + { + "variableModifications": { + "string": [ + "Oxidation (M)", + "Acetyl (Protein N-term)" + ] + } + }, + { + "useMultiModification": "False" + }, + { + "multiModifications": null + }, + { + "isobaricLabels": null + }, + { + "neucodeLabels": null + }, + { + "variableModificationsFirstSearch": null + }, + { + "hasAdditionalVariableModifications": "False" + }, + { + "additionalVariableModifications": null + }, + { + "additionalVariableModificationProteins": null + }, + { + "doMassFiltering": "True" + }, + { + "firstSearchTol": "20" + }, + { + "mainSearchTol": "4.5" + }, + { + "searchTolInPpm": "True" + }, + { + "isotopeMatchTol": "2" + }, + { + "isotopeMatchTolInPpm": "True" + }, + { + "isotopeTimeCorrelation": "0.6" + }, + { + "theorIsotopeCorrelation": "0.6" + }, + { + "checkMassDeficit": "True" + }, + { + "recalibrationInPpm": "True" + }, + { + "intensityDependentCalibration": "False" + }, + { + "minScoreForCalibration": "70" + }, + { + "matchLibraryFile": "False" + }, + { + "libraryFile": null + }, + { + "matchLibraryMassTolPpm": "0" + }, + { + "matchLibraryTimeTolMin": "0" + }, + { + "matchLabelTimeTolMin": "0" + }, + { + "reporterMassTolerance": "NaN" + }, + { + "reporterPif": "NaN" + }, + { + "filterPif": "False" + }, + { + "reporterFraction": "NaN" + }, + { + "reporterBasePeakRatio": "NaN" + }, + { + "timsHalfWidth": "0" + }, + { + "timsStep": "0" + }, + { + "timsResolution": "0" + }, + { + "timsMinMsmsIntensity": "0" + }, + { + "timsRemovePrecursor": "True" + }, + { + 
"timsIsobaricLabels": "False" + }, + { + "timsCollapseMsms": "True" + }, + { + "crossLinkingType": "0" + }, + { + "crossLinker": null + }, + { + "minMatchXl": "3" + }, + { + "minPairedPepLenXl": "6" + }, + { + "minScoreDipeptide": "40" + }, + { + "minScoreMonopeptide": "0" + }, + { + "minScorePartialCross": "10" + }, + { + "crosslinkOnlyIntraProtein": "False" + }, + { + "crosslinkIntensityBasedPrecursor": "True" + }, + { + "isHybridPrecDetermination": "False" + }, + { + "topXcross": "3" + }, + { + "doesSeparateInterIntraProteinCross": "False" + }, + { + "crosslinkMaxMonoUnsaturated": "0" + }, + { + "crosslinkMaxMonoSaturated": "0" + }, + { + "crosslinkMaxDiUnsaturated": "0" + }, + { + "crosslinkMaxDiSaturated": "0" + }, + { + "crosslinkModifications": null + }, + { + "crosslinkFastaFiles": null + }, + { + "crosslinkSites": null + }, + { + "crosslinkNetworkFiles": null + }, + { + "crosslinkMode": null + }, + { + "peakRefinement": "False" + }, + { + "isobaricSumOverWindow": "True" + }, + { + "isobaricWeightExponent": "0.75" + }, + { + "collapseMsmsOnIsotopePatterns": "False" + }, + { + "diaLibraryType": "0" + }, + { + "diaLibraryPaths": null + }, + { + "diaPeptidePaths": null + }, + { + "diaEvidencePaths": null + }, + { + "diaMsmsPaths": null + }, + { + "diaInitialPrecMassTolPpm": "20" + }, + { + "diaInitialFragMassTolPpm": "20" + }, + { + "diaCorrThresholdFeatureClustering": "0.85" + }, + { + "diaPrecTolPpmFeatureClustering": "2" + }, + { + "diaFragTolPpmFeatureClustering": "2" + }, + { + "diaScoreN": "7" + }, + { + "diaMinScore": "1.99" + }, + { + "diaXgBoostBaseScore": "0.4" + }, + { + "diaXgBoostSubSample": "0.9" + }, + { + "centroidPosition": "0" + }, + { + "diaQuantMethod": "7" + }, + { + "diaFeatureQuantMethod": "2" + }, + { + "lfqNormType": "1" + }, + { + "diaTopNForQuant": "10" + }, + { + "diaMinMsmsIntensityForQuant": "0" + }, + { + "diaTopMsmsIntensityQuantileForQuant": "0.85" + }, + { + "diaPrecursorFilterType": "0" + }, + { + 
"diaMinFragmentOverlapScore": "1" + }, + { + "diaMinPrecursorScore": "0.5" + }, + { + "diaMinProfileCorrelation": "0" + }, + { + "diaXgBoostMinChildWeight": "9" + }, + { + "diaXgBoostMaximumTreeDepth": "12" + }, + { + "diaXgBoostEstimators": "580" + }, + { + "diaXgBoostGamma": "0.9" + }, + { + "diaXgBoostMaxDeltaStep": "3" + }, + { + "diaGlobalMl": "True" + }, + { + "diaAdaptiveMassAccuracy": "False" + }, + { + "diaMassWindowFactor": "3.3" + }, + { + "diaRtPrediction": "False" + }, + { + "diaRtPredictionSecondRound": "False" + }, + { + "diaNoMl": "False" + }, + { + "diaPermuteRt": "False" + }, + { + "diaPermuteCcs": "False" + }, + { + "diaBackgroundSubtraction": "False" + }, + { + "diaBackgroundSubtractionQuantile": "0.5" + }, + { + "diaBackgroundSubtractionFactor": "4" + }, + { + "diaLfqRatioType": "0" + }, + { + "diaTransferQvalue": "0.3" + }, + { + "diaOnlyIsosForRecal": "True" + }, + { + "diaMinPeaksForRecal": "5" + }, + { + "diaUseFragIntensForMl": "False" + }, + { + "diaUseFragMassesForMl": "False" + }, + { + "diaMaxTrainInstances": "1000000" + }, + { + "diaMaxFragmentCharge": "3" + } + ] + }, + "msmsParamsArray": [ + { + "msmsParams": { + "Name": "FTMS", + "MatchTolerance": "20", + "MatchToleranceInPpm": "True", + "DeisotopeTolerance": "7", + "DeisotopeToleranceInPpm": "True", + "DeNovoTolerance": "25", + "DeNovoToleranceInPpm": "True", + "Deisotope": "True", + "Topx": "12", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "IncludeWaterCross": "False", + "IncludeAmmoniaCross": "False", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "ITMS", + "MatchTolerance": "0.5", + "MatchToleranceInPpm": "False", + "DeisotopeTolerance": "0.15", + "DeisotopeToleranceInPpm": "False", + "DeNovoTolerance": "0.5", + "DeNovoToleranceInPpm": "False", + "Deisotope": "False", + "Topx": "8", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + 
"IncludeAmmonia": "True", + "IncludeWaterCross": "False", + "IncludeAmmoniaCross": "False", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "TOF", + "MatchTolerance": "25", + "MatchToleranceInPpm": "True", + "DeisotopeTolerance": "0.01", + "DeisotopeToleranceInPpm": "False", + "DeNovoTolerance": "25", + "DeNovoToleranceInPpm": "True", + "Deisotope": "True", + "Topx": "16", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "IncludeWaterCross": "False", + "IncludeAmmoniaCross": "False", + "DependentLosses": "True", + "Recalibration": "False" + } + }, + { + "msmsParams": { + "Name": "Unknown", + "MatchTolerance": "20", + "MatchToleranceInPpm": "True", + "DeisotopeTolerance": "7", + "DeisotopeToleranceInPpm": "True", + "DeNovoTolerance": "25", + "DeNovoToleranceInPpm": "True", + "Deisotope": "True", + "Topx": "12", + "TopxInterval": "100", + "HigherCharges": "True", + "IncludeWater": "True", + "IncludeAmmonia": "True", + "IncludeWaterCross": "False", + "IncludeAmmoniaCross": "False", + "DependentLosses": "True", + "Recalibration": "False" + } + } + ], + "fragmentationParamsArray": [ + { + "fragmentationParams": { + "Name": "CID", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "HCD", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "ETD", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "PQD", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + 
"Name": "ETHCD", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "ETCID", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "UVPD", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + }, + { + "fragmentationParams": { + "Name": "Unknown", + "UseIntensityPrediction": "False", + "InternalFragments": "False", + "InternalFragmentWeight": "1", + "InternalFragmentAas": "KRH" + } + } + ] +} \ No newline at end of file diff --git a/test/params/mqpar_MQ2.1.3.0_noMBR.xml b/test/params/mqpar_MQ2.1.3.0_noMBR.xml new file mode 100644 index 00000000..148b6044 --- /dev/null +++ b/test/params/mqpar_MQ2.1.3.0_noMBR.xml @@ -0,0 +1,532 @@ + + + + + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta + >([^\s]*) + >(.*) + + + + + + + + + + + + 350000 + True + 0.005 + False + False + True + True + False + revert + True + 4600 + True + True + True + 0 + 6 + 0 + 40 + True + False + False + False + False + 0 + 0 + False + False + False + False + 0 + False + False + False + False + False + Species + False + 3 + False + False + True + False + False + False + False + 0 + 0 + 0 + 0 + 0 + True + True + True + True + True + 0 + False + False + 7 + 0.01 + 0.01 + 0.01 + 0.01 + 8 + 25 + True + 1 + 1 + 0 + False + True + False + + 2 + True + + Oxidation (M) + Acetyl (Protein N-term) + + 0 + 0 + 0 + 0 + 15 + 0 + 1 + + + 200 + False + True + True + True + True + True + True + False + False + False + False + False + True + False + 0 + 20 + + none + False + session1 + 2.1.3.0 + + 4 + + + + + -1.79769313486232E+308 + 1.79769313486232E+308 + False + False + False + False + False + False + True + False + + 
/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + + + A_Sample_Alpha_01 + A_Sample_Alpha_02 + A_Sample_Alpha_03 + B_Sample_Alpha_01 + B_Sample_Alpha_02 + B_Sample_Alpha_03 + + + 32767 + 32767 + 32767 + 32767 + 32767 + 32767 + + + False + False + False + False + False + False + + + 0 + 0 + 0 + 0 + 0 + 0 + + + + + + + + + + 0 + False + 1 + 0.7 + + + + 0 + 7 + 2 + 1 + False + False + True + 1 + NaN + NaN + MatchFromAndTo + 0 + 8 + True + 35 + True + 1.4 + 1.2 + False + 0 + 0 + + + + Standard + False + 1 + 80 + 3 + 6 + 100000 + 0 + 0 + False + False + True + False + 2 + 0 + 5 + 2 + 1 + 0 + 0 + 0 + 0 + + Carbamidomethyl (C) + + + Trypsin/P + + + + 0 + False + False + + Oxidation (M) + Acetyl (Protein N-term) + + False + + + + + + + + + False + + + + + True + 20 + 4.5 + True + 2 + True + 0.6 + 0.6 + True + True + False + 70 + False + + 0 + 0 + 0 + NaN + NaN + False + NaN + NaN + 0 + 0 + 0 + 0 + True + False + True + 0 + + 3 + 6 + 40 + 0 + 10 + False + True + False + 3 + False + 0 + 0 + 0 + 0 + + + + + + + + + + False + True + 0.75 + False + 0 + + + + + + + + + 20 + 20 + 0.85 + 2 + 2 + 7 + 1.99 + 0.4 + 0.9 + 0 + 7 + 2 + 1 + 10 + 0 + 0.85 + 0 + 1 + 0.5 + 0 + 9 + 12 + 580 + 0.9 + 3 + True + False + 3.3 + False + False + False + False + False + False + 0.5 + 4 + 0 + 0.3 + True + 5 + False + False + 1000000 + 3 + + + + + FTMS + 20 + True + 7 + True + 25 + True + True + 12 + 100 + True + True + True + False 
+ False + True + False + + + ITMS + 0.5 + False + 0.15 + False + 0.5 + False + False + 8 + 100 + True + True + True + False + False + True + False + + + TOF + 25 + True + 0.01 + False + 25 + True + True + 16 + 100 + True + True + True + False + False + True + False + + + Unknown + 20 + True + 7 + True + 25 + True + True + 12 + 100 + True + True + True + False + False + True + False + + + + + CID + False + False + 1 + KRH + + + HCD + False + False + 1 + KRH + + + ETD + False + False + 1 + KRH + + + PQD + False + False + 1 + KRH + + + ETHCD + False + False + 1 + KRH + + + ETCID + False + False + 1 + KRH + + + UVPD + False + False + 1 + KRH + + + Unknown + False + False + 1 + KRH + + + \ No newline at end of file diff --git a/test/test_parse_params.py b/test/test_parse_params.py new file mode 100644 index 00000000..11b1e882 --- /dev/null +++ b/test/test_parse_params.py @@ -0,0 +1,46 @@ +import json +from pathlib import Path + +import pytest + +import proteobench.io.params.maxquant as mq_params + +TESTDATA_DIR = Path(__file__).parent / "params" + +mq_paras = [ + "mqpar_MQ1.6.3.3_MBR.xml", + "mqpar_MQ2.1.3.0_noMBR.xml", + "mqpar1.5.3.30_MBR.xml", +] + +mq_paras = [TESTDATA_DIR / mq_para for mq_para in mq_paras] + + +parameters = [ + ((1, 2), (1, 2, None)), + ((3, 4, 5), (3, 4, 5)), + ((6,), (6, None, None)), +] + + +@pytest.mark.parametrize("tuple_in,tuple_out", parameters) +def test_extend_tuple(tuple_in, tuple_out): + actual = mq_params.extend_tuple(tuple_in, 3) + assert actual == tuple_out + + +def test_list_of_tuple_expansion(): + in_list_of_tuples = [(1, 2), (3, 4, 5), (6,)] + expected = [(1, 2, None), (3, 4, 5), (6, None, None)] + actual = mq_params.extend_tuples_with_none(in_list_of_tuples, 3) + assert actual == expected + + +parameters = [(fname, Path(fname).with_suffix(".json")) for fname in mq_paras] + + +@pytest.mark.parametrize("file,json_expected", parameters) +def test_file_reading(file, json_expected): + dict_expected = 
json.loads(json_expected.read_text()) + dict_actual = mq_params.read_file(file) + assert dict_actual == dict_expected From d7e38b77e8b0657af4dd579c56096cefe0004e26 Mon Sep 17 00:00:00 2001 From: Henry Date: Tue, 26 Sep 2023 09:49:58 +0200 Subject: [PATCH 03/15] :bug: add support for newer annotations in py38 py37 support will be drop soon --- proteobench/io/params/maxquant.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index 591d80b7..cc5d0033 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -1,4 +1,6 @@ """Functionality to parse Maxqunt mqpar.xml parameter files""" +from __future__ import annotations + import json import logging import xml.etree.ElementTree as ET From 532157f68775db164b702ddfe68eda353b3cc675 Mon Sep 17 00:00:00 2001 From: Henry Date: Tue, 26 Sep 2023 14:57:57 +0200 Subject: [PATCH 04/15] :sparkles: DataFrame and csv format for MQ parameters - potential: allow to combine MaxQuant parameter files - easy to inspect csv parameter file ToDo: - could update the 4th index level to reflect some of the groups -> see comments in maxquant.py --- proteobench/io/params/maxquant.py | 68 ++++ test/params/mqpar1.5.3.30_MBR.csv | 204 ++++++++++++ test/params/mqpar_MQ1.6.3.3_MBR.csv | 392 ++++++++++++++++++++++ test/params/mqpar_MQ2.1.3.0_noMBR.csv | 453 ++++++++++++++++++++++++++ test/test_parse_params.py | 63 ++++ 5 files changed, 1180 insertions(+) create mode 100644 test/params/mqpar1.5.3.30_MBR.csv create mode 100644 test/params/mqpar_MQ1.6.3.3_MBR.csv create mode 100644 test/params/mqpar_MQ2.1.3.0_noMBR.csv diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index cc5d0033..43f8f4a5 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -1,11 +1,14 @@ """Functionality to parse Maxqunt mqpar.xml parameter files""" from __future__ import annotations +import collections import 
json import logging import xml.etree.ElementTree as ET from pathlib import Path +import pandas as pd + logger = logging.getLogger() @@ -82,6 +85,48 @@ def read_file(file: str) -> dict: return params +def flatten_dict_of_dicts(d: dict, parent_key: str = "") -> dict: + """Build tuples for nested dictionaries for use as `pandas.MultiIndex`. + + Parameters + ---------- + d : dict + Nested dictionary for which all keys are flattened to tuples. + parent_key : str, optional + Outer key (used for recursion), by default '' + + Returns + ------- + dict + Flattend dictionary with tuple keys: {(outer_key, ..., inner_key) : value} + """ + # simplified and adapted from: https://stackoverflow.com/a/6027615/9684872 + items = [] + for k, v in d.items(): + new_key = parent_key + (k,) if parent_key else (k,) + if isinstance(v, collections.abc.MutableMapping): + items.extend(flatten_dict_of_dicts(v, parent_key=new_key)) + elif isinstance(v, list): + for item in v: + if isinstance(item, collections.abc.MutableMapping): + items.extend(flatten_dict_of_dicts(item, parent_key=new_key)) + elif isinstance(item, str): + items.append((new_key, item)) + else: + raise ValueError(f"Unknown item: {item:r}") + else: + items.append((new_key, v)) + return items + + +def build_Series_from_records(records, index_length=4): + records = flatten_dict_of_dicts(records) + idx = pd.MultiIndex.from_tuples( + (extend_tuple(k, index_length) for (k, v) in records) + ) + return pd.Series((v for (k, v) in records), index=idx) + + # create a first version of json files to match if __name__ == "__main__": for test_file in [ @@ -101,3 +146,26 @@ def read_file(file: str) -> dict: ) ) ) + flattend = build_Series_from_records(record_example, 4) + flattend = flattend.to_frame("run_identifier") + flattend.to_csv(Path(test_file).with_suffix(".csv")) + + # %% + int( + flattend.loc["parameterGroups"] + .loc["parameterGroup"] + .loc["firstSearchTol"] + .squeeze() + ) + + # %% + # ! 
Parse msmsParamsArray + ms2_params = ( + flattend.loc["msmsParamsArray"].loc["msmsParams"].reset_index(-1, drop=True) + ) + ms2_params.loc["Name", "mode"] = ms2_params.loc["Name"].squeeze() + ms2_params["mode"] = ms2_params["mode"].fillna(method="ffill") + ms2_params = ms2_params.set_index("mode", append=True) + ms2_params.loc[("MatchTolerance", "FTMS")] + # ? reset_index level -1 + # ? update and fillna -> then set as index again diff --git a/test/params/mqpar1.5.3.30_MBR.csv b/test/params/mqpar1.5.3.30_MBR.csv new file mode 100644 index 00000000..051fdbd7 --- /dev/null +++ b/test/params/mqpar1.5.3.30_MBR.csv @@ -0,0 +1,204 @@ +,,,,run_identifier +name,,,,Session1 +maxQuantVersion,,,,1.5.3.30 +tempFolder,,,, +numThreads,,,,3 +sendEmail,,,,false +fixedCombinedFolder,,,, +ionCountIntensities,,,,false +verboseColumnHeaders,,,,false +fullMinMz,,,,-1.7976931348623157E+308 +fullMaxMz,,,,1.7976931348623157E+308 +calcPeakProperties,,,,false +showCentroidMassDifferences,,,,false +showIsotopeMassDifferences,,,,false +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" +experiments,string,,,A_Sample_Alpha_01 +experiments,string,,,A_Sample_Alpha_02 +experiments,string,,,A_Sample_Alpha_03 +experiments,string,,,B_Sample_Alpha_01 +experiments,string,,,B_Sample_Alpha_02 
+experiments,string,,,B_Sample_Alpha_03 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +parameterGroups,parameterGroup,maxCharge,,7 +parameterGroups,parameterGroup,minPeakLen,,2 +parameterGroups,parameterGroup,useMs1Centroids,,false +parameterGroups,parameterGroup,useMs2Centroids,,false +parameterGroups,parameterGroup,cutPeaks,,true +parameterGroups,parameterGroup,gapScans,,1 +parameterGroups,parameterGroup,minTime,,NaN +parameterGroups,parameterGroup,maxTime,,NaN +parameterGroups,parameterGroup,matchType,,MatchFromAndTo +parameterGroups,parameterGroup,centroidMatchTol,,8 +parameterGroups,parameterGroup,centroidMatchTolInPpm,,true +parameterGroups,parameterGroup,centroidHalfWidth,,35 +parameterGroups,parameterGroup,centroidHalfWidthInPpm,,true +parameterGroups,parameterGroup,valleyFactor,,1.4 +parameterGroups,parameterGroup,advancedPeakSplitting,,false +parameterGroups,parameterGroup,intensityThreshold,,500 +parameterGroups,parameterGroup,msInstrument,,0 +parameterGroups,parameterGroup,intensityDetermination,,0 +parameterGroups,parameterGroup,labelMods,string, +parameterGroups,parameterGroup,reQuantify,,false +parameterGroups,parameterGroup,lfqSkipNorm,,false +parameterGroups,parameterGroup,lfqMinEdgesPerNode,,3 +parameterGroups,parameterGroup,lfqAvEdgesPerNode,,6 +parameterGroups,parameterGroup,lfqMaxFeatures,,100000 +parameterGroups,parameterGroup,fastLfq,,true +parameterGroups,parameterGroup,lfqRestrictFeatures,,false +parameterGroups,parameterGroup,lfqMinRatioCount,,2 +parameterGroups,parameterGroup,useNormRatiosForHybridLfq,,true +parameterGroups,parameterGroup,maxLabeledAa,,0 +parameterGroups,parameterGroup,maxNmods,,5 +parameterGroups,parameterGroup,maxMissedCleavages,,2 
+parameterGroups,parameterGroup,multiplicity,,1 +parameterGroups,parameterGroup,enzymes,string,Trypsin/P +parameterGroups,parameterGroup,enzymesFirstSearch,, +parameterGroups,parameterGroup,useEnzymeFirstSearch,,false +parameterGroups,parameterGroup,useVariableModificationsFirstSearch,,false +parameterGroups,parameterGroup,variableModifications,string,Oxidation (M) +parameterGroups,parameterGroup,variableModifications,string,Acetyl (Protein N-term) +parameterGroups,parameterGroup,useMultiModification,,false +parameterGroups,parameterGroup,multiModifications,, +parameterGroups,parameterGroup,isobaricLabels,, +parameterGroups,parameterGroup,variableModificationsFirstSearch,, +parameterGroups,parameterGroup,hasAdditionalVariableModifications,,false +parameterGroups,parameterGroup,additionalVariableModifications,, +parameterGroups,parameterGroup,additionalVariableModificationProteins,, +parameterGroups,parameterGroup,doMassFiltering,,true +parameterGroups,parameterGroup,firstSearchTol,,20 +parameterGroups,parameterGroup,mainSearchTol,,4.5 +parameterGroups,parameterGroup,searchTolInPpm,,true +parameterGroups,parameterGroup,isotopeMatchTol,,2 +parameterGroups,parameterGroup,isotopeMatchTolInPpm,,true +parameterGroups,parameterGroup,isotopeTimeCorrelation,,0.6 +parameterGroups,parameterGroup,theorIsotopeCorrelation,,0.6 +parameterGroups,parameterGroup,recalibrationInPpm,,true +parameterGroups,parameterGroup,intensityDependentCalibration,,false +parameterGroups,parameterGroup,minScoreForCalibration,,70 +parameterGroups,parameterGroup,matchLibraryFile,,false +parameterGroups,parameterGroup,libraryFile,, +parameterGroups,parameterGroup,matchLibraryMassTolPpm,,0 +parameterGroups,parameterGroup,matchLibraryTimeTolMin,,0 +parameterGroups,parameterGroup,matchLabelTimeTolMin,,0 +parameterGroups,parameterGroup,reporterMassTolerance,,NaN +parameterGroups,parameterGroup,reporterPif,,NaN +parameterGroups,parameterGroup,filterPif,,false 
+parameterGroups,parameterGroup,reporterFraction,,NaN +parameterGroups,parameterGroup,reporterBasePeakRatio,,NaN +parameterGroups,parameterGroup,timsHalfWidth,,0 +parameterGroups,parameterGroup,timsStep,,0 +parameterGroups,parameterGroup,timsResolution,,0 +parameterGroups,parameterGroup,timsMinMsmsIntensity,,0 +parameterGroups,parameterGroup,timsRemovePrecursor,,true +parameterGroups,parameterGroup,crosslinkSearch,,false +parameterGroups,parameterGroup,crosslinkMaxMonoUnsaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxMonoSaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxDiUnsaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxDiSaturated,,0 +parameterGroups,parameterGroup,crosslinkUseSeparateFasta,,false +parameterGroups,parameterGroup,crosslinkFastaFiles,, +parameterGroups,parameterGroup,crosslinkMode,,PeptidesWithCleavedLinker +parameterGroups,parameterGroup,lcmsRunType,,Standard +parameterGroups,parameterGroup,lfqMode,,1 +parameterGroups,parameterGroup,enzymeMode,,0 +parameterGroups,parameterGroup,enzymeModeFirstSearch,,0 +fixedModifications,string,,,Carbamidomethyl (C) +fastaFiles,string,,,"/users/user/EuBIC benchmarking + project\MQ15330_MBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" +fastaFilesFirstSearch,,,, +fixedSearchFolder,,,, +advancedRatios,,,,true +rtShift,,,,false +separateLfq,,,,false +lfqStabilizeLargeRatios,,,,true +lfqRequireMsms,,,,true +decoyMode,,,,revert +includeContaminants,,,,true +topxWindow,,,,100 +maxPeptideMass,,,,4600 +epsilonMutationScore,,,,true +mutatedPeptidesSeparately,,,,true +minDeltaScoreUnmodifiedPeptides,,,,0 +minDeltaScoreModifiedPeptides,,,,6 +minScoreUnmodifiedPeptides,,,,0 +minScoreModifiedPeptides,,,,40 +secondPeptide,,,,true +matchBetweenRuns,,,,true +matchUnidentifiedFeatures,,,,false +matchBetweenRunsFdr,,,,false +dependentPeptides,,,,false +dependentPeptideFdr,,,,0 +dependentPeptideMassBin,,,,0 +msmsConnection,,,,false +ibaq,,,,false +useDeltaScore,,,,false +splitProteinGroupsByTaxonomy,,,,false 
+taxonomyLevel,,,,Species +avalon,,,,false +ibaqLogFit,,,,false +razorProteinFdr,,,,true +deNovoSequencing,,,,false +deNovoVarMods,,,,true +massDifferenceSearch,,,,false +minPepLen,,,,7 +peptideFdr,,,,0.01 +proteinFdr,,,,0.01 +siteFdr,,,,0.01 +minPeptideLengthForUnspecificSearch,,,,8 +maxPeptideLengthForUnspecificSearch,,,,25 +useNormRatiosForOccupancy,,,,true +minPeptides,,,,1 +minRazorPeptides,,,,1 +minUniquePeptides,,,,0 +useCounterparts,,,,false +advancedSiteIntensities,,,,true +customProteinQuantification,,,,false +customProteinQuantificationFile,,,, +minRatioCount,,,,2 +restrictProteinQuantification,,,,true +restrictMods,string,,,Oxidation (M) +restrictMods,string,,,Acetyl (Protein N-term) +matchingTimeWindow,,,,0.7 +alignmentTimeWindow,,,,20 +numberOfCandidatesMultiplexedMsms,,,,25 +numberOfCandidatesMsms,,,,15 +massDifferenceMods,,,, +mainSearchMaxCombinations,,,,200 +msmsParamsArray,msmsParams,MatchTolerance,,20 +msmsParamsArray,msmsParams,DeisotopeTolerance,,7 +msmsParamsArray,msmsParams,DeNovoTolerance,,10 +msmsParamsArray,msmsParams,MatchTolerance,,0.5 +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 +msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +msmsParamsArray,msmsParams,MatchTolerance,,40 +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.01 +msmsParamsArray,msmsParams,DeNovoTolerance,,0.02 +msmsParamsArray,msmsParams,MatchTolerance,,0.5 +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 +msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +compositionPrediction,,,,0 +quantMode,,,,1 +variationMode,,,,none diff --git a/test/params/mqpar_MQ1.6.3.3_MBR.csv b/test/params/mqpar_MQ1.6.3.3_MBR.csv new file mode 100644 index 00000000..bd206f61 --- /dev/null +++ b/test/params/mqpar_MQ1.6.3.3_MBR.csv @@ -0,0 +1,392 @@ +,,,,run_identifier +fastaFiles,FastaFileInfo,fastaFilePath,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" +fastaFiles,FastaFileInfo,identifierParseRule,,>([^\s]*) 
+fastaFiles,FastaFileInfo,descriptionParseRule,,>(.*) +fastaFiles,FastaFileInfo,taxonomyParseRule,, +fastaFiles,FastaFileInfo,variationParseRule,, +fastaFiles,FastaFileInfo,modificationParseRule,, +fastaFiles,FastaFileInfo,taxonomyId,, +fastaFilesProteogenomics,,,, +fastaFilesFirstSearch,,,, +fixedSearchFolder,,,, +andromedaCacheSize,,,,350000 +advancedRatios,,,,True +pvalThres,,,,0.005 +neucodeRatioBasedQuantification,,,,False +neucodeStabilizeLargeRatios,,,,False +rtShift,,,,False +separateLfq,,,,False +lfqStabilizeLargeRatios,,,,True +lfqRequireMsms,,,,True +decoyMode,,,,revert +boxCarMode,,,,all +includeContaminants,,,,True +maxPeptideMass,,,,4600 +epsilonMutationScore,,,,True +mutatedPeptidesSeparately,,,,True +proteogenomicPeptidesSeparately,,,,True +minDeltaScoreUnmodifiedPeptides,,,,0 +minDeltaScoreModifiedPeptides,,,,6 +minScoreUnmodifiedPeptides,,,,0 +minScoreModifiedPeptides,,,,40 +secondPeptide,,,,True +matchBetweenRuns,,,,True +matchUnidentifiedFeatures,,,,False +matchBetweenRunsFdr,,,,False +dependentPeptides,,,,False +dependentPeptideFdr,,,,0 +dependentPeptideMassBin,,,,0 +dependentPeptidesBetweenRuns,,,,False +dependentPeptidesWithinExperiment,,,,False +dependentPeptidesWithinParameterGroup,,,,False +dependentPeptidesRestrictFractions,,,,False +dependentPeptidesFractionDifference,,,,0 +msmsConnection,,,,False +ibaq,,,,False +top3,,,,False +independentEnzymes,,,,False +useDeltaScore,,,,False +splitProteinGroupsByTaxonomy,,,,False +taxonomyLevel,,,,Species +avalon,,,,False +nModColumns,,,,3 +ibaqLogFit,,,,False +razorProteinFdr,,,,True +deNovoSequencing,,,,False +deNovoVarMods,,,,True +massDifferenceSearch,,,,False +isotopeCalc,,,,False +writePeptidesForSpectrumFile,,,, +intensityPredictionsFile,,,, +minPepLen,,,,7 +psmFdrCrosslink,,,,0.01 +peptideFdr,,,,0.01 +proteinFdr,,,,0.01 +siteFdr,,,,0.01 +minPeptideLengthForUnspecificSearch,,,,8 +maxPeptideLengthForUnspecificSearch,,,,25 +useNormRatiosForOccupancy,,,,True +minPeptides,,,,1 
+minRazorPeptides,,,,1 +minUniquePeptides,,,,0 +useCounterparts,,,,False +advancedSiteIntensities,,,,True +customProteinQuantification,,,,False +customProteinQuantificationFile,,,, +minRatioCount,,,,2 +restrictProteinQuantification,,,,True +restrictMods,string,,,Oxidation (M) +restrictMods,string,,,Acetyl (Protein N-term) +matchingTimeWindow,,,,0.7 +alignmentTimeWindow,,,,20 +numberOfCandidatesMultiplexedMsms,,,,25 +numberOfCandidatesMsms,,,,15 +compositionPrediction,,,,0 +quantMode,,,,1 +massDifferenceMods,,,, +mainSearchMaxCombinations,,,,200 +writeMsScansTable,,,,False +writeMsmsScansTable,,,,True +writePasefMsmsScansTable,,,,True +writeAccumulatedPasefMsmsScansTable,,,,True +writeMs3ScansTable,,,,True +writeAllPeptidesTable,,,,True +writeMzRangeTable,,,,True +writeMzTab,,,,False +disableMd5,,,,False +cacheBinInds,,,,True +etdIncludeB,,,,False +complementaryTmtCollapseNplets,,,,True +stackPeaks,,,,False +ms2PrecursorShift,,,,0 +complementaryIonPpm,,,,20 +variationParseRule,,,, +variationMode,,,,none +useSeriesReporters,,,,False +name,,,,session1 +maxQuantVersion,,,,1.6.3.3 +tempFolder,,,, +pluginFolder,,,, +numThreads,,,,6 +emailAddress,,,, +smtpHost,,,, +emailFromAddress,,,, +fixedCombinedFolder,,,, +fullMinMz,,,,-1.79769313486232E+308 +fullMaxMz,,,,1.79769313486232E+308 +sendEmail,,,,False +ionCountIntensities,,,,False +verboseColumnHeaders,,,,False +calcPeakProperties,,,,False +showCentroidMassDifferences,,,,False +showIsotopeMassDifferences,,,,False +useDotNetCore,,,,False +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" 
+filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ1633_MBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" +experiments,string,,,A_Sample_Alpha_01 +experiments,string,,,A_Sample_Alpha_02 +experiments,string,,,A_Sample_Alpha_03 +experiments,string,,,B_Sample_Alpha_01 +experiments,string,,,B_Sample_Alpha_02 +experiments,string,,,B_Sample_Alpha_03 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +parameterGroups,parameterGroup,msInstrument,,0 +parameterGroups,parameterGroup,maxCharge,,7 +parameterGroups,parameterGroup,minPeakLen,,2 +parameterGroups,parameterGroup,useMs1Centroids,,False +parameterGroups,parameterGroup,useMs2Centroids,,False +parameterGroups,parameterGroup,cutPeaks,,True +parameterGroups,parameterGroup,gapScans,,1 +parameterGroups,parameterGroup,minTime,,NaN +parameterGroups,parameterGroup,maxTime,,NaN +parameterGroups,parameterGroup,matchType,,MatchFromAndTo +parameterGroups,parameterGroup,intensityDetermination,,0 +parameterGroups,parameterGroup,centroidMatchTol,,8 +parameterGroups,parameterGroup,centroidMatchTolInPpm,,True +parameterGroups,parameterGroup,centroidHalfWidth,,35 +parameterGroups,parameterGroup,centroidHalfWidthInPpm,,True +parameterGroups,parameterGroup,valleyFactor,,1.4 +parameterGroups,parameterGroup,isotopeValleyFactor,,1.2 
+parameterGroups,parameterGroup,advancedPeakSplitting,,False +parameterGroups,parameterGroup,intensityThreshold,,0 +parameterGroups,parameterGroup,labelMods,string, +parameterGroups,parameterGroup,lcmsRunType,,Standard +parameterGroups,parameterGroup,reQuantify,,False +parameterGroups,parameterGroup,lfqMode,,0 +parameterGroups,parameterGroup,lfqSkipNorm,,False +parameterGroups,parameterGroup,lfqMinEdgesPerNode,,3 +parameterGroups,parameterGroup,lfqAvEdgesPerNode,,6 +parameterGroups,parameterGroup,lfqMaxFeatures,,100000 +parameterGroups,parameterGroup,neucodeMaxPpm,,0 +parameterGroups,parameterGroup,neucodeResolution,,0 +parameterGroups,parameterGroup,neucodeResolutionInMda,,False +parameterGroups,parameterGroup,neucodeInSilicoLowRes,,False +parameterGroups,parameterGroup,fastLfq,,True +parameterGroups,parameterGroup,lfqRestrictFeatures,,False +parameterGroups,parameterGroup,lfqMinRatioCount,,2 +parameterGroups,parameterGroup,maxLabeledAa,,0 +parameterGroups,parameterGroup,maxNmods,,5 +parameterGroups,parameterGroup,maxMissedCleavages,,2 +parameterGroups,parameterGroup,multiplicity,,1 +parameterGroups,parameterGroup,enzymeMode,,0 +parameterGroups,parameterGroup,complementaryReporterType,,0 +parameterGroups,parameterGroup,reporterNormalization,,0 +parameterGroups,parameterGroup,neucodeIntensityMode,,0 +parameterGroups,parameterGroup,fixedModifications,string,Carbamidomethyl (C) +parameterGroups,parameterGroup,enzymes,string,Trypsin/P +parameterGroups,parameterGroup,enzymesFirstSearch,, +parameterGroups,parameterGroup,enzymeModeFirstSearch,,0 +parameterGroups,parameterGroup,useEnzymeFirstSearch,,False +parameterGroups,parameterGroup,useVariableModificationsFirstSearch,,False +parameterGroups,parameterGroup,variableModifications,string,Oxidation (M) +parameterGroups,parameterGroup,variableModifications,string,Acetyl (Protein N-term) +parameterGroups,parameterGroup,useMultiModification,,False +parameterGroups,parameterGroup,multiModifications,, 
+parameterGroups,parameterGroup,isobaricLabels,, +parameterGroups,parameterGroup,neucodeLabels,, +parameterGroups,parameterGroup,variableModificationsFirstSearch,, +parameterGroups,parameterGroup,hasAdditionalVariableModifications,,False +parameterGroups,parameterGroup,additionalVariableModifications,, +parameterGroups,parameterGroup,additionalVariableModificationProteins,, +parameterGroups,parameterGroup,doMassFiltering,,True +parameterGroups,parameterGroup,firstSearchTol,,20 +parameterGroups,parameterGroup,mainSearchTol,,4.5 +parameterGroups,parameterGroup,searchTolInPpm,,True +parameterGroups,parameterGroup,isotopeMatchTol,,2 +parameterGroups,parameterGroup,isotopeMatchTolInPpm,,True +parameterGroups,parameterGroup,isotopeTimeCorrelation,,0.6 +parameterGroups,parameterGroup,theorIsotopeCorrelation,,0.6 +parameterGroups,parameterGroup,checkMassDeficit,,True +parameterGroups,parameterGroup,recalibrationInPpm,,True +parameterGroups,parameterGroup,intensityDependentCalibration,,False +parameterGroups,parameterGroup,minScoreForCalibration,,70 +parameterGroups,parameterGroup,matchLibraryFile,,False +parameterGroups,parameterGroup,libraryFile,, +parameterGroups,parameterGroup,matchLibraryMassTolPpm,,0 +parameterGroups,parameterGroup,matchLibraryTimeTolMin,,0 +parameterGroups,parameterGroup,matchLabelTimeTolMin,,0 +parameterGroups,parameterGroup,reporterMassTolerance,,NaN +parameterGroups,parameterGroup,reporterPif,,NaN +parameterGroups,parameterGroup,filterPif,,False +parameterGroups,parameterGroup,reporterFraction,,NaN +parameterGroups,parameterGroup,reporterBasePeakRatio,,NaN +parameterGroups,parameterGroup,timsHalfWidth,,0 +parameterGroups,parameterGroup,timsStep,,0 +parameterGroups,parameterGroup,timsResolution,,0 +parameterGroups,parameterGroup,timsMinMsmsIntensity,,0 +parameterGroups,parameterGroup,timsRemovePrecursor,,True +parameterGroups,parameterGroup,timsIsobaricLabels,,False +parameterGroups,parameterGroup,timsCollapseMsms,,True 
+parameterGroups,parameterGroup,crosslinkSearch,,False +parameterGroups,parameterGroup,crossLinker,, +parameterGroups,parameterGroup,minMatchXl,,0 +parameterGroups,parameterGroup,minPairedPepLenXl,,6 +parameterGroups,parameterGroup,crosslinkOnlyIntraProtein,,False +parameterGroups,parameterGroup,crosslinkMaxMonoUnsaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxMonoSaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxDiUnsaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxDiSaturated,,0 +parameterGroups,parameterGroup,crosslinkUseSeparateFasta,,False +parameterGroups,parameterGroup,crosslinkCleaveModifications,, +parameterGroups,parameterGroup,crosslinkFastaFiles,, +parameterGroups,parameterGroup,crosslinkMode,,PeptidesWithCleavedLinker +parameterGroups,parameterGroup,peakRefinement,,False +parameterGroups,parameterGroup,isobaricSumOverWindow,,True +msmsParamsArray,msmsParams,Name,,FTMS +msmsParamsArray,msmsParams,MatchTolerance,,20 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,True +msmsParamsArray,msmsParams,DeisotopeTolerance,,7 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,True +msmsParamsArray,msmsParams,DeNovoTolerance,,10 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,True +msmsParamsArray,msmsParams,Deisotope,,True +msmsParamsArray,msmsParams,Topx,,12 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,ITMS +msmsParamsArray,msmsParams,MatchTolerance,,0.5 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,False +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,False +msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,False 
+msmsParamsArray,msmsParams,Deisotope,,False +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,TOF +msmsParamsArray,msmsParams,MatchTolerance,,40 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,True +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.01 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,False +msmsParamsArray,msmsParams,DeNovoTolerance,,0.02 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,False +msmsParamsArray,msmsParams,Deisotope,,True +msmsParamsArray,msmsParams,Topx,,10 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,Unknown +msmsParamsArray,msmsParams,MatchTolerance,,0.5 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,False +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,False +msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,False +msmsParamsArray,msmsParams,Deisotope,,False +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +fragmentationParamsArray,fragmentationParams,Name,,CID +fragmentationParamsArray,fragmentationParams,Connected,,False 
+fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,HCD +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,ETD +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,PQD +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH 
+fragmentationParamsArray,fragmentationParams,Name,,ETHCD +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,ETCID +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,UVPD +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,Unknown +fragmentationParamsArray,fragmentationParams,Connected,,False +fragmentationParamsArray,fragmentationParams,ConnectedScore0,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore1,,1 +fragmentationParamsArray,fragmentationParams,ConnectedScore2,,1 +fragmentationParamsArray,fragmentationParams,InternalFragments,,False 
+fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH diff --git a/test/params/mqpar_MQ2.1.3.0_noMBR.csv b/test/params/mqpar_MQ2.1.3.0_noMBR.csv new file mode 100644 index 00000000..73ea2763 --- /dev/null +++ b/test/params/mqpar_MQ2.1.3.0_noMBR.csv @@ -0,0 +1,453 @@ +,,,,run_identifier +fastaFiles,FastaFileInfo,fastaFilePath,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta" +fastaFiles,FastaFileInfo,identifierParseRule,,>([^\s]*) +fastaFiles,FastaFileInfo,descriptionParseRule,,>(.*) +fastaFiles,FastaFileInfo,taxonomyParseRule,, +fastaFiles,FastaFileInfo,variationParseRule,, +fastaFiles,FastaFileInfo,modificationParseRule,, +fastaFiles,FastaFileInfo,taxonomyId,, +fastaFilesProteogenomics,,,, +fastaFilesFirstSearch,,,, +fixedSearchFolder,,,, +andromedaCacheSize,,,,350000 +advancedRatios,,,,True +pvalThres,,,,0.005 +rtShift,,,,False +separateLfq,,,,False +lfqStabilizeLargeRatios,,,,True +lfqRequireMsms,,,,True +lfqBayesQuant,,,,False +decoyMode,,,,revert +includeContaminants,,,,True +maxPeptideMass,,,,4600 +epsilonMutationScore,,,,True +mutatedPeptidesSeparately,,,,True +proteogenomicPeptidesSeparately,,,,True +minDeltaScoreUnmodifiedPeptides,,,,0 +minDeltaScoreModifiedPeptides,,,,6 +minScoreUnmodifiedPeptides,,,,0 +minScoreModifiedPeptides,,,,40 +secondPeptide,,,,True +matchBetweenRuns,,,,False +matchUnidentifiedFeatures,,,,False +matchBetweenRunsFdr,,,,False +dependentPeptides,,,,False +dependentPeptideFdr,,,,0 +dependentPeptideMassBin,,,,0 +dependentPeptidesBetweenRuns,,,,False +dependentPeptidesWithinExperiment,,,,False +dependentPeptidesWithinParameterGroup,,,,False +dependentPeptidesRestrictFractions,,,,False +dependentPeptidesFractionDifference,,,,0 +ibaq,,,,False +top3,,,,False +independentEnzymes,,,,False +useDeltaScore,,,,False +splitProteinGroupsByTaxonomy,,,,False +taxonomyLevel,,,,Species +avalon,,,,False 
+nModColumns,,,,3 +ibaqLogFit,,,,False +ibaqChargeNormalization,,,,False +razorProteinFdr,,,,True +deNovoSequencing,,,,False +deNovoVarMods,,,,False +deNovoCompleteSequence,,,,False +deNovoCalibratedMasses,,,,False +deNovoMaxIterations,,,,0 +deNovoProteaseReward,,,,0 +deNovoProteaseRewardTof,,,,0 +deNovoAgPenalty,,,,0 +deNovoGgPenalty,,,,0 +deNovoUseComplementScore,,,,True +deNovoUseProteaseScore,,,,True +deNovoUseWaterLossScore,,,,True +deNovoUseAmmoniaLossScore,,,,True +deNovoUseA2Score,,,,True +deNovoScalingFactor,,,,0 +massDifferenceSearch,,,,False +isotopeCalc,,,,False +minPepLen,,,,7 +psmFdrCrosslink,,,,0.01 +peptideFdr,,,,0.01 +proteinFdr,,,,0.01 +siteFdr,,,,0.01 +minPeptideLengthForUnspecificSearch,,,,8 +maxPeptideLengthForUnspecificSearch,,,,25 +useNormRatiosForOccupancy,,,,True +minPeptides,,,,1 +minRazorPeptides,,,,1 +minUniquePeptides,,,,0 +useCounterparts,,,,False +advancedSiteIntensities,,,,True +customProteinQuantification,,,,False +customProteinQuantificationFile,,,, +minRatioCount,,,,2 +restrictProteinQuantification,,,,True +restrictMods,string,,,Oxidation (M) +restrictMods,string,,,Acetyl (Protein N-term) +matchingTimeWindow,,,,0 +matchingIonMobilityWindow,,,,0 +alignmentTimeWindow,,,,0 +alignmentIonMobilityWindow,,,,0 +numberOfCandidatesMsms,,,,15 +compositionPrediction,,,,0 +quantMode,,,,1 +massDifferenceMods,,,, +mainSearchMaxCombinations,,,,200 +writeMsScansTable,,,,False +writeMsmsScansTable,,,,True +writePasefMsmsScansTable,,,,True +writeAccumulatedMsmsScansTable,,,,True +writeMs3ScansTable,,,,True +writeAllPeptidesTable,,,,True +writeMzRangeTable,,,,True +writeDiaFragmentTable,,,,False +writeDiaFragmentQuantTable,,,,False +writeMzTab,,,,False +writeSdrf,,,,False +disableMd5,,,,False +cacheBinInds,,,,True +etdIncludeB,,,,False +ms2PrecursorShift,,,,0 +complementaryIonPpm,,,,20 +variationParseRule,,,, +variationMode,,,,none +useSeriesReporters,,,,False +name,,,,session1 +maxQuantVersion,,,,2.1.3.0 +pluginFolder,,,, +numThreads,,,,4 
+emailAddress,,,, +smtpHost,,,, +emailFromAddress,,,, +fixedCombinedFolder,,,, +fullMinMz,,,,-1.79769313486232E+308 +fullMaxMz,,,,1.79769313486232E+308 +sendEmail,,,,False +ionCountIntensities,,,,False +verboseColumnHeaders,,,,False +calcPeakProperties,,,,False +showCentroidMassDifferences,,,,False +showIsotopeMassDifferences,,,,False +useDotNetCore,,,,True +profilePerformance,,,,False +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw" +filePaths,string,,,"/users/user/EuBIC benchmarking + project\MQ2130_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw" +experiments,string,,,A_Sample_Alpha_01 +experiments,string,,,A_Sample_Alpha_02 +experiments,string,,,A_Sample_Alpha_03 +experiments,string,,,B_Sample_Alpha_01 +experiments,string,,,B_Sample_Alpha_02 +experiments,string,,,B_Sample_Alpha_03 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +fractions,short,,,32767 +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +ptms,boolean,,,False +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +paramGroupIndices,int,,,0 +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, +referenceChannel,string,,, 
+lfqTopNPeptides,,,,0 +diaJoinPrecChargesForLfq,,,,False +diaFragChargesForQuant,,,,1 +gridSpacing,,,,0.7 +proteinGroupingFile,,,, +parameterGroups,parameterGroup,msInstrument,,0 +parameterGroups,parameterGroup,maxCharge,,7 +parameterGroups,parameterGroup,minPeakLen,,2 +parameterGroups,parameterGroup,diaMinPeakLen,,1 +parameterGroups,parameterGroup,useMs1Centroids,,False +parameterGroups,parameterGroup,useMs2Centroids,,False +parameterGroups,parameterGroup,cutPeaks,,True +parameterGroups,parameterGroup,gapScans,,1 +parameterGroups,parameterGroup,minTime,,NaN +parameterGroups,parameterGroup,maxTime,,NaN +parameterGroups,parameterGroup,matchType,,MatchFromAndTo +parameterGroups,parameterGroup,intensityDetermination,,0 +parameterGroups,parameterGroup,centroidMatchTol,,8 +parameterGroups,parameterGroup,centroidMatchTolInPpm,,True +parameterGroups,parameterGroup,centroidHalfWidth,,35 +parameterGroups,parameterGroup,centroidHalfWidthInPpm,,True +parameterGroups,parameterGroup,valleyFactor,,1.4 +parameterGroups,parameterGroup,isotopeValleyFactor,,1.2 +parameterGroups,parameterGroup,advancedPeakSplitting,,False +parameterGroups,parameterGroup,intensityThresholdMs1,,0 +parameterGroups,parameterGroup,intensityThresholdMs2,,0 +parameterGroups,parameterGroup,labelMods,string, +parameterGroups,parameterGroup,lcmsRunType,,Standard +parameterGroups,parameterGroup,reQuantify,,False +parameterGroups,parameterGroup,lfqMode,,1 +parameterGroups,parameterGroup,lfqNormClusterSize,,80 +parameterGroups,parameterGroup,lfqMinEdgesPerNode,,3 +parameterGroups,parameterGroup,lfqAvEdgesPerNode,,6 +parameterGroups,parameterGroup,lfqMaxFeatures,,100000 +parameterGroups,parameterGroup,neucodeMaxPpm,,0 +parameterGroups,parameterGroup,neucodeResolution,,0 +parameterGroups,parameterGroup,neucodeResolutionInMda,,False +parameterGroups,parameterGroup,neucodeInSilicoLowRes,,False +parameterGroups,parameterGroup,fastLfq,,True +parameterGroups,parameterGroup,lfqRestrictFeatures,,False 
+parameterGroups,parameterGroup,lfqMinRatioCount,,2 +parameterGroups,parameterGroup,maxLabeledAa,,0 +parameterGroups,parameterGroup,maxNmods,,5 +parameterGroups,parameterGroup,maxMissedCleavages,,2 +parameterGroups,parameterGroup,multiplicity,,1 +parameterGroups,parameterGroup,enzymeMode,,0 +parameterGroups,parameterGroup,complementaryReporterType,,0 +parameterGroups,parameterGroup,reporterNormalization,,0 +parameterGroups,parameterGroup,neucodeIntensityMode,,0 +parameterGroups,parameterGroup,fixedModifications,string,Carbamidomethyl (C) +parameterGroups,parameterGroup,enzymes,string,Trypsin/P +parameterGroups,parameterGroup,enzymesFirstSearch,, +parameterGroups,parameterGroup,enzymeModeFirstSearch,,0 +parameterGroups,parameterGroup,useEnzymeFirstSearch,,False +parameterGroups,parameterGroup,useVariableModificationsFirstSearch,,False +parameterGroups,parameterGroup,variableModifications,string,Oxidation (M) +parameterGroups,parameterGroup,variableModifications,string,Acetyl (Protein N-term) +parameterGroups,parameterGroup,useMultiModification,,False +parameterGroups,parameterGroup,multiModifications,, +parameterGroups,parameterGroup,isobaricLabels,, +parameterGroups,parameterGroup,neucodeLabels,, +parameterGroups,parameterGroup,variableModificationsFirstSearch,, +parameterGroups,parameterGroup,hasAdditionalVariableModifications,,False +parameterGroups,parameterGroup,additionalVariableModifications,, +parameterGroups,parameterGroup,additionalVariableModificationProteins,, +parameterGroups,parameterGroup,doMassFiltering,,True +parameterGroups,parameterGroup,firstSearchTol,,20 +parameterGroups,parameterGroup,mainSearchTol,,4.5 +parameterGroups,parameterGroup,searchTolInPpm,,True +parameterGroups,parameterGroup,isotopeMatchTol,,2 +parameterGroups,parameterGroup,isotopeMatchTolInPpm,,True +parameterGroups,parameterGroup,isotopeTimeCorrelation,,0.6 +parameterGroups,parameterGroup,theorIsotopeCorrelation,,0.6 +parameterGroups,parameterGroup,checkMassDeficit,,True 
+parameterGroups,parameterGroup,recalibrationInPpm,,True +parameterGroups,parameterGroup,intensityDependentCalibration,,False +parameterGroups,parameterGroup,minScoreForCalibration,,70 +parameterGroups,parameterGroup,matchLibraryFile,,False +parameterGroups,parameterGroup,libraryFile,, +parameterGroups,parameterGroup,matchLibraryMassTolPpm,,0 +parameterGroups,parameterGroup,matchLibraryTimeTolMin,,0 +parameterGroups,parameterGroup,matchLabelTimeTolMin,,0 +parameterGroups,parameterGroup,reporterMassTolerance,,NaN +parameterGroups,parameterGroup,reporterPif,,NaN +parameterGroups,parameterGroup,filterPif,,False +parameterGroups,parameterGroup,reporterFraction,,NaN +parameterGroups,parameterGroup,reporterBasePeakRatio,,NaN +parameterGroups,parameterGroup,timsHalfWidth,,0 +parameterGroups,parameterGroup,timsStep,,0 +parameterGroups,parameterGroup,timsResolution,,0 +parameterGroups,parameterGroup,timsMinMsmsIntensity,,0 +parameterGroups,parameterGroup,timsRemovePrecursor,,True +parameterGroups,parameterGroup,timsIsobaricLabels,,False +parameterGroups,parameterGroup,timsCollapseMsms,,True +parameterGroups,parameterGroup,crossLinkingType,,0 +parameterGroups,parameterGroup,crossLinker,, +parameterGroups,parameterGroup,minMatchXl,,3 +parameterGroups,parameterGroup,minPairedPepLenXl,,6 +parameterGroups,parameterGroup,minScoreDipeptide,,40 +parameterGroups,parameterGroup,minScoreMonopeptide,,0 +parameterGroups,parameterGroup,minScorePartialCross,,10 +parameterGroups,parameterGroup,crosslinkOnlyIntraProtein,,False +parameterGroups,parameterGroup,crosslinkIntensityBasedPrecursor,,True +parameterGroups,parameterGroup,isHybridPrecDetermination,,False +parameterGroups,parameterGroup,topXcross,,3 +parameterGroups,parameterGroup,doesSeparateInterIntraProteinCross,,False +parameterGroups,parameterGroup,crosslinkMaxMonoUnsaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxMonoSaturated,,0 +parameterGroups,parameterGroup,crosslinkMaxDiUnsaturated,,0 
+parameterGroups,parameterGroup,crosslinkMaxDiSaturated,,0 +parameterGroups,parameterGroup,crosslinkModifications,, +parameterGroups,parameterGroup,crosslinkFastaFiles,, +parameterGroups,parameterGroup,crosslinkSites,, +parameterGroups,parameterGroup,crosslinkNetworkFiles,, +parameterGroups,parameterGroup,crosslinkMode,, +parameterGroups,parameterGroup,peakRefinement,,False +parameterGroups,parameterGroup,isobaricSumOverWindow,,True +parameterGroups,parameterGroup,isobaricWeightExponent,,0.75 +parameterGroups,parameterGroup,collapseMsmsOnIsotopePatterns,,False +parameterGroups,parameterGroup,diaLibraryType,,0 +parameterGroups,parameterGroup,diaLibraryPaths,, +parameterGroups,parameterGroup,diaPeptidePaths,, +parameterGroups,parameterGroup,diaEvidencePaths,, +parameterGroups,parameterGroup,diaMsmsPaths,, +parameterGroups,parameterGroup,diaInitialPrecMassTolPpm,,20 +parameterGroups,parameterGroup,diaInitialFragMassTolPpm,,20 +parameterGroups,parameterGroup,diaCorrThresholdFeatureClustering,,0.85 +parameterGroups,parameterGroup,diaPrecTolPpmFeatureClustering,,2 +parameterGroups,parameterGroup,diaFragTolPpmFeatureClustering,,2 +parameterGroups,parameterGroup,diaScoreN,,7 +parameterGroups,parameterGroup,diaMinScore,,1.99 +parameterGroups,parameterGroup,diaXgBoostBaseScore,,0.4 +parameterGroups,parameterGroup,diaXgBoostSubSample,,0.9 +parameterGroups,parameterGroup,centroidPosition,,0 +parameterGroups,parameterGroup,diaQuantMethod,,7 +parameterGroups,parameterGroup,diaFeatureQuantMethod,,2 +parameterGroups,parameterGroup,lfqNormType,,1 +parameterGroups,parameterGroup,diaTopNForQuant,,10 +parameterGroups,parameterGroup,diaMinMsmsIntensityForQuant,,0 +parameterGroups,parameterGroup,diaTopMsmsIntensityQuantileForQuant,,0.85 +parameterGroups,parameterGroup,diaPrecursorFilterType,,0 +parameterGroups,parameterGroup,diaMinFragmentOverlapScore,,1 +parameterGroups,parameterGroup,diaMinPrecursorScore,,0.5 +parameterGroups,parameterGroup,diaMinProfileCorrelation,,0 
+parameterGroups,parameterGroup,diaXgBoostMinChildWeight,,9 +parameterGroups,parameterGroup,diaXgBoostMaximumTreeDepth,,12 +parameterGroups,parameterGroup,diaXgBoostEstimators,,580 +parameterGroups,parameterGroup,diaXgBoostGamma,,0.9 +parameterGroups,parameterGroup,diaXgBoostMaxDeltaStep,,3 +parameterGroups,parameterGroup,diaGlobalMl,,True +parameterGroups,parameterGroup,diaAdaptiveMassAccuracy,,False +parameterGroups,parameterGroup,diaMassWindowFactor,,3.3 +parameterGroups,parameterGroup,diaRtPrediction,,False +parameterGroups,parameterGroup,diaRtPredictionSecondRound,,False +parameterGroups,parameterGroup,diaNoMl,,False +parameterGroups,parameterGroup,diaPermuteRt,,False +parameterGroups,parameterGroup,diaPermuteCcs,,False +parameterGroups,parameterGroup,diaBackgroundSubtraction,,False +parameterGroups,parameterGroup,diaBackgroundSubtractionQuantile,,0.5 +parameterGroups,parameterGroup,diaBackgroundSubtractionFactor,,4 +parameterGroups,parameterGroup,diaLfqRatioType,,0 +parameterGroups,parameterGroup,diaTransferQvalue,,0.3 +parameterGroups,parameterGroup,diaOnlyIsosForRecal,,True +parameterGroups,parameterGroup,diaMinPeaksForRecal,,5 +parameterGroups,parameterGroup,diaUseFragIntensForMl,,False +parameterGroups,parameterGroup,diaUseFragMassesForMl,,False +parameterGroups,parameterGroup,diaMaxTrainInstances,,1000000 +parameterGroups,parameterGroup,diaMaxFragmentCharge,,3 +msmsParamsArray,msmsParams,Name,,FTMS +msmsParamsArray,msmsParams,MatchTolerance,,20 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,True +msmsParamsArray,msmsParams,DeisotopeTolerance,,7 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,True +msmsParamsArray,msmsParams,DeNovoTolerance,,25 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,True +msmsParamsArray,msmsParams,Deisotope,,True +msmsParamsArray,msmsParams,Topx,,12 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True 
+msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,IncludeWaterCross,,False +msmsParamsArray,msmsParams,IncludeAmmoniaCross,,False +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,ITMS +msmsParamsArray,msmsParams,MatchTolerance,,0.5 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,False +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,False +msmsParamsArray,msmsParams,DeNovoTolerance,,0.5 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,False +msmsParamsArray,msmsParams,Deisotope,,False +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,IncludeWaterCross,,False +msmsParamsArray,msmsParams,IncludeAmmoniaCross,,False +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,TOF +msmsParamsArray,msmsParams,MatchTolerance,,25 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,True +msmsParamsArray,msmsParams,DeisotopeTolerance,,0.01 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,False +msmsParamsArray,msmsParams,DeNovoTolerance,,25 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,True +msmsParamsArray,msmsParams,Deisotope,,True +msmsParamsArray,msmsParams,Topx,,16 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,IncludeWaterCross,,False +msmsParamsArray,msmsParams,IncludeAmmoniaCross,,False +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +msmsParamsArray,msmsParams,Name,,Unknown 
+msmsParamsArray,msmsParams,MatchTolerance,,20 +msmsParamsArray,msmsParams,MatchToleranceInPpm,,True +msmsParamsArray,msmsParams,DeisotopeTolerance,,7 +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,True +msmsParamsArray,msmsParams,DeNovoTolerance,,25 +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,True +msmsParamsArray,msmsParams,Deisotope,,True +msmsParamsArray,msmsParams,Topx,,12 +msmsParamsArray,msmsParams,TopxInterval,,100 +msmsParamsArray,msmsParams,HigherCharges,,True +msmsParamsArray,msmsParams,IncludeWater,,True +msmsParamsArray,msmsParams,IncludeAmmonia,,True +msmsParamsArray,msmsParams,IncludeWaterCross,,False +msmsParamsArray,msmsParams,IncludeAmmoniaCross,,False +msmsParamsArray,msmsParams,DependentLosses,,True +msmsParamsArray,msmsParams,Recalibration,,False +fragmentationParamsArray,fragmentationParams,Name,,CID +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,HCD +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,ETD +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,PQD +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False 
+fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,ETHCD +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,ETCID +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,UVPD +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH +fragmentationParamsArray,fragmentationParams,Name,,Unknown +fragmentationParamsArray,fragmentationParams,UseIntensityPrediction,,False +fragmentationParamsArray,fragmentationParams,InternalFragments,,False +fragmentationParamsArray,fragmentationParams,InternalFragmentWeight,,1 +fragmentationParamsArray,fragmentationParams,InternalFragmentAas,,KRH diff --git a/test/test_parse_params.py b/test/test_parse_params.py index 11b1e882..df3c2714 100644 --- a/test/test_parse_params.py +++ b/test/test_parse_params.py @@ -1,6 +1,8 @@ +import io import json from pathlib import Path +import pandas as pd import pytest import proteobench.io.params.maxquant as mq_params @@ -44,3 +46,64 @@ def test_file_reading(file, json_expected): dict_expected = 
json.loads(json_expected.read_text()) dict_actual = mq_params.read_file(file) assert dict_actual == dict_expected + + +parameters = [ + ({"k": "v"}, [(("k",), "v")]), + ({"k1": {"k2": "v1", "k3": "v2"}}, [(("k1", "k2"), "v1"), (("k1", "k3"), "v2")]), + ( + {"k1": {"k2": [{"k4": "v1"}, {"k4": "v2"}]}}, + [(("k1", "k2", "k4"), "v1"), (("k1", "k2", "k4"), "v2")], + ), + ( + {"k1": [{"k2": {"k4": "v1", "k5": "v2"}}, {"k2": {"k4": "v1", "k5": "v2"}}]}, + [ + (("k1", "k2", "k4"), "v1"), + (("k1", "k2", "k5"), "v2"), + (("k1", "k2", "k4"), "v1"), + (("k1", "k2", "k5"), "v2"), + ], + ), + ( + { + "restrictMods": [ + {"string": "Oxidation (M)"}, + {"string": "Acetyl (Protein N-term)"}, + ] + }, + [ + (("restrictMods", "string"), "Oxidation (M)"), + (("restrictMods", "string"), "Acetyl (Protein N-term)"), + ], + ), + ( + { + "variableModifications": { + "string": ["Oxidation (M)", "Acetyl (Protein N-term)"] + } + }, + [ + (("variableModifications", "string"), "Oxidation (M)"), + (("variableModifications", "string"), "Acetyl (Protein N-term)"), + ], + ), +] + + +@pytest.mark.parametrize("dict_in,list_expected", parameters) +def test_flatten_of_dicts(dict_in, list_expected): + actual = mq_params.flatten_dict_of_dicts(dict_in) + assert actual == list_expected + + +parameters = [(fname, Path(fname).with_suffix(".csv")) for fname in mq_paras] + + +@pytest.mark.parametrize("file,csv_expected", parameters) +def test_file_parsing_to_csv(file, csv_expected): + expected = pd.read_csv(csv_expected, index_col=[0, 1, 2, 3]) + actual = mq_params.read_file(file) + actual = mq_params.build_Series_from_records(actual, 4) + actual = actual.to_frame("run_identifier") + actual = pd.read_csv(io.StringIO(actual.to_csv()), index_col=[0, 1, 2, 3]) + assert actual.equals(expected) From be17263bee73dd279656fd547218f0c813c50062 Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 12 Oct 2023 21:10:56 +0200 Subject: [PATCH 05/15] :art: format merged file --- .../modules/dda_quant/parse_settings.py | 43 
+++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/proteobench/modules/dda_quant/parse_settings.py b/proteobench/modules/dda_quant/parse_settings.py index c78de05f..7129aaf8 100644 --- a/proteobench/modules/dda_quant/parse_settings.py +++ b/proteobench/modules/dda_quant/parse_settings.py @@ -14,25 +14,28 @@ MapSettingFiles: dict[str, Path] -PARSE_SETTINGS_FILES = { "WOMBAT" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_wombat.toml'), - "MaxQuant" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_maxquant.toml'), - "MSFragger" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_msfragger.toml'), - "Proline" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_proline.toml'), - "AlphaPept" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_alphapept.toml'), - "Sage" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_sage.toml'), - "Custom" : os.path.join(PARSE_SETTINGS_DIR, 'parse_settings_custom.toml') - } - -PARSE_SETTINGS_FILES_MODULE = os.path.join(PARSE_SETTINGS_DIR, 'module_settings.toml') +PARSE_SETTINGS_FILES = { + "WOMBAT": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_wombat.toml"), + "MaxQuant": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_maxquant.toml"), + "MSFragger": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_msfragger.toml"), + "Proline": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_proline.toml"), + "AlphaPept": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_alphapept.toml"), + "Sage": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_sage.toml"), + "Custom": os.path.join(PARSE_SETTINGS_DIR, "parse_settings_custom.toml"), +} + +PARSE_SETTINGS_FILES_MODULE = os.path.join(PARSE_SETTINGS_DIR, "module_settings.toml") # ! 
Could be created from keys of PARSE_SETTINGS_FILES -INPUT_FORMATS = ("MaxQuant", - "AlphaPept", - "MSFragger", - "Proline", - "WOMBAT", - "Sage", - "Custom") +INPUT_FORMATS = ( + "MaxQuant", + "AlphaPept", + "MSFragger", + "Proline", + "WOMBAT", + "Sage", + "Custom", +) LOCAL_DEVELOPMENT = False @@ -46,6 +49,7 @@ DDA_QUANT_RESULTS_REPO = "https://github.com/Proteobench/Results_Module2_quant_DDA.git" + class ParseSettings: """Structure that contains all the parameters used to parse the given database search output.""" @@ -61,9 +65,10 @@ def __init__(self, input_format: str): self.contaminant_flag = parse_settings["general"]["contaminant_flag"] parse_settings_module = toml.load(PARSE_SETTINGS_FILES_MODULE) - self.min_count_multispec = parse_settings_module["general"]["min_count_multispec"] + self.min_count_multispec = parse_settings_module["general"][ + "min_count_multispec" + ] self.species_expected_ratio = parse_settings_module["species_expected_ratio"] - def parse_settings(input_format: str) -> Settings: From fcce49ff61eda45bda0c1655a3635c192af359a5 Mon Sep 17 00:00:00 2001 From: Henry Date: Sat, 21 Oct 2023 15:16:49 +0200 Subject: [PATCH 06/15] :sparkles: Parse MSFragger parameter files --- proteobench/io/params/fragger.py | 60 ++++++++ test/params/fragger.csv | 105 +++++++++++++ test/params/fragger.params | 139 ++++++++++++++++++ test/test_parse_params_fragger.py | 18 +++ ...arams.py => test_parse_params_maxquant.py} | 0 5 files changed, 322 insertions(+) create mode 100644 proteobench/io/params/fragger.py create mode 100644 test/params/fragger.csv create mode 100644 test/params/fragger.params create mode 100644 test/test_parse_params_fragger.py rename test/{test_parse_params.py => test_parse_params_maxquant.py} (100%) diff --git a/proteobench/io/params/fragger.py b/proteobench/io/params/fragger.py new file mode 100644 index 00000000..14530763 --- /dev/null +++ b/proteobench/io/params/fragger.py @@ -0,0 +1,60 @@ +"""Functionality to parse MSFragger 
fragger.params parameter files. + +MSFragger has a text based paramter file format which +separates paramters and their value using an equal sign. Optional comments are +expressed with a hash sign. +""" +from __future__ import annotations + +import logging +from collections import namedtuple + +logger = logging.getLogger(__name__) + +Parameter = namedtuple("Parameter", ["name", "value", "comment"]) + + +def read_file(file: str) -> list[Parameter]: + """Read MSFragger parameter file as list of records.""" + with open(file) as f: + data = [] + for line in f: + line = line.strip() + logger.debug(line) + # ! logic below also allows to keep the comments as comments + if line.startswith("#"): + continue + if not line: + continue + if "#" in line: + res = line.split("#") + if len(res) == 1: + comment = res[0] + data.append(Parameter(None, None, comment.strip())) + continue + param, comment = [x.strip() for x in res] + else: + param = line + comment = None + res = param.strip().split(" = ") + if len(res) == 1: + param = res[0].strip() + data.append(Parameter(param, None, comment)) + continue + param, value = [x.strip() for x in res] + data.append(Parameter(param, value, comment)) + return data + + +if __name__ == "__main__": + import pathlib + + import pandas as pd + + file = pathlib.Path("../../../test/params/fragger.params") + data = read_file(file) + df = pd.DataFrame.from_records(data, columns=Parameter._fields).set_index( + Parameter._fields[0] + ) + df + df.to_csv(file.with_suffix(".csv")) diff --git a/test/params/fragger.csv b/test/params/fragger.csv new file mode 100644 index 00000000..f325babb --- /dev/null +++ b/test/params/fragger.csv @@ -0,0 +1,105 @@ +name,value,comment +database_name,Q:\MISC_PERSONAL\Bart\Benchmark_experiment_EuBIC\Shared\2023-01-30-decoys-BenchmarkFASTAModule1_DDA.fasta.fas,Path to the protein database file in FASTA format. +num_threads,47,Number of CPU threads to use. +precursor_mass_lower,-10,Lower bound of the precursor mass window. 
+precursor_mass_upper,10,Upper bound of the precursor mass window. +precursor_mass_units,1,"Precursor mass tolerance units (0 for Da, 1 for ppm)." +data_type,0,"Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA)." +precursor_true_tolerance,20,True precursor mass tolerance (window is +/- this value). +precursor_true_units,1,"True precursor mass tolerance units (0 for Da, 1 for ppm)." +fragment_mass_tolerance,20,Fragment mass tolerance (window is +/- this value). +fragment_mass_units,1,"Fragment mass tolerance units (0 for Da, 1 for ppm)." +calibrate_mass,2,"Perform mass calibration (0 for OFF, 1 for ON, 2 for ON and find optimal parameters)." +use_all_mods_in_first_search,0,"Use all variable modifications in first search (0 for No, 1 for Yes)." +decoy_prefix,rev_,Prefix of the decoy protein entries. Used for parameter optimization only. +deisotope,1,"Perform deisotoping or not (0=no, 1=yes and assume singleton peaks single charged, 2=yes and assume singleton peaks single or double charged)." +deneutralloss,1,"Perform deneutrallossing or not (0=no, 1=yes)." +isotope_error,0/1/2,Also search for MS/MS events triggered on specified isotopic peaks. +mass_offsets,0.0,Creates multiple precursor tolerance windows with specified mass offsets. +precursor_mass_mode,selected,One of isolated/selected/corrected. +remove_precursor_peak,1,Remove precursor peaks from tandem mass spectra. 0 = not remove; 1 = remove the peak with precursor charge; 2 = remove the peaks with all charge states (only for DDA mode). +remove_precursor_range,"-1.500000,1.500000",m/z range in removing precursor peaks. Only for DDA mode. Unit: Th. +intensity_transform,0,Transform peaks intensities with sqrt root. 0 = not transform; 1 = transform using sqrt root. +activation_types,all,"Filter to only search scans of provided activation type(s). Allowed: All, HCD, CID, ETD, ECD." +write_calibrated_mzml,0,"Write calibrated MS2 scan to a mzML file (0 for No, 1 for Yes)." 
+write_uncalibrated_mgf,0,"Write uncalibrated MS2 scan to a MGF file (0 for No, 1 for Yes). Only for .raw and .d formats." +mass_diff_to_variable_mod,0,Put mass diff as a variable modification. 0 for no; 1 for yes and remove delta mass; 2 for yes and keep delta mass. +localize_delta_mass,0,"Include fragment ions mass-shifted by unknown modifications (recommended for open and mass offset searches) (0 for OFF, 1 for ON)." +delta_mass_exclude_ranges,"(-1.5,3.5)",Exclude mass range for shifted ions searching. +fragment_ion_series,"b,y","Ion series used in search, specify any of a,b,c,x,y,z,Y,b-18,y-18 (comma separated)." +ion_series_definitions =,,"User defined ion series. Example: ""b* N -17.026548;b0 N -18.010565""." +labile_search_mode,off,"type of search (nglycan, labile, or off). Off means non-labile/typical search." +restrict_deltamass_to,all,"Specify amino acids on which delta masses (mass offsets or search modifications) can occur. Allowed values are single letter codes (e.g. ACD) and '-', must be capitalized. Use 'all' to allow any amino acid." +diagnostic_intensity_filter,0,[nglycan/labile search_mode only]. Minimum relative intensity for SUM of all detected oxonium ions to achieve for spectrum to contain diagnostic fragment evidence. Calculated relative to spectrum base peak. 0 <= value. +Y_type_masses =,,[nglycan/labile search_mode only]. Specify fragments of labile mods that are commonly retained on intact peptides (e.g. Y ions for glycans). Only used if 'Y' is included in fragment_ion_series. +diagnostic_fragments =,,[nglycan/labile search_mode only]. Specify diagnostic fragments of labile mods that appear in the low m/z region. Only used if diagnostic_intensity_filter > 0. +remainder_fragment_masses =,,[labile search_mode only] List of possible remainder fragment ions to consider. Remainder masses are partial modification masses left on b/y ions after fragmentation. +search_enzyme_name_1,stricttrypsin,Name of the first enzyme. 
+search_enzyme_cut_1,KR,First enzyme's cutting amino acid. +search_enzyme_nocut_1 =,,First enzyme's protecting amino acid. +search_enzyme_sense_1,C,First enzyme's cutting terminal. +allowed_missed_cleavage_1,2,First enzyme's allowed number of missed cleavages per peptide. Maximum value is 5. +search_enzyme_name_2,null,Name of the second enzyme. +search_enzyme_cut_2 =,,Second enzyme's cutting amino acid. +search_enzyme_nocut_2 =,,Second enzyme's protecting amino acid. +search_enzyme_sense_2,C,Second enzyme's cutting terminal. +allowed_missed_cleavage_2,2,Second enzyme's allowed number of missed cleavages per peptide. Maximum value is 5. +num_enzyme_termini,2,"0 for non-enzymatic, 1 for semi-enzymatic, and 2 for fully-enzymatic." +clip_nTerm_M,1,Specifies the trimming of a protein N-terminal methionine as a variable modification (0 or 1). +variable_mod_01,15.9949 M 3, +variable_mod_02,42.0106 [^ 1, +allow_multiple_variable_mods_on_residue,0, +max_variable_mods_per_peptide,3,Maximum total number of variable modifications per peptide. +max_variable_mods_combinations,5000,Maximum number of modified forms allowed for each peptide (up to 65534). +output_format,tsv_pepXML_pin,"File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin)." +output_report_topN,1,Reports top N PSMs per input spectrum. +output_max_expect,50,Suppresses reporting of PSM if top hit has expectation value greater than this threshold. +report_alternative_proteins,1,"Report alternative proteins for peptides that are found in multiple proteins (0 for no, 1 for yes)." +precursor_charge,1 4,Assumed range of potential precursor charge states. Only relevant when override_charge is set to 1. +override_charge,0,Ignores precursor charge and uses charge state specified in precursor_charge range (0 or 1). +digest_min_length,7,Minimum length of peptides to be generated during in-silico digestion. 
+digest_max_length,50,Maximum length of peptides to be generated during in-silico digestion. +digest_mass_range,500.0 5000.0,Mass range of peptides to be generated during in-silico digestion in Daltons. +max_fragment_charge,2,Maximum charge state for theoretical fragments to match (1-4). +track_zero_topN,0,Track top N unmodified peptide results separately from main results internally for boosting features. +zero_bin_accept_expect,0,Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value. +zero_bin_mult_expect,1,Multiplies expect value of PSMs in the zero-bin during results ordering (set to less than 1 for boosting). +add_topN_complementary,0,Inserts complementary ions corresponding to the top N most intense fragments in each experimental spectra. +check_spectral_files,1,Checking spectral files before searching. +minimum_peaks,15,Minimum number of peaks in experimental spectrum for matching. +use_topN_peaks,150,Pre-process experimental spectrum to only use top N peaks. +min_fragments_modelling,2,Minimum number of matched peaks in PSM for inclusion in statistical modeling. +min_matched_fragments,4,Minimum number of matched peaks for PSM to be reported. +min_sequence_matches,2,[nglycan/labile search_mode only] Minimum number of sequence-specific (not Y) ions to record a match. +minimum_ratio,0.01,Filters out all peaks in experimental spectrum less intense than this multiple of the base peak intensity. +clear_mz_range,0.0 0.0,Removes peaks in this m/z range prior to matching. 
+add_Cterm_peptide,0.0, +add_Nterm_peptide,0.0, +add_Cterm_protein,0.0, +add_Nterm_protein,0.0, +add_G_glycine,0.0, +add_A_alanine,0.0, +add_S_serine,0.0, +add_P_proline,0.0, +add_V_valine,0.0, +add_T_threonine,0.0, +add_C_cysteine,57.02146, +add_L_leucine,0.0, +add_I_isoleucine,0.0, +add_N_asparagine,0.0, +add_D_aspartic_acid,0.0, +add_Q_glutamine,0.0, +add_K_lysine,0.0, +add_E_glutamic_acid,0.0, +add_M_methionine,0.0, +add_H_histidine,0.0, +add_F_phenylalanine,0.0, +add_R_arginine,0.0, +add_Y_tyrosine,0.0, +add_W_tryptophan,0.0, +add_B_user_amino_acid,0.0, +add_J_user_amino_acid,0.0, +add_O_user_amino_acid,0.0, +add_U_user_amino_acid,0.0, +add_X_user_amino_acid,0.0, +add_Z_user_amino_acid,0.0, diff --git a/test/params/fragger.params b/test/params/fragger.params new file mode 100644 index 00000000..63da05e9 --- /dev/null +++ b/test/params/fragger.params @@ -0,0 +1,139 @@ +database_name = Q:\MISC_PERSONAL\Bart\Benchmark_experiment_EuBIC\Shared\2023-01-30-decoys-BenchmarkFASTAModule1_DDA.fasta.fas # Path to the protein database file in FASTA format. +num_threads = 47 # Number of CPU threads to use. + +precursor_mass_lower = -10 # Lower bound of the precursor mass window. +precursor_mass_upper = 10 # Upper bound of the precursor mass window. +precursor_mass_units = 1 # Precursor mass tolerance units (0 for Da, 1 for ppm). +data_type = 0 # Data type (0 for DDA, 1 for DIA, 2 for gas-phase fractionation DIA). +precursor_true_tolerance = 20 # True precursor mass tolerance (window is +/- this value). +precursor_true_units = 1 # True precursor mass tolerance units (0 for Da, 1 for ppm). +fragment_mass_tolerance = 20 # Fragment mass tolerance (window is +/- this value). +fragment_mass_units = 1 # Fragment mass tolerance units (0 for Da, 1 for ppm). +calibrate_mass = 2 # Perform mass calibration (0 for OFF, 1 for ON, 2 for ON and find optimal parameters). +use_all_mods_in_first_search = 0 # Use all variable modifications in first search (0 for No, 1 for Yes). 
+decoy_prefix = rev_ # Prefix of the decoy protein entries. Used for parameter optimization only. + +deisotope = 1 # Perform deisotoping or not (0=no, 1=yes and assume singleton peaks single charged, 2=yes and assume singleton peaks single or double charged). +deneutralloss = 1 # Perform deneutrallossing or not (0=no, 1=yes). +isotope_error = 0/1/2 # Also search for MS/MS events triggered on specified isotopic peaks. +mass_offsets = 0.0 # Creates multiple precursor tolerance windows with specified mass offsets. +precursor_mass_mode = selected # One of isolated/selected/corrected. + +remove_precursor_peak = 1 # Remove precursor peaks from tandem mass spectra. 0 = not remove; 1 = remove the peak with precursor charge; 2 = remove the peaks with all charge states (only for DDA mode). +remove_precursor_range = -1.500000,1.500000 # m/z range in removing precursor peaks. Only for DDA mode. Unit: Th. +intensity_transform = 0 # Transform peaks intensities with sqrt root. 0 = not transform; 1 = transform using sqrt root. +activation_types = all # Filter to only search scans of provided activation type(s). Allowed: All, HCD, CID, ETD, ECD. + +write_calibrated_mzml = 0 # Write calibrated MS2 scan to a mzML file (0 for No, 1 for Yes). +write_uncalibrated_mgf = 0 # Write uncalibrated MS2 scan to a MGF file (0 for No, 1 for Yes). Only for .raw and .d formats. +mass_diff_to_variable_mod = 0 # Put mass diff as a variable modification. 0 for no; 1 for yes and remove delta mass; 2 for yes and keep delta mass. + +localize_delta_mass = 0 # Include fragment ions mass-shifted by unknown modifications (recommended for open and mass offset searches) (0 for OFF, 1 for ON). +delta_mass_exclude_ranges = (-1.5,3.5) # Exclude mass range for shifted ions searching. +fragment_ion_series = b,y # Ion series used in search, specify any of a,b,c,x,y,z,Y,b-18,y-18 (comma separated). +ion_series_definitions = # User defined ion series. Example: "b* N -17.026548;b0 N -18.010565". 
+ +labile_search_mode = off # type of search (nglycan, labile, or off). Off means non-labile/typical search. +restrict_deltamass_to = all # Specify amino acids on which delta masses (mass offsets or search modifications) can occur. Allowed values are single letter codes (e.g. ACD) and '-', must be capitalized. Use 'all' to allow any amino acid. +diagnostic_intensity_filter = 0 # [nglycan/labile search_mode only]. Minimum relative intensity for SUM of all detected oxonium ions to achieve for spectrum to contain diagnostic fragment evidence. Calculated relative to spectrum base peak. 0 <= value. +Y_type_masses = # [nglycan/labile search_mode only]. Specify fragments of labile mods that are commonly retained on intact peptides (e.g. Y ions for glycans). Only used if 'Y' is included in fragment_ion_series. +diagnostic_fragments = # [nglycan/labile search_mode only]. Specify diagnostic fragments of labile mods that appear in the low m/z region. Only used if diagnostic_intensity_filter > 0. +remainder_fragment_masses = # [labile search_mode only] List of possible remainder fragment ions to consider. Remainder masses are partial modification masses left on b/y ions after fragmentation. + +search_enzyme_name_1 = stricttrypsin # Name of the first enzyme. +search_enzyme_cut_1 = KR # First enzyme's cutting amino acid. +search_enzyme_nocut_1 = # First enzyme's protecting amino acid. +search_enzyme_sense_1 = C # First enzyme's cutting terminal. +allowed_missed_cleavage_1 = 2 # First enzyme's allowed number of missed cleavages per peptide. Maximum value is 5. + +search_enzyme_name_2 = null # Name of the second enzyme. +search_enzyme_cut_2 = # Second enzyme's cutting amino acid. +search_enzyme_nocut_2 = # Second enzyme's protecting amino acid. +search_enzyme_sense_2 = C # Second enzyme's cutting terminal. +allowed_missed_cleavage_2 = 2 # Second enzyme's allowed number of missed cleavages per peptide. Maximum value is 5. 
+ +num_enzyme_termini = 2 # 0 for non-enzymatic, 1 for semi-enzymatic, and 2 for fully-enzymatic. + +clip_nTerm_M = 1 # Specifies the trimming of a protein N-terminal methionine as a variable modification (0 or 1). + +# maximum of 16 mods - amino acid codes, * for any amino acid, [ and ] specifies protein termini, n and c specifies peptide termini +variable_mod_01 = 15.9949 M 3 +variable_mod_02 = 42.0106 [^ 1 +# variable_mod_03 = 79.96633 STY 3 +# variable_mod_04 = -17.0265 nQnC 1 +# variable_mod_05 = -18.0106 nE 1 +# variable_mod_06 = 4.025107 K 2 +# variable_mod_07 = 6.020129 R 2 +# variable_mod_08 = 8.014199 K 2 +# variable_mod_09 = 10.008269 R 2 +# variable_mod_10 = 0.0 site_10 1 +# variable_mod_11 = 0.0 site_11 1 +# variable_mod_12 = 0.0 site_12 1 +# variable_mod_13 = 0.0 site_13 1 +# variable_mod_14 = 0.0 site_14 1 +# variable_mod_15 = 0.0 site_15 1 +# variable_mod_16 = 0.0 site_16 1 + +allow_multiple_variable_mods_on_residue = 0 +max_variable_mods_per_peptide = 3 # Maximum total number of variable modifications per peptide. +max_variable_mods_combinations = 5000 # Maximum number of modified forms allowed for each peptide (up to 65534). + +output_format = tsv_pepXML_pin # File format of output files (tsv, pin, pepxml, tsv_pin, tsv_pepxml, pepxml_pin, or tsv_pepxml_pin). +output_report_topN = 1 # Reports top N PSMs per input spectrum. +output_max_expect = 50 # Suppresses reporting of PSM if top hit has expectation value greater than this threshold. +report_alternative_proteins = 1 # Report alternative proteins for peptides that are found in multiple proteins (0 for no, 1 for yes). + +precursor_charge = 1 4 # Assumed range of potential precursor charge states. Only relevant when override_charge is set to 1. +override_charge = 0 # Ignores precursor charge and uses charge state specified in precursor_charge range (0 or 1). + +digest_min_length = 7 # Minimum length of peptides to be generated during in-silico digestion. 
+digest_max_length = 50 # Maximum length of peptides to be generated during in-silico digestion. +digest_mass_range = 500.0 5000.0 # Mass range of peptides to be generated during in-silico digestion in Daltons. +max_fragment_charge = 2 # Maximum charge state for theoretical fragments to match (1-4). + +track_zero_topN = 0 # Track top N unmodified peptide results separately from main results internally for boosting features. +zero_bin_accept_expect = 0 # Ranks a zero-bin hit above all non-zero-bin hit if it has expectation less than this value. +zero_bin_mult_expect = 1 # Multiplies expect value of PSMs in the zero-bin during results ordering (set to less than 1 for boosting). +add_topN_complementary = 0 # Inserts complementary ions corresponding to the top N most intense fragments in each experimental spectra. + +check_spectral_files = 1 # Checking spectral files before searching. +minimum_peaks = 15 # Minimum number of peaks in experimental spectrum for matching. +use_topN_peaks = 150 # Pre-process experimental spectrum to only use top N peaks. +min_fragments_modelling = 2 # Minimum number of matched peaks in PSM for inclusion in statistical modeling. +min_matched_fragments = 4 # Minimum number of matched peaks for PSM to be reported. +min_sequence_matches = 2 # [nglycan/labile search_mode only] Minimum number of sequence-specific (not Y) ions to record a match. +minimum_ratio = 0.01 # Filters out all peaks in experimental spectrum less intense than this multiple of the base peak intensity. +clear_mz_range = 0.0 0.0 # Removes peaks in this m/z range prior to matching. 
+ +add_Cterm_peptide = 0.0 +add_Nterm_peptide = 0.0 +add_Cterm_protein = 0.0 +add_Nterm_protein = 0.0 + +add_G_glycine = 0.0 +add_A_alanine = 0.0 +add_S_serine = 0.0 +add_P_proline = 0.0 +add_V_valine = 0.0 +add_T_threonine = 0.0 +add_C_cysteine = 57.02146 +add_L_leucine = 0.0 +add_I_isoleucine = 0.0 +add_N_asparagine = 0.0 +add_D_aspartic_acid = 0.0 +add_Q_glutamine = 0.0 +add_K_lysine = 0.0 +add_E_glutamic_acid = 0.0 +add_M_methionine = 0.0 +add_H_histidine = 0.0 +add_F_phenylalanine = 0.0 +add_R_arginine = 0.0 +add_Y_tyrosine = 0.0 +add_W_tryptophan = 0.0 +add_B_user_amino_acid = 0.0 +add_J_user_amino_acid = 0.0 +add_O_user_amino_acid = 0.0 +add_U_user_amino_acid = 0.0 +add_X_user_amino_acid = 0.0 +add_Z_user_amino_acid = 0.0 + diff --git a/test/test_parse_params_fragger.py b/test/test_parse_params_fragger.py new file mode 100644 index 00000000..a06f1fe1 --- /dev/null +++ b/test/test_parse_params_fragger.py @@ -0,0 +1,18 @@ +from pathlib import Path + +import pandas as pd + +import proteobench.io.params.fragger as fragger_params + +TESTDATA_DIR = Path(__file__).parent / "params" + + +def test_read_file(): + file = TESTDATA_DIR / "fragger.params" + csv_expected = TESTDATA_DIR / "fragger.csv" + expected = pd.read_csv(csv_expected) + data = fragger_params.read_file(file) + actual = pd.DataFrame.from_records( + data, columns=(fragger_params.Parameter._fields) + ).set_index(fragger_params.Parameter._fields[0]) + actual.equals(expected) diff --git a/test/test_parse_params.py b/test/test_parse_params_maxquant.py similarity index 100% rename from test/test_parse_params.py rename to test/test_parse_params_maxquant.py From dc1233e40d733c1f82af1ccf892cf91689c8af16 Mon Sep 17 00:00:00 2001 From: Henry Date: Sat, 21 Oct 2023 17:34:08 +0200 Subject: [PATCH 07/15] :sparkles: Proline params + mapping params to our names - new dependency for excel file reading - three excel sheets contain information needed --- proteobench/io/params/__init__.py | 30 ++++++ 
proteobench/io/params/proline.py | 97 ++++++++++++++++++ pyproject.toml | 1 + ...roline_example_w_Mascot_wo_proteinSets.csv | 19 ++++ ...oline_example_w_Mascot_wo_proteinSets.xlsx | Bin 0 -> 13687 bytes test/test_parse_params_proline.py | 36 +++++++ 6 files changed, 183 insertions(+) create mode 100644 proteobench/io/params/__init__.py create mode 100644 proteobench/io/params/proline.py create mode 100644 test/params/Proline_example_w_Mascot_wo_proteinSets.csv create mode 100644 test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx create mode 100644 test/test_parse_params_proline.py diff --git a/proteobench/io/params/__init__.py b/proteobench/io/params/__init__.py new file mode 100644 index 00000000..941d7077 --- /dev/null +++ b/proteobench/io/params/__init__.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class ProteoBenchParameters: + search_engine: Optional[str] = None + software_version: Optional[str] = None + fdr_psm: Optional[str] = None + fdr_peptide: Optional[str] = None + fdr_protein: Optional[str] = None + MBR: Optional[str] = None + precursor_tol: Optional[str] = None + precursor_tol_unit: Optional[str] = None + fragment_tol: Optional[str] = None + fragment_tol_unit: Optional[str] = None + enzyme_name: Optional[str] = None + missed_cleavages: Optional[str] = None + min_pep_length: Optional[str] = None + max_pep_length: Optional[str] = None + fixed_modifications: Optional[str] = None + variable_modifications: Optional[str] = None + max_num_modifications: Optional[str] = None + precursor_charge: Optional[str] = None + + +# params = ProteoBenchParameters() +# params.search_engine = "MaxQuant" +# params +# %% diff --git a/proteobench/io/params/proline.py b/proteobench/io/params/proline.py new file mode 100644 index 00000000..877e4897 --- /dev/null +++ b/proteobench/io/params/proline.py @@ -0,0 +1,97 @@ +"""Proline is a quantification tool. Search engine is often Mascot. 
+ +The parameters are provided per raw file in separate sheets of an excel file. + +Relevant information in sheets: +- "Search settings and infos", +- "Import and filters" +- "Quant config" +""" +import pathlib +import re + +import pandas as pd + +from proteobench.io.params import ProteoBenchParameters + +use_columns = { + "Search settings and infos": [ + "software_name", + "software_version", + "enzymes", + "max_missed_cleavages", + "fixed_ptms", + "variable_ptms", + "peptide_charge_states", + "peptide_mass_error_tolerance", + "fragment_mass_error_tolerance", + ], + "Import and filters": [ + "psm_filter_expected_fdr", + "psm_filter_2", + ], + "Quant config": [], +} + +PATTERN_MIN_PEP_LENGTH = r"\[threshold_value=([0-9].*)\]" + + +def find_min_pep_length(string): + min_length = re.findall(PATTERN_MIN_PEP_LENGTH, string)[0] + return int(min_length) + + +def extract_params(fname) -> ProteoBenchParameters: + params = ProteoBenchParameters() + excel = pd.ExcelFile(fname) + + # ! First sheet contains search settings and infos + sheet_name = "Search settings and infos" + cols = use_columns[sheet_name] + # parse and validate + sheet = excel.parse(sheet_name, dtype="object", index_col=0).T + idx = sheet["quant_channel_name"].to_list() + stats = sheet.describe() + assert all(stats.loc["unique", cols] == 1), "Not all columns are unique" + sheet = sheet[cols].drop_duplicates().reset_index(drop=True) + # Extract + params.search_engine = sheet.loc[0, "software_name"] + params.software_version = sheet.loc[0, "software_version"] + params.enzyme_name = sheet.loc[0, "enzymes"] + params.missed_cleavages = sheet.loc[0, "max_missed_cleavages"] + params.fixed_modifications = sheet.loc[0, "fixed_ptms"] + params.variable_modifications = sheet.loc[0, "variable_ptms"] + level, unit = sheet.loc[0, "peptide_mass_error_tolerance"].split() + params.precursor_tol = level + params.precursor_tol_unit = unit + level, unit = sheet.loc[0, "fragment_mass_error_tolerance"].split() + 
params.fragment_tol = level + params.fragment_tol_unit = unit + + # ! Second sheet contains information about the import and filters + sheet_name = "Import and filters" + cols = use_columns[sheet_name] + # parse and validate + sheet = excel.parse(sheet_name, dtype="object", index_col=0).T.loc[idx, cols] + stats = sheet.describe() + assert all(stats.loc["unique", cols] == 1), "Not all columns are unique" + sheet = sheet[cols].drop_duplicates().reset_index(drop=True) + # Extract + params.fdr_psm = sheet.loc[0, "psm_filter_expected_fdr"] # ! 1 stands for 1% FDR + params.min_pep_length = find_min_pep_length(sheet.loc[0, "psm_filter_2"]) + + # ! Third sheet only contains match between runs (MBR) information indirectly + sheet = excel.parse(sheet_name, dtype="object", index_col=0) + MBR = sheet.index.str.contains("cross assignment").any() + params.MBR = MBR + return params + + +if __name__ == "__main__": + file = pathlib.Path( + "../../../test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx" + ) + params = extract_params(file) + data_dict = params.__dict__ + series = pd.Series(data_dict) + series.to_csv(file.with_suffix(".csv")) diff --git a/pyproject.toml b/pyproject.toml index 013d07aa..78edaab5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ classifiers = [ ] dependencies = [ "pandas", + "openpyxl", "numpy", "plotly", "streamlit_extras", diff --git a/test/params/Proline_example_w_Mascot_wo_proteinSets.csv b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv new file mode 100644 index 00000000..a76c6142 --- /dev/null +++ b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv @@ -0,0 +1,19 @@ +,0 +search_engine,Mascot +software_version,2.8.0.1 +fdr_psm,1 +fdr_peptide, +fdr_protein, +MBR,False +precursor_tol,10.0 +precursor_tol_unit,ppm +fragment_tol,0.02 +fragment_tol_unit,Da +enzyme_name,Trypsin/P +missed_cleavages,2 +min_pep_length,7 +max_pep_length, +fixed_modifications,Carbamidomethyl (C) +variable_modifications,Acetyl (Protein 
N-term); Oxidation (M) +max_num_modifications, +precursor_charge, diff --git a/test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx b/test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b9439c7e59895c52e3b720c57a86076a962ef394 GIT binary patch literal 13687 zcmeHORa6|?wr$+q-QC>@9^BoX;O-V6xCVC*?(PI9xVw9B32u+%$~h4g>&Xg8=|207zghVOtv~6B{QzWp_Ih zM;&@MYb(NBP+-bz0Px%W|2_U6&%j9hxc9iBjipMJ()<2xkzIfJ8t)vYwH4^-PQxUs2)&HIxt@#Qk*= zA_m&>;{s-;*qXSt>x$>ov>~ebK^9j2b0?`6>A*&oo%Soz`}`{eVm`U`SW-uQ5XK6) zjpk?rWWF8kVZb39xMWx@h}ZnufZ**1njLA{x23!1#QsL&}^~7YtV}5`W{wN}m0_j>H?g~(vDMcrk+e>fXD>S0 z^g%^pR^&V8@%IB{+1I|Bb*vY|8ScFtP(7uC=imIp35wR-{QEr=vqH zrkKHG8nyz*%isjl4Q7K#e2ov=tZp|%9;*t5QUIhtO96gx6p_>-N^{`30iCO=@?Bmd z9>BEE#UaN=DFzlYk&FjZ2IEO^czGPi6GbAE2s<>)dm zj`6wXNdh{p7&kBdgJX(A&lF}>w|vE7*Qb&pg+9kBaD#W8eW!L5`zbVM_5c) zCCd2Q_VFtgcJDNvM_UaT%u9tR!V(lISjgbwNGSJdnGiqblZK&kFQU+MRm8r=(C4t@ z(}U&{t+sW-!P}`R(2S=jK&}0*_c%L39u`A-eCb!|%vK2|}mZ>s)xV_1WFrUZU7lNk^I02=@a^kxkI2pWGjiGKwU zptscS?cV?2M_b&u#hU}|KMQCNnDn-fwK0k3ZjTVG)Vc+bD{M{lvM1h9*uIMsZV~uK zpMw!V`sDp?qVRdM@T?aD+#*9IhKvfb?n|lWGUXj-J3ns#;atO1Ms1P4fWNT4Z=-LI ztmBhN!8K`a7#Ud5;#q$Flzt_ZJ-&7N4E>K(BP{Ii&hB$?L%cGL%ull)4GXBE68!DT z*a>9=qQ_n>TU%_TyzF<)q7y6C2W>INj1jrQ4krj{k~K*OzLAIa5?1!?a0Goz(yzhk zF_vOnW-}N(_&CcGoGjE*W-+Jh@2-g!6igrF0}UH_9s9hUdV?zm#c*mwHQ>4woW5#! 
z0rQ_U=x8dni{{OhrjP*um~X1_Pj~A0!NkPLk>S^g>8D>!kCU>^V?-P{qq*i$@1W?< z|865Mu2Oust$K2GoJ^#C0kT*ip?u=H&a_jFoKyq}j`{eShUMATwvHpS)&;R^C2`N1 z2U_NoP?Hw>@^H_3FD4Pvtt!L=L&hhNeOsEwFr0CRmvO09*WsWN@fi z#i__+E7GgiX^?YGSfXppI$V1kg_T2#2N~EIw0Wp0;pSAsqQ4Ee$QJE=63$z)%f)hs z8-wgTSqgP35bf18M?>I?MbznKTvz^nUvcY-g{JeXY1XIJ3)e@!QDAJ9~7^)-G;%VwDo z5p1FyQ&YftXr{-{C>xpJh*2Bl*&>`%e0f!^MqSG>m?-$2AW_EPpq;qxB;Q(xUgg=| zlMg@My3fm6Tf1t>(uz-p?@EDhyVLAm^wj?FaAW75N2|7PTc>I(gR@dbRMMhP#>KN* z0Z!xh!vS;G6`%B|WhulS;+xq$B2+XZ)#S1&jm?_w!d@5Zu{iq5rvAP7>k!Zo%9n@W2g0MIl+zY~3^Ws;N5T;R2L z--`P*V~*|Qo?$A@BhjUdjb39z78p2$5=X%*HAW!WA_f)feLv_ZBA)XQD>Rzy zGJ`K>uMW9*s*gGV>E~|p^Rn36z2S(MNCZUVNJN3$@u(L4b$P7hg>Q(GMoxbow_ymL zpFQyQbP#$Or3g)CF3v(xu)et2*B3VE2qa7zMp=EYr3rtuK=Zz7r=ANPeUK-8W_pksLZF#-Qm zOtY%S$TqW5%jF!8E6()&y1UU~TQ%@DXn!Fbow`C|rF){S{Un0vw@dj^CgSU`*SY>#J6xfsH`>|2+ry-Ve<1ae+DcHSD-M@uL*!CyF3ktUX z>mz@H8in(Z@8N0vxa$8jPMi_8jtlu~RiwHa#$Wv|-ty*u`idGVacQ}08CuEl5gOG& z2^pGd73K*RHATuH>G6-%QkO7aa|d5sm29`0)IjdJ*g(H-nAH? z0IxU)Nf^Ut`Gl{?8K>&b4AX>nuV!uCKS^CLBxSEwJhnmDbpfS(OSJ0&vDLV-1m0TI z9_w=eg6ca0aK*AbKqCv(1U}dm16Y%Z#JMHv`IaTv+DyK{fZL_v@}VR)@S(o;>YC;#rX4gujbRjVQ{FaVP=%lTVI3>ii-$x?e6)6v`3*P zw=>+Lh)d)FI)q1@eHR?-{9$h*&=Ta3k`iQ)VG4{Hc!;iR!>hnM$;-rBtpObCcx2(> zfS+fRvpNG-gKxXb7!oTyEcwS%#BF#exS|#IC-_f+L%BI%b9W}#5}^Fi>m(8DB-qH{ zG}FliCp4>|n00Al(miog*0`b#_OYMT*kX?Pw?lVAw?g+qH$z8}V@y~mq@oRAaJ+m$ zLNz&I`_b9&UjFo$3hlAb(*gyA3%HK75BHx#Nl`?}Q>9^37Tkc>=h`5AA9mKW$)*!} zwlB9O!lg-5D48dX4F`MsTSqVsnq`FAqwz6KMCJ7spHqUu1VMoghtV#pb-RQ1 z?54nIe4|ZF>%uG59$eL+{G?#3)0a=W!(lI=uKsvJs1IS|e)aIGa2I}ct`Xpe)b$Y7 zNc7_iqHeu@ewHCZqAIqN82E!T&ZZ|sjjUJ!ifH00G)es~NsiMzW0iTGzo&8l5RO<# zfGl{}dSlBiMXJL5MugLHP8KCAAJJUP^kz(UI+hB*4J~)`Ddm|re_YrJUOSjK2L*@u zOLyGlP|g6kbSQ0qGQ+f9NMOvNbj!#qwKP8pe5Nz)22aVIy7>Lm2=$r)2f?S+OoNLi zZLWisEr!L56#1+H@-*Z8KvwSZ_RHCtIpxh{G{*_$CoNqc26XMRL>R-TJMzRQ{qh^4 zjs9&PW3eD8DFrS1#1KUf)wGaAmRIK)W>avqE}w`v4Vx~mtFa1hkFI^V*T)AoZ|>V$ zB7;UO9=Ls_PiOr`4}%}BcF%7-TV#`t2E18UQEW%-o7`^tdh!-i5+3y(&iJDq_YIyo 
z;g6(*bV&^N2VV#@T613GR_kuQ;p2~99a|P3={p%eVAU=szHiN%=U?h!W_1S5BVmz~ z!Aml0U7zP)?!jE{J{}w~QkqXPvt_O2i)iLxZsWhS7wlq$4Xzg!! z=ZIi*inkLLF_PixU(|eL^=S3>MVq(b>0ro8e4~GI`~1ia;|}?;yRnE($&YmI(Q^S| zhwcIjak0voNGC(B)JVu-{0;}!s~>D#F_JcZ{paeppx@4$lMGiS$eIMUhZx#)<#AO; zWT^+hu%}pB_ymdh_Faa4P>(8fIi~aA-D>-%ChC%LQzu7Zv@;Y%S+=V+M?u$Yi=M$W zY!7{tq+SKXPQuqoBW(VGwir9)<`=ZZa>y>&?cT7ZrOL%H5wn(Sk3u@N9Oj_0+a@6F z5k4yw!|HP)RM`k2#v6B_}1Tmz;Qf1d<7q%{>h5lhksSj{}nRH^_knkb4oUoJ0Z%u4QWU zKFFZYOu7PrZ5 zGJO_;i=bRrE2$>Ssow6X5ef-BP&~yWTDmmIWa7c!@|?P*L`L@ca8LHUkI~40KB-DB zvJ0*uY?J-U+$^o{r9SC+`Mt|xiV!kD)ANfSNRjry_0h9tXTww>&<9rkgo1_z?xO^o z7y~%@huNG~wKJ7r^tA~+o!Vx+lIXzsY-Qi7AOhHy4N;g%K`MUjp5P4ALbId!Y*an1 zo_VPgUHQfzmhvJJ4oyMyf%O)y%}Z#;-3L)9Yp?-=?Xgtj3Ndh{$BIhQ>ckwonh~eW z{D63%q=^Yw!Jx@v0$jiJN)^74Be#=7_QzFaik(lRby@;Omx z^?Ajn(HVKTfgk+i1<<>UUPhtbSOKI1m!i|*L10}i!ozOos*~yFWl>s5sWWo;mWQz24!iFKk-6n*emM%xv ztSftpENA(=j7bdM#=+oZd@c)-^1O~q(y@cRKIHI3qMuy@Brv?9 z7kNbZgmL9-m{uC08oHlNiJQQ7EK=hsLLN-U@T%J)QrZbHocyc8O_q2Y7V1YsS$Mdw ziW8PvF_oMm`Y}Bl`5o0K3|NNunfchi+UGNRyRHrRl733kG#spi2B7-L&IC5|E?HO!Jc z&`-T|>}Y&!YX|i8*7#03K(V|mun2D&Z$=RnEd6;6f*_$`F;@LU&|b7fGDuVKq z^#*O5jDTP)T2^SGXtn?vBfd*zOQv0kC!yQ6Cx1u5yc9hFt0`=<`ZYaF`dZnIg*@9R zKiHWDEo-9uQTEC$I&xapthuZuwHO90_TG(3J5o4Fvfx0I?`*An=E&zj*;OM6oN{p* z(?Hr3N5L6WK@VOCRoCp3?qZ7iq?7uj{9#g#rkMRMG@~^dn%nNphG_e4u;7wlkST8b zz7bksT}oR-1@u^om`5B-aSD*UAp;9xw3E_ei@m#rvw2CoXg(cDgi~!K;}oMm#wIP# zXyT(re@ja646|IbVKa^c%V8t}lBFmyMUx*nxza>0kIj9(1!N;N39IEj$^mB2yc>8k z$&408u2Pc=mPDq_ovT427Di35NIbW8oa_)d)8}N*_+C0aI^P61)=Au4YZ|qWi2Rz{ zsRA?vY4(UtJcDk`T_ol~tC$LzvIKscN)>i5C4K97Wu%L^-R%UC(d{7I`y5F5>go(M z5K*BY!vh=+N16Q6s?a{~-n(w;4}F+#3!CDKk0Rj?mv_lcHqPv33#}NLl%pZQlv_;g z$~mIvO)desX_s5&AebF;1UKI^E9j#>oNLI;l@-LQ#QQ5W;dfqDAV?OnRjK&0?+ zX}uAQ`pt)}4(&xR`!giMjK}(+Yz=D{i$K$QK1$Gb*}RG4IqpE+a;-BskJYM4T;B6` z5;*v!SsVEy4(NJ^MlDDSG83OXjUtEyNuHucoagLkLK-9eT`F(m4#2HNYs2e*Z0i*+ z!FY1Lbup-*0f4t8@#j9Yqm#RpiQ~_VzD-rfHtkjb^ZI_2N3M78`9S*sMsJ0jWagtnPx|vkGy2SipP~j$-Jl%(0jis 
zSFW^7#iQ1*k~ZPEtu3H^l2{=c5%GZzA$;&r;%Y`lW3u|j$+Cw$L;Zs20fDQJ37)@j{?FZp- zP`NT0WWiRpq))Y9Oy|^*yY3oTAm%3KTJzO*t~xS zSV(v9vD|%W=k2zUB;_DR*~6BF)y8euYb82A493&tsJ;`fgZBjRfDQQ~gr_=0zfRc@ z-0{g-bmDw2+0vpxEk)Vz09O4t$@mqc%ES2l6}o10OM;exQ%gdSr^Fog;ke*KIjiR< z#Iic;$>q5Dc#1FA3HlxnI*e!U0c{;xCgAF4+9{JfHMsiipG``MCq@O*-%qWC!!DMx zcj;MS>`fqg?#l? zFzKp~IL*V)_10SCPu1Du-z{$6Dd@Cq?Y3_&e-j6pikF1!U7@yC$ z76-2~?4-%ZjCsS-X)go%IMek+HCQ+B+p8ZvE6!YBkrKUFS)}F1b}Cd`q2bEqNt0iQ zAYIjZw)3Rzl3shDGsQ$*E@V=O{0Unq1?7RbaXae)V)L zyfLgXTxts2vs}bgc_OT694}avx^6OQ~fU3oJf{ehd z9C58Dl8&Y3kE7mTkVcs&*fcM{Bg7z*kN`}Q zcT+k-NiE5q`bgDfE8^O@25jrv&yi&*>es8}4U-jEp3-Fv)eMsL*2=E$iA;4E#nHYOvS^MPKiG0Ral!>1@)k|XD$ zYmg%t7pUYJ7$c=^oF%U5@U@oJpQ> z9ly3sVq^{bpayM%x)^H$1aD@w``p=f^<&cXHF%b`(@t_m2S;kI&%SqKFs|kGk;)ti z*+$qPn_G&`Nj=T<$sP^OBr=kHq~ONKkY-6y>Gc$ku@H#i7)|qm$~!3{z{YUdMdHI{ z#77GnP>K)YK>0y(X&G$R`Fu8Z2;mKjRt+@()aGrb=sIARZbaikNlS_&q3z&Dao^+6 zN0FK)=5I^yci`VkOBS&Zu)7QIwOmE4b~)q{pIGYwY zRiC%%Ha2s8PWVwMnVy6b^Y&bL-j!CeNuHC)|KQ2p9C+5Aq0esvbCNx^1!cecVkI+? 
z1MABbK4kX~dcQGfHYS+5wPfBDb2wvWyqp*PgJukC)@D1tGnNj!euA}Z{Dcnuuhh}Y zAA}M8k77Cnz7Df#cHYWRXP{7{&UoWIm!nUdVDQ48!$Bq08NJkk)D-x>1oUNW6h9&% ztX`ztb^yk10C_R}j`eO>1^x8Wspwu&<~!1@`}i&%zS^>WaK}ClK}n}$%7RPIfqf7nx;_l7{um4OnrU@j}>B>n%>yhO#2E1g4Q=F16L$Mc}e>ZSkLxPoq@=hI1eqB(C zR$YP;DsUm(LTy)Kv_Uv7wjN@~GH%;Tz1o>cW$)B@!Rpv*%v@9KopubJx%a=Q6zYah>=ZejQY~FUlf8 zurRPtEG~c$_>i4YD2AgLcTQw6>UPhE)xN*|v*QI3UZ0r_Q=$yNAR{q2vHq8_I#)~7 zcYf|al8~7wd4YPU9@+0^%s;GH5?#750Ted#Iik1V0HxaUb+KMNvFv#wbY1ks5|yZq1)dzXhG zOuz^xvlmd`+dXPfxz)MnB=U@U4orIPd-%4TV)FD5bu{5x2Fr!lukLCB=PJJ_9PHG_ECZy<^1m5ApxTz%wzG4{9N=%Mfcv&@l z&*A_b!R#UdrIM~RCuHMD)uD-`;2>`XzML!ZajtOb^0*9m?1k}uVoYc!j%**w9nUj{ za_vc{V&cx~o;4SZonx#q2xADg6ROYk?5<+T7}t%x=~icUM!4dHumgK>aC%^P3uJ!} zbvoN>6N+z%rs*3~4C##+V{B_A?_g`^$Y5maVDdAid?Qr;mk;wcbrG?8asiB}ed{t$ z@cC!*(95iFRd*EOm8g|6J~331vZq+80L-qutQ9Nlp*HC>}jd@u6e!S7-E*F=UoWWHwo#yWLXN5 zEu)L- z;L{pT^C%_BAa&mAfxZ!zM5~z$|3Geyu~Smaapn4ALB;f`xou%9gC75_@Wb8$xc$cn z{ys%rdT1)b7UlaUn%$n8;lB4&F;-<(;z*M2!0{Y=&php8#R1az@>Qa}itKNDLR3z0 zb?6w{QQE~yHgp#(XUZj(XB+g*-Y}nMR1~ynIFWmyOTulKy&%w{K>c zswT5q=-tdhTUpoJhoO{Ywyq`xCoFVlq#J=6$1`OMCM;fNaLhnfYO0H^1tt#3a?ZML zgtfG?CJ#W$;WsEv{9X{EPb94R=|b8XEj0iF~ptqXh+8TNZYdIov6tiGDgLxK|9V1p5aoL9@5 z4Wc~FH%O&`E1ZB_)Q|F{8Na*bMcY&?dp?yp>S1jTRa;p1^6Cz@Njs(Oy3!nr^O#ed;IFLre+m9P=f2s@pE&Tp3;xao{zG)&XOZ()Uhwb2e`QYo zA^OIpg!(1?zwju3$N8N<_XiR>?7s)`H#XhxD8G{={y=ekYj*wzPTnud#P2A-H%R|L z=|caHD8JgJzoY!#2>Anr0OuFV?`@Ia0e;`g`2!%6_!qz*8#}*?{$8Q}Aqq_X57FQ2 z)!z~R8u Date: Fri, 10 Nov 2023 08:19:17 +0100 Subject: [PATCH 08/15] :art: correct spelling mistake fragmnent -> fragment - otherwise updates from parameter files will continue to be aware of the spelling mistake --- proteobench/modules/dda_quant/datapoint.py | 2 +- proteobench/modules/dda_quant/module.py | 2 +- proteobench/modules/dda_quant/plot.py | 1 - proteobench/modules/template/datapoint.py | 2 +- proteobench/modules/template/module.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/proteobench/modules/dda_quant/datapoint.py 
b/proteobench/modules/dda_quant/datapoint.py index d9e928dc..8459245b 100644 --- a/proteobench/modules/dda_quant/datapoint.py +++ b/proteobench/modules/dda_quant/datapoint.py @@ -17,7 +17,7 @@ class Datapoint: MBR: bool = False precursor_tol: int = 0 precursor_tol_unit: str = "Da" - fragmnent_tol: int = 0 + fragment_tol: int = 0 fragment_tol_unit: str = "Da" enzyme_name: str = None missed_cleavages: int = 0 diff --git a/proteobench/modules/dda_quant/module.py b/proteobench/modules/dda_quant/module.py index 64719a22..4a3ea1b5 100644 --- a/proteobench/modules/dda_quant/module.py +++ b/proteobench/modules/dda_quant/module.py @@ -140,7 +140,7 @@ def generate_datapoint( MBR=user_input["mbr"], precursor_tol=user_input["precursor_mass_tolerance"], precursor_tol_unit=user_input["precursor_mass_tolerance_unit"], - fragmnent_tol=user_input["fragment_mass_tolerance"], + fragment_tol=user_input["fragment_mass_tolerance"], fragment_tol_unit=user_input["fragment_mass_tolerance_unit"], enzyme_name=user_input["search_enzyme_name"], missed_cleavages=user_input["allowed_missed_cleavage"], diff --git a/proteobench/modules/dda_quant/plot.py b/proteobench/modules/dda_quant/plot.py index 1e367865..990f2cd3 100644 --- a/proteobench/modules/dda_quant/plot.py +++ b/proteobench/modules/dda_quant/plot.py @@ -98,7 +98,6 @@ def plot_metric(self, benchmark_metrics_df: pd.DataFrame) -> go.Figure: for idx, row in benchmark_metrics_df.iterrows() ] - # spellerror {meta_data.fragmnent_tol[idx]} mapping = {"old": 10, "new": 20} diff --git a/proteobench/modules/template/datapoint.py b/proteobench/modules/template/datapoint.py index 79cfb577..50f8b356 100644 --- a/proteobench/modules/template/datapoint.py +++ b/proteobench/modules/template/datapoint.py @@ -40,7 +40,7 @@ class Datapoint: MBR: bool = False precursor_tol: int = 0 precursor_tol_unit: str = "Da" - fragmnent_tol: int = 0 + fragment_tol: int = 0 fragment_tol_unit: str = "Da" enzyme_name: str = None missed_cleavages: int = 0 diff --git 
a/proteobench/modules/template/module.py b/proteobench/modules/template/module.py index 06370eaf..10378b0c 100644 --- a/proteobench/modules/template/module.py +++ b/proteobench/modules/template/module.py @@ -52,7 +52,7 @@ def generate_datapoint( MBR=user_input["mbr"], precursor_tol=user_input["precursor_mass_tolerance"], precursor_tol_unit=user_input["precursor_mass_tolerance_unit"], - fragmnent_tol=user_input["fragment_mass_tolerance"], + fragment_tol=user_input["fragment_mass_tolerance"], fragment_tol_unit=user_input["fragment_mass_tolerance_unit"], enzyme_name=user_input["search_enzyme_name"], missed_cleavages=user_input["allowed_missed_cleavage"], From a83dbcfa5b3017e70d03351e989c89f7850825fe Mon Sep 17 00:00:00 2001 From: Henry Date: Fri, 10 Nov 2023 11:29:29 +0100 Subject: [PATCH 09/15] :construction: Prepare renaming according to SDRF Reference https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml --- proteobench/io/params/__init__.py | 41 ++++++++++++++++--------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/proteobench/io/params/__init__.py b/proteobench/io/params/__init__.py index 941d7077..dd00510a 100644 --- a/proteobench/io/params/__init__.py +++ b/proteobench/io/params/__init__.py @@ -2,29 +2,30 @@ from typing import Optional +# Reference for parameter names +# https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml +# new name as comment @dataclass class ProteoBenchParameters: search_engine: Optional[str] = None software_version: Optional[str] = None - fdr_psm: Optional[str] = None - fdr_peptide: Optional[str] = None - fdr_protein: Optional[str] = None - MBR: Optional[str] = None - precursor_tol: Optional[str] = None + fdr_psm: Optional[str] = None # ident_fdr_psm + fdr_peptide: Optional[str] = None # ident_fdr_peptide + fdr_protein: Optional[str] = None # ident_fdr_protein + MBR: Optional[str] = None # enable_match_between_runs + 
precursor_tol: Optional[ + str + ] = None # precursor_mass_tolerance, value and unit not separated precursor_tol_unit: Optional[str] = None - fragment_tol: Optional[str] = None + fragment_tol: Optional[ + str + ] = None # fragment_mass_tolerance, value and unit not separated fragment_tol_unit: Optional[str] = None - enzyme_name: Optional[str] = None - missed_cleavages: Optional[str] = None - min_pep_length: Optional[str] = None - max_pep_length: Optional[str] = None - fixed_modifications: Optional[str] = None - variable_modifications: Optional[str] = None - max_num_modifications: Optional[str] = None - precursor_charge: Optional[str] = None - - -# params = ProteoBenchParameters() -# params.search_engine = "MaxQuant" -# params -# %% + enzyme_name: Optional[str] = None # enzyme + missed_cleavages: Optional[str] = None # allowed_miscleavages + min_pep_length: Optional[str] = None # min_peptide_length + max_pep_length: Optional[str] = None # max_peptide_length + fixed_modifications: Optional[str] = None # fixex_mods + variable_modifications: Optional[str] = None # variable_mods + max_num_modifications: Optional[str] = None # max_mods + precursor_charge: Optional[str] = None # min_precursor_charge From 030eb20300718baf7c26d40ec53fa21b737b53ae Mon Sep 17 00:00:00 2001 From: Henry Date: Sat, 11 Nov 2023 17:09:00 +0100 Subject: [PATCH 10/15] :sparkles: SDRF extended naming - Update ProteBenchParameters to reflect SDRF extended naming - Update Proline parameter extraction Could update DataPoint where it matches 1:1 old and new, but external changes needed first: https://github.com/Proteobench/Results_Module2_quant_DDA - test uses external git repo for validation... 
--- proteobench/io/params/__init__.py | 91 ++++++++++++++----- proteobench/io/params/proline.py | 33 +++---- ...roline_example_w_Mascot_wo_proteinSets.csv | 35 +++---- 3 files changed, 102 insertions(+), 57 deletions(-) diff --git a/proteobench/io/params/__init__.py b/proteobench/io/params/__init__.py index dd00510a..6fca4569 100644 --- a/proteobench/io/params/__init__.py +++ b/proteobench/io/params/__init__.py @@ -4,28 +4,75 @@ # Reference for parameter names # https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml -# new name as comment @dataclass class ProteoBenchParameters: - search_engine: Optional[str] = None + """ + Parameters for a proteomics search engine. + + Attributes + ---------- + software_name : Optional[str] + Name of the software tool / pipeline used for this benchmark run + (examples: "MaxQuant", "AlphaPept", "Proline", ...). + software_version : Optional[str] + Version of the software tool / pipeline used for this benchmark run + search_engine: Optional[str] + Search engine used for this benchmark run + (examples: "Andromeda", "Mascot", ...). + search_engine_version : Optional[str] + Version of the search engine used for this benchmark run. + ident_fdr_psm : Optional[str] + False discovery rate (FDR) threshold for peptide-spectrum match + (PSM) validation ("0.01" = 1%). + ident_fdr_peptide : Optional[str] + False discovery rate (FDR) threshold for peptide validation ("0.01" = 1%). + ident_fdr_protein : Optional[str] + False discovery rate (FDR) threshold for protein validation ("0.01" = 1%). + enable_match_between_runs : Optional[bool] + Match between run (also named cross assignment) is enabled. + precursor_mass_tolerance : Optional[str] + Precursor mass tolerance used for the search, + associated with the unit: "20 ppm" = +/- 20 ppm; if several, separate with "|". 
+ fragment_mass_tolerance : Optional[str] + Fragment mass tolerance used for the search: + "20 ppm" = +/- 20 ppm; if several, separate with "|" + enzyme : Optional[str] + Enzyme used as parameter for the search. If several, use "|". + allowed_miscleavages : Optional[int] + Maximal number of missed cleavages allowed. + min_peptide_length : Optional[str] + Minimum peptide length (number of residues) allowed for the search. + max_peptide_length : Optional[str] + Maximum peptide length (number of residues) allowed for the search. + fixed_mods : Optional[str] + Fixed modifications searched for in the search. If several, separate with "|". + variable_mods : Optional[str] + Variable modifications searched for in the search. If several, separate with "|". + max_mods : Optional[int] + Maximal number of modifications per peptide + (including fixed and variable modifications). + min_precursor_charge : Optional[int] + Minimum precursor charge allowed. + max_precursor_charge : Optional[int] + Maximum precursor charge allowed. 
+ """ + + software_name: Optional[str] = None software_version: Optional[str] = None - fdr_psm: Optional[str] = None # ident_fdr_psm - fdr_peptide: Optional[str] = None # ident_fdr_peptide - fdr_protein: Optional[str] = None # ident_fdr_protein - MBR: Optional[str] = None # enable_match_between_runs - precursor_tol: Optional[ - str - ] = None # precursor_mass_tolerance, value and unit not separated - precursor_tol_unit: Optional[str] = None - fragment_tol: Optional[ - str - ] = None # fragment_mass_tolerance, value and unit not separated - fragment_tol_unit: Optional[str] = None - enzyme_name: Optional[str] = None # enzyme - missed_cleavages: Optional[str] = None # allowed_miscleavages - min_pep_length: Optional[str] = None # min_peptide_length - max_pep_length: Optional[str] = None # max_peptide_length - fixed_modifications: Optional[str] = None # fixex_mods - variable_modifications: Optional[str] = None # variable_mods - max_num_modifications: Optional[str] = None # max_mods - precursor_charge: Optional[str] = None # min_precursor_charge + search_engine: Optional[str] = None + search_engine_version: Optional[str] = None + ident_fdr_psm: Optional[str] = None # fdr_psm + ident_fdr_peptide: Optional[str] = None + ident_fdr_protein: Optional[str] = None # fdr_protein + enable_match_between_runs: Optional[bool] = None # MBR + precursor_mass_tolerance: Optional[str] = None # precursor_tol, precursor_tol_unit + fragment_mass_tolerance: Optional[str] = None # fragment_tol, fragment_tol_unit + enzyme: Optional[str] = None # enzyme_name + allowed_miscleavages: Optional[int] = None # missed_cleavages + min_peptide_length: Optional[str] = None # min_pep_length + max_peptide_length: Optional[str] = None # max_pep_length + fixed_mods: Optional[str] = None # fixed_modifications + variable_mods: Optional[str] = None # variable_modifications + max_mods: Optional[int] = None # max_num_modifications + min_precursor_charge: Optional[int] = None # precursor_charge + 
max_precursor_charge: Optional[int] = None diff --git a/proteobench/io/params/proline.py b/proteobench/io/params/proline.py index 877e4897..2883f9cd 100644 --- a/proteobench/io/params/proline.py +++ b/proteobench/io/params/proline.py @@ -55,18 +55,15 @@ def extract_params(fname) -> ProteoBenchParameters: assert all(stats.loc["unique", cols] == 1), "Not all columns are unique" sheet = sheet[cols].drop_duplicates().reset_index(drop=True) # Extract - params.search_engine = sheet.loc[0, "software_name"] + params.software_name = "Proline" params.software_version = sheet.loc[0, "software_version"] - params.enzyme_name = sheet.loc[0, "enzymes"] - params.missed_cleavages = sheet.loc[0, "max_missed_cleavages"] - params.fixed_modifications = sheet.loc[0, "fixed_ptms"] - params.variable_modifications = sheet.loc[0, "variable_ptms"] - level, unit = sheet.loc[0, "peptide_mass_error_tolerance"].split() - params.precursor_tol = level - params.precursor_tol_unit = unit - level, unit = sheet.loc[0, "fragment_mass_error_tolerance"].split() - params.fragment_tol = level - params.fragment_tol_unit = unit + params.search_engine = sheet.loc[0, "software_name"] + params.enzyme = sheet.loc[0, "enzymes"] + params.allowed_miscleavages = sheet.loc[0, "max_missed_cleavages"] + params.fixed_mods = sheet.loc[0, "fixed_ptms"] + params.variable_mods = sheet.loc[0, "variable_ptms"] + params.precursor_mass_tolerance = sheet.loc[0, "peptide_mass_error_tolerance"] + params.fragment_mass_tolerance = sheet.loc[0, "fragment_mass_error_tolerance"] # ! Second sheet contains information about the import and filters sheet_name = "Import and filters" @@ -77,20 +74,20 @@ def extract_params(fname) -> ProteoBenchParameters: assert all(stats.loc["unique", cols] == 1), "Not all columns are unique" sheet = sheet[cols].drop_duplicates().reset_index(drop=True) # Extract - params.fdr_psm = sheet.loc[0, "psm_filter_expected_fdr"] # ! 
1 stands for 1% FDR - params.min_pep_length = find_min_pep_length(sheet.loc[0, "psm_filter_2"]) + params.ident_fdr_psm = sheet.loc[ + 0, "psm_filter_expected_fdr" + ] # ! 1 stands for 1% FDR + params.min_peptide_length = find_min_pep_length(sheet.loc[0, "psm_filter_2"]) # ! Third sheet only contains match between runs (MBR) information indirectly sheet = excel.parse(sheet_name, dtype="object", index_col=0) - MBR = sheet.index.str.contains("cross assignment").any() - params.MBR = MBR + enable_match_between_runs = sheet.index.str.contains("cross assignment").any() + params.enable_match_between_runs = enable_match_between_runs return params if __name__ == "__main__": - file = pathlib.Path( - "../../../test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx" - ) + file = pathlib.Path("test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx") params = extract_params(file) data_dict = params.__dict__ series = pd.Series(data_dict) diff --git a/test/params/Proline_example_w_Mascot_wo_proteinSets.csv b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv index a76c6142..04462162 100644 --- a/test/params/Proline_example_w_Mascot_wo_proteinSets.csv +++ b/test/params/Proline_example_w_Mascot_wo_proteinSets.csv @@ -1,19 +1,20 @@ ,0 -search_engine,Mascot +software_name,Proline software_version,2.8.0.1 -fdr_psm,1 -fdr_peptide, -fdr_protein, -MBR,False -precursor_tol,10.0 -precursor_tol_unit,ppm -fragment_tol,0.02 -fragment_tol_unit,Da -enzyme_name,Trypsin/P -missed_cleavages,2 -min_pep_length,7 -max_pep_length, -fixed_modifications,Carbamidomethyl (C) -variable_modifications,Acetyl (Protein N-term); Oxidation (M) -max_num_modifications, -precursor_charge, +search_engine,Mascot +search_engine_version, +ident_fdr_psm,1 +ident_fdr_peptide, +ident_fdr_protein, +enable_match_between_runs,False +precursor_mass_tolerance,10.0 ppm +fragment_mass_tolerance,0.02 Da +enzyme,Trypsin/P +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length, +fixed_mods,Carbamidomethyl (C) 
+variable_mods,Acetyl (Protein N-term); Oxidation (M) +max_mods, +min_precursor_charge, +max_precursor_charge, From 682825d27f5fb85692e6f2884b40d79334234920 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 13 Nov 2023 21:19:44 +0100 Subject: [PATCH 11/15] :construction: start extracting MQ params - most selected parameters are easy to get - differences between version 1.6 and higher to previous (1.5) - fragment_mass_tolerance -> which fragmentation method was used? -> missing information in extracted data for v1.5 --- proteobench/io/params/maxquant.py | 98 +++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 25 deletions(-) diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index 43f8f4a5..e9bcad09 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -7,8 +7,11 @@ import xml.etree.ElementTree as ET from pathlib import Path +import numpy as np import pandas as pd +from proteobench.io.params import ProteoBenchParameters + logger = logging.getLogger() @@ -127,45 +130,90 @@ def build_Series_from_records(records, index_length=4): return pd.Series((v for (k, v) in records), index=idx) +# %% +def extract_params(fname) -> ProteoBenchParameters: + params = ProteoBenchParameters() + + record = read_file(fname) + record = build_Series_from_records(record, 4).sort_index() + params.search_engine = "Andromeda" + params.software_version = record.loc["maxQuantVersion"].squeeze() + params.ident_fdr_psm = None + params.ident_fdr_peptide = record.loc["peptideFdr"].squeeze() + params.ident_fdr_protein = record.loc["proteinFdr"].squeeze() + params.enable_match_between_runs = record.loc["matchBetweenRuns"].squeeze() + precursor_mass_tolerance = record.loc[ + pd.IndexSlice["parameterGroups", "parameterGroup", "mainSearchTol", :] + ].squeeze() + params.precursor_mass_tolerance = f"{precursor_mass_tolerance} ppm" + fragment_mass_tolerance = None # ! 
differences between version >1.6 and <=1.5 + params.fragment_mass_tolerance = fragment_mass_tolerance + params.enzyme = record.loc[ + ("parameterGroups", "parameterGroup", "enzymes", "string") + ].squeeze() + params.allowed_miscleavages = record.loc[ + pd.IndexSlice["parameterGroups", "parameterGroup", "maxMissedCleavages", :] + ].squeeze() + params.min_peptide_length = record.loc["minPepLen"].squeeze() + params.max_peptide_length = None + # fixed mods + if params.software_version > "1.6.0.0": + fixed_mods = record.loc[ + pd.IndexSlice["parameterGroups", "parameterGroup", "fixedModifications", :] + ].squeeze() + if isinstance(fixed_mods, str): + params.fixed_mods = fixed_mods + else: + params.fixed_mods = ",".join(fixed_mods) + else: + fixed_mods = record.loc[ + pd.IndexSlice["fixedModifications", :] + ].squeeze() + if isinstance(fixed_mods, str): + params.fixed_mods = fixed_mods + else: + params.fixed_mods = ",".join(fixed_mods) + + variable_mods = record.loc[ + pd.IndexSlice["parameterGroups", "parameterGroup", "variableModifications", :] + ].squeeze() + if isinstance(variable_mods, str): + params.variable_mods = variable_mods + else: + params.variable_mods = ",".join(variable_mods) + params.max_mods = record.loc[ + ("parameterGroups", "parameterGroup", "maxNmods") + ].squeeze() + params.min_precursor_charge = None + params.max_precursor_charge = record.loc[ + pd.IndexSlice["parameterGroups", "parameterGroup", "maxCharge", :] + ].squeeze() + return params + + # create a first version of json files to match if __name__ == "__main__": + from pprint import pprint + for test_file in [ "../../../test/params/mqpar_MQ1.6.3.3_MBR.xml", "../../../test/params/mqpar_MQ2.1.3.0_noMBR.xml", "../../../test/params/mqpar1.5.3.30_MBR.xml", ]: print(f"{test_file = }") - record_example = read_file(test_file) + record = read_file(test_file) ( Path(test_file) .with_suffix(".json") .write_text( json.dumps( - record_example, + record, indent=4, ) ) ) - flattend = 
build_Series_from_records(record_example, 4) - flattend = flattend.to_frame("run_identifier") - flattend.to_csv(Path(test_file).with_suffix(".csv")) - - # %% - int( - flattend.loc["parameterGroups"] - .loc["parameterGroup"] - .loc["firstSearchTol"] - .squeeze() - ) - - # %% - # ! Parse msmsParamsArray - ms2_params = ( - flattend.loc["msmsParamsArray"].loc["msmsParams"].reset_index(-1, drop=True) - ) - ms2_params.loc["Name", "mode"] = ms2_params.loc["Name"].squeeze() - ms2_params["mode"] = ms2_params["mode"].fillna(method="ffill") - ms2_params = ms2_params.set_index("mode", append=True) - ms2_params.loc[("MatchTolerance", "FTMS")] - # ? reset_index level -1 - # ? update and fillna -> then set as index again + record = build_Series_from_records(record, 4) + record = record.to_frame("run_identifier") + # flattend.to_csv(Path(test_file).with_suffix(".csv")) + params = extract_params(test_file) + pprint(params.__dict__) From df208d25dd02b3a836a52fa9c19ff93e73018363 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 13 Nov 2023 21:41:04 +0100 Subject: [PATCH 12/15] :bug: update test ("fragmnent" -> "fragment") --- proteobench/io/params/maxquant.py | 7 ++----- searchindex.js | 2 +- test/test_module_dda_quant.py | 9 ++++----- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index e9bcad09..ec65f42e 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -130,7 +130,6 @@ def build_Series_from_records(records, index_length=4): return pd.Series((v for (k, v) in records), index=idx) -# %% def extract_params(fname) -> ProteoBenchParameters: params = ProteoBenchParameters() @@ -166,9 +165,7 @@ def extract_params(fname) -> ProteoBenchParameters: else: params.fixed_mods = ",".join(fixed_mods) else: - fixed_mods = record.loc[ - pd.IndexSlice["fixedModifications", :] - ].squeeze() + fixed_mods = record.loc[pd.IndexSlice["fixedModifications", :]].squeeze() if 
isinstance(fixed_mods, str): params.fixed_mods = fixed_mods else: @@ -214,6 +211,6 @@ def extract_params(fname) -> ProteoBenchParameters: ) record = build_Series_from_records(record, 4) record = record.to_frame("run_identifier") - # flattend.to_csv(Path(test_file).with_suffix(".csv")) + record.to_csv(Path(test_file).with_suffix(".csv")) params = extract_params(test_file) pprint(params.__dict__) diff --git a/searchindex.js b/searchindex.js index e8b938e7..44b9cd58 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["changelog", "index", "proteobench/modules", "proteobench/proteobench", "proteobench/proteobench.modules", "reference/_autosummary/proteobench.modules", "reference/_autosummary/proteobench.modules.dda_id", "reference/_autosummary/proteobench.modules.dda_id.module_dda_id", "reference/_autosummary/proteobench.modules.dda_quant", "reference/_autosummary/proteobench.modules.dda_quant.datapoint", "reference/_autosummary/proteobench.modules.dda_quant.module", "reference/_autosummary/proteobench.modules.dda_quant.parse", "reference/_autosummary/proteobench.modules.dda_quant.parse_settings", "reference/_autosummary/proteobench.modules.dia_id", "reference/_autosummary/proteobench.modules.dia_quant", "reference/_autosummary/proteobench.modules.dia_quant.module_dia_quant", "reference/_autosummary/proteobench.modules.metaproteomics", "reference/_autosummary/proteobench.modules.metaproteomics.module_metaproteomics", "reference/_autosummary/proteobench.modules.rescoring", "reference/_autosummary/proteobench.modules.rescoring.module_rescoring", "reference/_autosummary/proteobench.modules.template", "reference/_autosummary/proteobench.modules.template.datapoint", "reference/_autosummary/proteobench.modules.template.module", "reference/_autosummary/proteobench.modules.template.parse", "reference/_autosummary/proteobench.modules.template.parse_settings", "reference/_autosummary/proteobench.plot", 
"reference/_autosummary/webinterface", "reference/_autosummary/webinterface.pages", "reference/_autosummary/webinterface.pages.DDA_ID", "reference/_autosummary/webinterface.pages.DIA_ID", "reference/_autosummary/webinterface.pages.DIA_Quant", "reference/_autosummary/webinterface.pages.Metaproteomics", "reference/_autosummary/webinterface.pages.Rescoring", "reference/_autosummary/webinterface.streamlit_utils", "reference/index", "user_guide/index", "webinterface/modules", "webinterface/webinterface"], "filenames": ["changelog.rst", "index.rst", "proteobench/modules.rst", "proteobench/proteobench.rst", "proteobench/proteobench.modules.rst", "reference/_autosummary/proteobench.modules.rst", "reference/_autosummary/proteobench.modules.dda_id.rst", "reference/_autosummary/proteobench.modules.dda_id.module_dda_id.rst", "reference/_autosummary/proteobench.modules.dda_quant.rst", "reference/_autosummary/proteobench.modules.dda_quant.datapoint.rst", "reference/_autosummary/proteobench.modules.dda_quant.module.rst", "reference/_autosummary/proteobench.modules.dda_quant.parse.rst", "reference/_autosummary/proteobench.modules.dda_quant.parse_settings.rst", "reference/_autosummary/proteobench.modules.dia_id.rst", "reference/_autosummary/proteobench.modules.dia_quant.rst", "reference/_autosummary/proteobench.modules.dia_quant.module_dia_quant.rst", "reference/_autosummary/proteobench.modules.metaproteomics.rst", "reference/_autosummary/proteobench.modules.metaproteomics.module_metaproteomics.rst", "reference/_autosummary/proteobench.modules.rescoring.rst", "reference/_autosummary/proteobench.modules.rescoring.module_rescoring.rst", "reference/_autosummary/proteobench.modules.template.rst", "reference/_autosummary/proteobench.modules.template.datapoint.rst", "reference/_autosummary/proteobench.modules.template.module.rst", "reference/_autosummary/proteobench.modules.template.parse.rst", "reference/_autosummary/proteobench.modules.template.parse_settings.rst", 
"reference/_autosummary/proteobench.plot.rst", "reference/_autosummary/webinterface.rst", "reference/_autosummary/webinterface.pages.rst", "reference/_autosummary/webinterface.pages.DDA_ID.rst", "reference/_autosummary/webinterface.pages.DIA_ID.rst", "reference/_autosummary/webinterface.pages.DIA_Quant.rst", "reference/_autosummary/webinterface.pages.Metaproteomics.rst", "reference/_autosummary/webinterface.pages.Rescoring.rst", "reference/_autosummary/webinterface.streamlit_utils.rst", "reference/index.rst", "user_guide/index.rst", "webinterface/modules.rst", "webinterface/webinterface.rst"], "titles": ["Change log", "Welcome to ProteoBench\u2019s documentation!", "proteobench", "proteobench package", "proteobench.modules package", "proteobench.modules", "proteobench.modules.dda_id", "proteobench.modules.dda_id.module_dda_id", "proteobench.modules.dda_quant", "proteobench.modules.dda_quant.datapoint", "proteobench.modules.dda_quant.module", "proteobench.modules.dda_quant.parse", "proteobench.modules.dda_quant.parse_settings", "proteobench.modules.dia_id", "proteobench.modules.dia_quant", "proteobench.modules.dia_quant.module_dia_quant", "proteobench.modules.metaproteomics", "proteobench.modules.metaproteomics.module_metaproteomics", "proteobench.modules.rescoring", "proteobench.modules.rescoring.module_rescoring", "proteobench.modules.template", "proteobench.modules.template.datapoint", "proteobench.modules.template.module", "proteobench.modules.template.parse", "proteobench.modules.template.parse_settings", "proteobench.plot", "webinterface", "webinterface.pages", "webinterface.pages.DDA_ID", "webinterface.pages.DIA_ID", "webinterface.pages.DIA_Quant", "webinterface.pages.Metaproteomics", "webinterface.pages.Rescoring", "webinterface.streamlit_utils", "Reference", "User guide", "webinterface", "webinterface package"], "terms": {"todo": 0, "index": [1, 34], "modul": [1, 2, 26, 27, 34, 37], "search": [1, 4, 34], "page": [1, 34], "packag": [2, 35, 36], "subpackag": 
2, "io": [], "content": [2, 37], "plot": [4, 35], "dda_id": 4, "submodul": [4, 36], "module_dda_id": 4, "dda_quant": 4, "datapoint": [4, 35], "pars": [4, 35], "parse_set": 4, "dia_id": 4, "dia_quant": 4, "module_dia_qu": 4, "metaproteom": 4, "module_metaproteom": 4, "rescor": 4, "module_rescor": 4, "templat": [4, 35], "moduleinterfac": [3, 4, 35], "add_current_data_point": [3, 4], "benchmark": [3, 4, 35], "generate_datapoint": [3, 4], "generate_intermedi": [3, 4], "load_input_fil": [3, 4], "parseinputsinterfac": [3, 4, 35], "convert_to_standard_format": [3, 4], "is_impl": 4, "mbr": 4, "calculate_missing_quan_prec": 4, "calculate_plot_data": 4, "dump_json_object": 4, "enzyme_nam": 4, "fdr_peptid": 4, "fdr_protein": 4, "fdr_psm": 4, "fragment_tol_unit": 4, "fragmnent_tol": 4, "generate_id": 4, "id": 4, "is_temporari": 4, "max_pep_length": 4, "min_pep_length": 4, "missed_cleavag": 4, "nr_prec": 4, "precursor_tol": 4, "precursor_tol_unit": 4, "search_engin": 4, "software_vers": 4, "weighted_sum": 4, "strip_sequence_wombat": 4, "parseinput": [4, 35], "parseset": 4, "calculate_benchmarking_metric_1": 4, "calculate_benchmarking_metric_2": 4, "class": [4, 8, 9, 10, 11, 12, 20, 21, 22, 23, 24, 33, 35], "base": 4, "abc": 4, "abstract": 4, "method": 4, "us": [4, 35], "add": [4, 35], "current": 4, "data": [4, 35], "point": [4, 35], "collect": [4, 35], "run": [4, 35], "comput": 4, "provid": [4, 35], "intermedi": [4, 35], "structur": 4, "calcul": [4, 35], "valu": 4, "from": [4, 35], "upload": [4, 35], "file": [4, 35], "load": 4, "datafram": [4, 35], "depend": 4, "its": [4, 35], "format": [4, 12, 24, 35], "convert": 4, "engin": 4, "output": [4, 35], "gener": [4, 35], "support": 4, "main": [7, 15, 17, 19, 35], "interfac": [7, 15, 17, 19, 35], "bool": [], "return": [], "whether": [], "i": 35, "fulli": [], "implement": 35, "str": [], "none": [], "int": [], "0": [], "fals": [], "da": [], "true": [], "object": 35, "store": [], "df": [], "nr_missing_0": [], "file_nam": [], "librari": 
[], "within": [], "all_datapoint": [], "current_datapoint": [], "all": [12, 24, 35], "them": [], "empti": [], "Not": [], "clear": [], "why": [], "transpos": [], "here": 35, "input_fil": [], "input_format": [], "user_input": [], "dict": [], "workflow": [], "result": [], "metadata": 35, "filtered_df": [], "replicate_to_raw": [], "take": [], "get": [], "quantif": [], "tupl": [], "measur": [], "reliabl": [], "input_csv": [], "csv": [], "seq": [], "remov": [], "part": [], "peptid": 35, "sequenc": [], "contain": 35, "modif": [], "panda": [], "core": [], "frame": [], "list": [], "avail": [12, 24], "paramet": 35, "given": 35, "databas": [], "experiment": [], "analysi": [], "set": 35, "A": 35, "uniqu": [], "identifi": [], "type": 35, "boolean": [], "flag": [], "indic": [], "temporari": [], "The": 35, "name": [], "experi": [], "version": [], "number": [], "softwar": [], "discoveri": [], "rate": [], "spectrum": [], "match": [], "level": [], "protein": [], "between": [], "wa": [], "enabl": [], "precursor": [], "mass": [], "toler": [], "unit": [], "specifi": [], "either": [], "ppm": [], "fragment": [], "enzym": [], "digest": [], "allow": 35, "miss": [], "cleavag": [], "dure": [], "minimum": [], "length": [], "identif": [], "maximum": [], "weight": [], "sum": [], "score": [], "infer": [], "intermediate_data": [], "first": [], "metric": 35, "dictionari": [], "metric_1": [], "float": [], "second": [], "metric_2": [], "dump": [], "json": 35, "write": [], "represent": [], "append": [], "end": [], "alreadi": [], "exist": [], "attribut": [], "string": [], "compos": [], "timestamp": [], "separ": 35, "underscor": [], "print": [], "stdout": [], "input": [24, 35], "function": [7, 15, 17, 19, 33, 35], "streamlit": [33, 35], "util": 33, "welcom": 35, "proteobench": 35, "can": 35, "python": 35, "usual": 35, "pip": 35, "command": 35, "without": 35, "clone": 35, "repositori": 35, "setup": 35, "script": 35, "git": 35, "http": 35, "github": 35, "com": 35, "cd": 35, "py": 35, "start": 35, "gui": 
35, "your": 35, "termin": 35, "follow": 35, "home": 35, "altern": 35, "you": 35, "jupyt": 35, "notebook": 35, "test_workflow": 35, "ipynb": 35, "more": 35, "control": 35, "over": 35, "pleas": 35, "we": 35, "hopefulli": 35, "comprehens": 35, "document": 35, "how": 35, "up": 35, "suggest": 35, "understand": 35, "term": 35, "thei": 35, "ar": 35, "crucial": 35, "compon": 35, "_module_": 35, "code": 35, "definit": 35, "creat": 35, "compar": 35, "_datapoint_": 35, "need": 35, "should": 35, "also": 35, "repres": 35, "_intermediate_": 35, "It": 35, "transform": 35, "annot": 35, "locat": 35, "directori": 35, "differ": 35, "step": 35, "modular": 35, "portabl": 35, "each": 35, "1": 35, "read": 35, "process": 35, "well": 35, "our": 35, "_datapoints_": 35, "2": 35, "standard": 35, "respect": 35, "e": 35, "g": 35, "maxquant": 35, "3": 35, "properti": 35, "acquisit": 35, "fdr": 35, "plotdatapoint": 35, "visual": 35, "5": 35, "defin": 35, "toml": 35, "like": 35, "io_parse_set": 35, "parse_settings_format1": 35, "thi": 35, "meant": 35, "help": 35, "exhaust": 35, "cover": 35, "most": 35, "import": 35, "copi": 35, "folder": 35, "same": 35, "call": 35, "__init__": 35, "just": 35, "my_modul": 35, "my_module_test": 35, "test": 35, "my_module_doc": 35, "my_module_config": 35, "recommend": 35, "keep": 35, "For": 35, "specif": 35, "layout": 35, "streamlit_util": [], "streamlitlogg": [], "hide_streamlit_menu": [], "save_datafram": [], "placehold": [], "logger_nam": [], "accumul": [], "persist": [], "pickup": [], "logger": [], "front": [], "save": [], "cach": []}, "objects": {"": [[3, 0, 0, "-", "proteobench"], [37, 0, 0, "-", "webinterface"]], "proteobench.modules": [[6, 0, 0, "-", "dda_id"], [13, 0, 0, "-", "dia_id"], [14, 0, 0, "-", "dia_quant"], [16, 0, 0, "-", "metaproteomics"], [18, 0, 0, "-", "rescoring"]], "proteobench.modules.dda_id": [[7, 0, 0, "-", "module_dda_id"]], "proteobench.modules.dia_quant": [[15, 0, 0, "-", "module_dia_quant"]], "proteobench.modules.metaproteomics": [[17, 
0, 0, "-", "module_metaproteomics"]], "proteobench.modules.rescoring": [[19, 0, 0, "-", "module_rescoring"]], "proteobench": [[25, 0, 0, "-", "plot"]], "webinterface": [[27, 0, 0, "-", "pages"]], "webinterface.pages": [[28, 0, 0, "-", "DDA_ID"], [29, 0, 0, "-", "DIA_ID"], [30, 0, 0, "-", "DIA_Quant"], [31, 0, 0, "-", "Metaproteomics"], [32, 0, 0, "-", "Rescoring"]]}, "objtypes": {"0": "py:module"}, "objnames": {"0": ["py", "module", "Python module"]}, "titleterms": {"chang": 0, "log": 0, "welcom": 1, "proteobench": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], "": 1, "document": 1, "indic": 1, "tabl": 1, "packag": [3, 4, 37], "subpackag": [3, 4], "modul": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 35], "content": [3, 4], "io": [], "dda_id": [6, 7, 28], "submodul": 37, "module_dda_id": 7, "dda_quant": [8, 9, 10, 11, 12], "datapoint": [9, 21], "pars": [11, 23], "parse_set": [12, 24], "plot": 25, "dia_id": [13, 29], "dia_quant": [14, 15, 30], "module_dia_qu": 15, "metaproteom": [16, 17, 31], "module_metaproteom": 17, "rescor": [18, 19, 32], "module_rescor": 19, "templat": [20, 21, 22, 23, 24], "webinterfac": [26, 27, 28, 29, 30, 31, 32, 33, 36, 37], "page": [27, 28, 29, 30, 31, 32], "streamlit_util": 33, "refer": 34, "user": 35, "guid": 35, "instal": 35, "local": 35, "usag": 35, "develop": 35, "ad": 35, "new": 35, "name": 35, "convent": 35, "programmat": 35, "structur": 35, "checklist": 35, "modifi": 35, "an": 35, "exist": 35, "home": []}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Change log": [[0, "change-log"]], "Welcome to ProteoBench\u2019s documentation!": [[1, "welcome-to-proteobench-s-documentation"]], 
"Indices and tables": [[1, "indices-and-tables"]], "proteobench": [[2, "proteobench"]], "Subpackages": [[4, "subpackages"], [3, "subpackages"]], "Module contents": [[4, "module-proteobench.modules"], [3, "module-proteobench"]], "proteobench.modules package": [[4, "proteobench-modules-package"]], "proteobench.modules": [[5, "module-proteobench.modules"]], "proteobench.modules.dda_id": [[6, "module-proteobench.modules.dda_id"]], "proteobench.modules.dda_id.module_dda_id": [[7, "module-proteobench.modules.dda_id.module_dda_id"]], "proteobench.modules.dda_quant": [[8, "module-proteobench.modules.dda_quant"]], "proteobench.modules.dda_quant.datapoint": [[9, "module-proteobench.modules.dda_quant.datapoint"]], "proteobench.modules.dda_quant.module": [[10, "module-proteobench.modules.dda_quant.module"]], "proteobench.modules.dda_quant.parse": [[11, "module-proteobench.modules.dda_quant.parse"]], "proteobench.modules.dda_quant.parse_settings": [[12, "module-proteobench.modules.dda_quant.parse_settings"]], "proteobench.modules.dia_id": [[13, "module-proteobench.modules.dia_id"]], "proteobench.modules.dia_quant": [[14, "module-proteobench.modules.dia_quant"]], "proteobench.modules.dia_quant.module_dia_quant": [[15, "module-proteobench.modules.dia_quant.module_dia_quant"]], "proteobench.modules.metaproteomics": [[16, "module-proteobench.modules.metaproteomics"]], "proteobench.modules.metaproteomics.module_metaproteomics": [[17, "module-proteobench.modules.metaproteomics.module_metaproteomics"]], "proteobench.modules.rescoring": [[18, "module-proteobench.modules.rescoring"]], "proteobench.modules.rescoring.module_rescoring": [[19, "module-proteobench.modules.rescoring.module_rescoring"]], "proteobench.modules.template": [[20, "module-proteobench.modules.template"]], "proteobench.modules.template.datapoint": [[21, "module-proteobench.modules.template.datapoint"]], "proteobench.modules.template.module": [[22, "module-proteobench.modules.template.module"]], 
"proteobench.modules.template.parse": [[23, "module-proteobench.modules.template.parse"]], "proteobench.modules.template.parse_settings": [[24, "module-proteobench.modules.template.parse_settings"]], "proteobench.plot": [[25, "module-proteobench.plot"]], "webinterface": [[26, "module-webinterface"], [36, "webinterface"]], "webinterface.pages": [[27, "module-webinterface.pages"]], "webinterface.pages.DDA_ID": [[28, "module-webinterface.pages.DDA_ID"]], "webinterface.pages.DIA_ID": [[29, "module-webinterface.pages.DIA_ID"]], "webinterface.pages.DIA_Quant": [[30, "module-webinterface.pages.DIA_Quant"]], "webinterface.pages.Metaproteomics": [[31, "module-webinterface.pages.Metaproteomics"]], "webinterface.pages.Rescoring": [[32, "module-webinterface.pages.Rescoring"]], "webinterface.streamlit_utils": [[33, "module-webinterface.streamlit_utils"]], "Reference": [[34, "reference"]], "Submodules": [[37, "submodules"]], "webinterface package": [[37, "webinterface-package"]], "User guide": [[35, "user-guide"]], "Installation": [[35, "installation"]], "Local usage": [[35, "local-usage"]], "Development": [[35, "development"]], "Adding a new modules": [[35, "adding-a-new-modules"]], "Naming convention": [[35, "naming-convention"]], "Programmatic structure": [[35, "programmatic-structure"]], "Checklist": [[35, "checklist"]], "Modifying an existing module": [[35, "modifying-an-existing-module"]], "proteobench package": [[3, "proteobench-package"]]}, "indexentries": {"module": [[3, "module-proteobench"]], "proteobench": [[3, "module-proteobench"]]}}) \ No newline at end of file +Search.setIndex({ "docnames": ["changelog", "index", "proteobench/modules", "proteobench/proteobench", "proteobench/proteobench.modules", "reference/_autosummary/proteobench.modules", "reference/_autosummary/proteobench.modules.dda_id", "reference/_autosummary/proteobench.modules.dda_id.module_dda_id", "reference/_autosummary/proteobench.modules.dda_quant", 
"reference/_autosummary/proteobench.modules.dda_quant.datapoint", "reference/_autosummary/proteobench.modules.dda_quant.module", "reference/_autosummary/proteobench.modules.dda_quant.parse", "reference/_autosummary/proteobench.modules.dda_quant.parse_settings", "reference/_autosummary/proteobench.modules.dia_id", "reference/_autosummary/proteobench.modules.dia_quant", "reference/_autosummary/proteobench.modules.dia_quant.module_dia_quant", "reference/_autosummary/proteobench.modules.metaproteomics", "reference/_autosummary/proteobench.modules.metaproteomics.module_metaproteomics", "reference/_autosummary/proteobench.modules.rescoring", "reference/_autosummary/proteobench.modules.rescoring.module_rescoring", "reference/_autosummary/proteobench.modules.template", "reference/_autosummary/proteobench.modules.template.datapoint", "reference/_autosummary/proteobench.modules.template.module", "reference/_autosummary/proteobench.modules.template.parse", "reference/_autosummary/proteobench.modules.template.parse_settings", "reference/_autosummary/proteobench.plot", "reference/_autosummary/webinterface", "reference/_autosummary/webinterface.pages", "reference/_autosummary/webinterface.pages.DDA_ID", "reference/_autosummary/webinterface.pages.DIA_ID", "reference/_autosummary/webinterface.pages.DIA_Quant", "reference/_autosummary/webinterface.pages.Metaproteomics", "reference/_autosummary/webinterface.pages.Rescoring", "reference/_autosummary/webinterface.streamlit_utils", "reference/index", "user_guide/index", "webinterface/modules", "webinterface/webinterface"], "filenames": ["changelog.rst", "index.rst", "proteobench/modules.rst", "proteobench/proteobench.rst", "proteobench/proteobench.modules.rst", "reference/_autosummary/proteobench.modules.rst", "reference/_autosummary/proteobench.modules.dda_id.rst", "reference/_autosummary/proteobench.modules.dda_id.module_dda_id.rst", "reference/_autosummary/proteobench.modules.dda_quant.rst", 
"reference/_autosummary/proteobench.modules.dda_quant.datapoint.rst", "reference/_autosummary/proteobench.modules.dda_quant.module.rst", "reference/_autosummary/proteobench.modules.dda_quant.parse.rst", "reference/_autosummary/proteobench.modules.dda_quant.parse_settings.rst", "reference/_autosummary/proteobench.modules.dia_id.rst", "reference/_autosummary/proteobench.modules.dia_quant.rst", "reference/_autosummary/proteobench.modules.dia_quant.module_dia_quant.rst", "reference/_autosummary/proteobench.modules.metaproteomics.rst", "reference/_autosummary/proteobench.modules.metaproteomics.module_metaproteomics.rst", "reference/_autosummary/proteobench.modules.rescoring.rst", "reference/_autosummary/proteobench.modules.rescoring.module_rescoring.rst", "reference/_autosummary/proteobench.modules.template.rst", "reference/_autosummary/proteobench.modules.template.datapoint.rst", "reference/_autosummary/proteobench.modules.template.module.rst", "reference/_autosummary/proteobench.modules.template.parse.rst", "reference/_autosummary/proteobench.modules.template.parse_settings.rst", "reference/_autosummary/proteobench.plot.rst", "reference/_autosummary/webinterface.rst", "reference/_autosummary/webinterface.pages.rst", "reference/_autosummary/webinterface.pages.DDA_ID.rst", "reference/_autosummary/webinterface.pages.DIA_ID.rst", "reference/_autosummary/webinterface.pages.DIA_Quant.rst", "reference/_autosummary/webinterface.pages.Metaproteomics.rst", "reference/_autosummary/webinterface.pages.Rescoring.rst", "reference/_autosummary/webinterface.streamlit_utils.rst", "reference/index.rst", "user_guide/index.rst", "webinterface/modules.rst", "webinterface/webinterface.rst"], "titles": ["Change log", "Welcome to ProteoBench\u2019s documentation!", "proteobench", "proteobench package", "proteobench.modules package", "proteobench.modules", "proteobench.modules.dda_id", "proteobench.modules.dda_id.module_dda_id", "proteobench.modules.dda_quant", 
"proteobench.modules.dda_quant.datapoint", "proteobench.modules.dda_quant.module", "proteobench.modules.dda_quant.parse", "proteobench.modules.dda_quant.parse_settings", "proteobench.modules.dia_id", "proteobench.modules.dia_quant", "proteobench.modules.dia_quant.module_dia_quant", "proteobench.modules.metaproteomics", "proteobench.modules.metaproteomics.module_metaproteomics", "proteobench.modules.rescoring", "proteobench.modules.rescoring.module_rescoring", "proteobench.modules.template", "proteobench.modules.template.datapoint", "proteobench.modules.template.module", "proteobench.modules.template.parse", "proteobench.modules.template.parse_settings", "proteobench.plot", "webinterface", "webinterface.pages", "webinterface.pages.DDA_ID", "webinterface.pages.DIA_ID", "webinterface.pages.DIA_Quant", "webinterface.pages.Metaproteomics", "webinterface.pages.Rescoring", "webinterface.streamlit_utils", "Reference", "User guide", "webinterface", "webinterface package"], "terms": { "todo": 0, "index": [1, 34], "modul": [1, 2, 26, 27, 34, 37], "search": [1, 4, 34], "page": [1, 34], "packag": [2, 35, 36], "subpackag": 2, "io": [], "content": [2, 37], "plot": [4, 35], "dda_id": 4, "submodul": [4, 36], "module_dda_id": 4, "dda_quant": 4, "datapoint": [4, 35], "pars": [4, 35], "parse_set": 4, "dia_id": 4, "dia_quant": 4, "module_dia_qu": 4, "metaproteom": 4, "module_metaproteom": 4, "rescor": 4, "module_rescor": 4, "templat": [4, 35], "moduleinterfac": [3, 4, 35], "add_current_data_point": [3, 4], "benchmark": [3, 4, 35], "generate_datapoint": [3, 4], "generate_intermedi": [3, 4], "load_input_fil": [3, 4], "parseinputsinterfac": [3, 4, 35], "convert_to_standard_format": [3, 4], "is_impl": 4, "mbr": 4, "calculate_missing_quan_prec": 4, "calculate_plot_data": 4, "dump_json_object": 4, "enzyme_nam": 4, "fdr_peptid": 4, "fdr_protein": 4, "fdr_psm": 4, "fragment_tol_unit": 4, "fragment_tol": 4, "generate_id": 4, "id": 4, "is_temporari": 4, "max_pep_length": 4, "min_pep_length": 4, 
"missed_cleavag": 4, "nr_prec": 4, "precursor_tol": 4, "precursor_tol_unit": 4, "search_engin": 4, "software_vers": 4, "weighted_sum": 4, "strip_sequence_wombat": 4, "parseinput": [4, 35], "parseset": 4, "calculate_benchmarking_metric_1": 4, "calculate_benchmarking_metric_2": 4, "class": [4, 8, 9, 10, 11, 12, 20, 21, 22, 23, 24, 33, 35], "base": 4, "abc": 4, "abstract": 4, "method": 4, "us": [4, 35], "add": [4, 35], "current": 4, "data": [4, 35], "point": [4, 35], "collect": [4, 35], "run": [4, 35], "comput": 4, "provid": [4, 35], "intermedi": [4, 35], "structur": 4, "calcul": [4, 35], "valu": 4, "from": [4, 35], "upload": [4, 35], "file": [4, 35], "load": 4, "datafram": [4, 35], "depend": 4, "its": [4, 35], "format": [4, 12, 24, 35], "convert": 4, "engin": 4, "output": [4, 35], "gener": [4, 35], "support": 4, "main": [7, 15, 17, 19, 35], "interfac": [7, 15, 17, 19, 35], "bool": [], "return": [], "whether": [], "i": 35, "fulli": [], "implement": 35, "str": [], "none": [], "int": [], "0": [], "fals": [], "da": [], "true": [], "object": 35, "store": [], "df": [], "nr_missing_0": [], "file_nam": [], "librari": [], "within": [], "all_datapoint": [], "current_datapoint": [], "all": [12, 24, 35], "them": [], "empti": [], "Not": [], "clear": [], "why": [], "transpos": [], "here": 35, "input_fil": [], "input_format": [], "user_input": [], "dict": [], "workflow": [], "result": [], "metadata": 35, "filtered_df": [], "replicate_to_raw": [], "take": [], "get": [], "quantif": [], "tupl": [], "measur": [], "reliabl": [], "input_csv": [], "csv": [], "seq": [], "remov": [], "part": [], "peptid": 35, "sequenc": [], "contain": 35, "modif": [], "panda": [], "core": [], "frame": [], "list": [], "avail": [12, 24], "paramet": 35, "given": 35, "databas": [], "experiment": [], "analysi": [], "set": 35, "A": 35, "uniqu": [], "identifi": [], "type": 35, "boolean": [], "flag": [], "indic": [], "temporari": [], "The": 35, "name": [], "experi": [], "version": [], "number": [], "softwar": [], 
"discoveri": [], "rate": [], "spectrum": [], "match": [], "level": [], "protein": [], "between": [], "wa": [], "enabl": [], "precursor": [], "mass": [], "toler": [], "unit": [], "specifi": [], "either": [], "ppm": [], "fragment": [], "enzym": [], "digest": [], "allow": 35, "miss": [], "cleavag": [], "dure": [], "minimum": [], "length": [], "identif": [], "maximum": [], "weight": [], "sum": [], "score": [], "infer": [], "intermediate_data": [], "first": [], "metric": 35, "dictionari": [], "metric_1": [], "float": [], "second": [], "metric_2": [], "dump": [], "json": 35, "write": [], "represent": [], "append": [], "end": [], "alreadi": [], "exist": [], "attribut": [], "string": [], "compos": [], "timestamp": [], "separ": 35, "underscor": [], "print": [], "stdout": [], "input": [24, 35], "function": [7, 15, 17, 19, 33, 35], "streamlit": [33, 35], "util": 33, "welcom": 35, "proteobench": 35, "can": 35, "python": 35, "usual": 35, "pip": 35, "command": 35, "without": 35, "clone": 35, "repositori": 35, "setup": 35, "script": 35, "git": 35, "http": 35, "github": 35, "com": 35, "cd": 35, "py": 35, "start": 35, "gui": 35, "your": 35, "termin": 35, "follow": 35, "home": 35, "altern": 35, "you": 35, "jupyt": 35, "notebook": 35, "test_workflow": 35, "ipynb": 35, "more": 35, "control": 35, "over": 35, "pleas": 35, "we": 35, "hopefulli": 35, "comprehens": 35, "document": 35, "how": 35, "up": 35, "suggest": 35, "understand": 35, "term": 35, "thei": 35, "ar": 35, "crucial": 35, "compon": 35, "_module_": 35, "code": 35, "definit": 35, "creat": 35, "compar": 35, "_datapoint_": 35, "need": 35, "should": 35, "also": 35, "repres": 35, "_intermediate_": 35, "It": 35, "transform": 35, "annot": 35, "locat": 35, "directori": 35, "differ": 35, "step": 35, "modular": 35, "portabl": 35, "each": 35, "1": 35, "read": 35, "process": 35, "well": 35, "our": 35, "_datapoints_": 35, "2": 35, "standard": 35, "respect": 35, "e": 35, "g": 35, "maxquant": 35, "3": 35, "properti": 35, "acquisit": 35, 
"fdr": 35, "plotdatapoint": 35, "visual": 35, "5": 35, "defin": 35, "toml": 35, "like": 35, "io_parse_set": 35, "parse_settings_format1": 35, "thi": 35, "meant": 35, "help": 35, "exhaust": 35, "cover": 35, "most": 35, "import": 35, "copi": 35, "folder": 35, "same": 35, "call": 35, "__init__": 35, "just": 35, "my_modul": 35, "my_module_test": 35, "test": 35, "my_module_doc": 35, "my_module_config": 35, "recommend": 35, "keep": 35, "For": 35, "specif": 35, "layout": 35, "streamlit_util": [], "streamlitlogg": [], "hide_streamlit_menu": [], "save_datafram": [], "placehold": [], "logger_nam": [], "accumul": [], "persist": [], "pickup": [], "logger": [], "front": [], "save": [], "cach": [] }, "objects": { "": [[3, 0, 0, "-", "proteobench"], [37, 0, 0, "-", "webinterface"]], "proteobench.modules": [[6, 0, 0, "-", "dda_id"], [13, 0, 0, "-", "dia_id"], [14, 0, 0, "-", "dia_quant"], [16, 0, 0, "-", "metaproteomics"], [18, 0, 0, "-", "rescoring"]], "proteobench.modules.dda_id": [[7, 0, 0, "-", "module_dda_id"]], "proteobench.modules.dia_quant": [[15, 0, 0, "-", "module_dia_quant"]], "proteobench.modules.metaproteomics": [[17, 0, 0, "-", "module_metaproteomics"]], "proteobench.modules.rescoring": [[19, 0, 0, "-", "module_rescoring"]], "proteobench": [[25, 0, 0, "-", "plot"]], "webinterface": [[27, 0, 0, "-", "pages"]], "webinterface.pages": [[28, 0, 0, "-", "DDA_ID"], [29, 0, 0, "-", "DIA_ID"], [30, 0, 0, "-", "DIA_Quant"], [31, 0, 0, "-", "Metaproteomics"], [32, 0, 0, "-", "Rescoring"]] }, "objtypes": { "0": "py:module" }, "objnames": { "0": ["py", "module", "Python module"] }, "titleterms": { "chang": 0, "log": 0, "welcom": 1, "proteobench": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], "": 1, "document": 1, "indic": 1, "tabl": 1, "packag": [3, 4, 37], "subpackag": [3, 4], "modul": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 35], "content": [3, 4], "io": [], "dda_id": [6, 7, 28], "submodul": 
37, "module_dda_id": 7, "dda_quant": [8, 9, 10, 11, 12], "datapoint": [9, 21], "pars": [11, 23], "parse_set": [12, 24], "plot": 25, "dia_id": [13, 29], "dia_quant": [14, 15, 30], "module_dia_qu": 15, "metaproteom": [16, 17, 31], "module_metaproteom": 17, "rescor": [18, 19, 32], "module_rescor": 19, "templat": [20, 21, 22, 23, 24], "webinterfac": [26, 27, 28, 29, 30, 31, 32, 33, 36, 37], "page": [27, 28, 29, 30, 31, 32], "streamlit_util": 33, "refer": 34, "user": 35, "guid": 35, "instal": 35, "local": 35, "usag": 35, "develop": 35, "ad": 35, "new": 35, "name": 35, "convent": 35, "programmat": 35, "structur": 35, "checklist": 35, "modifi": 35, "an": 35, "exist": 35, "home": [] }, "envversion": { "sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57 }, "alltitles": { "Change log": [[0, "change-log"]], "Welcome to ProteoBench\u2019s documentation!": [[1, "welcome-to-proteobench-s-documentation"]], "Indices and tables": [[1, "indices-and-tables"]], "proteobench": [[2, "proteobench"]], "Subpackages": [[4, "subpackages"], [3, "subpackages"]], "Module contents": [[4, "module-proteobench.modules"], [3, "module-proteobench"]], "proteobench.modules package": [[4, "proteobench-modules-package"]], "proteobench.modules": [[5, "module-proteobench.modules"]], "proteobench.modules.dda_id": [[6, "module-proteobench.modules.dda_id"]], "proteobench.modules.dda_id.module_dda_id": [[7, "module-proteobench.modules.dda_id.module_dda_id"]], "proteobench.modules.dda_quant": [[8, "module-proteobench.modules.dda_quant"]], "proteobench.modules.dda_quant.datapoint": [[9, "module-proteobench.modules.dda_quant.datapoint"]], "proteobench.modules.dda_quant.module": [[10, "module-proteobench.modules.dda_quant.module"]], "proteobench.modules.dda_quant.parse": [[11, 
"module-proteobench.modules.dda_quant.parse"]], "proteobench.modules.dda_quant.parse_settings": [[12, "module-proteobench.modules.dda_quant.parse_settings"]], "proteobench.modules.dia_id": [[13, "module-proteobench.modules.dia_id"]], "proteobench.modules.dia_quant": [[14, "module-proteobench.modules.dia_quant"]], "proteobench.modules.dia_quant.module_dia_quant": [[15, "module-proteobench.modules.dia_quant.module_dia_quant"]], "proteobench.modules.metaproteomics": [[16, "module-proteobench.modules.metaproteomics"]], "proteobench.modules.metaproteomics.module_metaproteomics": [[17, "module-proteobench.modules.metaproteomics.module_metaproteomics"]], "proteobench.modules.rescoring": [[18, "module-proteobench.modules.rescoring"]], "proteobench.modules.rescoring.module_rescoring": [[19, "module-proteobench.modules.rescoring.module_rescoring"]], "proteobench.modules.template": [[20, "module-proteobench.modules.template"]], "proteobench.modules.template.datapoint": [[21, "module-proteobench.modules.template.datapoint"]], "proteobench.modules.template.module": [[22, "module-proteobench.modules.template.module"]], "proteobench.modules.template.parse": [[23, "module-proteobench.modules.template.parse"]], "proteobench.modules.template.parse_settings": [[24, "module-proteobench.modules.template.parse_settings"]], "proteobench.plot": [[25, "module-proteobench.plot"]], "webinterface": [[26, "module-webinterface"], [36, "webinterface"]], "webinterface.pages": [[27, "module-webinterface.pages"]], "webinterface.pages.DDA_ID": [[28, "module-webinterface.pages.DDA_ID"]], "webinterface.pages.DIA_ID": [[29, "module-webinterface.pages.DIA_ID"]], "webinterface.pages.DIA_Quant": [[30, "module-webinterface.pages.DIA_Quant"]], "webinterface.pages.Metaproteomics": [[31, "module-webinterface.pages.Metaproteomics"]], "webinterface.pages.Rescoring": [[32, "module-webinterface.pages.Rescoring"]], "webinterface.streamlit_utils": [[33, "module-webinterface.streamlit_utils"]], "Reference": [[34, 
"reference"]], "Submodules": [[37, "submodules"]], "webinterface package": [[37, "webinterface-package"]], "User guide": [[35, "user-guide"]], "Installation": [[35, "installation"]], "Local usage": [[35, "local-usage"]], "Development": [[35, "development"]], "Adding a new modules": [[35, "adding-a-new-modules"]], "Naming convention": [[35, "naming-convention"]], "Programmatic structure": [[35, "programmatic-structure"]], "Checklist": [[35, "checklist"]], "Modifying an existing module": [[35, "modifying-an-existing-module"]], "proteobench package": [[3, "proteobench-package"]] }, "indexentries": { "module": [[3, "module-proteobench"]], "proteobench": [[3, "module-proteobench"]] } }) \ No newline at end of file diff --git a/test/test_module_dda_quant.py b/test/test_module_dda_quant.py index d34c9ab2..5de4e7af 100644 --- a/test/test_module_dda_quant.py +++ b/test/test_module_dda_quant.py @@ -2,11 +2,11 @@ import os import unittest -import pandas as pd import numpy as np +import pandas as pd from proteobench.github.gh import read_results_json_repo -from proteobench.modules.dda_quant.module import Module +from proteobench.modules.dda_quant.module import Datapoint, Module from proteobench.modules.dda_quant.parse import ParseInputs from proteobench.modules.dda_quant.parse_settings import ( DDA_QUANT_RESULTS_REPO, @@ -14,7 +14,6 @@ ParseSettings, ) from proteobench.modules.dda_quant.plot import PlotDataPoint -from proteobench.modules.dda_quant.module import Datapoint # genereate_input_field @@ -210,14 +209,14 @@ def test_Datapoint_constructor(self): MBR=user_input["mbr"], precursor_tol=user_input["precursor_mass_tolerance"], precursor_tol_unit=user_input["precursor_mass_tolerance_unit"], - fragmnent_tol=user_input["fragment_mass_tolerance"], + fragment_tol=user_input["fragment_mass_tolerance"], fragment_tol_unit=user_input["fragment_mass_tolerance_unit"], enzyme_name=user_input["search_enzyme_name"], missed_cleavages=user_input["allowed_missed_cleavage"], 
min_pep_length=user_input["min_peptide_length"], max_pep_length=user_input["max_peptide_length"], ) - + if __name__ == "__main__": unittest.main() From 5a95fa04bb58da080e4e5d7a8e6c0d22494da13d Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 19 Nov 2023 17:39:05 +0100 Subject: [PATCH 13/15] :sparkles: Add parsing of attributes for MaxQuant 1.5 - from 1.6 onwords, information is given explicitly --- proteobench/io/params/maxquant.py | 4 +- test/params/mqpar1.5.3.30_MBR.csv | 60 ++++++++++++++++++++++++++++++ test/params/mqpar1.5.3.30_MBR.json | 60 ++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index ec65f42e..35fe15fa 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -54,6 +54,8 @@ def add_record(data: dict, tag: str, record) -> dict: def read_xml_record(element: ET.Element) -> dict: """Read entire record in a nested dict structure.""" data = dict() + if element.attrib: + data.update(element.attrib) for child in element: if len(child) > 1 and child.tag: # if there is a list, process each element one by one @@ -125,7 +127,7 @@ def flatten_dict_of_dicts(d: dict, parent_key: str = "") -> dict: def build_Series_from_records(records, index_length=4): records = flatten_dict_of_dicts(records) idx = pd.MultiIndex.from_tuples( - (extend_tuple(k, index_length) for (k, v) in records) + (extend_tuple(k, index_length) for (k, _) in records) ) return pd.Series((v for (k, v) in records), index=idx) diff --git a/test/params/mqpar1.5.3.30_MBR.csv b/test/params/mqpar1.5.3.30_MBR.csv index 051fdbd7..eb160e35 100644 --- a/test/params/mqpar1.5.3.30_MBR.csv +++ b/test/params/mqpar1.5.3.30_MBR.csv @@ -1,4 +1,20 @@ ,,,,run_identifier +aifSilWeight,,,,0 +aifIsoWeight,,,,0 +aifTopx,,,,0 +aifCorrelation,,,,0 +aifCorrelationFirstPass,,,,0 +aifMinMass,,,,0 +aifMsmsTol,,,,0 +aifSecondPass,,,,false +aifIterative,,,,false +aifThresholdFdr,,,,0 
+writeMsScansTable,,,,true +writeMsmsScansTable,,,,true +writeMs3ScansTable,,,,true +writeAllPeptidesTable,,,,true +writeMzRangeTable,,,,true +disableMd5,,,,false name,,,,Session1 maxQuantVersion,,,,1.5.3.30 tempFolder,,,, @@ -187,15 +203,59 @@ numberOfCandidatesMultiplexedMsms,,,,25 numberOfCandidatesMsms,,,,15 massDifferenceMods,,,, mainSearchMaxCombinations,,,,200 +msmsParamsArray,msmsParams,Name,,FTMS +msmsParamsArray,msmsParams,MatchToleranceInPpm,,true +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,true +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,true +msmsParamsArray,msmsParams,Deisotope,,true +msmsParamsArray,msmsParams,Topx,,12 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,20 msmsParamsArray,msmsParams,DeisotopeTolerance,,7 msmsParamsArray,msmsParams,DeNovoTolerance,,10 +msmsParamsArray,msmsParams,Name,,ITMS +msmsParamsArray,msmsParams,MatchToleranceInPpm,,false +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,false +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,0.5 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +msmsParamsArray,msmsParams,Name,,TOF +msmsParamsArray,msmsParams,MatchToleranceInPpm,,true +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,true 
+msmsParamsArray,msmsParams,Topx,,10 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,40 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.01 msmsParamsArray,msmsParams,DeNovoTolerance,,0.02 +msmsParamsArray,msmsParams,Name,,Unknown +msmsParamsArray,msmsParams,MatchToleranceInPpm,,false +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,false +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,0.5 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 diff --git a/test/params/mqpar1.5.3.30_MBR.json b/test/params/mqpar1.5.3.30_MBR.json index c8c8a3f2..29f170bc 100644 --- a/test/params/mqpar1.5.3.30_MBR.json +++ b/test/params/mqpar1.5.3.30_MBR.json @@ -1,4 +1,20 @@ { + "aifSilWeight": "0", + "aifIsoWeight": "0", + "aifTopx": "0", + "aifCorrelation": "0", + "aifCorrelationFirstPass": "0", + "aifMinMass": "0", + "aifMsmsTol": "0", + "aifSecondPass": "false", + "aifIterative": "false", + "aifThresholdFdr": "0", + "writeMsScansTable": "true", + "writeMsmsScansTable": "true", + "writeMs3ScansTable": "true", + "writeAllPeptidesTable": "true", + "writeMzRangeTable": "true", + "disableMd5": "false", "name": "Session1", "maxQuantVersion": "1.5.3.30", "tempFolder": null, @@ -425,6 +441,17 @@ "msmsParamsArray": [ { "msmsParams": { + "Name": "FTMS", + "MatchToleranceInPpm": "true", + "DeisotopeToleranceInPpm": "true", + 
"DeNovoToleranceInPpm": "true", + "Deisotope": "true", + "Topx": "12", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "20", "DeisotopeTolerance": "7", "DeNovoTolerance": "10" @@ -432,6 +459,17 @@ }, { "msmsParams": { + "Name": "ITMS", + "MatchToleranceInPpm": "false", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "false", + "Topx": "8", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "0.5", "DeisotopeTolerance": "0.15", "DeNovoTolerance": "0.25" @@ -439,6 +477,17 @@ }, { "msmsParams": { + "Name": "TOF", + "MatchToleranceInPpm": "true", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "true", + "Topx": "10", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "40", "DeisotopeTolerance": "0.01", "DeNovoTolerance": "0.02" @@ -446,6 +495,17 @@ }, { "msmsParams": { + "Name": "Unknown", + "MatchToleranceInPpm": "false", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "false", + "Topx": "8", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "0.5", "DeisotopeTolerance": "0.15", "DeNovoTolerance": "0.25" From f47c6fc1ae4a954d74ce4bc48cb33552e96071b6 Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 19 Nov 2023 18:11:08 +0100 Subject: [PATCH 14/15] :sparkles: extract ms2 parameter based on specified method - MQ stores several settings in parameter file, which are then applied based on information stored in the rawfile metadata --- proteobench/io/params/maxquant.py | 21 ++- test/params/mqpar1.5.3.30_noMBR.xml | 253 
++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 test/params/mqpar1.5.3.30_noMBR.xml diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index 35fe15fa..979e9f40 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -132,10 +132,15 @@ def build_Series_from_records(records, index_length=4): return pd.Series((v for (k, v) in records), index=idx) -def extract_params(fname) -> ProteoBenchParameters: +def extract_params(fname, ms2frac="FTMS") -> ProteoBenchParameters: params = ProteoBenchParameters() record = read_file(fname) + # select ms2 fragmentation method specified by parameter + # MaxQuant does this to our knowledge based on the binary rawfile metadata + record["msmsParamsArray"] = [ + d for d in record["msmsParamsArray"] if d["msmsParams"]["Name"] == ms2frac + ] record = build_Series_from_records(record, 4).sort_index() params.search_engine = "Andromeda" params.software_version = record.loc["maxQuantVersion"].squeeze() @@ -147,7 +152,17 @@ def extract_params(fname) -> ProteoBenchParameters: pd.IndexSlice["parameterGroups", "parameterGroup", "mainSearchTol", :] ].squeeze() params.precursor_mass_tolerance = f"{precursor_mass_tolerance} ppm" - fragment_mass_tolerance = None # ! differences between version >1.6 and <=1.5 + # ! 
differences between version >1.6 and <=1.5 + fragment_mass_tolerance = record.loc[ + pd.IndexSlice["msmsParamsArray", "msmsParams", "MatchTolerance", :] + ].squeeze() + in_ppm = bool( + record.loc[ + pd.IndexSlice["msmsParamsArray", "msmsParams", "MatchToleranceInPpm", :] + ].squeeze() + ) + if in_ppm: + fragment_mass_tolerance = f"{fragment_mass_tolerance} ppm" params.fragment_mass_tolerance = fragment_mass_tolerance params.enzyme = record.loc[ ("parameterGroups", "parameterGroup", "enzymes", "string") @@ -214,5 +229,5 @@ def extract_params(fname) -> ProteoBenchParameters: record = build_Series_from_records(record, 4) record = record.to_frame("run_identifier") record.to_csv(Path(test_file).with_suffix(".csv")) - params = extract_params(test_file) + params = extract_params(test_file, ms2frac="FTMS") pprint(params.__dict__) diff --git a/test/params/mqpar1.5.3.30_noMBR.xml b/test/params/mqpar1.5.3.30_noMBR.xml new file mode 100644 index 00000000..18f177d8 --- /dev/null +++ b/test/params/mqpar1.5.3.30_noMBR.xml @@ -0,0 +1,253 @@ + + + Session1 + 1.5.3.30 + + 3 + false + + false + false + -1.7976931348623157E+308 + 1.7976931348623157E+308 + false + false + false + + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + + + A_Sample_Alpha_01 + A_Sample_Alpha_02 + A_Sample_Alpha_03 + B_Sample_Alpha_01 + B_Sample_Alpha_02 + B_Sample_Alpha_03 + + + 32767 + 32767 + 32767 + 32767 + 32767 + 32767 + + + 0 
+ 0 + 0 + 0 + 0 + 0 + + + + 7 + 2 + false + false + true + 1 + NaN + NaN + MatchFromAndTo + 8 + true + 35 + true + 1.4 + false + 500 + 0 + 0 + + + + false + false + 3 + 6 + 100000 + true + false + 2 + true + 0 + 5 + 2 + 1 + + Trypsin/P + + + false + false + + Oxidation (M) + Acetyl (Protein N-term) + + false + + + + false + + + true + 20 + 4.5 + true + 2 + true + 0.6 + 0.6 + true + false + 70 + false + + 0 + 0 + 0 + NaN + NaN + false + NaN + NaN + 0 + 0 + 0 + 0 + true + false + 0 + 0 + 0 + 0 + false + + PeptidesWithCleavedLinker + Standard + 0 + 0 + 0 + + + + Carbamidomethyl (C) + + + /users/user/EuBIC benchmarking + project\MQ15330_noMBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta + + + + true + false + false + true + true + revert + true + 100 + 4600 + true + true + 0 + 6 + 0 + 40 + true + false + false + false + false + 0 + 0 + false + false + false + false + Species + false + false + true + false + true + false + 7 + 0.01 + 0.01 + 0.01 + 8 + 25 + true + 1 + 1 + 0 + false + true + false + + 2 + true + + Oxidation (M) + Acetyl (Protein N-term) + + 0 + 0 + 25 + 15 + + 200 + + + 20 + 7 + 10 + + + 0.5 + 0.15 + 0.25 + + + 40 + 0.01 + 0.02 + + + 0.5 + 0.15 + 0.25 + + + 0 + 1 + none + \ No newline at end of file From 769624a74a384185cd12e1f73a6db8a0de398f93 Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 19 Nov 2023 19:19:27 +0100 Subject: [PATCH 15/15] :construction_worker: - see open PR #95 --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 42a9cd11..d0797a61 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3