From 5a95fa04bb58da080e4e5d7a8e6c0d22494da13d Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 19 Nov 2023 17:39:05 +0100 Subject: [PATCH] :sparkles: Add parsing of attributes for MaxQuant 1.5 - from 1.6 onwards, information is given explicitly --- proteobench/io/params/maxquant.py | 4 +- test/params/mqpar1.5.3.30_MBR.csv | 60 ++++++++++++++++++++++++++++++ test/params/mqpar1.5.3.30_MBR.json | 60 ++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/proteobench/io/params/maxquant.py b/proteobench/io/params/maxquant.py index ec65f42e..35fe15fa 100644 --- a/proteobench/io/params/maxquant.py +++ b/proteobench/io/params/maxquant.py @@ -54,6 +54,8 @@ def add_record(data: dict, tag: str, record) -> dict: def read_xml_record(element: ET.Element) -> dict: """Read entire record in a nested dict structure.""" data = dict() + if element.attrib: + data.update(element.attrib) for child in element: if len(child) > 1 and child.tag: # if there is a list, process each element one by one @@ -125,7 +127,7 @@ def flatten_dict_of_dicts(d: dict, parent_key: str = "") -> dict: def build_Series_from_records(records, index_length=4): records = flatten_dict_of_dicts(records) idx = pd.MultiIndex.from_tuples( - (extend_tuple(k, index_length) for (k, v) in records) + (extend_tuple(k, index_length) for (k, _) in records) ) return pd.Series((v for (k, v) in records), index=idx) diff --git a/test/params/mqpar1.5.3.30_MBR.csv b/test/params/mqpar1.5.3.30_MBR.csv index 051fdbd7..eb160e35 100644 --- a/test/params/mqpar1.5.3.30_MBR.csv +++ b/test/params/mqpar1.5.3.30_MBR.csv @@ -1,4 +1,20 @@ ,,,,run_identifier +aifSilWeight,,,,0 +aifIsoWeight,,,,0 +aifTopx,,,,0 +aifCorrelation,,,,0 +aifCorrelationFirstPass,,,,0 +aifMinMass,,,,0 +aifMsmsTol,,,,0 +aifSecondPass,,,,false +aifIterative,,,,false +aifThresholdFdr,,,,0 +writeMsScansTable,,,,true +writeMsmsScansTable,,,,true +writeMs3ScansTable,,,,true +writeAllPeptidesTable,,,,true +writeMzRangeTable,,,,true 
+disableMd5,,,,false name,,,,Session1 maxQuantVersion,,,,1.5.3.30 tempFolder,,,, @@ -187,15 +203,59 @@ numberOfCandidatesMultiplexedMsms,,,,25 numberOfCandidatesMsms,,,,15 massDifferenceMods,,,, mainSearchMaxCombinations,,,,200 +msmsParamsArray,msmsParams,Name,,FTMS +msmsParamsArray,msmsParams,MatchToleranceInPpm,,true +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,true +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,true +msmsParamsArray,msmsParams,Deisotope,,true +msmsParamsArray,msmsParams,Topx,,12 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,20 msmsParamsArray,msmsParams,DeisotopeTolerance,,7 msmsParamsArray,msmsParams,DeNovoTolerance,,10 +msmsParamsArray,msmsParams,Name,,ITMS +msmsParamsArray,msmsParams,MatchToleranceInPpm,,false +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,false +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,0.5 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 +msmsParamsArray,msmsParams,Name,,TOF +msmsParamsArray,msmsParams,MatchToleranceInPpm,,true +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,true +msmsParamsArray,msmsParams,Topx,,10 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true 
+msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,40 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.01 msmsParamsArray,msmsParams,DeNovoTolerance,,0.02 +msmsParamsArray,msmsParams,Name,,Unknown +msmsParamsArray,msmsParams,MatchToleranceInPpm,,false +msmsParamsArray,msmsParams,DeisotopeToleranceInPpm,,false +msmsParamsArray,msmsParams,DeNovoToleranceInPpm,,false +msmsParamsArray,msmsParams,Deisotope,,false +msmsParamsArray,msmsParams,Topx,,8 +msmsParamsArray,msmsParams,HigherCharges,,true +msmsParamsArray,msmsParams,IncludeWater,,true +msmsParamsArray,msmsParams,IncludeAmmonia,,true +msmsParamsArray,msmsParams,DependentLosses,,true +msmsParamsArray,msmsParams,Recalibration,,false msmsParamsArray,msmsParams,MatchTolerance,,0.5 msmsParamsArray,msmsParams,DeisotopeTolerance,,0.15 msmsParamsArray,msmsParams,DeNovoTolerance,,0.25 diff --git a/test/params/mqpar1.5.3.30_MBR.json b/test/params/mqpar1.5.3.30_MBR.json index c8c8a3f2..29f170bc 100644 --- a/test/params/mqpar1.5.3.30_MBR.json +++ b/test/params/mqpar1.5.3.30_MBR.json @@ -1,4 +1,20 @@ { + "aifSilWeight": "0", + "aifIsoWeight": "0", + "aifTopx": "0", + "aifCorrelation": "0", + "aifCorrelationFirstPass": "0", + "aifMinMass": "0", + "aifMsmsTol": "0", + "aifSecondPass": "false", + "aifIterative": "false", + "aifThresholdFdr": "0", + "writeMsScansTable": "true", + "writeMsmsScansTable": "true", + "writeMs3ScansTable": "true", + "writeAllPeptidesTable": "true", + "writeMzRangeTable": "true", + "disableMd5": "false", "name": "Session1", "maxQuantVersion": "1.5.3.30", "tempFolder": null, @@ -425,6 +441,17 @@ "msmsParamsArray": [ { "msmsParams": { + "Name": "FTMS", + "MatchToleranceInPpm": "true", + "DeisotopeToleranceInPpm": "true", + "DeNovoToleranceInPpm": "true", + "Deisotope": "true", + "Topx": "12", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": 
"true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "20", "DeisotopeTolerance": "7", "DeNovoTolerance": "10" @@ -432,6 +459,17 @@ }, { "msmsParams": { + "Name": "ITMS", + "MatchToleranceInPpm": "false", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "false", + "Topx": "8", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "0.5", "DeisotopeTolerance": "0.15", "DeNovoTolerance": "0.25" @@ -439,6 +477,17 @@ }, { "msmsParams": { + "Name": "TOF", + "MatchToleranceInPpm": "true", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "true", + "Topx": "10", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "40", "DeisotopeTolerance": "0.01", "DeNovoTolerance": "0.02" @@ -446,6 +495,17 @@ }, { "msmsParams": { + "Name": "Unknown", + "MatchToleranceInPpm": "false", + "DeisotopeToleranceInPpm": "false", + "DeNovoToleranceInPpm": "false", + "Deisotope": "false", + "Topx": "8", + "HigherCharges": "true", + "IncludeWater": "true", + "IncludeAmmonia": "true", + "DependentLosses": "true", + "Recalibration": "false", "MatchTolerance": "0.5", "DeisotopeTolerance": "0.15", "DeNovoTolerance": "0.25"