diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d3c418..338597f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.4] - 2024-03-18 + +### Added + +- `Peptidoform`: Support adding and applying global terminal modifications. For now, this uses a + workaround while waiting for official support and an implementation in Pyteomics. See + HUPO-PSI/ProForma#6. + +## [0.7.3] - 2024-03-04 + +### Changed + +- `io.xtandem`: Parse double mass modifications as two separate modifications instead of merging and + summing their mass shifts into a single modification. +- `io.xtandem`: Avoid float formatting issues when parsing modification mass label. +- `io.xtandem`: Parse all proteins into `protein_list` instead of only the first one. +- `io.tsv`: Log a warning instead of raising an exception when a TSV row cannot be parsed. 
+ ## [0.7.2] - 2023-11-29 ### Fixed diff --git a/psm_utils/__init__.py b/psm_utils/__init__.py index 2cce631..cf3cb66 100644 --- a/psm_utils/__init__.py +++ b/psm_utils/__init__.py @@ -1,6 +1,6 @@ """Common utilities for parsing and handling PSMs, and search engine results.""" -__version__ = "0.7.3" +__version__ = "0.7.4" from warnings import filterwarnings @@ -12,6 +12,6 @@ module="psims.mzmlb", ) -from psm_utils.peptidoform import Peptidoform -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList +from psm_utils.peptidoform import Peptidoform # noqa: E402, F401 +from psm_utils.psm import PSM # noqa: E402, F401 +from psm_utils.psm_list import PSMList # noqa: E402, F401 diff --git a/psm_utils/io/maxquant.py b/psm_utils/io/maxquant.py index b34c25d..99b13a1 100644 --- a/psm_utils/io/maxquant.py +++ b/psm_utils/io/maxquant.py @@ -14,7 +14,6 @@ from psm_utils.io._base_classes import ReaderBase from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList logger = logging.getLogger(__name__) diff --git a/psm_utils/io/tsv.py b/psm_utils/io/tsv.py index ca3105e..9fc0089 100644 --- a/psm_utils/io/tsv.py +++ b/psm_utils/io/tsv.py @@ -73,7 +73,7 @@ def __iter__(self): for row in reader: try: yield PSM(**self._parse_entry(row)) - except ValidationError as e: + except ValidationError: logger.warning("Could not parse PSM from row: `{row}`") continue diff --git a/psm_utils/peptidoform.py b/psm_utils/peptidoform.py index f56f98f..cb0482a 100644 --- a/psm_utils/peptidoform.py +++ b/psm_utils/peptidoform.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections import defaultdict from typing import Iterable, List, Tuple, Union import numpy as np @@ -14,7 +15,7 @@ class Peptidoform: Peptide sequence, modifications and charge state represented in ProForma notation. 
""" - def __init__(self, proforma_sequence: [str, proforma.ProForma]) -> None: + def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: """ Peptide sequence, modifications and charge state represented in ProForma notation. @@ -454,7 +455,7 @@ def add_fixed_modifications( See also -------- - psm_utils.peptidoform.Peptidoform.add_fixed_modifications + psm_utils.peptidoform.Peptidoform.apply_fixed_modifications Examples -------- @@ -463,6 +464,14 @@ def add_fixed_modifications( >>> peptidoform.proforma '<[Carbamidomethyl]@C>ATPEILTCNSIGCLK' + Notes + ----- + While globally defined terminal modifications are not explicitly supported in ProForma v2, + this function supports adding terminal modifications using the ``N-term`` and ``C-term`` + targets in place of an amino acid target. These global modifications are supported in the + :py:meth:`psm_utils.peptidoform.Peptidoform.apply_fixed_modifications` method through a + workaround. See https://github.com/HUPO-PSI/ProForma/issues/6 for discussions on the issue. 
+ """ if isinstance(modification_rules, dict): modification_rules = modification_rules.items() @@ -497,13 +506,10 @@ def apply_fixed_modifications(self): """ if self.properties["fixed_modifications"]: # Setup target_aa -> modification_list dictionary - rule_dict = {} + rule_dict = defaultdict(list) for rule in self.properties["fixed_modifications"]: for target_aa in rule.targets: - try: - rule_dict[target_aa].append(rule.modification_tag) - except KeyError: - rule_dict[target_aa] = [rule.modification_tag] + rule_dict[target_aa].append(rule.modification_tag) # Apply modifications to sequence for i, (aa, site_mods) in enumerate(self.parsed_sequence): @@ -513,6 +519,14 @@ def apply_fixed_modifications(self): else: self.parsed_sequence[i] = (aa, rule_dict[aa]) + # Apply terminal modifications + for term, term_name in [("n_term", "N-term"), ("c_term", "C-term")]: + if term_name in rule_dict: + if self.properties[term]: + self.properties[term].extend(rule_dict[term_name]) + else: + self.properties[term] = rule_dict[term_name] + # Remove fixed modifications self.properties["fixed_modifications"] = [] @@ -526,7 +540,6 @@ def format_number_as_string(num): return plus + num - class PeptidoformException(PSMUtilsException): """Error while handling :py:class:`Peptidoform`.""" diff --git a/tests/test_peptidoform.py b/tests/test_peptidoform.py index ccdb6cd..f36e3a2 100644 --- a/tests/test_peptidoform.py +++ b/tests/test_peptidoform.py @@ -51,6 +51,20 @@ def test_rename_modifications(self): peptidoform.rename_modifications(label_mapping) assert peptidoform.proforma == expected_out + def test_add_apply_fixed_modifications(self): + test_cases = [ + ("ACDEK", [("Cmm", ["C"])], "AC[Cmm]DEK"), + ("AC[Cmm]DEK", [("SecondMod", ["C"])], "AC[Cmm][SecondMod]DEK"), + ("ACDEK", [("TMT6plex", ["K", "N-term"])], "[TMT6plex]-ACDEK[TMT6plex]"), + ("ACDEK-[CT]", [("TMT6plex", ["K", "N-term"])], "[TMT6plex]-ACDEK[TMT6plex]-[CT]"), + ] + + for test_case_in, fixed_modifications, expected_out in 
test_cases: + peptidoform = Peptidoform(test_case_in) + peptidoform.add_fixed_modifications(fixed_modifications) + peptidoform.apply_fixed_modifications() + assert peptidoform.proforma == expected_out + def test_format_number_as_string(): test_cases = [