Skip to content

Commit

Permalink
Implement final to-dos
Browse files Browse the repository at this point in the history
- terminal modifications
- is_decoy field
- use measured ion mobility
- use tims_score
  • Loading branch information
RalfG committed Mar 25, 2024
1 parent a0dc262 commit a2e4b6a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 11 deletions.
27 changes: 18 additions & 9 deletions psm_utils/io/timscore.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Reader for Proteome Discoverer MSF PSM files."""
"""Reader for TIMScore Parquet files."""

import logging
import re
from pathlib import Path
from typing import Union

Expand All @@ -9,6 +10,7 @@

from psm_utils import PSM
from psm_utils.io._base_classes import ReaderBase
from psm_utils.peptidoform import format_number_as_string

logger = logging.getLogger(__name__)

Expand All @@ -32,6 +34,7 @@ def __init__(
"""
super().__init__(filename, *args, **kwargs)
self._decoy_pattern = re.compile(r"^Reverse_")

self.data = pd.read_parquet(self.filename)

Expand All @@ -47,11 +50,11 @@ def __iter__(self):
entry.stripped_peptide, entry.ptms, entry.ptm_locations, entry.precursor_charge
),
spectrum_id=entry.ms2_id,
is_decoy=None, # TODO: Parse from protein?
score=entry.tims_score, # TODO: Correct score?
is_decoy=all(self._decoy_pattern.match(p) for p in entry.locus_name),
score=entry.tims_score,
precursor_mz=entry.precursor_mz,
retention_time=entry.rt,
ion_mobility=entry.corrected_ook0,
ion_mobility=entry.ook0,
protein_list=list(entry.locus_name),
rank=entry.rank,
source="TIMScore",
Expand All @@ -63,7 +66,7 @@ def __iter__(self):
metadata={
"leading_aa": str(entry.leading_aa),
"trailing_aa": str(entry.trailing_aa),
"ook0": str(entry.ook0),
"corrected_ook0": str(entry.corrected_ook0),
},
rescoring_features={
"x_corr_score": float(entry.x_corr_score),
Expand All @@ -81,9 +84,15 @@ def _parse_peptidoform(
stripped_peptide: str, ptms: np.ndarray, ptm_locations: np.ndarray, precursor_charge: int
) -> str:
"""Parse peptide sequence and modifications to ProForma."""
# TODO: How are terminal modifications handled?
peptidoform = list(stripped_peptide)
n_term = ""
c_term = ""
for ptm, ptm_location in zip(ptms, ptm_locations):
peptidoform[ptm_location] = f"{peptidoform[ptm_location]}[{ptm}]"
peptidoform.append(f"/{precursor_charge}")
return "".join(peptidoform)
ptm = format_number_as_string(ptm)
if ptm_location == -1:
n_term = f"[{ptm}]-"
elif ptm_location == len(peptidoform):
c_term = f"-[{ptm}]"
else:
peptidoform[ptm_location] = f"{peptidoform[ptm_location]}[{ptm}]"
return f"{n_term}{''.join(peptidoform)}{c_term}/{precursor_charge}"
7 changes: 5 additions & 2 deletions tests/test_io/test_timscore.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
def test_parse_peptidoform():
test_cases = [
("ACDMEK", np.array([]), np.array([]), 2, "ACDMEK/2"),
("ACDMEK", np.array([15.99]), np.array([3]), 2, "ACDM[15.99]EK/2"),
("ACDMEK", np.array([57.02, 15.99]), np.array([1, 3]), 2, "AC[57.02]DM[15.99]EK/2"),
("ACDMEK", np.array([15.99]), np.array([3]), 2, "ACDM[+15.99]EK/2"),
("ACDMEK", np.array([57.02, 15.99]), np.array([1, 3]), 2, "AC[+57.02]DM[+15.99]EK/2"),
("ACDMEK", np.array([42.01]), np.array([-1]), 2, "[+42.01]-ACDMEK/2"),
("ACDMEK", np.array([-0.98]), np.array([6]), 2, "ACDMEK-[-0.98]/2"),
("ACDMEK", np.array([42.01, -0.98]), np.array([-1, 6]), 2, "[+42.01]-ACDMEK-[-0.98]/2"),
]

for peptide, ptms, ptm_locations, precursor_charge, expected in test_cases:
Expand Down

0 comments on commit a2e4b6a

Please sign in to comment.