Skip to content

Commit

Permalink
TSV: Raise exception if three consecutive rows cannot be parsed
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Aug 14, 2024
1 parent 8438dbc commit f6e4086
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
15 changes: 12 additions & 3 deletions psm_utils/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@
from pydantic import ValidationError

from psm_utils.io._base_classes import ReaderBase, WriterBase
from psm_utils.io._utils import set_csv_field_size_limit
from psm_utils.io.exceptions import PSMUtilsIOException
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io._utils import set_csv_field_size_limit

set_csv_field_size_limit()

def __iter__(self):
    """
    Iterate over the TSV file and yield PSMs one by one.

    Rows that fail PSM validation are logged with a warning and skipped. The
    count of failed rows is reset after every successfully parsed row, so only
    consecutive failures count towards the limit.

    Raises
    ------
    PSMUtilsIOException
        If three consecutive rows cannot be parsed into a PSM.

    """
    with open(self.filename, "rt") as open_file:
        reader = csv.DictReader(open_file, delimiter="\t")
        failed_rows = 0  # Number of consecutive rows that failed validation
        for row in reader:
            try:
                yield PSM(**self._parse_entry(row))
            except ValidationError as e:
                failed_rows += 1
                logger.warning(f"Could not parse PSM from row: `{row}`")
                # No `continue` before this check: it must run on every failure,
                # otherwise the limit on consecutive failures is never enforced.
                if failed_rows >= 3:
                    raise PSMUtilsIOException(
                        "Could not parse PSM from three consecutive rows. Verify that the "
                        "file is formatted correctly as a psm_utils TSV file or that the "
                        "correct file type reader is used."
                    ) from e
            else:
                # A row parsed successfully; the failure streak is broken.
                failed_rows = 0

@staticmethod
def _parse_entry(entry: dict) -> dict:
Expand Down
4 changes: 4 additions & 0 deletions tests/test_data/test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spectrum_id peptidoform
peptide1 ACDEK/2
peptide2 AC[Carbamidomethyl]DEFGR/3
peptide3 [Acetyl]-AC[Carbamidomethyl]DEFGHIK/2
19 changes: 19 additions & 0 deletions tests/test_io/test_tsv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests for psm_utils.io.tsv."""

import pytest

from psm_utils.io.exceptions import PSMUtilsIOException # noqa: F401
from psm_utils.io.tsv import TSVReader, TSVWriter # noqa: F401

test_cases = [
Expand Down Expand Up @@ -30,3 +33,19 @@ class TestTSVReader:
def test__parse_entry(self):
    """Check ``TSVReader._parse_entry`` against every input/expected pair in ``test_cases``."""
    for raw_entry, parsed_entry in test_cases:
        result = TSVReader._parse_entry(raw_entry)
        assert result == parsed_entry

def test_iter(self):
    """Check that the first PSM read from the test TSV file has the expected fields."""
    reader = TSVReader("tests/test_data/test.tsv")
    # Fetch the first PSM explicitly so that the test fails, instead of passing
    # without running any assertion, when the reader yields nothing.
    psm = next(iter(reader), None)
    assert psm is not None, "TSVReader did not yield any PSM"
    assert psm.peptidoform == "ACDEK/2"
    assert psm.spectrum_id == "peptide1"
    assert psm.provenance_data == {}
    assert psm.metadata == {}
    assert psm.rescoring_features == {}

def test_iter_raises(self):
    """Check that iterating over a non-psm_utils TSV file raises ``PSMUtilsIOException``."""
    with TSVReader("tests/test_data/peprec.tsv") as reader, pytest.raises(
        PSMUtilsIOException
    ):
        # Exhaust the reader; the exception is expected during iteration.
        for _ in reader:
            pass

0 comments on commit f6e4086

Please sign in to comment.