Skip to content

Commit

Permalink
Add parse method to CigarHit
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Oct 11, 2024
1 parent a8c962d commit d539187
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions src/aligntools/cigar_hit.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dataclasses import dataclass
from functools import cached_property, reduce
from fractions import Fraction
import re

from aligntools.coordinate_mapping import CoordinateMapping
from aligntools.cigar_actions import CigarActions
Expand All @@ -19,6 +20,11 @@ def intervals_overlap(x: Tuple[int, int], y: Tuple[int, int]) -> bool:
return x[0] <= y[1] and x[1] >= y[0]


# Compiled pattern for the textual form
#   <cigar>@[<q_st>,<q_ei>]->[<r_st>,<r_ei>]
# `cigar` greedily captures one or more characters before an `@`, and each of
# the four coordinates is a run of digits (no sign is admitted).
# The pattern itself carries no `^`/`$` anchors, so how strictly the ends of
# the input are checked depends on which matching method the caller uses.
parse_expr = re.compile(r'(?P<cigar>.+)@'
r'\[(?P<q_st>\d+),(?P<q_ei>\d+)\]->'
r'\[(?P<r_st>\d+),(?P<r_ei>\d+)\]')


@dataclass(frozen=True)
class CigarHit:
"""
Expand Down Expand Up @@ -332,6 +338,51 @@ def translate(self, reference_delta: int, query_delta: int) -> 'CigarHit':
q_st=self.q_st + query_delta,
q_ei=self.q_ei + query_delta)

@staticmethod
def parse_cigar_hit(string: str) -> 'CigarHit':
    """
    Parse a string representation of a CigarHit and return the
    corresponding CigarHit object.

    This method is the inverse of CigarHit.__str__. The expected layout,
    as encoded by the module-level `parse_expr` pattern, is
    ``<cigar>@[<q_st>,<q_ei>]->[<r_st>,<r_ei>]``.

    Surrounding text is checked as follows: the pattern must match from
    the first character, and only whitespace (for example a trailing
    newline) may follow the closing ``]``.

    :param string: A string representation of a CigarHit.
    :return: CigarHit object parsed from the input string.
    :raises ParseError: If the string cannot be parsed into a CigarHit.
    """

    match = parse_expr.match(string)

    # `Pattern.match` anchors only at the start of the input. Without the
    # second check, arbitrary trailing text after the final `]` would be
    # silently discarded and the input accepted as valid.
    if not match or string[match.end():].strip():
        raise ex.ParseError(f"Invalid CigarHit string format: {string!r}.")

    try:
        # The pattern admits only digit runs here, but int() can still
        # raise ValueError for extremely long digit strings (CPython's
        # integer string conversion length limit), so keep the guard.
        q_st = int(match.group('q_st'))
        q_ei = int(match.group('q_ei'))
        r_st = int(match.group('r_st'))
        r_ei = int(match.group('r_ei'))
    except ValueError as e:
        raise ex.ParseError(f"Error parsing indices in: {string!r}.") \
            from e

    # A start index may exceed its end index by at most one
    # (start == end + 1 is accepted — presumably this denotes an empty
    # range; confirm against CigarHit's own invariants).
    if q_st > q_ei + 1:
        raise ex.ParseError(
            f"Query start index ({q_st}) "
            f"greater than end index ({q_ei} + 1) in: {string!r}.")

    if r_st > r_ei + 1:
        raise ex.ParseError(
            f"Reference start index ({r_st}) "
            f"greater than end index ({r_ei} + 1) in: {string!r}.")

    # NOTE(review): any error raised by Cigar.coerce for a malformed cigar
    # string propagates unchanged — confirm whether it is a ParseError.
    cigar: Cigar = Cigar.coerce(match.group('cigar'))

    # The constructor takes reference coordinates before query ones,
    # the reverse of their order in the serialized string.
    return CigarHit(cigar, r_st, r_ei, q_st, q_ei)

def __repr__(self):
return 'CigarHit(%r, r_st=%r, r_ei=%r, q_st=%r, q_ei=%r)' \
% (self.cigar, self.r_st, self.r_ei, self.q_st, self.q_ei)
Expand Down

0 comments on commit d539187

Please sign in to comment.