Skip to content

Commit

Permalink
Improve the algorithm that finds UNA when the file does not start with UNA
Browse files Browse the repository at this point in the history
  • Loading branch information
pulse-mind committed Oct 18, 2024
1 parent 3724dc1 commit 303cfd5
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions pydifact/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,25 @@ def parse(
# If there is a UNA, take the following 6 characters
# unconditionally, strip them, and make control Characters()
# for further parsing
idx_una = message.find("UNA")

# If the message starts with UNA
una_pattern = "UNA"
if message.startswith(una_pattern):
idx_una = 0
# Otherwise, search for the segment separator (') followed by UNA, so that a stray "UNA" inside free text (e.g. "lorem ipsum UNA lorem ipsum") is not mistaken for the UNA segment.
else:
una_pattern = "'UNA"
idx_una = message.find(una_pattern)
una_found = idx_una != -1

if una_found:
characters = Characters.from_str("UNA" + message[idx_una + 3: idx_una + 9])
idx_begin = idx_una + len(una_pattern)
idx_end = idx_begin + 6
characters = Characters.from_str(f"UNA{message[idx_begin: idx_end]}")

# Remove the UNA segment from the string.
# Everything before UNA is ignored, because UNA should be the first segment when una_found is true.
message = message[idx_una + 9 :].lstrip("\r\n")
message = message[idx_end:].lstrip("\r\n")

else:
# if no UNA header present, use default control characters
Expand Down

0 comments on commit 303cfd5

Please sign in to comment.