Skip to content

Commit

Permalink
Remove redundant bytes conversion during truncation
Browse files Browse the repository at this point in the history
  • Loading branch information
devonh committed Jun 24, 2024
1 parent 11916b5 commit 1897bc1
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions sygnal/gcmpushkin.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,18 +708,20 @@ def _build_data(
if data[attr] is not None and isinstance(data[attr], str):
# The only `attr` that shouldn't be of type `str` is `content`,
# which is handled explicitly later on.
if len(bytes(data[attr], "utf-8")) > MAX_BYTES_PER_FIELD:
data[attr], truncated = truncate_str(
data[attr], MAX_BYTES_PER_FIELD
)
if truncated:
overflow_fields += 1
data[attr] = truncate_str(data[attr], MAX_BYTES_PER_FIELD)

if api_version is APIVersion.V1:
if isinstance(data.get("content"), dict):
for attr, value in data["content"].items():
if not isinstance(value, str):
continue
if len(bytes(value, "utf-8")) > MAX_BYTES_PER_FIELD:
value, truncated = truncate_str(value, MAX_BYTES_PER_FIELD)
if truncated:
overflow_fields += 1
value = truncate_str(value, MAX_BYTES_PER_FIELD)
data["content_" + attr] = value
del data["content"]

Expand All @@ -743,9 +745,12 @@ def _build_data(
return data


def truncate_str(input: str, max_length: int) -> tuple[str, bool]:
    """Truncate ``input`` so its UTF-8 encoding fits within ``max_length`` bytes.

    Args:
        input: The string to (possibly) truncate.
        max_length: Maximum permitted size, in bytes, of the UTF-8 encoding
            of the returned string.

    Returns:
        A ``(text, truncated)`` tuple. ``truncated`` is ``False`` and ``text``
        is ``input`` unchanged when the string already fits. Otherwise
        ``truncated`` is ``True`` and ``text`` is the longest prefix of whole
        characters that fits, followed by "…" when there is room for it.
    """
    ellipsis = "…"
    str_bytes = input.encode("utf-8")
    if len(str_bytes) <= max_length:
        return (input, False)

    ellipsis_size = len(ellipsis.encode("utf-8"))
    if max_length >= ellipsis_size:
        # Reserve room for the ellipsis so the result still fits the limit.
        budget = max_length - ellipsis_size
        suffix = ellipsis
    else:
        # The limit cannot even hold the ellipsis. Clamp at zero so the slice
        # below never goes negative (a negative stop would count from the END
        # of the bytes and return almost the whole string).
        budget = max(max_length, 0)
        suffix = ""

    # Cutting at a byte offset may split a multi-byte character. The input is
    # valid UTF-8, so the only undecodable bytes are that trailing partial
    # character, which "ignore" drops.
    prefix = str_bytes[:budget].decode("utf-8", errors="ignore")
    return (prefix + suffix, True)

0 comments on commit 1897bc1

Please sign in to comment.