Skip to content

Commit

Permalink
Fix: Prevent splitting multi-byte characters in message snippets (#404)
Browse files Browse the repository at this point in the history
  • Loading branch information
axllent committed Dec 10, 2024
1 parent b1c745f commit b27a28c
Showing 1 changed file with 27 additions and 2 deletions.
29 changes: 27 additions & 2 deletions internal/tools/snippets.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func CreateSnippet(text, html string) string {
return data
}

return data[0:limit] + "..."
return truncate(data, limit) + "..."
}

if text != "" {
Expand All @@ -37,8 +37,33 @@ func CreateSnippet(text, html string) string {
return text
}

return text[0:limit] + "..."
return truncate(text, limit) + "..."
}

return ""
}

// truncate returns the longest prefix of s that is at most n bytes long and
// does not end in the middle of a multi-byte UTF-8 encoding.
//
// If n >= len(s), s is returned unchanged. If n <= 0, the empty string is
// returned. The result may be shorter than n bytes when the byte at offset n
// falls inside a multi-byte character; in that case the whole partial
// character is dropped.
//
// Adapted from Tailscale's truncate helper.
// See https://github.com/tailscale/tailscale/blob/main/util/truncate/truncate.go
func truncate(s string, n int) string {
	if n >= len(s) {
		return s
	}

	// Guard against a negative limit, which would otherwise panic when slicing.
	if n <= 0 {
		return ""
	}

	// s[n] is the first byte that would be cut off. A UTF-8 continuation byte
	// has the bit pattern 10xxxxxx; while the first dropped byte is one, the
	// cut lies inside a multi-byte encoding, so back up to the byte that
	// starts it. When s[n] is not a continuation byte, s[:n] already ends on a
	// character boundary and a complete trailing character is kept.
	for n > 0 && s[n]&0xc0 == 0x80 {
		n--
	}

	return s[:n]
}

0 comments on commit b27a28c

Please sign in to comment.