Skip to content

Commit

Permalink
Bugfix: infinite loop in term.wrap() with a width of 1
Browse files Browse the repository at this point in the history
From #273,

> The following code enters an infinite loop:

    import blessed
    blessed.Terminal().wrap('\u5973', 1)

This is fixed with an explicit test: when the given individual sequence is
of length '2', the width is '1', and cur_len is '0', we cannot
break down this "Wide" character any further -- so it is allowed to
flow outside the given cell.

- 'faulthandler_timeout = 30' is added to [pytest] in tox.ini,
- Tests for East-Asian, Emoji, and ZWJ are added
- Further noting that blessed gets ZWJ wrong
  • Loading branch information
jquast committed Jun 26, 2024
1 parent c28b53f commit 593c286
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 9 deletions.
30 changes: 22 additions & 8 deletions blessed/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,11 @@ def _wrap_chunks(self, chunks):
while chunks:
chunk_len = Sequence(chunks[-1], term).length()
if cur_len + chunk_len > width:
if chunk_len > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
break
cur_line.append(chunks.pop())
cur_len += chunk_len
if chunks and Sequence(chunks[-1], term).length() > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
if drop_whitespace and (
cur_line and Sequence(cur_line[-1], term).strip() == ''):
del cur_line[-1]
Expand All @@ -200,10 +200,18 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""
Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.
This simply ensures that word boundaries are not broken mid-sequence, as standard python
textwrap would incorrectly determine the length of a string containing sequences, and may
also break consider sequences part of a "word" that may be broken by hyphen (``-``), where
this implementation corrects both.
This method ensures that word boundaries are not broken mid-sequence, as
standard python textwrap would incorrectly determine the length of a
string containing sequences and wide characters it would also break
these "words" that would be broken by hyphen (``-``), this
implementation corrects both.
This is done by mutating the passed arguments, removing items from
'reversed_chunks' and appending them to 'cur_line'.
However, some characters (east-asian, emoji, etc.) cannot be split any
less than 2 cells, so in the case of a width of 1, we have no choice
but to allow those characters to flow outside of the given cell.
"""
# Figure out when indent is larger than the specified width, and make
# sure at least one character is stripped off on every pass
Expand All @@ -217,8 +225,14 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
idx = nxt = 0
for text, _ in iter_parse(term, chunk):
nxt += len(text)
if Sequence(chunk[:nxt], term).length() > space_left:
break
seq_length = Sequence(chunk[:nxt], term).length()
if seq_length > space_left:
if cur_len == 0 and width == 1 and seq_length == 2:
# Emoji etc. cannot be split under 2 cells, so in the case of a width of 1, we have no choice
# but to allow those characters to flow outside of the given cell.
pass
else:
break
idx = nxt
cur_line.append(chunk[:idx])
reversed_chunks[-1] = chunk[idx:]
Expand Down
18 changes: 17 additions & 1 deletion tests/test_length_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ def child():
child()


def test_length_with_zwj_is_wrong():
    """Because of the way Zero-Width Joiner (ZWJ) is measured, blessed gets this wrong."""
    # But for the time being, so do many terminals (~85%), so it's not a huge deal..
    # https://ucs-detect.readthedocs.io/results.html
    @as_subprocess
    def child():
        term = TestTerminal()
        # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy
        given = term.bold_red(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466')
        # Pins the current measurement: each of the four emoji counts as 2 cells
        # and each of the three joiners as 0, for a total of 8.
        expected = sum((2, 0, 2, 0, 2, 0, 2))

        # exercise,
        assert term.length(given) == expected

    # The inner function must be invoked, as the sibling tests do; without this
    # call the assertion above is defined but never executed.
    child()



def test_length_ansiart():
"""Test length of ANSI art"""
@as_subprocess
Expand All @@ -60,7 +76,7 @@ def child(kind):


def test_sequence_length(all_terms):
"""Ensure T.length(string containing sequence) is correcterm."""
"""Ensure T.length(string containing sequence) is correct."""
# pylint: disable=too-complex,too-many-statements
@as_subprocess
def child(kind):
Expand Down
16 changes: 16 additions & 0 deletions tests/test_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,19 @@ def child():
assert expected == result

child()

def test_east_asian_emojis_width_1():
    """Wrap east-asian and emoji characters to a single column, the narrowest edge-case."""
    @as_subprocess
    def child():
        term = TestTerminal()

        # Reported by @grayjk in https://github.com/jquast/blessed/issues/273:
        # a lone wide character wrapped to width 1 comes back as a single line.
        wide_char = '\u5973'
        assert term.wrap(wide_char, 1) == [wide_char]

        # As with test_length_with_zwj_is_wrong(), blessed also gets ZWJ wrong when wrapping.
        # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy
        zwj_family = u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466'
        # The sequence is currently split after each joiner rather than kept whole.
        split_at_joiners = [
            '\U0001F469\u200D',
            '\U0001F469\u200D',
            '\U0001F467\u200D',
            '\U0001F466',
        ]
        assert term.wrap(zwj_family, 1) == split_at_joiners

    child()
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ addopts =
--ignore=setup.py
--ignore=.tox
--junit-xml=.tox/results.{envname}.xml
# if any test takes over 30 seconds, dump traceback
faulthandler_timeout = 30
filterwarnings = error
junit_family = xunit1
log_format=%(levelname)s %(relativeCreated)2.2f %(filename)s:%(lineno)d %(message)s
Expand Down

0 comments on commit 593c286

Please sign in to comment.