From 593c286b52537f519dfe3b7ff4cfcde4b72e6bf7 Mon Sep 17 00:00:00 2001
From: Jeff Quast <contact@jeffquast.com>
Date: Wed, 26 Jun 2024 11:59:16 -0400
Subject: [PATCH] Bugix infinite loop in term.wrap() with 1 length

From https://github.com/jquast/blessed/issues/273,

> The following code enters an infinite loop:

    import blessed
    blessed.Terminal().wrap('\u5973', 1)

This fixes by explicit test: when the given individual sequence is
of length '2', and the width is '1', and the cur_len is '0', we cannot
break down this "Wide" character any further -- so it is allowed to
flow outside the given cell.

- 'faulthandler_timeout = 30' is added to [pytest] in tox.ini,
- Tests for East-Asian, Emoji, and ZWJ are added
- Further noting that blessed gets ZWJ wrong
---
 blessed/sequences.py          | 30 ++++++++++++++++++++++--------
 tests/test_length_sequence.py | 18 +++++++++++++++++-
 tests/test_wrap.py            | 16 ++++++++++++++++
 tox.ini                       |  2 ++
 4 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/blessed/sequences.py b/blessed/sequences.py
index 486294cf..fca03bea 100644
--- a/blessed/sequences.py
+++ b/blessed/sequences.py
@@ -184,11 +184,11 @@ def _wrap_chunks(self, chunks):
             while chunks:
                 chunk_len = Sequence(chunks[-1], term).length()
                 if cur_len + chunk_len > width:
+                    if chunk_len > width:
+                        self._handle_long_word(chunks, cur_line, cur_len, width)
                     break
                 cur_line.append(chunks.pop())
                 cur_len += chunk_len
-            if chunks and Sequence(chunks[-1], term).length() > width:
-                self._handle_long_word(chunks, cur_line, cur_len, width)
             if drop_whitespace and (
                     cur_line and Sequence(cur_line[-1], term).strip() == ''):
                 del cur_line[-1]
@@ -200,10 +200,18 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         """
         Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.
 
-        This simply ensures that word boundaries are not broken mid-sequence, as standard python
-        textwrap would incorrectly determine the length of a string containing sequences, and may
-        also break consider sequences part of a "word" that may be broken by hyphen (``-``), where
-        this implementation corrects both.
+        This method ensures that word boundaries are not broken mid-sequence, as
+        standard python textwrap would incorrectly determine the length of a
+        string containing sequences and wide characters it would also break
+        these "words" that would be broken by hyphen (``-``), this
+        implementation corrects both.
+
+        This is done by mutating the passed arguments, removing items from
+        'reversed_chunks' and appending them to 'cur_line'.
+
+        However, some characters (east-asian, emoji, etc.) cannot be split any
+        less than 2 cells, so in the case of a width of 1, we have no choice
+        but to allow those characters to flow outside of the given cell.
         """
         # Figure out when indent is larger than the specified width, and make
         # sure at least one character is stripped off on every pass
@@ -217,8 +225,14 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
             idx = nxt = 0
             for text, _ in iter_parse(term, chunk):
                 nxt += len(text)
-                if Sequence(chunk[:nxt], term).length() > space_left:
-                    break
+                seq_length = Sequence(chunk[:nxt], term).length()
+                if seq_length > space_left:
+                    if cur_len == 0 and width == 1 and seq_length == 2:
+                        # Emoji etc. cannot be split under 2 cells, so in the case of a width of 1, we have no choice
+                        # but to allow those characters to flow outside of the given cell.
+                        pass
+                    else:
+                        break
                 idx = nxt
             cur_line.append(chunk[:idx])
             reversed_chunks[-1] = chunk[idx:]
diff --git a/tests/test_length_sequence.py b/tests/test_length_sequence.py
index 03db8505..b65a1af8 100644
--- a/tests/test_length_sequence.py
+++ b/tests/test_length_sequence.py
@@ -37,6 +37,22 @@ def child():
     child()
 
 
+def test_length_with_zwj_is_wrong():
+    """Because of the way Zero-Width Joiner (ZWJ) is measured, blessed gets this wrong"""
+    # But for the time being, so do many terminals (~85%), so its not a huge deal..
+    # https://ucs-detect.readthedocs.io/results.html
+    @as_subprocess
+    def child():
+        term = TestTerminal()
+        # RGI_Emoji_ZWJ_Sequence  ; family: woman, woman, girl, boy
+        given = term.bold_red(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466')
+        expected = sum((2, 0, 2, 0, 2, 0, 2))
+
+        # exercise,
+        assert term.length(given) == expected
+
+
+
 def test_length_ansiart():
     """Test length of ANSI art"""
     @as_subprocess
@@ -60,7 +76,7 @@ def child(kind):
 
 
 def test_sequence_length(all_terms):
-    """Ensure T.length(string containing sequence) is correcterm."""
+    """Ensure T.length(string containing sequence) is correct."""
     # pylint: disable=too-complex,too-many-statements
     @as_subprocess
     def child(kind):
diff --git a/tests/test_wrap.py b/tests/test_wrap.py
index e74aa2a9..f31986e9 100644
--- a/tests/test_wrap.py
+++ b/tests/test_wrap.py
@@ -113,3 +113,19 @@ def child():
         assert expected == result
 
     child()
+
+def test_east_asian_emojis_width_1():
+    """Tests edge-case of east-asian and emoji characters split into single columns."""
+    @as_subprocess
+    def child():
+        term = TestTerminal()
+        # by @grayjk from https://github.com/jquast/blessed/issues/273
+        result = term.wrap('\u5973', 1)
+        assert result == ['\u5973']
+
+        # much like test_length_with_zwj_is_wrong(), blessed gets ZWJ wrong when wrapping, also.
+        # RGI_Emoji_ZWJ_Sequence  ; family: woman, woman, girl, boy
+        result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1)
+        assert result == ['\U0001F469\u200D', '\U0001F469\u200D', '\U0001F467\u200D', '\U0001F466']
+
+    child()
\ No newline at end of file
diff --git a/tox.ini b/tox.ini
index d3a44051..b18eb6a6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -275,6 +275,8 @@ addopts =
     --ignore=setup.py
     --ignore=.tox
     --junit-xml=.tox/results.{envname}.xml
+# if any test takes over 30 seconds, dump traceback
+faulthandler_timeout = 30
 filterwarnings = error
 junit_family = xunit1
 log_format=%(levelname)s %(relativeCreated)2.2f %(filename)s:%(lineno)d %(message)s