From 593c286b52537f519dfe3b7ff4cfcde4b72e6bf7 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 11:59:16 -0400 Subject: [PATCH 01/13] Bugix infinite loop in term.wrap() with 1 length From https://github.com/jquast/blessed/issues/273, > The following code enters an infinite loop: import blessed blessed.Terminal().wrap('\u5973', 1) This fixes by explicit test: when the given individual sequence is of length '2', and the width is '1', and the cur_len is '0', we cannot break down this "Wide" character any further -- so it is allowed to flow outside the given cell. - 'faulthandler_timeout = 30' is added to [pytest] in tox.ini, - Tests for East-Asian, Emoji, and ZWJ are added - Further noting that blessed gets ZWJ wrong --- blessed/sequences.py | 30 ++++++++++++++++++++++-------- tests/test_length_sequence.py | 18 +++++++++++++++++- tests/test_wrap.py | 16 ++++++++++++++++ tox.ini | 2 ++ 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/blessed/sequences.py b/blessed/sequences.py index 486294cf..fca03bea 100644 --- a/blessed/sequences.py +++ b/blessed/sequences.py @@ -184,11 +184,11 @@ def _wrap_chunks(self, chunks): while chunks: chunk_len = Sequence(chunks[-1], term).length() if cur_len + chunk_len > width: + if chunk_len > width: + self._handle_long_word(chunks, cur_line, cur_len, width) break cur_line.append(chunks.pop()) cur_len += chunk_len - if chunks and Sequence(chunks[-1], term).length() > width: - self._handle_long_word(chunks, cur_line, cur_len, width) if drop_whitespace and ( cur_line and Sequence(cur_line[-1], term).strip() == ''): del cur_line[-1] @@ -200,10 +200,18 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): """ Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`. - This simply ensures that word boundaries are not broken mid-sequence, as standard python - textwrap would incorrectly determine the length of a string containing sequences, and may - also break consider sequences part of a "word" that may be broken by hyphen (``-``), where - this implementation corrects both. + This method ensures that word boundaries are not broken mid-sequence, as + standard python textwrap would incorrectly determine the length of a + string containing sequences and wide characters it would also break + these "words" that would be broken by hyphen (``-``), this + implementation corrects both. + + This is done by mutating the passed arguments, removing items from + 'reversed_chunks' and appending them to 'cur_line'. + + However, some characters (east-asian, emoji, etc.) cannot be split any + less than 2 cells, so in the case of a width of 1, we have no choice + but to allow those characters to flow outside of the given cell. """ # Figure out when indent is larger than the specified width, and make # sure at least one character is stripped off on every pass @@ -217,8 +225,14 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): idx = nxt = 0 for text, _ in iter_parse(term, chunk): nxt += len(text) - if Sequence(chunk[:nxt], term).length() > space_left: - break + seq_length = Sequence(chunk[:nxt], term).length() + if seq_length > space_left: + if cur_len == 0 and width == 1 and seq_length == 2: + # Emoji etc. cannot be split under 2 cells, so in the case of a width of 1, we have no choice + # but to allow those characters to flow outside of the given cell. + pass + else: + break idx = nxt cur_line.append(chunk[:idx]) reversed_chunks[-1] = chunk[idx:] diff --git a/tests/test_length_sequence.py b/tests/test_length_sequence.py index 03db8505..b65a1af8 100644 --- a/tests/test_length_sequence.py +++ b/tests/test_length_sequence.py @@ -37,6 +37,22 @@ def child(): child() +def test_length_with_zwj_is_wrong(): + """Because of the way Zero-Width Joiner (ZWJ) is measured, blessed gets this wrong""" + # But for the time being, so do many terminals (~85%), so its not a huge deal.. + # https://ucs-detect.readthedocs.io/results.html + @as_subprocess + def child(): + term = TestTerminal() + # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy + given = term.bold_red(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466') + expected = sum((2, 0, 2, 0, 2, 0, 2)) + + # exercise, + assert term.length(given) == expected + + + def test_length_ansiart(): """Test length of ANSI art""" @as_subprocess @@ -60,7 +76,7 @@ def child(kind): def test_sequence_length(all_terms): - """Ensure T.length(string containing sequence) is correcterm.""" + """Ensure T.length(string containing sequence) is correct.""" # pylint: disable=too-complex,too-many-statements @as_subprocess def child(kind): diff --git a/tests/test_wrap.py b/tests/test_wrap.py index e74aa2a9..f31986e9 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -113,3 +113,19 @@ def child(): assert expected == result child() + +def test_east_asian_emojis_width_1(): + """Tests edge-case of east-asian and emoji characters split into single columns.""" + @as_subprocess + def child(): + term = TestTerminal() + # by @grayjk from https://github.com/jquast/blessed/issues/273 + result = term.wrap('\u5973', 1) + assert result == ['\u5973'] + + # much like test_length_with_zwj_is_wrong(), blessed gets ZWJ wrong when wrapping, also. + # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy + result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1) + assert result == ['\U0001F469\u200D', '\U0001F469\u200D', '\U0001F467\u200D', '\U0001F466'] + + child() \ No newline at end of file diff --git a/tox.ini b/tox.ini index d3a44051..b18eb6a6 100644 --- a/tox.ini +++ b/tox.ini @@ -275,6 +275,8 @@ addopts = --ignore=setup.py --ignore=.tox --junit-xml=.tox/results.{envname}.xml +# if any test takes over 30 seconds, dump traceback +faulthandler_timeout = 30 filterwarnings = error junit_family = xunit1 log_format=%(levelname)s %(relativeCreated)2.2f %(filename)s:%(lineno)d %(message)s From c1e4f3b431bd2b47e335776a7518f2e5448fa103 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 12:05:05 -0400 Subject: [PATCH 02/13] prepare for 1.21 --- blessed/__init__.py | 2 +- docs/history.rst | 5 +++++ version.json | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/blessed/__init__.py b/blessed/__init__.py index e7b00b54..85683ad9 100644 --- a/blessed/__init__.py +++ b/blessed/__init__.py @@ -20,4 +20,4 @@ 'support due to http://bugs.python.org/issue10570.') __all__ = ('Terminal',) -__version__ = "1.20.0" +__version__ = "1.21.0" diff --git a/docs/history.rst b/docs/history.rst index 443dec9e..aaaf29f7 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -2,6 +2,11 @@ Version History =============== +1.21 + * bugfix infinite loop in :meth:`~Terminal.wrap` when "Wide" characters of + width 2 (East-Asian or Emoji) are used with a wrap width of 1, and a small + performance enhancement, :ghissue:`273` and :ghpull:`274` by :ghuser:`grayjk` + 1.20 * introduced :meth:`~Terminal.get_fgcolor` and :meth:`~Terminal.get_bgcolor` to query the terminal for the currently set colors. :ghissue:`237` by :ghuser:`stefanholek` diff --git a/version.json b/version.json index 3e93c0ce..2baa8e0c 100644 --- a/version.json +++ b/version.json @@ -1 +1 @@ -{"version": "1.20.0"} +{"version": "1.21.0"} From 0b8f2c5c11140637037d01e3f97579f61844ddd0 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 12:08:45 -0400 Subject: [PATCH 03/13] py2.7 compat --- tests/test_wrap.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index f31986e9..16d1da1a 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -120,12 +120,12 @@ def test_east_asian_emojis_width_1(): def child(): term = TestTerminal() # by @grayjk from https://github.com/jquast/blessed/issues/273 - result = term.wrap('\u5973', 1) - assert result == ['\u5973'] + result = term.wrap(u'\u5973', 1) + assert result == [u'\u5973'] # much like test_length_with_zwj_is_wrong(), blessed gets ZWJ wrong when wrapping, also. # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1) - assert result == ['\U0001F469\u200D', '\U0001F469\u200D', '\U0001F467\u200D', '\U0001F466'] + assert result == [u'\U0001F469\u200D', u'\U0001F469\u200D', u'\U0001F467\u200D', u'\U0001F466'] child() \ No newline at end of file From 9ff977038e371153f02e1418d54766b55a79d9e8 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:12:10 -0400 Subject: [PATCH 04/13] farx --- blessed/sequences.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/blessed/sequences.py b/blessed/sequences.py index fca03bea..1a9de31b 100644 --- a/blessed/sequences.py +++ b/blessed/sequences.py @@ -227,9 +227,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): nxt += len(text) seq_length = Sequence(chunk[:nxt], term).length() if seq_length > space_left: - if cur_len == 0 and width == 1 and seq_length == 2: - # Emoji etc. cannot be split under 2 cells, so in the case of a width of 1, we have no choice - # but to allow those characters to flow outside of the given cell. + if cur_len == 0 and width == 1 and nxt == 1: + # Emoji etc. cannot be split under 2 cells, so in the + # case of a width of 1, we have no choice but to allow + # those characters to flow outside of the given cell. pass else: break From d07c3d3493c35fba9411680dc8fb3280b46a2720 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:17:02 -0400 Subject: [PATCH 05/13] try this, CI --- blessed/sequences.py | 2 +- tests/test_wrap.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blessed/sequences.py b/blessed/sequences.py index 15edbf7d..999b27e4 100644 --- a/blessed/sequences.py +++ b/blessed/sequences.py @@ -229,7 +229,7 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): nxt += len(text) seq_length = Sequence(chunk[:nxt], term).length() if seq_length > space_left: - if cur_len == 0 and width == 1 and nxt == 1: + if cur_len == 0 and width == 1 and nxt == 1 and seq_length == 2: # Emoji etc. cannot be split under 2 cells, so in the # case of a width of 1, we have no choice but to allow # those characters to flow outside of the given cell. diff --git a/tests/test_wrap.py b/tests/test_wrap.py index 16d1da1a..42001314 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -128,4 +128,4 @@ def child(): result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1) assert result == [u'\U0001F469\u200D', u'\U0001F469\u200D', u'\U0001F467\u200D', u'\U0001F466'] - child() \ No newline at end of file + child() From 9de76a247169295a0d02cb876649fc0a925f29e0 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:25:51 -0400 Subject: [PATCH 06/13] testsz --- tests/test_wrap.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index 42001314..a755bcf0 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -116,7 +116,7 @@ def child(): def test_east_asian_emojis_width_1(): """Tests edge-case of east-asian and emoji characters split into single columns.""" - @as_subprocess + #@as_subprocess def child(): term = TestTerminal() # by @grayjk from https://github.com/jquast/blessed/issues/273 @@ -124,8 +124,11 @@ def child(): assert result == [u'\u5973'] # much like test_length_with_zwj_is_wrong(), blessed gets ZWJ wrong when wrapping, also. + # In this case, each character gets its own line--even though '\u200D' is considered + # a width of 0, the next emoji is "too large to fit". # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1) - assert result == [u'\U0001F469\u200D', u'\U0001F469\u200D', u'\U0001F467\u200D', u'\U0001F466'] + assert result == [u'\U0001F469', u'\u200D', u'\U0001F469', u'\u200D', + u'\U0001F467', u'\u200D', u'\U0001F466'] child() From 5b0df8846c9f1e5710564b6462e9e8de2e81f436 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:26:55 -0400 Subject: [PATCH 07/13] narf --- tests/test_wrap.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index a755bcf0..9049ba4e 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -116,7 +116,7 @@ def child(): def test_east_asian_emojis_width_1(): """Tests edge-case of east-asian and emoji characters split into single columns.""" - #@as_subprocess + @as_subprocess def child(): term = TestTerminal() # by @grayjk from https://github.com/jquast/blessed/issues/273 @@ -127,8 +127,8 @@ def child(): # In this case, each character gets its own line--even though '\u200D' is considered # a width of 0, the next emoji is "too large to fit". # RGI_Emoji_ZWJ_Sequence ; family: woman, woman, girl, boy - result = term.wrap(u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466', 1) - assert result == [u'\U0001F469', u'\u200D', u'\U0001F469', u'\u200D', - u'\U0001F467', u'\u200D', u'\U0001F466'] + given = u'\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466' + result = term.wrap(given, 1) + assert result == list(given) child() From c7959d6c46fb0c5d194b06e8eb0fd2ad1c096aaa Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:46:40 -0400 Subject: [PATCH 08/13] more tests, lints --- tests/test_length_sequence.py | 1 - tests/test_wrap.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tests/test_length_sequence.py b/tests/test_length_sequence.py index 14be023b..94e9c396 100644 --- a/tests/test_length_sequence.py +++ b/tests/test_length_sequence.py @@ -52,7 +52,6 @@ def child(): assert term.length(given) == expected - def test_length_ansiart(): """Test length of ANSI art""" @as_subprocess diff --git a/tests/test_wrap.py b/tests/test_wrap.py index 9049ba4e..619f0146 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -114,6 +114,7 @@ def child(): child() + def test_east_asian_emojis_width_1(): """Tests edge-case of east-asian and emoji characters split into single columns.""" @as_subprocess @@ -131,4 +132,36 @@ def child(): result = term.wrap(given, 1) assert result == list(given) + # in another example, two *narrow* characters, \u1100, "ᄀ" HANGUL + # CHOSEONG KIYEOK (consonant) is joined with \u1161, "ᅡ" HANGUL + # JUNGSEONG A (vowel), to form a single *wide* character "가" HANGUL + # SYLLABLE GA. Ideally, a native speaker would rather have the cojoined + # wide character, and word-wrapping to a column width of '1' for any + # language that includes wide characters or emoji is a bit foolish! + given = u'\u1100\u1161' + result = term.wrap(given, 1) + assert result == list(given) + + child() + + +def test_emojis_width_2_and_greater(): + """Tests emoji characters split into multiple columns.""" + @as_subprocess + def child(): + term = TestTerminal() + given = u'\U0001F469\U0001F467\U0001F466' # woman, girl, boy + result = term.wrap(given, 2) + assert result == list(given) + result = term.wrap(given, 3) + assert result == list(given) + result = term.wrap(given, 4) + assert result == [u'\U0001F469\U0001F467', '\U0001F466'] + result = term.wrap(given, 5) + assert result == [u'\U0001F469\U0001F467', '\U0001F466'] + result = term.wrap(given, 6) + assert result == [u'\U0001F469\U0001F467\U0001F466'] + result = term.wrap(given, 7) + assert result == [u'\U0001F469\U0001F467\U0001F466'] + child() From fe5c249e91e2028807548d1fd01b3f32145b7e84 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:52:44 -0400 Subject: [PATCH 09/13] py27 fix --- tests/test_wrap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index 619f0146..b53cbacd 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -1,4 +1,5 @@ """Tests for Terminal.wrap()""" +# coding: utf-8 # std imports import textwrap @@ -156,9 +157,9 @@ def child(): result = term.wrap(given, 3) assert result == list(given) result = term.wrap(given, 4) - assert result == [u'\U0001F469\U0001F467', '\U0001F466'] + assert result == [u'\U0001F469\U0001F467', u'\U0001F466'] result = term.wrap(given, 5) - assert result == [u'\U0001F469\U0001F467', '\U0001F466'] + assert result == [u'\U0001F469\U0001F467', u'\U0001F466'] result = term.wrap(given, 6) assert result == [u'\U0001F469\U0001F467\U0001F466'] result = term.wrap(given, 7) From e555041455e421d23f1afb31825286172c6eca73 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 16:58:24 -0400 Subject: [PATCH 10/13] document actual PR used for merge --- docs/history.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/history.rst b/docs/history.rst index aaaf29f7..3dad9baf 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -6,6 +6,7 @@ Version History * bugfix infinite loop in :meth:`~Terminal.wrap` when "Wide" characters of width 2 (East-Asian or Emoji) are used with a wrap width of 1, and a small performance enhancement, :ghissue:`273` and :ghpull:`274` by :ghuser:`grayjk` + merged as :ghpull:`275`. 1.20 * introduced :meth:`~Terminal.get_fgcolor` and :meth:`~Terminal.get_bgcolor` to query From 6beb3c67fe221150f47cbe9b6c9bae332af81520 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 17:58:15 -0400 Subject: [PATCH 11/13] Add test around combining character --- tests/test_wrap.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index b53cbacd..f4227e93 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -166,3 +166,20 @@ def child(): assert result == [u'\U0001F469\U0001F467\U0001F466'] child() + +def test_greedy_join_with_cojoining(): + """Test that a word with trailing combining (café) wraps correctly.""" + @as_subprocess + def child(): + term = TestTerminal() + given = u'cafe\u0301-latte' + result = term.wrap(given, 5) + assert result == [u'cafe\u0301-', u'latte'] + result = term.wrap(given, 4) + assert result == [u'cafe\u0301', u'-lat', u'te'] + result = term.wrap(given, 3) + assert result == [u'caf', u'e\u0301-l', u'att', u'e'] + result = term.wrap(given, 2) + assert result == [u'ca', u'fe\u0301', u'-l', u'at', u'te'] + + child() \ No newline at end of file From b0d74fdc0ea5385c85404c7e38826d0f36b25839 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 18:00:37 -0400 Subject: [PATCH 12/13] final newline --- tests/test_wrap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index f4227e93..af08d5c9 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -182,4 +182,4 @@ def child(): result = term.wrap(given, 2) assert result == [u'ca', u'fe\u0301', u'-l', u'at', u'te'] - child() \ No newline at end of file + child() From 25263843e45f6f4df5291468454277265812ab32 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Wed, 26 Jun 2024 18:03:36 -0400 Subject: [PATCH 13/13] more pylint --- tests/test_wrap.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_wrap.py b/tests/test_wrap.py index af08d5c9..4a23101c 100644 --- a/tests/test_wrap.py +++ b/tests/test_wrap.py @@ -167,6 +167,7 @@ def child(): child() + def test_greedy_join_with_cojoining(): """Test that a word with trailing combining (café) wraps correctly.""" @as_subprocess