diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6b58483..75edbc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,7 +33,6 @@ jobs: - "3.10" - "3.11" - "3.12" - - "pypy-2.7" - "pypy-3.7" - "pypy-3.8" - "pypy-3.9" @@ -43,11 +42,6 @@ jobs: python-version: "pypy-3.6" - os: ubuntu-20.04 python-version: "3.6" - - os: ubuntu-20.04 - python-version: "3.5" - - os: ubuntu-20.04 - container: python:2.7-buster - python-version: "2.7" runs-on: ${{ matrix.os }} container: ${{ matrix.container }} @@ -77,22 +71,13 @@ jobs: echo IS_PYPY=$IS_PYPY >>$GITHUB_ENV echo TOX_PYTHON=$V >>$GITHUB_ENV - if [[ ${{ matrix.python-version }} = *2.7 ]]; then - python -m pip install tox - else - python -Im pip install tox - fi + python -Im pip install tox - name: Prepare sdist and source-dir shell: bash run: | - if [[ ${{ matrix.python-version }} = *2.7 ]]; then - python -m pip install build - python -m build - else - python -Im pip install build - python -Im build - fi + python -Im pip install build + python -Im build mkdir source-dir tar -xzvf dist/wcwidth-*.tar.gz -C source-dir --strip-components=1 @@ -101,11 +86,7 @@ jobs: shell: bash working-directory: ./source-dir run: | - if [[ ${{ matrix.python-version }} = *2.7 ]]; then - python -m tox -e ${{ env.TOX_PYTHON }} - else - python -Im tox -e ${{ env.TOX_PYTHON }} - fi + python -Im tox -e ${{ env.TOX_PYTHON }} - name: Rename coverage data shell: bash diff --git a/.travis.yml b/.travis.yml index f0f6cd9..c82786e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,12 +4,6 @@ matrix: include: - python: 3.8 env: TOXENV=update,compile,autopep8,docformatter,isort,pylint,flake8,flake8_tests,pydocstyle,docs COVERAGE_ID=travis-ci - - python: 2.7 - env: TOXENV=py27,codecov COVERAGE_ID=travis-ci - - python: 3.4 - env: TOXENV=py34,codecov COVERAGE_ID=travis-ci - - python: 3.5 - env: TOXENV=py35,codecov COVERAGE_ID=travis-ci - python: 3.6 env: TOXENV=py36,codecov COVERAGE_ID=travis-ci - python: 3.7 diff --git 
a/bin/wcwidth-browser.py b/bin/wcwidth-browser.py index 0718cae..545f911 100755 --- a/bin/wcwidth-browser.py +++ b/bin/wcwidth-browser.py @@ -22,7 +22,6 @@ # Invalid constant name "echo" # Invalid constant name "flushout" (col 4) # Invalid module name "wcwidth-browser" -from __future__ import division, print_function # std imports import sys @@ -44,7 +43,7 @@ #: printable length of highest unicode character description LIMIT_UCS = 0x3fffd -UCS_PRINTLEN = len('{value:0x}'.format(value=LIMIT_UCS)) +UCS_PRINTLEN = len(f'{LIMIT_UCS:0x}') def readline(term, width): @@ -69,7 +68,7 @@ def readline(term, width): return text -class WcWideCharacterGenerator(object): +class WcWideCharacterGenerator: """Generator yields unicode characters of the given ``width``.""" # pylint: disable=R0903 @@ -101,7 +100,7 @@ def __next__(self): return (ucs, name) -class WcCombinedCharacterGenerator(object): +class WcCombinedCharacterGenerator: """Generator yields unicode characters with combining.""" # pylint: disable=R0903 @@ -148,11 +147,8 @@ def __next__(self): continue return (ucs, name) - # python 2.6 - 3.3 compatibility - next = __next__ - -class Style(object): +class Style: """Styling decorator class instance for terminal output.""" # pylint: disable=R0903 @@ -184,7 +180,7 @@ def __init__(self, **kwargs): setattr(self, key, val) -class Screen(object): +class Screen: """Represents terminal style, data dimensions, and drawables.""" intro_msg_fmt = ('Delimiters ({delim}) should align, ' @@ -217,8 +213,7 @@ def head_item(self): """Text of a single column heading.""" delimiter = self.style.attr_minor(self.style.delimiter) hint = self.style.header_hint * self.wide - heading = ('{delimiter}{hint}{delimiter}' - .format(delimiter=delimiter, hint=hint)) + heading = f'{delimiter}{hint}{delimiter}' def alignment(*args): if self.style.alignment == 'right': @@ -264,7 +259,7 @@ def page_size(self): return self.num_rows * self.num_columns -class Pager(object): +class Pager: """A less(1)-like browser 
for browsing unicode characters.""" # pylint: disable=too-many-instance-attributes @@ -570,10 +565,10 @@ def draw_status(self, writer, idx): if idx == self.last_page: last_end = '(END)' else: - last_end = '/{0}'.format(self.last_page) + last_end = f'/{self.last_page}' txt = ('Page {idx}{last_end} - ' '{q} to quit, [keys: {keyset}]' - .format(idx=style.attr_minor('{0}'.format(idx)), + .format(idx=style.attr_minor(f'{idx}'), last_end=style.attr_major(last_end), keyset=style.attr_major('kjfbvc12-='), q=style.attr_minor('q'))) diff --git a/bin/wcwidth-libc-comparator.py b/bin/wcwidth-libc-comparator.py index 984048a..f07a4fe 100755 --- a/bin/wcwidth-libc-comparator.py +++ b/bin/wcwidth-libc-comparator.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 """ Manual tests comparing wcwidth.py to libc's wcwidth(3) and wcswidth(3). @@ -18,7 +17,6 @@ # Invalid module name "wcwidth-libc-comparator" # standard imports -from __future__ import print_function # std imports import sys @@ -64,25 +62,12 @@ def report_ucs_msg(ucs, wcwidth_libc, wcwidth_local): .decode('ascii') .upper() .lstrip('0')) - url = "http://codepoints.net/U+{}".format(ucp) + url = f"http://codepoints.net/U+{ucp}" name = unicodedata.name(ucs) - return (u"libc,ours={},{} [--o{}o--] name={} val={} {}" + return ("libc,ours={},{} [--o{}o--] name={} val={} {}" " ".format(wcwidth_libc, wcwidth_local, ucs, name, ord(ucs), url)) -# use chr() for py3.x, -# unichr() for py2.x -try: - _ = unichr(0) -except NameError as err: - if err.args[0] == "name 'unichr' is not defined": - # pylint: disable=W0622 - # Redefining built-in 'unichr' (col 8) - - unichr = chr - else: - raise - if sys.maxunicode < 1114111: warnings.warn('narrow Python build, only a small subset of ' 'characters may be tested.') @@ -108,7 +93,7 @@ def main(using_locale=('en_US', 'UTF-8',)): report a detailed AssertionError to stdout. 
""" all_ucs = (ucs for ucs in - [unichr(val) for val in range(sys.maxunicode)] + [chr(val) for val in range(sys.maxunicode)] if is_named(ucs) and is_not_combining(ucs)) libc_name = ctypes.util.find_library('c') diff --git a/docs/conf.py b/docs/conf.py index bf77a7e..c3f5811 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # wcwidth documentation build configuration file, created by # sphinx-quickstart on Fri Oct 20 15:18:02 2017. diff --git a/docs/intro.rst b/docs/intro.rst index 65df208..e560b1b 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -107,7 +107,7 @@ Install wcwidth in editable mode:: Execute unit tests using tox_:: - tox -e py27,py35,py36,py37,py38,py39,py310,py311,py312 + tox -e py36,py37,py38,py39,py310,py311,py312 Updating Unicode Version ------------------------ diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 2a9acf1..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[bdist_wheel] -universal = 1 diff --git a/setup.py b/setup.py index 87e5704..04753f8 100755 --- a/setup.py +++ b/setup.py @@ -51,8 +51,6 @@ def main(): _get_here('README.rst'), 'rb', 'utf8').read(), author='Jeff Quast', author_email='contact@jeffquast.com', - install_requires=('backports.functools-lru-cache>=1.2.1;' - 'python_version < "3.2"'), license='MIT', packages=['wcwidth'], url='https://github.com/jquast/wcwidth', @@ -60,6 +58,7 @@ def main(): '': ['LICENSE', '*.rst'], }, zip_safe=True, + python_requires='>=3.6', classifiers=[ 'Intended Audience :: Developers', 'Natural Language :: English', @@ -67,8 +66,6 @@ def main(): 'Environment :: Console', 'License :: OSI Approved :: MIT License', 'Operating System :: POSIX', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', diff --git a/tests/test_core.py b/tests/test_core.py index 
60ed6b1..ccb2bf0 100755 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,3 @@ -# coding: utf-8 """Core tests for wcwidth module. isort:skip_file""" try: # std import @@ -10,13 +9,6 @@ # local import wcwidth -try: - # python 2 - _ = unichr -except NameError: - # python 3 - unichr = chr - def test_package_version(): """wcwidth.__version__ is expected value.""" @@ -70,7 +62,7 @@ def basic_string_type(): def test_hello_jp(): - u""" + """ Width of Japanese phrase: コンニチハ, セカイ! Given a phrase of 5 and 3 Katakana ideographs, joined with @@ -78,7 +70,7 @@ def test_hello_jp(): phrase consumes 19 cells of a terminal emulator. """ # given, - phrase = u'コンニチハ, セカイ!' + phrase = 'コンニチハ, セカイ!' expect_length_each = (2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1) expect_length_phrase = sum(expect_length_each) @@ -99,7 +91,7 @@ def test_wcswidth_substr(): to stop counting length. """ # given, - phrase = u'コンニチハ, セカイ!' + phrase = 'コンニチハ, セカイ!' end = 7 expect_length_each = (2, 2, 2, 2, 2, 1, 1,) expect_length_phrase = sum(expect_length_each) @@ -116,7 +108,7 @@ def test_wcswidth_substr(): def test_null_width_0(): """NULL (0) reports width 0.""" # given, - phrase = u'abc\x00def' + phrase = 'abc\x00def' expect_length_each = (1, 1, 1, 0, 1, 1, 1) expect_length_phrase = sum(expect_length_each) @@ -140,7 +132,7 @@ def test_control_c0_width_negative_1(): any string containing the C1 control character \x1b (ESC). 
""" # given, - phrase = u'\x1b[0m' + phrase = '\x1b[0m' expect_length_each = (-1, 1, 1, 1) expect_length_phrase = -1 @@ -156,7 +148,7 @@ def test_control_c0_width_negative_1(): def test_combining_width(): """Simple test combining reports total width of 4.""" # given, - phrase = u'--\u05bf--' + phrase = '--\u05bf--' expect_length_each = (1, 1, 0, 1, 1) expect_length_phrase = 4 @@ -170,8 +162,8 @@ def test_combining_width(): def test_combining_cafe(): - u"""Phrase cafe + COMBINING ACUTE ACCENT is café of length 4.""" - phrase = u"cafe\u0301" + """Phrase cafe + COMBINING ACUTE ACCENT is café of length 4.""" + phrase = "cafe\u0301" expect_length_each = (1, 1, 1, 1, 0) expect_length_phrase = 4 @@ -185,8 +177,8 @@ def test_combining_cafe(): def test_combining_enclosing(): - u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1.""" - phrase = u"\u0410\u0488" + """CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1.""" + phrase = "\u0410\u0488" expect_length_each = (1, 0) expect_length_phrase = 1 @@ -200,16 +192,16 @@ def test_combining_enclosing(): def test_balinese_script(): - u""" + """ Balinese kapal (ship) is length 3. This may be an example that is not yet correctly rendered by any terminal so far, like devanagari. 
""" - phrase = (u"\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA - u"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL - u"\u1B2E" # Category 'Lo', EAW 'N' -- BALINESE LETTER LA - u"\u1B44") # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG + phrase = ("\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA + "\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL + "\u1B2E" # Category 'Lo', EAW 'N' -- BALINESE LETTER LA + "\u1B44") # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG expect_length_each = (1, 1, 1, 0) expect_length_phrase = 3 @@ -237,8 +229,8 @@ def test_kr_jamo(): # and not by independent display, like other zero-width characters that may # only combine with an appropriate preceding character. phrase = ( - u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant) - u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel) + "\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant) + "\u1161" # ᅡ HANGUL JUNGSEONG A (vowel) ) expect_length_each = (2, 0) expect_length_phrase = 2 @@ -253,14 +245,14 @@ def test_kr_jamo(): def test_kr_jamo_filler(): - u""" + """ Jamo filler is 0 width. Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf """ phrase = ( - u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant) - u"\u1160" # HANGUL JUNGSEONG FILLER (vowel) + "\u1100" # HANGUL CHOSEONG KIYEOK (consonant) + "\u1160" # HANGUL JUNGSEONG FILLER (vowel) ) expect_length_each = (2, 0) expect_length_phrase = 2 @@ -305,10 +297,10 @@ def test_devanagari_script(): # as a sum of each individual width, as this library currently performs with exception of # ZWJ, but I think it incorrectly gestures what a stateless call to wcwidth.wcwidth of # each codepoint *should* return. 
- phrase = (u"\u0915" # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA - u"\u094D" # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA - u"\u0937" # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA - u"\u093F") # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I + phrase = ("\u0915" # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA + "\u094D" # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA + "\u0937" # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA + "\u093F") # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) expect_length_each = (1, 0, 1, 0) # I believe the final width *should* be 3. @@ -325,10 +317,10 @@ def test_devanagari_script(): def test_tamil_script(): # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf - phrase = (u"\u0b95" # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA - u"\u0bcd" # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA - u"\u0bb7" # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA - u"\u0bcc") # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU + phrase = ("\u0b95" # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA + "\u0bcd" # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA + "\u0bb7" # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA + "\u0bcc") # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (3, 0, 0, 4) expect_length_each = (1, 0, 1, 0) @@ -348,10 +340,10 @@ def test_kannada_script(): # This test adapted from 
https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf # |ರ್ಝೈ| # |123| - phrase = (u"\u0cb0" # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA - u"\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA - u"\u0c9d" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA - u"\u0cc8") # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI + phrase = ("\u0cb0" # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + "\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + "\u0c9d" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA + "\u0cc8") # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI # 23107-terminal-suppt.pdf suggests should be (2, 0, 3, 1) expect_length_each = (1, 0, 1, 0) # I believe the correct final width *should* be 3 or 4. @@ -370,10 +362,10 @@ def test_kannada_script_2(): # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf # |ರ಼್ಚ| # |12| - phrase = (u"\u0cb0" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA - u"\u0cbc" # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA - u"\u0ccd" # Joiner, Category 'Lo', East Asian Width property 'N' -- KANNADA SIGN VIRAMA - u"\u0c9a") # Subjoin, Category 'Mc', East Asian Width property 'N' -- KANNADA LETTER CA + phrase = ("\u0cb0" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA + "\u0cbc" # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA + "\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA + "\u0c9a") # Subjoin, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER CA # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1) expect_length_each = (1, 0, 0, 1) # I believe the final width is correct, but maybe for 
the wrong reasons! @@ -392,11 +384,11 @@ def test_zero_wide_conflict(): # Test characters considered both "wide" and "zero" width # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine - assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2 - assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0 + assert wcwidth.wcwidth(chr(0x03029), unicode_version='4.1.0') == 2 + assert wcwidth.wcwidth(chr(0x0302a), unicode_version='4.1.0') == 0 # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto - assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0 - assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0 - assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2 + assert wcwidth.wcwidth(chr(0x03099), unicode_version='4.1.0') == 0 + assert wcwidth.wcwidth(chr(0x0309a), unicode_version='4.1.0') == 0 + assert wcwidth.wcwidth(chr(0x0309b), unicode_version='4.1.0') == 2 diff --git a/tests/test_emojis.py b/tests/test_emojis.py index 4f88e23..a7795f0 100644 --- a/tests/test_emojis.py +++ b/tests/test_emojis.py @@ -5,18 +5,11 @@ # 3rd party import pytest -try: - # python 2 - _ = unichr -except NameError: - # python 3 - unichr = chr - # some tests cannot be done on some builds of python, where the internal # unicode structure is limited to 0x10000 for memory conservation, # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" try: - unichr(0x2fffe) + chr(0x2fffe) NARROW_ONLY = False except ValueError: NARROW_ONLY = True @@ -27,18 +20,18 @@ def make_sequence_from_line(line): # convert '002A FE0F ; ..' 
-> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) - return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) + return ''.join(chr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def emoji_zwj_sequence(): - u""" + """ Emoji zwj sequence of four codepoints is just 2 cells. """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + "\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + "\U0001f4bb") # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf expect_length_each = (2, 0, 0, 2) expect_length_phrase = 2 @@ -54,12 +47,12 @@ def emoji_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_unfinished_zwj_sequence(): - u""" + """ Ensure index-out-of-bounds does not occur for zero-width joiner without any following character """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + "\U0001f3fb" # Modifier, Category Sk, East Asian Width 
property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER expect_length_each = (2, 0, 0) expect_length_phrase = 2 @@ -77,9 +70,9 @@ def test_non_recommended_zwj_sequence(): """ Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + phrase = ("\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + "\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + "\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER expect_length_each = (2, 0, 0) expect_length_phrase = 2 @@ -95,11 +88,11 @@ def test_non_recommended_zwj_sequence(): @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") def test_another_emoji_zwj_sequence(): phrase = ( - u"\u26F9" # PERSON WITH BALL - u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200D" # ZERO WIDTH JOINER - u"\u2640" # FEMALE SIGN - u"\uFE0F") # VARIATION SELECTOR-16 + "\u26F9" # PERSON WITH BALL + "\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + "\u200D" # ZERO WIDTH JOINER + "\u2640" # FEMALE SIGN + "\uFE0F") # VARIATION SELECTOR-16 expect_length_each = (1, 0, 0, 1, 0) expect_length_phrase = 2 @@ -121,16 +114,16 @@ def test_longer_emoji_zwj_sequence(): in a single function call. 
""" # 'Category Code', 'East Asian Width property' -- 'description' - phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART - u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F48B" # 'So', 'W' -- KISS MARK - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 + phrase = ("\U0001F9D1" # 'So', 'W' -- ADULT + "\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + "\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + "\u2764" # 'So', 'N' -- HEAVY BLACK HEART + "\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 + "\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + "\U0001F48B" # 'So', 'W' -- KISS MARK + "\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + "\U0001F9D1" # 'So', 'W' -- ADULT + "\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 ) * 2 # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2 @@ -212,8 +205,8 @@ def test_recommended_variation_16_sequences(): def test_unicode_9_vs16(): """Verify effect of VS-16 on unicode_version 9.0 and later""" - phrase = (u"\u2640" # FEMALE SIGN - u"\uFE0F") # VARIATION SELECTOR-16 + phrase = ("\u2640" # FEMALE SIGN + "\uFE0F") # VARIATION SELECTOR-16 expect_length_each = (1, 0) expect_length_phrase = 2 @@ -228,8 +221,8 @@ def test_unicode_9_vs16(): def test_unicode_8_vs16(): """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" - phrase = (u"\u2640" # FEMALE SIGN - u"\uFE0F") # VARIATION SELECTOR-16 + phrase = ("\u2640" # FEMALE SIGN + "\uFE0F") # VARIATION SELECTOR-16 expect_length_each = (1, 0) expect_length_phrase = 1 diff --git a/tests/test_ucslevel.py b/tests/test_ucslevel.py index 654e835..b15fb5f 100644 --- a/tests/test_ucslevel.py +++ 
b/tests/test_ucslevel.py @@ -1,4 +1,3 @@ -# coding: utf-8 """Unicode version level tests for wcwidth.""" # std imports import warnings @@ -37,7 +36,7 @@ def test_exact_410_str(): def test_exact_410_unicode(): """wcwidth._wcmatch_version(u'4.1.0') returns equal value (unicode).""" # given, - given = expected = u'4.1.0' + given = expected = '4.1.0' # exercise, result = wcwidth._wcmatch_version(given) @@ -61,7 +60,7 @@ def test_nearest_505_str(): def test_nearest_505_unicode(): """wcwidth._wcmatch_version(u'5.0.5') returns nearest u'5.0.0'. (unicode)""" # given - given, expected = u'5.0.5', u'5.0.0' + given, expected = '5.0.5', '5.0.0' # exercise result = wcwidth._wcmatch_version(given) @@ -89,7 +88,7 @@ def test_nearest_lowint40_str(): def test_nearest_lowint40_unicode(): """wcwidth._wcmatch_version(u'4.0') returns nearest u'4.1.0'.""" # given - given, expected = u'4.0', u'4.1.0' + given, expected = '4.0', '4.1.0' warnings.resetwarnings() wcwidth._wcmatch_version.cache_clear() @@ -117,7 +116,7 @@ def test_nearest_800_str(): def test_nearest_800_unicode(): """wcwidth._wcmatch_version(u'8') returns nearest u'8.0.0'.""" # given - given, expected = u'8', u'8.0.0' + given, expected = '8', '8.0.0' # exercise result = wcwidth._wcmatch_version(given) @@ -141,7 +140,7 @@ def test_nearest_999_str(): def test_nearest_999_unicode(): """wcwidth._wcmatch_version(u'999.0') returns nearest (latest).""" # given - given, expected = u'999.0', wcwidth.list_versions()[-1] + given, expected = '999.0', wcwidth.list_versions()[-1] # exercise result = wcwidth._wcmatch_version(given) @@ -153,7 +152,7 @@ def test_nearest_999_unicode(): def test_nonint_unicode(): """wcwidth._wcmatch_version(u'x.y.z') returns latest (unicode).""" # given - given, expected = u'x.y.z', wcwidth.list_versions()[-1] + given, expected = 'x.y.z', wcwidth.list_versions()[-1] warnings.resetwarnings() wcwidth._wcmatch_version.cache_clear() diff --git a/tox.ini b/tox.ini index 5ffb5ff..0346af7 100644 --- a/tox.ini +++ 
b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = update, compile, autopep8, docformatter, isort, pylint, flake8, pydocstyle, docs, verify_tables, py{27, 35, 36, 37, 38, 39, 310, 311, 312}, pypy{27, 36, 37, 38, 39, 310} +envlist = update, compile, autopep8, docformatter, isort, pylint, flake8, pydocstyle, docs, verify_tables, py{36, 37, 38, 39, 310, 311, 312}, pypy{36, 37, 38, 39, 310} skip_missing_interpreters = true # https://tox.wiki/en/4.11.3/faq.html#testing-end-of-life-python-versions requires = virtualenv<20.22.0 @@ -110,15 +110,6 @@ deps = -r requirements-tests36.txt [testenv:pypy36] deps = -r requirements-tests36.txt -[testenv:py35] -deps = -r requirements-tests37.in - -[testenv:py27] -deps = -r requirements-tests37.in - -[testenv:pypy27] -deps = -r requirements-tests37.in - [testenv:update] basepython = python3.12 usedevelop = true diff --git a/wcwidth/wcwidth.py b/wcwidth/wcwidth.py index e924020..92ca14a 100644 --- a/wcwidth/wcwidth.py +++ b/wcwidth/wcwidth.py @@ -60,12 +60,11 @@ Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c """ -from __future__ import division # std imports import os -import sys import warnings +from functools import lru_cache # local from .table_vs16 import VS16_NARROW_TO_WIDE @@ -73,17 +72,6 @@ from .table_zero import ZERO_WIDTH from .unicode_versions import list_versions -try: - # std imports - from functools import lru_cache -except ImportError: - # lru_cache was added in Python 3.2 - # 3rd party - from backports.functools_lru_cache import lru_cache - -# global cache -_PY3 = sys.version_info[0] >= 3 - def _bisearch(ucs, table): """ @@ -186,11 +174,11 @@ def wcswidth(pwcs, n=None, unicode_version='auto'): last_measured_char = None while idx < end: char = pwcs[idx] - if char == u'\u200D': + if char == '\u200D': # Zero Width Joiner, do not measure this or next character idx += 2 continue - if char == u'\uFE0F' and last_measured_char: + if char == '\uFE0F' and last_measured_char: # on variation selector 16 (VS16) following 
another character, # conditionally add '1' to the measured width if that character is # known to be converted from narrow to wide by the VS16 character. @@ -250,8 +238,7 @@ def _wcmatch_version(given_version): ``UNICODE_VERSION``. If the environment variable is not set, then the latest is used. :rtype: str - :returns: unicode string, or non-unicode ``str`` type for python 2 - when given ``version`` is also type ``str``. + :returns: unicode string. """ # Design note: the choice to return the same type that is given certainly # complicates it for python 2 str-type, but allows us to define an api that @@ -261,30 +248,24 @@ def _wcmatch_version(given_version): # That, along with the string-to-numeric and comparisons of earliest, # latest, matching, or nearest, greatly complicates this function. # Performance is somewhat curbed by memoization. - _return_str = not _PY3 and isinstance(given_version, str) - - if _return_str: - # avoid list-comprehension to work around a coverage issue: - # https://github.com/nedbat/coveragepy/issues/753 - unicode_versions = list(map(lambda ucs: ucs.encode(), list_versions())) - else: - unicode_versions = list_versions() + + unicode_versions = list_versions() latest_version = unicode_versions[-1] - if given_version in (u'auto', 'auto'): + if given_version == 'auto': given_version = os.environ.get( 'UNICODE_VERSION', - 'latest' if not _return_str else latest_version.encode()) + 'latest') - if given_version in (u'latest', 'latest'): + if given_version == 'latest': # default match, when given as 'latest', use the most latest unicode # version specification level supported. - return latest_version if not _return_str else latest_version.encode() + return latest_version if given_version in unicode_versions: # exact match, downstream has specified an explicit matching version # matching any value of list_versions(). 
- return given_version if not _return_str else given_version.encode() + return given_version # The user's version is not supported by ours. We return the newest unicode # version level that we support below their given value. @@ -298,7 +279,7 @@ def _wcmatch_version(given_version): "supported unicode version {latest_version!r} has been " "inferred.".format(given_version=given_version, latest_version=latest_version)) - return latest_version if not _return_str else latest_version.encode() + return latest_version # given version is less than any available version, return earliest # version. @@ -314,7 +295,7 @@ def _wcmatch_version(given_version): "version level, {earliest_version!r}".format( given_version=given_version, earliest_version=earliest_version)) - return earliest_version if not _return_str else earliest_version.encode() + return earliest_version # create list of versions which are less than our equal to given version, # and return the tail value, which is the highest level we may support, @@ -328,7 +309,7 @@ def _wcmatch_version(given_version): cmp_next_version = _wcversion_value(unicode_versions[idx + 1]) except IndexError: # at end of list, return latest version - return latest_version if not _return_str else latest_version.encode() + return latest_version # Maybe our given version has less parts, as in tuple(8, 0), than the # next compare version tuple(8, 0, 0). Test for an exact match by @@ -338,7 +319,7 @@ def _wcmatch_version(given_version): # Or, if any next value is greater than our given support level # version, return the current value in index. Even though it must - # be less than the given value, its our closest possible match. That + # be less than the given value, it's our closest possible match. That # is, 4.1 is returned for given 4.9.9, where 4.1 and 5.0 are available. if cmp_next_version > cmp_given: return unicode_version