diff --git a/bin/update-tables.py b/bin/update-tables.py index 8e9104e..e7a4395 100644 --- a/bin/update-tables.py +++ b/bin/update-tables.py @@ -112,11 +112,11 @@ class TableEntry: properties: tuple[str, ...] comment: str - def filter_by_category(self, category_codes: str, wide: int) -> bool: + def filter_by_category_width(self, wide: int) -> bool: """ - Return whether entry matches given category code and displayed width. + Return whether entry matches displayed width. - Categories are described here, https://www.unicode.org/reports/tr44/#GC_Values_Table + Parses both DerivedGeneralCategory.txt and EastAsianWidth.txt """ if self.code_range is None: return False @@ -146,13 +146,12 @@ def filter_by_category(self, category_codes: str, wide: int) -> bool: return wide == 1 @staticmethod - def parse_category_values(category_codes: str, - table_iter: Iterator[TableEntry], - wide: int) -> set[tuple[int, int]]: + def parse_width_category_values(table_iter: Iterator[TableEntry], + wide: int) -> set[tuple[int, int]]: """Parse value ranges of unicode data files, by given category and width.""" return {n for entry in table_iter - if entry.filter_by_category(category_codes, wide) + if entry.filter_by_category_width(wide) for n in list(range(entry.code_range[0], entry.code_range[1]))} @@ -326,18 +325,16 @@ def fetch_table_wide_data() -> UnicodeTableRenderCtx: for version in fetch_unicode_versions(): # parse typical 'wide' characters by categories 'W' and 'F', table[version] = parse_category(fname=UnicodeDataFile.EastAsianWidth(version), - category_codes=('W', 'F'), wide=2) # subtract(!) wide characters that were defined above as 'W' category in EastAsianWidth, # but also zero-width category 'Mn' or 'Mc' in DerivedGeneralCategory! - table[version].values.discard(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), - category_codes=('Mn', 'Mc'), - wide=0).values) + table[version].values = table[version].values.difference(parse_category( + fname=UnicodeDataFile.DerivedGeneralCategory(version), + wide=0).values) # finally, join with atypical 'wide' characters defined by category 'Sk', table[version].values.update(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), - category_codes=('Sk',), wide=2).values) return UnicodeTableRenderCtx('WIDE_EASTASIAN', table) @@ -352,7 +349,6 @@ def fetch_table_zero_data() -> UnicodeTableRenderCtx: for version in fetch_unicode_versions(): # Determine values of zero-width character lookup table by the following category codes table[version] = parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version), - category_codes=('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp', 'Sk'), wide=0) # And, include NULL @@ -501,9 +497,9 @@ def parse_vs16_table(fp: Iterable[str]) -> Iterator[TableEntry]: @functools.cache -def parse_category(fname: str, category_codes: Container[str], wide: int) -> TableDef: +def parse_category(fname: str, wide: int) -> TableDef: """Parse value ranges of unicode data files, by given categories into string tables.""" - print(f'parsing {fname} category_codes={",".join(category_codes)}: ', end='', flush=True) + print(f'parsing {fname}, wide={wide}: ', end='', flush=True) with open(fname, encoding='utf-8') as f: table_iter = parse_unicode_table(f) @@ -512,7 +508,7 @@ def parse_category(fname: str, category_codes: Container[str], wide: int) -> Tab version = next(table_iter).comment.strip() # and "date string" from second line date = next(table_iter).comment.split(':', 1)[1].strip() - values = TableEntry.parse_category_values(category_codes, table_iter, wide) + values = TableEntry.parse_width_category_values(table_iter, wide) print('ok') return TableDef(version, date, values) diff --git a/bin/verify-table-integrity.py b/bin/verify-table-integrity.py index 7ba691b..bab458d 100644 --- a/bin/verify-table-integrity.py +++ b/bin/verify-table-integrity.py @@ -63,9 +63,30 @@ import logging +def bisearch_pair(ucs, table): + """ + A copy of wcwidth._bisearch() but also returns the range of matched values. + """ + lbound = 0 + ubound = len(table) - 1 + + if ucs < table[0][0] or ucs > table[ubound][1]: + return (0, None, None) + while ubound >= lbound: + mid = (lbound + ubound) // 2 + if ucs > table[mid][1]: + lbound = mid + 1 + elif ucs < table[mid][0]: + ubound = mid - 1 + else: + return (1, table[mid][0], table[mid][1]) + + return (0, None, None) + + def main(log: logging.Logger): - # local - from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, _bisearch, list_versions + from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, list_versions + reversed_uni_versions = list(reversed(list_versions())) tables = {'ZERO_WIDTH': ZERO_WIDTH, 'WIDE_EASTASIAN': WIDE_EASTASIAN} @@ -81,14 +102,21 @@ def main(log: logging.Logger): other_table = tables[other_table_name][version] for start_range, stop_range in curr_table: for unichar_n in range(start_range, stop_range): - if not _bisearch(unichar_n, next_table): - log.info(f'value {hex(unichar_n)} in table_name={table_name}' - f' version={version} is not defined in next_version={next_version}' - f' from inclusive range {hex(start_range)}-{hex(stop_range)}') - if _bisearch(unichar_n, other_table): - log.error(f'value {hex(unichar_n)} in table_name={table_name}' - f' version={version} is duplicated in other_table_name={other_table_name}' - f' from inclusive range {hex(start_range)}-{hex(stop_range)}') + result, _, _ = bisearch_pair(unichar_n, next_table) + if not result: + log.info( + f'value 0x{unichar_n:05x} in table_name={table_name}' + f' version={version} is not defined in next_version={next_version}' + f' from inclusive range {hex(start_range)}-{hex(stop_range)}' + ) + result, lbound, ubound = bisearch_pair(unichar_n, other_table) + if result: + log.error( + f'value 0x{unichar_n:05x} in table_name={table_name}' + f' version={version} is duplicated in other_table_name={other_table_name}' + f' from inclusive range 0x{start_range:05x}-0x{stop_range:05x} of' + f' {table_name} against 0x{lbound:05x}-0x{ubound:05x} in {other_table_name}' + ) errors += 1 if errors: log.error(f'{errors} errors, exit 1') diff --git a/docs/intro.rst b/docs/intro.rst index 3b4a8fe..e454962 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -217,7 +217,7 @@ Other Languages History ======= 0.2.12 *2023-11-21* - * re-release to remove .pyi file misplaced in wheel files `Issue #101`. + * re-release to remove .pyi file misplaced in wheel files `Issue #101`_. 0.2.11 *2023-11-20* * Include tests files in the source distribution (`PR #98`_, `PR #100`_). diff --git a/tests/test_core.py b/tests/test_core.py index d2776cd..c190698 100755 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -355,3 +355,17 @@ def test_kannada_script_2(): # verify. assert length_each == expect_length_each assert length_phrase == expect_length_phrase + + +def test_zero_wide_conflict(): + # Test characters considered both "wide" and "zero" width + # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2 + assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0 + + # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto + assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0 + assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0 + assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2 diff --git a/tests/test_table_integrity.py b/tests/test_table_integrity.py new file mode 100644 index 0000000..66e63dd --- /dev/null +++ b/tests/test_table_integrity.py @@ -0,0 +1,15 @@ +""" +Executes verify-table-integrity.py as a unit test. +""" +import os +import sys +import subprocess + +import pytest + +@pytest.mark.skipif(sys.version_info[:2] != (3, 12), reason='Test only with a single version of python') +def test_verify_table_integrity(): + subprocess.check_output([sys.executable, os.path.join(os.path.dirname(__file__), + os.path.pardir, + 'bin', + 'verify-table-integrity.py')]) \ No newline at end of file diff --git a/wcwidth/table_wide.py b/wcwidth/table_wide.py index 02afd5c..4c26119 100644 --- a/wcwidth/table_wide.py +++ b/wcwidth/table_wide.py @@ -1,7 +1,7 @@ """ Exports WIDE_EASTASIAN table keyed by supporting unicode version level. -This code generated by wcwidth/bin/update-tables.py on 2023-09-14 15:45:33 UTC. +This code generated by wcwidth/bin/update-tables.py on 2024-01-03 17:16:09 UTC. """ WIDE_EASTASIAN = { '4.1.0': ( @@ -15,9 +15,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H @@ -53,9 +54,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312c,), # Bopomofo Letter B ..Bopomofo Letter Gn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H @@ -91,9 +93,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H @@ -130,9 +133,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031b7,), # Ideographic Annotation L..Bopomofo Final Letter H @@ -172,9 +176,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -216,9 +221,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -258,9 +264,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -298,9 +305,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -338,9 +346,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -378,9 +387,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -451,9 +461,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312d,), # Bopomofo Letter B ..Bopomofo Letter Ih (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -493,7 +504,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -562,9 +574,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312e,), # Bopomofo Letter B ..Bopomofo Letter O With D (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -606,7 +619,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -673,9 +687,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -717,7 +732,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -786,9 +802,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -832,7 +849,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -905,9 +923,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031ba,), # Ideographic Annotation L..Bopomofo Letter Zy @@ -950,7 +969,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -1023,9 +1043,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q @@ -1043,8 +1064,7 @@ (0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At (0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa (0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign - (0x16fe0, 0x16fe4,), # Tangut Iteration Mark ..Khitan Small Script Fill - (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma (0x17000, 0x187f7,), # (nil) (0x18800, 0x18cd5,), # Tangut Component-001 ..Khitan Small Script Char (0x18d00, 0x18d08,), # (nil) @@ -1069,7 +1089,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -1144,9 +1165,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q @@ -1164,8 +1186,7 @@ (0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At (0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa (0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign - (0x16fe0, 0x16fe4,), # Tangut Iteration Mark ..Khitan Small Script Fill - (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma (0x17000, 0x187f7,), # (nil) (0x18800, 0x18cd5,), # Tangut Component-001 ..Khitan Small Script Char (0x18d00, 0x18d08,), # (nil) @@ -1193,7 +1214,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -1270,9 +1292,10 @@ (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute (0x02ff0, 0x02ffb,), # Ideographic Description ..Ideographic Description - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q @@ -1290,8 +1313,7 @@ (0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At (0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa (0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign - (0x16fe0, 0x16fe4,), # Tangut Iteration Mark ..Khitan Small Script Fill - (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma (0x17000, 0x187f7,), # (nil) (0x18800, 0x18cd5,), # Tangut Component-001 ..Khitan Small Script Char (0x18d00, 0x18d08,), # (nil) @@ -1321,7 +1343,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red @@ -1395,9 +1418,10 @@ (0x02e80, 0x02e99,), # Cjk Radical Repeat ..Cjk Radical Rap (0x02e9b, 0x02ef3,), # Cjk Radical Choke ..Cjk Radical C-simplified (0x02f00, 0x02fd5,), # Kangxi Radical One ..Kangxi Radical Flute - (0x02ff0, 0x0303e,), # Ideographic Description ..Ideographic Variation In + (0x02ff0, 0x03029,), # Ideographic Description ..Hangzhou Numeral Nine + (0x03030, 0x0303e,), # Wavy Dash ..Ideographic Variation In (0x03041, 0x03096,), # Hiragana Letter Small A ..Hiragana Letter Small Ke - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto (0x03105, 0x0312f,), # Bopomofo Letter B ..Bopomofo Letter Nn (0x03131, 0x0318e,), # Hangul Letter Kiyeok ..Hangul Letter Araeae (0x03190, 0x031e3,), # Ideographic Annotation L..Cjk Stroke Q @@ -1415,8 +1439,7 @@ (0x0fe68, 0x0fe6b,), # Small Reverse Solidus ..Small Commercial At (0x0ff01, 0x0ff60,), # Fullwidth Exclamation Ma..Fullwidth Right White Pa (0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign - (0x16fe0, 0x16fe4,), # Tangut Iteration Mark ..Khitan Small Script Fill - (0x16ff0, 0x16ff1,), # Vietnamese Alternate Rea..Vietnamese Alternate Rea + (0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma (0x17000, 0x187f7,), # (nil) (0x18800, 0x18cd5,), # Tangut Component-001 ..Khitan Small Script Char (0x18d00, 0x18d08,), # (nil) @@ -1446,7 +1469,8 @@ (0x1f3cf, 0x1f3d3,), # Cricket Bat And Ball ..Table Tennis Paddle And (0x1f3e0, 0x1f3f0,), # House Building ..European Castle (0x1f3f4, 0x1f3f4,), # Waving Black Flag - (0x1f3f8, 0x1f43e,), # Badminton Racquet And Sh..Paw Prints + (0x1f3f8, 0x1f3fa,), # Badminton Racquet And Sh..Amphora + (0x1f400, 0x1f43e,), # Rat ..Paw Prints (0x1f440, 0x1f440,), # Eyes (0x1f442, 0x1f4fc,), # Ear ..Videocassette (0x1f4ff, 0x1f53d,), # Prayer Beads ..Down-pointing Small Red