diff --git a/lib/std/text/unicode.kk b/lib/std/text/unicode.kk index f8ca71531..8a8106e14 100644 --- a/lib/std/text/unicode.kk +++ b/lib/std/text/unicode.kk @@ -125,11 +125,11 @@ pub fun string/width( s : string ) : int { //-------------------------------------------------------------- // These characters are considered wide, i.e. 2 columns wide. -// https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt +// https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt // See ranges with postfix ;W // // Update with `python3 util/update-unicode.py -a` -// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt ) +// TODO: Handle 'unassigned' ranges: (Following is an excerpt from https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt ) // - All code points, assigned or unassigned, that are not listed // explicitly are given the value "N". // - The unassigned code points in the following blocks default to "W": @@ -181,7 +181,7 @@ val asian-wide : delayed = delay{ single(0x2E80,0x2E99), single(0x2E9B,0x2EF3), single(0x2F00,0x2FD5), - single(0x2FF0,0x2FFB), + single(0x2FF0,0x2FFF), single(0x3001,0x3003), single(0x3004,0x3004), single(0x3005,0x3005), @@ -237,7 +237,8 @@ val asian-wide : delayed = delay{ single(0x3192,0x3195), single(0x3196,0x319F), single(0x31A0,0x31BF), - single(0x31C0,0x31E3), + single(0x31C0,0x31E5), + single(0x31EF,0x31EF), single(0x31F0,0x31FF), single(0x3200,0x321E), single(0x3220,0x3229), @@ -314,6 +315,7 @@ val asian-wide : delayed = delay{ single(0x17000,0x187F7), single(0x18800,0x18AFF), single(0x18B00,0x18CD5), + single(0x18CFF,0x18CFF), single(0x18D00,0x18D08), single(0x1AFF0,0x1AFF3), single(0x1AFF5,0x1AFFB), @@ -368,11 +370,10 @@ val asian-wide : delayed = delay{ single(0x1F93C,0x1F945), single(0x1F947,0x1F9FF), single(0x1FA70,0x1FA7C), - single(0x1FA80,0x1FA88), - single(0x1FA90,0x1FABD), - single(0x1FABF,0x1FAC5), - single(0x1FACE,0x1FADB), - 
single(0x1FAE0,0x1FAE8), + single(0x1FA80,0x1FA89), + single(0x1FA8F,0x1FAC6), + single(0x1FACE,0x1FADC), + single(0x1FADF,0x1FAE9), single(0x1FAF0,0x1FAF8), single(0x20000,0x2A6DF), single(0x2A6E0,0x2A6FF), @@ -383,7 +384,9 @@ val asian-wide : delayed = delay{ single(0x2B820,0x2CEA1), single(0x2CEA2,0x2CEAF), single(0x2CEB0,0x2EBE0), - single(0x2EBE1,0x2F7FF), + single(0x2EBE1,0x2EBEF), + single(0x2EBF0,0x2EE5D), + single(0x2EE5E,0x2F7FF), single(0x2F800,0x2FA1D), single(0x2FA1E,0x2FA1F), single(0x2FA20,0x2FFFD), diff --git a/package.yaml b/package.yaml index d178457eb..f44137541 100644 --- a/package.yaml +++ b/package.yaml @@ -8,6 +8,9 @@ # - support/vscode/koka.language-koka/package.json # - whatsnew.md, readme.md +# Also update the Unicode `asian-wide` list in `std/text/unicode` +# using the output of `python3 util/update-unicode.py -a` +# after checking whether a Unicode version newer than the one in the URL it fetches from exists name: koka version: 3.0.5 diff --git a/util/update-unicode.py b/util/update-unicode.py index 948b7f9c6..1acd12c24 100644 --- a/util/update-unicode.py +++ b/util/update-unicode.py @@ -6,13 +6,14 @@ parser.add_argument("-a", "--asian_wide", default=False, action='store_true') args = parser.parse_args() if args.asian_wide: - result = requests.get("https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt") + # TODO: Check for the latest Unicode version (currently 16.0.0); when bumping it, also update the two links in the documentation in std/text/unicode + result = requests.get("https://www.unicode.org/Public/16.0.0/ucd/EastAsianWidth.txt") for line in result.text.split("\n"): values = line.split(";") if len(values) >= 2: width = values[1].split("#")[0].strip() if width == "W": - charrange = values[0].split("..") + charrange = values[0].strip().split("..") if len(charrange) == 2: print(f" single(0x{charrange[0]},0x{charrange[1]}),") elif len(charrange) == 1: