diff --git a/Makefile b/Makefile index 39f6d65c..4e49c028 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ include Makefile.bats .NOTPARALLEL: -VERSION=6.2.19 +VERSION=6.3.0 GO_PACKAGE_PREFIX := github.com/clearlinux/mixer-tools GOPATH ?= ${HOME}/go gopath = $(shell go env GOPATH) diff --git a/go.mod b/go.mod index a8c90fab..a971c38a 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rivo/uniseg v0.4.4 // indirect + github.com/rivo/uniseg v0.4.6 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.2.2 // indirect ) diff --git a/go.sum b/go.sum index 76074e4a..6a5da3ce 100644 --- a/go.sum +++ b/go.sum @@ -17,8 +17,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= -github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg= +github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= diff --git a/vendor/github.com/rivo/uniseg/README.md b/vendor/github.com/rivo/uniseg/README.md index 25e93468..a8191b81 100644 --- a/vendor/github.com/rivo/uniseg/README.md +++ b/vendor/github.com/rivo/uniseg/README.md @@ -3,7 +3,7 @@ [![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg) [![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg) -This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html). +This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 15.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html). ## Background @@ -73,7 +73,7 @@ for gr.Next() { ### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function -This is orders of magnitude faster than the `Graphemes` class, but it requires the handling of states and boundaries: +This avoids allocating a new `Graphemes` object but it requires the handling of states and boundaries: ```go str := "πŸ‡©πŸ‡ͺπŸ³οΈβ€πŸŒˆ" @@ -88,29 +88,7 @@ for len(str) > 0 { ### Advanced Examples -Breaking into grapheme clusters and evaluating line breaks: - -```go -str := "First line.\nSecond line." -state := -1 -var ( - c string - boundaries int -) -for len(str) > 0 { - c, str, boundaries, state = uniseg.StepString(str, state) - fmt.Print(c) - if boundaries&uniseg.MaskLine == uniseg.LineCanBreak { - fmt.Print("|") - } else if boundaries&uniseg.MaskLine == uniseg.LineMustBreak { - fmt.Print("β€–") - } -} -// First |line. -// β€–Second |line.β€– -``` - -If you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString): +The [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class offers the most convenient way to access all functionality of this package. But in some cases, it may be better to use the specialized functions directly. For example, if you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString): ```go str := "Hello, world!" @@ -133,6 +111,8 @@ Similarly, use - [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and - [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries). +If you're only interested in the width of characters, use [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString). It is much faster than using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step), [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString), or the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class because it does not include the logic for word / sentence / line boundaries. + Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString): ```go diff --git a/vendor/github.com/rivo/uniseg/eastasianwidth.go b/vendor/github.com/rivo/uniseg/eastasianwidth.go index 661934ac..5fc54d99 100644 --- a/vendor/github.com/rivo/uniseg/eastasianwidth.go +++ b/vendor/github.com/rivo/uniseg/eastasianwidth.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // eastAsianWidth are taken from -// https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt +// https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var eastAsianWidth = [][3]int{ {0x0000, 0x001F, prN}, // Cc [32] .. @@ -504,6 +504,7 @@ var eastAsianWidth = [][3]int{ {0x0CE2, 0x0CE3, prN}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE6, 0x0CEF, prN}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CF1, 0x0CF2, prN}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + {0x0CF3, 0x0CF3, prN}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT {0x0D00, 0x0D01, prN}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D02, 0x0D03, prN}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D04, 0x0D0C, prN}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -565,7 +566,7 @@ var eastAsianWidth = [][3]int{ {0x0EBD, 0x0EBD, prN}, // Lo LAO SEMIVOWEL SIGN NYO {0x0EC0, 0x0EC4, prN}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC6, 0x0EC6, prN}, // Lm LAO KO LA - {0x0EC8, 0x0ECD, prN}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0EC8, 0x0ECE, prN}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN {0x0ED0, 0x0ED9, prN}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0EDC, 0x0EDF, prN}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO {0x0F00, 0x0F00, prN}, // Lo TIBETAN SYLLABLE OM @@ -1916,6 +1917,7 @@ var eastAsianWidth = [][3]int{ {0x10EAB, 0x10EAC, prN}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAD, 0x10EAD, prN}, // Pd YEZIDI HYPHENATION MARK {0x10EB0, 0x10EB1, prN}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + {0x10EFD, 0x10EFF, prN}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA {0x10F00, 0x10F1C, prN}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F1D, 0x10F26, prN}, // No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF {0x10F27, 0x10F27, prN}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1998,6 +2000,8 @@ var eastAsianWidth = [][3]int{ {0x11236, 0x11237, prN}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x11238, 0x1123D, prN}, // Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN {0x1123E, 0x1123E, prN}, // Mn KHOJKI SIGN SUKUN + {0x1123F, 0x11240, prN}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I + {0x11241, 0x11241, prN}, // Mn KHOJKI VOWEL SIGN VOCALIC R {0x11280, 0x11286, prN}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11288, 0x11288, prN}, // Lo MULTANI LETTER GHA {0x1128A, 0x1128D, prN}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -2160,6 +2164,7 @@ var eastAsianWidth = [][3]int{ {0x11A9E, 0x11AA2, prN}, // Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 {0x11AB0, 0x11ABF, prN}, // Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA {0x11AC0, 0x11AF8, prN}, // Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL + {0x11B00, 0x11B09, prN}, // Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU {0x11C00, 0x11C08, prN}, // Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L {0x11C0A, 0x11C2E, prN}, // Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA {0x11C2F, 0x11C2F, prN}, // Mc BHAIKSUKI VOWEL SIGN AA @@ -2205,6 +2210,19 @@ var eastAsianWidth = [][3]int{ {0x11EF3, 0x11EF4, prN}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF5, 0x11EF6, prN}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF7, 0x11EF8, prN}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION + {0x11F00, 0x11F01, prN}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + {0x11F02, 0x11F02, prN}, // Lo KAWI SIGN REPHA + {0x11F03, 0x11F03, prN}, // Mc KAWI SIGN VISARGA + {0x11F04, 0x11F10, prN}, // Lo [13] KAWI LETTER A..KAWI LETTER O + {0x11F12, 0x11F33, prN}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA + {0x11F34, 0x11F35, prN}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + {0x11F36, 0x11F3A, prN}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + {0x11F3E, 0x11F3F, prN}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + {0x11F40, 0x11F40, prN}, // Mn KAWI VOWEL SIGN EU + {0x11F41, 0x11F41, prN}, // Mc KAWI SIGN KILLER + {0x11F42, 0x11F42, prN}, // Mn KAWI CONJOINER + {0x11F43, 0x11F4F, prN}, // Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL + {0x11F50, 0x11F59, prN}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE {0x11FB0, 0x11FB0, prN}, // Lo LISU LETTER YHA {0x11FC0, 0x11FD4, prN}, // No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH {0x11FD5, 0x11FDC, prN}, // So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -2217,8 +2235,11 @@ var eastAsianWidth = [][3]int{ {0x12480, 0x12543, prN}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12F90, 0x12FF0, prN}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 {0x12FF1, 0x12FF2, prN}, // Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 - {0x13000, 0x1342E, prN}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 - {0x13430, 0x13438, prN}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT + {0x13000, 0x1342F, prN}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D + {0x13430, 0x1343F, prN}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + {0x13440, 0x13440, prN}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + {0x13441, 0x13446, prN}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN + {0x13447, 0x13455, prN}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED {0x14400, 0x14646, prN}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x16800, 0x16A38, prN}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16A40, 0x16A5E, prN}, // Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -2263,7 +2284,9 @@ var eastAsianWidth = [][3]int{ {0x1AFFD, 0x1AFFE, prW}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1B000, 0x1B0FF, prW}, // Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 {0x1B100, 0x1B122, prW}, // Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU + {0x1B132, 0x1B132, prW}, // Lo HIRAGANA LETTER SMALL KO {0x1B150, 0x1B152, prW}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO + {0x1B155, 0x1B155, prW}, // Lo KATAKANA LETTER SMALL KO {0x1B164, 0x1B167, prW}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B170, 0x1B2FB, prW}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1BC00, 0x1BC6A, prN}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -2294,6 +2317,7 @@ var eastAsianWidth = [][3]int{ {0x1D200, 0x1D241, prN}, // So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 {0x1D242, 0x1D244, prN}, // Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME {0x1D245, 0x1D245, prN}, // So GREEK MUSICAL LEIMMA + {0x1D2C0, 0x1D2D3, prN}, // No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN {0x1D2E0, 0x1D2F3, prN}, // No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN {0x1D300, 0x1D356, prN}, // So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING {0x1D360, 0x1D378, prN}, // No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -2353,11 +2377,14 @@ var eastAsianWidth = [][3]int{ {0x1DF00, 0x1DF09, prN}, // Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prN}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0B, 0x1DF1E, prN}, // Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + {0x1DF25, 0x1DF2A, prN}, // Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK {0x1E000, 0x1E006, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E008, 0x1E018, prN}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E01B, 0x1E021, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E023, 0x1E024, prN}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E026, 0x1E02A, prN}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + {0x1E030, 0x1E06D, prN}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + {0x1E08F, 0x1E08F, prN}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {0x1E100, 0x1E12C, prN}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E130, 0x1E136, prN}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E137, 0x1E13D, prN}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2370,6 +2397,10 @@ var eastAsianWidth = [][3]int{ {0x1E2EC, 0x1E2EF, prN}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2F0, 0x1E2F9, prN}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2FF, 0x1E2FF, prN}, // Sc WANCHO NGUN SIGN + {0x1E4D0, 0x1E4EA, prN}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL + {0x1E4EB, 0x1E4EB, prN}, // Lm NAG MUNDARI SIGN OJOD + {0x1E4EC, 0x1E4EF, prN}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + {0x1E4F0, 0x1E4F9, prN}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE {0x1E7E0, 0x1E7E6, prN}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E8, 0x1E7EB, prN}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7ED, 0x1E7EE, prN}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2498,13 +2529,14 @@ var eastAsianWidth = [][3]int{ {0x1F6D0, 0x1F6D2, prW}, // So [3] PLACE OF WORSHIP..SHOPPING TROLLEY {0x1F6D3, 0x1F6D4, prN}, // So [2] STUPA..PAGODA {0x1F6D5, 0x1F6D7, prW}, // So [3] HINDU TEMPLE..ELEVATOR - {0x1F6DD, 0x1F6DF, prW}, // So [3] PLAYGROUND SLIDE..RING BUOY + {0x1F6DC, 0x1F6DF, prW}, // So [4] WIRELESS..RING BUOY {0x1F6E0, 0x1F6EA, prN}, // So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE {0x1F6EB, 0x1F6EC, prW}, // So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING {0x1F6F0, 0x1F6F3, prN}, // So [4] SATELLITE..PASSENGER SHIP {0x1F6F4, 0x1F6FC, prW}, // So [9] SCOOTER..ROLLER SKATE - {0x1F700, 0x1F773, prN}, // So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE - {0x1F780, 0x1F7D8, prN}, // So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE + {0x1F700, 0x1F776, prN}, // So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE + {0x1F77B, 0x1F77F, prN}, // So [5] HAUMEA..ORCUS + {0x1F780, 0x1F7D9, prN}, // So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR {0x1F7E0, 0x1F7EB, prW}, // So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE {0x1F7F0, 0x1F7F0, prW}, // So HEAVY EQUALS SIGN {0x1F800, 0x1F80B, prN}, // So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD @@ -2521,22 +2553,20 @@ var eastAsianWidth = [][3]int{ {0x1F947, 0x1F9FF, prW}, // So [185] FIRST PLACE MEDAL..NAZAR AMULET {0x1FA00, 0x1FA53, prN}, // So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP {0x1FA60, 0x1FA6D, prN}, // So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER - {0x1FA70, 0x1FA74, prW}, // So [5] BALLET SHOES..THONG SANDAL - {0x1FA78, 0x1FA7C, prW}, // So [5] DROP OF BLOOD..CRUTCH - {0x1FA80, 0x1FA86, prW}, // So [7] YO-YO..NESTING DOLLS - {0x1FA90, 0x1FAAC, prW}, // So [29] RINGED PLANET..HAMSA - {0x1FAB0, 0x1FABA, prW}, // So [11] FLY..NEST WITH EGGS - {0x1FAC0, 0x1FAC5, prW}, // So [6] ANATOMICAL HEART..PERSON WITH CROWN - {0x1FAD0, 0x1FAD9, prW}, // So [10] BLUEBERRIES..JAR - {0x1FAE0, 0x1FAE7, prW}, // So [8] MELTING FACE..BUBBLES - {0x1FAF0, 0x1FAF6, prW}, // So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS + {0x1FA70, 0x1FA7C, prW}, // So [13] BALLET SHOES..CRUTCH + {0x1FA80, 0x1FA88, prW}, // So [9] YO-YO..FLUTE + {0x1FA90, 0x1FABD, prW}, // So [46] RINGED PLANET..WING + {0x1FABF, 0x1FAC5, prW}, // So [7] GOOSE..PERSON WITH CROWN + {0x1FACE, 0x1FADB, prW}, // So [14] MOOSE..PEA POD + {0x1FAE0, 0x1FAE8, prW}, // So [9] MELTING FACE..SHAKING FACE + {0x1FAF0, 0x1FAF8, prW}, // So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND {0x1FB00, 0x1FB92, prN}, // So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK {0x1FB94, 0x1FBCA, prN}, // So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON {0x1FBF0, 0x1FBF9, prN}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x20000, 0x2A6DF, prW}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF {0x2A6E0, 0x2A6FF, prW}, // Cn [32] .. - {0x2A700, 0x2B738, prW}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 - {0x2B739, 0x2B73F, prW}, // Cn [7] .. + {0x2A700, 0x2B739, prW}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 + {0x2B73A, 0x2B73F, prW}, // Cn [6] .. {0x2B740, 0x2B81D, prW}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B81E, 0x2B81F, prW}, // Cn [2] .. {0x2B820, 0x2CEA1, prW}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 @@ -2547,7 +2577,9 @@ var eastAsianWidth = [][3]int{ {0x2FA1E, 0x2FA1F, prW}, // Cn [2] .. {0x2FA20, 0x2FFFD, prW}, // Cn [1502] .. {0x30000, 0x3134A, prW}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A - {0x3134B, 0x3FFFD, prW}, // Cn [60595] .. + {0x3134B, 0x3134F, prW}, // Cn [5] .. + {0x31350, 0x323AF, prW}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + {0x323B0, 0x3FFFD, prW}, // Cn [56398] .. {0xE0001, 0xE0001, prN}, // Cf LANGUAGE TAG {0xE0020, 0xE007F, prN}, // Cf [96] TAG SPACE..CANCEL TAG {0xE0100, 0xE01EF, prA}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/vendor/github.com/rivo/uniseg/emojipresentation.go b/vendor/github.com/rivo/uniseg/emojipresentation.go index fd0f7451..9b5f499c 100644 --- a/vendor/github.com/rivo/uniseg/emojipresentation.go +++ b/vendor/github.com/rivo/uniseg/emojipresentation.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // emojiPresentation are taken from // // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var emojiPresentation = [][3]int{ {0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..βŒ›) watch..hourglass done @@ -211,6 +211,7 @@ var emojiPresentation = [][3]int{ {0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (πŸ›‘..πŸ›’) stop sign..shopping cart {0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (πŸ›•) hindu temple {0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (πŸ›–..πŸ›—) hut..elevator + {0x1F6DC, 0x1F6DC, prEmojiPresentation}, // E15.0 [1] (πŸ›œ) wireless {0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (πŸ›..πŸ›Ÿ) playground slide..ring buoy {0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (πŸ›«..πŸ›¬) airplane departure..airplane arrival {0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (πŸ›΄..πŸ›Ά) kick scooter..canoe @@ -267,19 +268,28 @@ var emojiPresentation = [][3]int{ {0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet {0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal + {0x1FA75, 0x1FA77, prEmojiPresentation}, // E15.0 [3] (🩡..🩷) light blue heart..pink heart {0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (πŸͺ€..πŸͺ‚) yo-yo..parachute {0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (πŸͺƒ..πŸͺ†) boomerang..nesting dolls + {0x1FA87, 0x1FA88, prEmojiPresentation}, // E15.0 [2] (πŸͺ‡..πŸͺˆ) maracas..flute {0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (πŸͺ..πŸͺ•) ringed planet..banjo {0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (πŸͺ–..πŸͺ¨) military helmet..rock {0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (πŸͺ©..πŸͺ¬) mirror ball..hamsa + {0x1FAAD, 0x1FAAF, prEmojiPresentation}, // E15.0 [3] (πŸͺ­..πŸͺ―) folding hand fan..khanda {0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (πŸͺ°..πŸͺΆ) fly..feather {0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (πŸͺ·..πŸͺΊ) lotus..nest with eggs + {0x1FABB, 0x1FABD, prEmojiPresentation}, // E15.0 [3] (πŸͺ»..πŸͺ½) hyacinth..wing + {0x1FABF, 0x1FABF, prEmojiPresentation}, // E15.0 [1] (πŸͺΏ) goose {0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (πŸ«€..πŸ«‚) anatomical heart..people hugging {0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (πŸ«ƒ..πŸ«…) pregnant man..person with crown + {0x1FACE, 0x1FACF, prEmojiPresentation}, // E15.0 [2] (🫎..🫏) moose..donkey {0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..πŸ«–) blueberries..teapot {0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (πŸ«—..πŸ«™) pouring liquid..jar + {0x1FADA, 0x1FADB, prEmojiPresentation}, // E15.0 [2] (🫚..πŸ«›) ginger root..pea pod {0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles + {0x1FAE8, 0x1FAE8, prEmojiPresentation}, // E15.0 [1] (🫨) shaking face {0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫢) hand with index finger and thumb crossed..heart hands + {0x1FAF7, 0x1FAF8, prEmojiPresentation}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand } diff --git a/vendor/github.com/rivo/uniseg/gen_breaktest.go b/vendor/github.com/rivo/uniseg/gen_breaktest.go index e613c4cd..6bfbeb5e 100644 --- a/vendor/github.com/rivo/uniseg/gen_breaktest.go +++ b/vendor/github.com/rivo/uniseg/gen_breaktest.go @@ -32,7 +32,7 @@ import ( // We want to test against a specific version rather than the latest. When the // package is upgraded to a new version, change these to generate new tests. const ( - testCaseURL = `https://www.unicode.org/Public/14.0.0/ucd/auxiliary/%s.txt` + testCaseURL = `https://www.unicode.org/Public/15.0.0/ucd/auxiliary/%s.txt` ) func main() { @@ -76,9 +76,9 @@ func parse(url string) ([]byte, error) { buf := new(bytes.Buffer) buf.Grow(120 << 10) - buf.WriteString(`package uniseg + buf.WriteString(`// Code generated via go generate from gen_breaktest.go. DO NOT EDIT. -// Code generated via go generate from gen_breaktest.go. DO NOT EDIT. +package uniseg // ` + os.Args[3] + ` are Grapheme testcases taken from // ` + url + ` @@ -136,7 +136,9 @@ var ( // // E.g.Β for the inputΒ b="Γ· 0020 Γ— 0308 Γ· 1F1E6 Γ·" // it will append -// "\u0020\u0308\U0001F1E6" +// +// "\u0020\u0308\U0001F1E6" +// // and "[][]rune{{0x0020,0x0308},{0x1F1E6},}" // to orig and exp respectively. // diff --git a/vendor/github.com/rivo/uniseg/gen_properties.go b/vendor/github.com/rivo/uniseg/gen_properties.go index 999d5efd..8992d2c5 100644 --- a/vendor/github.com/rivo/uniseg/gen_properties.go +++ b/vendor/github.com/rivo/uniseg/gen_properties.go @@ -41,8 +41,8 @@ import ( // We want to test against a specific version rather than the latest. When the // package is upgraded to a new version, change these to generate new tests. const ( - propertyURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt` - emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt` + propertyURL = `https://www.unicode.org/Public/15.0.0/ucd/%s.txt` + emojiURL = `https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt` ) // The regular expression for a line containing a code point range property. @@ -178,6 +178,11 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri } } + // Avoid overflow during binary search. + if len(properties) >= 1<<31 { + return "", errors.New("too many properties") + } + // Sort properties. sort.Slice(properties, func(i, j int) bool { left, _ := strconv.ParseUint(properties[i][0], 16, 64) @@ -200,9 +205,9 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri // ` + emojiURL + ` // ("Extended_Pictographic" only)` } - buf.WriteString(`package uniseg + buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT. -// Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg // ` + os.Args[3] + ` are taken from // ` + propertyURL + emojiComment + ` diff --git a/vendor/github.com/rivo/uniseg/grapheme.go b/vendor/github.com/rivo/uniseg/grapheme.go index 0086fc1b..a0bcc554 100644 --- a/vendor/github.com/rivo/uniseg/grapheme.go +++ b/vendor/github.com/rivo/uniseg/grapheme.go @@ -222,7 +222,7 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, new if len(b) <= length { // If we're already past the end, there is nothing else to parse. var prop int if state < 0 { - prop = property(graphemeCodePoints, r) + prop = propertyGraphemes(r) } else { prop = state >> shiftGraphemePropState } @@ -284,7 +284,7 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, if len(str) <= length { // If we're already past the end, there is nothing else to parse. var prop int if state < 0 { - prop = property(graphemeCodePoints, r) + prop = propertyGraphemes(r) } else { prop = state >> shiftGraphemePropState } diff --git a/vendor/github.com/rivo/uniseg/graphemeproperties.go b/vendor/github.com/rivo/uniseg/graphemeproperties.go index a87d140b..0aff4a61 100644 --- a/vendor/github.com/rivo/uniseg/graphemeproperties.go +++ b/vendor/github.com/rivo/uniseg/graphemeproperties.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // graphemeCodePoints are taken from -// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt +// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var graphemeCodePoints = [][3]int{ {0x0000, 0x0009, prControl}, // Cc [10] .. @@ -143,6 +143,7 @@ var graphemeCodePoints = [][3]int{ {0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA {0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL + {0x0CF3, 0x0CF3, prSpacingMark}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA @@ -172,7 +173,7 @@ var graphemeCodePoints = [][3]int{ {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN {0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO - {0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS {0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA {0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS @@ -1336,6 +1337,7 @@ var graphemeCodePoints = [][3]int{ {0x10AE5, 0x10AE6, prExtend}, // Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW {0x10D24, 0x10D27, prExtend}, // Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK + {0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA {0x10F46, 0x10F50, prExtend}, // Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW {0x10F82, 0x10F85, prExtend}, // Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW {0x11000, 0x11000, prSpacingMark}, // Mc BRAHMI SIGN CANDRABINDU @@ -1375,6 +1377,7 @@ var graphemeCodePoints = [][3]int{ {0x11235, 0x11235, prSpacingMark}, // Mc KHOJKI SIGN VIRAMA {0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN + {0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R {0x112DF, 0x112DF, prExtend}, // Mn KHUDAWADI SIGN ANUSVARA {0x112E0, 0x112E2, prSpacingMark}, // Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II {0x112E3, 0x112EA, prExtend}, // Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA @@ -1494,7 +1497,18 @@ var graphemeCodePoints = [][3]int{ {0x11D97, 0x11D97, prExtend}, // Mn GUNJALA GONDI VIRAMA {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF5, 0x11EF6, prSpacingMark}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O - {0x13430, 0x13438, prControl}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT + {0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + {0x11F02, 0x11F02, prPrepend}, // Lo KAWI SIGN REPHA + {0x11F03, 0x11F03, prSpacingMark}, // Mc KAWI SIGN VISARGA + {0x11F34, 0x11F35, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + {0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + {0x11F3E, 0x11F3F, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + {0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU + {0x11F41, 0x11F41, prSpacingMark}, // Mc KAWI SIGN KILLER + {0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER + {0x13430, 0x1343F, prControl}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + {0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + {0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED {0x16AF0, 0x16AF4, prExtend}, // Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE {0x16B30, 0x16B36, prExtend}, // Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM {0x16F4F, 0x16F4F, prExtend}, // Mn MIAO SIGN CONSONANT MODIFIER BAR @@ -1527,9 +1541,11 @@ var graphemeCodePoints = [][3]int{ {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + {0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E2AE, 0x1E2AE, prExtend}, // Mn TOTO SIGN RISING TONE {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI + {0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH {0x1E8D0, 0x1E8D6, prExtend}, // Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS {0x1E944, 0x1E94A, prExtend}, // Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA {0x1F000, 0x1F003, prExtendedPictographic}, // E0.0 [4] (πŸ€€..πŸ€ƒ) MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND @@ -1780,7 +1796,8 @@ var graphemeCodePoints = [][3]int{ {0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (πŸ›“..πŸ›”) STUPA..PAGODA {0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (πŸ›•) hindu temple {0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (πŸ›–..πŸ›—) hut..elevator - {0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (πŸ›˜..πŸ›œ) .. + {0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (πŸ›˜..πŸ››) .. + {0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (πŸ›œ) wireless {0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (πŸ›..πŸ›Ÿ) playground slide..ring buoy {0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (πŸ› οΈ..πŸ›₯️) hammer and wrench..motor boat {0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (πŸ›¦..πŸ›¨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE @@ -1797,7 +1814,7 @@ var graphemeCodePoints = [][3]int{ {0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (πŸ›Ί) auto rickshaw {0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (πŸ›»..πŸ›Ό) pickup truck..roller skate {0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (πŸ›½..πŸ›Ώ) .. - {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) .. + {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS {0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (πŸŸ•..🟟) CIRCLED TRIANGLE.. {0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square {0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) .. @@ -1856,30 +1873,37 @@ var graphemeCodePoints = [][3]int{ {0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (πŸ¨€..🩯) NEUTRAL CHESS KING.. {0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal - {0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩡..🩷) .. + {0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩡..🩷) light blue heart..pink heart {0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) .. {0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (πŸͺ€..πŸͺ‚) yo-yo..parachute {0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (πŸͺƒ..πŸͺ†) boomerang..nesting dolls - {0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (πŸͺ‡..πŸͺ) .. + {0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (πŸͺ‡..πŸͺˆ) maracas..flute + {0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (πŸͺ‰..πŸͺ) .. {0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (πŸͺ..πŸͺ•) ringed planet..banjo {0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (πŸͺ–..πŸͺ¨) military helmet..rock {0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (πŸͺ©..πŸͺ¬) mirror ball..hamsa - {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (πŸͺ­..πŸͺ―) .. + {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (πŸͺ­..πŸͺ―) folding hand fan..khanda {0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (πŸͺ°..πŸͺΆ) fly..feather {0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (πŸͺ·..πŸͺΊ) lotus..nest with eggs - {0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (πŸͺ»..πŸͺΏ) .. + {0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (πŸͺ»..πŸͺ½) hyacinth..wing + {0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (πŸͺΎ) + {0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (πŸͺΏ) goose {0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (πŸ«€..πŸ«‚) anatomical heart..people hugging {0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (πŸ«ƒ..πŸ«…) pregnant man..person with crown - {0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (πŸ«†..🫏) .. + {0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (πŸ«†..🫍) .. + {0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey {0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..πŸ«–) blueberries..teapot {0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (πŸ«—..πŸ«™) pouring liquid..jar - {0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) .. + {0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..πŸ«›) ginger root..pea pod + {0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) .. {0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles - {0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) .. + {0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face + {0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) .. {0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫢) hand with index finger and thumb crossed..heart hands - {0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) .. + {0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand + {0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) .. {0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (πŸ°€..🿽) .. {0xE0000, 0xE0000, prControl}, // Cn {0xE0001, 0xE0001, prControl}, // Cf LANGUAGE TAG diff --git a/vendor/github.com/rivo/uniseg/graphemerules.go b/vendor/github.com/rivo/uniseg/graphemerules.go index 9f46b575..5d399d29 100644 --- a/vendor/github.com/rivo/uniseg/graphemerules.go +++ b/vendor/github.com/rivo/uniseg/graphemerules.go @@ -21,11 +21,12 @@ const ( grBoundary ) -// The grapheme cluster parser's state transitions. Maps (state, property) to -// (new state, breaking instruction, rule number). The breaking instruction -// always refers to the boundary between the last and next code point. +// grTransitions implements the grapheme cluster parser's state transitions. +// Maps state and property to a new state, a breaking instruction, and rule +// number. The breaking instruction always refers to the boundary between the +// last and next code point. Returns negative values if no transition is found. // -// This map is queried as follows: +// This function is used as follows: // // 1. Find specific state + specific property. Stop if found. // 2. Find specific state + any property. @@ -36,59 +37,96 @@ const ( // are equal. Stop. // 6. Assume grAny and grBoundary. // -// Unicode version 14.0.0. -var grTransitions = map[[2]int][3]int{ +// Unicode version 15.0.0. +func grTransitions(state, prop int) (newState int, newProp int, boundary int) { + // It turns out that using a big switch statement is much faster than using + // a map. + + switch uint64(state) | uint64(prop)<<32 { // GB5 - {grAny, prCR}: {grCR, grBoundary, 50}, - {grAny, prLF}: {grControlLF, grBoundary, 50}, - {grAny, prControl}: {grControlLF, grBoundary, 50}, + case grAny | prCR<<32: + return grCR, grBoundary, 50 + case grAny | prLF<<32: + return grControlLF, grBoundary, 50 + case grAny | prControl<<32: + return grControlLF, grBoundary, 50 // GB4 - {grCR, prAny}: {grAny, grBoundary, 40}, - {grControlLF, prAny}: {grAny, grBoundary, 40}, - - // GB3. - {grCR, prLF}: {grControlLF, grNoBoundary, 30}, - - // GB6. - {grAny, prL}: {grL, grBoundary, 9990}, - {grL, prL}: {grL, grNoBoundary, 60}, - {grL, prV}: {grLVV, grNoBoundary, 60}, - {grL, prLV}: {grLVV, grNoBoundary, 60}, - {grL, prLVT}: {grLVTT, grNoBoundary, 60}, - - // GB7. - {grAny, prLV}: {grLVV, grBoundary, 9990}, - {grAny, prV}: {grLVV, grBoundary, 9990}, - {grLVV, prV}: {grLVV, grNoBoundary, 70}, - {grLVV, prT}: {grLVTT, grNoBoundary, 70}, - - // GB8. - {grAny, prLVT}: {grLVTT, grBoundary, 9990}, - {grAny, prT}: {grLVTT, grBoundary, 9990}, - {grLVTT, prT}: {grLVTT, grNoBoundary, 80}, - - // GB9. - {grAny, prExtend}: {grAny, grNoBoundary, 90}, - {grAny, prZWJ}: {grAny, grNoBoundary, 90}, - - // GB9a. - {grAny, prSpacingMark}: {grAny, grNoBoundary, 91}, - - // GB9b. - {grAny, prPrepend}: {grPrepend, grBoundary, 9990}, - {grPrepend, prAny}: {grAny, grNoBoundary, 92}, - - // GB11. - {grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990}, - {grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110}, - {grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110}, - {grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110}, - - // GB12 / GB13. - {grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990}, - {grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120}, - {grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120}, + case grCR | prAny<<32: + return grAny, grBoundary, 40 + case grControlLF | prAny<<32: + return grAny, grBoundary, 40 + + // GB3 + case grCR | prLF<<32: + return grControlLF, grNoBoundary, 30 + + // GB6 + case grAny | prL<<32: + return grL, grBoundary, 9990 + case grL | prL<<32: + return grL, grNoBoundary, 60 + case grL | prV<<32: + return grLVV, grNoBoundary, 60 + case grL | prLV<<32: + return grLVV, grNoBoundary, 60 + case grL | prLVT<<32: + return grLVTT, grNoBoundary, 60 + + // GB7 + case grAny | prLV<<32: + return grLVV, grBoundary, 9990 + case grAny | prV<<32: + return grLVV, grBoundary, 9990 + case grLVV | prV<<32: + return grLVV, grNoBoundary, 70 + case grLVV | prT<<32: + return grLVTT, grNoBoundary, 70 + + // GB8 + case grAny | prLVT<<32: + return grLVTT, grBoundary, 9990 + case grAny | prT<<32: + return grLVTT, grBoundary, 9990 + case grLVTT | prT<<32: + return grLVTT, grNoBoundary, 80 + + // GB9 + case grAny | prExtend<<32: + return grAny, grNoBoundary, 90 + case grAny | prZWJ<<32: + return grAny, grNoBoundary, 90 + + // GB9a + case grAny | prSpacingMark<<32: + return grAny, grNoBoundary, 91 + + // GB9b + case grAny | prPrepend<<32: + return grPrepend, grBoundary, 9990 + case grPrepend | prAny<<32: + return grAny, grNoBoundary, 92 + + // GB11 + case grAny | prExtendedPictographic<<32: + return grExtendedPictographic, grBoundary, 9990 + case grExtendedPictographic | prExtend<<32: + return grExtendedPictographic, grNoBoundary, 110 + case grExtendedPictographic | prZWJ<<32: + return grExtendedPictographicZWJ, grNoBoundary, 110 + case grExtendedPictographicZWJ | prExtendedPictographic<<32: + return grExtendedPictographic, grNoBoundary, 110 + + // GB12 / GB13 + case grAny | prRegionalIndicator<<32: + return grRIOdd, grBoundary, 9990 + case grRIOdd | prRegionalIndicator<<32: + return grRIEven, grNoBoundary, 120 + case grRIEven | prRegionalIndicator<<32: + return grRIOdd, grBoundary, 120 + default: + return -1, -1, -1 + } } // transitionGraphemeState determines the new state of the grapheme cluster @@ -97,40 +135,40 @@ var grTransitions = map[[2]int][3]int{ // table) and whether a cluster boundary was detected. func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) { // Determine the property of the next character. - prop = property(graphemeCodePoints, r) + prop = propertyGraphemes(r) // Find the applicable transition. - transition, ok := grTransitions[[2]int{state, prop}] - if ok { + nextState, nextProp, _ := grTransitions(state, prop) + if nextState >= 0 { // We have a specific transition. We'll use it. - return transition[0], prop, transition[1] == grBoundary + return nextState, prop, nextProp == grBoundary } // No specific transition found. Try the less specific ones. - transAnyProp, okAnyProp := grTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := grTransitions[[2]int{grAny, prop}] - if okAnyProp && okAnyState { + anyPropState, anyPropProp, anyPropRule := grTransitions(state, prAny) + anyStateState, anyStateProp, anyStateRule := grTransitions(grAny, prop) + if anyPropState >= 0 && anyStateState >= 0 { // Both apply. We'll use a mix (see comments for grTransitions). - newState = transAnyState[0] - boundary = transAnyState[1] == grBoundary - if transAnyProp[2] < transAnyState[2] { - boundary = transAnyProp[1] == grBoundary + newState = anyStateState + boundary = anyStateProp == grBoundary + if anyPropRule < anyStateRule { + boundary = anyPropProp == grBoundary } return } - if okAnyProp { + if anyPropState >= 0 { // We only have a specific state. - return transAnyProp[0], prop, transAnyProp[1] == grBoundary + return anyPropState, prop, anyPropProp == grBoundary // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be // true anymore. } - if okAnyState { + if anyStateState >= 0 { // We only have a specific property. - return transAnyState[0], prop, transAnyState[1] == grBoundary + return anyStateState, prop, anyStateProp == grBoundary } // No known transition. GB999: Any Γ· Any. diff --git a/vendor/github.com/rivo/uniseg/line.go b/vendor/github.com/rivo/uniseg/line.go index 87f28503..7a46318d 100644 --- a/vendor/github.com/rivo/uniseg/line.go +++ b/vendor/github.com/rivo/uniseg/line.go @@ -80,7 +80,7 @@ func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool } } -// FirstLineSegmentInString is like FirstLineSegment() but its input and outputs +// FirstLineSegmentInString is like [FirstLineSegment] but its input and outputs // are strings. func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) { // An empty byte slice returns nothing. @@ -122,13 +122,13 @@ func FirstLineSegmentInString(str string, state int) (segment, rest string, must // [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm func HasTrailingLineBreak(b []byte) bool { r, _ := utf8.DecodeLastRune(b) - property, _ := propertyWithGenCat(lineBreakCodePoints, r) - return property == lbBK || property == lbCR || property == lbLF || property == lbNL + property, _ := propertyLineBreak(r) + return property == prBK || property == prCR || property == prLF || property == prNL } // HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string. func HasTrailingLineBreakInString(str string) bool { r, _ := utf8.DecodeLastRuneInString(str) - property, _ := propertyWithGenCat(lineBreakCodePoints, r) - return property == lbBK || property == lbCR || property == lbLF || property == lbNL + property, _ := propertyLineBreak(r) + return property == prBK || property == prCR || property == prLF || property == prNL } diff --git a/vendor/github.com/rivo/uniseg/lineproperties.go b/vendor/github.com/rivo/uniseg/lineproperties.go index 32169306..ac7fac4c 100644 --- a/vendor/github.com/rivo/uniseg/lineproperties.go +++ b/vendor/github.com/rivo/uniseg/lineproperties.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // lineBreakCodePoints are taken from -// https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt +// https://www.unicode.org/Public/15.0.0/ucd/LineBreak.txt // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var lineBreakCodePoints = [][4]int{ {0x0000, 0x0008, prCM, gcCc}, // [9] .. @@ -439,6 +439,7 @@ var lineBreakCodePoints = [][4]int{ {0x0CE2, 0x0CE3, prCM, gcMn}, // [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE6, 0x0CEF, prNU, gcNd}, // [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CF1, 0x0CF2, prAL, gcLo}, // [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + {0x0CF3, 0x0CF3, prCM, gcMc}, // KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT {0x0D00, 0x0D01, prCM, gcMn}, // [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D02, 0x0D03, prCM, gcMc}, // [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D04, 0x0D0C, prAL, gcLo}, // [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -500,7 +501,7 @@ var lineBreakCodePoints = [][4]int{ {0x0EBD, 0x0EBD, prSA, gcLo}, // LAO SEMIVOWEL SIGN NYO {0x0EC0, 0x0EC4, prSA, gcLo}, // [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC6, 0x0EC6, prSA, gcLm}, // LAO KO LA - {0x0EC8, 0x0ECD, prSA, gcMn}, // [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0EC8, 0x0ECE, prSA, gcMn}, // [7] LAO TONE MAI EK..LAO YAMAKKAN {0x0ED0, 0x0ED9, prNU, gcNd}, // [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0EDC, 0x0EDF, prSA, gcLo}, // [4] LAO HO NO..LAO LETTER KHMU NYO {0x0F00, 0x0F00, prAL, gcLo}, // TIBETAN SYLLABLE OM @@ -813,7 +814,11 @@ var lineBreakCodePoints = [][4]int{ {0x1D79, 0x1D7F, prAL, gcLl}, // [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE {0x1D80, 0x1D9A, prAL, gcLl}, // [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK {0x1D9B, 0x1DBF, prAL, gcLm}, // [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA - {0x1DC0, 0x1DFF, prCM, gcMn}, // [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW + {0x1DC0, 0x1DCC, prCM, gcMn}, // [13] COMBINING DOTTED GRAVE ACCENT..COMBINING MACRON-BREVE + {0x1DCD, 0x1DCD, prGL, gcMn}, // COMBINING DOUBLE CIRCUMFLEX ABOVE + {0x1DCE, 0x1DFB, prCM, gcMn}, // [46] COMBINING OGONEK ABOVE..COMBINING DELETION MARK + {0x1DFC, 0x1DFC, prGL, gcMn}, // COMBINING DOUBLE INVERTED BREVE BELOW + {0x1DFD, 0x1DFF, prCM, gcMn}, // [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW {0x1E00, 0x1EFF, prAL, gcLC}, // [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP {0x1F00, 0x1F15, prAL, gcLC}, // [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA {0x1F18, 0x1F1D, prAL, gcLu}, // [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA @@ -889,7 +894,7 @@ var lineBreakCodePoints = [][4]int{ {0x2054, 0x2054, prAL, gcPc}, // INVERTED UNDERTIE {0x2055, 0x2055, prAL, gcPo}, // FLOWER PUNCTUATION MARK {0x2056, 0x2056, prBA, gcPo}, // THREE DOT PUNCTUATION - {0x2057, 0x2057, prAL, gcPo}, // QUADRUPLE PRIME + {0x2057, 0x2057, prPO, gcPo}, // QUADRUPLE PRIME {0x2058, 0x205B, prBA, gcPo}, // [4] FOUR DOT PUNCTUATION..FOUR DOT MARK {0x205C, 0x205C, prAL, gcPo}, // DOTTED CROSS {0x205D, 0x205E, prBA, gcPo}, // [2] TRICOLON..VERTICAL FOUR DOTS @@ -2751,6 +2756,7 @@ var lineBreakCodePoints = [][4]int{ {0x10EAB, 0x10EAC, prCM, gcMn}, // [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAD, 0x10EAD, prBA, gcPd}, // YEZIDI HYPHENATION MARK {0x10EB0, 0x10EB1, prAL, gcLo}, // [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + {0x10EFD, 0x10EFF, prCM, gcMn}, // [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA {0x10F00, 0x10F1C, prAL, gcLo}, // [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F1D, 0x10F26, prAL, gcNo}, // [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF {0x10F27, 0x10F27, prAL, gcLo}, // OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2840,6 +2846,8 @@ var lineBreakCodePoints = [][4]int{ {0x1123B, 0x1123C, prBA, gcPo}, // [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK {0x1123D, 0x1123D, prAL, gcPo}, // KHOJKI ABBREVIATION SIGN {0x1123E, 0x1123E, prCM, gcMn}, // KHOJKI SIGN SUKUN + {0x1123F, 0x11240, prAL, gcLo}, // [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I + {0x11241, 0x11241, prCM, gcMn}, // KHOJKI VOWEL SIGN VOCALIC R {0x11280, 0x11286, prAL, gcLo}, // [7] MULTANI LETTER A..MULTANI LETTER GA {0x11288, 0x11288, prAL, gcLo}, // MULTANI LETTER GHA {0x1128A, 0x1128D, prAL, gcLo}, // [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -3013,6 +3021,7 @@ var lineBreakCodePoints = [][4]int{ {0x11AA1, 0x11AA2, prBA, gcPo}, // [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 {0x11AB0, 0x11ABF, prAL, gcLo}, // [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA {0x11AC0, 0x11AF8, prAL, gcLo}, // [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL + {0x11B00, 0x11B09, prBB, gcPo}, // [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU {0x11C00, 0x11C08, prAL, gcLo}, // [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L {0x11C0A, 0x11C2E, prAL, gcLo}, // [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA {0x11C2F, 0x11C2F, prCM, gcMc}, // BHAIKSUKI VOWEL SIGN AA @@ -3059,6 +3068,20 @@ var lineBreakCodePoints = [][4]int{ {0x11EF3, 0x11EF4, prCM, gcMn}, // [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF5, 0x11EF6, prCM, gcMc}, // [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF7, 0x11EF8, prAL, gcPo}, // [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION + {0x11F00, 0x11F01, prCM, gcMn}, // [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + {0x11F02, 0x11F02, prAL, gcLo}, // KAWI SIGN REPHA + {0x11F03, 0x11F03, prCM, gcMc}, // KAWI SIGN VISARGA + {0x11F04, 0x11F10, prAL, gcLo}, // [13] KAWI LETTER A..KAWI LETTER O + {0x11F12, 0x11F33, prAL, gcLo}, // [34] KAWI LETTER KA..KAWI LETTER JNYA + {0x11F34, 0x11F35, prCM, gcMc}, // [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + {0x11F36, 0x11F3A, prCM, gcMn}, // [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + {0x11F3E, 0x11F3F, prCM, gcMc}, // [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + {0x11F40, 0x11F40, prCM, gcMn}, // KAWI VOWEL SIGN EU + {0x11F41, 0x11F41, prCM, gcMc}, // KAWI SIGN KILLER + {0x11F42, 0x11F42, prCM, gcMn}, // KAWI CONJOINER + {0x11F43, 0x11F44, prBA, gcPo}, // [2] KAWI DANDA..KAWI DOUBLE DANDA + {0x11F45, 0x11F4F, prID, gcPo}, // [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL + {0x11F50, 0x11F59, prNU, gcNd}, // [10] KAWI DIGIT ZERO..KAWI DIGIT NINE {0x11FB0, 0x11FB0, prAL, gcLo}, // LISU LETTER YHA {0x11FC0, 0x11FD4, prAL, gcNo}, // [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH {0x11FD5, 0x11FDC, prAL, gcSo}, // [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI @@ -3084,10 +3107,18 @@ var lineBreakCodePoints = [][4]int{ {0x1328A, 0x13378, prAL, gcLo}, // [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011 {0x13379, 0x13379, prOP, gcLo}, // EGYPTIAN HIEROGLYPH V011A {0x1337A, 0x1337B, prCL, gcLo}, // [2] EGYPTIAN HIEROGLYPH V011B..EGYPTIAN HIEROGLYPH V011C - {0x1337C, 0x1342E, prAL, gcLo}, // [179] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032 + {0x1337C, 0x1342F, prAL, gcLo}, // [180] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH V011D {0x13430, 0x13436, prGL, gcCf}, // [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE {0x13437, 0x13437, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN SEGMENT {0x13438, 0x13438, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END SEGMENT + {0x13439, 0x1343B, prGL, gcCf}, // [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM + {0x1343C, 0x1343C, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN ENCLOSURE + {0x1343D, 0x1343D, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END ENCLOSURE + {0x1343E, 0x1343E, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN WALLED ENCLOSURE + {0x1343F, 0x1343F, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + {0x13440, 0x13440, prCM, gcMn}, // EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + {0x13441, 0x13446, prAL, gcLo}, // [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN + {0x13447, 0x13455, prCM, gcMn}, // [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED {0x14400, 0x145CD, prAL, gcLo}, // [462] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A409 {0x145CE, 0x145CE, prOP, gcLo}, // ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK {0x145CF, 0x145CF, prCL, gcLo}, // ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK @@ -3137,7 +3168,9 @@ var lineBreakCodePoints = [][4]int{ {0x1AFFD, 0x1AFFE, prAL, gcLm}, // [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1B000, 0x1B0FF, prID, gcLo}, // [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 {0x1B100, 0x1B122, prID, gcLo}, // [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU + {0x1B132, 0x1B132, prCJ, gcLo}, // HIRAGANA LETTER SMALL KO {0x1B150, 0x1B152, prCJ, gcLo}, // [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO + {0x1B155, 0x1B155, prCJ, gcLo}, // KATAKANA LETTER SMALL KO {0x1B164, 0x1B167, prCJ, gcLo}, // [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B170, 0x1B2FB, prID, gcLo}, // [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1BC00, 0x1BC6A, prAL, gcLo}, // [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -3168,6 +3201,7 @@ var lineBreakCodePoints = [][4]int{ {0x1D200, 0x1D241, prAL, gcSo}, // [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 {0x1D242, 0x1D244, prCM, gcMn}, // [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME {0x1D245, 0x1D245, prAL, gcSo}, // GREEK MUSICAL LEIMMA + {0x1D2C0, 0x1D2D3, prAL, gcNo}, // [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN {0x1D2E0, 0x1D2F3, prAL, gcNo}, // [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN {0x1D300, 0x1D356, prAL, gcSo}, // [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING {0x1D360, 0x1D378, prAL, gcNo}, // [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -3228,11 +3262,14 @@ var lineBreakCodePoints = [][4]int{ {0x1DF00, 0x1DF09, prAL, gcLl}, // [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prAL, gcLo}, // LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0B, 0x1DF1E, prAL, gcLl}, // [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + {0x1DF25, 0x1DF2A, prAL, gcLl}, // [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK {0x1E000, 0x1E006, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E008, 0x1E018, prCM, gcMn}, // [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E01B, 0x1E021, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E023, 0x1E024, prCM, gcMn}, // [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E026, 0x1E02A, prCM, gcMn}, // [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + {0x1E030, 0x1E06D, prAL, gcLm}, // [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + {0x1E08F, 0x1E08F, prCM, gcMn}, // COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {0x1E100, 0x1E12C, prAL, gcLo}, // [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E130, 0x1E136, prCM, gcMn}, // [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E137, 0x1E13D, prAL, gcLm}, // [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -3245,6 +3282,10 @@ var lineBreakCodePoints = [][4]int{ {0x1E2EC, 0x1E2EF, prCM, gcMn}, // [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2F0, 0x1E2F9, prNU, gcNd}, // [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2FF, 0x1E2FF, prPR, gcSc}, // WANCHO NGUN SIGN + {0x1E4D0, 0x1E4EA, prAL, gcLo}, // [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL + {0x1E4EB, 0x1E4EB, prAL, gcLm}, // NAG MUNDARI SIGN OJOD + {0x1E4EC, 0x1E4EF, prCM, gcMn}, // [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + {0x1E4F0, 0x1E4F9, prNU, gcNd}, // [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE {0x1E7E0, 0x1E7E6, prAL, gcLo}, // [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E8, 0x1E7EB, prAL, gcLo}, // [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7ED, 0x1E7EE, prAL, gcLo}, // [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -3412,16 +3453,18 @@ var lineBreakCodePoints = [][4]int{ {0x1F6C1, 0x1F6CB, prID, gcSo}, // [11] BATHTUB..COUCH AND LAMP {0x1F6CC, 0x1F6CC, prEB, gcSo}, // SLEEPING ACCOMMODATION {0x1F6CD, 0x1F6D7, prID, gcSo}, // [11] SHOPPING BAGS..ELEVATOR - {0x1F6D8, 0x1F6DC, prID, gcCn}, // [5] .. - {0x1F6DD, 0x1F6EC, prID, gcSo}, // [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING + {0x1F6D8, 0x1F6DB, prID, gcCn}, // [4] .. + {0x1F6DC, 0x1F6EC, prID, gcSo}, // [17] WIRELESS..AIRPLANE ARRIVING {0x1F6ED, 0x1F6EF, prID, gcCn}, // [3] .. {0x1F6F0, 0x1F6FC, prID, gcSo}, // [13] SATELLITE..ROLLER SKATE {0x1F6FD, 0x1F6FF, prID, gcCn}, // [3] .. {0x1F700, 0x1F773, prAL, gcSo}, // [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE - {0x1F774, 0x1F77F, prID, gcCn}, // [12] .. + {0x1F774, 0x1F776, prID, gcSo}, // [3] LOT OF FORTUNE..LUNAR ECLIPSE + {0x1F777, 0x1F77A, prID, gcCn}, // [4] .. + {0x1F77B, 0x1F77F, prID, gcSo}, // [5] HAUMEA..ORCUS {0x1F780, 0x1F7D4, prAL, gcSo}, // [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR - {0x1F7D5, 0x1F7D8, prID, gcSo}, // [4] CIRCLED TRIANGLE..NEGATIVE CIRCLED SQUARE - {0x1F7D9, 0x1F7DF, prID, gcCn}, // [7] .. + {0x1F7D5, 0x1F7D9, prID, gcSo}, // [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR + {0x1F7DA, 0x1F7DF, prID, gcCn}, // [6] .. {0x1F7E0, 0x1F7EB, prID, gcSo}, // [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE {0x1F7EC, 0x1F7EF, prID, gcCn}, // [4] .. {0x1F7F0, 0x1F7F0, prID, gcSo}, // HEAVY EQUALS SIGN @@ -3467,33 +3510,29 @@ var lineBreakCodePoints = [][4]int{ {0x1FA54, 0x1FA5F, prID, gcCn}, // [12] .. {0x1FA60, 0x1FA6D, prID, gcSo}, // [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER {0x1FA6E, 0x1FA6F, prID, gcCn}, // [2] .. - {0x1FA70, 0x1FA74, prID, gcSo}, // [5] BALLET SHOES..THONG SANDAL - {0x1FA75, 0x1FA77, prID, gcCn}, // [3] .. - {0x1FA78, 0x1FA7C, prID, gcSo}, // [5] DROP OF BLOOD..CRUTCH + {0x1FA70, 0x1FA7C, prID, gcSo}, // [13] BALLET SHOES..CRUTCH {0x1FA7D, 0x1FA7F, prID, gcCn}, // [3] .. - {0x1FA80, 0x1FA86, prID, gcSo}, // [7] YO-YO..NESTING DOLLS - {0x1FA87, 0x1FA8F, prID, gcCn}, // [9] .. - {0x1FA90, 0x1FAAC, prID, gcSo}, // [29] RINGED PLANET..HAMSA - {0x1FAAD, 0x1FAAF, prID, gcCn}, // [3] .. - {0x1FAB0, 0x1FABA, prID, gcSo}, // [11] FLY..NEST WITH EGGS - {0x1FABB, 0x1FABF, prID, gcCn}, // [5] .. - {0x1FAC0, 0x1FAC2, prID, gcSo}, // [3] ANATOMICAL HEART..PEOPLE HUGGING + {0x1FA80, 0x1FA88, prID, gcSo}, // [9] YO-YO..FLUTE + {0x1FA89, 0x1FA8F, prID, gcCn}, // [7] .. + {0x1FA90, 0x1FABD, prID, gcSo}, // [46] RINGED PLANET..WING + {0x1FABE, 0x1FABE, prID, gcCn}, // + {0x1FABF, 0x1FAC2, prID, gcSo}, // [4] GOOSE..PEOPLE HUGGING {0x1FAC3, 0x1FAC5, prEB, gcSo}, // [3] PREGNANT MAN..PERSON WITH CROWN - {0x1FAC6, 0x1FACF, prID, gcCn}, // [10] .. - {0x1FAD0, 0x1FAD9, prID, gcSo}, // [10] BLUEBERRIES..JAR - {0x1FADA, 0x1FADF, prID, gcCn}, // [6] .. - {0x1FAE0, 0x1FAE7, prID, gcSo}, // [8] MELTING FACE..BUBBLES - {0x1FAE8, 0x1FAEF, prID, gcCn}, // [8] .. - {0x1FAF0, 0x1FAF6, prEB, gcSo}, // [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS - {0x1FAF7, 0x1FAFF, prID, gcCn}, // [9] .. + {0x1FAC6, 0x1FACD, prID, gcCn}, // [8] .. + {0x1FACE, 0x1FADB, prID, gcSo}, // [14] MOOSE..PEA POD + {0x1FADC, 0x1FADF, prID, gcCn}, // [4] .. + {0x1FAE0, 0x1FAE8, prID, gcSo}, // [9] MELTING FACE..SHAKING FACE + {0x1FAE9, 0x1FAEF, prID, gcCn}, // [7] .. + {0x1FAF0, 0x1FAF8, prEB, gcSo}, // [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND + {0x1FAF9, 0x1FAFF, prID, gcCn}, // [7] .. {0x1FB00, 0x1FB92, prAL, gcSo}, // [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK {0x1FB94, 0x1FBCA, prAL, gcSo}, // [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON {0x1FBF0, 0x1FBF9, prNU, gcNd}, // [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FC00, 0x1FFFD, prID, gcCn}, // [1022] .. {0x20000, 0x2A6DF, prID, gcLo}, // [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF {0x2A6E0, 0x2A6FF, prID, gcCn}, // [32] .. - {0x2A700, 0x2B738, prID, gcLo}, // [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 - {0x2B739, 0x2B73F, prID, gcCn}, // [7] .. + {0x2A700, 0x2B739, prID, gcLo}, // [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 + {0x2B73A, 0x2B73F, prID, gcCn}, // [6] .. {0x2B740, 0x2B81D, prID, gcLo}, // [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B81E, 0x2B81F, prID, gcCn}, // [2] .. {0x2B820, 0x2CEA1, prID, gcLo}, // [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 @@ -3504,7 +3543,9 @@ var lineBreakCodePoints = [][4]int{ {0x2FA1E, 0x2FA1F, prID, gcCn}, // [2] .. {0x2FA20, 0x2FFFD, prID, gcCn}, // [1502] .. {0x30000, 0x3134A, prID, gcLo}, // [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A - {0x3134B, 0x3FFFD, prID, gcCn}, // [60595] .. + {0x3134B, 0x3134F, prID, gcCn}, // [5] .. + {0x31350, 0x323AF, prID, gcLo}, // [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF + {0x323B0, 0x3FFFD, prID, gcCn}, // [56398] .. {0xE0001, 0xE0001, prCM, gcCf}, // LANGUAGE TAG {0xE0020, 0xE007F, prCM, gcCf}, // [96] TAG SPACE..CANCEL TAG {0xE0100, 0xE01EF, prCM, gcMn}, // [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/vendor/github.com/rivo/uniseg/linerules.go b/vendor/github.com/rivo/uniseg/linerules.go index d2ad5168..7708ae0f 100644 --- a/vendor/github.com/rivo/uniseg/linerules.go +++ b/vendor/github.com/rivo/uniseg/linerules.go @@ -64,222 +64,381 @@ const ( LineMustBreak // You must break the line here. ) -// The line break parser's state transitions. It's anologous to grTransitions, -// see comments there for details. Unicode version 14.0.0. -var lbTransitions = map[[2]int][3]int{ +// lbTransitions implements the line break parser's state transitions. It's +// anologous to [grTransitions], see comments there for details. +// +// Unicode version 15.0.0. +func lbTransitions(state, prop int) (newState, lineBreak, rule int) { + switch uint64(state) | uint64(prop)<<32 { // LB4. - {lbAny, prBK}: {lbBK, LineCanBreak, 310}, - {lbBK, prAny}: {lbAny, LineMustBreak, 40}, + case lbBK | prAny<<32: + return lbAny, LineMustBreak, 40 // LB5. - {lbAny, prCR}: {lbCR, LineCanBreak, 310}, - {lbAny, prLF}: {lbLF, LineCanBreak, 310}, - {lbAny, prNL}: {lbNL, LineCanBreak, 310}, - {lbCR, prLF}: {lbLF, LineDontBreak, 50}, - {lbCR, prAny}: {lbAny, LineMustBreak, 50}, - {lbLF, prAny}: {lbAny, LineMustBreak, 50}, - {lbNL, prAny}: {lbAny, LineMustBreak, 50}, + case lbCR | prLF<<32: + return lbLF, LineDontBreak, 50 + case lbCR | prAny<<32: + return lbAny, LineMustBreak, 50 + case lbLF | prAny<<32: + return lbAny, LineMustBreak, 50 + case lbNL | prAny<<32: + return lbAny, LineMustBreak, 50 // LB6. - {lbAny, prBK}: {lbBK, LineDontBreak, 60}, - {lbAny, prCR}: {lbCR, LineDontBreak, 60}, - {lbAny, prLF}: {lbLF, LineDontBreak, 60}, - {lbAny, prNL}: {lbNL, LineDontBreak, 60}, + case lbAny | prBK<<32: + return lbBK, LineDontBreak, 60 + case lbAny | prCR<<32: + return lbCR, LineDontBreak, 60 + case lbAny | prLF<<32: + return lbLF, LineDontBreak, 60 + case lbAny | prNL<<32: + return lbNL, LineDontBreak, 60 // LB7. - {lbAny, prSP}: {lbSP, LineDontBreak, 70}, - {lbAny, prZW}: {lbZW, LineDontBreak, 70}, + case lbAny | prSP<<32: + return lbSP, LineDontBreak, 70 + case lbAny | prZW<<32: + return lbZW, LineDontBreak, 70 // LB8. - {lbZW, prSP}: {lbZW, LineDontBreak, 70}, - {lbZW, prAny}: {lbAny, LineCanBreak, 80}, + case lbZW | prSP<<32: + return lbZW, LineDontBreak, 70 + case lbZW | prAny<<32: + return lbAny, LineCanBreak, 80 // LB11. - {lbAny, prWJ}: {lbWJ, LineDontBreak, 110}, - {lbWJ, prAny}: {lbAny, LineDontBreak, 110}, + case lbAny | prWJ<<32: + return lbWJ, LineDontBreak, 110 + case lbWJ | prAny<<32: + return lbAny, LineDontBreak, 110 // LB12. - {lbAny, prGL}: {lbGL, LineCanBreak, 310}, - {lbGL, prAny}: {lbAny, LineDontBreak, 120}, + case lbAny | prGL<<32: + return lbGL, LineCanBreak, 310 + case lbGL | prAny<<32: + return lbAny, LineDontBreak, 120 // LB13 (simple transitions). - {lbAny, prCL}: {lbCL, LineCanBreak, 310}, - {lbAny, prCP}: {lbCP, LineCanBreak, 310}, - {lbAny, prEX}: {lbEX, LineDontBreak, 130}, - {lbAny, prIS}: {lbIS, LineCanBreak, 310}, - {lbAny, prSY}: {lbSY, LineCanBreak, 310}, + case lbAny | prCL<<32: + return lbCL, LineCanBreak, 310 + case lbAny | prCP<<32: + return lbCP, LineCanBreak, 310 + case lbAny | prEX<<32: + return lbEX, LineDontBreak, 130 + case lbAny | prIS<<32: + return lbIS, LineCanBreak, 310 + case lbAny | prSY<<32: + return lbSY, LineCanBreak, 310 // LB14. - {lbAny, prOP}: {lbOP, LineCanBreak, 310}, - {lbOP, prSP}: {lbOP, LineDontBreak, 70}, - {lbOP, prAny}: {lbAny, LineDontBreak, 140}, + case lbAny | prOP<<32: + return lbOP, LineCanBreak, 310 + case lbOP | prSP<<32: + return lbOP, LineDontBreak, 70 + case lbOP | prAny<<32: + return lbAny, LineDontBreak, 140 // LB15. - {lbQU, prSP}: {lbQUSP, LineDontBreak, 70}, - {lbQU, prOP}: {lbOP, LineDontBreak, 150}, - {lbQUSP, prOP}: {lbOP, LineDontBreak, 150}, + case lbQU | prSP<<32: + return lbQUSP, LineDontBreak, 70 + case lbQU | prOP<<32: + return lbOP, LineDontBreak, 150 + case lbQUSP | prOP<<32: + return lbOP, LineDontBreak, 150 // LB16. - {lbCL, prSP}: {lbCLCPSP, LineDontBreak, 70}, - {lbNUCL, prSP}: {lbCLCPSP, LineDontBreak, 70}, - {lbCP, prSP}: {lbCLCPSP, LineDontBreak, 70}, - {lbNUCP, prSP}: {lbCLCPSP, LineDontBreak, 70}, - {lbCL, prNS}: {lbNS, LineDontBreak, 160}, - {lbNUCL, prNS}: {lbNS, LineDontBreak, 160}, - {lbCP, prNS}: {lbNS, LineDontBreak, 160}, - {lbNUCP, prNS}: {lbNS, LineDontBreak, 160}, - {lbCLCPSP, prNS}: {lbNS, LineDontBreak, 160}, + case lbCL | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbNUCL | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbCP | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbNUCP | prSP<<32: + return lbCLCPSP, LineDontBreak, 70 + case lbCL | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbNUCL | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbCP | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbNUCP | prNS<<32: + return lbNS, LineDontBreak, 160 + case lbCLCPSP | prNS<<32: + return lbNS, LineDontBreak, 160 // LB17. - {lbAny, prB2}: {lbB2, LineCanBreak, 310}, - {lbB2, prSP}: {lbB2SP, LineDontBreak, 70}, - {lbB2, prB2}: {lbB2, LineDontBreak, 170}, - {lbB2SP, prB2}: {lbB2, LineDontBreak, 170}, + case lbAny | prB2<<32: + return lbB2, LineCanBreak, 310 + case lbB2 | prSP<<32: + return lbB2SP, LineDontBreak, 70 + case lbB2 | prB2<<32: + return lbB2, LineDontBreak, 170 + case lbB2SP | prB2<<32: + return lbB2, LineDontBreak, 170 // LB18. - {lbSP, prAny}: {lbAny, LineCanBreak, 180}, - {lbQUSP, prAny}: {lbAny, LineCanBreak, 180}, - {lbCLCPSP, prAny}: {lbAny, LineCanBreak, 180}, - {lbB2SP, prAny}: {lbAny, LineCanBreak, 180}, + case lbSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbQUSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbCLCPSP | prAny<<32: + return lbAny, LineCanBreak, 180 + case lbB2SP | prAny<<32: + return lbAny, LineCanBreak, 180 // LB19. - {lbAny, prQU}: {lbQU, LineDontBreak, 190}, - {lbQU, prAny}: {lbAny, LineDontBreak, 190}, + case lbAny | prQU<<32: + return lbQU, LineDontBreak, 190 + case lbQU | prAny<<32: + return lbAny, LineDontBreak, 190 // LB20. - {lbAny, prCB}: {lbCB, LineCanBreak, 200}, - {lbCB, prAny}: {lbAny, LineCanBreak, 200}, + case lbAny | prCB<<32: + return lbCB, LineCanBreak, 200 + case lbCB | prAny<<32: + return lbAny, LineCanBreak, 200 // LB21. - {lbAny, prBA}: {lbBA, LineDontBreak, 210}, - {lbAny, prHY}: {lbHY, LineDontBreak, 210}, - {lbAny, prNS}: {lbNS, LineDontBreak, 210}, - {lbAny, prBB}: {lbBB, LineCanBreak, 310}, - {lbBB, prAny}: {lbAny, LineDontBreak, 210}, + case lbAny | prBA<<32: + return lbBA, LineDontBreak, 210 + case lbAny | prHY<<32: + return lbHY, LineDontBreak, 210 + case lbAny | prNS<<32: + return lbNS, LineDontBreak, 210 + case lbAny | prBB<<32: + return lbBB, LineCanBreak, 310 + case lbBB | prAny<<32: + return lbAny, LineDontBreak, 210 // LB21a. - {lbAny, prHL}: {lbHL, LineCanBreak, 310}, - {lbHL, prHY}: {lbLB21a, LineDontBreak, 210}, - {lbHL, prBA}: {lbLB21a, LineDontBreak, 210}, - {lbLB21a, prAny}: {lbAny, LineDontBreak, 211}, + case lbAny | prHL<<32: + return lbHL, LineCanBreak, 310 + case lbHL | prHY<<32: + return lbLB21a, LineDontBreak, 210 + case lbHL | prBA<<32: + return lbLB21a, LineDontBreak, 210 + case lbLB21a | prAny<<32: + return lbAny, LineDontBreak, 211 // LB21b. - {lbSY, prHL}: {lbHL, LineDontBreak, 212}, - {lbNUSY, prHL}: {lbHL, LineDontBreak, 212}, + case lbSY | prHL<<32: + return lbHL, LineDontBreak, 212 + case lbNUSY | prHL<<32: + return lbHL, LineDontBreak, 212 // LB22. - {lbAny, prIN}: {lbAny, LineDontBreak, 220}, + case lbAny | prIN<<32: + return lbAny, LineDontBreak, 220 // LB23. - {lbAny, prAL}: {lbAL, LineCanBreak, 310}, - {lbAny, prNU}: {lbNU, LineCanBreak, 310}, - {lbAL, prNU}: {lbNU, LineDontBreak, 230}, - {lbHL, prNU}: {lbNU, LineDontBreak, 230}, - {lbNU, prAL}: {lbAL, LineDontBreak, 230}, - {lbNU, prHL}: {lbHL, LineDontBreak, 230}, - {lbNUNU, prAL}: {lbAL, LineDontBreak, 230}, - {lbNUNU, prHL}: {lbHL, LineDontBreak, 230}, + case lbAny | prAL<<32: + return lbAL, LineCanBreak, 310 + case lbAny | prNU<<32: + return lbNU, LineCanBreak, 310 + case lbAL | prNU<<32: + return lbNU, LineDontBreak, 230 + case lbHL | prNU<<32: + return lbNU, LineDontBreak, 230 + case lbNU | prAL<<32: + return lbAL, LineDontBreak, 230 + case lbNU | prHL<<32: + return lbHL, LineDontBreak, 230 + case lbNUNU | prAL<<32: + return lbAL, LineDontBreak, 230 + case lbNUNU | prHL<<32: + return lbHL, LineDontBreak, 230 // LB23a. - {lbAny, prPR}: {lbPR, LineCanBreak, 310}, - {lbAny, prID}: {lbIDEM, LineCanBreak, 310}, - {lbAny, prEB}: {lbEB, LineCanBreak, 310}, - {lbAny, prEM}: {lbIDEM, LineCanBreak, 310}, - {lbPR, prID}: {lbIDEM, LineDontBreak, 231}, - {lbPR, prEB}: {lbEB, LineDontBreak, 231}, - {lbPR, prEM}: {lbIDEM, LineDontBreak, 231}, - {lbIDEM, prPO}: {lbPO, LineDontBreak, 231}, - {lbEB, prPO}: {lbPO, LineDontBreak, 231}, + case lbAny | prPR<<32: + return lbPR, LineCanBreak, 310 + case lbAny | prID<<32: + return lbIDEM, LineCanBreak, 310 + case lbAny | prEB<<32: + return lbEB, LineCanBreak, 310 + case lbAny | prEM<<32: + return lbIDEM, LineCanBreak, 310 + case lbPR | prID<<32: + return lbIDEM, LineDontBreak, 231 + case lbPR | prEB<<32: + return lbEB, LineDontBreak, 231 + case lbPR | prEM<<32: + return lbIDEM, LineDontBreak, 231 + case lbIDEM | prPO<<32: + return lbPO, LineDontBreak, 231 + case lbEB | prPO<<32: + return lbPO, LineDontBreak, 231 // LB24. - {lbAny, prPO}: {lbPO, LineCanBreak, 310}, - {lbPR, prAL}: {lbAL, LineDontBreak, 240}, - {lbPR, prHL}: {lbHL, LineDontBreak, 240}, - {lbPO, prAL}: {lbAL, LineDontBreak, 240}, - {lbPO, prHL}: {lbHL, LineDontBreak, 240}, - {lbAL, prPR}: {lbPR, LineDontBreak, 240}, - {lbAL, prPO}: {lbPO, LineDontBreak, 240}, - {lbHL, prPR}: {lbPR, LineDontBreak, 240}, - {lbHL, prPO}: {lbPO, LineDontBreak, 240}, + case lbAny | prPO<<32: + return lbPO, LineCanBreak, 310 + case lbPR | prAL<<32: + return lbAL, LineDontBreak, 240 + case lbPR | prHL<<32: + return lbHL, LineDontBreak, 240 + case lbPO | prAL<<32: + return lbAL, LineDontBreak, 240 + case lbPO | prHL<<32: + return lbHL, LineDontBreak, 240 + case lbAL | prPR<<32: + return lbPR, LineDontBreak, 240 + case lbAL | prPO<<32: + return lbPO, LineDontBreak, 240 + case lbHL | prPR<<32: + return lbPR, LineDontBreak, 240 + case lbHL | prPO<<32: + return lbPO, LineDontBreak, 240 // LB25 (simple transitions). - {lbPR, prNU}: {lbNU, LineDontBreak, 250}, - {lbPO, prNU}: {lbNU, LineDontBreak, 250}, - {lbOP, prNU}: {lbNU, LineDontBreak, 250}, - {lbHY, prNU}: {lbNU, LineDontBreak, 250}, - {lbNU, prNU}: {lbNUNU, LineDontBreak, 250}, - {lbNU, prSY}: {lbNUSY, LineDontBreak, 250}, - {lbNU, prIS}: {lbNUIS, LineDontBreak, 250}, - {lbNUNU, prNU}: {lbNUNU, LineDontBreak, 250}, - {lbNUNU, prSY}: {lbNUSY, LineDontBreak, 250}, - {lbNUNU, prIS}: {lbNUIS, LineDontBreak, 250}, - {lbNUSY, prNU}: {lbNUNU, LineDontBreak, 250}, - {lbNUSY, prSY}: {lbNUSY, LineDontBreak, 250}, - {lbNUSY, prIS}: {lbNUIS, LineDontBreak, 250}, - {lbNUIS, prNU}: {lbNUNU, LineDontBreak, 250}, - {lbNUIS, prSY}: {lbNUSY, LineDontBreak, 250}, - {lbNUIS, prIS}: {lbNUIS, LineDontBreak, 250}, - {lbNU, prCL}: {lbNUCL, LineDontBreak, 250}, - {lbNU, prCP}: {lbNUCP, LineDontBreak, 250}, - {lbNUNU, prCL}: {lbNUCL, LineDontBreak, 250}, - {lbNUNU, prCP}: {lbNUCP, LineDontBreak, 250}, - {lbNUSY, prCL}: {lbNUCL, LineDontBreak, 250}, - {lbNUSY, prCP}: {lbNUCP, LineDontBreak, 250}, - {lbNUIS, prCL}: {lbNUCL, LineDontBreak, 250}, - {lbNUIS, prCP}: {lbNUCP, LineDontBreak, 250}, - {lbNU, prPO}: {lbPO, LineDontBreak, 250}, - {lbNUNU, prPO}: {lbPO, LineDontBreak, 250}, - {lbNUSY, prPO}: {lbPO, LineDontBreak, 250}, - {lbNUIS, prPO}: {lbPO, LineDontBreak, 250}, - {lbNUCL, prPO}: {lbPO, LineDontBreak, 250}, - {lbNUCP, prPO}: {lbPO, LineDontBreak, 250}, - {lbNU, prPR}: {lbPR, LineDontBreak, 250}, - {lbNUNU, prPR}: {lbPR, LineDontBreak, 250}, - {lbNUSY, prPR}: {lbPR, LineDontBreak, 250}, - {lbNUIS, prPR}: {lbPR, LineDontBreak, 250}, - {lbNUCL, prPR}: {lbPR, LineDontBreak, 250}, - {lbNUCP, prPR}: {lbPR, LineDontBreak, 250}, + case lbPR | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbPO | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbOP | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbHY | prNU<<32: + return lbNU, LineDontBreak, 250 + case lbNU | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNU | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNU | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUNU | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUNU | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUNU | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUSY | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUSY | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUSY | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNUIS | prNU<<32: + return lbNUNU, LineDontBreak, 250 + case lbNUIS | prSY<<32: + return lbNUSY, LineDontBreak, 250 + case lbNUIS | prIS<<32: + return lbNUIS, LineDontBreak, 250 + case lbNU | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNU | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUNU | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUNU | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUSY | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUSY | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNUIS | prCL<<32: + return lbNUCL, LineDontBreak, 250 + case lbNUIS | prCP<<32: + return lbNUCP, LineDontBreak, 250 + case lbNU | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUNU | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUSY | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUIS | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUCL | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNUCP | prPO<<32: + return lbPO, LineDontBreak, 250 + case lbNU | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUNU | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUSY | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUIS | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUCL | prPR<<32: + return lbPR, LineDontBreak, 250 + case lbNUCP | prPR<<32: + return lbPR, LineDontBreak, 250 // LB26. - {lbAny, prJL}: {lbJL, LineCanBreak, 310}, - {lbAny, prJV}: {lbJV, LineCanBreak, 310}, - {lbAny, prJT}: {lbJT, LineCanBreak, 310}, - {lbAny, prH2}: {lbH2, LineCanBreak, 310}, - {lbAny, prH3}: {lbH3, LineCanBreak, 310}, - {lbJL, prJL}: {lbJL, LineDontBreak, 260}, - {lbJL, prJV}: {lbJV, LineDontBreak, 260}, - {lbJL, prH2}: {lbH2, LineDontBreak, 260}, - {lbJL, prH3}: {lbH3, LineDontBreak, 260}, - {lbJV, prJV}: {lbJV, LineDontBreak, 260}, - {lbJV, prJT}: {lbJT, LineDontBreak, 260}, - {lbH2, prJV}: {lbJV, LineDontBreak, 260}, - {lbH2, prJT}: {lbJT, LineDontBreak, 260}, - {lbJT, prJT}: {lbJT, LineDontBreak, 260}, - {lbH3, prJT}: {lbJT, LineDontBreak, 260}, + case lbAny | prJL<<32: + return lbJL, LineCanBreak, 310 + case lbAny | prJV<<32: + return lbJV, LineCanBreak, 310 + case lbAny | prJT<<32: + return lbJT, LineCanBreak, 310 + case lbAny | prH2<<32: + return lbH2, LineCanBreak, 310 + case lbAny | prH3<<32: + return lbH3, LineCanBreak, 310 + case lbJL | prJL<<32: + return lbJL, LineDontBreak, 260 + case lbJL | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbJL | prH2<<32: + return lbH2, LineDontBreak, 260 + case lbJL | prH3<<32: + return lbH3, LineDontBreak, 260 + case lbJV | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbJV | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbH2 | prJV<<32: + return lbJV, LineDontBreak, 260 + case lbH2 | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbJT | prJT<<32: + return lbJT, LineDontBreak, 260 + case lbH3 | prJT<<32: + return lbJT, LineDontBreak, 260 // LB27. - {lbJL, prPO}: {lbPO, LineDontBreak, 270}, - {lbJV, prPO}: {lbPO, LineDontBreak, 270}, - {lbJT, prPO}: {lbPO, LineDontBreak, 270}, - {lbH2, prPO}: {lbPO, LineDontBreak, 270}, - {lbH3, prPO}: {lbPO, LineDontBreak, 270}, - {lbPR, prJL}: {lbJL, LineDontBreak, 270}, - {lbPR, prJV}: {lbJV, LineDontBreak, 270}, - {lbPR, prJT}: {lbJT, LineDontBreak, 270}, - {lbPR, prH2}: {lbH2, LineDontBreak, 270}, - {lbPR, prH3}: {lbH3, LineDontBreak, 270}, + case lbJL | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbJV | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbJT | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbH2 | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbH3 | prPO<<32: + return lbPO, LineDontBreak, 270 + case lbPR | prJL<<32: + return lbJL, LineDontBreak, 270 + case lbPR | prJV<<32: + return lbJV, LineDontBreak, 270 + case lbPR | prJT<<32: + return lbJT, LineDontBreak, 270 + case lbPR | prH2<<32: + return lbH2, LineDontBreak, 270 + case lbPR | prH3<<32: + return lbH3, LineDontBreak, 270 // LB28. - {lbAL, prAL}: {lbAL, LineDontBreak, 280}, - {lbAL, prHL}: {lbHL, LineDontBreak, 280}, - {lbHL, prAL}: {lbAL, LineDontBreak, 280}, - {lbHL, prHL}: {lbHL, LineDontBreak, 280}, + case lbAL | prAL<<32: + return lbAL, LineDontBreak, 280 + case lbAL | prHL<<32: + return lbHL, LineDontBreak, 280 + case lbHL | prAL<<32: + return lbAL, LineDontBreak, 280 + case lbHL | prHL<<32: + return lbHL, LineDontBreak, 280 // LB29. - {lbIS, prAL}: {lbAL, LineDontBreak, 290}, - {lbIS, prHL}: {lbHL, LineDontBreak, 290}, - {lbNUIS, prAL}: {lbAL, LineDontBreak, 290}, - {lbNUIS, prHL}: {lbHL, LineDontBreak, 290}, + case lbIS | prAL<<32: + return lbAL, LineDontBreak, 290 + case lbIS | prHL<<32: + return lbHL, LineDontBreak, 290 + case lbNUIS | prAL<<32: + return lbAL, LineDontBreak, 290 + case lbNUIS | prHL<<32: + return lbHL, LineDontBreak, 290 + + default: + return -1, -1, -1 + } } // transitionLineBreakState determines the new state of the line break parser @@ -290,7 +449,7 @@ var lbTransitions = map[[2]int][3]int{ // further lookups. func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) { // Determine the property of the next character. - nextProperty, generalCategory := propertyWithGenCat(lineBreakCodePoints, r) + nextProperty, generalCategory := propertyLineBreak(r) // Prepare. var forceNoBreak, isCPeaFWH bool @@ -306,7 +465,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState defer func() { // Transition into LB30. if newState == lbCP || newState == lbNUCP { - ea := property(eastAsianWidth, r) + ea := propertyEastAsianWidth(r) if ea != prF && ea != prW && ea != prH { newState |= lbCPeaFWHBit } @@ -352,30 +511,27 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState // Find the applicable transition in the table. var rule int - transition, ok := lbTransitions[[2]int{state, nextProperty}] - if ok { - // We have a specific transition. We'll use it. - newState, lineBreak, rule = transition[0], transition[1], transition[2] - } else { + newState, lineBreak, rule = lbTransitions(state, nextProperty) + if newState < 0 { // No specific transition found. Try the less specific ones. - transAnyProp, okAnyProp := lbTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := lbTransitions[[2]int{lbAny, nextProperty}] - if okAnyProp && okAnyState { + anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny) + anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty) + if anyPropProp >= 0 && anyStateProp >= 0 { // Both apply. We'll use a mix (see comments for grTransitions). - newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2] - if transAnyProp[2] < transAnyState[2] { - lineBreak, rule = transAnyProp[1], transAnyProp[2] + newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule + if anyPropRule < anyStateRule { + lineBreak, rule = anyPropLineBreak, anyPropRule } - } else if okAnyProp { + } else if anyPropProp >= 0 { // We only have a specific state. - newState, lineBreak, rule = transAnyProp[0], transAnyProp[1], transAnyProp[2] + newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be // true anymore. - } else if okAnyState { + } else if anyStateProp >= 0 { // We only have a specific property. - newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2] + newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule } else { // No known transition. LB31: ALL Γ· ALL. newState, lineBreak, rule = lbAny, LineCanBreak, 310 @@ -414,7 +570,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState r, _ = utf8.DecodeRuneInString(str) } if r != utf8.RuneError { - pr, _ := propertyWithGenCat(lineBreakCodePoints, r) + pr, _ := propertyLineBreak(r) if pr == prNU { return lbNU, LineDontBreak } @@ -424,7 +580,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState // LB30 (part one). if rule > 300 { if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP { - ea := property(eastAsianWidth, r) + ea := propertyEastAsianWidth(r) if ea != prF && ea != prW && ea != prH { return lbOP, LineDontBreak } @@ -460,7 +616,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState return prAny, LineDontBreak } } - graphemeProperty := property(graphemeCodePoints, r) + graphemeProperty := propertyGraphemes(r) if graphemeProperty == prExtendedPictographic && generalCategory == gcCn { return lbExtPicCn, LineCanBreak } diff --git a/vendor/github.com/rivo/uniseg/properties.go b/vendor/github.com/rivo/uniseg/properties.go index bc3c7bcf..6290e681 100644 --- a/vendor/github.com/rivo/uniseg/properties.go +++ b/vendor/github.com/rivo/uniseg/properties.go @@ -160,9 +160,49 @@ func property(dictionary [][3]int, r rune) int { return propertySearch(dictionary, r)[2] } -// propertyWithGenCat returns the Unicode property value and General Category -// (see constants above) of the given code point. -func propertyWithGenCat(dictionary [][4]int, r rune) (property, generalCategory int) { - entry := propertySearch(dictionary, r) +// propertyLineBreak returns the Unicode property value and General Category +// (see constants above) of the given code point, as listed in the line break +// code points table, while fast tracking ASCII digits and letters. +func propertyLineBreak(r rune) (property, generalCategory int) { + if r >= 'a' && r <= 'z' { + return prAL, gcLl + } + if r >= 'A' && r <= 'Z' { + return prAL, gcLu + } + if r >= '0' && r <= '9' { + return prNU, gcNd + } + entry := propertySearch(lineBreakCodePoints, r) return entry[2], entry[3] } + +// propertyGraphemes returns the Unicode grapheme cluster property value of the +// given code point while fast tracking ASCII characters. +func propertyGraphemes(r rune) int { + if r >= 0x20 && r <= 0x7e { + return prAny + } + if r == 0x0a { + return prLF + } + if r == 0x0d { + return prCR + } + if r >= 0 && r <= 0x1f || r == 0x7f { + return prControl + } + return property(graphemeCodePoints, r) +} + +// propertyEastAsianWidth returns the Unicode East Asian Width property value of +// the given code point while fast tracking ASCII characters. +func propertyEastAsianWidth(r rune) int { + if r >= 0x20 && r <= 0x7e { + return prNa + } + if r >= 0 && r <= 0x1f || r == 0x7f { + return prN + } + return property(eastAsianWidth, r) +} diff --git a/vendor/github.com/rivo/uniseg/sentenceproperties.go b/vendor/github.com/rivo/uniseg/sentenceproperties.go index ba0cf2de..67717ec1 100644 --- a/vendor/github.com/rivo/uniseg/sentenceproperties.go +++ b/vendor/github.com/rivo/uniseg/sentenceproperties.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // sentenceBreakCodePoints are taken from -// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt +// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/SentenceBreakProperty.txt // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var sentenceBreakCodePoints = [][3]int{ {0x0009, 0x0009, prSp}, // Cc @@ -843,6 +843,7 @@ var sentenceBreakCodePoints = [][3]int{ {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CF1, 0x0CF2, prOLetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + {0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D04, 0x0D0C, prOLetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -896,7 +897,7 @@ var sentenceBreakCodePoints = [][3]int{ {0x0EBD, 0x0EBD, prOLetter}, // Lo LAO SEMIVOWEL SIGN NYO {0x0EC0, 0x0EC4, prOLetter}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC6, 0x0EC6, prOLetter}, // Lm LAO KO LA - {0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN {0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0EDC, 0x0EDF, prOLetter}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO {0x0F00, 0x0F00, prOLetter}, // Lo TIBETAN SYLLABLE OM @@ -958,7 +959,7 @@ var sentenceBreakCodePoints = [][3]int{ {0x10C7, 0x10C7, prUpper}, // L& GEORGIAN CAPITAL LETTER YN {0x10CD, 0x10CD, prUpper}, // L& GEORGIAN CAPITAL LETTER AEN {0x10D0, 0x10FA, prOLetter}, // L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN - {0x10FC, 0x10FC, prOLetter}, // Lm MODIFIER LETTER GEORGIAN NAR + {0x10FC, 0x10FC, prLower}, // Lm MODIFIER LETTER GEORGIAN NAR {0x10FD, 0x10FF, prOLetter}, // L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN {0x1100, 0x1248, prOLetter}, // Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA {0x124A, 0x124D, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE @@ -2034,7 +2035,7 @@ var sentenceBreakCodePoints = [][3]int{ {0xA7D7, 0xA7D7, prLower}, // L& LATIN SMALL LETTER MIDDLE SCOTS S {0xA7D8, 0xA7D8, prUpper}, // L& LATIN CAPITAL LETTER SIGMOID S {0xA7D9, 0xA7D9, prLower}, // L& LATIN SMALL LETTER SIGMOID S - {0xA7F2, 0xA7F4, prOLetter}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q + {0xA7F2, 0xA7F4, prLower}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q {0xA7F5, 0xA7F5, prUpper}, // L& LATIN CAPITAL LETTER REVERSED HALF H {0xA7F6, 0xA7F6, prLower}, // L& LATIN SMALL LETTER REVERSED HALF H {0xA7F7, 0xA7F7, prOLetter}, // Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I @@ -2140,7 +2141,7 @@ var sentenceBreakCodePoints = [][3]int{ {0xAB30, 0xAB5A, prLower}, // L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG {0xAB5C, 0xAB5F, prLower}, // Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK {0xAB60, 0xAB68, prLower}, // L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE - {0xAB69, 0xAB69, prOLetter}, // Lm MODIFIER LETTER SMALL TURNED W + {0xAB69, 0xAB69, prLower}, // Lm MODIFIER LETTER SMALL TURNED W {0xAB70, 0xABBF, prLower}, // L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA {0xABC0, 0xABE2, prOLetter}, // Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM {0xABE3, 0xABE4, prExtend}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP @@ -2334,6 +2335,7 @@ var sentenceBreakCodePoints = [][3]int{ {0x10E80, 0x10EA9, prOLetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EB0, 0x10EB1, prOLetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + {0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA {0x10F00, 0x10F1C, prOLetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F27, 0x10F27, prOLetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F30, 0x10F45, prOLetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2408,6 +2410,8 @@ var sentenceBreakCodePoints = [][3]int{ {0x11238, 0x11239, prSTerm}, // Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA {0x1123B, 0x1123C, prSTerm}, // Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN + {0x1123F, 0x11240, prOLetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I + {0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R {0x11280, 0x11286, prOLetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11288, 0x11288, prOLetter}, // Lo MULTANI LETTER GHA {0x1128A, 0x1128D, prOLetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -2603,13 +2607,29 @@ var sentenceBreakCodePoints = [][3]int{ {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF7, 0x11EF8, prSTerm}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION + {0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + {0x11F02, 0x11F02, prOLetter}, // Lo KAWI SIGN REPHA + {0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA + {0x11F04, 0x11F10, prOLetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O + {0x11F12, 0x11F33, prOLetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA + {0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + {0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + {0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + {0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU + {0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER + {0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER + {0x11F43, 0x11F44, prSTerm}, // Po [2] KAWI DANDA..KAWI DOUBLE DANDA + {0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE {0x11FB0, 0x11FB0, prOLetter}, // Lo LISU LETTER YHA {0x12000, 0x12399, prOLetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U {0x12400, 0x1246E, prOLetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM {0x12480, 0x12543, prOLetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12F90, 0x12FF0, prOLetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 - {0x13000, 0x1342E, prOLetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 - {0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT + {0x13000, 0x1342F, prOLetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D + {0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + {0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + {0x13441, 0x13446, prOLetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN + {0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED {0x14400, 0x14646, prOLetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x16800, 0x16A38, prOLetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16A40, 0x16A5E, prOLetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -2648,7 +2668,9 @@ var sentenceBreakCodePoints = [][3]int{ {0x1AFF5, 0x1AFFB, prOLetter}, // Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 {0x1AFFD, 0x1AFFE, prOLetter}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1B000, 0x1B122, prOLetter}, // Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU + {0x1B132, 0x1B132, prOLetter}, // Lo HIRAGANA LETTER SMALL KO {0x1B150, 0x1B152, prOLetter}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO + {0x1B155, 0x1B155, prOLetter}, // Lo KATAKANA LETTER SMALL KO {0x1B164, 0x1B167, prOLetter}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B170, 0x1B2FB, prOLetter}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1BC00, 0x1BC6A, prOLetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M @@ -2738,11 +2760,14 @@ var sentenceBreakCodePoints = [][3]int{ {0x1DF00, 0x1DF09, prLower}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prOLetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0B, 0x1DF1E, prLower}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + {0x1DF25, 0x1DF2A, prLower}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK {0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + {0x1E030, 0x1E06D, prLower}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + {0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {0x1E100, 0x1E12C, prOLetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E137, 0x1E13D, prOLetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2753,6 +2778,10 @@ var sentenceBreakCodePoints = [][3]int{ {0x1E2C0, 0x1E2EB, prOLetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE + {0x1E4D0, 0x1E4EA, prOLetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL + {0x1E4EB, 0x1E4EB, prOLetter}, // Lm NAG MUNDARI SIGN OJOD + {0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + {0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE {0x1E7E0, 0x1E7E6, prOLetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E8, 0x1E7EB, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7ED, 0x1E7EE, prOLetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -2803,12 +2832,13 @@ var sentenceBreakCodePoints = [][3]int{ {0x1F676, 0x1F678, prClose}, // So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT {0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x20000, 0x2A6DF, prOLetter}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF - {0x2A700, 0x2B738, prOLetter}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 + {0x2A700, 0x2B739, prOLetter}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 {0x2B740, 0x2B81D, prOLetter}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B820, 0x2CEA1, prOLetter}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 {0x2CEB0, 0x2EBE0, prOLetter}, // Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 {0x2F800, 0x2FA1D, prOLetter}, // Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D {0x30000, 0x3134A, prOLetter}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + {0x31350, 0x323AF, prOLetter}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF {0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG {0xE0020, 0xE007F, prExtend}, // Cf [96] TAG SPACE..CANCEL TAG {0xE0100, 0xE01EF, prExtend}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 diff --git a/vendor/github.com/rivo/uniseg/sentencerules.go b/vendor/github.com/rivo/uniseg/sentencerules.go index 58c04794..0b29c7bd 100644 --- a/vendor/github.com/rivo/uniseg/sentencerules.go +++ b/vendor/github.com/rivo/uniseg/sentencerules.go @@ -18,104 +18,178 @@ const ( sbSB8aSp ) -// The sentence break parser's breaking instructions. -const ( - sbDontBreak = iota - sbBreak -) - -// The sentence break parser's state transitions. It's anologous to -// grTransitions, see comments there for details. Unicode version 14.0.0. -var sbTransitions = map[[2]int][3]int{ +// sbTransitions implements the sentence break parser's state transitions. It's +// anologous to [grTransitions], see comments there for details. +// +// Unicode version 15.0.0. +func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) { + switch uint64(state) | uint64(prop)<<32 { // SB3. - {sbAny, prCR}: {sbCR, sbDontBreak, 9990}, - {sbCR, prLF}: {sbParaSep, sbDontBreak, 30}, + case sbAny | prCR<<32: + return sbCR, false, 9990 + case sbCR | prLF<<32: + return sbParaSep, false, 30 // SB4. - {sbAny, prSep}: {sbParaSep, sbDontBreak, 9990}, - {sbAny, prLF}: {sbParaSep, sbDontBreak, 9990}, - {sbParaSep, prAny}: {sbAny, sbBreak, 40}, - {sbCR, prAny}: {sbAny, sbBreak, 40}, + case sbAny | prSep<<32: + return sbParaSep, false, 9990 + case sbAny | prLF<<32: + return sbParaSep, false, 9990 + case sbParaSep | prAny<<32: + return sbAny, true, 40 + case sbCR | prAny<<32: + return sbAny, true, 40 // SB6. - {sbAny, prATerm}: {sbATerm, sbDontBreak, 9990}, - {sbATerm, prNumeric}: {sbAny, sbDontBreak, 60}, - {sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7. + case sbAny | prATerm<<32: + return sbATerm, false, 9990 + case sbATerm | prNumeric<<32: + return sbAny, false, 60 + case sbSB7 | prNumeric<<32: + return sbAny, false, 60 // Because ATerm also appears in SB7. // SB7. - {sbAny, prUpper}: {sbUpper, sbDontBreak, 9990}, - {sbAny, prLower}: {sbLower, sbDontBreak, 9990}, - {sbUpper, prATerm}: {sbSB7, sbDontBreak, 70}, - {sbLower, prATerm}: {sbSB7, sbDontBreak, 70}, - {sbSB7, prUpper}: {sbUpper, sbDontBreak, 70}, + case sbAny | prUpper<<32: + return sbUpper, false, 9990 + case sbAny | prLower<<32: + return sbLower, false, 9990 + case sbUpper | prATerm<<32: + return sbSB7, false, 70 + case sbLower | prATerm<<32: + return sbSB7, false, 70 + case sbSB7 | prUpper<<32: + return sbUpper, false, 70 // SB8a. - {sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990}, - {sbATerm, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbATerm, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB7, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB7, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81}, - {sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81}, - {sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81}, - {sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81}, + case sbAny | prSTerm<<32: + return sbSTerm, false, 9990 + case sbATerm | prSContinue<<32: + return sbAny, false, 81 + case sbATerm | prATerm<<32: + return sbATerm, false, 81 + case sbATerm | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB7 | prSContinue<<32: + return sbAny, false, 81 + case sbSB7 | prATerm<<32: + return sbATerm, false, 81 + case sbSB7 | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8Close | prSContinue<<32: + return sbAny, false, 81 + case sbSB8Close | prATerm<<32: + return sbATerm, false, 81 + case sbSB8Close | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8Sp | prSContinue<<32: + return sbAny, false, 81 + case sbSB8Sp | prATerm<<32: + return sbATerm, false, 81 + case sbSB8Sp | prSTerm<<32: + return sbSTerm, false, 81 + case sbSTerm | prSContinue<<32: + return sbAny, false, 81 + case sbSTerm | prATerm<<32: + return sbATerm, false, 81 + case sbSTerm | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8aClose | prSContinue<<32: + return sbAny, false, 81 + case sbSB8aClose | prATerm<<32: + return sbATerm, false, 81 + case sbSB8aClose | prSTerm<<32: + return sbSTerm, false, 81 + case sbSB8aSp | prSContinue<<32: + return sbAny, false, 81 + case sbSB8aSp | prATerm<<32: + return sbATerm, false, 81 + case sbSB8aSp | prSTerm<<32: + return sbSTerm, false, 81 // SB9. - {sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90}, - {sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90}, - {sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90}, - {sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90}, - {sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90}, - {sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90}, - {sbATerm, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbATerm, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbATerm, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB7, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90}, - {sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90}, + case sbATerm | prClose<<32: + return sbSB8Close, false, 90 + case sbSB7 | prClose<<32: + return sbSB8Close, false, 90 + case sbSB8Close | prClose<<32: + return sbSB8Close, false, 90 + case sbATerm | prSp<<32: + return sbSB8Sp, false, 90 + case sbSB7 | prSp<<32: + return sbSB8Sp, false, 90 + case sbSB8Close | prSp<<32: + return sbSB8Sp, false, 90 + case sbSTerm | prClose<<32: + return sbSB8aClose, false, 90 + case sbSB8aClose | prClose<<32: + return sbSB8aClose, false, 90 + case sbSTerm | prSp<<32: + return sbSB8aSp, false, 90 + case sbSB8aClose | prSp<<32: + return sbSB8aSp, false, 90 + case sbATerm | prSep<<32: + return sbParaSep, false, 90 + case sbATerm | prCR<<32: + return sbParaSep, false, 90 + case sbATerm | prLF<<32: + return sbParaSep, false, 90 + case sbSB7 | prSep<<32: + return sbParaSep, false, 90 + case sbSB7 | prCR<<32: + return sbParaSep, false, 90 + case sbSB7 | prLF<<32: + return sbParaSep, false, 90 + case sbSB8Close | prSep<<32: + return sbParaSep, false, 90 + case sbSB8Close | prCR<<32: + return sbParaSep, false, 90 + case sbSB8Close | prLF<<32: + return sbParaSep, false, 90 + case sbSTerm | prSep<<32: + return sbParaSep, false, 90 + case sbSTerm | prCR<<32: + return sbParaSep, false, 90 + case sbSTerm | prLF<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prSep<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prCR<<32: + return sbParaSep, false, 90 + case sbSB8aClose | prLF<<32: + return sbParaSep, false, 90 // SB10. - {sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100}, - {sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100}, - {sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100}, - {sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100}, - {sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100}, + case sbSB8Sp | prSp<<32: + return sbSB8Sp, false, 100 + case sbSB8aSp | prSp<<32: + return sbSB8aSp, false, 100 + case sbSB8Sp | prSep<<32: + return sbParaSep, false, 100 + case sbSB8Sp | prCR<<32: + return sbParaSep, false, 100 + case sbSB8Sp | prLF<<32: + return sbParaSep, false, 100 // SB11. - {sbATerm, prAny}: {sbAny, sbBreak, 110}, - {sbSB7, prAny}: {sbAny, sbBreak, 110}, - {sbSB8Close, prAny}: {sbAny, sbBreak, 110}, - {sbSB8Sp, prAny}: {sbAny, sbBreak, 110}, - {sbSTerm, prAny}: {sbAny, sbBreak, 110}, - {sbSB8aClose, prAny}: {sbAny, sbBreak, 110}, - {sbSB8aSp, prAny}: {sbAny, sbBreak, 110}, + case sbATerm | prAny<<32: + return sbAny, true, 110 + case sbSB7 | prAny<<32: + return sbAny, true, 110 + case sbSB8Close | prAny<<32: + return sbAny, true, 110 + case sbSB8Sp | prAny<<32: + return sbAny, true, 110 + case sbSTerm | prAny<<32: + return sbAny, true, 110 + case sbSB8aClose | prAny<<32: + return sbAny, true, 110 + case sbSB8aSp | prAny<<32: + return sbAny, true, 110 // We'll always break after ParaSep due to SB4. + + default: + return -1, false, -1 + } } // transitionSentenceBreakState determines the new state of the sentence break @@ -141,30 +215,27 @@ func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newS // Find the applicable transition in the table. var rule int - transition, ok := sbTransitions[[2]int{state, nextProperty}] - if ok { - // We have a specific transition. We'll use it. - newState, sentenceBreak, rule = transition[0], transition[1] == sbBreak, transition[2] - } else { + newState, sentenceBreak, rule = sbTransitions(state, nextProperty) + if newState < 0 { // No specific transition found. Try the less specific ones. - transAnyProp, okAnyProp := sbTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := sbTransitions[[2]int{sbAny, nextProperty}] - if okAnyProp && okAnyState { + anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny) + anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty) + if anyPropState >= 0 && anyStateState >= 0 { // Both apply. We'll use a mix (see comments for grTransitions). - newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] - if transAnyProp[2] < transAnyState[2] { - sentenceBreak, rule = transAnyProp[1] == sbBreak, transAnyProp[2] + newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule + if anyPropRule < anyStateRule { + sentenceBreak, rule = anyPropProp, anyPropRule } - } else if okAnyProp { + } else if anyPropState >= 0 { // We only have a specific state. - newState, sentenceBreak, rule = transAnyProp[0], transAnyProp[1] == sbBreak, transAnyProp[2] + newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be // true anymore. - } else if okAnyState { + } else if anyStateState >= 0 { // We only have a specific property. - newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] + newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule } else { // No known transition. SB999: Any Γ— Any. newState, sentenceBreak, rule = sbAny, false, 9990 diff --git a/vendor/github.com/rivo/uniseg/step.go b/vendor/github.com/rivo/uniseg/step.go index 6eca4b5d..2959b690 100644 --- a/vendor/github.com/rivo/uniseg/step.go +++ b/vendor/github.com/rivo/uniseg/step.go @@ -100,7 +100,7 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i if len(b) <= length { // If we're already past the end, there is nothing else to parse. var prop int if state < 0 { - prop = property(graphemeCodePoints, r) + prop = propertyGraphemes(r) } else { prop = state >> shiftPropState } @@ -179,7 +179,7 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne // Extract the first rune. r, length := utf8.DecodeRuneInString(str) if len(str) <= length { // If we're already past the end, there is nothing else to parse. - prop := property(graphemeCodePoints, r) + prop := propertyGraphemes(r) return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) } diff --git a/vendor/github.com/rivo/uniseg/width.go b/vendor/github.com/rivo/uniseg/width.go index 12a57cc2..975a9f13 100644 --- a/vendor/github.com/rivo/uniseg/width.go +++ b/vendor/github.com/rivo/uniseg/width.go @@ -1,5 +1,10 @@ package uniseg +// EastAsianAmbiguousWidth specifies the monospace width for East Asian +// characters classified as Ambiguous. The default is 1 but some rare fonts +// render them with a width of 2. +var EastAsianAmbiguousWidth = 1 + // runeWidth returns the monospace width for the given rune. The provided // grapheme property is a value mapped by the [graphemeCodePoints] table. // @@ -33,9 +38,11 @@ func runeWidth(r rune, graphemeProperty int) int { return 4 } - switch property(eastAsianWidth, r) { + switch propertyEastAsianWidth(r) { case prW, prF: return 2 + case prA: + return EastAsianAmbiguousWidth } return 1 diff --git a/vendor/github.com/rivo/uniseg/wordproperties.go b/vendor/github.com/rivo/uniseg/wordproperties.go index 805cc536..277ca100 100644 --- a/vendor/github.com/rivo/uniseg/wordproperties.go +++ b/vendor/github.com/rivo/uniseg/wordproperties.go @@ -1,13 +1,13 @@ -package uniseg - // Code generated via go generate from gen_properties.go. DO NOT EDIT. +package uniseg + // workBreakCodePoints are taken from -// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt +// https://www.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt // and -// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt +// https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt // ("Extended_Pictographic" only) -// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode +// on September 5, 2023. See https://www.unicode.org/license.html for the Unicode // license agreement. var workBreakCodePoints = [][3]int{ {0x000A, 0x000A, prLF}, // Cc @@ -318,6 +318,7 @@ var workBreakCodePoints = [][3]int{ {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CF1, 0x0CF2, prALetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + {0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D04, 0x0D0C, prALetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -357,7 +358,7 @@ var workBreakCodePoints = [][3]int{ {0x0E50, 0x0E59, prNumeric}, // Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO - {0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA + {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN {0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0F00, 0x0F00, prALetter}, // Lo TIBETAN SYLLABLE OM {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS @@ -1093,6 +1094,7 @@ var workBreakCodePoints = [][3]int{ {0x10E80, 0x10EA9, prALetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EB0, 0x10EB1, prALetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + {0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA {0x10F00, 0x10F1C, prALetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F27, 0x10F27, prALetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F30, 0x10F45, prALetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1157,6 +1159,8 @@ var workBreakCodePoints = [][3]int{ {0x11235, 0x11235, prExtend}, // Mc KHOJKI SIGN VIRAMA {0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN + {0x1123F, 0x11240, prALetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I + {0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R {0x11280, 0x11286, prALetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11288, 0x11288, prALetter}, // Lo MULTANI LETTER GHA {0x1128A, 0x1128D, prALetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA @@ -1337,13 +1341,28 @@ var workBreakCodePoints = [][3]int{ {0x11EE0, 0x11EF2, prALetter}, // Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O + {0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA + {0x11F02, 0x11F02, prALetter}, // Lo KAWI SIGN REPHA + {0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA + {0x11F04, 0x11F10, prALetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O + {0x11F12, 0x11F33, prALetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA + {0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA + {0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R + {0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI + {0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU + {0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER + {0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER + {0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE {0x11FB0, 0x11FB0, prALetter}, // Lo LISU LETTER YHA {0x12000, 0x12399, prALetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U {0x12400, 0x1246E, prALetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM {0x12480, 0x12543, prALetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12F90, 0x12FF0, prALetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 - {0x13000, 0x1342E, prALetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 - {0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT + {0x13000, 0x1342F, prALetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D + {0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE + {0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY + {0x13441, 0x13446, prALetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN + {0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED {0x14400, 0x14646, prALetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x16800, 0x16A38, prALetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16A40, 0x16A5E, prALetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK @@ -1374,6 +1393,7 @@ var workBreakCodePoints = [][3]int{ {0x1AFFD, 0x1AFFE, prKatakana}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1B000, 0x1B000, prKatakana}, // Lo KATAKANA LETTER ARCHAIC E {0x1B120, 0x1B122, prKatakana}, // Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU + {0x1B155, 0x1B155, prKatakana}, // Lo KATAKANA LETTER SMALL KO {0x1B164, 0x1B167, prKatakana}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1BC00, 0x1BC6A, prALetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M {0x1BC70, 0x1BC7C, prALetter}, // Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK @@ -1431,11 +1451,14 @@ var workBreakCodePoints = [][3]int{ {0x1DF00, 0x1DF09, prALetter}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prALetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0B, 0x1DF1E, prALetter}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + {0x1DF25, 0x1DF2A, prALetter}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK {0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + {0x1E030, 0x1E06D, prALetter}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE + {0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {0x1E100, 0x1E12C, prALetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E137, 0x1E13D, prALetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1446,6 +1469,10 @@ var workBreakCodePoints = [][3]int{ {0x1E2C0, 0x1E2EB, prALetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE + {0x1E4D0, 0x1E4EA, prALetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL + {0x1E4EB, 0x1E4EB, prALetter}, // Lm NAG MUNDARI SIGN OJOD + {0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH + {0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE {0x1E7E0, 0x1E7E6, prALetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E8, 0x1E7EB, prALetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7ED, 0x1E7EE, prALetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE @@ -1740,7 +1767,8 @@ var workBreakCodePoints = [][3]int{ {0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (πŸ›“..πŸ›”) STUPA..PAGODA {0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (πŸ›•) hindu temple {0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (πŸ›–..πŸ›—) hut..elevator - {0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (πŸ›˜..πŸ›œ) .. + {0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (πŸ›˜..πŸ››) .. + {0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (πŸ›œ) wireless {0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (πŸ›..πŸ›Ÿ) playground slide..ring buoy {0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (πŸ› οΈ..πŸ›₯️) hammer and wrench..motor boat {0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (πŸ›¦..πŸ›¨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE @@ -1757,7 +1785,7 @@ var workBreakCodePoints = [][3]int{ {0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (πŸ›Ί) auto rickshaw {0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (πŸ›»..πŸ›Ό) pickup truck..roller skate {0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (πŸ›½..πŸ›Ώ) .. - {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) .. + {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS {0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (πŸŸ•..🟟) CIRCLED TRIANGLE.. {0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square {0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) .. @@ -1816,30 +1844,37 @@ var workBreakCodePoints = [][3]int{ {0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (πŸ¨€..🩯) NEUTRAL CHESS KING.. {0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal - {0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩡..🩷) .. + {0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩡..🩷) light blue heart..pink heart {0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) .. {0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (πŸͺ€..πŸͺ‚) yo-yo..parachute {0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (πŸͺƒ..πŸͺ†) boomerang..nesting dolls - {0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (πŸͺ‡..πŸͺ) .. + {0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (πŸͺ‡..πŸͺˆ) maracas..flute + {0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (πŸͺ‰..πŸͺ) .. {0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (πŸͺ..πŸͺ•) ringed planet..banjo {0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (πŸͺ–..πŸͺ¨) military helmet..rock {0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (πŸͺ©..πŸͺ¬) mirror ball..hamsa - {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (πŸͺ­..πŸͺ―) .. + {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (πŸͺ­..πŸͺ―) folding hand fan..khanda {0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (πŸͺ°..πŸͺΆ) fly..feather {0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (πŸͺ·..πŸͺΊ) lotus..nest with eggs - {0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (πŸͺ»..πŸͺΏ) .. + {0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (πŸͺ»..πŸͺ½) hyacinth..wing + {0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (πŸͺΎ) + {0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (πŸͺΏ) goose {0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (πŸ«€..πŸ«‚) anatomical heart..people hugging {0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (πŸ«ƒ..πŸ«…) pregnant man..person with crown - {0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (πŸ«†..🫏) .. + {0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (πŸ«†..🫍) .. + {0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey {0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..πŸ«–) blueberries..teapot {0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (πŸ«—..πŸ«™) pouring liquid..jar - {0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) .. + {0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..πŸ«›) ginger root..pea pod + {0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) .. {0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles - {0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) .. + {0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face + {0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) .. {0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫢) hand with index finger and thumb crossed..heart hands - {0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) .. + {0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand + {0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) .. {0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (πŸ°€..🿽) .. {0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG diff --git a/vendor/github.com/rivo/uniseg/wordrules.go b/vendor/github.com/rivo/uniseg/wordrules.go index 325407e4..57a8c683 100644 --- a/vendor/github.com/rivo/uniseg/wordrules.go +++ b/vendor/github.com/rivo/uniseg/wordrules.go @@ -22,82 +22,121 @@ const ( wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c). ) -// The word break parser's breaking instructions. -const ( - wbDontBreak = iota - wbBreak -) - -// The word break parser's state transitions. It's anologous to grTransitions, -// see comments there for details. Unicode version 14.0.0. -var wbTransitions = map[[2]int][3]int{ +// wbTransitions implements the word break parser's state transitions. It's +// anologous to [grTransitions], see comments there for details. +// +// Unicode version 15.0.0. +func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) { + switch uint64(state) | uint64(prop)<<32 { // WB3b. - {wbAny, prNewline}: {wbNewline, wbBreak, 32}, - {wbAny, prCR}: {wbCR, wbBreak, 32}, - {wbAny, prLF}: {wbLF, wbBreak, 32}, + case wbAny | prNewline<<32: + return wbNewline, true, 32 + case wbAny | prCR<<32: + return wbCR, true, 32 + case wbAny | prLF<<32: + return wbLF, true, 32 // WB3a. - {wbNewline, prAny}: {wbAny, wbBreak, 31}, - {wbCR, prAny}: {wbAny, wbBreak, 31}, - {wbLF, prAny}: {wbAny, wbBreak, 31}, + case wbNewline | prAny<<32: + return wbAny, true, 31 + case wbCR | prAny<<32: + return wbAny, true, 31 + case wbLF | prAny<<32: + return wbAny, true, 31 // WB3. - {wbCR, prLF}: {wbLF, wbDontBreak, 30}, + case wbCR | prLF<<32: + return wbLF, false, 30 // WB3d. - {wbAny, prWSegSpace}: {wbWSegSpace, wbBreak, 9990}, - {wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34}, + case wbAny | prWSegSpace<<32: + return wbWSegSpace, true, 9990 + case wbWSegSpace | prWSegSpace<<32: + return wbWSegSpace, false, 34 // WB5. - {wbAny, prALetter}: {wbALetter, wbBreak, 9990}, - {wbAny, prHebrewLetter}: {wbHebrewLetter, wbBreak, 9990}, - {wbALetter, prALetter}: {wbALetter, wbDontBreak, 50}, - {wbALetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50}, - {wbHebrewLetter, prALetter}: {wbALetter, wbDontBreak, 50}, - {wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50}, + case wbAny | prALetter<<32: + return wbALetter, true, 9990 + case wbAny | prHebrewLetter<<32: + return wbHebrewLetter, true, 9990 + case wbALetter | prALetter<<32: + return wbALetter, false, 50 + case wbALetter | prHebrewLetter<<32: + return wbHebrewLetter, false, 50 + case wbHebrewLetter | prALetter<<32: + return wbALetter, false, 50 + case wbHebrewLetter | prHebrewLetter<<32: + return wbHebrewLetter, false, 50 // WB7. Transitions to wbWB7 handled by transitionWordBreakState(). - {wbWB7, prALetter}: {wbALetter, wbDontBreak, 70}, - {wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70}, + case wbWB7 | prALetter<<32: + return wbALetter, false, 70 + case wbWB7 | prHebrewLetter<<32: + return wbHebrewLetter, false, 70 // WB7a. - {wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71}, + case wbHebrewLetter | prSingleQuote<<32: + return wbAny, false, 71 // WB7c. Transitions to wbWB7c handled by transitionWordBreakState(). - {wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73}, + case wbWB7c | prHebrewLetter<<32: + return wbHebrewLetter, false, 73 // WB8. - {wbAny, prNumeric}: {wbNumeric, wbBreak, 9990}, - {wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80}, + case wbAny | prNumeric<<32: + return wbNumeric, true, 9990 + case wbNumeric | prNumeric<<32: + return wbNumeric, false, 80 // WB9. - {wbALetter, prNumeric}: {wbNumeric, wbDontBreak, 90}, - {wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90}, + case wbALetter | prNumeric<<32: + return wbNumeric, false, 90 + case wbHebrewLetter | prNumeric<<32: + return wbNumeric, false, 90 // WB10. - {wbNumeric, prALetter}: {wbALetter, wbDontBreak, 100}, - {wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100}, + case wbNumeric | prALetter<<32: + return wbALetter, false, 100 + case wbNumeric | prHebrewLetter<<32: + return wbHebrewLetter, false, 100 // WB11. Transitions to wbWB11 handled by transitionWordBreakState(). - {wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110}, + case wbWB11 | prNumeric<<32: + return wbNumeric, false, 110 // WB13. - {wbAny, prKatakana}: {wbKatakana, wbBreak, 9990}, - {wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130}, + case wbAny | prKatakana<<32: + return wbKatakana, true, 9990 + case wbKatakana | prKatakana<<32: + return wbKatakana, false, 130 // WB13a. - {wbAny, prExtendNumLet}: {wbExtendNumLet, wbBreak, 9990}, - {wbALetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, - {wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, - {wbNumeric, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, - {wbKatakana, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, - {wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, + case wbAny | prExtendNumLet<<32: + return wbExtendNumLet, true, 9990 + case wbALetter | prExtendNumLet<<32: + return wbExtendNumLet, false, 131 + case wbHebrewLetter | prExtendNumLet<<32: + return wbExtendNumLet, false, 131 + case wbNumeric | prExtendNumLet<<32: + return wbExtendNumLet, false, 131 + case wbKatakana | prExtendNumLet<<32: + return wbExtendNumLet, false, 131 + case wbExtendNumLet | prExtendNumLet<<32: + return wbExtendNumLet, false, 131 // WB13b. - {wbExtendNumLet, prALetter}: {wbALetter, wbDontBreak, 132}, - {wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132}, - {wbExtendNumLet, prNumeric}: {wbNumeric, wbDontBreak, 132}, - {wbExtendNumLet, prKatakana}: {prKatakana, wbDontBreak, 132}, + case wbExtendNumLet | prALetter<<32: + return wbALetter, false, 132 + case wbExtendNumLet | prHebrewLetter<<32: + return wbHebrewLetter, false, 132 + case wbExtendNumLet | prNumeric<<32: + return wbNumeric, false, 132 + case wbExtendNumLet | prKatakana<<32: + return wbKatakana, false, 132 + + default: + return -1, false, -1 + } } // transitionWordBreakState determines the new state of the word break parser @@ -141,30 +180,27 @@ func transitionWordBreakState(state int, r rune, b []byte, str string) (newState // Find the applicable transition in the table. var rule int - transition, ok := wbTransitions[[2]int{state, nextProperty}] - if ok { - // We have a specific transition. We'll use it. - newState, wordBreak, rule = transition[0], transition[1] == wbBreak, transition[2] - } else { + newState, wordBreak, rule = wbTransitions(state, nextProperty) + if newState < 0 { // No specific transition found. Try the less specific ones. - transAnyProp, okAnyProp := wbTransitions[[2]int{state, prAny}] - transAnyState, okAnyState := wbTransitions[[2]int{wbAny, nextProperty}] - if okAnyProp && okAnyState { + anyPropState, anyPropWordBreak, anyPropRule := wbTransitions(state, prAny) + anyStateState, anyStateWordBreak, anyStateRule := wbTransitions(wbAny, nextProperty) + if anyPropState >= 0 && anyStateState >= 0 { // Both apply. We'll use a mix (see comments for grTransitions). - newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2] - if transAnyProp[2] < transAnyState[2] { - wordBreak, rule = transAnyProp[1] == wbBreak, transAnyProp[2] + newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule + if anyPropRule < anyStateRule { + wordBreak, rule = anyPropWordBreak, anyPropRule } - } else if okAnyProp { + } else if anyPropState >= 0 { // We only have a specific state. - newState, wordBreak, rule = transAnyProp[0], transAnyProp[1] == wbBreak, transAnyProp[2] + newState, wordBreak, rule = anyPropState, anyPropWordBreak, anyPropRule // This branch will probably never be reached because okAnyState will // always be true given the current transition map. But we keep it here // for future modifications to the transition map where this may not be // true anymore. - } else if okAnyState { + } else if anyStateState >= 0 { // We only have a specific property. - newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2] + newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule } else { // No known transition. WB999: Any Γ· Any. newState, wordBreak, rule = wbAny, true, 9990 diff --git a/vendor/modules.txt b/vendor/modules.txt index 7de237ea..ba9ea921 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -21,7 +21,7 @@ github.com/olekukonko/tablewriter github.com/pkg/errors # github.com/pmezard/go-difflib v1.0.0 ## explicit -# github.com/rivo/uniseg v0.4.4 +# github.com/rivo/uniseg v0.4.6 ## explicit; go 1.18 github.com/rivo/uniseg # github.com/spf13/cobra v1.8.0