From b97971aec38c06b4a6c87c314e55b739a24d055d Mon Sep 17 00:00:00 2001
From: Dirkjan Bussink <d.bussink@gmail.com>
Date: Mon, 30 Oct 2023 17:48:26 +0100
Subject: [PATCH] Additional vendored code cleanup

Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>
Signed-off-by: Vicent Marti <vmg@strn.cat>
---
 go/mysql/collations/vindex/collate/index.go   |   8 +-
 go/mysql/collations/vindex/collate/tables.go  |   8 -
 .../vindex/internal/colltab/collelem.go       | 215 +-------
 .../vindex/internal/colltab/contract.go       |  59 ---
 .../vindex/internal/colltab/iter.go           |  14 -
 .../vindex/internal/colltab/table.go          |   6 +-
 .../vindex/internal/colltab/trie.go           |  64 ---
 .../vindex/unicode/norm/composition.go        |  34 +-
 .../vindex/unicode/norm/forminfo.go           |  13 -
 .../collations/vindex/unicode/norm/input.go   |  92 +---
 .../collations/vindex/unicode/norm/iter.go    | 458 ------------------
 .../vindex/unicode/norm/normalize.go          |  14 +-
 12 files changed, 56 insertions(+), 929 deletions(-)
 delete mode 100644 go/mysql/collations/vindex/unicode/norm/iter.go

diff --git a/go/mysql/collations/vindex/collate/index.go b/go/mysql/collations/vindex/collate/index.go
index efb118a5873..19073623ce6 100644
--- a/go/mysql/collations/vindex/collate/index.go
+++ b/go/mysql/collations/vindex/collate/index.go
@@ -16,11 +16,9 @@ func getTable(t tableIndex) *colltab.Table {
 			Index:   mainLookup[:],
 			Values:  mainValues[:],
 		},
-		ExpandElem:     mainExpandElem[:],
-		ContractTries:  mainCTEntries[:],
-		ContractElem:   mainContractElem[:],
-		MaxContractLen: 18,
-		VariableTop:    varTop,
+		ExpandElem:    mainExpandElem[:],
+		ContractTries: mainCTEntries[:],
+		ContractElem:  mainContractElem[:],
 	}
 }
 
diff --git a/go/mysql/collations/vindex/collate/tables.go b/go/mysql/collations/vindex/collate/tables.go
index 01d2aff2603..f46ff79c5ee 100644
--- a/go/mysql/collations/vindex/collate/tables.go
+++ b/go/mysql/collations/vindex/collate/tables.go
@@ -1,13 +1,5 @@
 package collate
 
-// UnicodeVersion is the Unicode version from which the tables in this package are derived.
-const UnicodeVersion = "6.2.0"
-
-// CLDRVersion is the CLDR version from which the tables in this package are derived.
-const CLDRVersion = "23"
-
-const varTop = 0x30e
-
 // mainExpandElem: 46864 entries, 187456 bytes
 var mainExpandElem = [46864]uint32{
 	// Block 0, offset 0x0
diff --git a/go/mysql/collations/vindex/internal/colltab/collelem.go b/go/mysql/collations/vindex/internal/colltab/collelem.go
index 0c23c8a48e9..41323dfee4d 100644
--- a/go/mysql/collations/vindex/internal/colltab/collelem.go
+++ b/go/mysql/collations/vindex/internal/colltab/collelem.go
@@ -5,33 +5,12 @@
 package colltab
 
 import (
-	"fmt"
 	"unicode"
 )
 
-// Level identifies the collation comparison level.
-// The primary level corresponds to the basic sorting of text.
-// The secondary level corresponds to accents and related linguistic elements.
-// The tertiary level corresponds to casing and related concepts.
-// The quaternary level is derived from the other levels by the
-// various algorithms for handling variable elements.
-type Level int
-
-const (
-	Primary Level = iota
-	Secondary
-	Tertiary
-	Quaternary
-	Identity
-
-	NumLevels
-)
-
 const (
 	defaultSecondary = 0x20
-	defaultTertiary  = 0x2
 	maxTertiary      = 0x1F
-	MaxQuaternary    = 0x1FFFFF // 21 bits.
 )
 
 // Elem is a representation of a collation element. This API provides ways to encode
@@ -42,12 +21,8 @@ type Elem uint32
 
 const (
 	maxCE       Elem = 0xAFFFFFFF
-	PrivateUse       = minContract
-	minContract      = 0xC0000000
-	maxContract      = 0xDFFFFFFF
-	minExpand        = 0xE0000000
-	maxExpand        = 0xEFFFFFFF
-	minDecomp        = 0xF0000000
+	maxContract Elem = 0xDFFFFFFF
+	maxExpand   Elem = 0xEFFFFFFF
 )
 
 type ceType int
@@ -65,14 +40,11 @@ func (ce Elem) ctype() ceType {
 	}
 	if ce <= maxContract {
 		return ceContractionIndex
-	} else {
-		if ce <= maxExpand {
-			return ceExpansionIndex
-		}
-		return ceDecompose
 	}
-	panic("should not reach here")
-	return ceType(-1)
+	if ce <= maxExpand {
+		return ceExpansionIndex
+	}
+	return ceDecompose
 }
 
 // For normal collation elements, we assume that a collation element either has
@@ -100,99 +72,23 @@ func (ce Elem) ctype() ceType {
 //	11qqqqqq qqqqqqqq qqqqqqq0 00000000
 //	  - q* quaternary value
 const (
-	ceTypeMask              = 0xC0000000
-	ceTypeMaskExt           = 0xE0000000
-	ceIgnoreMask            = 0xF00FFFFF
-	ceType1                 = 0x40000000
-	ceType2                 = 0x00000000
-	ceType3or4              = 0x80000000
-	ceType4                 = 0xA0000000
-	ceTypeQ                 = 0xC0000000
-	Ignore                  = ceType4
-	firstNonPrimary         = 0x80000000
-	lastSpecialPrimary      = 0xA0000000
-	secondaryMask           = 0x80000000
-	hasTertiaryMask         = 0x40000000
-	primaryValueMask        = 0x3FFFFE00
-	maxPrimaryBits          = 21
-	compactPrimaryBits      = 16
-	maxSecondaryBits        = 12
-	maxTertiaryBits         = 8
-	maxCCCBits              = 8
-	maxSecondaryCompactBits = 8
-	maxSecondaryDiffBits    = 4
-	maxTertiaryCompactBits  = 5
-	primaryShift            = 9
-	compactSecondaryShift   = 5
-	minCompactSecondary     = defaultSecondary - 4
+	ceTypeMask            = 0xC0000000
+	ceTypeMaskExt         = 0xE0000000
+	ceType1               = 0x40000000
+	ceType3or4            = 0x80000000
+	ceType4               = 0xA0000000
+	firstNonPrimary       = 0x80000000
+	lastSpecialPrimary    = 0xA0000000
+	primaryValueMask      = 0x3FFFFE00
+	primaryShift          = 9
+	compactSecondaryShift = 5
+	minCompactSecondary   = defaultSecondary - 4
 )
 
 func makeImplicitCE(primary int) Elem {
 	return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
 }
 
-// MakeElem returns an Elem for the given values.  It will return an error
-// if the given combination of values is invalid.
-func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
-	if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
-		return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
-	}
-	if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
-		return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
-	}
-	if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
-		return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
-	}
-	ce := Elem(0)
-	if primary != 0 {
-		if ccc != 0 {
-			if primary >= 1<<compactPrimaryBits {
-				return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
-			}
-			if secondary != defaultSecondary {
-				return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
-			}
-			ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
-			ce |= Elem(ccc) << compactPrimaryBits
-			ce |= Elem(primary)
-			ce |= ceType3or4
-		} else if tertiary == defaultTertiary {
-			if secondary >= 1<<maxSecondaryCompactBits {
-				return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
-			}
-			ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
-			ce |= ceType1
-		} else {
-			d := secondary - defaultSecondary + maxSecondaryDiffBits
-			if d >= 1<<maxSecondaryDiffBits || d < 0 {
-				return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
-			}
-			if tertiary >= 1<<maxTertiaryCompactBits {
-				return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
-			}
-			ce = Elem(primary<<maxSecondaryDiffBits + d)
-			ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
-		}
-	} else {
-		ce = Elem(secondary<<maxTertiaryBits + tertiary)
-		ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
-		ce |= ceType4
-	}
-	return ce, nil
-}
-
-// MakeQuaternary returns an Elem with the given quaternary value.
-func MakeQuaternary(v int) Elem {
-	return ceTypeQ | Elem(v<<primaryShift)
-}
-
-// Mask sets weights for any level smaller than l to 0.
-// The resulting Elem can be used to test for equality with
-// other Elems to which the same mask has been applied.
-func (ce Elem) Mask(l Level) uint32 {
-	return 0
-}
-
 // CCC returns the canonical combining class associated with the underlying character,
 // if applicable, or 0 otherwise.
 func (ce Elem) CCC() uint8 {
@@ -216,41 +112,6 @@ func (ce Elem) Primary() int {
 	return int(ce&primaryValueMask) >> primaryShift
 }
 
-// Secondary returns the secondary collation weight for ce.
-func (ce Elem) Secondary() int {
-	switch ce & ceTypeMask {
-	case ceType1:
-		return int(uint8(ce))
-	case ceType2:
-		return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
-	case ceType3or4:
-		if ce < ceType4 {
-			return defaultSecondary
-		}
-		return int(ce>>8) & 0xFFF
-	case ceTypeQ:
-		return 0
-	}
-	panic("should not reach here")
-}
-
-// Tertiary returns the tertiary collation weight for ce.
-func (ce Elem) Tertiary() uint8 {
-	if ce&hasTertiaryMask == 0 {
-		if ce&ceType3or4 == 0 {
-			return uint8(ce & 0x1F)
-		}
-		if ce&ceType4 == ceType4 {
-			return uint8(ce)
-		}
-		return uint8(ce>>24) & 0x1F // type 2
-	} else if ce&ceTypeMask == ceType1 {
-		return defaultTertiary
-	}
-	// ce is a quaternary value.
-	return 0
-}
-
 func (ce Elem) updateTertiary(t uint8) Elem {
 	if ce&ceTypeMask == ceType1 {
 		// convert to type 4
@@ -267,33 +128,6 @@ func (ce Elem) updateTertiary(t uint8) Elem {
 	return ce | Elem(t)
 }
 
-// Quaternary returns the quaternary value if explicitly specified,
-// 0 if ce == Ignore, or MaxQuaternary otherwise.
-// Quaternary values are used only for shifted variants.
-func (ce Elem) Quaternary() int {
-	if ce&ceTypeMask == ceTypeQ {
-		return int(ce&primaryValueMask) >> primaryShift
-	} else if ce&ceIgnoreMask == Ignore {
-		return 0
-	}
-	return MaxQuaternary
-}
-
-// Weight returns the collation weight for the given level.
-func (ce Elem) Weight(l Level) int {
-	switch l {
-	case Primary:
-		return ce.Primary()
-	case Secondary:
-		return ce.Secondary()
-	case Tertiary:
-		return int(ce.Tertiary())
-	case Quaternary:
-		return ce.Quaternary()
-	}
-	return 0 // return 0 (ignore) for undefined levels.
-}
-
 // For contractions, collation elements are of the form
 // 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
 //   - n* is the size of the first node in the contraction trie.
@@ -316,10 +150,6 @@ func splitContractIndex(ce Elem) (index, n, offset int) {
 	return
 }
 
-// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
-// where b* is the index into the expansion sequence table.
-const maxExpandIndexBits = 16
-
 func splitExpandIndex(ce Elem) (index int) {
 	return int(uint16(ce))
 }
@@ -340,18 +170,15 @@ func splitDecompose(ce Elem) (t1, t2 uint8) {
 const (
 	// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
 	minUnified       rune = 0x4E00
-	maxUnified            = 0x9FFF
-	minCompatibility      = 0xF900
-	maxCompatibility      = 0xFAFF
-	minRare               = 0x3400
-	maxRare               = 0x4DBF
+	maxUnified       rune = 0x9FFF
+	minCompatibility rune = 0xF900
+	maxCompatibility rune = 0xFAFF
 )
+
 const (
 	commonUnifiedOffset = 0x10000
 	rareUnifiedOffset   = 0x20000 // largest rune in common is U+FAFF
 	otherOffset         = 0x50000 // largest rune in rare is U+2FA1D
-	illegalOffset       = otherOffset + int(unicode.MaxRune)
-	maxPrimary          = illegalOffset + 1
 )
 
 // implicitPrimary returns the primary weight for the a rune
diff --git a/go/mysql/collations/vindex/internal/colltab/contract.go b/go/mysql/collations/vindex/internal/colltab/contract.go
index 25649d4f55f..c677b65c04d 100644
--- a/go/mysql/collations/vindex/internal/colltab/contract.go
+++ b/go/mysql/collations/vindex/internal/colltab/contract.go
@@ -27,33 +27,16 @@ type ctScanner struct {
 	done   bool
 }
 
-type ctScannerString struct {
-	states ContractTrieSet
-	s      string
-	n      int
-	index  int
-	pindex int
-	done   bool
-}
-
 func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
 	return ctScanner{s: b, states: t[index:], n: n}
 }
 
-func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
-	return ctScannerString{s: str, states: t[index:], n: n}
-}
-
 // result returns the offset i and bytes consumed p so far.  If no suffix
 // matched, i and p will be 0.
 func (s *ctScanner) result() (i, p int) {
 	return s.index, s.pindex
 }
 
-func (s *ctScannerString) result() (i, p int) {
-	return s.index, s.pindex
-}
-
 const (
 	final   = 0
 	noIndex = 0xFF
@@ -101,45 +84,3 @@ func (s *ctScanner) scan(p int) int {
 	}
 	return pr
 }
-
-// scan is a verbatim copy of ctScanner.scan.
-func (s *ctScannerString) scan(p int) int {
-	pr := p // the p at the rune start
-	str := s.s
-	states, n := s.states, s.n
-	for i := 0; i < n && p < len(str); {
-		e := states[i]
-		c := str[p]
-		// TODO: a significant number of contractions are of a form that
-		// cannot match discontiguous UTF-8 in a normalized string. We could let
-		// a negative value of e.n mean that we can set s.done = true and avoid
-		// the need for additional matches.
-		if c >= e.L {
-			if e.L == c {
-				p++
-				if e.I != noIndex {
-					s.index = int(e.I)
-					s.pindex = p
-				}
-				if e.N != final {
-					i, states, n = 0, states[int(e.H)+n:], int(e.N)
-					if p >= len(str) || utf8.RuneStart(str[p]) {
-						s.states, s.n, pr = states, n, p
-					}
-				} else {
-					s.done = true
-					return p
-				}
-				continue
-			} else if e.N == final && c <= e.H {
-				p++
-				s.done = true
-				s.index = int(c-e.L) + int(e.I)
-				s.pindex = p
-				return p
-			}
-		}
-		i++
-	}
-	return pr
-}
diff --git a/go/mysql/collations/vindex/internal/colltab/iter.go b/go/mysql/collations/vindex/internal/colltab/iter.go
index 541d66e4778..18434065c75 100644
--- a/go/mysql/collations/vindex/internal/colltab/iter.go
+++ b/go/mysql/collations/vindex/internal/colltab/iter.go
@@ -130,20 +130,6 @@ func (i *Iter) Next() bool {
 	return done
 }
 
-// nextNoNorm is the same as next, but does not "normalize" the collation
-// elements.
-func (i *Iter) nextNoNorm() bool {
-	// TODO: remove this function. Using this instead of next does not seem
-	// to improve performance in any significant way. We retain this until
-	// later for evaluation purposes.
-	if i.done() {
-		return false
-	}
-	i.appendNext()
-	i.N = len(i.Elems)
-	return true
-}
-
 const maxCombiningCharacters = 30
 
 // doNorm reorders the collation elements in i.Elems.
diff --git a/go/mysql/collations/vindex/internal/colltab/table.go b/go/mysql/collations/vindex/internal/colltab/table.go
index f06f15e1e34..14c8541f8ee 100644
--- a/go/mysql/collations/vindex/internal/colltab/table.go
+++ b/go/mysql/collations/vindex/internal/colltab/table.go
@@ -18,10 +18,8 @@ type Table struct {
 	ExpandElem []uint32
 
 	// contraction info
-	ContractTries  ContractTrieSet
-	ContractElem   []uint32
-	MaxContractLen int
-	VariableTop    uint32
+	ContractTries ContractTrieSet
+	ContractElem  []uint32
 }
 
 // AppendNext appends the weights corresponding to the next rune or
diff --git a/go/mysql/collations/vindex/internal/colltab/trie.go b/go/mysql/collations/vindex/internal/colltab/trie.go
index a0eaa0d23be..f303134a9da 100644
--- a/go/mysql/collations/vindex/internal/colltab/trie.go
+++ b/go/mysql/collations/vindex/internal/colltab/trie.go
@@ -20,14 +20,11 @@ type Trie struct {
 }
 
 const (
-	t1 = 0x00 // 0000 0000
 	tx = 0x80 // 1000 0000
 	t2 = 0xC0 // 1100 0000
 	t3 = 0xE0 // 1110 0000
 	t4 = 0xF0 // 1111 0000
 	t5 = 0xF8 // 1111 1000
-	t6 = 0xFC // 1111 1100
-	te = 0xFE // 1111 1110
 )
 
 func (t *Trie) lookupValue(n uint16, b byte) Elem {
@@ -96,64 +93,3 @@ func (t *Trie) lookup(s []byte) (v Elem, sz int) {
 	// Illegal rune
 	return 0, 1
 }
-
-// The body of lookupString is a verbatim copy of that of lookup.
-func (t *Trie) lookupString(s string) (v Elem, sz int) {
-	c0 := s[0]
-	switch {
-	case c0 < tx:
-		return Elem(t.Values0[c0]), 1
-	case c0 < t2:
-		return 0, 1
-	case c0 < t3:
-		if len(s) < 2 {
-			return 0, 0
-		}
-		i := t.Index0[c0]
-		c1 := s[1]
-		if c1 < tx || t2 <= c1 {
-			return 0, 1
-		}
-		return t.lookupValue(i, c1), 2
-	case c0 < t4:
-		if len(s) < 3 {
-			return 0, 0
-		}
-		i := t.Index0[c0]
-		c1 := s[1]
-		if c1 < tx || t2 <= c1 {
-			return 0, 1
-		}
-		o := int(i)<<6 + int(c1)
-		i = t.Index[o]
-		c2 := s[2]
-		if c2 < tx || t2 <= c2 {
-			return 0, 2
-		}
-		return t.lookupValue(i, c2), 3
-	case c0 < t5:
-		if len(s) < 4 {
-			return 0, 0
-		}
-		i := t.Index0[c0]
-		c1 := s[1]
-		if c1 < tx || t2 <= c1 {
-			return 0, 1
-		}
-		o := int(i)<<6 + int(c1)
-		i = t.Index[o]
-		c2 := s[2]
-		if c2 < tx || t2 <= c2 {
-			return 0, 2
-		}
-		o = int(i)<<6 + int(c2)
-		i = t.Index[o]
-		c3 := s[3]
-		if c3 < tx || t2 <= c3 {
-			return 0, 3
-		}
-		return t.lookupValue(i, c3), 4
-	}
-	// Illegal rune
-	return 0, 1
-}
diff --git a/go/mysql/collations/vindex/unicode/norm/composition.go b/go/mysql/collations/vindex/unicode/norm/composition.go
index e2087bce527..c186f64fbf8 100644
--- a/go/mysql/collations/vindex/unicode/norm/composition.go
+++ b/go/mysql/collations/vindex/unicode/norm/composition.go
@@ -4,7 +4,11 @@
 
 package norm
 
-import "unicode/utf8"
+import (
+	"unicode/utf8"
+
+	"vitess.io/vitess/go/hack"
+)
 
 const (
 	maxNonStarters = 30
@@ -17,6 +21,10 @@ const (
 	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
 )
 
+// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
+// sequence of starter and non-starter runes for the purpose of normalization.
+const MaxSegmentSize = maxByteBufferSize
+
 // ssState is used for reporting the segment state after inserting a rune.
 // It is returned by streamSafe.next.
 type ssState int
@@ -111,20 +119,6 @@ type reorderBuffer struct {
 	flushF func(*reorderBuffer) bool
 }
 
-func (rb *reorderBuffer) init(f Form, src []byte) {
-	rb.f = *formTable[f]
-	rb.src.setBytes(src)
-	rb.nsrc = len(src)
-	rb.ss = 0
-}
-
-func (rb *reorderBuffer) initString(f Form, src string) {
-	rb.f = *formTable[f]
-	rb.src.setString(src)
-	rb.nsrc = len(src)
-	rb.ss = 0
-}
-
 func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
 	rb.out = out
 	rb.flushF = f
@@ -247,7 +241,7 @@ func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
 // in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
 // It flushes the buffer on each new segment start.
 func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
-	rb.tmpBytes.setBytes(dcomp)
+	rb.tmpBytes = dcomp
 	// As the streamSafe accounting already handles the counting for modifiers,
 	// we don't have to call next. However, we do need to keep the accounting
 	// intact when flushing the buffer.
@@ -271,7 +265,7 @@ func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
 
 // insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
 func (rb *reorderBuffer) insertCGJ() {
-	rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
+	rb.insertSingle(hack.StringBytes(GraphemeJoiner), 0, Properties{size: uint8(len(GraphemeJoiner))})
 }
 
 // appendRune inserts a rune at the end of the buffer. It is used for Hangul.
@@ -383,12 +377,6 @@ func isJamoVT(b []byte) bool {
 	return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
 }
 
-func isHangulWithoutJamoT(b []byte) bool {
-	c, _ := utf8.DecodeRune(b)
-	c -= hangulBase
-	return c < jamoLVTCount && c%jamoTCount == 0
-}
-
 // decomposeHangul writes the decomposed Hangul to buf and returns the number
 // of bytes written.  len(buf) should be at least 9.
 func decomposeHangul(buf []byte, r rune) int {
diff --git a/go/mysql/collations/vindex/unicode/norm/forminfo.go b/go/mysql/collations/vindex/unicode/norm/forminfo.go
index 487335d14d3..75f2674486a 100644
--- a/go/mysql/collations/vindex/unicode/norm/forminfo.go
+++ b/go/mysql/collations/vindex/unicode/norm/forminfo.go
@@ -55,7 +55,6 @@ type formInfo struct {
 	form                     Form
 	composing, compatibility bool // form type
 	info                     lookupFunc
-	nextMain                 iterFunc
 }
 
 var formTable = []*formInfo{{
@@ -63,25 +62,21 @@ var formTable = []*formInfo{{
 	composing:     true,
 	compatibility: false,
 	info:          lookupInfoNFC,
-	nextMain:      nextComposed,
 }, {
 	form:          NFD,
 	composing:     false,
 	compatibility: false,
 	info:          lookupInfoNFC,
-	nextMain:      nextDecomposed,
 }, {
 	form:          NFKC,
 	composing:     true,
 	compatibility: true,
 	info:          lookupInfoNFKC,
-	nextMain:      nextComposed,
 }, {
 	form:          NFKD,
 	composing:     false,
 	compatibility: true,
 	info:          lookupInfoNFKC,
-	nextMain:      nextDecomposed,
 }}
 
 // We do not distinguish between boundaries for NFC, NFD, etc. to avoid
@@ -229,14 +224,6 @@ func (f Form) Properties(s []byte) Properties {
 	return compInfo(nfkcData.lookup(s))
 }
 
-// PropertiesString returns properties for the first rune in s.
-func (f Form) PropertiesString(s string) Properties {
-	if f == NFC || f == NFD {
-		return compInfo(nfcData.lookupString(s))
-	}
-	return compInfo(nfkcData.lookupString(s))
-}
-
 // compInfo converts the information contained in v and sz
 // to a Properties.  See the comment at the top of the file
 // for more information on the format.
diff --git a/go/mysql/collations/vindex/unicode/norm/input.go b/go/mysql/collations/vindex/unicode/norm/input.go
index 479e35bc258..4dbbcd15c45 100644
--- a/go/mysql/collations/vindex/unicode/norm/input.go
+++ b/go/mysql/collations/vindex/unicode/norm/input.go
@@ -6,102 +6,46 @@ package norm
 
 import "unicode/utf8"
 
-type input struct {
-	str   string
-	bytes []byte
-}
+type input []byte
 
 func inputBytes(str []byte) input {
-	return input{bytes: str}
-}
-
-func inputString(str string) input {
-	return input{str: str}
-}
-
-func (in *input) setBytes(str []byte) {
-	in.str = ""
-	in.bytes = str
-}
-
-func (in *input) setString(str string) {
-	in.str = str
-	in.bytes = nil
-}
-
-func (in *input) _byte(p int) byte {
-	if in.bytes == nil {
-		return in.str[p]
-	}
-	return in.bytes[p]
+	return str
 }
 
-func (in *input) skipASCII(p, max int) int {
-	if in.bytes == nil {
-		for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
-		}
-	} else {
-		for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
-		}
+func (in input) skipASCII(p, max int) int {
+	for ; p < max && in[p] < utf8.RuneSelf; p++ {
 	}
 	return p
 }
 
-func (in *input) skipContinuationBytes(p int) int {
-	if in.bytes == nil {
-		for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
-		}
-	} else {
-		for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
-		}
+func (in input) skipContinuationBytes(p int) int {
+	for ; p < len(in) && !utf8.RuneStart(in[p]); p++ {
 	}
 	return p
 }
 
-func (in *input) appendSlice(buf []byte, b, e int) []byte {
-	if in.bytes != nil {
-		return append(buf, in.bytes[b:e]...)
-	}
-	for i := b; i < e; i++ {
-		buf = append(buf, in.str[i])
-	}
-	return buf
+func (in input) appendSlice(buf []byte, b, e int) []byte {
+	return append(buf, in[b:e]...)
 }
 
-func (in *input) copySlice(buf []byte, b, e int) int {
-	if in.bytes == nil {
-		return copy(buf, in.str[b:e])
-	}
-	return copy(buf, in.bytes[b:e])
+func (in input) copySlice(buf []byte, b, e int) int {
+	return copy(buf, in[b:e])
 }
 
-func (in *input) charinfoNFC(p int) (uint16, int) {
-	if in.bytes == nil {
-		return nfcData.lookupString(in.str[p:])
-	}
-	return nfcData.lookup(in.bytes[p:])
+func (in input) charinfoNFC(p int) (uint16, int) {
+	return nfcData.lookup(in[p:])
 }
 
-func (in *input) charinfoNFKC(p int) (uint16, int) {
-	if in.bytes == nil {
-		return nfkcData.lookupString(in.str[p:])
-	}
-	return nfkcData.lookup(in.bytes[p:])
+func (in input) charinfoNFKC(p int) (uint16, int) {
+	return nfkcData.lookup(in[p:])
 }
 
-func (in *input) hangul(p int) (r rune) {
+func (in input) hangul(p int) (r rune) {
 	var size int
-	if in.bytes == nil {
-		if !isHangulString(in.str[p:]) {
-			return 0
-		}
-		r, size = utf8.DecodeRuneInString(in.str[p:])
-	} else {
-		if !isHangul(in.bytes[p:]) {
-			return 0
-		}
-		r, size = utf8.DecodeRune(in.bytes[p:])
+	if !isHangul(in[p:]) {
+		return 0
 	}
+	r, size = utf8.DecodeRune(in[p:])
 	if size != hangulUTF8Size {
 		return 0
 	}
diff --git a/go/mysql/collations/vindex/unicode/norm/iter.go b/go/mysql/collations/vindex/unicode/norm/iter.go
deleted file mode 100644
index 417c6b26894..00000000000
--- a/go/mysql/collations/vindex/unicode/norm/iter.go
+++ /dev/null
@@ -1,458 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package norm
-
-import (
-	"fmt"
-	"unicode/utf8"
-)
-
-// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
-// sequence of starter and non-starter runes for the purpose of normalization.
-const MaxSegmentSize = maxByteBufferSize
-
-// An Iter iterates over a string or byte slice, while normalizing it
-// to a given Form.
-type Iter struct {
-	rb     reorderBuffer
-	buf    [maxByteBufferSize]byte
-	info   Properties // first character saved from previous iteration
-	next   iterFunc   // implementation of next depends on form
-	asciiF iterFunc
-
-	p        int    // current position in input source
-	multiSeg []byte // remainder of multi-segment decomposition
-}
-
-type iterFunc func(*Iter) []byte
-
-// Init initializes i to iterate over src after normalizing it to Form f.
-func (i *Iter) Init(f Form, src []byte) {
-	i.p = 0
-	if len(src) == 0 {
-		i.setDone()
-		i.rb.nsrc = 0
-		return
-	}
-	i.multiSeg = nil
-	i.rb.init(f, src)
-	i.next = i.rb.f.nextMain
-	i.asciiF = nextASCIIBytes
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.rb.ss.first(i.info)
-}
-
-// InitString initializes i to iterate over src after normalizing it to Form f.
-func (i *Iter) InitString(f Form, src string) {
-	i.p = 0
-	if len(src) == 0 {
-		i.setDone()
-		i.rb.nsrc = 0
-		return
-	}
-	i.multiSeg = nil
-	i.rb.initString(f, src)
-	i.next = i.rb.f.nextMain
-	i.asciiF = nextASCIIString
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.rb.ss.first(i.info)
-}
-
-// Seek sets the segment to be returned by the next call to Next to start
-// at position p.  It is the responsibility of the caller to set p to the
-// start of a segment.
-func (i *Iter) Seek(offset int64, whence int) (int64, error) {
-	var abs int64
-	switch whence {
-	case 0:
-		abs = offset
-	case 1:
-		abs = int64(i.p) + offset
-	case 2:
-		abs = int64(i.rb.nsrc) + offset
-	default:
-		return 0, fmt.Errorf("norm: invalid whence")
-	}
-	if abs < 0 {
-		return 0, fmt.Errorf("norm: negative position")
-	}
-	if int(abs) >= i.rb.nsrc {
-		i.setDone()
-		return int64(i.p), nil
-	}
-	i.p = int(abs)
-	i.multiSeg = nil
-	i.next = i.rb.f.nextMain
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.rb.ss.first(i.info)
-	return abs, nil
-}
-
-// returnSlice returns a slice of the underlying input type as a byte slice.
-// If the underlying is of type []byte, it will simply return a slice.
-// If the underlying is of type string, it will copy the slice to the buffer
-// and return that.
-func (i *Iter) returnSlice(a, b int) []byte {
-	if i.rb.src.bytes == nil {
-		return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
-	}
-	return i.rb.src.bytes[a:b]
-}
-
-// Pos returns the byte position at which the next call to Next will commence processing.
-func (i *Iter) Pos() int {
-	return i.p
-}
-
-func (i *Iter) setDone() {
-	i.next = nextDone
-	i.p = i.rb.nsrc
-}
-
-// Done returns true if there is no more input to process.
-func (i *Iter) Done() bool {
-	return i.p >= i.rb.nsrc
-}
-
-// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
-// For any input a and b for which f(a) == f(b), subsequent calls
-// to Next will return the same segments.
-// Modifying runes are grouped together with the preceding starter, if such a starter exists.
-// Although not guaranteed, n will typically be the smallest possible n.
-func (i *Iter) Next() []byte {
-	return i.next(i)
-}
-
-func nextASCIIBytes(i *Iter) []byte {
-	p := i.p + 1
-	if p >= i.rb.nsrc {
-		p0 := i.p
-		i.setDone()
-		return i.rb.src.bytes[p0:p]
-	}
-	if i.rb.src.bytes[p] < utf8.RuneSelf {
-		p0 := i.p
-		i.p = p
-		return i.rb.src.bytes[p0:p]
-	}
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.next = i.rb.f.nextMain
-	return i.next(i)
-}
-
-func nextASCIIString(i *Iter) []byte {
-	p := i.p + 1
-	if p >= i.rb.nsrc {
-		i.buf[0] = i.rb.src.str[i.p]
-		i.setDone()
-		return i.buf[:1]
-	}
-	if i.rb.src.str[p] < utf8.RuneSelf {
-		i.buf[0] = i.rb.src.str[i.p]
-		i.p = p
-		return i.buf[:1]
-	}
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.next = i.rb.f.nextMain
-	return i.next(i)
-}
-
-func nextHangul(i *Iter) []byte {
-	p := i.p
-	next := p + hangulUTF8Size
-	if next >= i.rb.nsrc {
-		i.setDone()
-	} else if i.rb.src.hangul(next) == 0 {
-		i.rb.ss.next(i.info)
-		i.info = i.rb.f.info(i.rb.src, i.p)
-		i.next = i.rb.f.nextMain
-		return i.next(i)
-	}
-	i.p = next
-	return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
-}
-
-func nextDone(i *Iter) []byte {
-	return nil
-}
-
-// nextMulti is used for iterating over multi-segment decompositions
-// for decomposing normal forms.
-func nextMulti(i *Iter) []byte {
-	j := 0
-	d := i.multiSeg
-	// skip first rune
-	for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
-	}
-	for j < len(d) {
-		info := i.rb.f.info(input{bytes: d}, j)
-		if info.BoundaryBefore() {
-			i.multiSeg = d[j:]
-			return d[:j]
-		}
-		j += int(info.size)
-	}
-	// treat last segment as normal decomposition
-	i.next = i.rb.f.nextMain
-	return i.next(i)
-}
-
-// nextMultiNorm is used for iterating over multi-segment decompositions
-// for composing normal forms.
-func nextMultiNorm(i *Iter) []byte {
-	j := 0
-	d := i.multiSeg
-	for j < len(d) {
-		info := i.rb.f.info(input{bytes: d}, j)
-		if info.BoundaryBefore() {
-			i.rb.compose()
-			seg := i.buf[:i.rb.flushCopy(i.buf[:])]
-			i.rb.insertUnsafe(input{bytes: d}, j, info)
-			i.multiSeg = d[j+int(info.size):]
-			return seg
-		}
-		i.rb.insertUnsafe(input{bytes: d}, j, info)
-		j += int(info.size)
-	}
-	i.multiSeg = nil
-	i.next = nextComposed
-	return doNormComposed(i)
-}
-
-// nextDecomposed is the implementation of Next for forms NFD and NFKD.
-func nextDecomposed(i *Iter) (next []byte) {
-	outp := 0
-	inCopyStart, outCopyStart := i.p, 0
-	for {
-		if sz := int(i.info.size); sz <= 1 {
-			i.rb.ss = 0
-			p := i.p
-			i.p++ // ASCII or illegal byte.  Either way, advance by 1.
-			if i.p >= i.rb.nsrc {
-				i.setDone()
-				return i.returnSlice(p, i.p)
-			} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
-				i.next = i.asciiF
-				return i.returnSlice(p, i.p)
-			}
-			outp++
-		} else if d := i.info.Decomposition(); d != nil {
-			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
-			// Case 1: there is a leftover to copy.  In this case the decomposition
-			// must begin with a modifier and should always be appended.
-			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
-			p := outp + len(d)
-			if outp > 0 {
-				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
-				// TODO: this condition should not be possible, but we leave it
-				// in for defensive purposes.
-				if p > len(i.buf) {
-					return i.buf[:outp]
-				}
-			} else if i.info.multiSegment() {
-				// outp must be 0 as multi-segment decompositions always
-				// start a new segment.
-				if i.multiSeg == nil {
-					i.multiSeg = d
-					i.next = nextMulti
-					return nextMulti(i)
-				}
-				// We are in the last segment.  Treat as normal decomposition.
-				d = i.multiSeg
-				i.multiSeg = nil
-				p = len(d)
-			}
-			prevCC := i.info.tccc
-			if i.p += sz; i.p >= i.rb.nsrc {
-				i.setDone()
-				i.info = Properties{} // Force BoundaryBefore to succeed.
-			} else {
-				i.info = i.rb.f.info(i.rb.src, i.p)
-			}
-			switch i.rb.ss.next(i.info) {
-			case ssOverflow:
-				i.next = nextCGJDecompose
-				fallthrough
-			case ssStarter:
-				if outp > 0 {
-					copy(i.buf[outp:], d)
-					return i.buf[:p]
-				}
-				return d
-			}
-			copy(i.buf[outp:], d)
-			outp = p
-			inCopyStart, outCopyStart = i.p, outp
-			if i.info.ccc < prevCC {
-				goto doNorm
-			}
-			continue
-		} else if r := i.rb.src.hangul(i.p); r != 0 {
-			outp = decomposeHangul(i.buf[:], r)
-			i.p += hangulUTF8Size
-			inCopyStart, outCopyStart = i.p, outp
-			if i.p >= i.rb.nsrc {
-				i.setDone()
-				break
-			} else if i.rb.src.hangul(i.p) != 0 {
-				i.next = nextHangul
-				return i.buf[:outp]
-			}
-		} else {
-			p := outp + sz
-			if p > len(i.buf) {
-				break
-			}
-			outp = p
-			i.p += sz
-		}
-		if i.p >= i.rb.nsrc {
-			i.setDone()
-			break
-		}
-		prevCC := i.info.tccc
-		i.info = i.rb.f.info(i.rb.src, i.p)
-		if v := i.rb.ss.next(i.info); v == ssStarter {
-			break
-		} else if v == ssOverflow {
-			i.next = nextCGJDecompose
-			break
-		}
-		if i.info.ccc < prevCC {
-			goto doNorm
-		}
-	}
-	if outCopyStart == 0 {
-		return i.returnSlice(inCopyStart, i.p)
-	} else if inCopyStart < i.p {
-		i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
-	}
-	return i.buf[:outp]
-doNorm:
-	// Insert what we have decomposed so far in the reorderBuffer.
-	// As we will only reorder, there will always be enough room.
-	i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
-	i.rb.insertDecomposed(i.buf[0:outp])
-	return doNormDecomposed(i)
-}
-
-func doNormDecomposed(i *Iter) []byte {
-	for {
-		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
-		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
-			i.setDone()
-			break
-		}
-		i.info = i.rb.f.info(i.rb.src, i.p)
-		if i.info.ccc == 0 {
-			break
-		}
-		if s := i.rb.ss.next(i.info); s == ssOverflow {
-			i.next = nextCGJDecompose
-			break
-		}
-	}
-	// new segment or too many combining characters: exit normalization
-	return i.buf[:i.rb.flushCopy(i.buf[:])]
-}
-
-func nextCGJDecompose(i *Iter) []byte {
-	i.rb.ss = 0
-	i.rb.insertCGJ()
-	i.next = nextDecomposed
-	i.rb.ss.first(i.info)
-	buf := doNormDecomposed(i)
-	return buf
-}
-
-// nextComposed is the implementation of Next for forms NFC and NFKC.
-func nextComposed(i *Iter) []byte {
-	outp, startp := 0, i.p
-	var prevCC uint8
-	for {
-		if !i.info.isYesC() {
-			goto doNorm
-		}
-		prevCC = i.info.tccc
-		sz := int(i.info.size)
-		if sz == 0 {
-			sz = 1 // illegal rune: copy byte-by-byte
-		}
-		p := outp + sz
-		if p > len(i.buf) {
-			break
-		}
-		outp = p
-		i.p += sz
-		if i.p >= i.rb.nsrc {
-			i.setDone()
-			break
-		} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
-			i.rb.ss = 0
-			i.next = i.asciiF
-			break
-		}
-		i.info = i.rb.f.info(i.rb.src, i.p)
-		if v := i.rb.ss.next(i.info); v == ssStarter {
-			break
-		} else if v == ssOverflow {
-			i.next = nextCGJCompose
-			break
-		}
-		if i.info.ccc < prevCC {
-			goto doNorm
-		}
-	}
-	return i.returnSlice(startp, i.p)
-doNorm:
-	// reset to start position
-	i.p = startp
-	i.info = i.rb.f.info(i.rb.src, i.p)
-	i.rb.ss.first(i.info)
-	if i.info.multiSegment() {
-		d := i.info.Decomposition()
-		info := i.rb.f.info(input{bytes: d}, 0)
-		i.rb.insertUnsafe(input{bytes: d}, 0, info)
-		i.multiSeg = d[int(info.size):]
-		i.next = nextMultiNorm
-		return nextMultiNorm(i)
-	}
-	i.rb.ss.first(i.info)
-	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
-	return doNormComposed(i)
-}
-
-func doNormComposed(i *Iter) []byte {
-	// First rune should already be inserted.
-	for {
-		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
-			i.setDone()
-			break
-		}
-		i.info = i.rb.f.info(i.rb.src, i.p)
-		if s := i.rb.ss.next(i.info); s == ssStarter {
-			break
-		} else if s == ssOverflow {
-			i.next = nextCGJCompose
-			break
-		}
-		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
-	}
-	i.rb.compose()
-	seg := i.buf[:i.rb.flushCopy(i.buf[:])]
-	return seg
-}
-
-func nextCGJCompose(i *Iter) []byte {
-	i.rb.ss = 0 // instead of first
-	i.rb.insertCGJ()
-	i.next = nextComposed
-	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
-	// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
-	// If we ever change that, insert a check here.
-	i.rb.ss.first(i.info)
-	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
-	return doNormComposed(i)
-}
diff --git a/go/mysql/collations/vindex/unicode/norm/normalize.go b/go/mysql/collations/vindex/unicode/norm/normalize.go
index 2a6964a41c4..eadfbf4a2c6 100644
--- a/go/mysql/collations/vindex/unicode/norm/normalize.go
+++ b/go/mysql/collations/vindex/unicode/norm/normalize.go
@@ -71,7 +71,7 @@ func patchTail(rb *reorderBuffer) bool {
 		rb.insertCGJ()
 		rb.ss = 0
 	}
-	rb.insertUnsafe(inputBytes(buf), 0, info)
+	rb.insertUnsafe(buf, 0, info)
 	return true
 }
 
@@ -152,12 +152,6 @@ func doAppendInner(rb *reorderBuffer, p int) []byte {
 	return rb.out
 }
 
-// AppendString returns f(append(out, []byte(s))).
-// The buffer out must be nil, empty, or equal to f(out).
-func (f Form) AppendString(out []byte, src string) []byte {
-	return f.doAppend(out, inputString(src), len(src))
-}
-
 // quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
 // whether any non-normalized parts were found. If atEOF is false, n will
 // not point past the last segment if this segment might be become
@@ -249,12 +243,6 @@ func (f Form) firstBoundary(src input, nsrc int) int {
 	}
 }
 
-// FirstBoundaryInString returns the position i of the first boundary in s
-// or -1 if s contains no boundary.
-func (f Form) FirstBoundaryInString(s string) int {
-	return f.firstBoundary(inputString(s), len(s))
-}
-
 // decomposeSegment scans the first segment in src into rb. It inserts 0x034f
 // (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
 // and returns the number of bytes consumed from src or iShortDst or iShortSrc.