Skip to content

Commit

Permalink
optimize String.contains, add helpers for grapheme cluster boundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
turbolent committed Jul 8, 2024
1 parent b48e8e2 commit 1fcafe0
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 20 deletions.
70 changes: 50 additions & 20 deletions runtime/interpreter/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,48 @@ func (v *StringValue) ForEach(
}
}

// IsBoundaryStart reports whether the offset start coincides with the
// start position of one of the string's grapheme clusters.
//
// It calls prepareGraphemes first and then delegates the scan to
// isGraphemeBoundaryStartPrepared, which walks v.graphemes.
func (v *StringValue) IsBoundaryStart(start int) bool {
	v.prepareGraphemes()

	isBoundary := v.isGraphemeBoundaryStartPrepared(start)
	return isBoundary
}

// isGraphemeBoundaryStartPrepared reports whether start equals the start
// position of a grapheme cluster, scanning forward from wherever the
// v.graphemes iterator currently stands.
//
// The iterator is not reset here: the exported wrapper IsBoundaryStart calls
// prepareGraphemes before invoking this helper, and other callers are
// presumably expected to do the same (NOTE(review): confirm what
// prepareGraphemes guarantees). The scan advances v.graphemes as a side
// effect, so positions that lie before the iterator's current cluster can
// no longer match.
//
// It returns false as soon as the current cluster starts past start, or when
// the iterator is exhausted without finding a match.
func (v *StringValue) isGraphemeBoundaryStartPrepared(start int) bool {
	for more := true; more; more = v.graphemes.Next() {
		// Only the first value returned by Positions (the cluster start) matters here.
		clusterStart, _ := v.graphemes.Positions()

		switch {
		case clusterStart == start:
			return true
		case clusterStart > start:
			// Already moved beyond the requested offset: it is not a cluster start.
			return false
		}
	}

	// Ran out of grapheme clusters.
	return false
}

// IsBoundaryEnd reports whether the offset end coincides with the
// end position of one of the string's grapheme clusters.
//
// It calls prepareGraphemes first and then delegates the scan to
// isGraphemeBoundaryEndPrepared, which walks v.graphemes.
func (v *StringValue) IsBoundaryEnd(end int) bool {
	v.prepareGraphemes()

	isBoundary := v.isGraphemeBoundaryEndPrepared(end)
	return isBoundary
}

// isGraphemeBoundaryEndPrepared reports whether end equals the end position
// of a grapheme cluster, scanning forward from wherever the v.graphemes
// iterator currently stands.
//
// The iterator is not reset here: the exported wrapper IsBoundaryEnd calls
// prepareGraphemes before invoking this helper, and other callers are
// presumably expected to do the same (NOTE(review): confirm what
// prepareGraphemes guarantees). The scan advances v.graphemes as a side
// effect.
//
// It returns false as soon as the current cluster ends past end, or when the
// iterator is exhausted without finding a match.
func (v *StringValue) isGraphemeBoundaryEndPrepared(end int) bool {
	for more := true; more; more = v.graphemes.Next() {
		// Only the second value returned by Positions (the cluster end) matters here.
		_, clusterEnd := v.graphemes.Positions()

		switch {
		case clusterEnd == end:
			return true
		case clusterEnd > end:
			// Already moved beyond the requested offset: it is not a cluster end.
			return false
		}
	}

	// Ran out of grapheme clusters.
	return false
}

func (v *StringValue) Contains(inter *Interpreter, other *StringValue) BoolValue {

// Meter computation as if the string was iterated.
Expand All @@ -1711,29 +1753,17 @@ func (v *StringValue) Contains(inter *Interpreter, other *StringValue) BoolValue

v.prepareGraphemes()

for {
start, _ := v.graphemes.Positions()
remainder := v.Str[start:]
if strings.HasPrefix(remainder, other.Str) {
for start := 0; start < len(v.Str); start++ {

// Check the end is a grapheme cluster boundary
expectedEnd := start + len(other.Str)
for {
_, end := v.graphemes.Positions()
if end == expectedEnd {
return TrueValue
} else if end > expectedEnd {
return FalseValue
}

if !v.graphemes.Next() {
break
}
}
start = strings.Index(v.Str[start:], other.Str)
if start < 0 {
break
}

if !v.graphemes.Next() {
break
if v.isGraphemeBoundaryStartPrepared(start) &&
v.isGraphemeBoundaryEndPrepared(start+len(other.Str)) {

return TrueValue
}
}

Expand Down
82 changes: 82 additions & 0 deletions runtime/interpreter/value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4391,3 +4391,85 @@ func TestValue_ConformsToStaticType(t *testing.T) {
})

}

// TestStringIsBoundaryStart checks StringValue.IsBoundaryStart against short
// ASCII strings and against a string made of two flag emoji, where only
// offsets 0 and 8 are expected to be reported as grapheme cluster starts.
func TestStringIsBoundaryStart(t *testing.T) {

	t.Parallel()

	// check runs one subtest asserting IsBoundaryStart(offset) on a fresh value.
	check := func(input string, offset int, want bool) {
		t.Run(
			fmt.Sprintf("%s, %d", input, offset),
			func(t *testing.T) {
				value := NewUnmeteredStringValue(input)
				assert.Equal(t, want, value.IsBoundaryStart(offset))
			},
		)
	}

	simpleCases := []struct {
		input  string
		offset int
		want   bool
	}{
		{"", 0, true},
		{"a", 0, true},
		{"a", 1, false},
		{"ab", 1, true},
	}
	for _, c := range simpleCases {
		check(c.input, c.offset, c.want)
	}

	// 🇪🇸🇪🇪 ("ES", "EE"): four regional indicator symbols forming two flags.
	flagESflagEE := "\U0001F1EA\U0001F1F8\U0001F1EA\U0001F1EA"
	require.Len(t, flagESflagEE, 16)

	// Each flag occupies 8 of the 16 bytes, so only offsets 0 and 8 are
	// expected to be cluster starts; every other offset is expected to fail.
	for offset := 0; offset < 16; offset++ {
		check(flagESflagEE, offset, offset == 0 || offset == 8)
	}
}

// TestStringIsBoundaryEnd checks StringValue.IsBoundaryEnd against short
// ASCII strings and against a string made of two flag emoji, where only
// offsets 0 and 8 (of the tested offsets 0 through 15) are expected to be
// reported as grapheme cluster ends.
func TestStringIsBoundaryEnd(t *testing.T) {

	t.Parallel()

	// check runs one subtest asserting IsBoundaryEnd(offset) on a fresh value.
	check := func(input string, offset int, want bool) {
		t.Run(
			fmt.Sprintf("%s, %d", input, offset),
			func(t *testing.T) {
				value := NewUnmeteredStringValue(input)
				assert.Equal(t, want, value.IsBoundaryEnd(offset))
			},
		)
	}

	simpleCases := []struct {
		input  string
		offset int
		want   bool
	}{
		{"", 0, true},
		{"a", 0, true},
		{"a", 1, true},
		{"ab", 1, true},
	}
	for _, c := range simpleCases {
		check(c.input, c.offset, c.want)
	}

	// 🇪🇸🇪🇪 ("ES", "EE"): four regional indicator symbols forming two flags.
	flagESflagEE := "\U0001F1EA\U0001F1F8\U0001F1EA\U0001F1EA"
	require.Len(t, flagESflagEE, 16)

	// Each flag occupies 8 of the 16 bytes. Of the offsets exercised here
	// (0 through 15), only 0 and 8 are expected to be reported as ends.
	for offset := 0; offset < 16; offset++ {
		check(flagESflagEE, offset, offset == 0 || offset == 8)
	}
}

0 comments on commit 1fcafe0

Please sign in to comment.