Skip to content

Commit

Permalink
Merge pull request #3458 from onflow/bastian/fix-string-replaceall
Browse files Browse the repository at this point in the history
  • Loading branch information
turbolent authored Jul 10, 2024
2 parents 33bf58a + 86215c4 commit c145b9f
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 151 deletions.
91 changes: 66 additions & 25 deletions runtime/interpreter/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -1225,8 +1225,7 @@ func (v *StringValue) Concat(interpreter *Interpreter, other *StringValue, locat
memoryUsage := common.NewStringMemoryUsage(newLength)

// Meter computation as if the two strings were iterated.
length := len(v.Str) + len(other.Str)
interpreter.ReportComputation(common.ComputationKindLoop, uint(length))
interpreter.ReportComputation(common.ComputationKindLoop, uint(newLength))

return NewStringValue(
interpreter,
Expand Down Expand Up @@ -1479,17 +1478,22 @@ func (v *StringValue) GetMember(interpreter *Interpreter, locationRange Location
v,
sema.StringTypeReplaceAllFunctionType,
func(invocation Invocation) Value {
of, ok := invocation.Arguments[0].(*StringValue)
original, ok := invocation.Arguments[0].(*StringValue)
if !ok {
panic(errors.NewUnreachableError())
}

with, ok := invocation.Arguments[1].(*StringValue)
replacement, ok := invocation.Arguments[1].(*StringValue)
if !ok {
panic(errors.NewUnreachableError())
}

return v.ReplaceAll(invocation.Interpreter, invocation.LocationRange, of.Str, with.Str)
return v.ReplaceAll(
invocation.Interpreter,
invocation.LocationRange,
original,
replacement,
)
},
)
}
Expand Down Expand Up @@ -1580,7 +1584,7 @@ func (v *StringValue) Split(inter *Interpreter, locationRange LocationRange, sep
return remaining
}

separatorCharacterIndex := remaining.indexOf(inter, separator)
separatorCharacterIndex, _ := remaining.indexOf(inter, separator)
if separatorCharacterIndex < 0 {
return nil
}
Expand Down Expand Up @@ -1638,23 +1642,62 @@ func (v *StringValue) Explode(inter *Interpreter, locationRange LocationRange) *
)
}

func (v *StringValue) ReplaceAll(inter *Interpreter, _ LocationRange, of string, with string) *StringValue {
// Over-estimate the resulting string length.
// In the worst case, `of` can be empty in which case, `with` will be added at every index.
// e.g. `of` = "", `v` = "ABC", `with` = "1": result = "1A1B1C1".
strLen := len(v.Str)
lengthOverEstimate := (2*strLen + 1) * len(with)
func (v *StringValue) ReplaceAll(
inter *Interpreter,
locationRange LocationRange,
original *StringValue,
replacement *StringValue,
) *StringValue {

memoryUsage := common.NewStringMemoryUsage(lengthOverEstimate)
count := v.count(inter, locationRange, original)
if count == 0 {
return v
}

newByteLength := len(v.Str) + count*(len(replacement.Str)-len(original.Str))

memoryUsage := common.NewStringMemoryUsage(newByteLength)

// Meter computation as if the string was iterated.
inter.ReportComputation(common.ComputationKindLoop, uint(strLen))
inter.ReportComputation(common.ComputationKindLoop, uint(len(v.Str)))

remaining := v

return NewStringValue(
inter,
memoryUsage,
func() string {
return strings.ReplaceAll(v.Str, of, with)
var b strings.Builder
b.Grow(newByteLength)
for i := 0; i < count; i++ {

var originalCharacterIndex, originalByteOffset int
if original.Length() == 0 {
if i > 0 {
originalCharacterIndex = 1

remaining.prepareGraphemes()
remaining.graphemes.Next()
_, originalByteOffset = remaining.graphemes.Positions()
}
} else {
originalCharacterIndex, originalByteOffset = remaining.indexOf(inter, original)
if originalCharacterIndex < 0 {
panic(errors.NewUnreachableError())
}
}

b.WriteString(remaining.Str[:originalByteOffset])
b.WriteString(replacement.Str)

remaining = remaining.slice(
originalCharacterIndex+original.Length(),
remaining.Length(),
locationRange,
)
}
b.WriteString(remaining.Str)
return b.String()
},
)
}
Expand Down Expand Up @@ -1865,14 +1908,14 @@ func (v *StringValue) isGraphemeBoundaryEndPrepared(end int) bool {
}

func (v *StringValue) IndexOf(inter *Interpreter, other *StringValue) IntValue {
index := v.indexOf(inter, other)
index, _ := v.indexOf(inter, other)
return NewIntValueFromInt64(inter, int64(index))
}

func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) (characterIndex int, byteOffset int) {

if len(other.Str) == 0 {
return 0
return 0, 0
}

// Meter computation as if the string was iterated.
Expand All @@ -1885,9 +1928,6 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
// - 'CharacterIndex' indicates Cadence characters (grapheme clusters)
// - 'ByteOffset' indicates bytes

// The resulting index, in terms of Cadence characters (grapheme clusters)
var characterIndex int

// Find the position of the substring in the string,
// by using strings.Index with an increasing start byte offset.
//
Expand Down Expand Up @@ -1923,19 +1963,20 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
if v.seekGraphemeBoundaryStartPrepared(absoluteFoundByteOffset, &characterIndex) &&
v.isGraphemeBoundaryEndPrepared(absoluteFoundByteOffset+len(other.Str)) {

return characterIndex
return characterIndex, absoluteFoundByteOffset
}

// Restore the grapheme iterator and character index
v.graphemes = &graphemesBackup
characterIndex = characterIndexBackup
}

return -1
return -1, -1
}

func (v *StringValue) Contains(inter *Interpreter, other *StringValue) BoolValue {
return AsBoolValue(v.indexOf(inter, other) >= 0)
characterIndex, _ := v.indexOf(inter, other)
return AsBoolValue(characterIndex >= 0)
}

func (v *StringValue) Count(inter *Interpreter, locationRange LocationRange, other *StringValue) IntValue {
Expand All @@ -1955,7 +1996,7 @@ func (v *StringValue) count(inter *Interpreter, locationRange LocationRange, oth
count := 0

for {
index := remaining.indexOf(inter, other)
index, _ := remaining.indexOf(inter, other)
if index == -1 {
return count
}
Expand Down
182 changes: 92 additions & 90 deletions runtime/sema/string_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,101 @@ import (
"github.com/onflow/cadence/runtime/errors"
)

// StringTypeEncodeHexFunctionType is the function type declared for the
// `encodeHex` function (see StringTypeEncodeHexFunctionName).
//
// It is constructed with view purity, a single parameter `data` typed by
// ByteArrayTypeAnnotation for which no argument label is required,
// and StringTypeAnnotation as the result type annotation.
var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "data",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
StringTypeAnnotation,
)

// StringTypeEncodeHexFunctionName is the name of the `encodeHex` function.
const StringTypeEncodeHexFunctionName = "encodeHex"

// StringTypeEncodeHexFunctionDocString is the documentation text
// for the `encodeHex` function.
const StringTypeEncodeHexFunctionDocString = `
Returns a hexadecimal string for the given byte array
`

// StringTypeFromUtf8FunctionType is the function type declared for the
// `fromUTF8` function (see StringTypeFromUtf8FunctionName).
//
// It is constructed with view purity, a single parameter `bytes` typed by
// ByteArrayTypeAnnotation for which no argument label is required,
// and an optional of StringType as the result type:
// per the doc string, nil is returned for malformed UTF-8 input.
var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "bytes",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
NewTypeAnnotation(
&OptionalType{
Type: StringType,
},
),
)

// StringTypeFromUtf8FunctionName is the name of the `fromUTF8` function.
const StringTypeFromUtf8FunctionName = "fromUTF8"

// StringTypeFromUtf8FunctionDocString is the documentation text
// for the `fromUTF8` function.
const StringTypeFromUtf8FunctionDocString = `
Attempt to decode the input as a UTF-8 encoded string. Returns nil if the input bytes are malformed UTF-8
`

// StringTypeFromCharactersFunctionType is the function type declared for the
// `fromCharacters` function (see StringTypeFromCharactersFunctionName).
//
// It is constructed with view purity, a single parameter `characters`
// (a variable-sized array of CharacterType) for which no argument label
// is required, and StringTypeAnnotation as the result type annotation.
var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "characters",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: CharacterType,
}),
},
},
StringTypeAnnotation,
)

// StringTypeFromCharactersFunctionName is the name of the `fromCharacters` function.
const StringTypeFromCharactersFunctionName = "fromCharacters"

// StringTypeFromCharactersFunctionDocString is the documentation text
// for the `fromCharacters` function.
const StringTypeFromCharactersFunctionDocString = `
Returns a string from the given array of characters
`

// StringTypeJoinFunctionType is the function type declared for the
// `join` function (see StringTypeJoinFunctionName).
//
// It is constructed with view purity and two parameters:
//   - `strings`: a variable-sized array of StringType,
//     for which no argument label is required
//   - `separator`: a StringType value; no explicit Label is set for it
//
// The result type annotation is StringTypeAnnotation.
var StringTypeJoinFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "strings",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: StringType,
}),
},
{
Identifier: "separator",
// NOTE(review): sibling declarations use StringTypeAnnotation for
// string-typed parameters; presumably equivalent to this
// NewTypeAnnotation(StringType) call — confirm before unifying.
TypeAnnotation: NewTypeAnnotation(StringType),
},
},
StringTypeAnnotation,
)

// StringTypeJoinFunctionName is the name of the `join` function.
const StringTypeJoinFunctionName = "join"

// StringTypeJoinFunctionDocString is the documentation text
// for the `join` function.
const StringTypeJoinFunctionDocString = `
Returns a string after joining the array of strings with the provided separator.
`

// StringTypeSplitFunctionType is the function type declared for the
// `split` function (see StringTypeSplitFunctionName).
//
// It is constructed with view purity, a single string-typed parameter
// `separator` (no explicit Label is set), and a variable-sized array
// of StringType as the result type.
var StringTypeSplitFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "separator",
TypeAnnotation: StringTypeAnnotation,
},
},
NewTypeAnnotation(
&VariableSizedType{
Type: StringType,
},
),
)

const StringTypeSplitFunctionName = "split"
const StringTypeSplitFunctionDocString = `
Returns a variable-sized array of strings after splitting the string on the delimiter.
Expand Down Expand Up @@ -246,6 +321,23 @@ Returns the number of non-overlapping instances of the given substring in this s
If the given substring is an empty string, the function returns 1 + the number of characters in this string.
`

// StringTypeReplaceAllFunctionType is the function type declared for the
// `replaceAll` function (see StringTypeReplaceAllFunctionName).
//
// It is constructed with view purity and two string-typed parameters,
// each with an explicit argument label that differs from its identifier:
//   - label `of`, identifier `old`: the substring to be replaced
//   - label `with`, identifier `replacement`: the string to substitute
//
// The result type annotation is StringTypeAnnotation.
var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: "of",
Identifier: "old",
TypeAnnotation: StringTypeAnnotation,
},
{
Label: "with",
Identifier: "replacement",
TypeAnnotation: StringTypeAnnotation,
},
},
StringTypeAnnotation,
)

const StringTypeReplaceAllFunctionName = "replaceAll"
const StringTypeReplaceAllFunctionDocString = `
Returns a new string after replacing all the occurrences of parameter ` + "`of` with the parameter `with`" + `.
Expand Down Expand Up @@ -376,93 +468,3 @@ var StringFunctionType = func() *FunctionType {

return functionType
}()

// StringTypeEncodeHexFunctionType is the function type declared for the
// `encodeHex` function: view purity, a single parameter `data` typed by
// ByteArrayTypeAnnotation for which no argument label is required,
// and StringTypeAnnotation as the result type annotation.
var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "data",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
StringTypeAnnotation,
)

// StringTypeFromUtf8FunctionType is the function type declared for the
// `fromUTF8` function: view purity, a single parameter `bytes` typed by
// ByteArrayTypeAnnotation for which no argument label is required,
// and an optional of StringType as the result type.
var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "bytes",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
NewTypeAnnotation(
&OptionalType{
Type: StringType,
},
),
)

// StringTypeFromCharactersFunctionType is the function type declared for the
// `fromCharacters` function: view purity, a single parameter `characters`
// (a variable-sized array of CharacterType) for which no argument label
// is required, and StringTypeAnnotation as the result type annotation.
var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "characters",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: CharacterType,
}),
},
},
StringTypeAnnotation,
)

// StringTypeJoinFunctionType is the function type declared for the
// `join` function: view purity and two parameters,
// `strings` (a variable-sized array of StringType, no argument label required)
// and `separator` (a StringType value, no explicit Label set),
// with StringTypeAnnotation as the result type annotation.
var StringTypeJoinFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "strings",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: StringType,
}),
},
{
Identifier: "separator",
TypeAnnotation: NewTypeAnnotation(StringType),
},
},
StringTypeAnnotation,
)

// StringTypeSplitFunctionType is the function type declared for the
// `split` function: view purity, a single string-typed parameter
// `separator` (no explicit Label set), and a variable-sized array
// of StringType as the result type.
var StringTypeSplitFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "separator",
TypeAnnotation: StringTypeAnnotation,
},
},
NewTypeAnnotation(
&VariableSizedType{
Type: StringType,
},
),
)

// StringTypeReplaceAllFunctionType is the function type declared for the
// `replaceAll` function: view purity and two string-typed parameters
// with identifiers `of` and `with` (no explicit Label set on either),
// with StringTypeAnnotation as the result type annotation.
var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "of",
TypeAnnotation: StringTypeAnnotation,
},
{
Identifier: "with",
TypeAnnotation: StringTypeAnnotation,
},
},
StringTypeAnnotation,
)
Loading

0 comments on commit c145b9f

Please sign in to comment.