Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix String.replaceAll #3458

Merged
merged 3 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 66 additions & 25 deletions runtime/interpreter/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -1225,8 +1225,7 @@ func (v *StringValue) Concat(interpreter *Interpreter, other *StringValue, locat
memoryUsage := common.NewStringMemoryUsage(newLength)

// Meter computation as if the two strings were iterated.
length := len(v.Str) + len(other.Str)
interpreter.ReportComputation(common.ComputationKindLoop, uint(length))
interpreter.ReportComputation(common.ComputationKindLoop, uint(newLength))

return NewStringValue(
interpreter,
Expand Down Expand Up @@ -1479,17 +1478,22 @@ func (v *StringValue) GetMember(interpreter *Interpreter, locationRange Location
v,
sema.StringTypeReplaceAllFunctionType,
func(invocation Invocation) Value {
of, ok := invocation.Arguments[0].(*StringValue)
original, ok := invocation.Arguments[0].(*StringValue)
if !ok {
panic(errors.NewUnreachableError())
}

with, ok := invocation.Arguments[1].(*StringValue)
replacement, ok := invocation.Arguments[1].(*StringValue)
if !ok {
panic(errors.NewUnreachableError())
}

return v.ReplaceAll(invocation.Interpreter, invocation.LocationRange, of.Str, with.Str)
return v.ReplaceAll(
invocation.Interpreter,
invocation.LocationRange,
original,
replacement,
)
},
)
}
Expand Down Expand Up @@ -1580,7 +1584,7 @@ func (v *StringValue) Split(inter *Interpreter, locationRange LocationRange, sep
return remaining
}

separatorCharacterIndex := remaining.indexOf(inter, separator)
separatorCharacterIndex, _ := remaining.indexOf(inter, separator)
if separatorCharacterIndex < 0 {
return nil
}
Expand Down Expand Up @@ -1638,23 +1642,62 @@ func (v *StringValue) Explode(inter *Interpreter, locationRange LocationRange) *
)
}

func (v *StringValue) ReplaceAll(inter *Interpreter, _ LocationRange, of string, with string) *StringValue {
// Over-estimate the resulting string length.
// In the worst case, `of` can be empty in which case, `with` will be added at every index.
// e.g. `of` = "", `v` = "ABC", `with` = "1": result = "1A1B1C1".
strLen := len(v.Str)
lengthOverEstimate := (2*strLen + 1) * len(with)
// ReplaceAll returns a string in which each occurrence of `original` in v,
// as counted by v.count, is replaced with `replacement`.
// If there are no occurrences, v itself is returned.
func (v *StringValue) ReplaceAll(
inter *Interpreter,
locationRange LocationRange,
original *StringValue,
replacement *StringValue,
) *StringValue {

memoryUsage := common.NewStringMemoryUsage(lengthOverEstimate)
// Number of replacements the loop below performs.
count := v.count(inter, locationRange, original)
if count == 0 {
// Nothing to replace: return the receiver as is.
return v
}

// Byte length of the result: every replacement removes len(original.Str) bytes
// and adds len(replacement.Str) bytes.
newByteLength := len(v.Str) + count*(len(replacement.Str)-len(original.Str))

memoryUsage := common.NewStringMemoryUsage(newByteLength)

// Meter computation as if the string was iterated.
inter.ReportComputation(common.ComputationKindLoop, uint(strLen))
inter.ReportComputation(common.ComputationKindLoop, uint(len(v.Str)))

// The not-yet-processed suffix of v; it is re-sliced after each replacement.
remaining := v

return NewStringValue(
inter,
memoryUsage,
func() string {
return strings.ReplaceAll(v.Str, of, with)
var b strings.Builder
// Reserve the computed result size up front.
b.Grow(newByteLength)
for i := 0; i < count; i++ {

// Position of the next match within `remaining`:
// the first in characters, the second in bytes.
var originalCharacterIndex, originalByteOffset int
if original.Length() == 0 {
// Empty `original`: on the first iteration both positions stay 0,
// so the replacement is written at the very start;
// every later iteration first steps over one character of `remaining`.
if i > 0 {
originalCharacterIndex = 1

remaining.prepareGraphemes()
remaining.graphemes.Next()
// NOTE(review): presumably the second result of Positions is the end byte offset
// of the character just read — confirm against the grapheme iterator's documentation.
_, originalByteOffset = remaining.graphemes.Positions()
}
} else {
originalCharacterIndex, originalByteOffset = remaining.indexOf(inter, original)
if originalCharacterIndex < 0 {
// The loop runs `count` times, so a miss here is treated as unreachable.
panic(errors.NewUnreachableError())
}
}

// Copy the text before the match, then the replacement.
b.WriteString(remaining.Str[:originalByteOffset])
b.WriteString(replacement.Str)

// Continue with the part of `remaining` after the match.
remaining = remaining.slice(
originalCharacterIndex+original.Length(),
remaining.Length(),
locationRange,
)
}
// Append whatever follows the last match.
b.WriteString(remaining.Str)
return b.String()
},
)
}
Expand Down Expand Up @@ -1865,14 +1908,14 @@ func (v *StringValue) isGraphemeBoundaryEndPrepared(end int) bool {
}

// IndexOf returns, as an IntValue, the character index that indexOf reports for `other` in v.
// The byte offset that indexOf also returns is not needed here and is discarded.
func (v *StringValue) IndexOf(inter *Interpreter, other *StringValue) IntValue {
index := v.indexOf(inter, other)
index, _ := v.indexOf(inter, other)
return NewIntValueFromInt64(inter, int64(index))
}

func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) (characterIndex int, byteOffset int) {

if len(other.Str) == 0 {
return 0
return 0, 0
}

// Meter computation as if the string was iterated.
Expand All @@ -1885,9 +1928,6 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
// - 'CharacterIndex' indicates Cadence characters (grapheme clusters)
// - 'ByteOffset' indicates bytes

// The resulting index, in terms of Cadence characters (grapheme clusters)
var characterIndex int

// Find the position of the substring in the string,
// by using strings.Index with an increasing start byte offset.
//
Expand Down Expand Up @@ -1923,19 +1963,20 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
if v.seekGraphemeBoundaryStartPrepared(absoluteFoundByteOffset, &characterIndex) &&
v.isGraphemeBoundaryEndPrepared(absoluteFoundByteOffset+len(other.Str)) {

return characterIndex
return characterIndex, absoluteFoundByteOffset
}

// Restore the grapheme iterator and character index
v.graphemes = &graphemesBackup
characterIndex = characterIndexBackup
}

return -1
return -1, -1
}

// Contains reports whether `other` occurs in v,
// i.e. whether indexOf yields a non-negative character index for it.
func (v *StringValue) Contains(inter *Interpreter, other *StringValue) BoolValue {
return AsBoolValue(v.indexOf(inter, other) >= 0)
// Only the character index matters here; the byte offset is discarded.
characterIndex, _ := v.indexOf(inter, other)
return AsBoolValue(characterIndex >= 0)
}

func (v *StringValue) Count(inter *Interpreter, locationRange LocationRange, other *StringValue) IntValue {
Expand All @@ -1955,7 +1996,7 @@ func (v *StringValue) count(inter *Interpreter, locationRange LocationRange, oth
count := 0

for {
index := remaining.indexOf(inter, other)
index, _ := remaining.indexOf(inter, other)
if index == -1 {
return count
}
Expand Down
182 changes: 92 additions & 90 deletions runtime/sema/string_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,26 +22,101 @@ import (
"github.com/onflow/cadence/runtime/errors"
)

// StringTypeEncodeHexFunctionType is the type of a view function that takes
// a single byte-array parameter `data`, for which no argument label is required,
// and returns a string.
var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "data",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
StringTypeAnnotation,
)

// StringTypeEncodeHexFunctionName is the function's name.
const StringTypeEncodeHexFunctionName = "encodeHex"
// StringTypeEncodeHexFunctionDocString is the function's documentation text.
const StringTypeEncodeHexFunctionDocString = `
Returns a hexadecimal string for the given byte array
`

// StringTypeFromUtf8FunctionType is the type of a view function that takes
// a single byte-array parameter `bytes`, for which no argument label is required,
// and returns an optional string.
var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "bytes",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
// The result is optional: per the doc string below, nil is returned for malformed UTF-8.
NewTypeAnnotation(
&OptionalType{
Type: StringType,
},
),
)

// StringTypeFromUtf8FunctionName is the function's name.
const StringTypeFromUtf8FunctionName = "fromUTF8"
// StringTypeFromUtf8FunctionDocString is the function's documentation text.
const StringTypeFromUtf8FunctionDocString = `
Attempt to decode the input as a UTF-8 encoded string. Returns nil if the input bytes are malformed UTF-8
`

// StringTypeFromCharactersFunctionType is the type of a view function that takes
// a single parameter `characters`, a variable-sized array of characters
// for which no argument label is required, and returns a string.
var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "characters",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: CharacterType,
}),
},
},
StringTypeAnnotation,
)

// StringTypeFromCharactersFunctionName is the function's name.
const StringTypeFromCharactersFunctionName = "fromCharacters"
// StringTypeFromCharactersFunctionDocString is the function's documentation text.
const StringTypeFromCharactersFunctionDocString = `
Returns a string from the given array of characters
`

// StringTypeJoinFunctionType is the type of a view function that takes
// a variable-sized array of strings `strings`, for which no argument label is required,
// and a string `separator`, and returns a string.
var StringTypeJoinFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "strings",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: StringType,
}),
},
{
// No Label is set for this parameter, unlike the first one.
Identifier: "separator",
TypeAnnotation: NewTypeAnnotation(StringType),
},
},
StringTypeAnnotation,
)

// StringTypeJoinFunctionName is the function's name.
const StringTypeJoinFunctionName = "join"
// StringTypeJoinFunctionDocString is the function's documentation text.
const StringTypeJoinFunctionDocString = `
Returns a string after joining the array of strings with the provided separator.
`

// StringTypeSplitFunctionType is the type of a view function that takes
// a single string parameter `separator`
// and returns a variable-sized array of strings.
var StringTypeSplitFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "separator",
TypeAnnotation: StringTypeAnnotation,
},
},
NewTypeAnnotation(
&VariableSizedType{
Type: StringType,
},
),
)

// StringTypeSplitFunctionName is the function's name.
const StringTypeSplitFunctionName = "split"
const StringTypeSplitFunctionDocString = `
Returns a variable-sized array of strings after splitting the string on the delimiter.
Expand Down Expand Up @@ -246,6 +321,23 @@ Returns the number of non-overlapping instances of the given substring in this s
If the given substring is an empty string, the function returns 1 + the number of characters in this string.
`

// StringTypeReplaceAllFunctionType is the type of a view function that takes
// two string parameters and returns a string.
// The parameters have the argument labels `of` and `with`,
// and the identifiers `old` and `replacement`, respectively.
var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: "of",
Identifier: "old",
TypeAnnotation: StringTypeAnnotation,
},
{
Label: "with",
Identifier: "replacement",
TypeAnnotation: StringTypeAnnotation,
},
},
StringTypeAnnotation,
)

// StringTypeReplaceAllFunctionName is the function's name.
const StringTypeReplaceAllFunctionName = "replaceAll"
const StringTypeReplaceAllFunctionDocString = `
Returns a new string after replacing all the occurrences of parameter ` + "`of` with the parameter `with`" + `.
Expand Down Expand Up @@ -376,93 +468,3 @@ var StringFunctionType = func() *FunctionType {

return functionType
}()

// StringTypeEncodeHexFunctionType is the type of a view function that takes
// a single byte-array parameter `data`, for which no argument label is required,
// and returns a string.
var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "data",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
StringTypeAnnotation,
)

// StringTypeFromUtf8FunctionType is the type of a view function that takes
// a single byte-array parameter `bytes`, for which no argument label is required,
// and returns an optional string.
var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "bytes",
TypeAnnotation: ByteArrayTypeAnnotation,
},
},
NewTypeAnnotation(
&OptionalType{
Type: StringType,
},
),
)

// StringTypeFromCharactersFunctionType is the type of a view function that takes
// a single parameter `characters`, a variable-sized array of characters
// for which no argument label is required, and returns a string.
var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "characters",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: CharacterType,
}),
},
},
StringTypeAnnotation,
)

// StringTypeJoinFunctionType is the type of a view function that takes
// a variable-sized array of strings `strings`, for which no argument label is required,
// and a string `separator`, and returns a string.
var StringTypeJoinFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Label: ArgumentLabelNotRequired,
Identifier: "strings",
TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
Type: StringType,
}),
},
{
Identifier: "separator",
TypeAnnotation: NewTypeAnnotation(StringType),
},
},
StringTypeAnnotation,
)

// StringTypeSplitFunctionType is the type of a view function that takes
// a single string parameter `separator`
// and returns a variable-sized array of strings.
var StringTypeSplitFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "separator",
TypeAnnotation: StringTypeAnnotation,
},
},
NewTypeAnnotation(
&VariableSizedType{
Type: StringType,
},
),
)

// StringTypeReplaceAllFunctionType is the type of a view function that takes
// two string parameters, `of` and `with`, and returns a string.
// Neither parameter sets an explicit Label here; only the Identifier is given.
var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
FunctionPurityView,
[]Parameter{
{
Identifier: "of",
TypeAnnotation: StringTypeAnnotation,
},
{
Identifier: "with",
TypeAnnotation: StringTypeAnnotation,
},
},
StringTypeAnnotation,
)
Loading
Loading