Merge pull request #3458 from onflow/bastian/fix-string-replaceall

onflow · Jul 10, 2024 · c145b9f
2 parents 33bf58a + 86215c4
commit c145b9f
Show file tree

Hide file tree

Showing 4 changed files with 245 additions and 151 deletions.
diff --git a/runtime/interpreter/value.go b/runtime/interpreter/value.go
@@ -1225,8 +1225,7 @@ func (v *StringValue) Concat(interpreter *Interpreter, other *StringValue, locat
 	memoryUsage := common.NewStringMemoryUsage(newLength)
 
 	// Meter computation as if the two strings were iterated.
-	length := len(v.Str) + len(other.Str)
-	interpreter.ReportComputation(common.ComputationKindLoop, uint(length))
+	interpreter.ReportComputation(common.ComputationKindLoop, uint(newLength))
 
 	return NewStringValue(
 		interpreter,
@@ -1479,17 +1478,22 @@ func (v *StringValue) GetMember(interpreter *Interpreter, locationRange Location
 			v,
 			sema.StringTypeReplaceAllFunctionType,
 			func(invocation Invocation) Value {
-				of, ok := invocation.Arguments[0].(*StringValue)
+				original, ok := invocation.Arguments[0].(*StringValue)
 				if !ok {
 					panic(errors.NewUnreachableError())
 				}
 
-				with, ok := invocation.Arguments[1].(*StringValue)
+				replacement, ok := invocation.Arguments[1].(*StringValue)
 				if !ok {
 					panic(errors.NewUnreachableError())
 				}
 
-				return v.ReplaceAll(invocation.Interpreter, invocation.LocationRange, of.Str, with.Str)
+				return v.ReplaceAll(
+					invocation.Interpreter,
+					invocation.LocationRange,
+					original,
+					replacement,
+				)
 			},
 		)
 	}
@@ -1580,7 +1584,7 @@ func (v *StringValue) Split(inter *Interpreter, locationRange LocationRange, sep
 				return remaining
 			}
 
-			separatorCharacterIndex := remaining.indexOf(inter, separator)
+			separatorCharacterIndex, _ := remaining.indexOf(inter, separator)
 			if separatorCharacterIndex < 0 {
 				return nil
 			}
@@ -1638,23 +1642,62 @@ func (v *StringValue) Explode(inter *Interpreter, locationRange LocationRange) *
 	)
 }
 
-func (v *StringValue) ReplaceAll(inter *Interpreter, _ LocationRange, of string, with string) *StringValue {
-	// Over-estimate the resulting string length.
-	// In the worst case, `of` can be empty in which case, `with` will be added at every index.
-	// e.g. `of` = "", `v` = "ABC", `with` = "1": result = "1A1B1C1".
-	strLen := len(v.Str)
-	lengthOverEstimate := (2*strLen + 1) * len(with)
+func (v *StringValue) ReplaceAll(
+	inter *Interpreter,
+	locationRange LocationRange,
+	original *StringValue,
+	replacement *StringValue,
+) *StringValue {
 
-	memoryUsage := common.NewStringMemoryUsage(lengthOverEstimate)
+	count := v.count(inter, locationRange, original)
+	if count == 0 {
+		return v
+	}
+
+	newByteLength := len(v.Str) + count*(len(replacement.Str)-len(original.Str))
+
+	memoryUsage := common.NewStringMemoryUsage(newByteLength)
 
 	// Meter computation as if the string was iterated.
-	inter.ReportComputation(common.ComputationKindLoop, uint(strLen))
+	inter.ReportComputation(common.ComputationKindLoop, uint(len(v.Str)))
+
+	remaining := v
 
 	return NewStringValue(
 		inter,
 		memoryUsage,
 		func() string {
-			return strings.ReplaceAll(v.Str, of, with)
+			var b strings.Builder
+			b.Grow(newByteLength)
+			for i := 0; i < count; i++ {
+
+				var originalCharacterIndex, originalByteOffset int
+				if original.Length() == 0 {
+					if i > 0 {
+						originalCharacterIndex = 1
+
+						remaining.prepareGraphemes()
+						remaining.graphemes.Next()
+						_, originalByteOffset = remaining.graphemes.Positions()
+					}
+				} else {
+					originalCharacterIndex, originalByteOffset = remaining.indexOf(inter, original)
+					if originalCharacterIndex < 0 {
+						panic(errors.NewUnreachableError())
+					}
+				}
+
+				b.WriteString(remaining.Str[:originalByteOffset])
+				b.WriteString(replacement.Str)
+
+				remaining = remaining.slice(
+					originalCharacterIndex+original.Length(),
+					remaining.Length(),
+					locationRange,
+				)
+			}
+			b.WriteString(remaining.Str)
+			return b.String()
 		},
 	)
 }
@@ -1865,14 +1908,14 @@ func (v *StringValue) isGraphemeBoundaryEndPrepared(end int) bool {
 }
 
 func (v *StringValue) IndexOf(inter *Interpreter, other *StringValue) IntValue {
-	index := v.indexOf(inter, other)
+	index, _ := v.indexOf(inter, other)
 	return NewIntValueFromInt64(inter, int64(index))
 }
 
-func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
+func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) (characterIndex int, byteOffset int) {
 
 	if len(other.Str) == 0 {
-		return 0
+		return 0, 0
 	}
 
 	// Meter computation as if the string was iterated.
@@ -1885,9 +1928,6 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
 	// - 'CharacterIndex' indicates Cadence characters (grapheme clusters)
 	// - 'ByteOffset' indicates bytes
 
-	// The resulting index, in terms of Cadence characters (grapheme clusters)
-	var characterIndex int
-
 	// Find the position of the substring in the string,
 	// by using strings.Index with an increasing start byte offset.
 	//
@@ -1923,19 +1963,20 @@ func (v *StringValue) indexOf(inter *Interpreter, other *StringValue) int {
 		if v.seekGraphemeBoundaryStartPrepared(absoluteFoundByteOffset, &characterIndex) &&
 			v.isGraphemeBoundaryEndPrepared(absoluteFoundByteOffset+len(other.Str)) {
 
-			return characterIndex
+			return characterIndex, absoluteFoundByteOffset
 		}
 
 		// Restore the grapheme iterator and character index
 		v.graphemes = &graphemesBackup
 		characterIndex = characterIndexBackup
 	}
 
-	return -1
+	return -1, -1
 }
 
 func (v *StringValue) Contains(inter *Interpreter, other *StringValue) BoolValue {
-	return AsBoolValue(v.indexOf(inter, other) >= 0)
+	characterIndex, _ := v.indexOf(inter, other)
+	return AsBoolValue(characterIndex >= 0)
 }
 
 func (v *StringValue) Count(inter *Interpreter, locationRange LocationRange, other *StringValue) IntValue {
@@ -1955,7 +1996,7 @@ func (v *StringValue) count(inter *Interpreter, locationRange LocationRange, oth
 	count := 0
 
 	for {
-		index := remaining.indexOf(inter, other)
+		index, _ := remaining.indexOf(inter, other)
 		if index == -1 {
 			return count
 		}

diff --git a/runtime/sema/string_type.go b/runtime/sema/string_type.go
@@ -22,26 +22,101 @@ import (
 	"github.com/onflow/cadence/runtime/errors"
 )
 
+var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Label:          ArgumentLabelNotRequired,
+			Identifier:     "data",
+			TypeAnnotation: ByteArrayTypeAnnotation,
+		},
+	},
+	StringTypeAnnotation,
+)
+
 const StringTypeEncodeHexFunctionName = "encodeHex"
 const StringTypeEncodeHexFunctionDocString = `
 Returns a hexadecimal string for the given byte array
 `
 
+var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Label:          ArgumentLabelNotRequired,
+			Identifier:     "bytes",
+			TypeAnnotation: ByteArrayTypeAnnotation,
+		},
+	},
+	NewTypeAnnotation(
+		&OptionalType{
+			Type: StringType,
+		},
+	),
+)
+
 const StringTypeFromUtf8FunctionName = "fromUTF8"
 const StringTypeFromUtf8FunctionDocString = `
 Attempt to decode the input as a UTF-8 encoded string. Returns nil if the input bytes are malformed UTF-8
 `
 
+var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Label:      ArgumentLabelNotRequired,
+			Identifier: "characters",
+			TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
+				Type: CharacterType,
+			}),
+		},
+	},
+	StringTypeAnnotation,
+)
+
 const StringTypeFromCharactersFunctionName = "fromCharacters"
 const StringTypeFromCharactersFunctionDocString = `
 Returns a string from the given array of characters
 `
 
+var StringTypeJoinFunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Label:      ArgumentLabelNotRequired,
+			Identifier: "strings",
+			TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
+				Type: StringType,
+			}),
+		},
+		{
+			Identifier:     "separator",
+			TypeAnnotation: NewTypeAnnotation(StringType),
+		},
+	},
+	StringTypeAnnotation,
+)
+
 const StringTypeJoinFunctionName = "join"
 const StringTypeJoinFunctionDocString = `
 Returns a string after joining the array of strings with the provided separator.
 `
 
+var StringTypeSplitFunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Identifier:     "separator",
+			TypeAnnotation: StringTypeAnnotation,
+		},
+	},
+	NewTypeAnnotation(
+		&VariableSizedType{
+			Type: StringType,
+		},
+	),
+)
+
 const StringTypeSplitFunctionName = "split"
 const StringTypeSplitFunctionDocString = `
 Returns a variable-sized array of strings after splitting the string on the delimiter.
@@ -246,6 +321,23 @@ Returns the number of non-overlapping instances of the given substring in this s
 If the given substring is an empty string, the function returns 1 + the number of characters in this string.
 `
 
+var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
+	FunctionPurityView,
+	[]Parameter{
+		{
+			Label:          "of",
+			Identifier:     "old",
+			TypeAnnotation: StringTypeAnnotation,
+		},
+		{
+			Label:          "with",
+			Identifier:     "replacement",
+			TypeAnnotation: StringTypeAnnotation,
+		},
+	},
+	StringTypeAnnotation,
+)
+
 const StringTypeReplaceAllFunctionName = "replaceAll"
 const StringTypeReplaceAllFunctionDocString = `
 Returns a new string after replacing all the occurrences of parameter ` + "`of` with the parameter `with`" + `.
@@ -376,93 +468,3 @@ var StringFunctionType = func() *FunctionType {
 
 	return functionType
 }()
-
-var StringTypeEncodeHexFunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Label:          ArgumentLabelNotRequired,
-			Identifier:     "data",
-			TypeAnnotation: ByteArrayTypeAnnotation,
-		},
-	},
-	StringTypeAnnotation,
-)
-
-var StringTypeFromUtf8FunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Label:          ArgumentLabelNotRequired,
-			Identifier:     "bytes",
-			TypeAnnotation: ByteArrayTypeAnnotation,
-		},
-	},
-	NewTypeAnnotation(
-		&OptionalType{
-			Type: StringType,
-		},
-	),
-)
-
-var StringTypeFromCharactersFunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Label:      ArgumentLabelNotRequired,
-			Identifier: "characters",
-			TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
-				Type: CharacterType,
-			}),
-		},
-	},
-	StringTypeAnnotation,
-)
-
-var StringTypeJoinFunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Label:      ArgumentLabelNotRequired,
-			Identifier: "strings",
-			TypeAnnotation: NewTypeAnnotation(&VariableSizedType{
-				Type: StringType,
-			}),
-		},
-		{
-			Identifier:     "separator",
-			TypeAnnotation: NewTypeAnnotation(StringType),
-		},
-	},
-	StringTypeAnnotation,
-)
-
-var StringTypeSplitFunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Identifier:     "separator",
-			TypeAnnotation: StringTypeAnnotation,
-		},
-	},
-	NewTypeAnnotation(
-		&VariableSizedType{
-			Type: StringType,
-		},
-	),
-)
-
-var StringTypeReplaceAllFunctionType = NewSimpleFunctionType(
-	FunctionPurityView,
-	[]Parameter{
-		{
-			Identifier:     "of",
-			TypeAnnotation: StringTypeAnnotation,
-		},
-		{
-			Identifier:     "with",
-			TypeAnnotation: StringTypeAnnotation,
-		},
-	},
-	StringTypeAnnotation,
-)