Skip to content

Commit

Permalink
Fix bug with the rfind on collated strings
Browse files Browse the repository at this point in the history
  • Loading branch information
miland-db committed Mar 26, 2024
1 parent b3bd34a commit 15c5491
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ private int collatedFind(UTF8String str, int start, int collationId) {

StringSearch stringSearch = CollationFactory.getStringSearch(this, str, collationId);
// Set search start position (start from character at start position)
stringSearch.setIndex(start);
stringSearch.setIndex(bytePosToChar(start));

// Return either the byte position or -1 if not found
return charPosToByte(stringSearch.next());
Expand Down Expand Up @@ -940,7 +940,7 @@ private int rfindLowercase(UTF8String str, int start) {

int prevStart = -1;
int matchStart = lowercaseThis.indexOf(lowercaseStr, 0);
while(charPosToByte(matchStart) < start) {
while(charPosToByte(matchStart) <= start) {
if(matchStart != -1) {
// Found a match, update the start position
prevStart = matchStart;
Expand All @@ -962,11 +962,20 @@ private int collatedRFind(UTF8String str, int start, int collationId) {
}

StringSearch stringSearch = CollationFactory.getStringSearch(this, str, collationId);
// Set search start position (start from character at start position)
stringSearch.setIndex(bytePosToChar(start));

// Return either the position or -1 if not found
return charPosToByte(stringSearch.previous());
int prevStart = -1;
int matchStart = stringSearch.next();
while(charPosToByte(matchStart) <= start) {
if(matchStart != StringSearch.DONE) {
// Found a match, update the start position
prevStart = matchStart;
matchStart = stringSearch.next();
} else {
return charPosToByte(prevStart);
}
}

return charPosToByte(prevStart);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,19 @@ class CollationStringExpressionsSuite extends QueryTest
checkEvaluation(SubstringIndex(string, delimiter, count), expected)
}

testSubstringIndex("wwwgapachegorg", "g", -3, 0, "apachegorg")
testSubstringIndex("www||apache||org", "||", 2, 0, "www||apache")
// UTF8_BINARY_LCASE
testSubstringIndex("AaAaAaAaAa", "aa", 2, 1, "A")
testSubstringIndex("www.apache.org", ".", 3, 1, "www.apache.org")
testSubstringIndex("wwwXapachexorg", "x", 2, 1, "wwwXapache")
testSubstringIndex("wwwxapacheXorg", "X", 1, 1, "www")
testSubstringIndex("www.apache.org", ".", 0, 1, "")
testSubstringIndex("www.apache.ORG", ".", -3, 1, "www.apache.ORG")
testSubstringIndex("wwwGapacheGorg", "g", 1, 1, "www")
testSubstringIndex("wwwGapacheGorg", "g", 3, 1, "wwwGapacheGor")
testSubstringIndex("gwwwGapacheGorg", "g", 3, 1, "gwwwGapache")
testSubstringIndex("wwwGapacheGorg", "g", -3, 1, "apacheGorg")
testSubstringIndex("wwwmapacheMorg", "M", -2, 1, "apacheMorg")
testSubstringIndex("www.apache.org", ".", -1, 1, "org")
testSubstringIndex("", ".", -2, 1, "")
Expand All @@ -105,25 +111,38 @@ class CollationStringExpressionsSuite extends QueryTest
testSubstringIndex("www||APACHE||org", "||", 2, 1, "www||APACHE")
testSubstringIndex("www||APACHE||org", "||", -1, 1, "org")
// UNICODE
testSubstringIndex("AaAaAaAaAa", "Aa", 2, 2, "Aa")
testSubstringIndex("wwwYapacheyorg", "y", 3, 2, "wwwYapacheyorg")
testSubstringIndex("www.apache.org", ".", 2, 2, "www.apache")
testSubstringIndex("wwwYapacheYorg", "Y", 1, 2, "www")
testSubstringIndex("wwwYapacheYorg", "y", 1, 2, "wwwYapacheYorg")
testSubstringIndex("wwwGapacheGorg", "g", 1, 2, "wwwGapacheGor")
testSubstringIndex("GwwwGapacheGorG", "G", 3, 2, "GwwwGapache")
testSubstringIndex("wwwGapacheGorG", "G", -3, 2, "apacheGorG")
testSubstringIndex("www.apache.org", ".", 0, 2, "")
testSubstringIndex("www.apache.org", ".", -3, 2, "www.apache.org")
testSubstringIndex("www.apache.org", ".", -2, 2, "apache.org")
testSubstringIndex("www.apache.org", ".", -1, 2, "org")
testSubstringIndex("", ".", -2, 2, "")
// scalastyle:off
testSubstringIndex("test大千世界X大千世界", "X", -1, 2, "大千世界")
testSubstringIndex("test大千世界X大千世界", "X", 1, 2, "test大千世界")
testSubstringIndex("大x千世界大千世x界", "x", 1, 2, "大")
testSubstringIndex("大x千世界大千世x界", "x", -1, 2, "界")
testSubstringIndex("大x千世界大千世x界", "x", -2, 2, "千世界大千世x界")
testSubstringIndex("大千世界大千世界", "千", 2, 2, "大千世界大")
// scalastyle:on
testSubstringIndex("www||apache||org", "||", 2, 2, "www||apache")
// UNICODE_CI
testSubstringIndex("AaAaAaAaAa", "aa", 2, 3, "A")
testSubstringIndex("www.apache.org", ".", 3, 3, "www.apache.org")
testSubstringIndex("wwwXapachexorg", "x", 2, 3, "wwwXapache")
testSubstringIndex("wwwxapacheXorg", "X", 1, 3, "www")
testSubstringIndex("www.apache.org", ".", 0, 3, "")
testSubstringIndex("wwwGapacheGorg", "g", 1, 3, "www")
testSubstringIndex("wwwGapacheGorg", "g", 3, 3, "wwwGapacheGor")
testSubstringIndex("gwwwGapacheGorg", "g", 3, 3, "gwwwGapache")
testSubstringIndex("wwwGapacheGorg", "g", -3, 3, "apacheGorg")
testSubstringIndex("www.apache.ORG", ".", -3, 3, "www.apache.ORG")
testSubstringIndex("wwwmapacheMorg", "M", -2, 3, "apacheMorg")
testSubstringIndex("www.apache.org", ".", -1, 3, "org")
Expand Down

0 comments on commit 15c5491

Please sign in to comment.