Skip to content

Commit

Permalink
Add unit test for find_in_set and correct logic errors in collatedFin…
Browse files Browse the repository at this point in the history
…dInSet method
  • Loading branch information
miland-db committed Mar 21, 2024
1 parent 58aaee5 commit 6a52d92
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,9 @@ private int collatedFindInSet(UTF8String match, int collationId) {
int wordStart = 0;
while ((wordStart = stringSearch.next()) != StringSearch.DONE) {
if (stringSearch.getMatchLength() == stringSearch.getPattern().length()) {
boolean isValidStart = wordStart == 0 || setString.charAt(wordStart - 1) != ',';
boolean isValidEnd = wordStart + stringSearch.getMatchLength() != setString.length()
|| setString.charAt(wordStart + stringSearch.getMatchLength()) != ',';
boolean isValidStart = wordStart == 0 || setString.charAt(wordStart - 1) == ',';
boolean isValidEnd = wordStart + stringSearch.getMatchLength() == setString.length()
|| setString.charAt(wordStart + stringSearch.getMatchLength()) == ',';

if(isValidStart && isValidEnd) {
int pos = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import org.apache.spark.sql.test.SharedSparkSession
class CollationStringExpressionsSuite extends QueryTest with SharedSparkSession {

/**
 * One positive test case for a two-argument string expression: the two string inputs, the
 * name of the collation to apply, and the result the expression is expected to return.
 *
 * @tparam R type of the expected result
 */
case class CollationTestCase[R](s1: String, s2: String, collation: String, expectedResult: R)

/**
 * Inputs for a test case that carries no expected result.
 * NOTE(review): presumably meant for cases that are expected to fail; it is not referenced in
 * the visible part of this suite, and the type parameter `R` is not used by any field.
 */
case class CollationTestFail[R](s1: String, s2: String, collation: String)

test("Support ConcatWs string expression with Collation") {
Expand Down Expand Up @@ -117,6 +116,51 @@ class CollationStringExpressionsSuite extends QueryTest with SharedSparkSession
})
}

test("Support FindInSet with Collation") {
  // Positive cases: both arguments carry the same collation. The expected value is the
  // 1-based position of the word inside the comma-separated set, or 0 when it is absent.
  val supportedCases = Seq(
    CollationTestCase("a", "abc,b,ab,c,def", "UTF8_BINARY", 0),
    CollationTestCase("c", "abc,b,ab,c,def", "UTF8_BINARY", 4),
    CollationTestCase("abc", "abc,b,ab,c,def", "UTF8_BINARY", 1),
    CollationTestCase("ab", "abc,b,ab,c,def", "UTF8_BINARY", 3),
    CollationTestCase("AB", "abc,b,ab,c,def", "UTF8_BINARY", 0),
    CollationTestCase("Ab", "abc,b,ab,c,def", "UTF8_BINARY_LCASE", 3),
    CollationTestCase("ab", "abc,b,ab,c,def", "UNICODE", 3),
    CollationTestCase("aB", "abc,b,ab,c,def", "UNICODE", 0),
    CollationTestCase("AB", "abc,b,ab,c,def", "UNICODE_CI", 3)
  )
  for (tc <- supportedCases) {
    val wordArg = s"collate('${tc.s1}', '${tc.collation}')"
    val setArg = s"collate('${tc.s2}', '${tc.collation}')"
    checkAnswer(sql(s"SELECT find_in_set($wordArg, $setArg)"), Row(tc.expectedResult))
  }

  // Negative cases: the two arguments carry different collations, which must be rejected
  // with a COLLATION_MISMATCH analysis error.
  // NOTE(review): SubstringIndexTestFail (with fields s1, s2, c1, c2) is not declared in the
  // visible part of this suite - confirm it exists, or introduce a FindInSet-specific type.
  val mismatchedCases = Seq(
    SubstringIndexTestFail("a", "abc,b,ab,c,def", "UTF8_BINARY_LCASE", "UTF8_BINARY"),
    SubstringIndexTestFail("a", "abc,b,ab,c,def", "UNICODE_CI", "UNICODE")
  )
  for (tc <- mismatchedCases) {
    val expr = s"find_in_set(collate('${tc.s1}', '${tc.c1}'), collate('${tc.s2}', '${tc.c2}'))"
    val thrown = intercept[ExtendedAnalysisException] {
      sql(s"SELECT $expr")
    }
    // The fragment starts right after "SELECT " (7 characters). The fixed text of `expr` is
    // 45 characters long, so its inclusive end index is 7 + 45 - 1 = 51 plus the lengths of
    // the four interpolated values.
    val expectedStop = 51 + tc.s1.length + tc.c1.length + tc.s2.length + tc.c2.length
    val expectedParameters = Map(
      "sqlExpr" -> s"\"find_in_set(collate(${tc.s1}), collate(${tc.s2}))\"",
      "collationNameLeft" -> s"${tc.c1}",
      "collationNameRight" -> s"${tc.c2}"
    )
    checkError(
      exception = thrown,
      errorClass = "DATATYPE_MISMATCH.COLLATION_MISMATCH",
      sqlState = "42K09",
      parameters = expectedParameters,
      context = ExpectedContext(fragment = expr, start = 7, stop = expectedStop)
    )
  }
}

// TODO: Add more tests for other string expressions

}
Expand Down

0 comments on commit 6a52d92

Please sign in to comment.