Revert unnecessary changes.

jovanpavl-db · Nov 28, 2024 · 7ba3569 · 7ba3569
1 parent 8e9254d
commit 7ba3569
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 46 deletions.
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
@@ -1530,29 +1530,20 @@ public static UTF8String trimRight(
 
   public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim,
       final int limit, final int collationId) {
-    CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
-    if (collation.isUtf8BinaryType && !collation.supportsSpaceTrimming) {
-      return input.split(delim, limit);
-    }
-
     if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) {
-      // For UTF8_BINARY_rtrim collation different implementation.
-      return binarySplitSQL(input, delim, limit, collationId);
+      return input.split(delim, limit);
     } else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) {
-      return lowercaseSplitSQL(input, delim, limit, collationId);
+      return lowercaseSplitSQL(input, delim, limit);
     } else {
       return icuSplitSQL(input, delim, limit, collationId);
     }
   }
 
-  public static UTF8String[] lowercaseSplitSQL(final UTF8String string, UTF8String delimiter,
-      final int limit, final int collationId) {
+  public static UTF8String[] lowercaseSplitSQL(final UTF8String string, final UTF8String delimiter,
+      final int limit) {
     if (delimiter.numBytes() == 0) return new UTF8String[] { string };
     if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
-    CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
-    if (collation.supportsSpaceTrimming) {
-      delimiter = CollationFactory.applyTrimmingPolicy(delimiter, collationId);
-    }
+
     List<UTF8String> strings = new ArrayList<>();
     UTF8String lowercaseDelimiter = lowerCaseCodePoints(delimiter);
     int startIndex = 0, nextMatch = 0, nextMatchLength;
@@ -1581,14 +1572,10 @@ public static UTF8String[] lowercaseSplitSQL(final UTF8String string, UTF8String
     return strings.toArray(new UTF8String[0]);
   }
 
-  public static UTF8String[] icuSplitSQL(final UTF8String string, UTF8String delimiter,
+  public static UTF8String[] icuSplitSQL(final UTF8String string, final UTF8String delimiter,
       final int limit, final int collationId) {
     if (delimiter.numBytes() == 0) return new UTF8String[] { string };
     if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
-    CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
-    if (collation.supportsSpaceTrimming) {
-      delimiter = CollationFactory.applyTrimmingPolicy(delimiter, collationId);
-    }
     List<UTF8String> strings = new ArrayList<>();
     String target = string.toValidString(), pattern = delimiter.toValidString();
     StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId);
@@ -1614,15 +1601,6 @@ public static UTF8String[] icuSplitSQL(final UTF8String string, UTF8String delim
     return strings.toArray(new UTF8String[0]);
   }
 
-  public static UTF8String[] binarySplitSQL(final UTF8String string, UTF8String delimiter,
-      final int limit, final int collationId) {
-    CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
-    if (collation.supportsSpaceTrimming) {
-      delimiter = CollationFactory.applyTrimmingPolicy(delimiter, collationId);
-    }
-    return string.split(delimiter, limit);
-  }
-
   // TODO: Add more collation-aware UTF8String operations here.
 
 }
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
@@ -37,15 +37,13 @@ public final class CollationSupport {
   public static class StringSplitSQL {
     public static UTF8String[] exec(final UTF8String s, UTF8String d, final int collationId) {
       CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
-      if (collation.isUtf8BinaryType && !collation.supportsSpaceTrimming) {
-        return execBinary(s, d);
+      if (collation.supportsSpaceTrimming) {
+        d = CollationFactory.applyTrimmingPolicy(d, collationId);
       }
-
       if (collation.isUtf8BinaryType) {
-        // special handling needed for utf8_binary_rtrim collation.
-        return execBinaryTrim(s, d, collationId);
+        return execBinary(s, d);
       } else if (collation.isUtf8LcaseType) {
-        return execLowercase(s, d, collationId);
+        return execLowercase(s, d);
       } else {
         return execICU(s, d, collationId);
       }
@@ -61,18 +59,13 @@ public static String genCode(final String s, final String d, final int collation
     public static UTF8String[] execBinary(final UTF8String string, final UTF8String delimiter) {
       return string.splitSQL(delimiter, -1);
     }
-    public static UTF8String[] execLowercase(final UTF8String string, final UTF8String delimiter,
-        final int collationId) {
-      return CollationAwareUTF8String.lowercaseSplitSQL(string, delimiter, -1, collationId);
+    public static UTF8String[] execLowercase(final UTF8String string, final UTF8String delimiter) {
+      return CollationAwareUTF8String.lowercaseSplitSQL(string, delimiter, -1);
     }
     public static UTF8String[] execICU(final UTF8String string, final UTF8String delimiter,
         final int collationId) {
       return CollationAwareUTF8String.icuSplitSQL(string, delimiter, -1, collationId);
     }
-    public static UTF8String[] execBinaryTrim(final UTF8String string, final UTF8String delimiter,
-        final int collationId) {
-      return CollationAwareUTF8String.binarySplitSQL(string, delimiter, -1, collationId);
-    }
   }
 
   public static class Contains {
@@ -705,7 +698,6 @@ public static boolean supportsLowercaseRegex(final int collationId) {
   }
 
   static final int lowercaseRegexFlags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
-
   public static int collationAwareRegexFlags(final int collationId) {
     return supportsLowercaseRegex(collationId) ? lowercaseRegexFlags : 0;
   }

diff --git a/...atalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/...atalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -582,10 +582,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E
   override def third: Expression = keyValueDelim
 
   override def inputTypes: Seq[AbstractDataType] =
-    Seq(
-      StringTypeNonCSAICollation,
-      StringTypeNonCSAICollation,
-      StringTypeNonCSAICollation)
+    Seq(StringTypeNonCSAICollation, StringTypeNonCSAICollation, StringTypeNonCSAICollation)
 
   override def dataType: DataType = MapType(first.dataType, first.dataType)