From 0f4d289b7932c91186d2da66095ebb41b6cd58c0 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 12 Sep 2024 02:11:28 +0200 Subject: [PATCH] [SPARK-48906][SQL] Introduce `SHOW COLLATIONS LIKE ...` syntax to show all collations ### What changes were proposed in this pull request? This PR aims to introduce the `SHOW COLLATIONS LIKE ...` syntax to `show all collations`. ### Why are the changes needed? End-users will be able to obtain the `collations` currently supported by Spark through SQL. Other databases, such as `MySQL`, also have similar syntax, ref: https://dev.mysql.com/doc/refman/9.0/en/show-collation.html image postgresql: https://database.guide/how-to-return-a-list-of-available-collations-in-postgresql/ ### Does this PR introduce _any_ user-facing change? Yes, end-users will be able to obtain the `collations` currently supported by Spark through commands similar to the following |name|provider|version|binaryEquality|binaryOrdering|lowercaseEquality| | --------- | ----------- | ----------- | ----------- | ----------- | ----------- | ``` spark-sql (default)> SHOW COLLATIONS; UTF8_BINARY spark 1.0 true true false UTF8_LCASE spark 1.0 false false true ff_Adlm icu 153.120.0.0 false false false ff_Adlm_CI icu 153.120.0.0 false false false ff_Adlm_AI icu 153.120.0.0 false false false ff_Adlm_CI_AI icu 153.120.0.0 false false false ... spark-sql (default)> SHOW COLLATIONS LIKE '*UTF8_BINARY*'; UTF8_BINARY spark 1.0 true true false Time taken: 0.043 seconds, Fetched 1 row(s) ``` image ### How was this patch tested? Added a new unit test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47364 from panbingkun/show_collation_syntax. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/catalyst/util/CollationFactory.java | 143 +++++++++++++++++- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../sql/catalyst/parser/SqlBaseParser.g4 | 2 + .../sql/catalyst/catalog/SessionCatalog.scala | 15 +- .../ansi-sql-2016-reserved-keywords.txt | 1 + .../spark/sql/execution/SparkSqlParser.scala | 12 ++ .../command/ShowCollationsCommand.scala | 62 ++++++++ .../sql-tests/results/ansi/keywords.sql.out | 2 + .../sql-tests/results/keywords.sql.out | 1 + .../org/apache/spark/sql/CollationSuite.scala | 42 +++++ .../ThriftServerWithSparkContextSuite.scala | 2 +- 12 files changed, 278 insertions(+), 6 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 5640a2468d02e..4b88e15e8ed72 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -23,12 +23,14 @@ import java.util.function.Function; import java.util.function.BiFunction; import java.util.function.ToLongFunction; +import java.util.stream.Stream; +import com.ibm.icu.text.CollationKey; +import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.StringSearch; import com.ibm.icu.util.ULocale; -import com.ibm.icu.text.CollationKey; -import com.ibm.icu.text.Collator; +import com.ibm.icu.util.VersionInfo; import org.apache.spark.SparkException; import org.apache.spark.unsafe.types.UTF8String; @@ -88,6 +90,17 @@ public Optional getVersion() { } } + public record CollationMeta( + String catalog, + String schema, + String collationName, + String language, + String country, + String icuVersion, 
+ String padAttribute, + boolean accentSensitivity, + boolean caseSensitivity) { } + /** * Entry encapsulating all information about a collation. */ @@ -342,6 +355,23 @@ private static int collationNameToId(String collationName) throws SparkException } protected abstract Collation buildCollation(); + + protected abstract CollationMeta buildCollationMeta(); + + static List listCollations() { + return Stream.concat( + CollationSpecUTF8.listCollations().stream(), + CollationSpecICU.listCollations().stream()).toList(); + } + + static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) { + CollationMeta collationSpecUTF8 = + CollationSpecUTF8.loadCollationMeta(collationIdentifier); + if (collationSpecUTF8 == null) { + return CollationSpecICU.loadCollationMeta(collationIdentifier); + } + return collationSpecUTF8; + } } private static class CollationSpecUTF8 extends CollationSpec { @@ -364,6 +394,9 @@ private enum CaseSensitivity { */ private static final int CASE_SENSITIVITY_MASK = 0b1; + private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY"; + private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE"; + private static final int UTF8_BINARY_COLLATION_ID = new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).collationId; private static final int UTF8_LCASE_COLLATION_ID = @@ -406,7 +439,7 @@ private static CollationSpecUTF8 fromCollationId(int collationId) { protected Collation buildCollation() { if (collationId == UTF8_BINARY_COLLATION_ID) { return new Collation( - "UTF8_BINARY", + UTF8_BINARY_COLLATION_NAME, PROVIDER_SPARK, null, UTF8String::binaryCompare, @@ -417,7 +450,7 @@ protected Collation buildCollation() { /* supportsLowercaseEquality = */ false); } else { return new Collation( - "UTF8_LCASE", + UTF8_LCASE_COLLATION_NAME, PROVIDER_SPARK, null, CollationAwareUTF8String::compareLowerCase, @@ -428,6 +461,52 @@ protected Collation buildCollation() { /* supportsLowercaseEquality = */ true); } } + + @Override + protected 
CollationMeta buildCollationMeta() { + if (collationId == UTF8_BINARY_COLLATION_ID) { + return new CollationMeta( + CATALOG, + SCHEMA, + UTF8_BINARY_COLLATION_NAME, + /* language = */ null, + /* country = */ null, + /* icuVersion = */ null, + COLLATION_PAD_ATTRIBUTE, + /* accentSensitivity = */ true, + /* caseSensitivity = */ true); + } else { + return new CollationMeta( + CATALOG, + SCHEMA, + UTF8_LCASE_COLLATION_NAME, + /* language = */ null, + /* country = */ null, + /* icuVersion = */ null, + COLLATION_PAD_ATTRIBUTE, + /* accentSensitivity = */ true, + /* caseSensitivity = */ false); + } + } + + static List listCollations() { + CollationIdentifier UTF8_BINARY_COLLATION_IDENT = + new CollationIdentifier(PROVIDER_SPARK, UTF8_BINARY_COLLATION_NAME, "1.0"); + CollationIdentifier UTF8_LCASE_COLLATION_IDENT = + new CollationIdentifier(PROVIDER_SPARK, UTF8_LCASE_COLLATION_NAME, "1.0"); + return Arrays.asList(UTF8_BINARY_COLLATION_IDENT, UTF8_LCASE_COLLATION_IDENT); + } + + static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) { + try { + int collationId = CollationSpecUTF8.collationNameToId( + collationIdentifier.name, collationIdentifier.name.toUpperCase()); + return CollationSpecUTF8.fromCollationId(collationId).buildCollationMeta(); + } catch (SparkException ignored) { + // ignore + return null; + } + } } private static class CollationSpecICU extends CollationSpec { @@ -684,6 +763,20 @@ protected Collation buildCollation() { /* supportsLowercaseEquality = */ false); } + @Override + protected CollationMeta buildCollationMeta() { + return new CollationMeta( + CATALOG, + SCHEMA, + collationName(), + ICULocaleMap.get(locale).getDisplayLanguage(), + ICULocaleMap.get(locale).getDisplayCountry(), + VersionInfo.ICU_VERSION.toString(), + COLLATION_PAD_ATTRIBUTE, + caseSensitivity == CaseSensitivity.CS, + accentSensitivity == AccentSensitivity.AS); + } + /** * Compute normalized collation name. 
Components of collation name are given in order: * - Locale name @@ -704,6 +797,37 @@ private String collationName() { } return builder.toString(); } + + private static List allCollationNames() { + List collationNames = new ArrayList<>(); + for (String locale: ICULocaleToId.keySet()) { + // CaseSensitivity.CS + AccentSensitivity.AS + collationNames.add(locale); + // CaseSensitivity.CS + AccentSensitivity.AI + collationNames.add(locale + "_AI"); + // CaseSensitivity.CI + AccentSensitivity.AS + collationNames.add(locale + "_CI"); + // CaseSensitivity.CI + AccentSensitivity.AI + collationNames.add(locale + "_CI_AI"); + } + return collationNames.stream().sorted().toList(); + } + + static List listCollations() { + return allCollationNames().stream().map(name -> + new CollationIdentifier(PROVIDER_ICU, name, VersionInfo.ICU_VERSION.toString())).toList(); + } + + static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) { + try { + int collationId = CollationSpecICU.collationNameToId( + collationIdentifier.name, collationIdentifier.name.toUpperCase()); + return CollationSpecICU.fromCollationId(collationId).buildCollationMeta(); + } catch (SparkException ignored) { + // ignore + return null; + } + } } /** @@ -730,9 +854,12 @@ public CollationIdentifier identifier() { } } + public static final String CATALOG = "SYSTEM"; + public static final String SCHEMA = "BUILTIN"; public static final String PROVIDER_SPARK = "spark"; public static final String PROVIDER_ICU = "icu"; public static final List SUPPORTED_PROVIDERS = List.of(PROVIDER_SPARK, PROVIDER_ICU); + public static final String COLLATION_PAD_ATTRIBUTE = "NO_PAD"; public static final int UTF8_BINARY_COLLATION_ID = Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION_ID; @@ -923,4 +1050,12 @@ public static String getClosestSuggestionsOnInvalidName( return String.join(", ", suggestions); } + + public static List listCollations() { + return Collation.CollationSpec.listCollations(); + } + + public static 
CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) { + return Collation.CollationSpec.loadCollationMeta(collationIdentifier); + } } diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 3fa67036fd04b..fe5ddf27bf6c4 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -442,6 +442,7 @@ Below is a list of all the keywords in Spark SQL. |CODEGEN|non-reserved|non-reserved|non-reserved| |COLLATE|reserved|non-reserved|reserved| |COLLATION|reserved|non-reserved|reserved| +|COLLATIONS|reserved|non-reserved|reserved| |COLLECTION|non-reserved|non-reserved|non-reserved| |COLUMN|reserved|non-reserved|reserved| |COLUMNS|non-reserved|non-reserved|non-reserved| diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 28ebaeaaed6d0..9ea213f3bf4a6 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -162,6 +162,7 @@ CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; COLLATION: 'COLLATION'; +COLLATIONS: 'COLLATIONS'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index e9fc6c3ca4f2e..42f0094de3515 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -260,6 +260,7 @@ statement | SHOW PARTITIONS identifierReference partitionSpec? #showPartitions | SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)? (LIKE? (legacy=multipartIdentifier | pattern=stringLit))? #showFunctions + | SHOW COLLATIONS (LIKE? pattern=stringLit)? 
#showCollations | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable | SHOW CURRENT namespace #showCurrentNamespace | SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs @@ -1837,6 +1838,7 @@ nonReserved | CODEGEN | COLLATE | COLLATION + | COLLATIONS | COLLECTION | COLUMN | COLUMNS diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index d3a6cb6ae2845..5c14e261fafc8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -24,6 +24,7 @@ import java.util.concurrent.TimeUnit import javax.annotation.concurrent.GuardedBy import scala.collection.mutable +import scala.jdk.CollectionConverters.CollectionHasAsScala import scala.util.{Failure, Success, Try} import com.google.common.cache.{Cache, CacheBuilder} @@ -39,7 +40,8 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Expression, Expre import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} import org.apache.spark.sql.catalyst.trees.CurrentOrigin -import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, StringUtils} +import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} @@ -1899,6 +1901,17 @@ class SessionCatalog( .filter(isTemporaryFunction) } + /** + * List all built-in collations with the given pattern. 
+ */ + def listCollations(pattern: Option[String]): Seq[CollationMeta] = { + val collationIdentifiers = CollationFactory.listCollations().asScala.toSeq + val filteredCollationNames = StringUtils.filterPattern( + collationIdentifiers.map(_.getName), pattern.getOrElse("*")).toSet + collationIdentifiers.filter(ident => filteredCollationNames.contains(ident.getName)).map( + CollationFactory.loadCollationMeta) + } + // ----------------- // | Other methods | // ----------------- diff --git a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt index 46da60b7897b8..452cf930525bc 100644 --- a/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt +++ b/sql/catalyst/src/test/resources/ansi-sql-2016-reserved-keywords.txt @@ -48,6 +48,7 @@ CLOSE COALESCE COLLATE COLLATION +COLLATIONS COLLECT COLUMN COMMIT diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index a8261e5d98ba0..640abaea58abe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -1096,4 +1096,16 @@ class SparkSqlAstBuilder extends AstBuilder { withIdentClause(ctx.identifierReference(), UnresolvedNamespace(_)), cleanedProperties) } + + /** + * Create a [[ShowCollationsCommand]] command. + * Expected format: + * {{{ + * SHOW COLLATIONS (LIKE? 
pattern=stringLit)?; + * }}} + */ + override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan = withOrigin(ctx) { + val pattern = Option(ctx.pattern).map(x => string(visitStringLit(x))) + ShowCollationsCommand(pattern) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala new file mode 100644 index 0000000000000..179a841b013bd --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.util.CollationFactory.CollationMeta +import org.apache.spark.sql.types.StringType + +/** + * A command for `SHOW COLLATIONS`. + * + * The syntax of this command is: + * {{{ + * SHOW COLLATIONS (LIKE? 
pattern=stringLit)?; + * }}} + */ +case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCommand { + + override val output: Seq[Attribute] = Seq( + AttributeReference("COLLATION_CATALOG", StringType, nullable = false)(), + AttributeReference("COLLATION_SCHEMA", StringType, nullable = false)(), + AttributeReference("COLLATION_NAME", StringType, nullable = false)(), + AttributeReference("LANGUAGE", StringType)(), + AttributeReference("COUNTRY", StringType)(), + AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(), + AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(), + AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(), + AttributeReference("ICU_VERSION", StringType)()) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val systemCollations: Seq[CollationMeta] = + sparkSession.sessionState.catalog.listCollations(pattern) + + systemCollations.map(m => Row( + m.catalog, + m.schema, + m.collationName, + m.language, + m.country, + if (m.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE", + if (m.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE", + m.padAttribute, + m.icuVersion + )) + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out index e6a36ac2445cf..81ccc0f9efc13 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out @@ -48,6 +48,7 @@ CLUSTERED false CODEGEN false COLLATE true COLLATION true +COLLATIONS true COLLECTION false COLUMN true COLUMNS false @@ -381,6 +382,7 @@ CAST CHECK COLLATE COLLATION +COLLATIONS COLUMN CONSTRAINT CREATE diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 19816c8252c91..e145c57332eb2 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -48,6 +48,7 @@ CLUSTERED false CODEGEN false COLLATE false COLLATION false +COLLATIONS false COLLECTION false COLUMN false COLUMNS false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index a61be9eca8c31..b25cddb80762a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -1624,4 +1624,46 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } } + + test("show collations") { + assert(sql("SHOW COLLATIONS").collect().length >= 562) + + // verify that the output ordering is as expected (UTF8_BINARY, UTF8_LCASE, etc.) + val df = sql("SHOW COLLATIONS").limit(10) + checkAnswer(df, + Seq(Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null, + "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null), + Row("SYSTEM", "BUILTIN", "UTF8_LCASE", null, null, + "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", null), + Row("SYSTEM", "BUILTIN", "UNICODE", "", "", + "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "", + "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "", + "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "", + "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "", + "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "", + "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "", + "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", 
"BUILTIN", "af_CI_AI", "Afrikaans", "", + "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"))) + + checkAnswer(sql("SHOW COLLATIONS LIKE '*UTF8_BINARY*'"), + Row("SYSTEM", "BUILTIN", "UTF8_BINARY", null, null, + "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null)) + + checkAnswer(sql("SHOW COLLATIONS '*zh_Hant_HKG*'"), + Seq(Row("SYSTEM", "BUILTIN", "zh_Hant_HKG", "Chinese", "Hong Kong SAR China", + "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_AI", "Chinese", "Hong Kong SAR China", + "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI", "Chinese", "Hong Kong SAR China", + "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", "75.1.0.0"), + Row("SYSTEM", "BUILTIN", "zh_Hant_HKG_CI_AI", "Chinese", "Hong Kong SAR China", + "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", "75.1.0.0"))) + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 6f0b6bccac309..edef6371be8ae 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == 
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,S
ELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == "ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,L
IKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } }