diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index f8181be1a351c..5cf4c4e7579c2 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -460,6 +460,7 @@ Below is a list of all the keywords in Spark SQL. |CODEGEN|non-reserved|non-reserved|non-reserved| |COLLATE|reserved|non-reserved|reserved| |COLLATION|reserved|non-reserved|reserved| +|COLLATIONS|reserved|non-reserved|non-reserved| |COLLECTION|non-reserved|non-reserved|non-reserved| |COLUMN|reserved|non-reserved|reserved| |COLUMNS|non-reserved|non-reserved|non-reserved| diff --git a/docs/sql-ref-syntax-aux-show-collations.md b/docs/sql-ref-syntax-aux-show-collations.md new file mode 100644 index 0000000000000..15bed3d99d54c --- /dev/null +++ b/docs/sql-ref-syntax-aux-show-collations.md @@ -0,0 +1,111 @@ +--- +layout: global +title: SHOW COLLATIONS +displayTitle: SHOW COLLATIONS +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +Returns the list of collations supported by Spark. An optional pattern may be used to filter +the results; the `LIKE` keyword before the pattern may be omitted.
+ +### Syntax + +```sql +SHOW COLLATIONS [ [ LIKE ] regex_pattern ] +``` + +### Parameters + +* **regex_pattern** + + Specifies the regular expression pattern that is used to filter the results of the statement. + + * Except for the `*` and `|` characters, the pattern works like a regular expression. + * `*` alone matches 0 or more characters and `|` is used to separate multiple different regular expressions, + any of which can match. + * Leading and trailing blanks in the input pattern are trimmed before processing. The pattern match is case-insensitive. + +### Output + +The output has the following columns: + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| NAME | STRING | No | The name of the collation. | +| LANGUAGE | STRING | Yes | The display language of the locale, or `null` for locale-independent collations. | +| COUNTRY | STRING | Yes | The display country of the locale, or `null` for locale-independent collations. | +| ACCENT_SENSITIVITY | STRING | No | Whether the collation is accent-sensitive (`ACCENT_SENSITIVE`) or accent-insensitive (`ACCENT_INSENSITIVE`). | +| CASE_SENSITIVITY | STRING | No | Whether the collation is case-sensitive (`CASE_SENSITIVE`) or case-insensitive (`CASE_INSENSITIVE`). | +| PAD_ATTRIBUTE | STRING | No | The pad attribute of the collation: `NO_PAD` or `RTRIM`. | +| ICU_VERSION | STRING | Yes | The ICU library version used for the collation, or `null` for non-ICU collations such as `UTF8_BINARY` and `UTF8_LCASE`. 
| + +### Examples + +```sql +-- List all supported collations (results truncated) +SHOW COLLATIONS; ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UTF8_LCASE| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| null| +| UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_CI| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| en_USA| English| United States|ACCENT_SENSITIVE|CASE_SENSITIVE| NO_PAD| 78.2| +| en_USA_CI| English| United States|ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| ...| ...| ...| ...| ...| ...| ...| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ + +-- List all collations matching `UTF8_BINARY*` +SHOW COLLATIONS LIKE 'UTF8_BINARY*'; ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UTF8_BINARY_RTRIM| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| RTRIM| null| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ + +-- List all collations matching `UNICODE*` +SHOW COLLATIONS LIKE 'UNICODE*'; ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY| ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ +| 
UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_AI| null| null| ACCENT_INSENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_AI_RTRIM| null| null|ACCENT_INSENSITIVE|CASE_SENSITIVE| RTRIM| 78.2| +| UNICODE_CI| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| UNICODE_CI_AI| null| null| ACCENT_INSENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +|UNICODE_CI_AI_RTRIM| null| null|ACCENT_INSENSITIVE|CASE_INSENSITIVE| RTRIM| 78.2| +| UNICODE_CI_RTRIM| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| RTRIM| 78.2| +| UNICODE_RTRIM| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| RTRIM| 78.2| ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ + +-- List all collations matching `UNICODE` or `UTF8_BINARY` +SHOW COLLATIONS LIKE 'UNICODE|UTF8_BINARY'; ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +|UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +``` + +### Related Statements + +* [STRING TYPE](sql-ref-datatypes.html) diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md index cf33dd5efb2e1..c67f0a4e1d327 100644 --- a/docs/sql-ref-syntax.md +++ b/docs/sql-ref-syntax.md @@ -125,6 +125,7 @@ You use SQL scripting to execute procedural logic in SQL. 
* [SET](sql-ref-syntax-aux-conf-mgmt-set.html) * [SET VAR](sql-ref-syntax-aux-set-var.html) * [SHOW CACHED TABLES](sql-ref-syntax-aux-show-cached-tables.html) + * [SHOW COLLATIONS](sql-ref-syntax-aux-show-collations.html) * [SHOW COLUMNS](sql-ref-syntax-aux-show-columns.html) * [SHOW CREATE TABLE](sql-ref-syntax-aux-show-create-table.html) * [SHOW DATABASES](sql-ref-syntax-aux-show-databases.html) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 89583c57d8e81..9bd3afbf0c22b 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -179,6 +179,7 @@ CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; COLLATION: 'COLLATION'; +COLLATIONS: 'COLLATIONS'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 6b8a59ca9ae27..798b62a3ba17f 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -377,6 +377,7 @@ statement | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable | SHOW CURRENT namespace #showCurrentNamespace | SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs + | SHOW COLLATIONS (LIKE? pattern=stringLit)? #showCollations | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction | (DESC | DESCRIBE) PROCEDURE identifierReference #describeProcedure | (DESC | DESCRIBE) namespace EXTENDED? 
@@ -2321,6 +2322,7 @@ nonReserved | CODEGEN | COLLATE | COLLATION + | COLLATIONS | COLLECTION | COLUMN | COLUMNS diff --git a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala index 58811bffa3a62..64ae4aa830a69 100644 --- a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala +++ b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala @@ -209,7 +209,7 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark withConnection { conn => val metadata = conn.getMetaData // scalastyle:off line.size.limit - assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS
,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") + assert(metadata.getSQLKeywords === "ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,P
ARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") // scalastyle:on line.size.limit } }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 86977902407cd..2a25cdf3908b5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -501,6 +501,15 @@ class SparkSqlAstBuilder extends AstBuilder {
     ShowCatalogsCommand(Option(ctx.pattern).map(x => string(visitStringLit(x))))
   }
 
+  /**
+   * Builds the [[ShowCollationsCommand]] for a `SHOW COLLATIONS` statement, passing along the
+   * pattern string literal when the statement supplies one.
+   */
+  override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan = withOrigin(ctx) {
+    val namePattern = Option(ctx.pattern).map(lit => string(visitStringLit(lit)))
+    ShowCollationsCommand(namePattern)
+  }
+
   /**
    * Converts a multi-part identifier to a TableIdentifier.
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
new file mode 100644
index 0000000000000..a3495853c47cd
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import scala.jdk.CollectionConverters._
+
+import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
+import org.apache.spark.sql.catalyst.util.{CollationFactory, StringUtils}
+import org.apache.spark.sql.types.StringType
+
+/**
+ * The command for `SHOW COLLATIONS`.
+ *
+ * Produces one row per collation listed by `CollationFactory.listCollations()`, filled in from
+ * the metadata that `CollationFactory.loadCollationMeta` returns for it.
+ *
+ * @param pattern optional name filter; when defined, only collations whose name is accepted by
+ *                `StringUtils.filterPattern` are returned.
+ */
+case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCommand {
+  override val output: Seq[Attribute] = Seq(
+    AttributeReference("NAME", StringType, nullable = false)(),
+    AttributeReference("LANGUAGE", StringType, nullable = true)(),
+    AttributeReference("COUNTRY", StringType, nullable = true)(),
+    AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(),
+    AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(),
+    AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(),
+    AttributeReference("ICU_VERSION", StringType, nullable = true)())
+
+  /**
+   * Lists the collations, optionally restricted to the names accepted by `pattern`.
+   *
+   * @param sparkSession the session running the command; not read by this method
+   * @return one [[Row]] per retained collation, with values in the order declared by `output`
+   */
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    val collations = CollationFactory.listCollations().asScala
+      .map(CollationFactory.loadCollationMeta)
+      .toSeq
+    val filtered = pattern match {
+      case Some(p) =>
+        // Apply the pattern once to the full list of names instead of once per collation,
+        // then keep the matching entries in their original listing order.
+        val matchedNames = StringUtils.filterPattern(collations.map(_.collationName), p).toSet
+        collations.filter(m => matchedNames.contains(m.collationName))
+      case None => collations
+    }
+    filtered.map { m =>
+      Row(
+        m.collationName,
+        m.language,
+        m.country,
+        if (m.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE",
+        if (m.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE",
+        m.padAttribute,
+        m.icuVersion)
+    }
+  }
+}
diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index 69599411d523f..772ec2ea51259 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE true COLLATION true +COLLATIONS true COLLECTION false COLUMN true COLUMNS false @@ -432,6 +433,7 @@ CAST CHECK COLLATE COLLATION +COLLATIONS COLUMN CONSTRAINT CREATE diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 4f09bd2e3266d..a3a8b183bba05 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE false COLLATION false +COLLATIONS false COLLECTION false COLUMN false COLUMNS false diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out index 4f09bd2e3266d..a3a8b183bba05 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE false COLLATION false +COLLATIONS false COLLECTION false COLUMN false COLUMNS false
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 826c23ccb08a8..b10eeebb2b84b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -3223,6 +3223,50 @@ class DataSourceV2SQLSuiteV1Filter
       Row("testcat"), Row("testcat2")))
   }
 
+  test("SPARK-49543: ShowCollations") {
+    val schema = new StructType()
+      .add("NAME", StringType, nullable = false)
+      .add("LANGUAGE", StringType, nullable = true)
+      .add("COUNTRY", StringType, nullable = true)
+      .add("ACCENT_SENSITIVITY", StringType, nullable = false)
+      .add("CASE_SENSITIVITY", StringType, nullable = false)
+      .add("PAD_ATTRIBUTE", StringType, nullable = false)
+      .add("ICU_VERSION", StringType, nullable = true)
+
+    val df = sql("SHOW COLLATIONS")
+    assert(df.schema === schema)
+
+    val allCollations = df.collect()
+    val allNames = allCollations.map(_.getString(0)).toSet
+    assert(Set("UTF8_BINARY", "UNICODE", "UNICODE_CI").subsetOf(allNames))
+
+    // Per the SHOW COLLATIONS docs, UTF8_BINARY has null LANGUAGE, COUNTRY and ICU_VERSION.
+    val utf8Row = allCollations.find(_.getString(0) == "UTF8_BINARY").get
+    assert(utf8Row.isNullAt(1))
+    assert(utf8Row.isNullAt(2))
+    assert(utf8Row.getString(3) == "ACCENT_SENSITIVE")
+    assert(utf8Row.getString(4) == "CASE_SENSITIVE")
+    assert(utf8Row.getString(5) == "NO_PAD")
+    assert(utf8Row.isNullAt(6))
+
+    val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE*'").collect()
+    assert(likeResult.nonEmpty)
+    assert(likeResult.forall(_.getString(0).startsWith("UNICODE")))
+
+    val exactResult = sql("SHOW COLLATIONS LIKE 'UTF8_BINARY'").collect()
+    assert(exactResult.length == 1)
+    assert(exactResult.head.getString(0) == "UTF8_BINARY")
+
+    // The pattern match is case-insensitive and the LIKE keyword may be omitted.
+    Seq("SHOW COLLATIONS LIKE 'utf8_binary'", "SHOW COLLATIONS 'UTF8_BINARY'").foreach { q =>
+      assert(sql(q).collect().map(_.getString(0)).toSeq == Seq("UTF8_BINARY"))
+    }
+
+    // `|` separates alternative patterns, any of which may match.
+    val eitherResult = sql("SHOW COLLATIONS LIKE 'UNICODE|UTF8_BINARY'").collect()
+    assert(eitherResult.map(_.getString(0)).toSet == Set("UTF8_BINARY", "UNICODE"))
+  }
+
   test("CREATE INDEX should fail") {
     val t = "testcat.tbl"
     withTable(t) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index 19be761b5e111..f7f6c22559b42 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -820,4 +820,20 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession {
       parser.parsePlan("SHOW CATALOGS LIKE 'defau*'"),
       ShowCatalogsCommand(Some("defau*")))
   }
+
+  test("SHOW COLLATIONS") {
+    comparePlans(
+      parser.parsePlan("SHOW COLLATIONS"),
+      ShowCollationsCommand(None))
+    comparePlans(
+      parser.parsePlan("SHOW COLLATIONS LIKE 'UNICODE*'"),
+      ShowCollationsCommand(Some("UNICODE*")))
+    comparePlans(
+      parser.parsePlan("SHOW COLLATIONS LIKE 'UTF8_BINARY'"),
+      ShowCollationsCommand(Some("UTF8_BINARY")))
+    // The LIKE keyword is optional in the grammar; the bare pattern parses to the same plan.
+    comparePlans(
+      parser.parsePlan("SHOW COLLATIONS 'UNICODE*'"),
+      ShowCollationsCommand(Some("UNICODE*")))
+  }
 }
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 29b6c766bfd6d..3d0de127e965b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CACHED,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,ME
ASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CACHED,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING
,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } }