From 5c7bc4b528effe30a789ecb9b579840287f0f6fc Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 30 Mar 2026 14:15:24 -0700 Subject: [PATCH 1/8] [SPARK-49543][SQL] Add SHOW COLLATIONS command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add SHOW COLLATIONS SQL syntax to list all Spark built-in collations. Supports optional LIKE pattern filtering (e.g. SHOW COLLATIONS LIKE 'UNICODE%'). Output schema: NAME, LANGUAGE, COUNTRY, ACCENT_SENSITIVITY, CASE_SENSITIVITY, PAD_ATTRIBUTE, ICU_VERSION — matching the existing collations() TVF but without the constant CATALOG/SCHEMA columns. Implementation follows the ShowCatalogsCommand pattern as collations are engine-global and not tied to any catalog or namespace. Co-Authored-By: Claude Sonnet 4.6 --- .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../sql/catalyst/parser/SqlBaseParser.g4 | 2 + .../spark/sql/execution/SparkSqlParser.scala | 7 +++ .../command/ShowCollationsCommand.scala | 57 +++++++++++++++++++ .../sql/connector/DataSourceV2SQLSuite.scala | 31 ++++++++++ .../execution/command/DDLParserSuite.scala | 12 ++++ 6 files changed, 110 insertions(+) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 89583c57d8e81..9bd3afbf0c22b 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -179,6 +179,7 @@ CLUSTERED: 'CLUSTERED'; CODEGEN: 'CODEGEN'; COLLATE: 'COLLATE'; COLLATION: 'COLLATION'; +COLLATIONS: 'COLLATIONS'; COLLECTION: 'COLLECTION'; COLUMN: 'COLUMN'; COLUMNS: 'COLUMNS'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 6b8a59ca9ae27..798b62a3ba17f 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -377,6 +377,7 @@ statement | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable | SHOW CURRENT namespace #showCurrentNamespace | SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs + | SHOW COLLATIONS (LIKE? pattern=stringLit)? #showCollations | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction | (DESC | DESCRIBE) PROCEDURE identifierReference #describeProcedure | (DESC | DESCRIBE) namespace EXTENDED? @@ -2321,6 +2322,7 @@ nonReserved | CODEGEN | COLLATE | COLLATION + | COLLATIONS | COLLECTION | COLUMN | COLUMNS diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 86977902407cd..2a25cdf3908b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -501,6 +501,13 @@ class SparkSqlAstBuilder extends AstBuilder { ShowCatalogsCommand(Option(ctx.pattern).map(x => string(visitStringLit(x)))) } + /** + * Create a [[ShowCollationsCommand]] logical command. + */ + override def visitShowCollations(ctx: ShowCollationsContext): LogicalPlan = withOrigin(ctx) { + ShowCollationsCommand(Option(ctx.pattern).map(x => string(visitStringLit(x)))) + } + /** * Converts a multi-part identifier to a TableIdentifier. 
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala new file mode 100644 index 0000000000000..a3495853c47cd --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.util.{CollationFactory, StringUtils} +import org.apache.spark.sql.types.StringType + +/** + * The command for `SHOW COLLATIONS`. 
+ */ +case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCommand { + override val output: Seq[Attribute] = Seq( + AttributeReference("NAME", StringType, nullable = false)(), + AttributeReference("LANGUAGE", StringType, nullable = true)(), + AttributeReference("COUNTRY", StringType, nullable = true)(), + AttributeReference("ACCENT_SENSITIVITY", StringType, nullable = false)(), + AttributeReference("CASE_SENSITIVITY", StringType, nullable = false)(), + AttributeReference("PAD_ATTRIBUTE", StringType, nullable = false)(), + AttributeReference("ICU_VERSION", StringType, nullable = true)()) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val collations = CollationFactory.listCollations().asScala + .map(CollationFactory.loadCollationMeta) + val filtered = pattern + .map(p => collations.filter(m => StringUtils.filterPattern(Seq(m.collationName), p).nonEmpty)) + .getOrElse(collations) + filtered.map { m => + Row( + m.collationName, + m.language, + m.country, + if (m.accentSensitivity) "ACCENT_SENSITIVE" else "ACCENT_INSENSITIVE", + if (m.caseSensitivity) "CASE_SENSITIVE" else "CASE_INSENSITIVE", + m.padAttribute, + m.icuVersion) + }.toSeq + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 826c23ccb08a8..b56be88830c79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3223,6 +3223,37 @@ class DataSourceV2SQLSuiteV1Filter Row("testcat"), Row("testcat2"))) } + test("SPARK-49543: ShowCollations") { + val schema = new StructType() + .add("NAME", StringType, nullable = false) + .add("LANGUAGE", StringType, nullable = true) + .add("COUNTRY", StringType, nullable = true) + .add("ACCENT_SENSITIVITY", StringType, nullable = false) + .add("CASE_SENSITIVITY", StringType, 
nullable = false) + .add("PAD_ATTRIBUTE", StringType, nullable = false) + .add("ICU_VERSION", StringType, nullable = true) + + val df = sql("SHOW COLLATIONS") + assert(df.schema === schema) + + val allCollations = df.collect() + assert(allCollations.exists(_.getString(0) == "UTF8_BINARY")) + assert(allCollations.exists(_.getString(0) == "UNICODE")) + assert(allCollations.exists(_.getString(0) == "UNICODE_CI")) + + val utf8Row = allCollations.find(_.getString(0) == "UTF8_BINARY").get + assert(utf8Row.getString(3) == "ACCENT_SENSITIVE") + assert(utf8Row.getString(4) == "CASE_SENSITIVE") + + val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE%'").collect() + assert(likeResult.nonEmpty) + assert(likeResult.forall(_.getString(0).startsWith("UNICODE"))) + + val exactResult = sql("SHOW COLLATIONS LIKE 'UTF8_BINARY'").collect() + assert(exactResult.length == 1) + assert(exactResult.head.getString(0) == "UTF8_BINARY") + } + test("CREATE INDEX should fail") { val t = "testcat.tbl" withTable(t) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 19be761b5e111..819815201e928 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -820,4 +820,16 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { parser.parsePlan("SHOW CATALOGS LIKE 'defau*'"), ShowCatalogsCommand(Some("defau*"))) } + + test("SHOW COLLATIONS") { + comparePlans( + parser.parsePlan("SHOW COLLATIONS"), + ShowCollationsCommand(None)) + comparePlans( + parser.parsePlan("SHOW COLLATIONS LIKE 'UNICODE%'"), + ShowCollationsCommand(Some("UNICODE%"))) + comparePlans( + parser.parsePlan("SHOW COLLATIONS LIKE 'UTF8_BINARY'"), + ShowCollationsCommand(Some("UTF8_BINARY"))) + } } From dab0682782642fb1e43e8c0cc4cd472df14a63ea Mon Sep 17 
00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 30 Mar 2026 17:29:37 -0700 Subject: [PATCH 2/8] [SPARK-49543][SQL] Update keyword golden files and tests for COLLATIONS token Add COLLATIONS to SQL keyword golden files and hardcoded keyword lists in ThriftServer and SparkConnect JDBC tests. Co-Authored-By: Claude Sonnet 4.6 --- .../connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala | 2 +- .../test/resources/sql-tests/results/keywords-enforced.sql.out | 1 + sql/core/src/test/resources/sql-tests/results/keywords.sql.out | 1 + .../test/resources/sql-tests/results/nonansi/keywords.sql.out | 1 + .../hive/thriftserver/ThriftServerWithSparkContextSuite.scala | 2 +- 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala index 58811bffa3a62..64ae4aa830a69 100644 --- a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala +++ b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectDatabaseMetaDataSuite.scala @@ -209,7 +209,7 @@ class SparkConnectDatabaseMetaDataSuite extends ConnectFunSuite with RemoteSpark withConnection { conn => val metadata = conn.getMetaData // scalastyle:off line.size.limit - assert(metadata.getSQLKeywords === 
"ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") + assert(metadata.getSQLKeywords === 
"ADD,AFTER,AGGREGATE,ALWAYS,ANALYZE,ANTI,ANY_VALUE,ARCHIVE,ASC,BINDING,BUCKET,BUCKETS,BYTE,CACHE,CACHED,CASCADE,CATALOG,CATALOGS,CHANGE,CHANGES,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATION,COLLATIONS,COLLECTION,COLUMNS,COMMENT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONTAINS,CONTINUE,COST,DATA,DATABASE,DATABASES,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAYOFYEAR,DAYS,DBPROPERTIES,DEFINED,DEFINER,DELAY,DELIMITED,DESC,DFS,DIRECTORIES,DIRECTORY,DISTRIBUTE,DIV,DO,ELSEIF,ENFORCED,ESCAPED,EVOLUTION,EXCHANGE,EXCLUDE,EXCLUSIVE,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,FIELDS,FILEFORMAT,FIRST,FLOW,FOLLOWING,FORMAT,FORMATTED,FOUND,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,HANDLER,HOURS,IDENTIFIED,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INPATH,INPUT,INPUTFORMAT,INVOKER,ITEMS,ITERATE,JSON,KEY,KEYS,LAST,LAZY,LEAVE,LEVEL,LIMIT,LINES,LIST,LOAD,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MEASURE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTES,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NORELY,NULLS,OFFSET,OPTION,OPTIONS,OUTPUTFORMAT,OVERWRITE,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,PRECEDING,PRINCIPALS,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,REDUCE,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,ROLE,ROLES,SCHEMA,SCHEMAS,SECONDS,SECURITY,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SETS,SHORT,SHOW,SINGLE,SKEWED,SORT,SORTED,SOURCE,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLES,TARGET,TBLPROPERTIES,TERMINATED,TIMEDIFF,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TOUCH,TRANSACTION,TRANSACTIONS,TRANSFORM,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNLOCK,UNPIVOT,UNSET,UNTIL,USE,VAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHILE,X,YEARS,ZONE") // scalastyle:on line.size.limit } } diff --git 
a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index 69599411d523f..9b034793b47b0 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE true COLLATION true +COLLATIONS true COLLECTION false COLUMN true COLUMNS false diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 4f09bd2e3266d..a3a8b183bba05 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE false COLLATION false +COLLATIONS false COLLECTION false COLUMN false COLUMNS false diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out index 4f09bd2e3266d..a3a8b183bba05 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out @@ -55,6 +55,7 @@ CLUSTERED false CODEGEN false COLLATE false COLLATION false +COLLATIONS false COLLECTION false COLUMN false COLUMNS false diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 29b6c766bfd6d..3d0de127e965b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait 
ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CACHED,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,
NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == 
"ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,ASENSITIVE,AT,ATOMIC,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CACHED,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHANGES,CHAR,CHARACTER,CHECK,CLEAR,CLOSE,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLATIONS,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONDITION,CONSTRAINT,CONTAINS,CONTINUE,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELAY,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,ELSEIF,END,ENFORCED,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXCLUSIVE,EXECUTE,EXISTS,EXIT,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FLOW,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FOUND,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GEOGRAPHY,GEOMETRY,GLOBAL,GRANT,GROUP,GROUPING,HANDLER,HAVING,HOUR,HOURS,IDENTIFIED,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCLUSIVE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,JSON,KEY,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LEVEL,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MATERIALIZED,MAX,MEASURE,MERGE,METRICS,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NEXT,NO,NONE,NORELY,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPEN,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING
,PRIMARY,PRINCIPALS,PROCEDURE,PROCEDURES,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READ,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSION,RECURSIVE,REDUCE,REFERENCES,REFRESH,RELY,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,SQLEXCEPTION,SQLSTATE,START,STATISTICS,STORED,STRATIFY,STREAM,STREAMING,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUE,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WATERMARK,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,WITHOUT,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } } From 928815b6e6e7d564b27eb0037abbfbe8c1b52eda Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 30 Mar 2026 21:14:00 -0700 Subject: [PATCH 3/8] [SPARK-49543][SQL] Fix keyword golden files and documentation for COLLATIONS Add COLLATIONS to the reserved keyword list in keywords-enforced.sql.out and add COLLATIONS documentation entry in sql-ref-ansi-compliance.md. COLLATIONS is reserved in ANSI mode (it is listed in nonReserved but not in ansiNonReserved) and non-reserved in non-ANSI mode; it is not part of the SQL-2016 standard. 
Co-Authored-By: Claude Sonnet 4.6 --- docs/sql-ref-ansi-compliance.md | 1 + .../test/resources/sql-tests/results/keywords-enforced.sql.out | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index f8181be1a351c..5cf4c4e7579c2 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -460,6 +460,7 @@ Below is a list of all the keywords in Spark SQL. |CODEGEN|non-reserved|non-reserved|non-reserved| |COLLATE|reserved|non-reserved|reserved| |COLLATION|reserved|non-reserved|reserved| +|COLLATIONS|reserved|non-reserved|non-reserved| |COLLECTION|non-reserved|non-reserved|non-reserved| |COLUMN|reserved|non-reserved|reserved| |COLUMNS|non-reserved|non-reserved|non-reserved| diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index 9b034793b47b0..772ec2ea51259 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -433,6 +433,7 @@ CAST CHECK COLLATE COLLATION +COLLATIONS COLUMN CONSTRAINT CREATE From ba0a7812d27216eed6c5145717af51e6db29b2eb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 30 Mar 2026 23:49:38 -0700 Subject: [PATCH 4/8] [SPARK-49543][SQL] Fix SHOW COLLATIONS LIKE pattern to use * wildcard filterPattern uses * (not %) as the wildcard character, consistent with other SHOW commands like SHOW NAMESPACES and SHOW FUNCTIONS. 
Co-Authored-By: Claude Sonnet 4.6 --- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b56be88830c79..b10eeebb2b84b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3245,7 +3245,7 @@ class DataSourceV2SQLSuiteV1Filter assert(utf8Row.getString(3) == "ACCENT_SENSITIVE") assert(utf8Row.getString(4) == "CASE_SENSITIVE") - val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE%'").collect() + val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE*'").collect() assert(likeResult.nonEmpty) assert(likeResult.forall(_.getString(0).startsWith("UNICODE"))) From c8a3010f5f1aa78579bec7f01d92b664ddaea298 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 31 Mar 2026 09:15:33 -0700 Subject: [PATCH 5/8] [SPARK-49543][SQL] Support % wildcard in SHOW COLLATIONS LIKE pattern Convert SQL LIKE wildcard % to glob * before passing to filterPattern, so SHOW COLLATIONS LIKE 'UNICODE%' works correctly. Revert test to use % per SQL LIKE convention. 
Co-Authored-By: Claude Sonnet 4.6 --- .../spark/sql/execution/command/ShowCollationsCommand.scala | 3 ++- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala index a3495853c47cd..70ab807db6be5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala @@ -41,7 +41,8 @@ case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCo val collations = CollationFactory.listCollations().asScala .map(CollationFactory.loadCollationMeta) val filtered = pattern - .map(p => collations.filter(m => StringUtils.filterPattern(Seq(m.collationName), p).nonEmpty)) + .map(p => collations.filter(m => + StringUtils.filterPattern(Seq(m.collationName), p.replace('%', '*')).nonEmpty)) .getOrElse(collations) filtered.map { m => Row( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b10eeebb2b84b..b56be88830c79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3245,7 +3245,7 @@ class DataSourceV2SQLSuiteV1Filter assert(utf8Row.getString(3) == "ACCENT_SENSITIVE") assert(utf8Row.getString(4) == "CASE_SENSITIVE") - val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE*'").collect() + val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE%'").collect() assert(likeResult.nonEmpty) assert(likeResult.forall(_.getString(0).startsWith("UNICODE"))) From 9d117a2a9cef6582eb2351674c4210adf76f9f4a Mon Sep 17 00:00:00 2001 From: 
Liang-Chi Hsieh Date: Tue, 31 Mar 2026 09:29:15 -0700 Subject: [PATCH 6/8] Revert "[SPARK-49543][SQL] Support % wildcard in SHOW COLLATIONS LIKE pattern" This reverts commit c8a3010f5f1aa78579bec7f01d92b664ddaea298. --- .../spark/sql/execution/command/ShowCollationsCommand.scala | 3 +-- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala index 70ab807db6be5..a3495853c47cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ShowCollationsCommand.scala @@ -41,8 +41,7 @@ case class ShowCollationsCommand(pattern: Option[String]) extends LeafRunnableCo val collations = CollationFactory.listCollations().asScala .map(CollationFactory.loadCollationMeta) val filtered = pattern - .map(p => collations.filter(m => - StringUtils.filterPattern(Seq(m.collationName), p.replace('%', '*')).nonEmpty)) + .map(p => collations.filter(m => StringUtils.filterPattern(Seq(m.collationName), p).nonEmpty)) .getOrElse(collations) filtered.map { m => Row( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b56be88830c79..b10eeebb2b84b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3245,7 +3245,7 @@ class DataSourceV2SQLSuiteV1Filter assert(utf8Row.getString(3) == "ACCENT_SENSITIVE") assert(utf8Row.getString(4) == "CASE_SENSITIVE") - val likeResult = sql("SHOW COLLATIONS LIKE 'UNICODE%'").collect() + val likeResult = sql("SHOW COLLATIONS LIKE 
'UNICODE*'").collect() assert(likeResult.nonEmpty) assert(likeResult.forall(_.getString(0).startsWith("UNICODE"))) From 624dddd2b09bbdfbc91b015c62214edeff70d41b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 31 Mar 2026 10:13:29 -0700 Subject: [PATCH 7/8] [SPARK-49543][DOCS] Add documentation for SHOW COLLATIONS Add sql-ref-syntax-aux-show-collations.md following the same structure as other SHOW command docs (description, syntax, parameters, output schema, examples, related statements). Also add entry to the SQL syntax index in sql-ref-syntax.md. Co-Authored-By: Claude Sonnet 4.6 --- docs/sql-ref-syntax-aux-show-collations.md | 111 +++++++++++++++++++++ docs/sql-ref-syntax.md | 1 + 2 files changed, 112 insertions(+) create mode 100644 docs/sql-ref-syntax-aux-show-collations.md diff --git a/docs/sql-ref-syntax-aux-show-collations.md b/docs/sql-ref-syntax-aux-show-collations.md new file mode 100644 index 0000000000000..15bed3d99d54c --- /dev/null +++ b/docs/sql-ref-syntax-aux-show-collations.md @@ -0,0 +1,111 @@ +--- +layout: global +title: SHOW COLLATIONS +displayTitle: SHOW COLLATIONS +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +Returns the list of collations supported by Spark. 
An optional pattern may be used to filter +the results. The `LIKE` keyword may be omitted before the pattern. + +### Syntax + +```sql +SHOW COLLATIONS [ [ LIKE ] regex_pattern ] +``` + +### Parameters + +* **regex_pattern** + + Specifies the regular expression pattern that is used to filter the results of the statement. + + * Except for `*` and `|` characters, the pattern works like a regular expression. + * `*` alone matches 0 or more characters and `|` is used to separate multiple different regular expressions, + any of which can match. + * The leading and trailing blanks are trimmed in the input pattern before processing. The pattern match is case-insensitive. + +### Output + +The output has the following columns: + +| Column | Type | Nullable | Description | +|--------|------|----------|-------------| +| NAME | STRING | No | The name of the collation. | +| LANGUAGE | STRING | Yes | The display language of the locale, or `null` for locale-independent collations. | +| COUNTRY | STRING | Yes | The display country of the locale, or `null` for locale-independent collations. | +| ACCENT_SENSITIVITY | STRING | No | Whether the collation is accent-sensitive (`ACCENT_SENSITIVE`) or accent-insensitive (`ACCENT_INSENSITIVE`). | +| CASE_SENSITIVITY | STRING | No | Whether the collation is case-sensitive (`CASE_SENSITIVE`) or case-insensitive (`CASE_INSENSITIVE`). | +| PAD_ATTRIBUTE | STRING | No | The pad attribute of the collation: `NO_PAD` or `RTRIM`. | +| ICU_VERSION | STRING | Yes | The ICU library version used for the collation, or `null` for non-ICU collations such as `UTF8_BINARY` and `UTF8_LCASE`.
| + +### Examples + +```sql +-- List all supported collations (results truncated) +SHOW COLLATIONS; ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UTF8_LCASE| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| null| +| UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_CI| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| en_USA| English| United States|ACCENT_SENSITIVE|CASE_SENSITIVE| NO_PAD| 78.2| +| en_USA_CI| English| United States|ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| ...| ...| ...| ...| ...| ...| ...| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ + +-- List all collations matching `UTF8_BINARY*` +SHOW COLLATIONS LIKE 'UTF8_BINARY*'; ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ +| UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UTF8_BINARY_RTRIM| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| RTRIM| null| ++-----------------+--------+-------+------------------+----------------+-------------+-----------+ + +-- List all collations matching `UNICODE*` +SHOW COLLATIONS LIKE 'UNICODE*'; ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY| ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ +| 
UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_AI| null| null| ACCENT_INSENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| +| UNICODE_AI_RTRIM| null| null|ACCENT_INSENSITIVE|CASE_SENSITIVE| RTRIM| 78.2| +| UNICODE_CI| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +| UNICODE_CI_AI| null| null| ACCENT_INSENSITIVE|CASE_INSENSITIVE| NO_PAD| 78.2| +|UNICODE_CI_AI_RTRIM| null| null|ACCENT_INSENSITIVE|CASE_INSENSITIVE| RTRIM| 78.2| +| UNICODE_CI_RTRIM| null| null| ACCENT_SENSITIVE|CASE_INSENSITIVE| RTRIM| 78.2| +| UNICODE_RTRIM| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| RTRIM| 78.2| ++--------------+--------+-------+-------------------+----------------+-------------+-----------+ + +-- List all collations matching `UNICODE` or `UTF8_BINARY` +SHOW COLLATIONS LIKE 'UNICODE|UTF8_BINARY'; ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +| NAME|LANGUAGE|COUNTRY|ACCENT_SENSITIVITY|CASE_SENSITIVITY|PAD_ATTRIBUTE|ICU_VERSION| ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +|UTF8_BINARY| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| null| +| UNICODE| null| null| ACCENT_SENSITIVE| CASE_SENSITIVE| NO_PAD| 78.2| ++-----------+--------+-------+------------------+----------------+-------------+-----------+ +``` + +### Related Statements + +* [STRING TYPE](sql-ref-datatypes.html) diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md index cf33dd5efb2e1..c67f0a4e1d327 100644 --- a/docs/sql-ref-syntax.md +++ b/docs/sql-ref-syntax.md @@ -125,6 +125,7 @@ You use SQL scripting to execute procedural logic in SQL. 
* [SET](sql-ref-syntax-aux-conf-mgmt-set.html) * [SET VAR](sql-ref-syntax-aux-set-var.html) * [SHOW CACHED TABLES](sql-ref-syntax-aux-show-cached-tables.html) + * [SHOW COLLATIONS](sql-ref-syntax-aux-show-collations.html) * [SHOW COLUMNS](sql-ref-syntax-aux-show-columns.html) * [SHOW CREATE TABLE](sql-ref-syntax-aux-show-create-table.html) * [SHOW DATABASES](sql-ref-syntax-aux-show-databases.html) From 6e0373f1caf67bb31cfb82a0b38798fee3d54aa4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 31 Mar 2026 10:20:30 -0700 Subject: [PATCH 8/8] [SPARK-49543][SQL] Fix SHOW COLLATIONS parser test to use * wildcard Co-Authored-By: Claude Sonnet 4.6 --- .../apache/spark/sql/execution/command/DDLParserSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 819815201e928..f7f6c22559b42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -826,8 +826,8 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { parser.parsePlan("SHOW COLLATIONS"), ShowCollationsCommand(None)) comparePlans( - parser.parsePlan("SHOW COLLATIONS LIKE 'UNICODE%'"), - ShowCollationsCommand(Some("UNICODE%"))) + parser.parsePlan("SHOW COLLATIONS LIKE 'UNICODE*'"), + ShowCollationsCommand(Some("UNICODE*"))) comparePlans( parser.parsePlan("SHOW COLLATIONS LIKE 'UTF8_BINARY'"), ShowCollationsCommand(Some("UTF8_BINARY")))