From 116b260f0879274992ee9b3e705bec930523632c Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 10 Apr 2024 13:41:34 +0200 Subject: [PATCH 01/13] Add indeterminate collation --- .../sql/catalyst/util/CollationFactory.java | 1 + python/pyspark/sql/tests/test_types.py | 3 ++ python/pyspark/sql/types.py | 2 +- .../apache/spark/sql/types/StringType.scala | 5 ++- .../analysis/CollationTypeCasts.scala | 38 +++++++++++++++---- .../plans/logical/ColumnDefinition.scala | 10 +++-- .../org/apache/spark/sql/CollationSuite.scala | 17 +++------ 7 files changed, 51 insertions(+), 25 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 119508a37e717..b1373b3017061 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -128,6 +128,7 @@ public Collation( private static final HashMap collationNameToIdMap = new HashMap<>(); public static final int UTF8_BINARY_COLLATION_ID = 0; + public static final int INDETERMINATE_COLLATION_ID = -1; public static final int UTF8_BINARY_LCASE_COLLATION_ID = 1; static { diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index bb854641906a9..85ab4dbcd86c8 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -868,6 +868,9 @@ def test_parse_datatype_string(self): self.assertEqual( StringType.fromCollationId(0), _parse_datatype_string("string COLLATE UTF8_BINARY") ) + self.assertEqual( + StringType.fromCollationId(-1), _parse_datatype_string("string COLLATE INDETERMINATE") + ) self.assertEqual( StringType.fromCollationId(1), _parse_datatype_string("string COLLATE UTF8_BINARY_LCASE"), diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 9b1bab4c23fa9..90f284a245b69 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -252,7 +252,7 @@ class StringType(AtomicType): name of the collation, default is UTF8_BINARY. """ - collationNames = ["UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI"] + collationNames = ["UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI", "INDETERMINATE"] def __init__(self, collation: Optional[str] = None): self.collationId = 0 if collation is None else self.collationNameToId(collation) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala index 47d85b2c645c8..019d14642a267 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala @@ -54,7 +54,10 @@ class StringType private(val collationId: Int) extends AtomicType with Serializa * If this is an UTF8_BINARY collation output is `string` due to backwards compatibility. */ override def typeName: String = - if (collationId == 0) "string" + if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) "string" + else if (collationId == CollationFactory.INDETERMINATE_COLLATION_ID) { + "string collate INDETERMINATE" + } else s"string collate ${CollationFactory.fetchCollation(collationId).collationName}" override def equals(obj: Any): Boolean = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 1a14b4227de8f..104ff493eade7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -22,7 +22,8 @@ import javax.annotation.Nullable import scala.annotation.tailrec import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{hasStringType, haveSameType} -import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, CaseWhen, Cast, Coalesce, Collate, Concat, ConcatWs, CreateArray, Expression, Greatest, If, In, InSubquery, Least} +import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, CaseWhen, Cast, Coalesce, Collate, Concat, ConcatWs, CreateArray, Expression, Greatest, If, In, InSubquery, Least, SortOrder} +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, DataType, StringType} @@ -37,7 +38,9 @@ object CollationTypeCasts extends TypeCoercionRule { case caseWhenExpr: CaseWhen if !haveSameType(caseWhenExpr.inputTypesForMerging) => val outputStringType = - getOutputCollation(caseWhenExpr.branches.map(_._2) ++ caseWhenExpr.elseValue) + getOutputCollation( + caseWhenExpr.branches.map(_._2) ++ caseWhenExpr.elseValue, + failOnIndeterminate = true) val newBranches = caseWhenExpr.branches.map { case (condition, value) => (condition, castStringType(value, outputStringType).getOrElse(value)) } @@ -45,9 +48,13 @@ object CollationTypeCasts extends TypeCoercionRule { caseWhenExpr.elseValue.map(e => castStringType(e, outputStringType).getOrElse(e)) CaseWhen(newBranches, newElseValue) + case concatExpr: Concat => + val newChildren = collateToSingleType(concatExpr.children, failOnIndeterminate = false) + concatExpr.withNewChildren(newChildren) + case otherExpr @ ( - _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Concat | _: Greatest | _: Least | - _: Coalesce | _: BinaryExpression | _: ConcatWs) => + _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Greatest | _: Least | + _: Coalesce | _: BinaryExpression | _: ConcatWs | _: SortOrder) => val newChildren = collateToSingleType(otherExpr.children) otherExpr.withNewChildren(newChildren) } @@ -83,8 +90,10 @@ object CollationTypeCasts extends TypeCoercionRule { /** * Collates input expressions to a single collation. */ - def collateToSingleType(exprs: Seq[Expression]): Seq[Expression] = { - val st = getOutputCollation(exprs) + def collateToSingleType( + exprs: Seq[Expression], + failOnIndeterminate: Boolean = true): Seq[Expression] = { + val st = getOutputCollation(exprs, failOnIndeterminate) exprs.map(e => castStringType(e, st).getOrElse(e)) } @@ -95,7 +104,7 @@ object CollationTypeCasts extends TypeCoercionRule { * any expressions, but will only be affected by collated StringTypes or * complex DataTypes with collated StringTypes (e.g. ArrayType) */ - def getOutputCollation(expr: Seq[Expression]): StringType = { + def getOutputCollation(expr: Seq[Expression], failOnIndeterminate: Boolean): StringType = { val explicitTypes = expr.filter(_.isInstanceOf[Collate]) .map(_.dataType.asInstanceOf[StringType].collationId) .distinct @@ -118,7 +127,20 @@ object CollationTypeCasts extends TypeCoercionRule { .distinctBy(_.collationId) if (implicitTypes.length > 1) { - throw QueryCompilationErrors.implicitCollationMismatchError() + if (failOnIndeterminate) { + throw QueryCompilationErrors.implicitCollationMismatchError() + } + else { + StringType(CollationFactory.INDETERMINATE_COLLATION_ID) + } + } + else if (implicitTypes.exists(_.collationId == -1)) { + if (failOnIndeterminate) { + throw QueryCompilationErrors.indeterminateCollationError() + } + else { + StringType(CollationFactory.INDETERMINATE_COLLATION_ID) + } } else { implicitTypes.headOption.getOrElse(SQLConf.get.defaultStringType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala index 83e50aa33c70d..acf84f324271e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala @@ -21,14 +21,14 @@ import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, UnaryExpression, Unevaluable} import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.util.GeneratedColumn +import org.apache.spark.sql.catalyst.util.{CollationFactory, GeneratedColumn} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.validateDefaultValueExpr import org.apache.spark.sql.catalyst.util.ResolveDefaultColumnsUtils.{CURRENT_DEFAULT_COLUMN_METADATA_KEY, EXISTS_DEFAULT_COLUMN_METADATA_KEY} -import org.apache.spark.sql.connector.catalog.{Column => V2Column, ColumnDefaultValue} +import org.apache.spark.sql.connector.catalog.{ColumnDefaultValue, Column => V2Column} import org.apache.spark.sql.connector.expressions.LiteralValue import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.connector.ColumnImpl -import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder, StructField} +import org.apache.spark.sql.types.{DataType, Metadata, MetadataBuilder, StringType, StructField} /** * Column definition for tables. This is an expression so that analyzer can resolve the default @@ -43,6 +43,10 @@ case class ColumnDefinition( generationExpression: Option[String] = None, metadata: Metadata = Metadata.empty) extends Expression with Unevaluable { + if (dataType == StringType(CollationFactory.INDETERMINATE_COLLATION_ID)) { + throw QueryCompilationErrors.indeterminateCollationError() + } + override def children: Seq[Expression] = defaultValue.toSeq override protected def withNewChildrenInternal( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 62150eaeac54d..7e31aa8e8ddb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -544,13 +544,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { // concat of columns of different collations is allowed // as long as we don't use the result in an unsupported function - // TODO(SPARK-47210): Add indeterminate support - checkError( - exception = intercept[AnalysisException] { - sql(s"SELECT c1 || c2 FROM $tableName") - }, - errorClass = "COLLATION_MISMATCH.IMPLICIT" - ) + checkAnswer(sql(s"SELECT c1 || c2 FROM $tableName"), Seq(Row("aa"), Row("AA"))) // concat + in @@ -610,7 +604,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { exception = intercept[AnalysisException] { sql(s"SELECT c1 FROM $tableName WHERE c1 || c3 = 'aa'") }, - errorClass = "COLLATION_MISMATCH.IMPLICIT" + errorClass = "INDETERMINATE_COLLATION" ) // concat on different implicit collations should succeed, @@ -619,7 +613,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { exception = intercept[AnalysisException] { sql(s"SELECT * FROM $tableName ORDER BY c1 || c3") }, - errorClass = "COLLATION_MISMATCH.IMPLICIT" + errorClass = "INDETERMINATE_COLLATION" ) // concat + in @@ -636,7 +630,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { exception = intercept[AnalysisException] { sql(s"SELECT * FROM $tableName WHERE contains(c1||c3, 'a')") }, - errorClass = "COLLATION_MISMATCH.IMPLICIT" + errorClass = "INDETERMINATE_COLLATION" ) checkError( @@ -672,7 +666,6 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } - // TODO(SPARK-47210): Add indeterminate support test("indeterminate collation checks") { val tableName = "t1" val newTableName = "t2" @@ -695,7 +688,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { exception = intercept[AnalysisException] { sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName") }, - errorClass = "COLLATION_MISMATCH.IMPLICIT") + errorClass = "INDETERMINATE_COLLATION") } } } From 1022c59fa027e5a465687b6ccc643a871df70d13 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Thu, 11 Apr 2024 12:25:40 +0200 Subject: [PATCH 02/13] Fail on Create --- .../analysis/CollationTypeCasts.scala | 8 ++++---- .../expressions/stringExpressions.scala | 7 ++++--- .../sql/execution/datasources/rules.scala | 20 +++++++++++++++++-- .../internal/BaseSessionStateBuilder.scala | 1 + .../org/apache/spark/sql/CollationSuite.scala | 15 +++++++++++++- .../sql/hive/HiveSessionStateBuilder.scala | 1 + 6 files changed, 42 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 104ff493eade7..d2da40ad0f328 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -48,13 +48,13 @@ object CollationTypeCasts extends TypeCoercionRule { caseWhenExpr.elseValue.map(e => castStringType(e, outputStringType).getOrElse(e)) CaseWhen(newBranches, newElseValue) - case concatExpr: Concat => - val newChildren = collateToSingleType(concatExpr.children, failOnIndeterminate = false) - concatExpr.withNewChildren(newChildren) + case concatExprs @ (_: Concat | _: ConcatWs) => + val newChildren = collateToSingleType(concatExprs.children, failOnIndeterminate = false) + concatExprs.withNewChildren(newChildren) case otherExpr @ ( _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Greatest | _: Least | - _: Coalesce | _: BinaryExpression | _: ConcatWs | _: SortOrder) => + _: Coalesce | _: BinaryExpression | _: SortOrder) => val newChildren = collateToSingleType(otherExpr.children) otherExpr.withNewChildren(newChildren) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index cf6c9d4f1d942..e206569a5b72f 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UPPER_OR_LO import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, GenericArrayData, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.types.StringTypeAnyCollation +import org.apache.spark.sql.internal.types.{AbstractArrayType, StringTypeAnyCollation} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods @@ -79,8 +79,9 @@ case class ConcatWs(children: Seq[Expression]) /** The 1st child (separator) is str, and rest are either str or array of str. */ override def inputTypes: Seq[AbstractDataType] = { - val arrayOrStr = TypeCollection(ArrayType(StringType), StringType) - StringType +: Seq.fill(children.size - 1)(arrayOrStr) + val arrayOrStr = + TypeCollection(AbstractArrayType(StringTypeAnyCollation), StringTypeAnyCollation) + StringTypeAnyCollation +: Seq.fill(children.size - 1)(arrayOrStr) } override def dataType: DataType = StringType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index fbb2ecb70d395..2b0e7f15726f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -29,12 +29,12 @@ import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, CreateViewCommand, DDLUtils} import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.InsertableRelation -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.ArrayImplicits._ @@ -620,3 +620,19 @@ object CollationCheck extends (LogicalPlan => Unit) { private def isCollationExpression(expression: Expression): Boolean = expression.isInstanceOf[Collation] || expression.isInstanceOf[Collate] } + +object IndeterminateCheck extends (LogicalPlan => Unit) { + def apply(plan: LogicalPlan): Unit = { + plan match { + case CreateDataSourceTableAsSelectCommand(_, _, query, _) if query.resolved => + if (query.schema.exists(sf => sf.dataType == StringType(-1))) { + throw QueryCompilationErrors.indeterminateCollationError() + } + case CreateViewCommand(_, _, _, _, _, plan, _, _, _, _, _) if plan.resolved => + if (plan.schema.exists(sf => sf.dataType == StringType(-1))) { + throw QueryCompilationErrors.indeterminateCollationError() + } + case _ => () + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 63c0d116ba3a4..ba8b4484bf59c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -224,6 +224,7 @@ abstract class BaseSessionStateBuilder( TableCapabilityCheck +: CommandCheck +: CollationCheck +: + IndeterminateCheck +: customCheckRules } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 66ad6198ea625..515024fef9146 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -689,7 +689,20 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { withTable(newTableName) { checkError( exception = intercept[AnalysisException] { - sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName") + sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName").explain(true) + }, + errorClass = "INDETERMINATE_COLLATION") + sql(s"CREATE TABLE $newTableName AS SELECT c1 FROM $tableName") + checkError( + exception = intercept[AnalysisException] { + sql(s"REPLACE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName").explain(true) + }, + errorClass = "INDETERMINATE_COLLATION") + } + withView("v") { + checkError( + exception = intercept[AnalysisException] { + sql(s"CREATE VIEW v AS SELECT c1 || c2 as con FROM $tableName").explain(true) }, errorClass = "INDETERMINATE_COLLATION") } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 416299b189cd5..bcd41245852ee 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -114,6 +114,7 @@ class HiveSessionStateBuilder( TableCapabilityCheck +: CommandCheck +: CollationCheck +: + IndeterminateCheck +: customCheckRules } From b1a8b1c446c2042dc2740d78384b7cb98650cc08 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Thu, 11 Apr 2024 13:18:13 +0200 Subject: [PATCH 03/13] Fix scala error --- .../spark/sql/catalyst/plans/logical/ColumnDefinition.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala index acf84f324271e..7140b84253a9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.util.{CollationFactory, GeneratedColumn} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns.validateDefaultValueExpr import org.apache.spark.sql.catalyst.util.ResolveDefaultColumnsUtils.{CURRENT_DEFAULT_COLUMN_METADATA_KEY, EXISTS_DEFAULT_COLUMN_METADATA_KEY} -import org.apache.spark.sql.connector.catalog.{ColumnDefaultValue, Column => V2Column} +import org.apache.spark.sql.connector.catalog.{Column => V2Column, ColumnDefaultValue} import org.apache.spark.sql.connector.expressions.LiteralValue import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.connector.ColumnImpl From 5e7a9b41c968ebccaf9f51d5027d9008a61aeafc Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Thu, 11 Apr 2024 13:19:42 +0200 Subject: [PATCH 04/13] Fix failing test --- .../test/scala/org/apache/spark/sql/CollationSuite.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 515024fef9146..8a6ecb3d91549 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -692,12 +692,6 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName").explain(true) }, errorClass = "INDETERMINATE_COLLATION") - sql(s"CREATE TABLE $newTableName AS SELECT c1 FROM $tableName") - checkError( - exception = intercept[AnalysisException] { - sql(s"REPLACE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName").explain(true) - }, - errorClass = "INDETERMINATE_COLLATION") } withView("v") { checkError( From 811db8577c8d86cec917c58cbb6349fcf61e8e97 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Fri, 12 Apr 2024 11:42:42 +0200 Subject: [PATCH 05/13] Fix tests --- .../expressions/stringExpressions.scala | 8 +-- .../sql/CollationStringExpressionsSuite.scala | 52 +++++++++++-------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index e206569a5b72f..0a484e6f5cd95 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -84,7 +84,7 @@ case class ConcatWs(children: Seq[Expression]) StringTypeAnyCollation +: Seq.fill(children.size - 1)(arrayOrStr) } - override def dataType: DataType = StringType + override def dataType: DataType = children.head.dataType override def nullable: Boolean = children.head.nullable override def foldable: Boolean = children.forall(_.foldable) @@ -103,7 +103,7 @@ case class ConcatWs(children: Seq[Expression]) val flatInputs = children.flatMap { child => child.eval(input) match { case s: UTF8String => Iterator(s) - case arr: ArrayData => arr.toArray[UTF8String](StringType) + case arr: ArrayData => arr.toArray[UTF8String](child.dataType) case null => Iterator(null.asInstanceOf[UTF8String]) } } @@ -111,7 +111,7 @@ case class ConcatWs(children: Seq[Expression]) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - if (children.forall(_.dataType == StringType)) { + if (children.forall(_.dataType.isInstanceOf[StringType])) { // All children are strings. In that case we can construct a fixed size array. val evals = children.map(_.genCode(ctx)) val separator = evals.head @@ -164,7 +164,7 @@ case class ConcatWs(children: Seq[Expression]) """ val (varCount, varBuild) = child.dataType match { - case StringType => + case _: StringType => val reprForValueCast = s"((UTF8String) $reprForValue)" ("", // we count all the StringType arguments num at once below. if (eval.isNull == TrueLiteral) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala index c26f3ae02255f..1861bc56ad242 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql import scala.collection.immutable.Seq import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch -import org.apache.spark.sql.catalyst.expressions.{Collation, ConcatWs, ExpressionEvalHelper, Literal, StringRepeat} +import org.apache.spark.sql.catalyst.expressions.{Collation, ConcatWs, CreateArray, ExpressionEvalHelper, Literal, StringRepeat} import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -48,32 +47,43 @@ class CollationStringExpressionsSuite } // Supported Collations val checks = Seq( - CollationTestCase("Spark", "SQL", "UTF8_BINARY", "Spark SQL") + CollationTestCase("Spark", "SQL", "UTF8_BINARY", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UTF8_BINARY_LCASE", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UNICODE", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UNICODE_CI", "Spark SQL") ) checks.foreach(ct => checkEvaluation(prepareConcatWs(" ", ct.collation, ct.s1, ct.s2), ct.expectedResult) ) - // Unsupported Collations - val fails = Seq( - CollationTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"), - CollationTestFail("ABC", "%B%", "UNICODE"), - CollationTestFail("ABC", "%b%", "UNICODE_CI") + // None for now + } + + test("Support ConcatWs string expression with Collated arrays") { + def prepareConcatWs( + sep: String, + collation: String, + inputs: Any*): ConcatWs = { + val collationId = CollationFactory.collationNameToId(collation) + val inputExprs = inputs.map( + s => CreateArray(s.asInstanceOf[Seq[String]] + .map(Literal.create(_, StringType(collationId))))) + val sepExpr = Literal.create(sep, StringType(collationId)) + ConcatWs(sepExpr +: inputExprs) + } + + // Supported Collations + val checks = Seq( + CollationTestCase("Spark", "SQL", "UTF8_BINARY", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UTF8_BINARY_LCASE", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UNICODE", "Spark SQL"), + CollationTestCase("Spark", "SQL", "UNICODE_CI", "Spark SQL") ) - fails.foreach(ct => - assert(prepareConcatWs(" ", ct.collation, ct.s1, ct.s2) - .checkInputDataTypes() == - DataTypeMismatch( - errorSubClass = "UNEXPECTED_INPUT_TYPE", - messageParameters = Map( - "paramIndex" -> "first", - "requiredType" -> """"STRING"""", - "inputSql" -> s""""' ' collate ${ct.collation}"""", - "inputType" -> s""""STRING COLLATE ${ct.collation}"""" - ) - ) - ) + checks.foreach(ct => + checkEvaluation(prepareConcatWs(" ", ct.collation, Seq(ct.s1, ct.s2)), ct.expectedResult) ) + // Unsupported Collations + // None for now } test("REPEAT check output type on explicitly collated string") { From 6c46f17a1fcc6ef663fc6b57089aaa86035a3976 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Fri, 12 Apr 2024 13:52:51 +0200 Subject: [PATCH 06/13] Fix ConcatWs --- .../expressions/stringExpressions.scala | 2 +- .../sql/CollationStringExpressionsSuite.scala | 18 +++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 96027a110c358..ed77272ac8fd2 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -103,7 +103,7 @@ case class ConcatWs(children: Seq[Expression]) val flatInputs = children.flatMap { child => child.eval(input) match { case s: UTF8String => Iterator(s) - case arr: ArrayData => arr.toArray[UTF8String](child.dataType) + case arr: ArrayData => arr.toArray[UTF8String](StringType) case null => Iterator(null.asInstanceOf[UTF8String]) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala index 97dea66975410..306e7de3e16e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala @@ -34,7 +34,10 @@ class CollationStringExpressionsSuite // Supported collations case class ConcatWsTestCase[R](s: String, a: Array[String], c: String, result: R) val testCases = Seq( - ConcatWsTestCase(" ", Array("Spark", "SQL"), "UTF8_BINARY", "Spark SQL") + ConcatWsTestCase(" ", Array("Spark", "SQL"), "UTF8_BINARY", "Spark SQL"), + ConcatWsTestCase(" ", Array("Spark", "SQL"), "UTF8_BINARY_LCASE", "Spark SQL"), + ConcatWsTestCase(" ", Array("Spark", "SQL"), "UNICODE", "Spark SQL"), + ConcatWsTestCase(" ", Array("Spark", "SQL"), "UNICODE_CI", "Spark SQL") ) testCases.foreach(t => { val arrCollated = t.a.map(s => s"collate('$s', '${t.c}')").mkString(", ") @@ -52,18 +55,7 @@ class CollationStringExpressionsSuite assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) }) // Unsupported collations - case class ConcatWsTestFail(s: String, a: Array[String], c: String) - val failCases = Seq( - ConcatWsTestFail(" ", Array("ABC", "%b%"), "UTF8_BINARY_LCASE"), - ConcatWsTestFail(" ", Array("ABC", "%B%"), "UNICODE"), - ConcatWsTestFail(" ", Array("ABC", "%b%"), "UNICODE_CI") - ) - failCases.foreach(t => { - val arrCollated = t.a.map(s => s"collate('$s', '${t.c}')").mkString(", ") - val query = s"SELECT concat_ws(collate('${t.s}', '${t.c}'), $arrCollated)" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) + // None // Collation mismatch val collationMismatch = intercept[AnalysisException] { sql("SELECT concat_ws(' ',collate('Spark', 'UTF8_BINARY_LCASE'),collate('SQL', 'UNICODE'))") From cb01f0897ed5ddf99d6d4fd8314afa272b7e7967 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 15 Apr 2024 11:34:53 +0200 Subject: [PATCH 07/13] Fix errors and improve code --- python/pyspark/sql/tests/test_types.py | 3 --- python/pyspark/sql/types.py | 4 +++- .../spark/sql/catalyst/analysis/CollationTypeCasts.scala | 4 ++-- .../org/apache/spark/sql/execution/datasources/rules.scala | 4 ++++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 30fdf2b37422f..af13adbc21bb2 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -869,9 +869,6 @@ def test_parse_datatype_string(self): self.assertEqual( StringType.fromCollationId(0), _parse_datatype_string("string COLLATE UTF8_BINARY") ) - self.assertEqual( - StringType.fromCollationId(-1), _parse_datatype_string("string COLLATE INDETERMINATE") - ) self.assertEqual( StringType.fromCollationId(1), _parse_datatype_string("string COLLATE UTF8_BINARY_LCASE"), diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 8f8d01b0ca267..480daa8af4b2b 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -254,7 +254,7 @@ class StringType(AtomicType): name of the collation, default is UTF8_BINARY. """ - collationNames = ["UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI", "INDETERMINATE"] + collationNames = ["UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI"] def __init__(self, collation: Optional[str] = None): self.collationId = 0 if collation is None else self.collationNameToId(collation) @@ -266,6 +266,8 @@ def fromCollationId(self, collationId: int) -> "StringType": def collationIdToName(self) -> str: if self.collationId == 0: return "" + elif self.collationId == -1: + return " collate INDETERMINATE" else: return " collate %s" % StringType.collationNames[self.collationId] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index d2da40ad0f328..66d2617b38e5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -22,7 +22,7 @@ import javax.annotation.Nullable import scala.annotation.tailrec import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{hasStringType, haveSameType} -import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, CaseWhen, Cast, Coalesce, Collate, Concat, ConcatWs, CreateArray, Expression, Greatest, If, In, InSubquery, Least, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, CaseWhen, Cast, Coalesce, Collate, Concat, ConcatWs, CreateArray, Expression, Greatest, If, In, InSubquery, Least} import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -54,7 +54,7 @@ object CollationTypeCasts extends TypeCoercionRule { case otherExpr @ ( _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Greatest | _: Least | - _: Coalesce | _: BinaryExpression | _: SortOrder) => + _: Coalesce | _: BinaryExpression) => val newChildren = collateToSingleType(otherExpr.children) otherExpr.withNewChildren(newChildren) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 2b0e7f15726f4..e5c5f9508c6c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -632,6 +632,10 @@ object IndeterminateCheck extends (LogicalPlan => Unit) { if (plan.schema.exists(sf => sf.dataType == StringType(-1))) { throw QueryCompilationErrors.indeterminateCollationError() } + case Sort(order, _, child) if child.resolved => + if (order.exists(sf => sf.dataType == StringType(-1))) { + throw QueryCompilationErrors.indeterminateCollationError() + } case _ => () } } From 68a98cccef180caf3bfc14e1835e0f4d18f1c54d Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Thu, 25 Apr 2024 09:45:17 +0200 Subject: [PATCH 08/13] Fix conflicts --- .../apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 211f92364a15f..ee7d8c7cac2a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -161,7 +161,7 @@ object CollationTypeCasts extends TypeCoercionRule { StringType(CollationFactory.INDETERMINATE_COLLATION_ID) } } - else if (implicitTypes.exists(_.collationId == -1)) { + else if (implicitTypes.contains(-1)) { if (failOnIndeterminate) { throw QueryCompilationErrors.indeterminateCollationError() } From 471af5a9d55faf53d374fb7eea0af54f875a5f0b Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:16:38 +0200 Subject: [PATCH 09/13] Resolve conflicts --- .../spark/sql/catalyst/analysis/CollationTypeCasts.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 0e519b2c33dec..01bb58d9e2109 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -79,9 +79,9 @@ object CollationTypeCasts extends TypeCoercionRule { concatExprs.withNewChildren(newChildren) case otherExpr @ ( - _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Greatest | _: Least | - _: Coalesce | _: BinaryExpression | _: Mask | _: StringReplace | - _: StringTranslate | _: StringTrim | _: StringTrimLeft | _: StringTrimRight) => + _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Greatest | _: Least | _: Coalesce | + _: BinaryExpression | _: Mask | _: StringReplace | _: StringTranslate | _: StringTrim | + _: StringTrimLeft | _: StringTrimRight) => val newChildren = collateToSingleType(otherExpr.children) otherExpr.withNewChildren(newChildren) } From a03ca517f8d5a96eda916b8eed66c177262e176d Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:29:11 +0200 Subject: [PATCH 10/13] Trigger tests --- .../apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 01bb58d9e2109..ce2c97742ace9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -74,6 +74,7 @@ object CollationTypeCasts extends TypeCoercionRule { val Seq(newStr, newPad) = collateToSingleType(Seq(str, pad)) stringPadExpr.withNewChildren(Seq(newStr, len, newPad)) + case concatExprs @ (_: Concat | _: ConcatWs) => val newChildren = collateToSingleType(concatExprs.children, failOnIndeterminate = false) concatExprs.withNewChildren(newChildren) From 2a61b585eeffb09cc49c9cd53746def0c51df47b Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:30:06 +0200 Subject: [PATCH 11/13] Remove whitespace --- .../spark/sql/catalyst/analysis/CollationTypeCasts.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index ce2c97742ace9..21e040c3bfe6a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -73,8 +73,7 @@ object CollationTypeCasts extends TypeCoercionRule { val Seq(str, len, pad) = stringPadExpr.children val Seq(newStr, newPad) = collateToSingleType(Seq(str, pad)) stringPadExpr.withNewChildren(Seq(newStr, len, newPad)) - - + case concatExprs @ (_: Concat | _: ConcatWs) => val newChildren = collateToSingleType(concatExprs.children, failOnIndeterminate = false) concatExprs.withNewChildren(newChildren) From ad8766a1c9cfc32132dd783848ae2fbf3efa3cb1 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 13 May 2024 14:53:59 +0200 Subject: [PATCH 12/13] Fix whitespace --- .../apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 21e040c3bfe6a..01bb58d9e2109 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -73,7 +73,7 @@ object CollationTypeCasts extends TypeCoercionRule { val Seq(str, len, pad) = stringPadExpr.children val Seq(newStr, newPad) = collateToSingleType(Seq(str, pad)) stringPadExpr.withNewChildren(Seq(newStr, len, newPad)) - + case concatExprs @ (_: Concat | _: ConcatWs) => val newChildren = collateToSingleType(concatExprs.children, failOnIndeterminate = false) concatExprs.withNewChildren(newChildren) From 5a10616fa5bc703bb17806114707275418d225d5 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 14 May 2024 08:35:56 +0200 Subject: [PATCH 13/13] Resolve conflicts --- .../org/apache/spark/sql/execution/datasources/rules.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 3994cb21f2412..f53852d74b7f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -691,7 +691,7 @@ object IndeterminateCheck extends (LogicalPlan => Unit) { if (query.schema.exists(sf => sf.dataType == StringType(-1))) { throw QueryCompilationErrors.indeterminateCollationError() } - case CreateViewCommand(_, _, _, _, _, plan, _, _, _, _, _) if plan.resolved => + case CreateViewCommand(_, _, _, _, _, plan, _, _, _, _, _, _) if plan.resolved => if (plan.schema.exists(sf => sf.dataType == StringType(-1))) { throw QueryCompilationErrors.indeterminateCollationError() }