From 5543b654ef687bf83099cacc9f198bba6735a67d Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 25 Mar 2024 10:52:15 +0100 Subject: [PATCH 1/6] Change simpleStringin StringTypeCollated --- .../sql/catalyst/expressions/CollationTypeConstraints.scala | 4 +++- .../src/test/scala/org/apache/spark/sql/CollationSuite.scala | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala index cd909a45c1ed6..65bcc3faf802d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala @@ -53,6 +53,7 @@ abstract class StringTypeCollated extends AbstractDataType { * Use StringTypeBinary for expressions supporting only binary collation. */ case object StringTypeBinary extends StringTypeCollated { + // TODO: When this AbstractDataType is first used, think of a proper simpleString name override private[sql] def simpleString: String = "string_binary" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && other.asInstanceOf[StringType].isBinaryCollation @@ -62,6 +63,7 @@ case object StringTypeBinary extends StringTypeCollated { * Use StringTypeBinaryLcase for expressions supporting only binary and lowercase collation. */ case object StringTypeBinaryLcase extends StringTypeCollated { + // TODO: When this AbstractDataType is first used, think of a proper simpleString name override private[sql] def simpleString: String = "string_binary_lcase" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && (other.asInstanceOf[StringType].isBinaryCollation || @@ -72,6 +74,6 @@ case object StringTypeBinaryLcase extends StringTypeCollated { * Use StringTypeAnyCollation for expressions supporting all possible collation types. */ case object StringTypeAnyCollation extends StringTypeCollated { - override private[sql] def simpleString: String = "string_any_collation" + override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 146ba63cf402a..0490caee424eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -122,7 +122,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "paramIndex" -> "first", "inputSql" -> "\"1\"", "inputType" -> "\"INT\"", - "requiredType" -> "\"STRING_ANY_COLLATION\""), + "requiredType" -> "\"STRING\""), context = ExpectedContext( fragment = s"collate(1, 'UTF8_BINARY')", start = 7, stop = 31)) } From 8a11d6039fb5795437ae46a772e5de75c7d0bc48 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Tue, 26 Mar 2024 07:39:10 +0100 Subject: [PATCH 2/6] Add ticket to TODOs --- .../sql/catalyst/expressions/CollationTypeConstraints.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala index 65bcc3faf802d..a3fec1150eed4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationTypeConstraints.scala @@ -53,7 +53,7 @@ abstract class StringTypeCollated extends AbstractDataType { * Use StringTypeBinary for expressions supporting only binary collation. */ case object StringTypeBinary extends StringTypeCollated { - // TODO: When this AbstractDataType is first used, think of a proper simpleString name + // TODO (SPARK-47504): Rename this AbstractDataType's simpleString when it is first used override private[sql] def simpleString: String = "string_binary" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && other.asInstanceOf[StringType].isBinaryCollation @@ -63,7 +63,7 @@ case object StringTypeBinary extends StringTypeCollated { * Use StringTypeBinaryLcase for expressions supporting only binary and lowercase collation. */ case object StringTypeBinaryLcase extends StringTypeCollated { - // TODO: When this AbstractDataType is first used, think of a proper simpleString name + // TODO (SPARK-47504): Rename this AbstractDataType's simpleString when it is first used override private[sql] def simpleString: String = "string_binary_lcase" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && (other.asInstanceOf[StringType].isBinaryCollation || From bdd08d6be3b6a575687db24d974aaeb3057f942b Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Fri, 5 Apr 2024 09:37:55 +0200 Subject: [PATCH 3/6] Fix tests that use STRING_ANY_COLLATION --- .../spark/sql/catalyst/expressions/StringExpressionsSuite.scala | 2 +- .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index cda9676ca58b5..1fbd1ac9a29fd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -70,7 +70,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { errorSubClass = "UNEXPECTED_INPUT_TYPE", messageParameters = Map( "paramIndex" -> ordinalNumber(0), - "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or \"ARRAY\")", + "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")", "inputSql" -> "\"1\"", "inputType" -> "\"INT\"" ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 450c8dc2dc394..e42f397cbfc29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -2552,7 +2552,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { parameters = Map( "sqlExpr" -> "\"concat(map(1, 2), map(3, 4))\"", "paramIndex" -> "first", - "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"BINARY\" or \"ARRAY\")", + "requiredType" -> "(\"STRING\" or \"BINARY\" or \"ARRAY\")", "inputSql" -> "\"map(1, 2)\"", "inputType" -> "\"MAP\"" ), From 97b54fdc488bd9a70b5069978490d75ea83b8062 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 8 Apr 2024 08:33:13 +0200 Subject: [PATCH 4/6] Remove TODOs --- .../spark/sql/catalyst/expressions/StringTypeCollated.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala index adc9c5b504187..3c59b87fe566f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala @@ -30,8 +30,7 @@ abstract class StringTypeCollated extends AbstractDataType { * Use StringTypeBinary for expressions supporting only binary collation. */ case object StringTypeBinary extends StringTypeCollated { - // TODO (SPARK-47504): Rename this AbstractDataType's simpleString when it is first used - override private[sql] def simpleString: String = "string_binary" + override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && other.asInstanceOf[StringType].supportsBinaryEquality } @@ -40,8 +39,7 @@ case object StringTypeBinary extends StringTypeCollated { * Use StringTypeBinaryLcase for expressions supporting only binary and lowercase collation. */ case object StringTypeBinaryLcase extends StringTypeCollated { - // TODO (SPARK-47504): Rename this AbstractDataType's simpleString when it is first used - override private[sql] def simpleString: String = "string_binary_lcase" + override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && (other.asInstanceOf[StringType].supportsBinaryEquality || other.asInstanceOf[StringType].isUTF8BinaryLcaseCollation) From 2972bb4ccf2159f5663fa397e7d3b6d3b1af599e Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 8 Apr 2024 08:34:07 +0200 Subject: [PATCH 5/6] Refactor code --- .../spark/sql/catalyst/expressions/StringTypeCollated.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala index 3c59b87fe566f..67b65859e6bbb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/StringTypeCollated.scala @@ -24,13 +24,13 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} */ abstract class StringTypeCollated extends AbstractDataType { override private[sql] def defaultConcreteType: DataType = StringType + override private[sql] def simpleString: String = "string" } /** * Use StringTypeBinary for expressions supporting only binary collation. */ case object StringTypeBinary extends StringTypeCollated { - override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && other.asInstanceOf[StringType].supportsBinaryEquality } @@ -39,7 +39,6 @@ case object StringTypeBinary extends StringTypeCollated { * Use StringTypeBinaryLcase for expressions supporting only binary and lowercase collation. */ case object StringTypeBinaryLcase extends StringTypeCollated { - override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] && (other.asInstanceOf[StringType].supportsBinaryEquality || other.asInstanceOf[StringType].isUTF8BinaryLcaseCollation) @@ -49,6 +48,5 @@ case object StringTypeBinaryLcase extends StringTypeCollated { * Use StringTypeAnyCollation for expressions supporting all possible collation types. */ case object StringTypeAnyCollation extends StringTypeCollated { - override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other.isInstanceOf[StringType] } From 96c33822d95162b73019752d56d12e14e0ec7a48 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Mon, 8 Apr 2024 12:35:19 +0200 Subject: [PATCH 6/6] Fix failing tests --- .../org/apache/spark/sql/DataFrameFunctionsSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index ae556daf14c28..e42f397cbfc29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -1713,7 +1713,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "paramIndex" -> "second", "inputSql" -> "\"1\"", "inputType" -> "\"INT\"", - "requiredType" -> "\"STRING_ANY_COLLATION\"" + "requiredType" -> "\"STRING\"" ), queryContext = Array(ExpectedContext("", "", 0, 15, "array_join(x, 1)")) ) @@ -1727,7 +1727,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "paramIndex" -> "third", "inputSql" -> "\"1\"", "inputType" -> "\"INT\"", - "requiredType" -> "\"STRING_ANY_COLLATION\"" + "requiredType" -> "\"STRING\"" ), queryContext = Array(ExpectedContext("", "", 0, 21, "array_join(x, ', ', 1)")) ) @@ -1987,7 +1987,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "paramIndex" -> "first", "inputSql" -> "\"struct(1, a)\"", "inputType" -> "\"STRUCT\"", - "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")" + "requiredType" -> "(\"STRING\" or \"ARRAY\")" ), queryContext = Array(ExpectedContext("", "", 7, 29, "reverse(struct(1, 'a'))")) ) @@ -2002,7 +2002,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "paramIndex" -> "first", "inputSql" -> "\"map(1, a)\"", "inputType" -> "\"MAP\"", - "requiredType" -> "(\"STRING_ANY_COLLATION\" or \"ARRAY\")" + "requiredType" -> "(\"STRING\" or \"ARRAY\")" ), queryContext = Array(ExpectedContext("", "", 7, 26, "reverse(map(1, 'a'))")) )