From 7ec631b28aa670db2c502fe33540d0b79844e6e7 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Tue, 30 Aug 2022 16:50:02 -0700 Subject: [PATCH 1/3] change and tests --- .../expressions/complexTypeCreator.scala | 6 +-- .../expressions/ComplexTypeSuite.scala | 10 ++--- .../spark/sql/StringFunctionsSuite.scala | 44 +++++++++++++++++++ 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index cdeb27d0c289..ca671ae09049 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -548,11 +548,7 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def dataType: DataType = MapType(StringType, StringType) override def checkInputDataTypes(): TypeCheckResult = { - if (Seq(pairDelim, keyValueDelim).exists(! _.foldable)) { - TypeCheckResult.TypeCheckFailure(s"$prettyName's delimiters must be foldable.") - } else { - super.checkInputDataTypes() - } + super.checkInputDataTypes() } private lazy val mapBuilder = new ArrayBasedMapBuilder(StringType, StringType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 7b482dec7e22..e3afd6a3bb3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -476,6 +476,10 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { val m5 = Map("a" -> null) checkEvaluation(new StringToMap(s5), m5) + val s6 = Literal("a=1&b=2&c=3") + val m6 = Map("a" -> "1", "b" -> "2", "c" -> "3") + checkEvaluation(new StringToMap(s6, NonFoldableLiteral("&"), NonFoldableLiteral("=")), m6) + checkExceptionInExpression[RuntimeException]( new StringToMap(Literal("a:1,b:2,a:3")), "Duplicate map key") withSQLConf(SQLConf.MAP_KEY_DEDUP_POLICY.key -> SQLConf.MapKeyDedupPolicy.LAST_WIN.toString) { @@ -492,12 +496,6 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { assert(StringToMap(Literal("a:1,b:2,c:3"), Literal(null), Literal(null)) .checkInputDataTypes().isFailure) assert(new StringToMap(Literal(null), Literal(null)).checkInputDataTypes().isFailure) - - assert(new StringToMap(Literal("a:1_b:2_c:3"), NonFoldableLiteral("_")) - .checkInputDataTypes().isFailure) - assert( - new StringToMap(Literal("a=1_b=2_c=3"), Literal("_"), NonFoldableLiteral("=")) - .checkInputDataTypes().isFailure) } test("SPARK-22693: CreateNamedStruct should not use global variables") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index d07be9c19714..f05d4481b011 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -606,6 +606,50 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) ) + checkAnswer( + df2.selectExpr("str_to_map(a, ',')"), + Seq(Row(Map("a" -> "1", "b" -> "2", "c" -> "3"))) + ) + + val df3 = Seq( + ("a=1&b=2", "&", "=", 0), + ("k#2%v#3", "%", "#", 1) + ).toDF("str", "delim1", "delim2", "flag") + + checkAnswer( + df3.selectExpr("str_to_map(str, delim1, delim2)"), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("k" -> "2", "v" -> "3")) + ) + ) + + val selectExpr = + """str_to_map(str, + |if(flag = 0, '&', '%'), + |if(flag = 0, '=', '#')) + |""".stripMargin + + checkAnswer( + df3.selectExpr(selectExpr), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("k" -> "2", "v" -> "3")) + ) + ) + + val df4 = Seq( + ("a:1&b:2", "&"), + ("k:2%v:3", "%") + ).toDF("str", "delim1") + + checkAnswer( + df4.selectExpr("str_to_map(str, delim1)"), + Seq( + Row(Map("a" -> "1", "b" -> "2")), + Row(Map("k" -> "2", "v" -> "3")) + ) + ) } test("SPARK-36148: check input data types of regexp_replace") { From bbc4f3547e419636a841b4eab4461922be53c02a Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 31 Aug 2022 16:55:14 -0700 Subject: [PATCH 2/3] Simplify --- .../apache/spark/sql/StringFunctionsSuite.scala | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index f05d4481b011..fbe1cc2ffe18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -624,20 +624,6 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { ) ) - val selectExpr = - """str_to_map(str, - |if(flag = 0, '&', '%'), - |if(flag = 0, '=', '#')) - |""".stripMargin - - checkAnswer( - df3.selectExpr(selectExpr), - Seq( - Row(Map("a" -> "1", "b" -> "2")), - Row(Map("k" -> "2", "v" -> "3")) - ) - ) - val df4 = Seq( ("a:1&b:2", "&"), ("k:2%v:3", "%") From da92fa0aa18e6e48f93b686e4a4e52bf83488b55 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sat, 3 Sep 2022 08:59:11 -0700 Subject: [PATCH 3/3] Review feedback; Remove unused column in test --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 4 ---- .../scala/org/apache/spark/sql/StringFunctionsSuite.scala | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index ca671ae09049..3259b59b34c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -547,10 +547,6 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E override def dataType: DataType = MapType(StringType, StringType) - override def checkInputDataTypes(): TypeCheckResult = { - super.checkInputDataTypes() - } - private lazy val mapBuilder = new ArrayBasedMapBuilder(StringType, StringType) override def nullSafeEval( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index fbe1cc2ffe18..082358b744ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -612,9 +612,9 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { ) val df3 = Seq( - ("a=1&b=2", "&", "=", 0), - ("k#2%v#3", "%", "#", 1) - ).toDF("str", "delim1", "delim2", "flag") + ("a=1&b=2", "&", "="), + ("k#2%v#3", "%", "#") + ).toDF("str", "delim1", "delim2") checkAnswer( df3.selectExpr("str_to_map(str, delim1, delim2)"),