From a435cf38e8541a45413f8c308e83be8851e63683 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 14:08:47 +0800 Subject: [PATCH 01/11] [SPARK-37906][SQL] spark-sql should not pass last simple comment to backend --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index e17b74873395e..770e8183b694a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -613,7 +613,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { isStatement = statementInProgress(index) } - if (beginIndex < line.length()) { + // Check the last char is end of nested bracketed comment. + val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 + if (!insideSimpleComment && !endOfBracketedComment && beginIndex < line.length()) { ret.add(line.substring(beginIndex)) } ret From f0723e7d9106fe33173c6d37c060d56d76021911 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 14 Jan 2022 16:59:06 +0800 Subject: [PATCH 02/11] Update SparkSQLCLIDriver.scala --- .../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 770e8183b694a..4794f6975fe15 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -615,7 +615,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Check the last char is end of nested bracketed comment. val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 - if (!insideSimpleComment && !endOfBracketedComment && beginIndex < line.length()) { + if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { ret.add(line.substring(beginIndex)) } ret From 1b8de35d85ec5dee4ba05aea21b92ef48c2e9c37 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Sat, 15 Jan 2022 09:37:53 +0800 Subject: [PATCH 03/11] add UT --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 12 +- .../sql/hive/thriftserver/CliSuite.scala | 131 +++++++++++++++++- 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 4794f6975fe15..cca07562bcde8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.security.HiveDelegationTokenProvider import org.apache.spark.sql.internal.SharedState -import org.apache.spark.util.ShutdownHookManager +import org.apache.spark.util.{ShutdownHookManager, Utils} /** * This code doesn't support remote connections in Hive 1.2+, as the underlying CliDriver @@ -324,9 +324,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Force initializing SparkSQLEnv. This is put here but not object SparkSQLCliDriver // because the Hive unit tests do not go through the main() code path. if (!isRemoteMode) { - SparkSQLEnv.init() - if (sessionState.getIsSilent) { - SparkSQLEnv.sparkContext.setLogLevel("warn") + if (!Utils.isTesting) { + SparkSQLEnv.init() + if (sessionState.getIsSilent) { + SparkSQLEnv.sparkContext.setLogLevel("warn") + } } } else { // Hive 1.2 + not supported in CLI @@ -527,7 +529,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // string, the origin implementation from Hive will not drop the trailing semicolon as expected, // hence we refined this function a little bit. // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql. - private def splitSemiColon(line: String): JList[String] = { + private[hive] def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false var insideSimpleComment = false diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 234fb89b01a83..0beed7ad98b20 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -20,19 +20,26 @@ package org.apache.spark.sql.hive.thriftserver import java.io._ import java.nio.charset.StandardCharsets import java.sql.Timestamp +import java.util.{ArrayList => JArrayList, List => JList} import java.util.Date +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.concurrent.Promise import scala.concurrent.duration._ +import org.apache.hadoop.hive.cli.CliSessionState import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.session.SessionState import org.scalatest.BeforeAndAfterAll +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer -import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.HiveUtils._ +import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.util.{ThreadUtils, Utils} @@ -638,4 +645,126 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { runCliWithin(2.minute, errorResponses = Seq("ParseException"))( "delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)") } + + private def splitSemiColon(line: String): JList[String] = { + var insideSingleQuote = false + var insideDoubleQuote = false + var insideSimpleComment = false + var bracketedCommentLevel = 0 + var escape = false + var beginIndex = 0 + var leavingBracketedComment = false + var isStatement = false + val ret = new JArrayList[String] + + def insideBracketedComment: Boolean = bracketedCommentLevel > 0 + def insideComment: Boolean = insideSimpleComment || insideBracketedComment + def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && + index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) + + for (index <- 0 until line.length) { + // Checks if we need to decrement a bracketed comment level; the last character '/' of + // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true + // in the previous loop and we decrement the level here if needed. + if (leavingBracketedComment) { + bracketedCommentLevel -= 1 + leavingBracketedComment = false + } + + if (line.charAt(index) == '\'' && !insideComment) { + // take a look to see if it is escaped + // See the comment above about SPARK-31595 + if (!escape && !insideDoubleQuote) { + // flip the boolean variable + insideSingleQuote = !insideSingleQuote + } + } else if (line.charAt(index) == '\"' && !insideComment) { + // take a look to see if it is escaped + // See the comment above about SPARK-31595 + if (!escape && !insideSingleQuote) { + // flip the boolean variable + insideDoubleQuote = !insideDoubleQuote + } + } else if (line.charAt(index) == '-') { + val hasNext = index + 1 < line.length + if (insideDoubleQuote || insideSingleQuote || insideComment) { + // Ignores '-' in any case of quotes or comment. + // Avoids to start a comment(--) within a quoted segment or already in a comment. + // Sample query: select "quoted value --" + // ^^ avoids starting a comment if it's inside quotes. + } else if (hasNext && line.charAt(index + 1) == '-') { + // ignore quotes and ; in simple comment + insideSimpleComment = true + } + } else if (line.charAt(index) == ';') { + if (insideSingleQuote || insideDoubleQuote || insideComment) { + // do not split + } else { + if (isStatement) { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + } + beginIndex = index + 1 + isStatement = false + } + } else if (line.charAt(index) == '\n') { + // with a new line the inline simple comment should end. + if (!escape) { + insideSimpleComment = false + } + } else if (line.charAt(index) == '/' && !insideSimpleComment) { + val hasNext = index + 1 < line.length + if (insideSingleQuote || insideDoubleQuote) { + // Ignores '/' in any case of quotes + } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { + // Decrements `bracketedCommentLevel` at the beginning of the next loop + leavingBracketedComment = true + } else if (hasNext && line.charAt(index + 1) == '*') { + bracketedCommentLevel += 1 + } + } + // set the escape + if (escape) { + escape = false + } else if (line.charAt(index) == '\\') { + escape = true + } + + isStatement = statementInProgress(index) + } + // Check the last char is end of nested bracketed comment. + val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 + if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { + ret.add(line.substring(beginIndex)) + } + ret + } + + test("SPARK-37906: Spark SQL CLI should not pass final comment") { + val sparkConf = new SparkConf(loadDefaults = true) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf) + val extraConfigs = HiveUtils.formatTimeVarsForHiveClient(hadoopConf) + + val cliConf = HiveClientImpl.newHiveConf(sparkConf, hadoopConf, extraConfigs) + + val sessionState = new CliSessionState(cliConf) + SessionState.setCurrentSessionState(sessionState) + val cli = new SparkSQLCLIDriver + Seq("SELECT 1; --comment" -> Seq("SELECT 1"), + "SELECT 1; /* comment */" -> Seq("SELECT 1"), + "SELECT 1; /* comment" -> Seq("SELECT 1", " /* comment"), + "SELECT 1; /* comment select 1;" -> Seq("SELECT 1", " /* comment select 1;"), + "/* This is a comment without end symbol SELECT 1;" -> + Seq("/* This is a comment without end symbol SELECT 1;"), + "SELECT 1; --comment\n" -> Seq("SELECT 1"), + "SELECT 1; /* comment */\n" -> Seq("SELECT 1"), + "SELECT 1; /* comment\n" -> Seq("SELECT 1", " /* comment\n"), + "SELECT 1; /* comment select 1;\n" -> Seq("SELECT 1", " /* comment select 1;\n"), + "/* This is a comment without end symbol SELECT 1;\n" -> + Seq("/* This is a comment without end symbol SELECT 1;\n") + ).foreach { case (query, ret) => + assert(cli.splitSemiColon(query).asScala === ret) + } + sessionState.close() + } } From f424afc875065a64ee71318d2ca687d12dc008bc Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 10:20:11 +0800 Subject: [PATCH 04/11] update --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 10 ++++------ .../apache/spark/sql/hive/thriftserver/CliSuite.scala | 9 ++++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index cca07562bcde8..3817b19db6d46 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.security.HiveDelegationTokenProvider import org.apache.spark.sql.internal.SharedState -import org.apache.spark.util.{ShutdownHookManager, Utils} +import org.apache.spark.util.ShutdownHookManager /** * This code doesn't support remote connections in Hive 1.2+, as the underlying CliDriver @@ -324,11 +324,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Force initializing SparkSQLEnv. This is put here but not object SparkSQLCliDriver // because the Hive unit tests do not go through the main() code path. if (!isRemoteMode) { - if (!Utils.isTesting) { - SparkSQLEnv.init() - if (sessionState.getIsSilent) { - SparkSQLEnv.sparkContext.setLogLevel("warn") - } + SparkSQLEnv.init() + if (sessionState.getIsSilent) { + SparkSQLEnv.sparkContext.setLogLevel("warn") } } else { // Hive 1.2 + not supported in CLI diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 0beed7ad98b20..8ffe568a09fee 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.session.SessionState import org.scalatest.BeforeAndAfterAll -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging @@ -742,11 +742,13 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { test("SPARK-37906: Spark SQL CLI should not pass final comment") { val sparkConf = new SparkConf(loadDefaults = true) + .setMaster("local-cluster[1,1,1024]") + .setAppName("SPARK-37906") + val sparkContext = new SparkContext(sparkConf) + SparkSQLEnv.sparkContext = sparkContext val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf) val extraConfigs = HiveUtils.formatTimeVarsForHiveClient(hadoopConf) - val cliConf = HiveClientImpl.newHiveConf(sparkConf, hadoopConf, extraConfigs) - val sessionState = new CliSessionState(cliConf) SessionState.setCurrentSessionState(sessionState) val cli = new SparkSQLCLIDriver @@ -766,5 +768,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { assert(cli.splitSemiColon(query).asScala === ret) } sessionState.close() + SparkSQLEnv.stop() } } From 8028622826f5d9ee1c5de6d86a71368c7620c3ea Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 11:10:36 +0800 Subject: [PATCH 05/11] Update CliSuite.scala --- .../sql/hive/thriftserver/CliSuite.scala | 94 ------------------- 1 file changed, 94 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 8ffe568a09fee..dacea5d738df5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -646,100 +646,6 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)") } - private def splitSemiColon(line: String): JList[String] = { - var insideSingleQuote = false - var insideDoubleQuote = false - var insideSimpleComment = false - var bracketedCommentLevel = 0 - var escape = false - var beginIndex = 0 - var leavingBracketedComment = false - var isStatement = false - val ret = new JArrayList[String] - - def insideBracketedComment: Boolean = bracketedCommentLevel > 0 - def insideComment: Boolean = insideSimpleComment || insideBracketedComment - def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && - index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) - - for (index <- 0 until line.length) { - // Checks if we need to decrement a bracketed comment level; the last character '/' of - // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true - // in the previous loop and we decrement the level here if needed. - if (leavingBracketedComment) { - bracketedCommentLevel -= 1 - leavingBracketedComment = false - } - - if (line.charAt(index) == '\'' && !insideComment) { - // take a look to see if it is escaped - // See the comment above about SPARK-31595 - if (!escape && !insideDoubleQuote) { - // flip the boolean variable - insideSingleQuote = !insideSingleQuote - } - } else if (line.charAt(index) == '\"' && !insideComment) { - // take a look to see if it is escaped - // See the comment above about SPARK-31595 - if (!escape && !insideSingleQuote) { - // flip the boolean variable - insideDoubleQuote = !insideDoubleQuote - } - } else if (line.charAt(index) == '-') { - val hasNext = index + 1 < line.length - if (insideDoubleQuote || insideSingleQuote || insideComment) { - // Ignores '-' in any case of quotes or comment. - // Avoids to start a comment(--) within a quoted segment or already in a comment. - // Sample query: select "quoted value --" - // ^^ avoids starting a comment if it's inside quotes. - } else if (hasNext && line.charAt(index + 1) == '-') { - // ignore quotes and ; in simple comment - insideSimpleComment = true - } - } else if (line.charAt(index) == ';') { - if (insideSingleQuote || insideDoubleQuote || insideComment) { - // do not split - } else { - if (isStatement) { - // split, do not include ; itself - ret.add(line.substring(beginIndex, index)) - } - beginIndex = index + 1 - isStatement = false - } - } else if (line.charAt(index) == '\n') { - // with a new line the inline simple comment should end. - if (!escape) { - insideSimpleComment = false - } - } else if (line.charAt(index) == '/' && !insideSimpleComment) { - val hasNext = index + 1 < line.length - if (insideSingleQuote || insideDoubleQuote) { - // Ignores '/' in any case of quotes - } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - // Decrements `bracketedCommentLevel` at the beginning of the next loop - leavingBracketedComment = true - } else if (hasNext && line.charAt(index + 1) == '*') { - bracketedCommentLevel += 1 - } - } - // set the escape - if (escape) { - escape = false - } else if (line.charAt(index) == '\\') { - escape = true - } - - isStatement = statementInProgress(index) - } - // Check the last char is end of nested bracketed comment. - val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 - if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { - ret.add(line.substring(beginIndex)) - } - ret - } - test("SPARK-37906: Spark SQL CLI should not pass final comment") { val sparkConf = new SparkConf(loadDefaults = true) .setMaster("local-cluster[1,1,1024]") From 47d34df76843d2361fc52b850eb1bcd114fd69b8 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 14:05:57 +0800 Subject: [PATCH 06/11] Update CliSuite.scala --- .../scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index dacea5d738df5..c5ebc0e3e9344 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.hive.thriftserver import java.io._ import java.nio.charset.StandardCharsets import java.sql.Timestamp -import java.util.{ArrayList => JArrayList, List => JList} import java.util.Date import scala.collection.JavaConverters._ From ce8555e24213caaed59d5caf149371714d2bef29 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 23:19:06 +0800 Subject: [PATCH 07/11] Update CliSuite.scala --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index c5ebc0e3e9344..4af051746b96e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -668,7 +668,12 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "SELECT 1; /* comment\n" -> Seq("SELECT 1", " /* comment\n"), "SELECT 1; /* comment select 1;\n" -> Seq("SELECT 1", " /* comment select 1;\n"), "/* This is a comment without end symbol SELECT 1;\n" -> - Seq("/* This is a comment without end symbol SELECT 1;\n") + Seq("/* This is a comment without end symbol SELECT 1;\n"), + "/* comment */ SELECT 1;" -> Seq("/* comment */ SELECT 1"), + "SELECT /* comment */ 1;" -> Seq("SELECT /* comment */ 1"), + "-- comment " -> Seq(), + "-- comment \nSELECT 1" -> Seq("-- comment \nSELECT 1"), + "/* comment */ " -> Seq() ).foreach { case (query, ret) => assert(cli.splitSemiColon(query).asScala === ret) } From 41592774c0201e7d503cd9752ba2b9847193f849 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 23:26:18 +0800 Subject: [PATCH 08/11] Update SparkSQLCLIDriver.scala --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 3817b19db6d46..4af54d82a61b5 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -615,6 +615,11 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Check the last char is end of nested bracketed comment. val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 + // Spark SQL support simple comment and nested bracketed comment in query body, + // but if Spark SQL only received a comment, it will throw mismatched input exception. + // In Spark SQL CLI, if there is a completed comment in the end of whole query, + // Spark will ignore this comment, if there is an uncompleted statement or + // an uncompleted bracketed comment, Spark should also pass this part to the backend engine. if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { ret.add(line.substring(beginIndex)) } From e1acf79231498f98eaa30f00afb941baa54664e0 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 23:29:36 +0800 Subject: [PATCH 09/11] Update SparkSQLCLIDriver.scala --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 4af54d82a61b5..4d1f92e4eceb1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -615,11 +615,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } // Check the last char is end of nested bracketed comment. val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 - // Spark SQL support simple comment and nested bracketed comment in query body, - // but if Spark SQL only received a comment, it will throw mismatched input exception. + // Spark SQL support simple comment and nested bracketed comment in query body. + // But if Spark SQL only received a comment, it will throw mismatched input exception. // In Spark SQL CLI, if there is a completed comment in the end of whole query, - // Spark will ignore this comment, if there is an uncompleted statement or - // an uncompleted bracketed comment, Spark should also pass this part to the backend engine. + // since Spark SQL CLL use `;` to split the query, CLI will pass the comment + // to the backend engine and throw exception. CLI should ignore this comment, + // If there is an uncompleted statement or an uncompleted bracketed comment in the end, + // CLI should also pass this part to the backend engine. if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { ret.add(line.substring(beginIndex)) } From 6e1a4283ba0e607a8d3f6aa85887fcd81109d3d8 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Mon, 17 Jan 2022 23:34:36 +0800 Subject: [PATCH 10/11] Update SparkSQLCLIDriver.scala --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 4d1f92e4eceb1..41c11f4d64099 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -616,12 +616,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Check the last char is end of nested bracketed comment. val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 // Spark SQL support simple comment and nested bracketed comment in query body. - // But if Spark SQL only received a comment, it will throw mismatched input exception. + // But if Spark SQL only received a comment, it will throw parser exception. // In Spark SQL CLI, if there is a completed comment in the end of whole query, // since Spark SQL CLL use `;` to split the query, CLI will pass the comment // to the backend engine and throw exception. CLI should ignore this comment, // If there is an uncompleted statement or an uncompleted bracketed comment in the end, - // CLI should also pass this part to the backend engine. + // CLI should also pass this part to the backend engine, which may throw an exception + // with clear error message. if (!endOfBracketedComment && (isStatement || insideBracketedComment)) { ret.add(line.substring(beginIndex)) } From 615702911c124e25595f4b2dcea322676544df9a Mon Sep 17 00:00:00 2001 From: AngersZhuuuu Date: Tue, 18 Jan 2022 00:26:21 +0800 Subject: [PATCH 11/11] Update sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala Co-authored-by: Wenchen Fan --- .../apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 41c11f4d64099..4c26e93606083 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -616,7 +616,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Check the last char is end of nested bracketed comment. val endOfBracketedComment = leavingBracketedComment && bracketedCommentLevel == 1 // Spark SQL support simple comment and nested bracketed comment in query body. - // But if Spark SQL only received a comment, it will throw parser exception. + // But if Spark SQL receives a comment alone, it will throw parser exception. // In Spark SQL CLI, if there is a completed comment in the end of whole query, // since Spark SQL CLL use `;` to split the query, CLI will pass the comment // to the backend engine and throw exception. CLI should ignore this comment,